fstpackage / synthetic
1
#  syntheticbench - R package for benchmarking of dataset serialization
2
#
3
#  Copyright (C) 2019-present, Mark AJ Klik
4
#
5
#  This file is part of the lazyvec R package.
6
#
7
#  The lazyvec R package is free software: you can redistribute it and/or modify it
8
#  under the terms of the GNU Affero General Public License version 3 as
9
#  published by the Free Software Foundation.
10
#
11
#  The lazyvec R package is distributed in the hope that it will be useful, but
12
#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
#  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
14
#  for more details.
15
#
16
#  You should have received a copy of the GNU Affero General Public License along
17
#  with the lazyvec R package. If not, see <http://www.gnu.org/licenses/>.
18
#
19
#  You can contact the author at:
20
#  - syntheticbench R package source repository : https://github.com/fstpackage/syntheticbench
21

22

23
# Pool of characters that generated strings are built from:
# upper-case letters, lower-case letters and the digits 0-9 (as strings).
char_pool <- c(LETTERS, letters, as.character(0:9))
25

26

27
# Build one random string of `size` characters drawn from `char_pool`.
#
# Characters are sampled with replacement, so a character may occur more than
# once in a string and `size` may exceed the number of characters in the pool
# (sampling without replacement would fail for such sizes).
# A `size` of 0 yields the empty string "".
generate_string <- function(size) {
  paste0(sample(char_pool, size, replace = TRUE), collapse = "")
}
30

31

32
#' Generate a character vector with a certain distribution of string lengths
#'
#' String lengths are drawn uniformly from `min_str_size:max_str_size`. When
#' `max_distict_values` is given, that many strings are generated first and
#' the result is sampled (with replacement) from those strings.
#'
#' @param length length of the vector
#' @param min_str_size minimum string length
#' @param max_str_size maximum string length
#' @param max_distict_values maximum number of distinct values in the vector,
#'   or `NULL` (the default) to generate every element independently
#'
#' @return character vector
#' @export
sample_string <- function(length, min_str_size = 1, max_str_size = 10, max_distict_values = NULL) {
  size_pool <- min_str_size:max_str_size

  # Generate `n` random strings with sizes taken from `size_pool`.
  random_strings <- function(n) {
    # Draw by index: sample(x, ...) treats a single number x as 1:x, which
    # would give wrong string sizes when min_str_size equals max_str_size.
    sizes <- size_pool[sample.int(base::length(size_pool), n, replace = TRUE)]
    # vapply() returns character(0) for n = 0, where sapply() returns a list
    vapply(sizes, generate_string, character(1), USE.NAMES = FALSE)
  }

  if (is.null(max_distict_values)) {
    return(random_strings(length))
  }

  x <- random_strings(max_distict_values)  # candidate values, may repeat
  sample(x, length, replace = TRUE)
}

Read our documentation on viewing source code .

Loading