1
#' @export
2

3
mgsub_censor = function(string, pattern, censor="*", split=any(nchar(censor) > 1), seed=NULL, ...) {
4
  #' @title Safe, multiple censoring of text strings
5
  #'
6
  #' @description \code{mgsub_censor} - A safe, simultaneous, multiple global string censoring
7
  #'  (replace matches with a censoring character like '*')
8
  #'
9
  #' @param string a character vector to censor
10
  #' @param pattern regular expressions used to identify where to censor
11
  #' @param censor character to use in censoring - see details
12
  #' @param split if a multicharacter censor pattern is provided, should it be
13
  #' split to preserve original string length
14
  #' @param seed optional parameter to fix sampling of multicharacter censors
15
  #' @param \dots arguments to pass to \code{\link[base:grep]{regexpr}} /
16
  #' \code{\link[base:grep]{sub}}
17
  #' @rdname mgsub_censor
18
  #' @return Censored string.
19
  #' @details When censor is provided as a >1 length vector or as a multicharacter
20
  #' string with split = TRUE, it will be sampled to return random censoring patterns.
21
  #' This can be helpful if you want to create cartoonish swear censoring. If
22
  #' needed, the randomization can be controlled with the seed argument.
23
  #'
24
  #' @examples
25
  #' mgsub_censor("Flowers for a friend", pattern=c("low"), censor="*")
26

27 6
  if (all(is.na(string))) return(string)
28 6
  sna = !is.na(string)
29 6
  result = vapply(string[sna],
30 6
                  censor_worker,
31 6
                  c(""),
32 6
                  USE.NAMES = FALSE,
33 6
                  pattern = pattern,
34 6
                  censor = censor,
35 6
                  split = split,
36 6
                  seed = seed,
37
                  ...)
38 6
  string[sna] = result
39 6
  return(string)
40
}
41

42
censor_worker = function(string, pattern, censor, split=any(nchar(censor) > 1), seed=NULL, ...) {
43
  #' @title mgsub_censor worker
44
  #'
45
  #' @description The hard worker doing everything for mgsub_censor
46
  #'
47
  #' @param string a character vector where replacements are sought
48
  #' @param pattern Character string to be matched in the given character vector
49
  #' @param censor character to use in censoring - see details
50
  #' @param split if a multicharacter censor pattern is provided, should it be
51
  #' split to preserve original string length
52
  #' @param seed optional parameter to fix sampling of multicharacter censors
53
  #' @param \dots arguments to pass to regexpr family
54

55 6
  x0 = do.call(rbind, lapply(seq_along(pattern),
56 6
                             get_matches,
57 6
                             string = string,
58 6
                             pattern = pattern,
59
                             ...))
60 6
  x0 = matrix(x0[x0[, 2] != -1, ], ncol = 4)
61 6
  if (nrow(x0) == 0) return(string)
62 6
  if (nrow(x0) > 1) {
63 6
    x = x0[order(x0[, 3], decreasing = T), ]
64 6
    x = filter_overlap(x) #nolint
65 6
    x = x[order(x[, 2]), , drop = FALSE]
66
  } else {
67 6
    x = x0
68
  }
69 6
  for (i in rev(seq_len(nrow(x)))) {
70 6
    s = x[i, 2]
71 6
    e = x[i, 4]
72 6
    p = pattern[x[i, 1]]
73 6
    if (split) censor = unlist(strsplit(censor, ""))
74 6
    if (!is.null(seed)) set.seed(seed)
75 6
    r = if (length(censor) > 1) {
76 6
      paste(sample(censor, x[i, 3], replace = TRUE), collapse = "")
77
    } else {
78 6
      paste(rep(censor, x[i, 3]), collapse = "")
79
    }
80 6
    pre = if (s > 1) substr(string, 1, s - 1) else ""
81 6
    r0 = sub(p, r, substr(string, s, e), ...)
82 6
    end = if (e < nchar(string)) substr(string, e + 1, nchar(string)) else ""
83 6
    string = paste0(pre, r0, end)
84
  }
85 6
  return(string)
86
}

Read our documentation on viewing source code .

Loading