1
|
|
#' @export
|
2
|
|
|
3
|
|
mgsub = function(string, pattern, replacement, recycle=FALSE, ...) {
|
4
|
|
#' @title Safe, multiple gsub
|
5
|
|
#'
|
6
|
|
#' @description \code{mgsub} - A safe, simultaneous, multiple global string
|
7
|
|
#' replacement wrapper that allows access to multiple methods of specifying
|
8
|
|
#' matches and replacements.
|
9
|
|
#'
|
10
|
|
#' @param string a character vector where replacements are sought
|
11
|
|
#' @param pattern Character string to be matched in the given character vector
|
12
|
|
#' @param replacement Character string equal in length to pattern or of length
|
13
|
|
#' one which are a replacement for matched pattern.
|
14
|
|
#' @param recycle logical. should replacement be recycled if lengths differ?
|
15
|
|
#' @param \dots arguments to pass to \code{\link[base]{regexpr}} /
|
16
|
|
#' \code{\link[base]{sub}}
|
17
|
|
#' @rdname mgsub
|
18
|
|
#' @return Converted string.
|
19
|
|
#' @examples
|
20
|
|
#' mgsub("hey, ho", pattern = c("hey", "ho"), replacement = c("ho", "hey"))
|
21
|
|
#' mgsub("developer", pattern = c("e", "p"), replacement = c("p", "e"))
|
22
|
|
#' mgsub("The chemical Dopaziamine is fake",
|
23
|
|
#' pattern = c("dopa(.*?) ", "fake"),
|
24
|
|
#' replacement = c("mega\\1 ", "real"),
|
25
|
|
#' ignore.case = TRUE)
|
26
|
|
|
27
|
6
|
if (all(is.na(string))) return(string)
|
28
|
6
|
sna = !is.na(string)
|
29
|
6
|
if (!is.logical(recycle)) stop("Recycle must be a boolean")
|
30
|
6
|
if (!recycle & length(pattern) != length(replacement)) {
|
31
|
6
|
stop("pattern and replacement vectors must be the same length")
|
32
|
|
}
|
33
|
6
|
if (length(replacement) > length(pattern)) {
|
34
|
6
|
warning("You provided more replacements than
|
35
|
6
|
search strings - some will be dropped")
|
36
|
6
|
replacement = replacement[seq_along(pattern)]
|
37
|
|
}
|
38
|
6
|
if (recycle & length(pattern) != length(replacement)) {
|
39
|
6
|
lp = length(pattern)
|
40
|
6
|
lr = length(replacement)
|
41
|
6
|
replacement = rep(replacement, ceiling(lp / lr))[seq_along(pattern)]
|
42
|
|
}
|
43
|
6
|
result = vapply(X = string[sna],
|
44
|
6
|
FUN = worker,
|
45
|
6
|
FUN.VALUE = c(""),
|
46
|
6
|
USE.NAMES = FALSE,
|
47
|
6
|
pattern = pattern,
|
48
|
6
|
replacement = replacement, ...)
|
49
|
6
|
string[sna] = result
|
50
|
6
|
return(string)
|
51
|
|
}
|
52
|
|
|
53
|
|
worker = function(string, pattern, replacement, ...) {
|
54
|
|
#' @title mgsub worker
|
55
|
|
#'
|
56
|
|
#' @description The hard worker doing everything for mgsub
|
57
|
|
#'
|
58
|
|
#' @param string a character vector where replacements are sought
|
59
|
|
#' @param pattern Character string to be matched in the given character vector
|
60
|
|
#' @param replacement Character string equal in length to pattern or of length
|
61
|
|
#' one which are a replacement for matched pattern.
|
62
|
|
#' @param \dots arguments to pass to regexpr family
|
63
|
|
|
64
|
6
|
x0 = do.call(rbind, lapply(seq_along(pattern),
|
65
|
6
|
get_matches,
|
66
|
6
|
string = string,
|
67
|
6
|
pattern = pattern, ...))
|
68
|
6
|
x0 = matrix(x0[x0[, 2] != -1, ], ncol = 4)
|
69
|
6
|
uid = unique(x0[, 1])
|
70
|
6
|
if (nrow(x0) == 0) return(string)
|
71
|
6
|
if (length(unique(x0[, 1])) == 1) {
|
72
|
6
|
return(fast_replace(string, pattern[uid], replacement[uid], ...)) # nolint
|
73
|
|
}
|
74
|
6
|
if (nrow(x0) > 1) {
|
75
|
6
|
x = x0[order(x0[, 3], decreasing = T), ]
|
76
|
6
|
x = filter_overlap(x) # nolint
|
77
|
6
|
uid = unique(x[, 1])
|
78
|
6
|
if (length(uid) == 1) {
|
79
|
6
|
return(fast_replace(string, pattern[uid], replacement[uid], ...)) # nolint
|
80
|
|
}
|
81
|
6
|
x = x[order(x[, 2]), ]
|
82
|
|
}
|
83
|
6
|
for (i in rev(seq_len(nrow(x)))) {
|
84
|
6
|
s = x[i, 2]
|
85
|
6
|
e = x[i, 4]
|
86
|
6
|
p = pattern[x[i, 1]]
|
87
|
6
|
r = replacement[x[i, 1]]
|
88
|
6
|
pre = if (s > 1) substr(string, 1, s - 1) else ""
|
89
|
6
|
r0 = sub(p, r, substr(string, s, e), ...)
|
90
|
6
|
end = if (e < nchar(string)) substr(string, e + 1, nchar(string)) else ""
|
91
|
6
|
string = paste0(pre, r0, end)
|
92
|
|
}
|
93
|
6
|
return(string)
|
94
|
|
}
|