bmewing / mgsub
1
#' @export
2

3
mgsub = function(string,pattern,replacement,recycle=FALSE,...){
4
  #' @title Safe, multiple gsub
5
  #' 
6
  #' @description \code{mgsub} - A safe, simultaneous, multiple global string 
7
  #' replacement wrapper that allows access to multiple methods of specifying 
8
  #' matches and replacements.
9
  #'
10
  #' @param string a character vector where replacements are sought
11
  #' @param pattern Character string to be matched in the given character vector
12
  #' @param replacement Character string equal in length to pattern or of length 
13
  #' one which are a replacement for matched pattern.
14
  #' @param recycle logical. should replacement be recycled if lengths differ?
15
  #' @param \dots arguments to pass to \code{\link[base]{regexpr}} / 
16
  #' \code{\link[base]{sub}}
17
  #' @rdname mgsub
18
  #' @return Converted string.
19
  #' @examples
20
  #' mgsub("hey, ho",pattern=c("hey","ho"),replacement=c("ho","hey"))
21
  #' mgsub("developer",pattern=c("e","p"),replacement=c("p","e"))
22
  #' mgsub("The chemical Dopaziamine is fake",
23
  #'       pattern=c("dopa(.*?) ","fake"),
24
  #'       replacement=c("mega\\1 ","real"),
25
  #'       ignore.case=TRUE)
26

27 2
  if(all(is.na(string))) return(string)
28 2
  sna = !is.na(string)
29 2
  if(!is.logical(recycle)) stop("Recycle must be a boolean")
30 2
  if(!recycle & length(pattern) != length(replacement)){
31 2
    stop("pattern and replacement vectors must be the same length")
32
  }
33 2
  if(length(replacement) > length(pattern)){
34 2
    warning("You provided more replacements than 
35 2
            search strings - some will be dropped")
36 2
    replacement = replacement[seq_along(pattern)]
37
  }
38 2
  if(recycle & length(pattern) != length(replacement)){
39 2
    lp = length(pattern)
40 2
    lr = length(replacement)
41 2
    replacement = rep(replacement,ceiling(lp/lr))[seq_along(pattern)]
42
  } 
43 2
  result = vapply(X = string[sna],
44 2
                  FUN = worker,
45 2
                  FUN.VALUE = c(""),
46 2
                  USE.NAMES = FALSE,
47 2
                  pattern=pattern,
48 2
                  replacement=replacement,...)
49 2
  string[sna] = result
50 2
  return(string)
51
}
52

53
worker = function(string,pattern,replacement,...){
54
  #' @title mgsub worker
55
  #' 
56
  #' @description The hard worker doing everything for mgsub
57
  #' 
58
  #' @param string a character vector where replacements are sought
59
  #' @param pattern Character string to be matched in the given character vector
60
  #' @param replacement Character string equal in length to pattern or of length 
61
  #' one which are a replacement for matched pattern.
62
  #' @param \dots arguments to pass to regexpr family
63
  
64 2
  x0 = do.call(rbind,lapply(seq_along(pattern),
65 2
                            getMatches,
66 2
                            string=string,
67 2
                            pattern=pattern,...))
68 2
  x0 = matrix(x0[x0[,2] != -1,],ncol=4)
69 2
  uid = unique(x0[,1])
70 2
  if(nrow(x0)==0) return(string)
71 2
  if(length(unique(x0[,1])) == 1){
72 2
    return(fastReplace(string,pattern[uid],replacement[uid],...))
73
  }
74 2
  if(nrow(x0) > 1){
75 2
    x = x0[order(x0[,3],decreasing = T),]
76 2
    x = filterOverlap(x)
77 2
    uid = unique(x[,1])
78 2
    if(length(uid) == 1){
79 2
      return(fastReplace(string,pattern[uid],replacement[uid],...))
80
    }
81 2
    x = x[order(x[,2]),] 
82
  }
83 2
  for(i in nrow(x):1){
84 2
    s = x[i,2]
85 2
    e = x[i,4]
86 2
    p = pattern[x[i,1]]
87 2
    r = replacement[x[i,1]]
88 2
    pre = if(s > 1) substr(string,1,s-1) else ""
89 2
    r0 = sub(p,r,substr(string,s,e),...)
90 2
    end = if(e < nchar(string)) substr(string,e+1,nchar(string)) else ""
91 2
    string = paste0(pre,r0,end)
92
  }
93 2
  return(string)
94
}

Read our documentation on viewing source code .

Loading