Compare 678d76a ... +0 ... c51893f

Coverage Reach
mgsub.R mgsub_censor.R helper_functions.R

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -1,94 +1,94 @@
Loading
1 1
#' @export
2 2
3 -
mgsub = function(string,pattern,replacement,recycle=FALSE,...){
3 +
mgsub = function(string, pattern, replacement, recycle=FALSE, ...) {
4 4
  #' @title Safe, multiple gsub
5 -
  #' 
6 -
  #' @description \code{mgsub} - A safe, simultaneous, multiple global string 
7 -
  #' replacement wrapper that allows access to multiple methods of specifying 
5 +
  #'
6 +
  #' @description \code{mgsub} - A safe, simultaneous, multiple global string
7 +
  #' replacement wrapper that allows access to multiple methods of specifying
8 8
  #' matches and replacements.
9 9
  #'
10 10
  #' @param string a character vector where replacements are sought
11 11
  #' @param pattern Character string to be matched in the given character vector
12 -
  #' @param replacement Character string equal in length to pattern or of length 
12 +
  #' @param replacement Character string equal in length to pattern or of length
13 13
  #' one which are a replacement for matched pattern.
14 14
  #' @param recycle logical. should replacement be recycled if lengths differ?
15 -
  #' @param \dots arguments to pass to \code{\link[base]{regexpr}} / 
15 +
  #' @param \dots arguments to pass to \code{\link[base]{regexpr}} /
16 16
  #' \code{\link[base]{sub}}
17 17
  #' @rdname mgsub
18 18
  #' @return Converted string.
19 19
  #' @examples
20 -
  #' mgsub("hey, ho",pattern=c("hey","ho"),replacement=c("ho","hey"))
21 -
  #' mgsub("developer",pattern=c("e","p"),replacement=c("p","e"))
20 +
  #' mgsub("hey, ho", pattern = c("hey", "ho"), replacement = c("ho", "hey"))
21 +
  #' mgsub("developer", pattern = c("e", "p"), replacement = c("p", "e"))
22 22
  #' mgsub("The chemical Dopaziamine is fake",
23 -
  #'       pattern=c("dopa(.*?) ","fake"),
24 -
  #'       replacement=c("mega\\1 ","real"),
25 -
  #'       ignore.case=TRUE)
23 +
  #'       pattern = c("dopa(.*?) ", "fake"),
24 +
  #'       replacement = c("mega\\1 ", "real"),
25 +
  #'       ignore.case = TRUE)
26 26
27 -
  if(all(is.na(string))) return(string)
27 +
  if (all(is.na(string))) return(string)
28 28
  sna = !is.na(string)
29 -
  if(!is.logical(recycle)) stop("Recycle must be a boolean")
30 -
  if(!recycle & length(pattern) != length(replacement)){
29 +
  if (!is.logical(recycle)) stop("Recycle must be a boolean")
30 +
  if (!recycle & length(pattern) != length(replacement)) {
31 31
    stop("pattern and replacement vectors must be the same length")
32 32
  }
33 -
  if(length(replacement) > length(pattern)){
34 -
    warning("You provided more replacements than 
33 +
  if (length(replacement) > length(pattern)) {
34 +
    warning("You provided more replacements than
35 35
            search strings - some will be dropped")
36 36
    replacement = replacement[seq_along(pattern)]
37 37
  }
38 -
  if(recycle & length(pattern) != length(replacement)){
38 +
  if (recycle & length(pattern) != length(replacement)) {
39 39
    lp = length(pattern)
40 40
    lr = length(replacement)
41 -
    replacement = rep(replacement,ceiling(lp/lr))[seq_along(pattern)]
42 -
  } 
41 +
    replacement = rep(replacement, ceiling(lp / lr))[seq_along(pattern)]
42 +
  }
43 43
  result = vapply(X = string[sna],
44 44
                  FUN = worker,
45 45
                  FUN.VALUE = c(""),
46 46
                  USE.NAMES = FALSE,
47 -
                  pattern=pattern,
48 -
                  replacement=replacement,...)
47 +
                  pattern = pattern,
48 +
                  replacement = replacement, ...)
49 49
  string[sna] = result
50 50
  return(string)
51 51
}
52 52
53 -
worker = function(string,pattern,replacement,...){
53 +
worker = function(string, pattern, replacement, ...) {
54 54
  #' @title mgsub worker
55 -
  #' 
55 +
  #'
56 56
  #' @description The hard worker doing everything for mgsub
57 -
  #' 
57 +
  #'
58 58
  #' @param string a character vector where replacements are sought
59 59
  #' @param pattern Character string to be matched in the given character vector
60 -
  #' @param replacement Character string equal in length to pattern or of length 
60 +
  #' @param replacement Character string equal in length to pattern or of length
61 61
  #' one which are a replacement for matched pattern.
62 62
  #' @param \dots arguments to pass to regexpr family
63 -
  
64 -
  x0 = do.call(rbind,lapply(seq_along(pattern),
65 -
                            getMatches,
66 -
                            string=string,
67 -
                            pattern=pattern,...))
68 -
  x0 = matrix(x0[x0[,2] != -1,],ncol=4)
69 -
  uid = unique(x0[,1])
70 -
  if(nrow(x0)==0) return(string)
71 -
  if(length(unique(x0[,1])) == 1){
72 -
    return(fastReplace(string,pattern[uid],replacement[uid],...))
63 +
64 +
  x0 = do.call(rbind, lapply(seq_along(pattern),
65 +
                            get_matches,
66 +
                            string = string,
67 +
                            pattern = pattern, ...))
68 +
  x0 = matrix(x0[x0[, 2] != -1, ], ncol = 4)
69 +
  uid = unique(x0[, 1])
70 +
  if (nrow(x0) == 0) return(string)
71 +
  if (length(unique(x0[, 1])) == 1) {
72 +
    return(fast_replace(string, pattern[uid], replacement[uid], ...)) # nolint
73 73
  }
74 -
  if(nrow(x0) > 1){
75 -
    x = x0[order(x0[,3],decreasing = T),]
76 -
    x = filterOverlap(x)
77 -
    uid = unique(x[,1])
78 -
    if(length(uid) == 1){
79 -
      return(fastReplace(string,pattern[uid],replacement[uid],...))
74 +
  if (nrow(x0) > 1) {
75 +
    x = x0[order(x0[, 3], decreasing = T), ]
76 +
    x = filter_overlap(x) # nolint
77 +
    uid = unique(x[, 1])
78 +
    if (length(uid) == 1) {
79 +
      return(fast_replace(string, pattern[uid], replacement[uid], ...)) # nolint
80 80
    }
81 -
    x = x[order(x[,2]),] 
81 +
    x = x[order(x[, 2]), ]
82 82
  }
83 -
  for(i in nrow(x):1){
84 -
    s = x[i,2]
85 -
    e = x[i,4]
86 -
    p = pattern[x[i,1]]
87 -
    r = replacement[x[i,1]]
88 -
    pre = if(s > 1) substr(string,1,s-1) else ""
89 -
    r0 = sub(p,r,substr(string,s,e),...)
90 -
    end = if(e < nchar(string)) substr(string,e+1,nchar(string)) else ""
91 -
    string = paste0(pre,r0,end)
83 +
  for (i in rev(seq_len(nrow(x)))) {
84 +
    s = x[i, 2]
85 +
    e = x[i, 4]
86 +
    p = pattern[x[i, 1]]
87 +
    r = replacement[x[i, 1]]
88 +
    pre = if (s > 1) substr(string, 1, s - 1) else ""
89 +
    r0 = sub(p, r, substr(string, s, e), ...)
90 +
    end = if (e < nchar(string)) substr(string, e + 1, nchar(string)) else ""
91 +
    string = paste0(pre, r0, end)
92 92
  }
93 93
  return(string)
94 -
}

@@ -1,87 +1,86 @@
Loading
1 1
#' @export
2 2
3 -
mgsub_censor = function(string,pattern,censor="*",split=any(nchar(censor) > 1),seed=NULL,...){
3 +
mgsub_censor = function(string, pattern, censor="*", split=any(nchar(censor) > 1), seed=NULL, ...) {
4 4
  #' @title Safe, multiple censoring of text strings
5 -
  #' 
5 +
  #'
6 6
  #' @description \code{mgsub_censor} - A safe, simultaneous, multiple global string censoring
7 7
  #'  (replace matches with a censoring character like '*')
8 8
  #'
9 9
  #' @param string a character vector to censor
10 10
  #' @param pattern regular expressions used to identify where to censor
11 11
  #' @param censor character to use in censoring - see details
12 -
  #' @param split if a multicharacter censor pattern is provided, should it be 
12 +
  #' @param split if a multicharacter censor pattern is provided, should it be
13 13
  #' split to preserve original string length
14 14
  #' @param seed optional parameter to fix sampling of multicharacter censors
15 -
  #' @param \dots arguments to pass to \code{\link[base:grep]{regexpr}} / 
15 +
  #' @param \dots arguments to pass to \code{\link[base:grep]{regexpr}} /
16 16
  #' \code{\link[base:grep]{sub}}
17 17
  #' @rdname mgsub_censor
18 18
  #' @return Censored string.
19 -
  #' @details When censor is provided as a >1 length vector or as a multicharacter 
20 -
  #' string with split = TRUE, it will be sampled to return random censoring patterns. 
21 -
  #' This can be helpful if you want to create cartoonish swear censoring. If 
19 +
  #' @details When censor is provided as a >1 length vector or as a multicharacter
20 +
  #' string with split = TRUE, it will be sampled to return random censoring patterns.
21 +
  #' This can be helpful if you want to create cartoonish swear censoring. If
22 22
  #' needed, the randomization can be controlled with the seed argument.
23 -
  #' 
23 +
  #'
24 24
  #' @examples
25 -
  #' mgsub_censor("Flowers for a friend",pattern=c("low"),censor="*")
26 -
  
27 -
  if(all(is.na(string))) return(string)
25 +
  #' mgsub_censor("Flowers for a friend", pattern=c("low"), censor="*")
26 +
27 +
  if (all(is.na(string))) return(string)
28 28
  sna = !is.na(string)
29 -
  result = vapply(string[sna]
30 -
                 ,censor_worker
31 -
                 ,c("")
32 -
                 ,USE.NAMES = FALSE
33 -
                 ,pattern=pattern
34 -
                 ,censor=censor
35 -
                 ,split=split
36 -
                 ,seed=seed
37 -
                 ,...)
29 +
  result = vapply(string[sna],
30 +
                  censor_worker,
31 +
                  c(""),
32 +
                  USE.NAMES = FALSE,
33 +
                  pattern = pattern,
34 +
                  censor = censor,
35 +
                  split = split,
36 +
                  seed = seed,
37 +
                  ...)
38 38
  string[sna] = result
39 39
  return(string)
40 40
}
41 41
42 -
censor_worker = function(string,pattern,censor,split=any(nchar(censor) > 1),seed=NULL,...){
42 +
censor_worker = function(string, pattern, censor, split=any(nchar(censor) > 1), seed=NULL, ...) {
43 43
  #' @title mgsub_censor worker
44 -
  #' 
44 +
  #'
45 45
  #' @description The hard worker doing everything for mgsub_censor
46 -
  #' 
46 +
  #'
47 47
  #' @param string a character vector where replacements are sought
48 48
  #' @param pattern Character string to be matched in the given character vector
49 49
  #' @param censor character to use in censoring - see details
50 -
  #' @param split if a multicharacter censor pattern is provided, should it be 
50 +
  #' @param split if a multicharacter censor pattern is provided, should it be
51 51
  #' split to preserve original string length
52 52
  #' @param seed optional parameter to fix sampling of multicharacter censors
53 53
  #' @param \dots arguments to pass to regexpr family
54 -
  
55 -
  x0 = do.call(rbind,lapply(seq_along(pattern)
56 -
                           ,getMatches
57 -
                           ,string=string
58 -
                           ,pattern=pattern
59 -
                           ,...))
60 -
  x0 = matrix(x0[x0[,2] != -1,],ncol=4)
61 -
  uid = unique(x0[,1])
62 -
  if(nrow(x0)==0) return(string)
63 -
  if(nrow(x0) > 1){
64 -
    x = x0[order(x0[,3],decreasing = T),]
65 -
    x = filterOverlap(x)
66 -
    x = x[order(x[,2]),,drop=FALSE]
54 +
55 +
  x0 = do.call(rbind, lapply(seq_along(pattern),
56 +
                             get_matches,
57 +
                             string = string,
58 +
                             pattern = pattern,
59 +
                             ...))
60 +
  x0 = matrix(x0[x0[, 2] != -1, ], ncol = 4)
61 +
  if (nrow(x0) == 0) return(string)
62 +
  if (nrow(x0) > 1) {
63 +
    x = x0[order(x0[, 3], decreasing = T), ]
64 +
    x = filter_overlap(x) #nolint
65 +
    x = x[order(x[, 2]), , drop = FALSE]
67 66
  } else {
68 67
    x = x0
69 68
  }
70 -
  for(i in nrow(x):1){
71 -
    s = x[i,2]
72 -
    e = x[i,4]
73 -
    p = pattern[x[i,1]]
74 -
    if(split) censor = unlist(strsplit(censor,""))
75 -
    if(!is.null(seed)) set.seed(seed)
76 -
    r = if(length(censor) > 1){
77 -
      paste(sample(censor,x[i,3],replace=TRUE),collapse="")
69 +
  for (i in rev(seq_len(nrow(x)))) {
70 +
    s = x[i, 2]
71 +
    e = x[i, 4]
72 +
    p = pattern[x[i, 1]]
73 +
    if (split) censor = unlist(strsplit(censor, ""))
74 +
    if (!is.null(seed)) set.seed(seed)
75 +
    r = if (length(censor) > 1) {
76 +
      paste(sample(censor, x[i, 3], replace = TRUE), collapse = "")
78 77
    } else {
79 -
      paste(rep(censor,x[i,3]),collapse="")
78 +
      paste(rep(censor, x[i, 3]), collapse = "")
80 79
    }
81 -
    pre = if(s > 1) substr(string,1,s-1) else ""
82 -
    r0 = sub(p,r,substr(string,s,e),...)
83 -
    end = if(e < nchar(string)) substr(string,e+1,nchar(string)) else ""
84 -
    string = paste0(pre,r0,end)
80 +
    pre = if (s > 1) substr(string, 1, s - 1) else ""
81 +
    r0 = sub(p, r, substr(string, s, e), ...)
82 +
    end = if (e < nchar(string)) substr(string, e + 1, nchar(string)) else ""
83 +
    string = paste0(pre, r0, end)
85 84
  }
86 85
  return(string)
87 86
}

@@ -1,60 +1,60 @@
Loading
1 -
fastReplace = function(string,pattern,replacement,...){
1 +
fast_replace = function(string, pattern, replacement, ...) {
2 2
  #' @title Fast escape replace
3 -
  #' 
3 +
  #'
4 4
  #' @description Fast escape function for limited case where only one pattern
5 5
  #' provided actually matches anything
6 -
  #' 
6 +
  #'
7 7
  #' @param string a character vector where replacements are sought
8 8
  #' @param pattern Character string to be matched in the given character vector
9 -
  #' @param replacement Character string equal in length to pattern or of length 
9 +
  #' @param replacement Character string equal in length to pattern or of length
10 10
  #' one which are a replacement for matched pattern.
11 11
  #' @param \dots arguments to pass to gsub
12 -
  
13 -
  for(i in seq_along(pattern)){
14 -
    string = gsub(pattern[i],replacement[i],string,...)
12 +
13 +
  for (i in seq_along(pattern)) {
14 +
    string = gsub(pattern[i], replacement[i], string, ...)
15 15
  }
16 16
  return(string)
17 17
}
18 18
19 -
getMatches = function(string,pattern,i,...){
19 +
get_matches = function(string, pattern, i, ...) {
20 20
  #' @title Get all matches
21 -
  #' 
21 +
  #'
22 22
  #' @description Helper function to be used in a loop to check each pattern
23 23
  #' provided for matches
24 -
  #' 
24 +
  #'
25 25
  #' @param string a character vector where replacements are sought
26 26
  #' @param pattern Character string to be matched in the given character vector
27 27
  #' @param i an iterator provided by a looping function
28 28
  #' @param \dots arguments to pass to gregexpr
29 -
  
30 -
  tmp = gregexpr(pattern[i],string,...)
29 +
30 +
  tmp = gregexpr(pattern[i], string, ...)
31 31
  start = tmp[[1]]
32 -
  length = attr(tmp[[1]],"match.length")
33 -
  return(matrix(cbind(i,start,length,start+length-1),ncol=4))
32 +
  length = attr(tmp[[1]], "match.length")
33 +
  return(matrix(cbind(i, start, length, start + length - 1), ncol = 4))
34 34
}
35 35
36 -
filterOverlap = function(x){
36 +
filter_overlap = function(x) {
37 37
  #' @title Filter overlaps from matches
38 -
  #' 
38 +
  #'
39 39
  #' @description Helper function used to identify which results from gregexpr
40 40
  #' overlap other matches and filter out shorter, overlapped results
41 -
  #' 
42 -
  #' @param x Matrix of gregexpr results, 4 columns, index of column matched, 
43 -
  #' start of match, length of match, end of match. Produced exclusively from 
41 +
  #'
42 +
  #' @param x Matrix of gregexpr results, 4 columns, index of column matched,
43 +
  #' start of match, length of match, end of match. Produced exclusively from
44 44
  #' a worker function in mgsub
45 -
  for(i in nrow(x):2){
46 -
    s = x[i,2]
47 -
    ps = x[1:(i-1),2]
48 -
    e = x[i,4]
49 -
    pe = x[1:(i-1),4]
50 -
    if(any(ps <= s & pe >= s)){
51 -
      x = x[-i,]
45 +
  for (i in nrow(x):2) {
46 +
    s = x[i, 2]
47 +
    ps = x[1:(i - 1), 2]
48 +
    e = x[i, 4]
49 +
    pe = x[1:(i - 1), 4]
50 +
    if (any(ps <= s & pe >= s)) {
51 +
      x = x[-i, ]
52 52
      next
53 53
    }
54 -
    if(any(ps <= e & pe >= e)){
55 -
      x = x[-i,]
54 +
    if (any(ps <= e & pe >= e)) {
55 +
      x = x[-i, ]
56 56
      next
57 57
    }
58 58
  }
59 -
  return(matrix(x,ncol=4))
59 +
  return(matrix(x, ncol = 4))
60 60
}

Everything is accounted for!

No changes detected that need to be reviewed.
What changes does Codecov check for?
Lines, not adjusted in diff, that have changed coverage data.
Files that introduced coverage data that had none before.
Files that have missing coverage data that once were tracked.
Files Coverage
R 100.00%
Project Totals (3 files) 100.00%
Loading