juliasilge / tidytext
1
#' Wrapper around unnest_tokens for regular expressions
#'
#' A convenience function that calls [unnest_tokens()] with
#' `token = "regex"` and forwards every other argument unchanged.
#'
#' @seealso
#' + [unnest_tokens()]
#'
#' @inheritParams unnest_tokens
#' @inheritParams tokenizers::tokenize_regex
#'
#' @param ... Extra arguments passed on to \link[tokenizers]{tokenizers}
#'
#' @return The value returned by [unnest_tokens()].
#'
#' @export
#' @importFrom dplyr enquo
#'
#' @examples
#' library(dplyr)
#' library(janeaustenr)
#'
#' d <- tibble(txt = prideprejudice)
#'
#' d %>%
#'   unnest_regex(word, txt, pattern = "Chapter [\\\\d]")
#'
unnest_regex <- function(tbl,
                         output,
                         input,
                         pattern = "\\s+",
                         format = c("text", "man", "latex", "html", "xml"),
                         to_lower = TRUE,
                         drop = TRUE,
                         collapse = NULL,
                         ...) {
  # Reduce `format` to one of the choices declared in the signature.
  format <- arg_match(format)

  # Capture the unevaluated column names so they can be spliced into the
  # delegated call exactly as the caller wrote them.
  output_quo <- enquo(output)
  input_quo <- enquo(input)

  # `pattern` and `...` are not named here as unnest_tokens() parameters;
  # they are handed over for the regex tokenizer to consume.
  unnest_tokens(
    tbl,
    !!output_quo,
    !!input_quo,
    token = "regex",
    format = format,
    to_lower = to_lower,
    drop = drop,
    collapse = collapse,
    pattern = pattern,
    ...
  )
}

Read our documentation on viewing source code.

Loading