juliasilge / tidytext
1
#' Wrapper around unnest_tokens for n-grams
2
#'
3
#' These functions are wrappers around `unnest_tokens(token = "ngrams")`
4
#' and `unnest_tokens(token = "skip_ngrams")`.
5
#'
6
#' @seealso
7
#' + [unnest_tokens()]
8
#'
9
#' @inheritParams tokenizers::tokenize_ngrams
10
#' @inheritParams tokenizers::tokenize_skip_ngrams
11
#' @inheritParams unnest_tokens
12
#'
13
#' @param ... Extra arguments passed on to \link[tokenizers]{tokenizers}
14
#'
15
#' @export
16
#' @rdname unnest_ngrams
17
#' @importFrom dplyr enquo
18
#'
19
#' @examples
20
#' library(dplyr)
21
#' library(janeaustenr)
22
#'
23
#' d <- tibble(txt = prideprejudice)
24
#'
25
#' d %>%
26
#'   unnest_ngrams(word, txt, n = 2)
27
#'
28
#' d %>%
29
#'   unnest_skip_ngrams(word, txt, n = 3, k = 1)
30
#'
31
unnest_ngrams <- function(tbl, output, input,
                          n = 3L, n_min = n, ngram_delim = " ",
                          format = c("text", "man", "latex", "html", "xml"),
                          to_lower = TRUE, drop = TRUE, collapse = NULL,
                          ...) {
  # Match `format` against the choices listed in the signature.
  format <- arg_match(format)

  # Capture the output/input column arguments unevaluated so they can be
  # unquoted into the unnest_tokens() call below.
  output_quo <- enquo(output)
  input_quo <- enquo(input)

  unnest_tokens(
    tbl,
    !!output_quo,
    !!input_quo,
    # This wrapper always uses the n-gram tokenizer.
    token = "ngrams",
    format = format,
    to_lower = to_lower,
    drop = drop,
    collapse = collapse,
    # n-gram settings, forwarded together with any extra arguments in `...`.
    n = n,
    n_min = n_min,
    ngram_delim = ngram_delim,
    ...
  )
}
59

60
#' @export
61
#' @rdname unnest_ngrams
62
#' @importFrom dplyr enquo
63
unnest_skip_ngrams <- function(tbl, output, input,
                               n_min = 1, n = 3, k = 1,
                               format = c("text", "man", "latex", "html", "xml"),
                               to_lower = TRUE, drop = TRUE, collapse = NULL,
                               ...) {
  # Match `format` against the choices listed in the signature.
  format <- arg_match(format)

  # Capture the output/input column arguments unevaluated so they can be
  # unquoted into the unnest_tokens() call below.
  output_quo <- enquo(output)
  input_quo <- enquo(input)

  unnest_tokens(
    tbl,
    !!output_quo,
    !!input_quo,
    # This wrapper always uses the skip n-gram tokenizer.
    token = "skip_ngrams",
    format = format,
    to_lower = to_lower,
    drop = drop,
    collapse = collapse,
    # Skip n-gram settings, forwarded together with any extra arguments in
    # `...`.
    n = n,
    n_min = n_min,
    k = k,
    ...
  )
}

Read our documentation on viewing source code.

Loading