juliasilge / tidytext
1
#' Wrapper around unnest_tokens for Penn Treebank Tokenizer
#'
#' Convenience front end that calls [unnest_tokens()] with `token = "ptb"`
#' and forwards every other argument unchanged.
#'
#' @inheritParams unnest_tokens
#' @inheritParams tokenizers::tokenize_ptb
#'
#' @param ... Extra arguments passed on to \link[tokenizers]{tokenizers}
#'
#' @return Whatever the underlying [unnest_tokens()] call returns.
#'
#' @seealso
#' + [unnest_tokens()]
#'
#' @export
#' @importFrom dplyr enquo
#'
#' @examples
#' library(dplyr)
#' library(janeaustenr)
#'
#' d <- tibble(txt = prideprejudice)
#'
#' d %>%
#'   unnest_ptb(word, txt)
#'
unnest_ptb <- function(tbl,
                       output,
                       input,
                       format = c("text", "man", "latex", "html", "xml"),
                       to_lower = TRUE,
                       drop = TRUE,
                       collapse = NULL,
                       ...) {
  # Validate `format` against the choices declared in the signature before
  # doing anything else, so a bad value fails ahead of the delegated call.
  format <- arg_match(format)

  # Capture the column arguments unevaluated; they are injected with `!!`
  # below so that unnest_tokens() sees the caller's original expressions.
  output_quo <- enquo(output)
  input_quo <- enquo(input)

  unnest_tokens(
    tbl,
    !!output_quo,
    !!input_quo,
    token = "ptb",
    format = format,
    to_lower = to_lower,
    drop = drop,
    collapse = collapse,
    ...
  )
}

Read our documentation on viewing source code.

Loading