quanteda / spacyr
1
#' Initialize spaCy
2
#' 
3
#' Initialize spaCy to call from R. 
4
#' @return NULL
5
#' @param model Language package for loading spaCy. Example: \code{en_core_web_sm} (English) and
6
#' \code{de_core_web_sm} (German). Default is \code{en_core_web_sm}.
7
#' @param python_executable the full path to the Python executable, for which
8
#'   spaCy is installed
9
#' @param ask logical; if \code{FALSE}, use the first spaCy installation found;
10
#'   if \code{TRUE}, list available spaCy installations and prompt the user for
11
#'   which to use. If another (e.g. \code{python_executable}) is set, then this
12
#'   value will always be treated as \code{FALSE}.
13
#' @param virtualenv set a path to the Python virtual environment with spaCy
14
#'   installed Example: \code{virtualenv = "~/myenv"}
15
#' @param condaenv set a path to the anaconda virtual environment with spaCy
16
#'   installed. Example: \code{condaenv = "myenv"}
17
#' @param entity logical; if \code{FALSE} is selected, named entity recognition
18
#'   is turned off in spaCy. This will speed up the parsing as it will exclude
19
#'   \code{ner} from the pipeline. For details of spaCy pipeline, see
20
#'   \url{https://spacy.io/usage/processing-pipelines}. The option \code{FALSE}
21
#'   is available only for spaCy version 2.0.0 or higher.
22
#' @param check_env logical; check whether conda/virtual environment generated
23
#'   by \code{spacy_install()} exists
24
#' @param refresh_settings logical; if \code{TRUE}, spacyr will ignore the saved
25
#'   settings in the profile and initiate a search of new settings.
26
#' @param save_profile logical; if \code{TRUE}, the current spaCy setting will
27
#'   be saved for the future use.
28
#' @export
29
#' @author Akitaka Matsuo
30
spacy_initialize <- function(model = "en_core_web_sm",
                             python_executable = NULL,
                             virtualenv = NULL,
                             condaenv = NULL,
                             ask = FALSE,
                             refresh_settings = FALSE,
                             save_profile = FALSE,
                             check_env = TRUE,
                             entity = TRUE) {

    # A previous successful call sets the "spacy_initialized" option; in that
    # case there is nothing left to do.
    if (!is.null(getOption("spacy_initialized"))) {
        message("spaCy is already initialized")
        return(NULL)
    }

    # Once python is initialized, the python executable cannot be changed, so
    # the environment selection is only performed on the first attempt.
    if (!is.null(getOption("python_initialized"))) {
        message("Python space is already attached.  If you want to switch to a different Python, please restart R.")
    } else {
        set_spacy_python_option(python_executable,
                                virtualenv,
                                condaenv,
                                check_env,
                                refresh_settings,
                                ask,
                                model)
    }

    # Read back the selected environment (a list with `key` and `val`, or NULL)
    # and point reticulate at it.
    settings <- check_spacy_python_options()
    if (!is.null(settings)) {
        if (settings$key == "spacy_python_executable") {
            if (check_spacy_model(settings$val, model) != "OK") {
                stop("spaCy or language model ", model, " is not installed in ", settings$val)
            }
            reticulate::use_python(settings$val, required = TRUE)
        } else if (settings$key == "spacy_virtualenv") {
            reticulate::use_virtualenv(settings$val, required = TRUE)
        } else if (settings$key == "spacy_condaenv") {
            reticulate::use_condaenv(settings$val, required = TRUE)
        }
    }

    # Flag python as attached *before* running any python code: the next line
    # could cause a non-recoverable error.
    options("python_initialized" = TRUE)
    spacyr_pyexec(pyfile = system.file("python", "spacyr_class.py",
                                       package = "spacyr"))

    # Hand the configuration to the python session, then run the package's
    # python initialization script.
    spacyr_pyassign("model", model)
    spacyr_pyassign("spacy_entity", entity)
    options("spacy_entity" = entity)
    spacyr_pyexec(pyfile = system.file("python", "initialize_spacyPython.py",
                                       package = "spacyr"))

    spacy_version <- spacyr_pyget("spacy_version")
    # Compare on the full major version component (everything before the first
    # "."), so that a two-digit major version is not mistaken for version 1.
    if (!entity &&
        as.integer(strsplit(as.character(spacy_version), ".", fixed = TRUE)[[1]][1]) < 2) {
        message("entity == FALSE is only available for spaCy version 2.0.0 or higher")
        options("spacy_entity" = TRUE)
    }

    message("successfully initialized (spaCy Version: ", spacy_version, ", language model: ", model, ")")
    settings <- check_spacy_python_options()
    message('(python options: type = "', sub("spacy_", "", settings$key), '", value = "', settings$val, '")')
    options("spacy_initialized" = TRUE)

    if (isTRUE(save_profile)) {
        save_spacy_options(settings$key, settings$val)
    }
    invisible(NULL)
}
100

101
#' Finalize spaCy
102
#' 
103
#' While running spaCy on Python through R, a Python process is always running
104
#' in the background and Rsession will take up a lot of memory (typically over
105
#' 1.5GB). \code{spacy_finalize()} terminates the Python process and frees up
106
#' the memory it was using.
107
#' @return NULL
108
#' @export
109
#' @author Akitaka Matsuo
110
spacy_finalize <- function() {
    # The "spacy_initialized" option is the marker set by spacy_initialize();
    # without it there is no session to shut down.
    if (is.null(getOption("spacy_initialized"))) {
        stop("Nothing to finalize. spaCy is not initialized")
    }
    # Run the package's python-side finalization script.
    spacyr_pyexec(pyfile = system.file("python", "finalize_spacyPython.py",
                                       package = "spacyr"))
    # Clear the marker so that spacy_initialize() can be called again.
    options("spacy_initialized" = NULL)
    # Return NULL, as documented, rather than the value returned by options().
    invisible(NULL)
}
118

119
#' Find spaCy
120
#' 
121
#' Locate the user's version of Python for which spaCy is installed.
122
#' @return spacy_python
123
#' @export
124
#' @param model name of the language model
125
#' @param ask logical; if \code{FALSE}, use the first spaCy installation found; 
126
#'   if \code{TRUE}, list available spaCy installations and prompt the user 
127
#'   for which to use. If another (e.g. \code{python_executable}) is set, then 
128
#'   this value will always be treated as \code{FALSE}.
129
#'  
130
#' @keywords internal
131
#' @importFrom data.table data.table
132
find_spacy <- function(model = "en_core_web_sm", ask = FALSE) {
    # Returns: NA when no python executable is found, NULL when none of the
    # executables found can load spaCy with `model`, otherwise the path of the
    # selected python executable.

    # List candidate python executables. Warnings from the lookup commands
    # (e.g. non-zero exit status) are muffled locally, so the session-wide
    # "warn" option is left untouched.
    py_execs <- suppressWarnings(
        if (is_windows()) {
            system2("where", "python", stdout = TRUE)
        } else if (is_osx() && file.exists("~/.bash_profile")) {
            c(system2("source", "~/.bash_profile; which -a python", stdout = TRUE),
              system2("source", "~/.bash_profile; which -a python3", stdout = TRUE))
        } else {
            c(system2("which", "-a python", stdout = TRUE),
              system2("which", "-a python3", stdout = TRUE))
        }
    )
    py_execs <- unique(py_execs)

    if (length(py_execs) == 0 || grepl("not find", py_execs[1])) {
        return(NA)
    }

    # Keep only the executables for which the spaCy/model check reports "OK".
    has_spacy <- vapply(
        py_execs,
        function(py_exec) check_spacy_model(py_exec, model) == "OK",
        logical(1),
        USE.NAMES = FALSE
    )
    spacy_pythons <- py_execs[has_spacy]

    if (length(spacy_pythons) == 0) {
        return(NULL)
    }

    if (length(spacy_pythons) == 1) {
        spacy_python <- spacy_pythons
        message("spaCy (language model: ", model, ") is installed in ", spacy_python)
    } else if (!isTRUE(ask)) {
        # Several installations and no prompting requested: take the first.
        spacy_python <- spacy_pythons[1]
        message("spaCy (language model: ", model, ") is installed in more than one python")
        message("spacyr will use ", spacy_python, " (because ask = FALSE)")
    } else {
        message("spaCy (language model: ", model, ") is installed in more than one python")
        number <- utils::menu(spacy_pythons, title = "Please select python:")
        if (number == 0) {
            stop("Initialization was canceled by user", call. = FALSE)
        }
        spacy_python <- spacy_pythons[number]
        message("spacyr will use: ", spacy_python)
    }
    spacy_python
}
181

182

183
#' Find spaCy env
184
#' 
185
#' check whether conda/virtual environment for spaCy exists
186
#' @export
187
#'  
188
#' @keywords internal
189
find_spacy_env <- function() {
    # Returns TRUE when either a conda environment named "spacy_condaenv" or a
    # virtual environment at ~/.virtualenvs/spacy_virtualenv exists.

    # conda_binary() signals an error when no conda installation is found;
    # treat that as "conda not available" instead of failing.
    conda_available <- !is.null(
        tryCatch(reticulate::conda_binary("auto"), error = function(e) NULL)
    )
    if (conda_available &&
        "spacy_condaenv" %in% reticulate::conda_list(conda = "auto")$name) {
        return(TRUE)
    }

    # The virtualenv check does not depend on conda, so it is performed even
    # when conda is missing.
    file.exists(file.path("~/.virtualenvs", "spacy_virtualenv", "bin", "activate"))
}
202
    
203

204
# Check whether `py_exec` is a python executable that can import spaCy and load
# the language model `model`.
#
# Returns the combined stdout/stderr of the check collapsed into one string;
# the string is exactly "OK" when spaCy and the model load successfully.
# Stops with an error when `py_exec` cannot be located.
check_spacy_model <- function(py_exec, model) {
    # Silence warnings from the system calls for the duration of this function
    # only; the previous setting is restored on every exit path, including the
    # stop() below.
    old_opts <- options(warn = -1)
    on.exit(options(old_opts), add = TRUE)

    py_exist <- if (is_windows()) {
        if (py_exec %in% system2("where", "python", stdout = TRUE)) {
            py_exec
        } else {
            NULL
        }
    } else {
        system2("which", py_exec, stdout = TRUE)
    }

    if (length(py_exist) == 0) {
        stop(py_exec, " is not a python executable")
    }

    # Ask the interpreter to import spaCy and load the model; it prints "OK"
    # on success, otherwise the captured stderr describes the failure.
    sys_message <- system2(
        py_exec,
        sprintf("-c \"import spacy; spacy.load('%s'); print('OK')\"", model),
        stderr = TRUE, stdout = TRUE
    )
    paste(sys_message, collapse = " ")
}
227

228

229
# Decide which python environment spacyr should use and record the choice in
# one of the options "spacy_python_executable", "spacy_virtualenv" or
# "spacy_condaenv".
#
# Precedence: an already-set option wins; then an explicitly supplied
# `python_executable`, `virtualenv` or `condaenv` (at most one may be given);
# then, if `check_env` is TRUE, an existing "spacy_condaenv" conda environment
# or ~/.virtualenvs/spacy_virtualenv; finally a search via find_spacy().
# Always returns NULL.
set_spacy_python_option <- function(python_executable = NULL,
                                    virtualenv = NULL,
                                    condaenv = NULL,
                                    check_env = TRUE,
                                    refresh_settings = FALSE,
                                    ask = NULL,
                                    model = NULL) {
    if (refresh_settings) clear_spacy_options()

    settings <- check_spacy_python_options()

    # Count the arguments that were actually supplied. is.null() must be applied
    # to each argument separately: c() drops NULLs, so is.null(c(...)) yields a
    # single value and can never detect more than one.
    n_specified <- sum(!vapply(list(python_executable, virtualenv, condaenv),
                               is.null, logical(1)))

    if (!is.null(settings)) {
        message("spacy python option is already set, spacyr will use:\n\t",
                sub("spacy_", "", settings$key), ' = "', settings$val, '"')
    } else if (n_specified > 1) {
        # a user can specify only one
        stop(paste("Too many python environments are specified, please select only one",
                   "from python_executable, virtualenv, and condaenv"))
    } else if (!is.null(python_executable)) {
        if (check_spacy_model(python_executable, model) != "OK") {
            stop("spaCy or language model ", model, " is not installed in ", python_executable)
        }
        clear_spacy_options()
        options(spacy_python_executable = python_executable)
    } else if (!is.null(virtualenv)) {
        clear_spacy_options()
        options(spacy_virtualenv = virtualenv)
    } else if (!is.null(condaenv)) {
        clear_spacy_options()
        options(spacy_condaenv = condaenv)
    } else if (check_env &&
               !is.null(tryCatch(reticulate::conda_binary("auto"), error = function(e) NULL)) &&
               "spacy_condaenv" %in% reticulate::conda_list(conda = "auto")$name) {
        message("Found 'spacy_condaenv'. spacyr will use this environment")
        clear_spacy_options()
        options(spacy_condaenv = "spacy_condaenv")
    } else if (check_env &&
               file.exists(file.path("~/.virtualenvs", "spacy_virtualenv", "bin", "activate"))) {
        message("Found 'spacy_virtualenv'. spacyr will use this environment")
        clear_spacy_options()
        options(spacy_virtualenv = "~/.virtualenvs/spacy_virtualenv")
    } else {
        # Nothing was specified and no known environment exists: search for a
        # python executable. `ask` defaults to NULL here, so normalise it to a
        # single TRUE/FALSE before passing it on.
        message("Finding a python executable with spaCy installed...")
        spacy_python <- find_spacy(model, ask = isTRUE(ask))
        if (is.null(spacy_python)) {
            stop("spaCy or language model ", model, " is not installed in any of python executables.")
        } else if (is.na(spacy_python)) {
            stop("No python was found on system PATH")
        } else {
            options(spacy_python_executable = spacy_python)
        }
    }
    NULL
}
291

292
# Unset the three options that record which python environment spacyr uses.
# Invisibly returns the previous values, as options() does.
clear_spacy_options <- function() {
    options(spacy_python_executable = NULL,
            spacy_condaenv = NULL,
            spacy_virtualenv = NULL)
}
297

298
# Report which spacyr python-environment option is currently set.
#
# Returns NULL when none of the options is set, otherwise a list with `key`
# (the option name) and `val` (its value). When several options are set, the
# one listed last in `keys` takes precedence.
check_spacy_python_options <- function() {
    keys <- c("spacy_python_executable",
              "spacy_condaenv",
              "spacy_virtualenv")
    is_set <- !vapply(keys, function(k) is.null(getOption(k)), logical(1),
                      USE.NAMES = FALSE)
    if (!any(is_set)) {
        return(NULL)
    }
    key <- keys[max(which(is_set))]
    list(key = key, val = getOption(key))
}
310

311
# Persist a spacyr option as an `options(key = "val")` line in an R profile
# file, replacing any existing line that matches `options( spacy_...)`.
#
# key       name of the option to save
# val       value of the option
# prompt    if TRUE, ask for confirmation through a menu; overridden by the
#           "spacy_prompt" option when that option is set
# prof_file path of the profile file to update
save_spacy_options <- function(key, val, prompt = TRUE, prof_file = "~/.Rprofile") {
    if (!is.null(getOption("spacy_prompt"))) prompt <- getOption("spacy_prompt")

    # Menu choice 2 is "Yes"; without a prompt the option is saved directly.
    ans <- if (prompt) {
        utils::menu(c("No", "Yes"),
                    title = sprintf('Do you want to set the option, \'%s = "%s"\' , as a default (y|[n])? ', key, val))
    } else {
        2
    }

    if (ans == 2) {
        rprofile <- if (file.exists(prof_file)) readLines(prof_file) else NULL
        # Drop previously saved spacy_* option lines before appending the new one.
        rprofile <- grep("options\\(\\s*spacy_.+\\)", rprofile, value = TRUE, invert = TRUE)
        # encodeString() escapes backslashes and quotes, so that values such as
        # Windows paths are written as a valid R string literal.
        rprofile <- c(rprofile,
                      sprintf("options(%s = %s)", key,
                              encodeString(as.character(val), quote = "\"")))
        write(rprofile, file = prof_file)
        message("The option was saved. The option will be used in spacy_initialize() in future")
    } else {
        message("The option was not saved (user cancelled)")
    }
}

Read our documentation on viewing source code .

Loading