ropensci / taxize

Compare 6103549 ... +7 ... e14f735

Coverage Reach
classification.R class2tree.R get_eolid.R get_uid.R get_wormsid.R get_boldid.R get_wiki.R zzz.R taxize_cite.R get_tolid.R get_natservid.R get_gbifid.R get_tsn.R synonyms.R get_pow.R get_nbnid.R get_tpsid.R get_col.R id2name.R children.R ping.R sci2comm.R gnr_resolve.R ncbi_children.R downstream.R comm2sci.R get_iucn.R tax_name.R lowest_common.R worms_downstream.R genbank2uid.R gbif_downstream.R apg.R gbif_helpers.R itis_downstream.R progressor.R eol_pages.R iucn_summary.R bold_search.R ncbi_downstream.R tax_rank.R get_ids.R limited_print.R bold_downstream.R ipni_search.R ncbi_get_taxon_summary.R fungorum.R scrapenames.r gbif_name_usage.R eubon.R taxon_state.R http.R tpl_get.r getkey.R vascan_search.r gni_details.R pow_search.R tax_agg.R upstream.R eol_search.R apg_lookup.R gni_parse.R get_utils.R tol_id2name.R gni_search.R tp_search.R tp_accnames.R rename.R gn_parse.R key_helpers.R tp_synonyms.R tp_summary.R tpl_families.r eol_dataobjects.R itis_hierarchy.R tp_dist.R resolve.R names_list.r ion.R taxize_options.R downstream-utils.R iucn_id.R tp_refs.R plantminer.R nbn_search.R gbif_parse.R iplant_resolve.R taxize_capwords.r itis_acceptname.R itis_terms.R aaa.R rankagg.R eubon_children.R eubon_hierarchy.R iucn_getname.R itis_getrecord.R itis_lsid.R itis_native.R taxize_ldfast.R itis_kingdomnames.R gnr_datasources.R col.R onload.R nbn_classification.R status_codes.R eubon_capabilities.R itis_taxrank.R gni_helpers.R ncbi_getbyname.R get_ubioid.R ncbi_search.R nbn_synonyms.R tol_resolve.R ncbi_getbyid.R phylomatic_tree.R gisd_isinvasive.R ubio_classification_search.R tp_classification.R tnrs_sources.r ubio_search.R tpl_search.r tnrs.R ubio_classification.R eol_invasive.R eol_utiils.R itis_refs.R itis_name.R ubio_synonyms.R eol_hierarchy.R phylomatic_format.R ubio_id.R

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.

Showing 13 of 49 files from the diff.
Newly tracked file
R/get_col.R created.
Newly tracked file
R/taxize_options.R created.

@@ -274,10 +274,15 @@
Loading
274 274
275 275
ncbi_rate_limit_pause <- function(key) {
276 276
  # NCBI limits requests to three per second when no key and ten per second with key
277 +
  ncbi_sleep <- taxize_env$options$ncbi_sleep_sec
277 278
  if (is.null(key)) {
278 -
    Sys.sleep(0.334)
279 +
    tmz <- c(ncbi_sleep, 0.334)
280 +
    time_sleep <- tmz[which.max(tmz)]
281 +
    Sys.sleep(time_sleep)
279 282
  } else {
280 -
    Sys.sleep(0.101)
283 +
    tmz <- c(ncbi_sleep, 0.101)
284 +
    time_sleep <- tmz[which.max(tmz)]
285 +
    Sys.sleep(tmz)
281 286
  }
282 287
}
283 288

@@ -0,0 +1,284 @@
Loading
1 +
#' Get IDs from the Catalogue of Life
2 +
#'
3 +
#' @export
4 +
#' @param sci (character) one or more scientific names. Or, a `taxon_state`
5 +
#' object (see [taxon-state])
6 +
#' @param ask logical; should get_col be run in interactive mode?
7 +
#' If TRUE and more than one ID is found for the species, the user is
8 +
#' asked for input. If `FALSE` NA is returned for multiple matches.
9 +
#' @param messages logical; If TRUE the actual taxon queried is printed on the
10 +
#' console.
11 +
#' @param rows numeric; Any number from 1 to infinity. If the default NA, all
12 +
#' rows are considered. Note that this function still only gives back a col
13 +
#' class object with one to many identifiers. See `get_col_()` to get back all,
14 +
#' or a subset, of the raw data that you are presented during the ask process.
15 +
#' @param minRank,maxRank (character) filter by rank. See [rcol::cp_nu_search()]
16 +
#' for options
17 +
#' @param status (character) filter by taxonomic status. one of: accepted,
18 +
#' doubtful, ambiguous synonym
19 +
#' @param fuzzy (logical) `TRUE` or `FALSE`. default: `NULL`
20 +
#' @param x Input to [as.col()]
21 +
#' @param limit (numeric) number of records to return, applies to `get_col_`
22 +
#' only
23 +
#' @param check logical; Check if ID matches any existing on the DB, only used
24 +
#' in `as.col()`
25 +
#' @param ... Ignored
26 +
#' @template getreturn
27 +
#' @family taxonomic-ids
28 +
#' @seealso [classification()]
29 +
#' @details Internally we use [rcol::cp_nu_search()]
30 +
#' @examples \dontrun{
31 +
#' get_col(sci='Poa annua')
32 +
#' get_col(sci='Pinus contorta')
33 +
#' get_col(sci='Puma concolor')
34 +
#' 
35 +
#' #lots of queries
36 +
#' spp <- names_list("species", 10)
37 +
#' res <- get_col(spp)
38 +
#' res
39 +
#' xx <- taxon_last()
40 +
#' xx
41 +
#'
42 +
#' # multiple names
43 +
#' get_col(c("Poa annua", "Pinus contorta"))
44 +
#'
45 +
#' # specify rows to limit choices available
46 +
#' get_col(sci='Satyrium', status = NULL)
47 +
#' get_col(sci='Satyrium', rows=10)
48 +
#' get_col(sci='Satyrium', rows=1:3)
49 +
#'
50 +
#' # When not found, NA given
51 +
#' get_col(sci="uaudnadndj")
52 +
#' get_col(c("Chironomus riparius", "uaudnadndj"))
53 +
#'
54 +
#' # Narrow down results to a division or rank, or both
55 +
#' ## Satyrium example
56 +
#' ### Results w/o narrowing
57 +
#' get_col("Satyrium")
58 +
#' ### w/ phylum
59 +
#' get_col("Satyrium", phylum = "Tracheophyta")
60 +
#' get_col("Satyrium", phylum = "Arthropoda")
61 +
#' ### w/ phylum & rank
62 +
#' get_col("Satyrium", phylum = "Arthropoda", rank = "genus")
63 +
#'
64 +
#' ## min/max rank example
65 +
#' get_col("Poa", rank = "genus")
66 +
#' get_col("Poa", family = "Thripidae")
67 +
#'
68 +
#' # Fuzzy filter
69 +
#' get_col("A*", fuzzy = FALSE)
70 +
#' get_col("A*", fuzzy = TRUE)
71 +
#'
72 +
#' # Convert a uid without class information to a uid class
73 +
#' as.col(get_col("Poa annua")) # already a uid, returns the same
74 +
#' as.col(get_col(c("Poa annua","Puma concolor"))) # same
75 +
#' as.col(2704179) # numeric
76 +
#' as.col(c(2704179,2435099,3171445)) # numeric vector, length > 1
77 +
#' as.col("2704179") # character
78 +
#' as.col(c("2704179","2435099","3171445")) # character vector, length > 1
79 +
#' as.col(list("2704179","2435099","3171445")) # list, either numeric or character
80 +
#' ## dont check, much faster
81 +
#' as.col("2704179", check=FALSE)
82 +
#' as.col(2704179, check=FALSE)
83 +
#' as.col(2704179, check=FALSE)
84 +
#' as.col(c("2704179","2435099","3171445"), check=FALSE)
85 +
#' as.col(list("2704179","2435099","3171445"), check=FALSE)
86 +
#'
87 +
#' (out <- as.col(c(2704179,2435099,3171445)))
88 +
#' data.frame(out)
89 +
#' as.uid( data.frame(out) )
90 +
#'
91 +
#' # Get all data back
92 +
#' get_col_("Puma concolor")
93 +
#' get_col_(c("Pinus", "uaudnadndj"))
94 +
#' get_col_(c("Pinus", "Puma"), rows=5)
95 +
#' get_col_(c("Pinus", "Puma"), rows=1:5)
96 +
#'
97 +
#' # use curl options
98 +
#' invisible(get_col("Quercus douglasii", verbose = TRUE))
99 +
#' }
100 +
get_col <- function(sci, ask = TRUE, messages = TRUE, rows = NA,
101 +
  status = "accepted", minRank = NULL, maxRank = NULL, fuzzy = FALSE, ...) {
102 +
103 +
  assert(sci, c("character", "taxon_state"))
104 +
  assert(ask, "logical")
105 +
  assert(messages, "logical")
106 +
  assert(status, "character")
107 +
  assert(minRank, "character")
108 +
  assert(maxRank, "character")
109 +
  assert(fuzzy, "logical")
110 +
  assert_rows(rows)
111 +
112 +
  if (inherits(sci, "character")) {
113 +
    tstate <- taxon_state$new(class = "col", names = sci)
114 +
    items <- sci
115 +
  } else {
116 +
    assert_state(sci, "col")
117 +
    tstate <- sci
118 +
    sci <- tstate$taxa_remaining()
119 +
    items <- c(sci, tstate$taxa_completed())
120 +
  }
121 +
122 +
  prog <- progressor$new(items = items, suppress = !messages)
123 +
  done <- tstate$get()
124 +
  for (i in seq_along(done)) prog$completed(names(done)[i], done[[i]]$att)
125 +
  prog$prog_start()
126 +
127 +
  for (i in seq_along(sci)) {
128 +
    direct <- FALSE
129 +
    mssg(messages, "\nRetrieving data for taxon '", sci[i], "'\n")
130 +
    res <- rcol::cp_nu_search(q=sci[i], status = status,
131 +
      minRank = minRank, maxRank = maxRank,
132 +
      fuzzy = fuzzy, dataset_key = "3LR", limit = 100)
133 +
    df <- res$result
134 +
    mm <- NROW(df) > 1
135 +
136 +
    if (NROW(df) == 0) df <- data.frame(NULL)
137 +
138 +
    if (NROW(df) == 0) {
139 +
      mssg(messages, m_not_found_sp_altclass)
140 +
      id <- NA_character_
141 +
      att <- "not found"
142 +
    } else {
143 +
      usage <- df$usage$name
144 +
      df <- cbind(id=df$id, usage[,c("scientificName", "rank")],
145 +
        status=df$usage$status)
146 +
      id <- df$id
147 +
      att <- "found"
148 +
    }
149 +
150 +
    # not found
151 +
    if (length(id) == 0) {
152 +
      mssg(messages, m_not_found_sp_altclass)
153 +
      id <- NA_character_
154 +
      att <- "not found"
155 +
    }
156 +
157 +
    if (length(id) > 1) {
158 +
      # check for exact match
159 +
      matchtmp <- df[as.character(df$scientificName) %in% sci[i], "id"]
160 +
      if (length(matchtmp) == 1) {
161 +
        id <- as.character(matchtmp)
162 +
        direct <- TRUE
163 +
      } else {
164 +
        df <- sub_rows(df, rows)
165 +
        if (NROW(df) == 0) {
166 +
          id <- NA_character_
167 +
          att <- "not found"
168 +
        } else {
169 +
          id <- df$id
170 +
          if (length(id) == 1) {
171 +
            rank_taken <- as.character(df$rank)
172 +
            att <- "found"
173 +
          }
174 +
        }
175 +
176 +
        # more than one found -> user input
177 +
        if (length(id) > 1) {
178 +
          if (ask) {
179 +
            # prompt
180 +
            message("\n\n")
181 +
            message("\nMore than one COL ID found for taxon '", sci[i], "'!\n
182 +
            Enter rownumber of taxon (other inputs will return 'NA'):\n")
183 +
            rownames(df) <- 1:NROW(df)
184 +
            print(df)
185 +
            take <- scan(n = 1, quiet = TRUE, what = 'raw')
186 +
187 +
            if (length(take) == 0) {
188 +
              take <- 'notake'
189 +
              att <- 'nothing chosen'
190 +
            }
191 +
            if (take %in% seq_len(NROW(df))) {
192 +
              take <- as.numeric(take)
193 +
              message("Input accepted, took id '",
194 +
                      as.character(df$id[take]), "'.\n")
195 +
              id <- as.character(df$id[take])
196 +
              att <- "found"
197 +
            } else {
198 +
              id <- NA_character_
199 +
              att <- "not found"
200 +
              mssg(messages, "\nReturned 'NA'!\n\n")
201 +
            }
202 +
          } else {
203 +
            if (length(id) != 1) {
204 +
              warning(sprintf(m_more_than_one_found, "col", sci[i]),
205 +
                call. = FALSE)
206 +
              id <- NA_character_
207 +
              att <- m_na_ask_false
208 +
            }
209 +
          }
210 +
        }
211 +
      }
212 +
    }
213 +
    res <- list(id = id, att = att, multiple = mm, direct = direct)
214 +
    prog$completed(sci[i], att)
215 +
    prog$prog(att)
216 +
    tstate$add(sci[i], res)
217 +
  }
218 +
  out <- tstate$get()
219 +
  ids <- structure(as.character(unlist(pluck(out, "id"))), class = "col",
220 +
                   match = pluck_un(out, "att", ""),
221 +
                   multiple_matches = pluck_un(out, "multiple", logical(1)),
222 +
                   pattern_match = pluck_un(out, "direct", logical(1)))
223 +
  on.exit(prog$prog_summary(), add = TRUE)
224 +
  on.exit(tstate$exit, add = TRUE)
225 +
  add_uri(ids, get_url_templates$col)
226 +
}
227 +
228 +
#' @export
229 +
#' @rdname get_col
230 +
as.col <- function(x, check=FALSE) UseMethod("as.col")
231 +
232 +
#' @export
233 +
#' @rdname get_col
234 +
as.col.col <- function(x, check=FALSE) x
235 +
236 +
#' @export
237 +
#' @rdname get_col
238 +
as.col.character <- function(x, check=TRUE) if(length(x) == 1) make_col(x, check) else collapse(x, make_col, "col", check=check)
239 +
240 +
#' @export
241 +
#' @rdname get_col
242 +
as.col.list <- function(x, check=TRUE) if(length(x) == 1) make_col(x, check) else collapse(x, make_col, "col", check=check)
243 +
244 +
#' @export
245 +
#' @rdname get_col
246 +
as.col.data.frame <- function(x, check = TRUE) {
247 +
  structure(x$ids, class = "col", match = x$match,
248 +
            multiple_matches = x$multiple_matches,
249 +
            pattern_match = x$pattern_match, uri = x$uri)
250 +
}
251 +
252 +
#' @export
253 +
#' @rdname get_col
254 +
as.data.frame.col <- function(x, ...){
255 +
  data.frame(ids = as.character(unclass(x)),
256 +
             class = "col",
257 +
             match = attr(x, "match"),
258 +
             multiple_matches = attr(x, "multiple_matches"),
259 +
             pattern_match = attr(x, "pattern_match"),
260 +
             uri = attr(x, "uri"),
261 +
             stringsAsFactors = FALSE)
262 +
}
263 +
264 +
make_col <- function(x, check=TRUE) make_generic(x, get_url_templates$col, "col", check)
265 +
266 +
check_col <- function(x){
267 +
  tryid <- tryCatch(rcol::cp_ds("{key}/taxon/{id}", key = "3LR", id = x),
268 +
    error = function(e) e)
269 +
  if ( "error" %in% class(tryid) && is.null(tryid$id) ) FALSE else TRUE
270 +
}
271 +
272 +
#' @export
273 +
#' @rdname get_col
274 +
get_col_ <- function(sci, messages = TRUE, limit = 1000) {
275 +
  stats::setNames(lapply(sci, get_col_help, messages = messages,
276 +
    limit = limit), sci)
277 +
}
278 +
279 +
get_col_help <- function(sci, messages, limit) {
280 +
  mssg(messages, "\nRetrieving data for taxon '", sci, "'\n")
281 +
  df <- rcol::cp_nu_search(q=sci, dataset_key = "3LR", limit = limit)
282 +
  if (!is.null(df)) df <- nmslwr(df)
283 +
  return(df)
284 +
}

@@ -62,5 +62,6 @@
Loading
62 62
  itis = "https://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=%s",
63 63
  nbn = "https://species.nbnatlas.org/species/%s",
64 64
  natserv = "https://explorer.natureserve.org/Taxon/ELEMENT_GLOBAL.2.%s",
65 -
  iucn = iucn_base_url
65 +
  iucn = iucn_base_url,
66 +
  col = "https://data.catalogueoflife.org/dataset/3LR/taxon/%s"
66 67
)

@@ -15,6 +15,10 @@
Loading
15 15
#' @section HTTP version:
16 16
#' We hard code `http_version = 2L` to use HTTP/1.1 in HTTP requests to
17 17
#' the Entrez API. See `curl::curl_symbols('CURL_HTTP_VERSION')` 
18 +
#' 
19 +
#' @section Rate limits:
20 +
#' In case you run into errors due to your rate limit being exceeded, see
21 +
#' [taxize_options()], where you can set `ncbi_sleep`.
18 22
#'
19 23
#' @return one or more NCBI taxonomic IDs
20 24
#' @examples \dontrun{

@@ -29,6 +29,10 @@
Loading
29 29
#' @section HTTP version for NCBI requests:
30 30
#' We hard code `http_version = 2L` to use HTTP/1.1 in HTTP requests to
31 31
#' the Entrez API. See `curl::curl_symbols('CURL_HTTP_VERSION')` 
32 +
#' 
33 +
#' @section Rate limits:
34 +
#' In case you run into errors due to your rate limit being exceeded, see
35 +
#' [taxize_options()], where you can set `ncbi_sleep`.
32 36
#'
33 37
#' @author Scott Chamberlain
34 38
#' @examples \dontrun{

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Learn more Showing 3 files with coverage changes found.

New file R/taxize_options.R
New
Loading file...
New file R/get_col.R
New
Loading file...
Changes in R/progressor.R
-1
+1
Loading file...
Files Coverage
R 0.01% 61.04%
Project Totals (131 files) 61.04%
Loading