ropensci / aRxiv
1
#' Count number of results for a given search
2
#'
3
#' Count the number of results for a given search. Useful to check
4
#' before attempting to pull down a very large number of records.
5
#'
6
#' @param query Search pattern as a string; a vector of such strings is
7
#' also allowed, in which case the elements are combined with `AND`.
8
#' @param id_list arXiv doc IDs, as comma-delimited string or a vector
9
#' of such strings
10
#'
11
#' @return Number of results (integer). An attribute
12
#' `"search_info"` contains information about the search
13
#' parameters and the time at which it was performed.
14
#'
15
#' @seealso [arxiv_search()], [query_terms()],
16
#' [arxiv_cats()]
17
#'
18
#' @examples
19
#' \dontshow{old_delay <- getOption("aRxiv_delay")
20
#'           options(aRxiv_delay=1)}
21
#' \donttest{
22
#' # count papers in category stat.AP (applied statistics)
23
#' arxiv_count(query = "cat:stat.AP")
24
#'
25
#' # count papers by Peter Hall in any stat category
26
#' arxiv_count(query = 'au:"Peter Hall" AND cat:stat*')
27
#'
28
#' # count papers for a range of dates
29
#' #    here, everything in 2013
30
#' arxiv_count("submittedDate:[2013 TO 2014]")
31
#' }
32
#' \dontshow{options(aRxiv_delay=old_delay)}
33
#'
34
#' @export
35
arxiv_count <-
function(query=NULL, id_list=NULL)
{
    # Issues one request to the arXiv API asking for zero entries, then
    # reads the total number of matches from the parsed response.
    # Returns 0 when both query and id_list are blank, and invisible NULL
    # when the HTTP request itself fails.
    query_url <- "http://export.arxiv.org/api/query"

    # normalize the inputs with the package helpers (vector inputs are
    #    allowed, per the documentation of the arguments)
    query <- paste_query(query)
    id_list <- paste_id_list(id_list)

    # nothing to search for: answer without contacting arXiv
    if(is_blank(query) && is_blank(id_list)) return(0)

    delay_if_necessary()

    # do search
    # (extra messy to avoid possible problems when testing on CRAN
    #    timeout_action defined in timeout.R)
    # max_results=0: only the total count is needed, so request no entries
    body <- list(search_query=query, id_list=id_list,
                 start=0, max_results=0)
    body <- drop_nulls(body)
    search_result <- try(httr::POST(query_url,
                                    body=body,
                                    httr::timeout(get_arxiv_timeout())))
    if(inherits(search_result, "try-error")) {
        timeout_action()
        return(invisible(NULL))
    }

    set_arxiv_time() # set time for last call to arXiv

    # convert XML results to a list
    listresult <- result2list(search_result)

    # handle null return
    if(is.null(listresult)) {
        # integer zero, so the value agrees with the "integer" class
        #    assigned below
        result <- 0L
    }
    else {

        # check for arXiv error
        #    (done before the generic HTTP check so that arXiv's own
        #     message is the one reported when it is available)
        error_message <- arxiv_error_message(listresult)
        if(!is.null(error_message)) {
            stop("arXiv error: ", error_message)
        }

        # check for general http error
        httr::stop_for_status(search_result)

        # return totalResults
        result <- as.integer(listresult$totalResults)
    }

    # record the search parameters alongside the count
    attr(result, "search_info") <-
        search_attributes(query, id_list, NULL, NULL, NULL, NULL)

    # assign class to avoid printing attributes
    class(result) <- c("arxiv_count", "integer")
    result
}
89

90
# to avoid printing attributes
91
#' @export
92
print.arxiv_count <- function(x, ...) {
    # Print method for "arxiv_count" objects: show only the bare value.
    # as.vector() strips the attributes (class and "search_info") so they
    # are not printed; further arguments are forwarded to print().
    bare_value <- as.vector(x)
    print(bare_value, ...)
}
97

98
# omit search_info attribute
99
#    also, if arxiv_count, unclass
100
omit_attr <- function(x) {
    # Remove the "search_info" and "total_results" attributes from x;
    # an object of class "arxiv_count" is additionally unclassed.
    for(attr_name in c("search_info", "total_results")) {
        attr(x, attr_name) <- NULL
    }

    # anything that is not an arxiv_count keeps its class
    if(!inherits(x, "arxiv_count")) {
        return(x)
    }

    unclass(x)
}

Read our documentation on viewing source code.

Loading