ropensci / bibtex

@@ -0,0 +1,4 @@
Loading
1 +
.onLoad <- function(libname, pkgname) {
2 +
  backports::import(pkgname, "trimws")
3 +
  invisible()
4 +
}

@@ -1,187 +1,196 @@
Loading
1 -
# backport of simplified trimws() (introduced in R-3.3.0)
2 -
trimws <- function(x) {
3 -
  sub("^[[:space:]]+", "", sub("[[:space:]]$", "", x))
4 -
}
5 -
6 1
UnlistSplitClean <- function(s) {
7 2
  unlist(strsplit(gsub("[{}]", "", trimws(s)), " "))
8 3
}
9 4
10 5
#' @importFrom tools deparseLatex latexToUtf8 parseLatex
11 -
cleanupLatex <- function (x){
12 -
  if (!length(x))
6 +
cleanupLatex <- function(x) {
7 +
  if (!length(x)) {
13 8
    return(x)
9 +
  }
14 10
15 -
  if (any(grepl('mkbib', x))){
16 -
    x <- gsub('mkbibquote', 'dQuote', x)
17 -
    x <- gsub('mkbibemph', 'emph', x)
18 -
    x <- gsub('mkbibbold', 'bold', x)
11 +
  if (any(grepl("mkbib", x))) {
12 +
    x <- gsub("mkbibquote", "dQuote", x)
13 +
    x <- gsub("mkbibemph", "emph", x)
14 +
    x <- gsub("mkbibbold", "bold", x)
19 15
  }
20 -
  x <- gsub('\\\\hyphen', '-', x)
16 +
  x <- gsub("\\\\hyphen", "-", x)
21 17
22 18
  latex <- try(parseLatex(x), silent = TRUE)
23 19
  if (inherits(latex, "try-error")) {
24 20
    x
25 -
  }else {
21 +
  } else {
26 22
    x <- deparseLatex(latexToUtf8(latex), dropBraces = TRUE)
27 -
    if (grepl("\\\\[[:punct:]]", x)){
28 -
      x <- gsub("\\\\'I", '\u00cd', x)
29 -
      x <- gsub("\\\\'i", '\u00ed', x)
30 -
      x <- gsub('\\\\"I', '\u00cf', x)
31 -
      x <- gsub('\\\\"i', '\u00ef', x)
32 -
      x <- gsub("\\\\\\^I", '\u00ce', x)
33 -
      x <- gsub("\\\\\\^i", '\u00ee', x)
34 -
      x <- gsub("\\\\`I", '\u00cc', x)
35 -
      x <- gsub("\\\\`i", '\u00ec', x)
36 -
      Encoding(x) <- 'UTF-8'
23 +
    if (grepl("\\\\[[:punct:]]", x)) {
24 +
      x <- gsub("\\\\'I", "\u00cd", x)
25 +
      x <- gsub("\\\\'i", "\u00ed", x)
26 +
      x <- gsub('\\\\"I', "\u00cf", x)
27 +
      x <- gsub('\\\\"i', "\u00ef", x)
28 +
      x <- gsub("\\\\\\^I", "\u00ce", x)
29 +
      x <- gsub("\\\\\\^i", "\u00ee", x)
30 +
      x <- gsub("\\\\`I", "\u00cc", x)
31 +
      x <- gsub("\\\\`i", "\u00ec", x)
32 +
      Encoding(x) <- "UTF-8"
37 33
    }
38 34
    x
39 35
  }
40 36
}
41 37
42 38
#' @importFrom utils as.personList
43 -
ArrangeAuthors <- function (x){
39 +
ArrangeAuthors <- function(x) {
44 40
  rx <- "(?i)[[:space:]]+and[[:space:]]+"
45 -
  x <- gsub('[[:space:]]{2,}', ' ', x, useBytes = TRUE)
41 +
  x <- gsub("[[:space:]]{2,}", " ", x, useBytes = TRUE)
46 42
  authors <- lapply(strsplit(x, rx, perl = TRUE)[[1]], ArrangeSingleAuthor)
47 43
  as.personList(authors)
48 44
}
49 45
50 -
ArrangeSingleAuthor <- function(y){
51 -
  if (grepl('[\\]', y)){
46 +
ArrangeSingleAuthor <- function(y) {
47 +
  if (grepl("[\\]", y)) {
52 48
    tmp <- try(parseLatex(y), TRUE)
53 -
    if (!inherits(tmp, 'try-error'))
49 +
    if (!inherits(tmp, "try-error")) {
54 50
      y <- deparseLatex(latexToUtf8(tmp))
51 +
    }
55 52
  }
56 53
  parts <- unlist(strsplit(y, ", ?(?![^{}]*})", perl = TRUE))
57 54
  len.parts <- length(parts)
58 -
  if (len.parts == 1L){
55 +
  if (len.parts == 1L) {
59 56
    #     parts <- "{Barnes} {and} {Noble,} {Inc.}"
60 -
    if (grepl("[^{][[:print:]][}]$", parts)){
61 -
      s <- unlist(strsplit(parts, ''))
57 +
    if (grepl("[^{][[:print:]][}]$", parts)) {
58 +
      s <- unlist(strsplit(parts, ""))
62 59
      i <- length(s) - 1L
63 60
      paren <- 1
64 -
      while (paren > 0 && i > 0){
65 -
        if (s[i] == '{'){
61 +
      while (paren > 0 && i > 0) {
62 +
        if (s[i] == "{") {
66 63
          paren <- paren - 1L
67 -
        }else if (s[i] == '}'){
64 +
        } else if (s[i] == "}") {
68 65
          paren <- paren + 1L
69 66
        }
70 67
        i <- i - 1L
71 68
      }
72 -
      last <- paste0(s[(i+2):(length(s)-1)], collapse = '')
69 +
      last <- paste0(s[(i + 2):(length(s) - 1)], collapse = "")
73 70
      first <- NULL
74 -
      if (i > 0)
75 -
        first <- paste0(s[seq_len(i-1)], collapse = '')
76 -
      person(UnlistSplitClean(first), cleanupLatex(last))  # Mathew {McLean IX}
77 -
    }else{
71 +
      if (i > 0) {
72 +
        first <- paste0(s[seq_len(i - 1)], collapse = "")
73 +
      }
74 +
      person(UnlistSplitClean(first), cleanupLatex(last)) # Mathew {McLean IX}
75 +
    } else {
78 76
      vonrx <- "(^|[[:space:]])([[:lower:]+[:space:]?]+)[[:space:]]"
79 77
      m <- regexec(vonrx, parts)
80 78
      von <- unlist(regmatches(parts, m))[3L]
81 -
      if (!is.na(von)){
79 +
      if (!is.na(von)) {
82 80
        name <- unlist(strsplit(parts, vonrx))
83 -
        if (length(name) == 1L){  # von Bommel
84 -
          person(family=c(cleanupLatex(von), cleanupLatex(name)))
85 -
        }else{  # Mark von Bommel
86 -
          person(given = UnlistSplitClean(name[1L]), family=c(cleanupLatex(von), cleanupLatex(name[2L])))
81 +
        if (length(name) == 1L) { # von Bommel
82 +
          person(family = c(cleanupLatex(von), cleanupLatex(name)))
83 +
        } else { # Mark von Bommel
84 +
          person(given = UnlistSplitClean(name[1L]), family = c(cleanupLatex(von), cleanupLatex(name[2L])))
87 85
        }
88 -
      }else{  # George Bernard Shaw
86 +
      } else { # George Bernard Shaw
89 87
        name <- UnlistSplitClean(parts)
90 88
        len.name <- length(name)
91 -
        if (len.name <= 1L){
89 +
        if (len.name <= 1L) {
92 90
          person(family = name)
93 -
        }else{
91 +
        } else {
94 92
          person(given = name[seq_len(len.name - 1L)], family = name[len.name])
95 93
        }
96 94
      }
97 95
    }
98 -
  }else if (len.parts == 2L){
99 -
    if (grepl('^[{]', parts[1L])){  # e.g. {de Gama}, Vasco
96 +
  } else if (len.parts == 2L) {
97 +
    if (grepl("^[{]", parts[1L])) { # e.g. {de Gama}, Vasco
100 98
      person(UnlistSplitClean(parts[2L]), UnlistSplitClean(parts[1L]))
101 -
    }else{
99 +
    } else {
102 100
      vonrx <- "^([[:lower:]+[:space:]?]+)[[:space:]]"
103 101
      m <- regexec(vonrx, parts[1L])
104 102
      von <- unlist(regmatches(parts[1L], m))[2]
105 -
      if (is.na(von)){  # e.g. Smith, John Paul
103 +
      if (is.na(von)) { # e.g. Smith, John Paul
106 104
        person(UnlistSplitClean(parts[2L]), cleanupLatex(parts[1L]))
107 -
      }else{  # e.g. de la Soul, John
108 -
        person(UnlistSplitClean(parts[2L]), c(cleanupLatex(von), cleanupLatex(sub(vonrx, '', parts[1L]))))
105 +
      } else { # e.g. de la Soul, John
106 +
        person(UnlistSplitClean(parts[2L]), c(cleanupLatex(von), cleanupLatex(sub(vonrx, "", parts[1L]))))
109 107
      }
110 108
    }
111 -
  }else if (len.parts == 3L){
109 +
  } else if (len.parts == 3L) {
112 110
    vonrx <- "^([[:lower:]+[:space:]?]+)[[:space:]]"
113 111
    m <- regexec(vonrx, parts[1L])
114 112
    von <- unlist(regmatches(parts[1L], m))[2]
115 -
    if (is.na(von)){  # e.g. White, Jr., Walter
113 +
    if (is.na(von)) { # e.g. White, Jr., Walter
116 114
      person(UnlistSplitClean(parts[3L]), c(cleanupLatex(parts[1L]), cleanupLatex(parts[2L])))
117 -
    }else{  # e.g. des White, Jr., Walter
118 -
      person(UnlistSplitClean(parts[3L]),
119 -
             c(cleanupLatex(von), cleanupLatex(sub(vonrx, '', parts[1L])), cleanupLatex(parts[2L])))
115 +
    } else { # e.g. des White, Jr., Walter
116 +
      person(
117 +
        UnlistSplitClean(parts[3L]),
118 +
        c(cleanupLatex(von), cleanupLatex(sub(vonrx, "", parts[1L])), cleanupLatex(parts[2L]))
119 +
      )
120 120
    }
121 -
  }else{
122 -
    stop('Invalid author/editor format.')
121 +
  } else {
122 +
    stop("Invalid author/editor format.")
123 123
  }
124 124
}
125 125
126 126
#' @importFrom utils bibentry person citation installed.packages toBibtex
127 -
make.bib.entry <- function( x ){
128 -
    type <- attr( x, "entry" )
129 -
    key  <- attr( x, "key" )
130 -
131 -
    y <- as.list( x )
132 -
    names(y) <- tolower( names(y) )
133 -
134 -
    err.fun <- function(e){
135 -
         message( sprintf( "ignoring entry '%s' (line %d) because :\n\t%s\n",
136 -
                         key,
137 -
                         attr(x, "srcref")[1],
138 -
                         conditionMessage( e ) ) )
139 -
          NULL
140 -
    }
127 +
make.bib.entry <- function(x) {
128 +
  type <- attr(x, "entry")
129 +
  key <- attr(x, "key")
141 130
142 -
    if( "author" %in% names(y) ){
143 -
        y[["author"]] <- tryCatch(ArrangeAuthors( y[["author"]] ), error = err.fun)
144 -
        if (is.null(y[["author"]]))
145 -
            return()
131 +
  y <- as.list(x)
132 +
  names(y) <- tolower(names(y))
133 +
134 +
  err.fun <- function(e) {
135 +
    message(sprintf(
136 +
      "ignoring entry '%s' (line %d) because :\n\t%s\n",
137 +
      key,
138 +
      attr(x, "srcref")[1],
139 +
      conditionMessage(e)
140 +
    ))
141 +
    NULL
142 +
  }
143 +
144 +
  if ("author" %in% names(y)) {
145 +
    y[["author"]] <- tryCatch(ArrangeAuthors(y[["author"]]), error = err.fun)
146 +
    if (is.null(y[["author"]])) {
147 +
      return()
146 148
    }
147 -
    if( "editor" %in% names(y) ){
148 -
        y[["editor"]] <- tryCatch(ArrangeAuthors( y[["editor"]] ), error = err.fun)
149 -
        if (is.null(y[["editor"]]))
150 -
            return()
149 +
  }
150 +
  if ("editor" %in% names(y)) {
151 +
    y[["editor"]] <- tryCatch(ArrangeAuthors(y[["editor"]]), error = err.fun)
152 +
    if (is.null(y[["editor"]])) {
153 +
      return()
151 154
    }
155 +
  }
152 156
153 -
    # if there is a date entryn try to extract the year (#15)
154 -
    fields <- names(y)
155 -
    if( "date" %in% fields && !"year" %in% fields ){
156 -
      y$year <- format( as.Date( y$date), "%Y" )
157 -
    }
157 +
  # if there is a date entryn try to extract the year (#15)
158 +
  fields <- names(y)
159 +
  if ("date" %in% fields && !"year" %in% fields) {
160 +
    y$year <- format(as.Date(y$date), "%Y")
161 +
  }
158 162
159 -
    tryCatch(bibentry( bibtype = type, key = key, other = y ), error = err.fun)
163 +
  tryCatch(bibentry(bibtype = type, key = key, other = y), error = err.fun)
160 164
}
161 165
162 -
make.citation.list <- function( x, header, footer){
163 -
    rval <- list()
164 -
    for( i in seq_along(x) ){
165 -
        if( !is.null(x[[i]] ) )
166 -
            rval <- c( rval, x[[i]] )
166 +
make.citation.list <- function(x) {
167 +
  rval <- list()
168 +
  for (i in seq_along(x)) {
169 +
    if (!is.null(x[[i]])) {
170 +
      rval <- c(rval, x[[i]])
167 171
    }
168 -
    class(rval) <- c( "bibentry" )
169 -
    rval
172 +
  }
173 +
  class(rval) <- c("bibentry")
174 +
  rval
170 175
}
171 176
172 177
findBibFile <- function(package) {
173 -
    if( package %in% c("base", "datasets", "graphics", "grDevices",
174 -
                       "methods", "stats", "stats4", "tools", "utils" )
175 -
       ) {
176 -
        system.file( "bib", sprintf( "%s.bib", package ), package = "bibtex" )
177 -
    } else {
178 -
      reference_locations <- c("REFERENCES.bib", "inst/REFERENCES.bib")
179 -
      for(file_path in reference_locations) {
180 -
        attempt <- system.file( file_path, package = package )
181 -
        if( nzchar(attempt) ) return( attempt )
178 +
  if (package %in% c(
179 +
    "base", "datasets", "graphics", "grDevices",
180 +
    "methods", "stats", "stats4", "tools", "utils"
181 +
  )
182 +
  ) {
183 +
    system.file("bib", sprintf("%s.bib", package), package = "bibtex")
184 +
  } else {
185 +
    reference_locations <- c("REFERENCES.bib", "inst/REFERENCES.bib")
186 +
    for (file_path in reference_locations) {
187 +
      attempt <- system.file(file_path, package = package)
188 +
      if (nzchar(attempt)) {
189 +
        return(attempt)
182 190
      }
183 -
      stop( sprintf( "no bibtex database for package '%s'", package ) )
184 191
    }
192 +
    stop(sprintf("no bibtex database for package '%s'", package))
193 +
  }
185 194
}
186 195
187 196
#' convenience wrapper around .External call
@@ -196,10 +205,97 @@
Loading
196 205
#'
197 206
#' @param file file name
198 207
#' @param encoding encoding
199 -
#' @param srcfile output of \code{\link{srcfile}}
208 +
#' @param srcfile Deprecated
200 209
#' @export
201 -
do_read_bib <- function(file, encoding = "unknown", srcfile){
202 -
  .External( "do_read_bib", file=file, encoding=encoding, srcfile=srcfile, PACKAGE = "bibtex" )
210 +
do_read_bib <- function(file, encoding = "unknown", srcfile) {
211 +
  if (!missing("srcfile")) {
212 +
    message("'srcfile' argument is deprecated.")
213 +
  }
214 +
215 +
  # Assess the extension of the file
216 +
  if (!file.exists(file)) stop("Error: unable to open file to read")
217 +
218 +
  # Read all as UTF-8
219 +
  lines <- readLines(file, encoding = encoding, warn = FALSE)
220 +
221 +
  # Aux
222 +
  trimlines <- trimws(lines)
223 +
224 +
  # Map bib file
225 +
  # Init and end of citation
226 +
  init <- grep("^@", trimlines)
227 +
228 +
  # Identify type of entry
229 +
  entry <- unlist(lapply(trimlines[init], function(x) {
230 +
    unlist(strsplit(x, "\\{"))[1]
231 +
  }))
232 +
233 +
234 +
  # Map fo document
235 +
  map_bib <- data.frame(
236 +
    entry = trimws(entry),
237 +
    entry_lower = trimws(tolower(entry)),
238 +
    init = init,
239 +
    # The end of each entry is at most the
240 +
    # previous line of the next entry
241 +
    end = c(init[-1] - 1, length(lines))
242 +
  )
243 +
244 +
  # Extract map string
245 +
  map_string <- map_bib[map_bib$entry_lower == "@string", ]
246 +
247 +
  if (nrow(map_string) > 0) {
248 +
    # This is needed for multiline strings
249 +
250 +
    stringlines <- lapply(seq_len(nrow(map_string)), function(x) {
251 +
      init <- map_string$init[x]
252 +
      end <- map_string$end[x]
253 +
      string_line <- lines[seq(init, end)]
254 +
      # Guess lines outside of the entry
255 +
256 +
      guess_eostring <- max(grep("\\}$", string_line))
257 +
258 +
      string_line <- string_line[seq(1, guess_eostring)]
259 +
260 +
      # Collapse to single line
261 +
      string_line <- paste0(string_line, collapse = "\n")
262 +
      string_line
263 +
    })
264 +
265 +
    stringlines <- unlist(stringlines)
266 +
267 +
    map_string_end <- tryCatch(parse_strings(map_string, stringlines),
268 +
      warning = function(e) {
269 +
        NULL
270 +
      },
271 +
      error = function(e) {
272 +
        stop("Error when parsing strings of ",
273 +
          file,
274 +
          call. = FALSE
275 +
        )
276 +
      }
277 +
    )
278 +
  } else {
279 +
    map_string_end <- NULL
280 +
  }
281 +
282 +
283 +
  # Select entries only, i.e. exclude preamble, comment and string
284 +
  map_entries <- map_bib[!map_bib$entry_lower %in% c(
285 +
    "@preamble",
286 +
    "@string", "@comment"
287 +
  ), ]
288 +
289 +
290 +
  # Parse single entry
291 +
  end <- lapply(seq_len(nrow(map_entries)), function(x) {
292 +
    parse_single_entry(
293 +
      map_entries$init[x], map_entries$end[x], lines,
294 +
      map_string_end
295 +
    )
296 +
  })
297 +
298 +
  return(end)
203 299
}
204 300
205 301
#' bibtex parser
@@ -215,10 +311,8 @@
Loading
215 311
#'         stats, stats4, tools and utils) are treated specially: this package
216 312
#'         contains bibtex entries for these packages.
217 313
#' @param encoding encoding
218 -
#' @param header header of the citation list.
219 -
#'        By default this is made from the \samp{Preamble} entries found in
220 -
#'        the bib file.
221 -
#' @param footer footer of the citation list
314 +
#' @param header DEPRECATED.
315 +
#' @param footer DEPRECATED
222 316
#'
223 317
#' @return An object of class \code{"bibentry"}, similar to those obtained by the
224 318
#'        \code{\link[utils]{bibentry}} function.
@@ -227,59 +321,64 @@
Loading
227 321
#'
228 322
#' @examples
229 323
#' ## this package has a REFERENCES.bib file
230 -
#' bib <- read.bib( package = "bibtex" )
324 +
#' bib <- read.bib(package = "bibtex")
231 325
#'
232 326
#' ## bibtex collects bibtex entries for R base packages
233 -
#' base.bib <- read.bib( package = "base" )
327 +
#' base.bib <- read.bib(package = "base")
234 328
#'
235 329
#' \dontshow{
236 -
#' bib <- read.bib( package = "base" )
237 -
#' bib <- read.bib( package = "datasets" )
238 -
#' bib <- read.bib( package = "graphics" )
239 -
#' bib <- read.bib( package = "grDevices" )
240 -
#' bib <- read.bib( package = "methods" )
241 -
#' bib <- read.bib( package = "stats" )
242 -
#' bib <- read.bib( package = "stats4" )
243 -
#' bib <- read.bib( package = "tools" )
244 -
#' bib <- read.bib( package = "utils" )
330 +
#' bib <- read.bib(package = "base")
331 +
#' bib <- read.bib(package = "datasets")
332 +
#' bib <- read.bib(package = "graphics")
333 +
#' bib <- read.bib(package = "grDevices")
334 +
#' bib <- read.bib(package = "methods")
335 +
#' bib <- read.bib(package = "stats")
336 +
#' bib <- read.bib(package = "stats4")
337 +
#' bib <- read.bib(package = "tools")
338 +
#' bib <- read.bib(package = "utils")
245 339
#' }
246 340
#' @export
247 -
read.bib <- function(file = findBibFile(package) ,
248 -
         package = "bibtex",
249 -
         encoding = "unknown",
250 -
         header = if( length(preamble) ) paste( preamble, sep = "\n" ) else "",
251 -
         footer = "" )
252 -
{
253 -
    if( !is.character( file ) ){
254 -
        stop( "'read.bib' only supports reading from files, 'file' should be a character vector of length one" )
341 +
read.bib <- function(file = findBibFile(package),
342 +
                     package = "bibtex",
343 +
                     encoding = "unknown",
344 +
                     header,
345 +
                     footer) {
346 +
  if (!missing("header")) {
347 +
    message("'header' argument is deprecated.")
348 +
  }
349 +
350 +
  if (!missing("footer")) {
351 +
    message("'footer' argument is deprecated.")
352 +
  }
353 +
  if (!is.character(file)) {
354 +
    stop("'read.bib' only supports reading from files, 'file' should be a character vector of length one")
355 +
  }
356 +
357 +
  out <- tryCatch(do_read_bib(
358 +
    file = file,
359 +
    encoding = encoding
360 +
  ),
361 +
  error = function(e) {
362 +
    stop("Invalid bib file", call. = FALSE)
363 +
  }, warning = function(w) {
364 +
    if (any(grepl("syntax error, unexpected [$]end", w))) {
365 +
      NULL
255 366
    }
256 -
    srcfile <- switch( encoding,
257 -
                      "unknown" = srcfile( file ),
258 -
                      srcfile( file, encoding = encoding ) )
259 -
260 -
    out <- withCallingHandlers(tryCatch(.External( "do_read_bib", file = file,
261 -
                     encoding = encoding, srcfile = srcfile ),
262 -
                       error = function(e){
263 -
                           if(!any(grepl("unprotect_ptr", e)))
264 -
                              stop(geterrmessage(), call. = FALSE)
265 -
                           else
266 -
                              stop("Invalid bib file", call. = FALSE)
267 -
                       }), warning = function(w){
268 -
                             if( any( grepl( "syntax error, unexpected [$]end", w)))
269 -
                               invokeRestart("muffleWarning")
270 -
                           })
271 -
    # keys <- lapply(out, function(x) attr(x, 'key'))
272 -
    at  <- attributes(out)
273 -
    if((typeof(out) != "integer") || (getRversion() < "3.0.0"))
274 -
        out <- lapply( out, make.bib.entry )
275 -
    else
276 -
        out <- list()
277 -
    preamble <- at[["preamble"]]
278 -
279 -
    out <- make.citation.list( out, header, footer )
280 -
    attr( out, "strings") <- at[["strings"]]
281 -
    names(out) <- unlist(out$key)
282 -
    out
367 +
  }
368 +
  )
369 +
  # keys <- lapply(out, function(x) attr(x, 'key'))
370 +
  at <- attributes(out)
371 +
  if ((typeof(out) != "integer") || (getRversion() < "3.0.0")) {
372 +
    out <- lapply(out, make.bib.entry)
373 +
  } else {
374 +
    out <- list()
375 +
  }
376 +
  preamble <- at[["preamble"]]
377 +
378 +
  out <- make.citation.list(out)
379 +
  attr(out, "strings") <- at[["strings"]]
380 +
  names(out) <- unlist(out$key)
381 +
  out
283 382
}
284 383
285 384
#' Generate a Bibtex File from Package Citations
@@ -305,75 +404,90 @@
Loading
305 404
#' @export
306 405
#' @examples
307 406
#'
308 -
#' write.bib(c('bibtex', 'utils', 'tools'), file='references')
309 -
#' bibs <- read.bib('references.bib')
310 -
#' write.bib(bibs, 'references2.bib')
311 -
#' md5 <- tools::md5sum(c('references.bib', 'references2.bib'))
407 +
#' write.bib(c("bibtex", "utils", "tools"), file = "references")
408 +
#' bibs <- read.bib("references.bib")
409 +
#' write.bib(bibs, "references2.bib")
410 +
#' md5 <- tools::md5sum(c("references.bib", "references2.bib"))
312 411
#' md5[1] == md5[2]
313 412
#'
314 -
#' \dontshow{unlink(c('references.bib', 'references2.bib'))}
315 -
write.bib <- function(entry, file="Rpackages.bib", append = FALSE, verbose = TRUE) {
316 -
    bibs <-
317 -
    if( inherits(entry, "bibentry") )    entry
318 -
    else if( is.character(entry) ){
319 -
        if( length(entry) == 0 ){
320 -
            if( verbose ) message("Empty package list: nothing to be done.")
321 -
            return(invisible())
413 +
#' \dontshow{
414 +
#' unlink(c("references.bib", "references2.bib"))
415 +
#' }
416 +
write.bib <- function(entry, file = "Rpackages.bib", append = FALSE, verbose = TRUE) {
417 +
  bibs <-
418 +
    if (inherits(entry, "bibentry")) {
419 +
      entry
420 +
    } else if (is.character(entry)) {
421 +
      if (length(entry) == 0) {
422 +
        if (verbose) message("Empty package list: nothing to be done.")
423 +
        return(invisible())
424 +
      }
425 +
      pkgs <- entry
426 +
      if (is.null(pkgs)) { ## use all installed packages
427 +
        pkgs <- unique(installed.packages()[, 1])
428 +
      }
429 +
      bibs <- sapply(pkgs, function(x) try(citation(x)), simplify = FALSE)
430 +
      # bibs <- lapply(pkgs, function(x) try(toBibtex(citation(x))))
431 +
      n.installed <- length(bibs)
432 +
433 +
      ## omit failed citation calls
434 +
      ok <- sapply(bibs, inherits, "bibentry")
435 +
      pkgs <- pkgs[ok]
436 +
      bibs <- bibs[ok]
437 +
      n.converted <- sum(ok)
438 +
439 +
      ## add bibtex keys to each entry
440 +
      pkgs <- lapply(seq_along(pkgs), function(i) {
441 +
        if (length(bibs[[i]]) > 1) {
442 +
          paste(pkgs[i], 1:length(bibs[[i]]), sep = "")
443 +
        } else {
444 +
          pkgs[i]
322 445
        }
323 -
        pkgs <- entry
324 -
        if( is.null(pkgs) ) ## use all installed packages
325 -
            pkgs <- unique(installed.packages()[,1])
326 -
        bibs <- sapply(pkgs, function(x) try(citation(x)), simplify=FALSE)
327 -
        #bibs <- lapply(pkgs, function(x) try(toBibtex(citation(x))))
328 -
        n.installed <- length(bibs)
329 -
330 -
        ## omit failed citation calls
331 -
        ok <- sapply(bibs, inherits, "bibentry")
332 -
        pkgs <- pkgs[ok]
333 -
        bibs <- bibs[ok]
334 -
        n.converted <- sum(ok)
335 -
336 -
        ## add bibtex keys to each entry
337 -
        pkgs <- lapply(seq_along(pkgs), function(i) if(length(bibs[[i]]) > 1)
338 -
                        paste(pkgs[i], 1:length(bibs[[i]]), sep = "") else pkgs[i])
339 -
        pkgs <- do.call("c", pkgs)
340 -
        bibs <- do.call("c", bibs)
341 -
        # formatting function for bibtex keys:
342 -
        # names with special characters must be enclosed in {}, others not.
343 -
        as.bibkey <- function(x){
344 -
            i <- grep("[.]", x)
345 -
            if( length(i) > 0 )
346 -
                x[i] <- paste("{", x[i], "}", sep='')
347 -
            x
446 +
      })
447 +
      pkgs <- do.call("c", pkgs)
448 +
      bibs <- do.call("c", bibs)
449 +
      # formatting function for bibtex keys:
450 +
      # names with special characters must be enclosed in {}, others not.
451 +
      as.bibkey <- function(x) {
452 +
        i <- grep("[.]", x)
453 +
        if (length(i) > 0) {
454 +
          x[i] <- paste("{", x[i], "}", sep = "")
348 455
        }
349 -
        bibs <- mapply(function(b,k){ b$key <- k; b}, bibs, pkgs, SIMPLIFY=FALSE)
350 -
        bibs <- do.call("c", bibs)
351 -
352 -
        if(verbose) message("Converted ", n.converted, " of ", n.installed, " package citations to BibTeX")
353 -
        bibs
354 -
    } else
355 -
        stop("Invalid argument 'entry': expected a bibentry object or a character vector of package names.")
456 +
        x
457 +
      }
458 +
      bibs <- mapply(function(b, k) {
459 +
        b$key <- k
460 +
        b
461 +
      }, bibs, pkgs, SIMPLIFY = FALSE)
462 +
      bibs <- do.call("c", bibs)
356 463
357 -
    if( length(bibs) == 0 ){
358 -
        if( verbose ) message("Empty bibentry list: nothing to be done.")
359 -
        return(invisible())
464 +
      if (verbose) message("Converted ", n.converted, " of ", n.installed, " package citations to BibTeX")
465 +
      bibs
466 +
    } else {
467 +
      stop("Invalid argument 'entry': expected a bibentry object or a character vector of package names.")
360 468
    }
361 469
362 -
    ## write everything to a single .bib file
363 -
    if( is.null(file) )
364 -
        file <- stdout()
365 -
    else if( is.character(file) ){
366 -
        if( !grepl("\\.bib$", file) ) # add .bib extension if necessary
367 -
        file <- paste(file, '.bib', sep='')
470 +
  if (length(bibs) == 0) {
471 +
    if (verbose) message("Empty bibentry list: nothing to be done.")
472 +
    return(invisible())
473 +
  }
474 +
475 +
  ## write everything to a single .bib file
476 +
  if (is.null(file)) {
477 +
    file <- stdout()
478 +
  } else if (is.character(file)) {
479 +
    if (!grepl("\\.bib$", file)) { # add .bib extension if necessary
480 +
      file <- paste(file, ".bib", sep = "")
368 481
    }
482 +
  }
369 483
370 -
    fh <- file(file, open = if(append) "a+" else "w+" )
371 -
    on.exit( if( isOpen(fh) ) close(fh) )
372 -
    if( verbose ) message("Writing ", length(bibs) , " Bibtex entries ... ", appendLF=FALSE)
373 -
    writeLines(toBibtex(bibs), fh)
374 -
    #writeLines(do.call("c", lapply(bibs, as.character)), fh)
375 -
    if(verbose) message("OK\nResults written to file '", file, "'")
484 +
  fh <- file(file, open = if (append) "a+" else "w+")
485 +
  on.exit(if (isOpen(fh)) close(fh))
486 +
  if (verbose) message("Writing ", length(bibs), " Bibtex entries ... ", appendLF = FALSE)
487 +
  writeLines(toBibtex(bibs), fh)
488 +
  # writeLines(do.call("c", lapply(bibs, as.character)), fh)
489 +
  if (verbose) message("OK\nResults written to file '", file, "'")
376 490
377 -
    ## return Bibtex items invisibly
378 -
    invisible(bibs)
491 +
  ## return Bibtex items invisibly
492 +
  invisible(bibs)
379 493
}

@@ -0,0 +1,405 @@
Loading
1 +
# Helper functions for do_read_bib()
2 +
3 +
#' Parse (at)string keys
4 +
#'
5 +
#' Returns a data.frame with all the string keys and their value to be used on
6 +
#' the `bib` file.
7 +
#'
8 +
#' @param map_string data frame with at least one column. Used as index, the
9 +
#' only condition is that it must have one row per string.
10 +
#'
11 +
#' @param stringlines A vector of characters with the same length that
12 +
#'   map_string (in terms of number of rows), containing the text of each
13 +
#'   string.
14 +
#'
15 +
#' @details
16 +
#' Both inputs are generated on [do_read_bib()]. This function parses the
17 +
#' (at) string sequentially, i.e. if the first string is used for concatenation
18 +
#' in the second string it should work.
19 +
#'
20 +
#'
21 +
#' @examples
22 +
#'
23 +
#' map_string <- data.frame(
24 +
#'   entry = c("@STRING", "@STRING", "@STRING")
25 +
#' )
26 +
#'
27 +
#'
28 +
#' stringlines <- c(
29 +
#'   "@STRING{STOC-key = \"OX{\\singleletter{stoc}}\"}",
30 +
#'   "@STRING{ACM = \"The OX Association for Computing Machinery\"}",
31 +
#'   "@STRING{STOC = ACM # \" Symposium on the Theory of Computing\"}"
32 +
#' )
33 +
#'
34 +
#'
35 +
#' parse_strings(map_string, stringlines)
36 +
#' @noRd
37 +
parse_strings <- function(map_string,
                          stringlines) {

  # Strings must be parsed sequentially (in a loop) because a @string can
  # be defined in terms of strings declared earlier in the file.

  # Preallocate one slot per string instead of growing with rbind()
  parsed_string <- vector("list", length(stringlines))

  for (i in seq_along(stringlines)) {
    row <- map_string[i, ]
    singleline <- stringlines[i]

    # Errors out (for tryCatch() upstream) if braces are unbalanced
    map_braces <- check_balanced_braces(singleline, row$init)

    # Extract string between the first brace and the first position where
    # braces balance out (cum_flag == 0)
    first_bracket_position <- map_braces$pos[1]
    last_bracket_position <- min(map_braces[map_braces$cum_flag == 0, ]$pos)

    singleline <- trimws(substr(
      singleline, first_bracket_position,
      last_bracket_position
    ))

    # Remove also the enclosing braces
    singleline <- trimws(gsub("^\\{|\\}$", "", singleline))

    # Split name/value at the FIRST "=" only: the value itself may contain
    # "=" (e.g. a URL with query parameters), and splitting on every "="
    # would emit one spurious row per extra "=".
    # If the line has no "=" at all, regexpr() returns -1 and the row gets
    # an empty name rather than an opaque data.frame() error.
    eq_pos <- regexpr("=", singleline, fixed = TRUE)

    finalrow <- data.frame(
      name_string = trimws(substr(singleline, 1, eq_pos - 1)),
      value = trimws(substr(singleline, eq_pos + 1, nchar(singleline)))
    )

    # As character to avoid compatibility issues with older R versions
    # (data.frame() created factors by default before R 4.0)
    finalrow$name_string <- as.character(finalrow$name_string)
    finalrow$value <- as.character(finalrow$value)

    parsed_string[[i]] <- finalrow
  }

  # Bind all rows at once
  parsed_string <- do.call(rbind, parsed_string)

  # Resolve values in order, so later strings can reference earlier ones
  init <- parsed_string[1, ]

  # Trim surrounding blanks, then surrounding double quotes
  init$value <- trimws(init$value, "both")
  init$value <- trimws(init$value, "both", whitespace = "\"")

  # If only one string return
  if (nrow(parsed_string) == 1) {
    return(init)
  }
  for (j in seq(2, nrow(parsed_string))) {

    # Search if need to replace and concat
    row <- parsed_string[j, ]

    value <- replace_string_and_concat(
      row$value,
      init$name_string,
      init$value
    )

    row$value <- trimws(value, "both", "\"")

    init <- rbind(init, row)
  }

  return(init)
}
109 +
110 +
#' Parse a single entry
111 +
#'
112 +
#'
113 +
#' Parses a character string to an R object, that would be used to create
114 +
#' a [bibentry()] object on a later stage.
115 +
#'
116 +
#' @return A named character object with two attributes: entry with the
117 +
#' BibTeX entry type and key with the assigned key on the file.
118 +
#'
119 +
#' @param init Number of line where the entry begins
120 +
#' @param end Number of line where the entry ends
121 +
#' @param lines The content of the .bib file to be parsed, `init` and `end`
122 +
#'   are used to select the corresponding lines
123 +
#' @param map_string_end The output of [parse_strings()] or `NULL`.
124 +
#'
125 +
#'
126 +
#' @examples
127 +
#'
128 +
#' lines <- readLines(system.file("bib", "utils.bib", package = "bibtex"))
129 +
#' parse_single_entry(
130 +
#'   init = 2,
131 +
#'   end = 8,
132 +
#'   lines = lines,
133 +
#'   map_string_end = NULL
134 +
#' )
135 +
#' @noRd
136 +
# Parse a single BibTeX entry (lines init:end of `lines`) into a named
# character vector of field values, with attributes "entry" (entry type),
# "key" (citation key) and "srcref" (the source line numbers).
parse_single_entry <- function(init, end, lines, map_string_end) {
  entry_lines <- lines[seq(init, end)]

  # Drop empty lines
  entry_lines <- entry_lines[entry_lines != ""]

  # Guess the last line of the entry: the last line ending in "}".
  # Anything after it is treated as outside the entry.
  guess_eoentry <- max(grep("\\}$", entry_lines))

  entry_lines <- entry_lines[seq(1, guess_eoentry)]

  # Collapse to a single string, preserving line breaks
  entry_collapsed <- paste0(entry_lines, collapse = "\n")

  # Errors out (for tryCatch() upstream) if braces are unbalanced
  map_braces <- check_balanced_braces(entry_collapsed, init)

  # Extract the substring from the beginning to the first position where
  # braces balance out (cum_flag == 0)
  lastbrace <- min(map_braces[map_braces$cum_flag == 0, ]$pos)

  # Stopping at lastbrace - 1 also removes the closing brace, which is
  # what we want
  cleaned_entry <- trimws(substr(entry_collapsed, 1, lastbrace - 1))

  # Identify fields
  # Safest pattern found:
  # ,<space or newline>singleword<space or newline>=
  # E.g.:
  # ...}, author={...}
  # ...",\ntitle ="
  # etc.
  # A field name may contain -, _ or :; mask those with "x" first so the
  # \w+ separator pattern below still matches the field boundary

  cleaned_entry_sub <-
    gsub("(?<=[[:alnum:]])_|(?<=[[:alnum:]])-|(?<=[[:alnum:]]):", "x",
      cleaned_entry,
      perl = TRUE
    )

  # Protect commas inside braced values (replace with "|") so they are
  # not mistaken for field separators when locating field positions
  protected <- gsub(",(?![^\\}]*(\\{|$))", "|",
    cleaned_entry_sub,
    perl = TRUE
  )

  # Character positions of the ",<field> =" separators (same positions in
  # the masked and the original string, since both substitutions are 1:1)
  posfields <- unlist(gregexpr(",\\s*\\w+\\s*=", protected))
  # Add init and last pos
  posfields <- sort(unique(c(1, posfields, nchar(cleaned_entry))))

  # Cut the ORIGINAL (unmasked) entry at those positions: one chunk per field
  formatted_bib <- lapply(seq_len(length(posfields) - 1), function(x) {
    str <- substr(cleaned_entry, posfields[x], posfields[x + 1])

    # Cleanup leading and trailing comma and blanks
    str <- trimws(gsub("^,|,$", "", str))
  })

  formatted_bib <- unlist(formatted_bib)

  # Each field is now a single string

  # Extract type of entry and key from the first chunk:
  # "@misc{key" -> entry_name = "misc", key_name = "key"
  entry_key <- formatted_bib[1]

  entry_key <- gsub("@", "", unlist(strsplit(entry_key, "\\{")))
  entry_name <- trimws(entry_key[1])
  key_name <- trimws(entry_key[2])

  # Now split each remaining chunk into field name / field value

  fields <- formatted_bib[-1]

  treat_fields <- vapply(seq_along(fields), function(x) {
    string <- fields[x]
    # Positions of every "=" character in the chunk.
    # NOTE(review): when the value itself contains "=", equalsign has
    # length > 1 and substr() below uses only its first element, so the
    # split effectively happens at the first "=" — works, but relies on
    # that substr() behavior; confirm if refactoring.
    equalsign <- grep("=", strsplit(string, "")[[1]])
    field_name <- trimws(substr(string, 1, equalsign - 1))
    field_value <- trimws(substr(string, equalsign + 1, nchar(string)))

    c(field_name, field_value)
  },
  FUN.VALUE = character(2)
  )

  field_names <- trimws(treat_fields[1, ])
  field_value <- trimws(treat_fields[2, ])


  # Replace @string references (and "#" concatenations) with their values

  field_value <- lapply(field_value, replace_string_and_concat,
    string_names = map_string_end$name_string,
    string_values = map_string_end$value
  )

  field_value <- unlist(field_value)

  # Strip one pair of leading/trailing curly braces
  field_value <- gsub("^\\{|\\}$", "", field_value)

  # Strip leading/trailing quotes
  field_value <- gsub("^\"|\"$", "", field_value)


  # Build the final object: named character vector plus attributes

  attr(field_value, "entry") <- trimws(entry_name)
  names(field_value) <- trimws(field_names)
  attr(field_value, "key") <- key_name
  attr(field_value, "srcref") <- seq(init, end)

  return(field_value)
}
250 +
251 +
252 +
#' Replaces a word with a (at)string value and concatenate
253 +
#'
254 +
#' Helper function
255 +
#'
256 +
#'
257 +
#' @noRd
258 +
#'
259 +
#' @param x values to evaluate
260 +
#' @param string_names Names of the strings detected on the .bib file
261 +
#' @param string_values Values of the string
262 +
#'
263 +
#' @return A string with the specified treatment
264 +
#'
265 +
#' @examples
266 +
#' x <- "\"CIA means \" # CIA"
267 +
#' string <- c("CIA", "FBI")
268 +
#' value <- c("Central Intelligence Agency", "Federal Bureau of Investigation")
269 +
#'
270 +
#' # Replace and concat
271 +
#' replace_string_and_concat(x, string, value)
272 +
#' x_alt <- "CIA # \" can be abbreviated as CIA, and FBI stands for \" # FBI"
273 +
#'
274 +
#' replace_string_and_concat(x_alt, string, value)
275 +
#'
276 +
#'
277 +
#' # Full replacement
278 +
#' x2 <- "CIA"
279 +
#' replace_string_and_concat(x2, string, value)
280 +
#'
281 +
#' # No replacement
282 +
#' x3 <- "\"CIA\""
283 +
#' replace_string_and_concat(x3, string, value)
284 +
#'
285 +
#' # Protected
286 +
#' x4 <- "{CIA}"
287 +
#' replace_string_and_concat(x4, string, value)
288 +
# Substitute @string keys (case-insensitively) by their values in `x`,
# handling whole-value replacement and " # " concatenation. Brace-protected
# values are returned untouched; so is anything with no matching key.
replace_string_and_concat <- function(x, string_names, string_values) {

  # Normalise to character (guards against factor columns from data.frame)
  string_names <- as.character(string_names)
  string_values <- as.character(string_values)

  # Brace-protected values are never substituted
  if (grepl("\\{|\\}", x)) {
    return(x)
  }

  # Compare in lowercase to avoid capitalisation mismatches
  lowered_names <- tolower(string_names)

  # Whole value is a string key: substitute and re-quote
  full_hit <- match(tolower(x), lowered_names)
  if (!is.na(full_hit)) {
    return(paste0("\"", string_values[full_hit], "\""))
  }

  # Hardest case: " # " concatenation — substitute each piece, then join
  if (grepl(" # ", x)) {
    pieces <- unlist(strsplit(x, " # "))

    replaced <- vapply(pieces, function(piece) {
      hit <- match(tolower(piece), lowered_names)
      if (is.na(hit)) piece else string_values[hit]
    }, FUN.VALUE = character(1), USE.NAMES = FALSE)

    # Drop surrounding quotes from each piece and flatten
    joined <- paste(trimws(replaced, "both", "\""), collapse = "")

    return(trimws(joined))
  }

  # Nothing to substitute
  x
}
326 +
327 +
328 +
#' Error catching
329 +
#'
330 +
#' Evaluates whether braces \{ are balanced and throws an error if they are not.
331 +
#'
332 +
#' @param x The string or entry to be evaluated
333 +
#' @param line The number of line of the string/entry evaluated. This is used
334 +
#'   for generating an informative error only.
335 +
#'
336 +
#' @examples
337 +
#' if (FALSE) {
338 +
#'   # Don't run, throws an error
339 +
#'   error <- paste0(
340 +
#'     "@misc{murdoch:2010, author = {Duncan Murdoch}",
341 +
#'     "title = {I {am} unbalanced",
342 +
#'     "year = 2010",
343 +
#'     "url = {http://developer.r-project.org/parseRd.pdf}}"
344 +
#'   )
345 +
#'
346 +
#'   check_balanced_braces(error, 3)
347 +
#' }
348 +
#'
349 +
#'
350 +
#' ok <- paste0(
351 +
#'   "@misc{murdoch:2010, author = {Duncan Murdoch}",
352 +
#'   "title = {I {am} balanced}",
353 +
#'   "year = 2010",
354 +
#'   "url = {http://developer.r-project.org/parseRd.pdf}}"
355 +
#' )
356 +
#'
357 +
#' check_balanced_braces(ok, 3)
358 +
#' @noRd
359 +
check_balanced_braces <- function(x, line) {
  # Check if braces are balanced ----
  # Split the string into single characters, locate the left/right braces
  # and build a running balance over them.

  to_chars <- unlist(strsplit(x, ""))

  # Create table with brace map

  pos_leftbrace <- grep("\\{", to_chars)
  pos_rightbrace <- grep("\\}", to_chars)

  map_braces <- data.frame(
    brace = c(
      rep("{", length(pos_leftbrace)),
      rep("}", length(pos_rightbrace))
    ),
    pos = c(pos_leftbrace, pos_rightbrace)
  )

  # Order by character position so the running sum follows the text
  map_braces <- map_braces[order(map_braces$pos), ]

  # Assign an index to each brace: left = 1 and right = -1,
  # then cumsum. This reveals the structure, e.g.:
  # @aa{x, a=1, b={s{S}}, f="x{x}x"}
  # This is translated as
  # {  { {   }  }   {   } }
  # 1  1 1   -1 -1  1  -1 -1
  # 1  2 3   2   1   2  1  0

  map_braces$flag <- ifelse(map_braces$brace == "{", 1, -1)
  map_braces$cum_flag <- cumsum(map_braces$flag)

  # If the running sum never returns to 0, the braces are unbalanced.
  # Signal a single informative error instead of message() + bare stop():
  # a bare stop() produced an error with an EMPTY conditionMessage, so
  # tryCatch() callers could not report why the .bib file was invalid.
  if (!(0 %in% map_braces$cum_flag)) {
    stop(
      "Unbalanced braces on entry (line ",
      line,
      "). Invalid .bib file",
      call. = FALSE
    )
  }

  return(map_braces)
}
Files Coverage
R 91.07%
Project Totals (3 files) 91.07%

No yaml found.

Create your codecov.yml to customize your Codecov experience

Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading