1
###########################################################################/**
2
# @RdocFunction .readText
3
#
4
# @title "Reads the content of a local or an online text file"
5
#
6
# \description{
7
#  @get "title".
8
# }
9
#
10
# @synopsis
11
#
12
# \arguments{
13
#   \item{con}{A @character string specifying a local file or a URL,
14
#      or a @connection.}
15
#   \item{...}{Not used.}
16
#   \item{maxAge}{A @numeric scalar specifying the maximum age (in seconds)
17
#      of cached URL contents before downloading and recaching.
18
#      If zero or less, the URL will always be downloaded.}
19
# }
20
#
21
# \value{
22
#   Returns a @character string.
23
# }
24
#
25
# \section{Caching URL}{
26
#   When reading online URLs, it may take time a significant time to
27
#   read its content.  If the content is changing rarely, it is possible
28
#   to cache the content locally.  If a cached version is found, then it
29
#   is read instead.
30
#   It is possible to control how often a file should be recached.  If the
31
#   cache is older than argument \code{maxAge} (in seconds), then the file
32
#   is redownloaded and recached.
33
# }
34
#
35
# \section{Newline substitution}{
36
#   All occurrences of \code{\\r\\n} and \code{\\r} are replaced with
37
#   \code{\\n} such that all lines are ending in \code{\\n} regardless
38
#   of encoding.
39
# }
40
#
41
# @author
42
#
43
# @keyword file
44
# @keyword IO
45
# @keyword internal
46
#*/###########################################################################
47
.readText <- function(con, ..., maxAge=getOption("R.rsp::downloadIfOlderThan", -Inf)) {
48 1
  if (is.character(con)) {
49 1
    file <- con
50

51
    # Is the file local and an URL?
52 1
    isUrl <- isUrl(file)
53

54

55
    # (a) If URL, download to temporary directory
56 1
    if (isUrl) {
57 0
      url <- file
58 0
      path <- tempdir()
59 0
      filename <- getChecksum(url)
60 0
      pathname <- file.path(path, filename)
61

62
      # By default, download URL
63 0
      download <- TRUE
64

65
      # Unless...
66 0
      if (isFile(pathname)) {
67
        # Age (in seconds) when downloaded file is considered too old
68 0
        maxAge <- as.double(maxAge)
69 0
        if (is.na(maxAge)) maxAge <- -Inf
70 0
        maxAge <- Arguments$getDouble(maxAge)
71
        # Time when file was downloaded
72 0
        mtime <- file.info(pathname)$mtime
73
        # Age of downloaded file in seconds
74 0
        dtime <- Sys.time() - mtime
75 0
        units(dtime) <- "secs"
76 0
        download <- isTRUE(dtime > maxAge)
77
      }
78

79 0
      if (download) {
80 0
        withoutGString({
81 0
          pathname <- downloadFile(url, filename=pathname, skip=FALSE)
82
        })
83
      }
84

85 0
      if (isFile(pathname)) file <- pathname
86
    } # if (isUrl)
87

88

89
    # (b) Try to open file connection
90 1
    con <- tryCatch({
91 1
      suppressWarnings({
92 1
        file(file, open="rb")
93
      })
94 1
    }, error = function(ex) {
95
      # (b) If failed, try to download file first
96 0
      if (regexpr("^https://", file, ignore.case=TRUE) == -1L) {
97 0
        throw(ex)
98
      }
99 0
      url <- file
100 0
      withoutGString({
101 0
        pathname <- downloadFile(url, path=tempdir())
102
      })
103 0
      file(pathname, open="rb")
104
    })
105 1
    on.exit(close(con))
106
  }
107

108
  # Sanity check
109 1
  stop_if_not(inherits(con, "connection"))
110

111

112 1
  bfr <- NULL
113 1
  while (TRUE) {
114 1
    bfrT <- readChar(con, nchars=1e6)
115 1
    if (length(bfrT) == 0L) break
116 1
    bfrT <- gsub("\r\n", "\n", bfrT, fixed=TRUE)
117 1
    bfrT <- gsub("\r", "\n", bfrT, fixed=TRUE)
118 1
    bfr <- c(bfr, bfrT)
119
  }
120 1
  bfr <- paste(bfr, collapse="")
121
  
122 1
  if (FALSE) {
123 0
    bfr <- strsplit(bfr, split="\n", fixed=TRUE)
124 0
    bfr <- unlist(bfr, use.names=FALSE)
125
  }
126

127
  ## Sanity check
128 1
  stop_if_not(is.character(bfr), length(bfr) == 1L)
129

130 1
  bfr
131
} # .readText()

Read our documentation on viewing source code .

Loading