r-lib / xml2
1
#' Find nodes that match an xpath expression.
#'
#' Xpath is like regular expressions for trees - it's worth learning if
#' you're trying to extract nodes from arbitrary locations in a document.
#' Use `xml_find_all` to find all matches - if there's no match you'll
#' get an empty result. Use `xml_find_first` to find a specific match -
#' if there's no match you'll get an `xml_missing` node.
#'
#' @section Deprecated functions:
#' `xml_find_one()` has been deprecated. Instead use
#' `xml_find_first()`.
#'
#' @param xpath A string containing an xpath (1.0) expression.
#' @inheritParams xml_name
#' @param ... Further arguments passed to or from other methods.
#' @return `xml_find_all` returns a nodeset if applied to a node, and a nodeset
#'   or a list of nodesets if applied to a nodeset. If there are no matches,
#'   the nodeset(s) will be empty. Within each nodeset, the result will always
#'   be unique; repeated nodes are automatically de-duplicated.
#'
#'   `xml_find_first` returns a node if applied to a node, and a nodeset
#'   if applied to a nodeset. The output is *always* the same size as
#'   the input. If there are no matches, `xml_find_first` will return a
#'   missing node; if there are multiple matches, it will return the first
#'   only.
#'
#'   `xml_find_num`, `xml_find_chr`, `xml_find_lgl` return
#'   numeric, character and logical results respectively.
#' @export
#' @seealso [xml_ns_strip()] to remove the default namespaces
#' @examples
#' x <- read_xml("<foo><bar><baz/></bar><baz/></foo>")
#' xml_find_all(x, ".//baz")
#' xml_path(xml_find_all(x, ".//baz"))
#'
#' # Note the difference between .// and //
#' # //  finds anywhere in the document (ignoring the current node)
#' # .// finds anywhere beneath the current node
#' (bar <- xml_find_all(x, ".//bar"))
#' xml_find_all(bar, ".//baz")
#' xml_find_all(bar, "//baz")
#'
#' # Find all vs find first ---------------------------------------------------
#' x <- read_xml("<body>
#'   <p>Some <b>text</b>.</p>
#'   <p>Some <b>other</b> <b>text</b>.</p>
#'   <p>No bold here!</p>
#' </body>")
#' para <- xml_find_all(x, ".//p")
#'
#' # By default, if you apply xml_find_all to a nodeset, it finds all matches,
#' # de-duplicates them, and returns as a single nodeset. This means you
#' # never know how many results you'll get
#' xml_find_all(para, ".//b")
#'
#' # If you set flatten to FALSE, though, xml_find_all will return a list of
#' # nodesets, where each nodeset contains the matches for the corresponding
#' # node in the original nodeset.
#' xml_find_all(para, ".//b", flatten = FALSE)
#'
#' # xml_find_first only returns the first match per input node. If there are 0
#' # matches it will return a missing node
#' xml_find_first(para, ".//b")
#' xml_text(xml_find_first(para, ".//b"))
#'
#' # Namespaces ---------------------------------------------------------------
#' # If the document uses namespaces, you'll need to use xml_ns to form
#' # a unique mapping between full namespace url and a short prefix
#' x <- read_xml('
#'  <root xmlns:f = "http://foo.com" xmlns:g = "http://bar.com">
#'    <f:doc><g:baz /></f:doc>
#'    <f:doc><g:baz /></f:doc>
#'  </root>
#' ')
#' xml_find_all(x, ".//f:doc")
#' xml_find_all(x, ".//f:doc", xml_ns(x))
xml_find_all <- function(x, xpath, ns = xml_ns(x), ...) {
  # S3 generic: the method is chosen from the class of `x`.
  UseMethod("xml_find_all")
}
80

81
#' @export
xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x), ...) {
  # A missing node has nothing beneath it to search, so the answer is always
  # an empty nodeset; `xpath` and `ns` are never evaluated.
  xml_nodeset()
}
85

86
#' @export
xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x), ...) {
  # `Inf` as the last argument asks the compiled search for every match
  # (xml_find_first passes 1 in the same position).
  nodes <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf)

  # Wrap the raw search result as a nodeset.
  xml_nodeset(nodes)
}
91

92
#' @param flatten A logical indicating whether to return a single, flattened
#'   nodeset or a list of nodesets.
#' @export
#' @rdname xml_find_all
xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x),
                                     flatten = TRUE, ...) {
  # Nothing to search: an empty input yields an empty nodeset.
  if (length(x) == 0) {
    return(xml_nodeset())
  }

  # One raw search result per input node, in input order.
  res <- lapply(x, function(node) {
    .Call(xpath_search, node$node, node$doc, xpath, ns, Inf)
  })

  # Default: merge the per-node results one level deep into a single nodeset.
  if (isTRUE(flatten)) {
    return(xml_nodeset(unlist(res, recursive = FALSE)))
  }

  # Otherwise wrap each per-node result on its own. Assigning through `res[]`
  # replaces the elements while keeping the list object itself.
  res[] <- lapply(res, xml_nodeset)
  res
}
109

110
#' @export
#' @rdname xml_find_all
xml_find_first <- function(x, xpath, ns = xml_ns(x)) {
  # S3 generic: the method is chosen from the class of `x`.
  UseMethod("xml_find_first")
}
115

116
#' @export
xml_find_first.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
  # Searching from a missing node cannot match anything, so the result is
  # again a missing node; `xpath` and `ns` are never evaluated.
  xml_missing()
}
119

120
#' @export
xml_find_first.xml_node <- function(x, xpath, ns = xml_ns(x)) {
  # The trailing 1 limits the compiled search to a single match.
  res <- .Call(xpath_search, x$node, x$doc, xpath, ns, 1)

  # A length-one result is unwrapped to its single element; any other result
  # is handed back exactly as the search produced it.
  if (length(res) == 1) {
    res[[1]]
  } else {
    res
  }
}
129

130
#' @export
xml_find_first.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
  # Nothing to search: an empty input yields an empty nodeset.
  if (length(x) == 0) {
    return(xml_nodeset())
  }

  # Look up the first match for every input node individually.
  firsts <- lapply(x, function(node) {
    xml_find_first(node, xpath = xpath, ns = ns)
  })

  # De-duplication is switched off so the output keeps one entry per input
  # node, even when several inputs resolve to the same result.
  xml_nodeset(firsts, deduplicate = FALSE)
}
138

139

140
#' @export
#' @rdname xml_find_all
xml_find_num <- function(x, xpath, ns = xml_ns(x)) {
  # S3 generic: the method is chosen from the class of `x`.
  UseMethod("xml_find_num")
}
145

146
#' @export
xml_find_num.xml_node <- function(x, xpath, ns = xml_ns(x)) {
  res <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf)

  # The search can hand back other kinds of result; anything that is not
  # numeric is rejected with an error naming the class that was returned.
  if (!is.numeric(res)) {
    stop("result of type: ", sQuote(class(res)), ", not numeric", call. = FALSE)
  }

  res
}
154

155
#' @export
xml_find_num.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
  # An empty nodeset gives an empty numeric vector.
  if (length(x) == 0) {
    return(numeric())
  }

  # Evaluate the expression once per node; vapply() enforces exactly one
  # numeric value per node and errors otherwise.
  vapply(
    x,
    function(node) xml_find_num(node, xpath = xpath, ns = ns),
    numeric(1)
  )
}
162

163
#' @export
xml_find_num.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
  # A missing node has no value: return a zero-length numeric vector without
  # evaluating `xpath` or `ns`.
  numeric(0)
}
167

168
#' @export
#' @rdname xml_find_all
xml_find_chr <- function(x, xpath, ns = xml_ns(x)) {
  # S3 generic: the method is chosen from the class of `x`.
  UseMethod("xml_find_chr")
}
173

174
#' @export
xml_find_chr.xml_node <- function(x, xpath, ns = xml_ns(x)) {
  res <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf)

  # The search can hand back other kinds of result; anything that is not
  # character is rejected with an error naming the class that was returned.
  if (!is.character(res)) {
    stop("result of type: ", sQuote(class(res)), ", not character", call. = FALSE)
  }

  res
}
182

183
#' @export
xml_find_chr.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
  # An empty nodeset gives an empty character vector.
  if (length(x) == 0) {
    return(character())
  }

  # Evaluate the expression once per node; vapply() enforces exactly one
  # string per node and errors otherwise.
  vapply(
    x,
    function(node) xml_find_chr(node, xpath = xpath, ns = ns),
    character(1)
  )
}
190

191
#' @export
xml_find_chr.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
  # A missing node has no value: return a zero-length character vector
  # without evaluating `xpath` or `ns`.
  character(0)
}
195

196
#' @export
#' @rdname xml_find_all
xml_find_lgl <- function(x, xpath, ns = xml_ns(x)) {
  # S3 generic: the method is chosen from the class of `x`.
  UseMethod("xml_find_lgl")
}
201

202
#' @export
xml_find_lgl.xml_node <- function(x, xpath, ns = xml_ns(x)) {
  res <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf)

  # The search can hand back other kinds of result; anything that is not
  # logical is rejected with an error naming the class that was returned.
  if (!is.logical(res)) {
    stop("result of type: ", sQuote(class(res)), ", not logical", call. = FALSE)
  }

  res
}
210

211
#' @export
xml_find_lgl.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
  # An empty nodeset gives an empty logical vector.
  if (length(x) == 0) {
    return(logical())
  }

  # Evaluate the expression once per node; vapply() enforces exactly one
  # logical value per node and errors otherwise.
  vapply(
    x,
    function(node) xml_find_lgl(node, xpath = xpath, ns = ns),
    logical(1)
  )
}
218

219
#' @export
xml_find_lgl.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
  # A missing node has no value: return a zero-length logical vector without
  # evaluating `xpath` or `ns`.
  logical(0)
}
223

224
# Deprecated functions ----------------------------------------------------
225

226
#' @rdname xml_find_all
#' @usage NULL
#' @export
xml_find_one <- function(x, xpath, ns = xml_ns(x)) {
  # Warn callers that this name is deprecated and point at the replacement.
  .Deprecated("xml_find_first")

  # Deliberately dispatch on the xml_find_first methods, so the deprecated
  # name needs no methods of its own and behaves exactly like its replacement.
  UseMethod("xml_find_first")
}

Read our documentation on viewing source code .

Loading