slu-openGIS / censusxy
Showing 4 of 86 files from the diff.
Other files ignored by Codecov
LICENSE has changed.
docs/link.svg has changed.
NAMESPACE has changed.
docs/sitemap.xml has changed.
R/stl_homicides.R has changed.
docs/index.html has changed.
docs/pkgdown.css has changed.
docs/pkgdown.yml has changed.
index.md is new.
.Rbuildignore has changed.
R/cxy_replace.R was deleted.
index.Rmd has changed.
docs/logo.png has changed.
docs/docsearch.js has changed.
docs/favicon.ico has changed.
docs/404.html has changed.
docs/authors.html has changed.
docs/pkgdown.js has changed.
R/cxy_geocoder.R was deleted.
README.md has changed.
R/cxy_geocode.R was deleted.
NEWS.md has changed.
R/cxy_id.R was deleted.
R/cxy_prep.R was deleted.
R/cxy_split.R was deleted.
README.Rmd has changed.
DESCRIPTION has changed.

@@ -0,0 +1,175 @@
Loading
1 +
#' Geocode Single Structured Address
2 +
#'
3 +
#' @description Provides access to the structured single address geocoding API from the US Census Bureau.
4 +
#'
5 +
#' @param street String containing street address
6 +
#' @param city Optional String containing city
7 +
#' @param state Optional String containing state
8 +
#' @param zip Optional String or Integer containing 5-digit Zip Code
9 +
#' @param return One of 'locations' or 'geographies' See Details.
10 +
#' @param benchmark Optional ID or Name of Census Benchmark. See Details.
11 +
#' @param vintage Optional ID or Name of Census Vintage. See Details.
12 +
#'
13 +
#' @return A data.frame containing matched address or NULL if not matches
14 +
#'
15 +
#' @importFrom httr GET content
16 +
#'
17 +
#' @export
18 +
cxy_single <- function(street, city = NULL, state = NULL, zip = NULL, return = 'locations', benchmark = 'Public_AR_Current', vintage = NULL){
19 +
20 +
  # Check Specification of Arguments
21 +
  if(missing(street)){
22 +
    stop('`street` is a required argument')
23 +
  }
24 +
25 +
  if(!return %in% c('locations', 'geographies')){
26 +
    stop("`return` must be one of 'locations' or 'geographies'")
27 +
  }
28 +
29 +
  if(return == 'locations' & !is.null(vintage)){
30 +
    warning("Vintage ignored for return = 'locations'")
31 +
  }
32 +
33 +
  if(return == 'geographies' & is.null(vintage)){
34 +
    stop("`vintage` must be specified for return = 'geographies'")
35 +
  }
36 +
37 +
  # Warn for Omission
38 +
  if(is.null(city) | is.null(state) | is.null(zip)){
39 +
    warning('Omission of `city`, `state` or `zip` greatly reduces the speed and accuracy of the geocoder.')
40 +
  }
41 +
42 +
  url <- paste0('https://geocoding.geo.census.gov/geocoder/',return,'/address')
43 +
  req <-
44 +
    httr::GET(url,
45 +
              query = list(
46 +
                benchmark = benchmark,
47 +
                vintage = vintage,
48 +
                street = street,
49 +
                city = city,
50 +
                state = state,
51 +
                zip = zip,
52 +
                format = 'json'
53 +
              )
54 +
    )
55 +
  cnt <- httr::content(req)
56 +
57 +
  # Check for API Errors
58 +
  if(!is.null(cnt$errors)){
59 +
    stop(cnt$errors[[1]])
60 +
  }
61 +
62 +
  matches <- cnt$result$addressMatches
63 +
  if(length(matches) < 1){
64 +
    return(NULL)
65 +
  }
66 +
  df <- data.frame(matches)
67 +
  return(df)
68 +
}
69 +
70 +
#' Single Unstructured Address
71 +
#'
72 +
#' @description Provides access to the oneline single address geocoding API from the US Census Bureau.
73 +
#'
74 +
#' @param address String containing a single line address
75 +
#' @param return One of 'locations' or 'geographies' See Details.
76 +
#' @param benchmark Optional ID or Name of Census Benchmark. See Details.
77 +
#' @param vintage Optional ID or Name of Census Vintage. See Details.
78 +
#'
79 +
#' @return A data.frame containing matched address or NULL if not matches
80 +
#'
81 +
#' @importFrom httr GET content
82 +
#'
83 +
#'
84 +
#' @export
85 +
cxy_oneline <- function(address, return = 'locations', benchmark = 'Public_AR_Current', vintage = NULL){
86 +
87 +
  # Check Specification of Arguments
88 +
  if(missing(address)){
89 +
    stop('`address` is a required argument')
90 +
  }
91 +
92 +
  if(!return %in% c('locations', 'geographies')){
93 +
    stop("`return` must be one of 'locations' or 'geographies'")
94 +
  }
95 +
96 +
  if(return == 'locations' & !is.null(vintage)){
97 +
    warning("Vintage ignored for return = 'locations'")
98 +
  }
99 +
100 +
  if(return == 'geographies' & is.null(vintage)){
101 +
    stop("`vintage` must be specified for return = 'geographies'")
102 +
  }
103 +
104 +
  url <- paste0('https://geocoding.geo.census.gov/geocoder/',return,'/onelineaddress')
105 +
  req <-
106 +
    httr::GET(url,
107 +
              query = list(
108 +
                benchmark = benchmark,
109 +
                vintage = vintage,
110 +
                address = address,
111 +
                format = 'json'
112 +
              )
113 +
    )
114 +
  cnt <- httr::content(req)
115 +
116 +
  # Check for API Errors
117 +
  if(!is.null(cnt$errors)){
118 +
    stop(cnt$errors[[1]])
119 +
  }
120 +
121 +
  matches <- cnt$result$addressMatches
122 +
  if(length(matches) < 1){
123 +
    return(NULL)
124 +
  }
125 +
  df <- data.frame(matches)
126 +
  return(df)
127 +
}
128 +
129 +
130 +
#' Single GeoLookup
131 +
#'
132 +
#' @description Provides access to the GeoLookup API of the US Census Bureau. Returns census geographies for a single geographic point.
133 +
#'
134 +
#' @param lon Numeric or String Containing Longitude (x) of Point
135 +
#' @param lat Numeric or String Containing Latitude (y) of Point
136 +
#' @param benchmark Optional ID or Name of Census Benchmark. See Details.
137 +
#' @param vintage Optional ID or Name of Census Vintage. See Details.
138 +
#'
139 +
#' @return A data.frame containing matched address or NULL if not matches
140 +
#'
141 +
#' @details This function can be used to locate geographic information given a geographic point. It does not provide an address like a reverse-geocoder
142 +
#'
143 +
#' @importFrom httr GET content
144 +
#'
145 +
#' @export
146 +
cxy_geography <- function(lon, lat, benchmark = 'Public_AR_Current', vintage = 'Current_Current'){
147 +
  url <- 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'
148 +
  req <-
149 +
    httr::GET(url,
150 +
              query = list(
151 +
                benchmark = benchmark,
152 +
                vintage = vintage,
153 +
                x = lon,
154 +
                y = lat,
155 +
                format = 'json'
156 +
              )
157 +
    )
158 +
  cnt <- httr::content(req)
159 +
160 +
  # Check for API Errors
161 +
  if(!is.null(cnt$errors)){
162 +
    stop(cnt$errors[[1]])
163 +
  }
164 +
165 +
  # Check for Matches
166 +
  matches <- cnt$result$geographies
167 +
  if(length(matches) < 1){
168 +
    return(NULL)
169 +
  }
170 +
171 +
  df <- data.frame(matches)
172 +
173 +
  return(df)
174 +
}
175 +

@@ -0,0 +1,47 @@
Loading
1 +
# Interal Function for Geocoding with the Batch Endpoint
2 +
3 +
batch_geocoder <- function(df, return, timeout, benchmark, vintage){
4 +
  # Write a Temporary CSV
5 +
  tmp <- tempfile(fileext = '.csv')
6 +
  utils::write.table(df, tmp, col.names = FALSE, row.names = FALSE,
7 +
                     na = '', sep = ',')
8 +
9 +
  url <- paste0('https://geocoding.geo.census.gov/geocoder/',return,'/addressbatch')
10 +
  req <-
11 +
    httr::POST(url,
12 +
               body = list(
13 +
                 addressFile = httr::upload_file(tmp),
14 +
                 benchmark = benchmark,
15 +
                 vintage = vintage,
16 +
                 format = 'json'
17 +
               ),
18 +
               encode = 'multipart',
19 +
               httr::timeout(timeout * 60)
20 +
    )
21 +
  cnt <- httr::content(req, as = 'text', encoding = 'UTF-8')
22 +
23 +
  # Error if Benchmark/Vintage Invalid
24 +
  # Not Perfect, Error is Returned as HTML, could be other errors... (BAD API DESIGN!)
25 +
  # Could Cause Large Batches to FAIL if API Fails Unexpectedly
26 +
  if(grepl('<p>', cnt)){
27 +
    stop('API Failed Unexpectedly, Did you supply an Invalid Benchmark or Vintage?')
28 +
  }
29 +
30 +
  cols <- switch (return,
31 +
                  'locations' = c('id', 'address', 'status', 'quality', 'matched_address', 'coords', 'tiger_line_id', 'tiger_side'),
32 +
                  'geographies' = c('id', 'address', 'status', 'quality', 'matched_address', 'coords', 'tiger_line_id', 'tiger_side', 'state_id', 'county_id', 'tract_id', 'block_id')
33 +
  )
34 +
  df <- utils::read.csv(text = cnt, header = FALSE,
35 +
                        col.names = cols,
36 +
                        fill = TRUE, stringsAsFactors = FALSE,
37 +
                        na.strings = '')
38 +
39 +
  # Split Lon/Lat
40 +
  df$coords <- as.character(df$coords)
41 +
  lonlat <- strsplit(df$coords, split = ',')
42 +
43 +
  df$lon <- vapply(lonlat,function(x){x[1]}, 'numeric')
44 +
  df$lat <- vapply(lonlat,function(x){x[2]}, 'numeric')
45 +
  
46 +
  return(df)
47 +
}

@@ -0,0 +1,49 @@
Loading
1 +
#' Get Current Valid Benchmarks
2 +
#'
3 +
#' @usage cxy_benchmarks()
4 +
#'
5 +
#' @return A data.frame containing valid Census Benchmarks
6 +
#'
7 +
#' @importFrom httr GET content
8 +
#'
9 +
#' @examples cxy_benchmarks()
10 +
#'
11 +
#' @export
12 +
cxy_benchmarks <- function(){
13 +
  req <- httr::GET('https://geocoding.geo.census.gov/geocoder/benchmarks')
14 +
  cnt <- httr::content(req)
15 +
  df <- do.call(rbind.data.frame, cnt$benchmarks)
16 +
  return(df)
17 +
}
18 +
19 +
#' Get Current Valid Vintages
20 +
#'
21 +
#' @usage cxy_vintages(benchmark)
22 +
#'
23 +
#' @param benchmark Name or ID of Census Benchmark
24 +
#'
25 +
#' @return A data.frame containing valid Census Vintages for a given benchmark
26 +
#'
27 +
#' @importFrom httr GET content
28 +
#'
29 +
#' @examples cxy_vintages('Public_AR_Census2010')
30 +
#'
31 +
#' @export
32 +
cxy_vintages <- function(benchmark){
33 +
  if(missing(benchmark)){
34 +
    stop('`benchmark` is a required argument')
35 +
  }
36 +
37 +
  req <-
38 +
    httr::GET('https://geocoding.geo.census.gov/geocoder/vintages',
39 +
              query = list(
40 +
                benchmark = benchmark
41 +
              )
42 +
    )
43 +
  cnt <- httr::content(req)
44 +
  if(length(cnt) < 1){
45 +
    stop('Not a Valid Benchmark')
46 +
  }
47 +
  df <- do.call(rbind.data.frame, cnt$vintages)
48 +
  return(df)
49 +
}

@@ -0,0 +1,214 @@
Loading
1 +
#' Geocode a data.frame of many rows
2 +
#'
3 +
#' @description
4 +
#' Provides access to the US Census Bureau batch endpoints for locations and geographies.
5 +
#' The function implements iteration and optional parallelization in order to geocode datasets larger than the API limit of 10,000 and more efficiently than sending 10,000 per request.
6 +
#'  It also supports multiple outputs, including SF class objects.
7 +
#'
8 +
#'
9 +
#' @param .data data.frame containing columns with structured address data
10 +
#' @param id Optional String - Name of column containing unique ID
11 +
#' @param street String - Name of column containing street address
12 +
#' @param city Optional String - Name of column containing city
13 +
#' @param state Optional String - Name of column containing state
14 +
#' @param zip Optional String - Name of column containing zip code
15 +
#' @param return One of 'locations' or 'geographies' denoting returned information from the API
16 +
#' @param benchmark Optional Census Benchmark to geocode against. See Details.
17 +
#' @param vintage Optional Census Vintage to geocode against. See Details.
18 +
#' @param timeout Numeric, in minutes, how long until request times out
19 +
#' @param parallel Integer, number of cores greater than one if parallel requests are desired. See Details.
20 +
#' @param class One of 'dataframe' or 'sf' denoting the output class. 'sf' will only return matched addresses.
21 +
#' @param output One of 'simple' or 'full' denoting the returned columns. Simple returns just coordinates.
22 +
#'
23 +
#' @return A data.frame or sf object containing geocoded results
24 +
#'
25 +
#' @details
26 +
#' Parallel requests are not currently supported on Windows.
27 +
#' You may not specify more cores than the system reports are available
28 +
#' If you do, the maximum number of available cores will be used.
29 +
#'
30 +
#' If you want to append census geographies, you must specify a valid vintage for your benchmark. You may use the \code{cxy_vintages()} function to obtain valid Vintages.
31 +
#'  See \code{vignette('censusxy')} for a full walkthrough.
32 +
#'
33 +
#' @importFrom httr POST upload_file timeout content
34 +
#' @importFrom utils write.table read.csv
35 +
#'
36 +
#' @export
37 +
cxy_geocode <- function(.data, id = NULL, street, city = NULL, state = NULL, zip = NULL, return = 'locations', benchmark = 'Public_AR_Current', vintage = NULL, timeout = 30, parallel = 1, class = 'dataframe', output = 'simple'){
38 +
39 +
  # Check Specification of Arguments
40 +
  if(missing(.data) | missing(street)){
41 +
    stop('`.data` and `street` are required arguments')
42 +
  }
43 +
  if(!is.null(id) && any(duplicated(.data[[id]]))){
44 +
    stop('Rows in the `id` column are not unique')
45 +
  }
46 +
  if(!return %in% c('locations', 'geographies')){
47 +
    stop("`return` must be one of 'locations' or 'geographies'")
48 +
  }
49 +
  if(return == 'locations' & !is.null(vintage)){
50 +
    warning("Vintage ignored for return = 'locations'")
51 +
  }
52 +
  if(return == 'geographies' & is.null(vintage)){
53 +
    stop("`vintage` must be specified for return = 'geographies'")
54 +
  }
55 +
  if(!class %in% c('dataframe', 'sf')){
56 +
    stop("`class` must be one of 'dataframe' or 'sf'")
57 +
  }
58 +
  if(class == 'sf' & !requireNamespace('sf')){
59 +
    stop('Please install the `sf` package to use the sf output feature')
60 +
  }
61 +
  if(!output %in% c('simple', 'full')){
62 +
    stop("`output` must be one of 'simple' or 'full'")
63 +
  }
64 +
65 +
  # Warn for Omission
66 +
  if(is.null(city) | is.null(state) | is.null(zip)){
67 +
    warning('Omission of `city`, `state` or `zip` greatly reduces the speed and accuracy of the geocoder.')
68 +
  }
69 +
70 +
  # Check Parallel Configuration
71 +
  if(parallel > 1){
72 +
    # Check OS
73 +
    if(.Platform$OS.type != 'unix'){
74 +
      stop('Parallelization is only available on Unix Platforms')
75 +
    }
76 +
    # Check if Available
77 +
    if(!requireNamespace('parallel')){
78 +
      stop('Please install the `parallel` package to use parallel functionality')
79 +
    }
80 +
    # Check Number of Cores
81 +
    avail_cores <- parallel::detectCores()
82 +
    if(parallel > avail_cores){
83 +
      warning('More cores specified than are available, using ', avail_cores, ' cores instead')
84 +
      core_count <- avail_cores
85 +
    }else{
86 +
      core_count <- parallel
87 +
    }
88 +
  }
89 +
90 +
  # Handle NA Arguments
91 +
  n <- nrow(.data)
92 +
93 +
  if(!is.null(id)){
94 +
    if(!id %in% names(.data)){
95 +
      stop(id, ' is not a defined column name in the data.frame')
96 +
    }
97 +
    # Need to Sort User Data for Later Column Binding
98 +
    .data <- .data[order(.data[[id]]),]
99 +
    id <- .data[[id]]
100 +
  }else{
101 +
    id <- seq(n)
102 +
  }
103 +
104 +
  if(!street %in% names(.data)){
105 +
    stop(street, ' is not a defined column name in the data.frame')
106 +
  }
107 +
108 +
  if(!is.null(city)){
109 +
    if(!city %in% names(.data)){
110 +
      stop(city, ' is not a defined column name in the data.frame')
111 +
    }
112 +
    city <- .data[[city]]
113 +
  }else{
114 +
    city <- rep_len(NA, n)
115 +
  }
116 +
117 +
  if(!is.null(state)){
118 +
    if(!state %in% names(.data)){
119 +
      stop(state, ' is not a defined column name in the data.frame')
120 +
    }
121 +
    state <- .data[[state]]
122 +
  }else{
123 +
    state <- rep_len(NA, n)
124 +
  }
125 +
126 +
  if(!is.null(zip)){
127 +
    if(!zip %in% names(.data)){
128 +
      stop(zip, ' is not a defined column name in the data.frame')
129 +
    }
130 +
    zip <- .data[[zip]]
131 +
  }else{
132 +
    zip <- rep_len(NA, n)
133 +
  }
134 +
135 +
  # Build a Data.frame
136 +
  df <- data.frame(
137 +
    id = id,
138 +
    street = .data[[street]],
139 +
    city = city,
140 +
    state = state,
141 +
    zip = zip,
142 +
    stringsAsFactors = FALSE
143 +
  )
144 +
145 +
  # Extract unique addresses
146 +
  uniq <- df[which(!duplicated(paste(df$street, df$city, df$state, df$zip))),]
147 +
148 +
149 +
  if(parallel > 1){
150 +
    # Split by Core Count, Maximizing the Size of Batches
151 +
    # Calculate Split Factor (Halve Batch Sizes until appropriately under threshold)
152 +
    splt_fac <- core_count
153 +
    while (nrow(uniq) / splt_fac > 1000) {
154 +
      splt_fac <- 2 * splt_fac
155 +
    }
156 +
157 +
    batches <- split(uniq, rep_len(seq(splt_fac), nrow(uniq)) )
158 +
159 +
    results <- parallel::mclapply(batches, batch_geocoder,
160 +
                                  return, timeout, benchmark, vintage,
161 +
                                  mc.cores = core_count)
162 +
163 +
  }else{ # Non Parallel
164 +
    # Split and Iterate
165 +
    batches <- split(uniq, (seq(nrow(uniq))-1) %/% 1000 )
166 +
    results <- lapply(batches, batch_geocoder,
167 +
                      return, timeout, benchmark, vintage)
168 +
169 +
  }
170 +
171 +
  # Row Bind the List of Responses
172 +
  api_output <- do.call(rbind, results)
173 +
174 +
  # Join Results with unique
175 +
  uniq_join <- merge(uniq, api_output, by = 'id' , all.x = TRUE, sort = TRUE)
176 +
177 +
  # Join Uniq with original df and sort
178 +
  all_join <- merge(df, uniq_join, by = c('street', 'city', 'state', 'zip'), all.x = TRUE)
179 +
  all_join <- all_join[order(all_join$id.x),]
180 +
181 +
  # Add cxy_ prefix to names
182 +
  names(all_join) <- paste0('cxy_', names(all_join))
183 +
  # Coerce to Numeric Cooridates
184 +
  all_join$cxy_lat <- as.numeric(all_join$cxy_lat)
185 +
  all_join$cxy_lon <- as.numeric(all_join$cxy_lon)
186 +
187 +
188 +
  # Output Type
189 +
  if(output == 'simple'){
190 +
    if(return == 'geographies'){
191 +
      return_df <- cbind(.data, all_join[,c('cxy_lon', 'cxy_lat', 'cxy_state_id', 'cxy_county_id', 'cxy_tract_id', 'cxy_block_id')])
192 +
    }else{
193 +
      return_df <- cbind(.data, all_join[,c('cxy_lon', 'cxy_lat')])
194 +
    }
195 +
  }
196 +
  else if(output == 'full'){
197 +
    if(return == 'geographies'){
198 +
      return_df <- cbind(.data, all_join[,c('cxy_address', 'cxy_status', 'cxy_quality', 'cxy_matched_address', 'cxy_tiger_line_id', 'cxy_tiger_side', 'cxy_lon', 'cxy_lat', 'cxy_state_id', 'cxy_county_id', 'cxy_tract_id', 'cxy_block_id')])
199 +
    }else{
200 +
      return_df <- cbind(.data, all_join[,c('cxy_address', 'cxy_status', 'cxy_quality', 'cxy_matched_address', 'cxy_tiger_line_id', 'cxy_tiger_side', 'cxy_lon', 'cxy_lat')])
201 +
    }
202 +
  }
203 +
204 +
  # Optionally, Return an SF Object
205 +
  if(class == 'sf'){
206 +
    valid <- return_df[which(!is.na(return_df$cxy_lat)),]
207 +
    sf <- sf::st_as_sf(valid, coords = c('cxy_lat', 'cxy_lon'), crs = 4269) # NAD83
208 +
    # Message Number of Rows Removed
209 +
    message(nrow(return_df) - nrow(valid), ' rows removed to create an sf object. These were addresses that the geocoder could not match.')
210 +
    return(sf)
211 +
  }
212 +
213 +
  return(return_df)
214 +
}
Files Coverage
R 86.83%
Project Totals (4 files) 86.83%
Notifications are pending CI completion. Periodically Codecov will check the CI state, when complete notifications will be submitted. Push notifications now.
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading