Skip to content

Commit f53ed81

Browse files
authored
Merge pull request #123 from mountainMath/list-region-fix
cran 0.2 update Ready for merge and cran submission Updated version passed checks on * local * travis * rhub
2 parents eef1251 + aa6b022 commit f53ed81

47 files changed

Lines changed: 1561 additions & 418 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.Rbuildignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@
88
R/hex_sticker.R
99
^doc$
1010
^Meta$
11+
images
12+
images/*
13+
card_fix.sh

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cancensus
22
Type: Package
3-
Title: Canadian Census Data and Geography from the 'CensusMapper' API
4-
Version: 0.1.8
3+
Title: Access, Retrieve, and Work with Canadian Census Data and Geography
4+
Version: 0.2.0
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut"), comment = "API creator and maintainer"),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),
@@ -24,7 +24,7 @@ Imports: digest (>= 0.1),
2424
httr (>= 1.0.0),
2525
jsonlite (>= 1.0),
2626
rlang
27-
RoxygenNote: 6.1.0
27+
RoxygenNote: 6.1.1
2828
Suggests: knitr,
2929
ggplot2,
3030
leaflet,

NEWS.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
## cancensus 0.2.0
2+
3+
### Major changes
4+
- Added a `max_level` option to the `child_census_vectors` function. This sets the maximum depth to search for child census vectors, with `max_level=NA` (the default) returning all descendants. For example, `max_level=1` returns only direct children and no grandchildren.
5+
- Resolved issues with mislabelled CD UID for CSD level data
6+
- Improved reliability of character/numeric alignments for geographic UIDs
7+
- `get_census_geometry` is now soft deprecated and rolled into the standard `get_census` with parameters
8+
9+
### Minor changes
10+
- Minor documentation improvements
11+
- Soften dependency on `readr` package
12+
- Allow for search of internal CensusMapper variables
13+
114
## cancensus 0.1.8
215

316
### Major changes

R/cancensus.R

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
#' This function allows convenient access to Canadian census data and boundary
44
#' files through the CensusMapper API. An API key is required to retrieve data.
55
#'
6-
#' \code{get_census_geometry} is a convenience function
7-
#' that retrieves only Census geography boundaries.
8-
#'
96
#' For help selecting regions and vectors, see \code{\link{list_census_regions}}
107
#' and \code{\link{list_census_vectors}}, or check out the interactive selection
118
#' tool at \url{https://censusmapper.ca/api}.
@@ -71,7 +68,7 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
7168
} else if (is.null(names(regions)) || !all(names(regions) %in% VALID_LEVELS)) {
7269
stop("regions must be composed of valid census aggregation levels.")
7370
} else {
74-
regions <- jsonlite::toJSON(regions)
71+
regions <- jsonlite::toJSON(lapply(regions,as.character)) # cast to character in case regions are supplied as numeric/integer
7572
}
7673

7774
# Remind to set cache directory
@@ -105,7 +102,7 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
105102
stop("the `geo_format` parameter must be one of 'sf', 'sp', or NA")
106103
}
107104

108-
base_url="https://CensusMapper.ca/api/v1/"
105+
base_url=paste0(cancensus_base_url(),"/api/v1/")
109106
# load data variables
110107
if (length(vectors)>0 || is.na(geo_format)) {
111108
param_string <- paste0("regions=", regions,
@@ -128,25 +125,38 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
128125
httr::GET(url)
129126
}
130127
handle_cm_status_code(response, NULL)
131-
na_strings <- c("x", "F", "...", "..")
128+
na_strings <- c("x", "F", "...", "..", "-","N")
129+
130+
as.num = function(x, na.strings = "NA") {
131+
stopifnot(is.character(x))
132+
na = x %in% na.strings
133+
x[na] = 0
134+
x = as.numeric(x)
135+
x[na] = NA_real_
136+
x
137+
}
138+
132139
# Read the data file and transform to proper data types
133140
result <- if (requireNamespace("readr", quietly = TRUE)) {
134141
# Use readr::read_csv if it's available.
135142
httr::content(response, type = "text", encoding = "UTF-8") %>%
136143
readr::read_csv(na = na_strings,
137-
col_types = list(.default = "d", GeoUID = "c",
138-
Type = "c", "Region Name" = "c")) %>%
144+
col_types = list(.default = "c")) %>%
145+
dplyr::mutate_at(c(dplyr::intersect(names(.),c("Population","Households","Dwellings","Area (sq km)")),
146+
names(.)[grepl("v_",names(.))]),
147+
as.num,na.strings=na_strings) %>%
139148
dplyr::mutate(Type = as.factor(.data$Type),
140149
`Region Name` = as.factor(.data$`Region Name`))
141150
} else {
142151
httr::content(response, type = "text", encoding = "UTF-8") %>%
143152
textConnection %>%
144-
utils::read.csv(na = na_strings,
145-
colClasses = c("GeoUID" = "character",
146-
"Type" = "factor",
147-
"Region Name" = "factor"),
148-
stringsAsFactors = FALSE, check.names = FALSE) %>%
149-
dplyr::as_tibble()
153+
utils::read.csv(colClasses = "character", stringsAsFactors = FALSE, check.names = FALSE) %>%
154+
dplyr::as_tibble() %>%
155+
dplyr::mutate_at(c(dplyr::intersect(names(.),c("Population","Households","Dwellings","Area (sq km)")),
156+
names(.)[grepl("v_",names(.))]),
157+
as.num,na.strings=na_strings) %>%
158+
dplyr::mutate(Type = as.factor(.data$Type),
159+
`Region Name` = as.factor(.data$`Region Name`))
150160
}
151161
if (is.na(geo_format)) result <- result %>% transform_geo(level)
152162
attr(result, "last_updated") <- Sys.time()
@@ -231,15 +241,10 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
231241

232242
#' @rdname get_census
233243
#' @export
234-
#'
235-
#' @examples
236-
#' \dontrun{
237-
#' # Query the API for census subdivision boundary geometry within Vancouver.
238-
#' vc_csds <- get_census_geometry(dataset='CA16', regions=list(CMA="59933"),
239-
#' level='CSD', geo_format = "sf")
240-
#'}
241-
get_census_geometry <- function (dataset, level, regions, geo_format = "sf", ...) {
242-
return(get_census(dataset, level, regions, vectors=c(), geo_format=geo_format, ...))
244+
#' @keywords internal
245+
get_census_geometry <- function (dataset, regions, level=NA, geo_format = "sf", ...) {
246+
.Deprecated("get_census")
247+
return(get_census(dataset=dataset, regions=regions, level=level, vectors=c(), geo_format=geo_format, ...))
243248
}
244249

245250
# This is the set of valid census aggregation levels, also used in the named
@@ -407,6 +412,8 @@ parent_census_vectors <- function(vector_list){
407412
#' @param vector_list The list of vectors to be used
408413
#' @param leaves_only Boolean flag to indicate if only leaf vectors should be returned,
409414
#' i.e. vectors that don't have children
415+
#' @param max_level optional, maximum depth to look for child vectors. Default is NA, which returns all
416+
#' child census vectors.
410417
#'
411418
#' @export
412419
#'
@@ -416,16 +423,18 @@ parent_census_vectors <- function(vector_list){
416423
#' list_census_vectors("CA16") %>%
417424
#' filter(vector == "v_CA16_4092") %>%
418425
#' child_census_vectors(TRUE)
419-
child_census_vectors <- function(vector_list, leaves_only=FALSE){
426+
child_census_vectors <- function(vector_list, leaves_only=FALSE,max_level=NA){
420427
base_list <- vector_list
421428
dataset <- attr(base_list,'dataset')
422-
n=0
429+
n <- 0
430+
child_level <- 1
423431
if (!is.null(dataset)) {
424432
vector_list <-
425433
list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
426434
dplyr::filter(.data$parent_vector %in% base_list$vector) %>%
427435
dplyr::distinct(vector, .keep_all = TRUE)
428-
while (n!=nrow(vector_list)) {
436+
while (n!=nrow(vector_list) && (is.na(max_level) || child_level<max_level)) {
437+
child_level <- child_level+1
429438
n=nrow(vector_list)
430439
new_list <- list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
431440
dplyr::filter(.data$parent_vector %in% vector_list$vector)
@@ -538,14 +547,15 @@ list_census_regions <- function(dataset, use_cache = FALSE, quiet = FALSE) {
538547
handle_cm_status_code(response, NULL)
539548
content <- httr::content(response, type = "text", encoding = "UTF-8")
540549
result <- if (!requireNamespace("readr", quietly = TRUE)) {
541-
dplyr::as_data_frame(utils::read.csv(textConnection(content), stringsAsFactors = FALSE))
550+
dplyr::as_data_frame(utils::read.csv(textConnection(content), colClasses = 'character',stringsAsFactors = FALSE))
542551
} else {
543-
readr::read_csv(content)
552+
readr::read_csv(content,col_types = readr::cols(.default='c'))
544553
}
545554
result <- dplyr::select(result, region = .data$geo_uid, .data$name,
546555
level = .data$type, pop = .data$population,
547556
municipal_status = .data$flag, .data$CMA_UID,
548-
.data$CD_UID, .data$PR_UID)
557+
.data$CD_UID, .data$PR_UID) %>%
558+
dplyr::mutate(pop=as.integer(.data$pop))
549559
attr(result, "last_updated") <- Sys.time()
550560
save(result, file = cache_file)
551561
result
@@ -763,6 +773,10 @@ transform_geo <- function(g, level) {
763773
new=c("GeoUID","Shape Area" ,"Type" ,"Dwellings","Households","Population","Adjusted Population (previous Census)","NHS Non-Return Rate","Quality Flags","Population 2011","Population 2016","Households 2011","Households 2016","Dwellings 2011","Dwellings 2016")
764774
)
765775
#geo uid name changes
776+
if (level=='Regions') {
777+
l=g$t %>% unique()
778+
if (length(l)==1) level=l
779+
}
766780
if (level=='DB') {
767781
name_change <- name_change %>% rbind(
768782
c('rpid','DA_UID'),
@@ -786,7 +800,7 @@ transform_geo <- function(g, level) {
786800
}
787801
if (level=='CSD') {
788802
name_change <- name_change %>% rbind(
789-
c('rpid','CSD_UID'),
803+
c('rpid','CD_UID'),
790804
c('rgid','PR_UID'),
791805
c('ruid','CMA_UID'))
792806
}

R/helpers.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
cancensus_base_url <- function(){
2+
url <- getOption("cancensus.base_url")
3+
if (is.null(url)) url <- "https://censusmapper.ca"
4+
url
5+
}

README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77

88
<a href="https://mountainmath.github.io/cancensus/index.html"><img src="https://raw.githubusercontent.com/mountainMath/cancensus/master/images/cancensus-sticker.png" alt="cancensus logo" align="right" width = "25%" height = "25%"/></a>
99

10+
1011
Access, retrieve, and work with Canadian Census data and geography.
1112

1213
* Download data and Census geography in tidy and analysis-ready format
1314
* Convenience tools for searching for and working with Census regions and variable hierarchies
1415
* Provides Census geography in multiple R spatial formats
15-
* Provides data and geography at multiple Census geographic levels including province, Census Metropolitan Area, Census Division, Census Subdividision, Census Tract, and Dissemination Areas
16+
* Provides data and geography at multiple Census geographic levels including province, Census Metropolitan Area, Census Division, Census Subdivision, Census Tract, and Dissemination Areas
1617
* Provides up-to-date data for the 2016, 2011, and 2006 Censuses
1718

1819
### Reference
@@ -40,7 +41,7 @@ To check your API key, just go to "Edit Profile" (in the top-right of the Census
4041

4142
### Local Cache
4243

43-
For performance reasons, and to avoid unneccessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `options(cancensus.cache_path = 'XXX')`, this enables better control over the data. This option can be stored stored in your .Rprofile alongside your API key. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.
44+
For performance reasons, and to avoid unnecessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `options(cancensus.cache_path = 'XXX')`; this enables better control over the data. This option can be stored in your .Rprofile alongside your API key. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.
4445

4546
### Currently available datasets
4647

@@ -98,24 +99,25 @@ We'd love to feature examples of work or projects that use cancensus.
9899

99100
If you wish to cite cancensus:
100101

101-
von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2019). cancensus: an R package to
102-
access, retrieve, and work with Canadian Census data and geography. v0.1.8.
102+
von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2019). cancensus: R package to
103+
access, retrieve, and work with Canadian Census data and geography. v0.2.0.
104+
103105

104106
A BibTeX entry for LaTeX users is
105107
```
106108
@Manual{,
107109
author = {Jens {von Bergmann} and Dmitry Shkolnik and Aaron Jacobs},
108-
title = {cancensus: An R Package to Access, Retrieve, and Work With Canadian Census Data and Geography},
110+
title = {cancensus: R package to access, retrieve, and work with Canadian Census data and geography},
109111
year = {2019},
110-
note = {R package version 0.1.8},
112+
note = {R package version 0.2.0},
111113
url = {https://mountainmath.github.io/cancensus/},
112114
}
113115
```
114116
### Related packages
115117

116118
The cancensus package is designed for working with Canadian Census data. In addition to Census data, Statistics Canada provides access to a vast [socio-economic data repository](https://www150.statcan.gc.ca/n1/en/type/data) with thousands of data tables available for public access.
117119

118-
The [cansim package](https://mountainmath.github.io/cansim/index.html) is designed to retrieve and work with public Statistics Canada data tables. The cansim prepares retrieved data tables as analysis-ready tidy dataframes and provides a number of convenience tools and functions to make it easier to work with Statistics Canada data.
120+
The [cansim package](https://mountainmath.github.io/cansim/index.html) is designed to retrieve and work with public Statistics Canada data tables. The cansim package prepares retrieved data tables as analysis-ready tidy dataframes and provides a number of convenience tools and functions to make it easier to work with Statistics Canada data. It is available on CRAN and on [GitHub](https://github.com/mountainMath/cansim).
119121

120122
Data downloaded through the cansim package that comes with standard geographic attributes will typically share a common geographic ID that can be matched to Census data.
121123

card_fix.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
og_img1='<meta property="og:image" content="/logo.png"'
2+
og_img2='<meta property="og:image" content="https://mountainmath.github.io/cancensus/logo.png"'
3+
4+
replace_meta () {
5+
file=$(find . -name '*.html')
6+
for i in $file; do
7+
cat "${i}" | sed "s,${og_img1},${og_img2},g" > "temp"
8+
cat "temp" > "${i}"
9+
done
10+
rm temp
11+
}
12+
13+
cd ./docs
14+
replace_meta
15+
cd articles
16+
replace_meta
17+
cd ..
18+
cd news
19+
replace_meta
20+
cd ..
21+
cd reference
22+
replace_meta
23+
cd ..
24+
cd ..

cran-comments.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
## Update - v.0.2.0
2+
3+
Maintenance update of package with some quality of life improvements. Changes include:
4+
* Improved variable tree searching
5+
* Better handling of numeric/string/factor alignments
6+
* Backward compatibility for some soft deprecated functions
7+
8+
Release checked locally, on r-hub, and on win-builder.
9+
110
## Update - v.0.1.8
211

312
Minor update of package. Changes include:

docs/LICENSE-text.html

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/apple-touch-icon-120x120.png

16.3 KB
Loading

0 commit comments

Comments
 (0)