Skip to content

Commit 40f4ba9

Browse files
authored
Merge pull request #200 from mountainMath/v0.5.6
V0.5.6
2 parents 4979e91 + 5778b0e commit 40f4ba9

71 files changed

Lines changed: 896 additions & 638 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cancensus
22
Type: Package
33
Title: Access, Retrieve, and Work with Canadian Census Data and Geography
4-
Version: 0.5.5
4+
Version: 0.5.6
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "[email protected]", role = c("aut"), comment = "API creator and maintainer"),
77
person("Dmitry", "Shkolnik", email = "[email protected]", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),
@@ -24,15 +24,13 @@ Imports: digest (>= 0.1),
2424
httr (>= 1.0.0),
2525
jsonlite (>= 1.0),
2626
rlang
27-
RoxygenNote: 7.2.1
27+
RoxygenNote: 7.2.3
2828
Suggests: knitr,
2929
ggplot2,
3030
leaflet,
3131
mapdeck,
3232
rmarkdown,
3333
readr,
34-
rgdal,
35-
rgeos,
3634
scales,
3735
sp,
3836
sf,

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# cancensus 0.5.6
2+
3+
- fix issue when using named vectors to query data for non-existent geographies, return NULL in this case instead of throwing error
4+
- fix problem with population centre geographic data download
5+
- support newly released Forward Sortation Area geography for statcan geography and WDS functionality
6+
- replace instances of new native R pipe |> with dplyr pipe %>% to preserve compatibility with older R versions
7+
18
# cancensus 0.5.5
29

310
- add functionality for direct access to StatCan census WDS for 2021

R/cancensus.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,10 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
268268
to_rename <- setNames(names(result),gsub(":.*","",names(result)))
269269
to_rename <- to_rename[names(to_rename)!=as.character(to_rename)]
270270
if (length(to_rename)>0) result <- result %>% dplyr::rename(!!!to_rename)
271-
if (!is.null(names(vectors))) result <- result %>% dplyr::rename(!!! vectors)
271+
if (!is.null(names(vectors))) {
272+
to_rename <- vectors[as.character(vectors) %in% names(result)]
273+
if (length(to_rename)>0) result <- result %>% dplyr::rename(!!! to_rename)
274+
}
272275
}
273276
}
274277

R/census_regions.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
#' @export
3535
#'
3636
#' @examples
37+
#' \dontrun{
3738
#' list_census_regions('CA16')
39+
#' }
3840
list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
3941
dataset <- translate_dataset(dataset)
4042
cache_file <- file.path(tempdir(),paste0(dataset, "_regions.rda"))
@@ -200,16 +202,16 @@ add_unique_names_to_region_list <- function(region_list) {
200202
dplyr::group_by(.data$name) %>%
201203
dplyr::mutate(count=dplyr::n()) %>%
202204
dplyr::mutate(Name=dplyr::case_when(.data$count==1 ~ name,
203-
TRUE ~ paste0(.data$name," (",.data$municipal_status,")"))) |>
205+
TRUE ~ paste0(.data$name," (",.data$municipal_status,")"))) %>%
204206
dplyr::group_by(.data$Name) %>%
205207
dplyr::mutate(count=dplyr::n()) %>%
206208
dplyr::mutate(Name=dplyr::case_when(.data$count==1 ~ Name,
207-
TRUE ~ paste0(.data$Name," (",.data$region,")"))) |>
208-
dplyr::select(-.data$count) |>
209+
TRUE ~ paste0(.data$Name," (",.data$region,")"))) %>%
210+
dplyr::select(-.data$count) %>%
209211
dplyr::ungroup()
210212

211213
if (length(gs)>1) {
212-
r <- r |>
214+
r <- r %>%
213215
dplyr::group_by(dplyr::across(dplyr::all_of(gs)))
214216
}
215217
r

R/geographies.R

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#'
66
#' @param census_year census year to get the data for, right now only 2021 is supported
77
#' @param level geographic level to return the data for, valid choices are
8-
#' "PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"
8+
#' "PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR", "FSA"
99
#' @param type type of geographic data, valid choices are "cartographic" or "digital"
1010
#' @param cache_path optional path to cache the data. If the cancensus cache path is set the geographic data gets
1111
#' cached in the "geographies" subdirectory of the cancensus cache path.
@@ -24,7 +24,7 @@ get_statcan_geographies <- function(census_year,level,type="cartographic",
2424
cache_path = NULL,timeout=1000,
2525
refresh=FALSE,quiet=FALSE) {
2626
valid_census_years <- c("2021")
27-
valid_levels <- c("PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR")
27+
valid_levels <- c("PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR","POPCTR","FSA")
2828
valid_types <- c("cartographic","digital")
2929
if (!(census_year %in% valid_census_years)) {
3030
stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),"."))
@@ -35,7 +35,7 @@ get_statcan_geographies <- function(census_year,level,type="cartographic",
3535
if (!(level %in% valid_levels)) {
3636
stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),"."))
3737
}
38-
level_map <- c("CMACA"="CMA","CA"="CMA","POPCNTR","PC")
38+
level_map <- c("CMACA"="CMA","CA"="CMA","POPCNTR"="PC","POPCTR"="PC")
3939
if (level %in% names(level_map)) level <-level_map[[level]]
4040
geo_base_path <- cache_path("geographies")
4141
if (!dir.exists(geo_base_path)) dir.create(geo_base_path)
@@ -56,6 +56,14 @@ get_statcan_geographies <- function(census_year,level,type="cartographic",
5656
utils::download.file(url,tmp,mode="wb",quiet=quiet)
5757
options(timeout = old_timeout)
5858
utils::unzip(tmp,exdir = exdir)
59+
fs <- dir(exdir,full.names = TRUE)
60+
if (length(fs)==1 && dir.exists(fs)) {
61+
tmp_dir <- file.path(geo_base_path,"XXXX")
62+
file.rename(exdir,tmp_dir)
63+
fs <- dir(tmp_dir,full.names = TRUE)
64+
file.rename(fs,exdir)
65+
unlink(tmp_dir)
66+
}
5967
} else {
6068
if (!quiet) message("Reading geographic data from local cache.")
6169
}

R/helpers.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ cache_path <- function(...) {
2222
if (nchar(cache_dir)==0) {
2323
if (!is.null(getOption("cancensus.cache_path"))) {
2424
cache_dir <- getOption("cancensus.cache_path")
25-
} else cache_dir <- tempdir()
25+
} else {
26+
cache_dir <- tempdir()
27+
message(cm_no_cache_path_message)
28+
}
2629
}
2730
if (!is.character(cache_dir)) {
2831
stop("Corrupt 'CM_CACHE_PATH' environment variable or 'cancensus.cache_path' option. Must be a path.",

R/user_settings.R

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ set_cancensus_api_key <- function(key, overwrite = FALSE, install = FALSE){
4343
keyconcat <- paste0("CM_API_KEY='", key, "'")
4444
# Append API key to .Renviron file
4545
write(keyconcat, renv, sep = "\n", append = TRUE)
46-
message('Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CM_API_KEY"). \nTo use now, restart R or run `readRenviron("~/.Renviron")`')
46+
Sys.setenv(CM_API_KEY = key)
47+
message('Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CM_API_KEY").')
4748
} else {
4849
message("API key set for duration of session. To install your API key for use across sessions, run this function with `install = TRUE`.")
4950
Sys.setenv(CM_API_KEY = key)
@@ -65,7 +66,7 @@ set_cancensus_api_key <- function(key, overwrite = FALSE, install = FALSE){
6566
#'\dontrun{
6667
#' set_cancensus_cache_path("~/cancensus_cache")
6768
#'
68-
#' # This will set the cache path permanently until ovewritten again
69+
#' # This will set the cache path permanently until overwritten again
6970
#' set_cancensus_cache_path("~/cancensus_cache", install = TRUE)
7071
#' }
7172
set_cancensus_cache_path <- function(cache_path, overwrite = FALSE, install = FALSE){
@@ -93,7 +94,8 @@ set_cancensus_cache_path <- function(cache_path, overwrite = FALSE, install = FA
9394
keyconcat <- paste0("CM_CACHE_PATH='", cache_path, "'")
9495
# Append cache path .Renviron file
9596
write(keyconcat, renv, sep = "\n", append = TRUE)
96-
message('Your cache path has been stored in your .Renviron and can be accessed by Sys.getenv("CM_CACHE_PATH"). \nTo use now, restart R or run readRenviron("~/.Renviron").')
97+
message('Your cache path has been stored in your .Renviron and can be accessed by Sys.getenv("CM_CACHE_PATH").')
98+
Sys.setenv('CM_CACHE_PATH' = cache_path)
9799
} else {
98100
message("Cache set for duration of session. To permanently add your cache path for use across sessions, run this function with install = TRUE.")
99101
Sys.setenv('CM_CACHE_PATH' = cache_path)
@@ -138,9 +140,8 @@ show_cancensus_cache_path <- function() {
138140
cm_no_cache_path_message <- paste(
139141
"Census data is currently stored temporarily.\n\n",
140142
"In order to speed up performance, reduce API quota usage, and reduce",
141-
"unnecessary network calls, please set up a persistent cache directory by",
142-
"setting the environment variable CM_CACHE_PATH= '<path to cancensus cache directory>' or ",
143-
"setting options(cancensus.cache_path = '<path to cancensus cache directory>')\n\n",
144-
"You may add this environment varianble to your .Renviron",
145-
"or add this option, together with your API key, to your .Rprofile.\n\n"
143+
"unnecessary network calls, please set up a persistent cache directory via",
144+
"`set_cancensus_cache_path('<local cache path>', install = TRUE)`.\n",
145+
"This will add your cache directory as environment varianble to your .Renviron to be",
146+
"used across sessions and projects.\n\n"
146147
)

R/wds.R

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#'
77
#' @param census_year census year to get the data for, right now only 2021 is supported
88
#' @param level geographic level to return the data for, valid choices are
9-
#' "PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"
9+
#' "PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR", "FSA"
1010
#' @param refresh default is `FALSE` will refresh the temporary cache if `TRUE`
1111
#' @return tibble with the metadata
1212
#'
@@ -18,7 +18,7 @@
1818
#' @export
1919
get_statcan_wds_metadata <- function(census_year,level,refresh=FALSE){
2020
valid_census_years <- c("2021")
21-
valid_levels <- c("PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR")
21+
valid_levels <- c("PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR","FSA")
2222
if (!(census_year %in% valid_census_years)) {
2323
stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),"."))
2424
}
@@ -34,24 +34,24 @@ get_statcan_wds_metadata <- function(census_year,level,refresh=FALSE){
3434
code_lists <- xml2::xml_find_all(d,"//structure:Codelist")
3535

3636
meta_data <- lapply(code_lists, \(cl){
37-
codelist_id <- cl |> xml2::xml_attr("id")
38-
agencyID <- cl |> xml2::xml_attr("agencyID")
39-
codelist_en <- cl |> xml2::xml_find_all("common:Name[@xml:lang='en']") |> xml2::xml_text()
40-
codelist_fr <- cl |> xml2::xml_find_all("common:Name[@xml:lang='fr']") |> xml2::xml_text()
41-
description_en <- cl |> xml2::xml_find_all("common:Name[@xml:lang='en']") |> xml2::xml_text()
42-
description_fr <- cl |> xml2::xml_find_all("common:Name[@xml:lang='fr']") |> xml2::xml_text()
43-
codes <- cl |> xml2::xml_find_all("structure:Code")
37+
codelist_id <- cl %>% xml2::xml_attr("id")
38+
agencyID <- cl %>% xml2::xml_attr("agencyID")
39+
codelist_en <- cl %>% xml2::xml_find_all("common:Name[@xml:lang='en']") %>% xml2::xml_text()
40+
codelist_fr <- cl %>% xml2::xml_find_all("common:Name[@xml:lang='fr']") %>% xml2::xml_text()
41+
description_en <- cl %>% xml2::xml_find_all("common:Name[@xml:lang='en']") %>% xml2::xml_text()
42+
description_fr <- cl %>% xml2::xml_find_all("common:Name[@xml:lang='fr']") %>% xml2::xml_text()
43+
codes <- cl %>% xml2::xml_find_all("structure:Code")
4444
dplyr::tibble(`Agency ID`=agencyID,
4545
`Codelist ID`=codelist_id,
4646
`Codelist en`=codelist_en,
4747
`Codelist fr`=codelist_fr,
48-
ID=codes |> xml2::xml_attr("id"),
49-
en=codes |> xml2::xml_find_all("common:Name[@xml:lang='en']") |> xml2::xml_text(),
50-
fr=codes |> xml2::xml_find_all("common:Name[@xml:lang='fr']") |> xml2::xml_text(),
51-
`Parent ID`=codes |> xml2::xml_find_all("structure:Parent/Ref",flatten=FALSE) |>
52-
lapply(\(d)ifelse(is.null(d),NA,xml2::xml_attr(d,"id"))) |> unlist()
48+
ID=codes %>% xml2::xml_attr("id"),
49+
en=codes %>% xml2::xml_find_all("common:Name[@xml:lang='en']") %>% xml2::xml_text(),
50+
fr=codes %>% xml2::xml_find_all("common:Name[@xml:lang='fr']") %>% xml2::xml_text(),
51+
`Parent ID`=codes %>% xml2::xml_find_all("structure:Parent/Ref",flatten=FALSE) %>%
52+
lapply(\(d)ifelse(is.null(d),NA,xml2::xml_attr(d,"id"))) %>% unlist()
5353
)
54-
}) |>
54+
}) %>%
5555
dplyr::bind_rows()
5656
meta_data
5757
}
@@ -116,22 +116,22 @@ get_statcan_wds_data <- function(DGUIDs,
116116
census_year <- "2021"
117117
meta_data <- get_statcan_wds_metadata(census_year,level,refresh = refresh)
118118

119-
levels <- meta_data |>
119+
levels <- meta_data %>%
120120
dplyr::filter(.data$`Codelist ID`=="CL_GEO_LEVEL")
121121

122-
meta_geos <- meta_data |>
122+
meta_geos <- meta_data %>%
123123
dplyr::filter(.data$`Codelist ID`==paste0("CL_GEO_",level))
124-
meta_characteristics <- meta_data |>
124+
meta_characteristics <- meta_data %>%
125125
dplyr::filter(.data$`Codelist ID`=="CL_CHARACTERISTIC")
126126

127127
name_field <- language #paste0(language,"_description")
128128

129-
data <- readr::read_csv(wds_data_tempfile,col_types = readr::cols(.default="c")) |>
130-
dplyr::mutate(dplyr::across(dplyr::matches("OBS_VALUE|TNR_CI_"),as.numeric)) |>
131-
dplyr::left_join(meta_geos |>
129+
data <- readr::read_csv(wds_data_tempfile,col_types = readr::cols(.default="c")) %>%
130+
dplyr::mutate(dplyr::across(dplyr::matches("OBS_VALUE|TNR_CI_"),as.numeric)) %>%
131+
dplyr::left_join(meta_geos %>%
132132
dplyr::select(GEO_DESC=.data$ID,GEO_NAME=!!as.name(name_field)),
133-
by="GEO_DESC") |>
134-
dplyr::left_join(meta_characteristics |>
133+
by="GEO_DESC") %>%
134+
dplyr::left_join(meta_characteristics %>%
135135
dplyr::select(CHARACTERISTIC=.data$ID,CHARACTERISTIC_NAME=!!as.name(name_field)),
136136
by="CHARACTERISTIC")
137137

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ library(cancensus)
2929

3030
Alternatively, the latest development version can be installed from Github.
3131
```
32-
devtools::install_github("mountainmath/cancensus")
32+
remotes::install_github("mountainmath/cancensus")
3333
library(cancensus)
3434
```
3535

3636
### API key
3737

38-
**cancensus** requires a valid CensusMapper API key to use. You can obtain a free API key by [signing up](https://censusmapper.ca/users/sign_up) for a CensusMapper account. To check your API key, just go to "Edit Profile" (in the top-right of the CensusMapper menu bar). Once you have your key, you can store it in your system environment so it is automatically used in API calls. To do so just enter `set_cancensus_api_key(<your_api_key>', install = TRUE)`.
38+
**cancensus** requires a valid CensusMapper API key to use. You can obtain a free API key by [signing up](https://censusmapper.ca/users/sign_up) for a CensusMapper account. To check your API key, just go to "Edit Profile" (in the top-right of the CensusMapper menu bar). Once you have your key, you can store it in your system environment so it is automatically used in API calls. To do so just enter `set_cancensus_api_key('<your_api_key>', install = TRUE)`.
3939

4040
CensusMapper API keys are free and public API quotas are generous; however, due to incremental costs of serving large quantities of data, there are some limits to API usage in place. For most use cases, these API limits should not be an issue. Production uses with large extracts of detailed geographies may run into API quota limits.
4141

@@ -45,13 +45,13 @@ For larger quotas, please get in touch with Jens [directly](mailto:jens@censusma
4545

4646
### Local Cache
4747

48-
For performance reasons, and to avoid unnecessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `set_cancensus_cache_path(<local cache path>, install = TRUE)`, this enables more efficient loading and reuse of downloaded data. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.
48+
For performance reasons, and to avoid unnecessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `set_cancensus_cache_path('<local cache path>', install = TRUE)`, this enables more efficient loading and reuse of downloaded data. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.
4949

5050
Starting with version 0.5.2 **cancensus** will automatically check for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detects locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_cached_data()` function. If data was cached with **cancensus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issue a warning if recalled data is loaded.
5151

5252
### Currently available datasets
5353

54-
**cancensus** can access Statistics Canada Census data for Census years 1996, 2001, 2006, 2011, 2016, and 2021. You can run `list_census_datasets` to check what datasets are currently available for access through the CensusMapper API. Additional data for the 2021 Census will be included in Censusmapper within a day or two after public release by Statistics Canada. Statistics Canada maintains a release schedule for the Census 2021 Program which can be viewed on their [website](https://www12.statcan.gc.ca/census-recensement/2021/ref/prodserv/release-diffusion-eng.cfm).
54+
**cancensus** can access Statistics Canada Census data for Census years 1996, 2001, 2006, 2011, 2016, and 2021. You can run `list_census_datasets` to check what datasets are currently available for access through the CensusMapper API. Additional data for the 2021 Census will be included in CensusMapper within a day or two after public release by Statistics Canada. Statistics Canada maintains a release schedule for the Census 2021 Program which can be viewed on their [website](https://www12.statcan.gc.ca/census-recensement/2021/ref/prodserv/release-diffusion-eng.cfm).
5555

5656
Thanks to contributions by the Canada Mortgage and Housing Corporation (CMHC), **cancensus** now includes additional Census-linked datasets as open-data releases. These include annual taxfiler data at the census tract level for tax years 2000 through 2018, which includes data on incomes and demographics, as well as specialized crosstabs for Structural type of dwelling by Document type, which details occupancy status for residences. These crosstabs are available for the 2001, 2006, 2011, 2016, and 2021 Census years at all levels starting with census tract.
5757

@@ -139,7 +139,7 @@ There are several other jurisdiction where census data is available via R packag
139139
If you wish to cite cancensus:
140140

141141
von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2022). cancensus: R package to
142-
access, retrieve, and work with Canadian Census data and geography. v0.5.5.
142+
access, retrieve, and work with Canadian Census data and geography. v0.5.6.
143143

144144

145145
A BibTeX entry for LaTeX users is
@@ -148,7 +148,7 @@ A BibTeX entry for LaTeX users is
148148
author = {Jens {von Bergmann} and Dmitry Shkolnik and Aaron Jacobs},
149149
title = {cancensus: R package to access, retrieve, and work with Canadian Census data and geography},
150150
year = {2022},
151-
note = {R package version 0.5.5},
151+
note = {R package version 0.5.6},
152152
url = {https://mountainmath.github.io/cancensus/}
153153
}
154154
```

0 commit comments

Comments
 (0)