Skip to content

Commit a39c509

Browse files
authored
Merge pull request #192 from mountainMath/v0.5.5
add ability to download original statcan geographies and query the WDS
2 parents a673f0e + 11e6bcb commit a39c509

85 files changed

Lines changed: 2611 additions & 219 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.Rbuildignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,4 @@ lastMiKTeXException
2525

2626
^doc$
2727

28-
^R/geo_suite.R
2928
^CRAN-SUBMISSION$

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cancensus
22
Type: Package
33
Title: Access, Retrieve, and Work with Canadian Census Data and Geography
4-
Version: 0.5.4
4+
Version: 0.5.5
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut"), comment = "API creator and maintainer"),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),
@@ -38,7 +38,8 @@ Suggests: knitr,
3838
sf,
3939
geojsonsf,
4040
tidyr,
41-
lwgeom
41+
lwgeom,
42+
xml2
4243
VignetteBuilder: knitr
4344
URL: https://github.com/mountainMath/cancensus, https://mountainmath.github.io/cancensus/, https://censusmapper.ca/api
4445
BugReports: https://github.com/mountainMath/cancensus/issues

NAMESPACE

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ export(find_census_vectors)
1111
export(get_census)
1212
export(get_census_geometry)
1313
export(get_intersecting_geometries)
14+
export(get_statcan_geo_suite)
15+
export(get_statcan_geographic_attributes)
16+
export(get_statcan_geographies)
17+
export(get_statcan_geography_relationships)
18+
export(get_statcan_wds_data)
19+
export(get_statcan_wds_metadata)
1420
export(label_vectors)
1521
export(list_cancensus_cache)
1622
export(list_census_datasets)

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# cancensus 0.5.5
2+
3+
- add functionality for direct access to StatCan census WDS for 2021
4+
- add functionality to download original StatCan geographies for 2021
5+
- update CODES_TABLE for 2021 census
6+
17
# cancensus 0.5.4
28

39
- added ability to query census datasets by year

R/geo_suite.R

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,26 @@
1-
2-
3-
41
#' Read the geosuite data
52
#'
63
#' @description
7-
#' Reads the geosuite data for the given level and census year. Data gets cached after first download.
4+
#' Reads the geosuite data for the given level and census year. Data gets cached after first download if the
5+
#' cancensus cache path has been set. For older
6+
#' years `get_statcan_geographic_attributes()` can fill in most of the information.
87
#'
98
#' @param level geographic level to return the data for, valid choices are
10-
#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR"
9+
#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR", "FED", "DPL", "ER", "PN", "POPCTR"
1110
#' @param census_year census year to get the data for, right now only 2021 is supported
1211
#' @param refresh (logical) refresh the cache if true
1312
#' @return tibble with the geosuite data
1413
#'
15-
#' @keywords internal
1614
#'
1715
#' @examples
1816
#' # read the GeoSuite data for dissemination areas for the 2021 census
1917
#' \dontrun{
20-
#' get_geo_suite("DA","2021")
18+
#' get_statcan_geo_suite("DA","2021")
2119
#' }
2220
#' @export
23-
get_geo_suite <- function(level,census_year="2021",refresh=FALSE){
21+
get_statcan_geo_suite <- function(level,census_year="2021",refresh=FALSE){
2422
valid_years <- c("2021") #seq(2001,2021,5) %>% as.character()
25-
valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR")
23+
valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR","FED","DPL","ER","PN","POPCTR")
2624
if (!(as.character(census_year) %in% valid_years)) {
2725
stop(paste0("Only census years ",paste0(valid_years,collapse = ", "),
2826
" are supported for GeoSuite"))
@@ -77,10 +75,12 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){
7775
#'
7876
#' @description
7977
#' Reads the Dissemination Geographies Relationship File for the given census year. The table contains
80-
#' the information on how all the geographic levels are related for each area. A reference guide is available
78+
#' the information on how all the geographic levels are related for each area. Data gets cached after first download if the
79+
#' cancensus cache path has been set. A reference guide is available
8180
#' at https://www150.statcan.gc.ca/n1/en/catalogue/982600032021001
8281
#'
83-
#' @param census_year census year to get the data for, right now only 2021 is supported
82+
#' @param census_year census year to get the data for, right now only 2021 is supported, for older
83+
#' years `get_statcan_geographic_attributes()` can fill in most of the information
8484
#' @param refresh (logical) refresh the cache if true
8585
#' @return tibble with the relationship data
8686
#'
@@ -89,10 +89,10 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){
8989
#' @examples
9090
#' # read the dissemination geographies relationship file for the 2021 census
9191
#' \dontrun{
92-
#' get_geography_relationship("2021")
92+
#' get_statcan_geography_relationships("2021")
9393
#' }
9494
#' @export
95-
get_geography_relationship <- function(census_year="2021", refresh=FALSE){
95+
get_statcan_geography_relationships <- function(census_year="2021", refresh=FALSE){
9696
valid_years <- c("2021")
9797
if (!(as.character(census_year) %in% valid_years)) {
9898
stop(paste0("Only census years ",paste0(valid_years,collapse = ", "),
@@ -113,28 +113,30 @@ get_geography_relationship <- function(census_year="2021", refresh=FALSE){
113113
#' @description
114114
#' Reads the Geographies Attributes File for the given census year. The table contains
115115
#' the information on how all the geographic levels are related for each area, and population, dwelling and household counts.
116-
#' A reference guide is available
116+
#' Data gets cached after first download if the
117+
#' cancensus cache path has been set. A reference guide is available
117118
#' at https://www150.statcan.gc.ca/n1/en/catalogue/92-151-G2021001
118119
#'
119-
#' @param census_year census year to get the data for, right now only 2006, 2011 and 2016 are supported
120+
#' @param census_year census year to get the data for, right now only 2006, 2011, 2016, 2021 are supported
120121
#' @param refresh (logical) refresh the cache if true
121122
#' @return tibble with the relationship data
122123
#'
123124
#' @examples
124125
#' # list add the cached census data
125-
#' get_geographic_attributes("2016")
126-
#'
126+
#' \dontrun{
127+
#' get_statcan_geographic_attributes("2021")
128+
#' }
127129
#' @export
128-
get_geographic_attributes <- function(census_year="2016",refresh=FALSE){
130+
get_statcan_geographic_attributes <- function(census_year="2021",refresh=FALSE){
129131
census_year <- as.character(census_year)[1]
130-
valid_years <- seq(2006,2016,5) %>% as.character
132+
valid_years <- seq(2006,2021,5) %>% as.character
131133
if (!(as.character(census_year) %in% valid_years)) {
132134
stop(paste0("Only census years ",paste0(valid_years,collapse = ", "),
133135
" are supported for the geographic relationship file."))
134136
}
135-
urls <- c("2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip",
137+
urls <- c("2021"="https://www12.statcan.gc.ca/census-recensement/2021/geo/aip-pia/attribute-attribs/files-fichiers/2021_92-151_X.zip",
138+
"2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip",
136139
"2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_txt.zip",
137-
#"2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_xlsx.zip",
138140
"2006"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2006_92-151_XBB_txt.zip")
139141

140142
base_path <- cache_path("attribute_files")
@@ -147,8 +149,9 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){
147149
utils::download.file(urls[[census_year]],tmp)
148150
utils::unzip(tmp,exdir = base_path_year)
149151
}
150-
file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE)
151-
if (census_year=="2016") {
152+
if (census_year=="2021") file <- dir(base_path_year,pattern="\\.csv",full.names = TRUE)
153+
else file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE)
154+
if (census_year %in% c("2016","2021")) {
152155
result <- readr::read_csv(file,col_types = readr::cols(.default="c"),
153156
locale = readr::locale(encoding ="Windows-1252"))
154157
} else {
@@ -175,3 +178,7 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){
175178
dplyr::mutate(dplyr::across(dplyr::matches("DBpop\\d{4}|DBtdwell\\d{4}|DBurdwell\\d{4}|DBarea"),as.numeric))
176179
}
177180

181+
182+
183+
184+

R/geographies.R

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#' Read the original StatCan geographic boundary files
#'
#' @description
#' Reads the original unprocessed geographic boundary files from Statistics Canada.
#' The zipped shapefile is downloaded and extracted on first use (or when
#' `refresh = TRUE`) and read from the extracted local copy otherwise.
#'
#' @param census_year census year to get the data for, right now only 2021 is supported
#' @param level geographic level to return the data for, valid choices are
#' "PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR".
#' "CMACA", "CMA" and "CA" all resolve to the same boundary file.
#' @param type type of geographic data, valid choices are "cartographic" or "digital"
#' @param cache_path optional path to a directory to cache the data in. If `NULL` (the default)
#' the geographic data gets cached in the "geographies" subdirectory of the cancensus cache path.
#' @param timeout optional timeout parameter, adjust as needed if the data download times out when using slow connections
#' @param refresh (logical) refresh the cache if true
#' @param quiet (logical) suppress messages if `TRUE`
#' @return a spatial dataframe with the geographic data
#'
#' @examples
#' # get the digital geographic boundaries for provinces and territories
#' \dontrun{
#' get_statcan_geographies(census_year="2021",level="PR",type="digital")
#' }
#' @export
get_statcan_geographies <- function(census_year,level,type="cartographic",
                                    cache_path = NULL,timeout=1000,
                                    refresh=FALSE,quiet=FALSE) {
  valid_census_years <- c("2021")
  valid_levels <- c("PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR")
  valid_types <- c("cartographic","digital")
  # accept numeric years as well, the year is pasted into the download url below
  census_year <- as.character(census_year)[1]
  if (!(census_year %in% valid_census_years)) {
    stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),"."))
  }
  if (!(type %in% valid_types)) {
    stop(paste0("Type must be one of ",paste0(valid_types,collapse = ", "),"."))
  }
  if (!(level %in% valid_levels)) {
    stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),"."))
  }
  # translate user facing level names into the codes used in the StatCan file names;
  # every entry needs a name, an unnamed entry is never matched and leaves the level untranslated
  level_map <- c("CMACA"="CMA","CA"="CMA","POPCNTR"="PC")
  if (level %in% names(level_map)) level <- level_map[[level]]

  if (is.null(cache_path)) {
    # the argument is NULL here, so this call resolves to the package-level cache_path() helper
    # (R skips non-function bindings when looking up a function to call)
    geo_base_path <- cache_path("geographies")
  } else {
    geo_base_path <- cache_path
  }
  geo_base_path <- file.path(geo_base_path,type)
  if (!dir.exists(geo_base_path)) dir.create(geo_base_path,recursive = TRUE)
  exdir <- file.path(geo_base_path,level)

  if (refresh || !dir.exists(exdir) || length(dir(exdir,"\\.shp$"))==0) {
    typeID <- if (type=="cartographic") "b" else "a"
    # file names pad the level code to a fixed width
    if (nchar(level)==2) {
      filler <- "_000"
    } else if (nchar(level)==3) {
      filler <- "000"
    } else {
      stop(paste0("Problem, don't know how to get geographic data for level ",level,"."))
    }
    url <- paste0("https://www12.statcan.gc.ca/census-recensement/",census_year,"/geo/sip-pis/boundary-limites/files-fichiers/l",tolower(level),filler,typeID,"21a_e.zip")
    tmp <- tempfile(fileext = ".zip")
    old_options <- options(timeout = timeout)
    # restore the user's timeout and drop the temporary archive even if the download fails
    on.exit({
      options(old_options)
      unlink(tmp)
    }, add = TRUE)
    utils::download.file(url,tmp,mode="wb",quiet=quiet)
    utils::unzip(tmp,exdir = exdir)
  } else {
    if (!quiet) message("Reading geographic data from local cache.")
  }

  path <- dir(exdir,"\\.shp$",full.names = TRUE)
  if (length(path)==0) {
    stop(paste0("No shapefile found in ",exdir,", try again with refresh=TRUE."))
  }
  geos <- sf::read_sf(path)
  geos
}

R/helpers.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,15 @@ check_recalled_data_and_warn <- function(meta_file,params){
101101
cached_data<-generate_metadata(meta_file,params)
102102
recalled_data <- list_recalled_cached_data(cached_data,warn_only_once=TRUE)
103103
if (!is.null(recalled_data) && nrow(recalled_data)>0) {
104-
warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data.")
104+
warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data.")
105105
}
106106
d<-NULL
107107
}
108108

109109
check_for_recalled_data_and_warn <- function(){
110110
recalled_data <- list_recalled_cached_data(warn_only_once=TRUE)
111111
if (!is.null(recalled_data) && nrow(recalled_data)>0) {
112-
warning(paste0("Some locally cached data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data."))
112+
warning(paste0("Some locally cached data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data."))
113113
}
114114
d<-NULL
115115
}
@@ -129,7 +129,7 @@ first_run_checks <- function(){
129129
#' @name CODES_TABLE
130130
#' @docType data
131131
#' @author derived from StatCan definitions
132-
#' @references \url{https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo012-eng.cfm}
132+
#' @references \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CSDtype}, \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CDtype}
133133
#' @keywords data
134134
NULL
135135

R/sysdata.rda

1.13 KB
Binary file not shown.

0 commit comments

Comments
 (0)