Skip to content

Commit 4da898e

Browse files
authored
Merge pull request #188 from mountainMath/v0.5.4
V0.5.4
2 parents 9374331 + af9ec76 commit 4da898e

62 files changed

Lines changed: 383 additions & 103 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cancensus
22
Type: Package
33
Title: Access, Retrieve, and Work with Canadian Census Data and Geography
4-
Version: 0.5.3
4+
Version: 0.5.4
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut"), comment = "API creator and maintainer"),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(add_unique_names_to_region_list)
34
export(as_census_region_list)
45
export(census_vectors)
56
export(child_census_vectors)

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# cancensus - 0.5.4
2+
- added ability to query census datasets by year
3+
- added a convenience function for creating unique names within given selection of regions from `list_census_regions()`
4+
- added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out.
5+
- improved checking that correct spatial formats are requested
6+
- preparing for 'sp' spatial format usage deprecation in future versions
7+
18
# cancensus - 0.5.3
29
- Added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to stopping with an error.
310
- fixes a bug in the local data recall check

R/cancensus.R

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
6262
data_version<-NULL
6363
geo_version<-NULL
6464

65+
dataset <- translate_dataset(dataset)
66+
6567
# Check region selection validity
6668
if (is.na(level)) level="Regions"
6769

@@ -92,14 +94,31 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
9294
stop("The `sf` package is required to return geographies.")
9395
}
9496

95-
# Check if SF is installed when asking for spatial data
96-
if(geo_format == "sf" && !("sf" %in% utils::installed.packages())) {
97-
if (utils::menu(c("Yes", "No"),
98-
title= paste("The `sf` package is required to return geographies. Would you like to install?")) == "1") {
99-
utils::install.packages('sf')
100-
} else {
101-
print("Cancelling installation and retrieving tabular data only.")
102-
geo_format <- NA
97+
# --------- Spatial format checks --------------------------------------------------------------------#
98+
# This section checks that proper spatial formats are requested. If users select spatial data and
99+
# don't have the 'sf' package installed, will prompt them with a menu to install it, otherwise we will
100+
# return tabular data only. If users select 'sp' format, will advise them that usage is deprecated and nudge
101+
# to install 'sf' package.
102+
if (!is.na(geo_format)) {
103+
if(!geo_format %in% c("sf","sp")) {
104+
stop("the `geo_format` parameter should be 'sf', 'sp', or NA")
105+
} else if(geo_format == "sf" && !("sf" %in% utils::installed.packages())) {
106+
if (utils::menu(c("Install package", "Return tabular data without geo"),
107+
title= paste("The `sf` package is required to return geographies. Would you like to install?")) == "1") {
108+
utils::install.packages('sf')
109+
} else {
110+
message("Retrieving tabular data only. Please install 'sf' package if you wish to use Census data as spatial data.")
111+
geo_format <- NA
112+
}
113+
} else if(geo_format == "sp" && !("sf" %in% utils::installed.packages())) {
114+
message("The use of 'sp' format in cancensus package is now deprecated.\nPlease install 'sf' package to return spatial format data.")
115+
if (utils::menu(c("Install package", "Return tabular data without geo"),
116+
title= paste("Would you like to install 'sf' to continue?")) == "1") {
117+
utils::install.packages('sf')
118+
} else {
119+
message("Retrieving tabular data only. Please install 'sf' package if you wish to use Census data as spatial data.")
120+
geo_format <- NA
121+
}
103122
}
104123
}
105124

@@ -360,6 +379,7 @@ list_census_datasets <- function(use_cache = TRUE, quiet = FALSE) {
360379
#' # Attribution string for the 2006 and 2016 census datasets
361380
#' dataset_attribution(c('CA06','CA16'))
362381
dataset_attribution <- function(datasets){
382+
datasets <- lapply(datasets,translate_dataset) %>% unlist()
363383
attribution <-list_census_datasets(quiet=TRUE) %>%
364384
dplyr::filter(.data$dataset %in% datasets) %>%
365385
dplyr::pull(.data$attribution)

R/census_regions.R

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#' @examples
3737
#' list_census_regions('CA16')
3838
list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
39+
dataset <- translate_dataset(dataset)
3940
cache_file <- file.path(tempdir(),paste0(dataset, "_regions.rda"))
4041

4142
if (!use_cache || !file.exists(cache_file)) {
@@ -81,7 +82,7 @@ list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
8182
#' names matching specific queries. Users can optionally specify the target geography level
8283
#' (e.g. \code{level = 'CMA'}, \code{level = 'CSD'}, etc.). Alternatively, calling
8384
#' \code{explore_census_vectors()} will launch the interactive region selection tool on
84-
#' the Censusmapper site in a new web page or tab.
85+
#' the CensusMapper site in a new web page or tab.
8586
#'
8687
#' @param searchterm The term to search for e.g. \code{"Victoria"}.
8788
#' Search terms are case insensitive. If unable to find a given search term,
@@ -91,12 +92,11 @@ list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
9192
#' @param level One of \code{NA}, \code{'C'}, \code{'PR'}, \code{'CMA'}, \code{'CD'}, or \code{'CSD'}.
9293
#' If specified, only return variables of specified `level`.
9394
#' @param ... Further arguments passed on to \code{\link{list_census_regions}}.
95+
#' @return A census region list of the same format as `list_census_regions()` containing the matches.
9496
#'
9597
#' @export
9698
#'
9799
#' @examples
98-
#' search_census_regions('Victoria', 'CA16')
99-
#'
100100
#' \dontrun{
101101
#' # This will return a warning that no match was found, but will suggest similar named regions.
102102
#' search_census_regions('Victorea', 'CA16')
@@ -175,6 +175,46 @@ as_census_region_list <- function(tbl) {
175175
regions
176176
}
177177

178+
#' Convenience function for creating unique names from region list
#'
#' @description Names of municipalities are not always unique, especially at the CSD level. This function
#' takes as input a subset of a regions list as generated from `list_census_regions()` and de-duplicates names as
#' needed by adding the municipal status in parentheses. If this does not de-duplicate the name then the
#' geographic identifier will be further added in parentheses behind that.
#'
#' @param region_list a subset of a regions list as gotten from `list_census_regions()`. The columns
#' `name`, `municipal_status` and `region` are used to build the unique names.
#' @return The same list of regions with an extra column `Name` with de-duplicated names. Any grouping
#' present on `region_list` is restored on the result.
#' @export
#'
#' @examples
#' \dontrun{
#' # Add a column of unique names to the CSDs within a single CMA
#' library(dplyr)
#' list_census_regions("CA21") %>%
#'   filter(level=="CSD", CMA_UID=="59933") %>%
#'   add_unique_names_to_region_list()
#' }
add_unique_names_to_region_list <- function(region_list) {
  # Remember the caller's grouping as column names (not symbols) so that it
  # can be handed to `all_of()` when the grouping is restored below.
  original_groups <- dplyr::group_vars(region_list)

  r <- region_list %>%
    # First pass: names shared by several regions get the municipal status
    # appended; names that are already unique are kept as they are.
    dplyr::group_by(.data$name) %>%
    dplyr::mutate(Name = dplyr::case_when(
      dplyr::n() == 1 ~ .data$name,
      TRUE ~ paste0(.data$name, " (", .data$municipal_status, ")")
    )) %>%
    # Second pass: names that still collide additionally get the region
    # identifier appended.
    dplyr::group_by(.data$Name) %>%
    dplyr::mutate(Name = dplyr::case_when(
      dplyr::n() == 1 ~ .data$Name,
      TRUE ~ paste0(.data$Name, " (", .data$region, ")")
    )) %>%
    dplyr::ungroup()

  # Restore the original grouping, including the single-variable case.
  if (length(original_groups) > 0) {
    r <- r %>%
      dplyr::group_by(dplyr::across(dplyr::all_of(original_groups)))
  }
  r
}
217+
178218

179219
#' Lookup a municipal geography type from code - BETA
180220
#'

R/census_vectors.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#' list_census_vectors('CA16')
2828
#' }
2929
list_census_vectors <- function(dataset, use_cache = TRUE, quiet = TRUE) {
30+
dataset <- translate_dataset(dataset)
3031
cache_file <- file.path(tempdir(),paste0(dataset, "_vectors.rda"))
3132
if (!use_cache || !file.exists(cache_file)) {
3233
url <- paste0(cancensus_base_url(),"/api/v1/vector_info/", dataset, ".csv")

R/helpers.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,19 @@ cache_path <- function(...) {
3838
}
3939

4040

41+
# Translate census years into CensusMapper dataset identifiers.
#
# `dataset` may be a census year (numeric or character, e.g. 2016 or "2016")
# or a dataset identifier (e.g. "CA16"). Known census years are replaced by
# the matching identifier; every other value is returned unchanged, after
# conversion to character. Matching is exact, so the case of identifiers is
# left untouched.
#
# The lookup is vectorised: vectors of any length, including length zero,
# are translated element by element rather than raising an error in `if ()`.
#
# Returns an unnamed character vector of the same length as `dataset`.
translate_dataset <- function(dataset) {
  dataset <- as.character(dataset)
  translations <- c("1996" = "CA1996",
                    "2001" = "CA01",
                    "2006" = "CA06",
                    "2011" = "CA11",
                    "2016" = "CA16",
                    "2021" = "CA21")
  # Position of each input in the lookup table; NA marks values that are not
  # census years and are therefore passed through as-is.
  idx <- match(dataset, names(translations))
  known <- !is.na(idx)
  dataset[known] <- unname(translations[idx[known]])
  dataset
}
53+
4154
clean_vector_list <- function(vector_list,dataset=NULL){
4255
if (!inherits(vector_list,"data.frame")) {
4356
if (inherits(vector_list,"character")) {

R/intersect_geometry.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ get_intersecting_geometries <- function(dataset, level, geometry, simplified = F
4747
api_key <- robust_api_key(api_key)
4848
have_api_key <- valid_api_key(api_key)
4949
result <- NULL
50+
dataset <- translate_dataset(dataset)
5051

5152
if ("sf" %in% class(geometry)) {
5253
geometry=sf::st_geometry(geometry)

R/vector_discovery.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ keyword_search <- function(query_terms, census_vector_list, interactive = TRUE)
282282
#'
283283
#' }
284284
explore_census_vectors <- function(dataset = "CA16") {
  # Census years are accepted as well as dataset identifiers.
  dataset_code <- translate_dataset(dataset)
  message("Opening interactive census variable explorer at censusmapper.ca/api in the browser")
  # The "#api_variable" fragment is appended to the dataset's API page URL.
  explorer_url <- paste0("https://censusmapper.ca/api/", dataset_code, "#api_variable")
  utils::browseURL(explorer_url)
}
@@ -309,6 +310,7 @@ explore_census_vectors <- function(dataset = "CA16") {
309310
#'
310311
#' }
311312
explore_census_regions <- function(dataset = "CA16") {
  # Census years are accepted as well as dataset identifiers.
  dataset_code <- translate_dataset(dataset)
  message("Opening interactive census region explorer at censusmapper.ca/api in the browser")
  # The "#api_region" fragment is appended to the dataset's API page URL.
  explorer_url <- paste0("https://censusmapper.ca/api/", dataset_code, "#api_region")
  utils::browseURL(explorer_url)
}

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ For larger quotas, please get in touch with Jens [directly](mailto:jens@censusma
4747

4848
For performance reasons, and to avoid unnecessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `set_cancensus_cache_path(<local cache path>, install = TRUE)`, this enables more efficient loading and reuse of downloaded data. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.
4949

50-
Starting with version 0.5.2 **cancensus** will automatically check if for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detected locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_chached_data()` function. If data was cached with **cancenus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.
50+
Starting with version 0.5.2 **cancensus** will automatically check for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detects locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_chached_data()` function. If data was cached with **cancensus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.
5151

5252
### Currently available datasets
5353

@@ -164,7 +164,7 @@ The [tongfen package](https://mountainmath.github.io/tongfen/index.html) automat
164164

165165
### Statistics Canada Attribution
166166

167-
Subject to the Statistics Canada Open Licence Agreement, licensed products using Statistics Canada data should employ the following aknowledgement of source:
167+
Subject to the Statistics Canada Open License Agreement, licensed products using Statistics Canada data should employ the following acknowledgement of source:
168168

169169
```
170170
Acknowledgment of Source

0 commit comments

Comments
 (0)