Skip to content

Commit 04ee217

Browse files
authored
Merge pull request #134 from mountainMath/v0.4.2
V0.4.2 CRAN version
2 parents fdc9943 + 046a8ad commit 04ee217

133 files changed

Lines changed: 27803 additions & 9092 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/R-CMD-check.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ jobs:
3131
env:
3232
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
3333
R_KEEP_PKG_SOURCE: yes
34+
COMPILE_VIG: ${{ secrets.COMPILE_VIG }}
3435

3536
steps:
3637
- uses: actions/checkout@v4

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cansim
22
Type: Package
33
Title: Accessing Statistics Canada Data Table and Vectors
4-
Version: 0.4.1
4+
Version: 0.4.2
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut","cre")),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut")))
@@ -33,7 +33,7 @@ Imports: digest (>= 0.6),
3333
RSQLite (>= 2.3),
3434
utils (>= 4.3),
3535
dbplyr (>= 2.5)
36-
RoxygenNote: 7.3.1
36+
RoxygenNote: 7.3.2
3737
Suggests:
3838
knitr,
3939
rmarkdown,

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(add_cansim_vectors_to_template)
34
export(add_provincial_abbreviations)
45
export(cansim_old_to_new)
56
export(cansim_repartition_cached_table)
@@ -15,6 +16,7 @@ export(get_cansim_connection)
1516
export(get_cansim_cube_metadata)
1617
export(get_cansim_data_for_table_coord_periods)
1718
export(get_cansim_key_release_schedule)
19+
export(get_cansim_series_info_cube_coord)
1820
export(get_cansim_sqlite)
1921
export(get_cansim_table_info)
2022
export(get_cansim_table_last_release_date)
@@ -23,6 +25,7 @@ export(get_cansim_table_overview)
2325
export(get_cansim_table_short_notes)
2426
export(get_cansim_table_subject)
2527
export(get_cansim_table_survey)
28+
export(get_cansim_table_template)
2629
export(get_cansim_table_url)
2730
export(get_cansim_vector)
2831
export(get_cansim_vector_for_latest_periods)

NEWS.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# cansim 0.4.2
2+
## Minor changes
3+
* ensure proper ordering of levels even if StatCan metadata is not ordered
4+
* better error messages and information on how to disable peer checking when StatCan SSL certificates have problems
5+
* automatically batch vector or coordinate data retrieval in case users request more than 300 series at a time
6+
## Major changes
7+
* enable series information by table and coordinate
8+
* generate table template and facilitate adding vector info to aid pinpointed data download
9+
* enable downloading of data by vector and multiple coordinates in get_cansim_data_for_table_coord_periods (breaking changes with change to parameter)
10+
111
# cansim 0.4.1
212
## Minor changes
313
* fix problem with parsing census data tables
@@ -17,7 +27,7 @@
1727
## Minor changes
1828
* fix problem with reading French tables released by the census division
1929
* restore original column order after converting to factors
20-
* convery geography column to factor if available
30+
* convert geography column to factor if available
2131
* fix problem with `add_provincial_abbreviations` that could lead to mislabelling of provinces in some cases
2232
* improve handling of metadata, enable downloading only metadata instead of only via full table download
2333
* fold metadata into data when accessing via vector or coordinates
@@ -56,7 +66,7 @@
5666
# cansim 0.3.10
5767
## Minor changes
5868
* Better error handling when StatCan returns empty tables
59-
* Add Hierachy for Geography in sqlite tables
69+
* Add Hierarchy for Geography in sqlite tables
6070
* Better fallback and warning messages when StatCan table categories are internally inconsistent
6171
* Performance improvements
6272

@@ -83,7 +93,7 @@
8393
# cansim 0.3.6
8494
## Major changes
8595
* Fold part of `normalize_cansim_values` into the default table and vector output, in particular always add a scaled variable column called `val_norm` and an imputed `Date` column and convert categories to factors by default.
86-
* New `get_cansim_sqlite` function that stores tables in an SQLite database and facilitates access and managemet of data.
96+
* New `get_cansim_sqlite` function that stores tables in an SQLite database and facilitates access and management of data.
8797

8898
## Minor changes
8999
* Adapt to changes in dplyr, tidyr, and tibble

R/cansim.R

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,7 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
117117
cansimTableNumber <- cleaned_ndm_table_number(cansimTableNumber)
118118
cleaned_number <- cleaned_ndm_table_number(cansimTableNumber)
119119
cleaned_language <- cleaned_ndm_language(language)
120-
geography_columns <- case_when(cleaned_language=="eng" ~
121-
c("Geography","Geographic name","Geography of origin"),
122-
TRUE ~ c(paste0("G",intToUtf8(0x00E9),"ographie"),
123-
paste0("Nom g",intToUtf8(0x00E9),"ographique"),
124-
paste0("G",intToUtf8(0x00E9),"ographie d'origine")))
120+
geography_columns <- geography_colum_names(cleaned_language)
125121

126122
base_table <- naked_ndm_table_number(cansimTableNumber)
127123
path <- paste0(base_path_for_table_language(cansimTableNumber,language),".zip")
@@ -156,7 +152,8 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
156152
for (field in fields) {
157153
if (!is.null(getOption("cansim.debug"))) message(paste0('Converting ',field,' to factors'))
158154
tryCatch({
159-
level_table <- get_deduped_column_level_data(cansimTableNumber = cansimTableNumber,language=language,column=field)
155+
level_table <- get_deduped_column_level_data(cansimTableNumber = cansimTableNumber,language=language,column=field) %>%
156+
arrange(as.integer(.data$`...dim`),as.integer(.data$`...id`))
160157
if (!(field %in% names(data))) {
161158
geography_column <- ifelse(cleaned_language=="eng","Geography|Geographic name",paste0("G",intToUtf8(0x00E9),"ographie|Nom g",intToUtf8(0x00E9),"ographique"))
162159
data_geography_column <- ifelse(language=="eng","GEO",paste0("G",intToUtf8(0x00C9),"O"))
@@ -1030,7 +1027,7 @@ get_cansim_table_notes <- function(cansimTableNumber,language="en",refresh=FALSE
10301027
tidyr::unnest_longer(!!note_id_column) %>%
10311028
mutate(!!note_id_column:=as.character(!!as.name(note_id_column))) %>%
10321029
full_join(notes %>% mutate(!!note_id_column:=as.character(!!as.name(note_id_column))),by=note_id_column) %>%
1033-
arrange(!!as.integer(as.name(note_id_column)))
1030+
arrange(as.integer(!!as.name(note_id_column)))
10341031
} else {
10351032
full_notes <- get_cansim_cube_metadata(cansimTableNumber,type="notes",refresh=refresh)
10361033
members <- get_cansim_cube_metadata(cansimTableNumber,type="members",refresh = refresh)
@@ -1064,7 +1061,7 @@ get_cansim_table_notes <- function(cansimTableNumber,language="en",refresh=FALSE
10641061
#' This can be used to check when a table has last been updated.
10651062
#'
10661063
#' @param cansimTableNumber the NDM table number
1067-
#' @return A datatime object if a release data is available, NULL otherwise.
1064+
#' @return A datetime object if a release date is available, NULL otherwise.
10681065
#'
10691066
#' @examples
10701067
#' \dontrun{

R/cansim_helpers.R

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@ get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA,warn_only=FAL
8585
httr::timeout(timeout))
8686
}
8787
if (!is.null(response$error)){
88+
if ("curl_error_peer_failed_verification" %in% class(response$error)) {
89+
stop(stringr::str_wrap(gsub(".+\\): ","",as.character(response$error),80)),"\n",
90+
"This means that the authenticity of the StatCan API server can't be verified.\n",
91+
"Statistics Canada has a history of failty SSL certificats on their API,\n",
92+
"if you are reasonably sure that your connection is not getting hijacked you\n",
93+
"can disable peer checking for the duration of the R session by typing\n\n",
94+
"httr::set_config(httr::config(ssl_verifypeer=0,ssl_verifystatus=0))","\n\n","into the console.")
95+
}
8896
if (retry>0) {
8997
message("Got timeout from StatCan, trying again")
9098
response <- get_with_timeout_retry(url,timeout=timeout,retry=retry-1,path=path)
@@ -127,6 +135,14 @@ post_with_timeout_retry <- function(url,body,timeout=200,retry=3,warn_only=FALSE
127135
httr::add_headers("Content-Type"="application/json"),
128136
httr::timeout(timeout))
129137
if (!is.null(response$error)){
138+
if ("curl_error_peer_failed_verification" %in% class(response$error)) {
139+
stop(stringr::str_wrap(gsub(".+\\): ","",as.character(response$error),80)),"\n",
140+
"This means that the authenticity of the StatCan API server can't be verified.\n",
141+
"Statistics Canada has a history of failty SSL certificats on their API,\n",
142+
"if you are reasonably sure that your connection is not getting hijacked you\n",
143+
"can disable peer checking for the duration of the R session by typing\n\n",
144+
"httr::set_config(httr::config(ssl_verifypeer=0,ssl_verifystatus=0))","\n\n","into the console.")
145+
}
130146
if (retry>0) {
131147
message("Got timeout from StatCan, trying again")
132148
response <- post_with_timeout_retry(url,body=body,timeout=timeout,retry=retry-1)
@@ -530,3 +546,26 @@ rename_columns_for_language <- function(data,from_language,to_language) {
530546
rename(!!!renames)
531547

532548
}
549+
550+
# Names of the columns that may carry geography labels, in the requested
# language.
#
# language: a single language code. "eng" selects the English column names;
#   any other value (including NA) selects the French ones, which matches the
#   fall-through branch of the previous case_when() implementation.
#   NOTE(review): a vector-valued `language` is now treated as "not English"
#   instead of being recycled element-wise; callers are expected to pass a
#   scalar -- confirm.
#
# Returns a character vector with three column names.
geography_colum_names <- function(language) {
  # Scalar dispatch: plain if/else instead of the vectorised case_when(),
  # which only worked here by recycling a length-1 condition against the
  # length-3 right-hand sides.
  if (isTRUE(language == "eng")) {
    c("Geography", "Geographic name", "Geography of origin")
  } else {
    # The accented "e" is built from its code point rather than typed
    # literally (presumably to keep the source ASCII-only).
    e_acute <- intToUtf8(0x00E9)
    c(
      paste0("G", e_acute, "ographie"),
      paste0("Nom g", e_acute, "ographique"),
      paste0("G", e_acute, "ographie d'origine")
    )
  }
}
557+
558+
559+
# Pad dot-separated coordinate strings with trailing ".0" components so that
# each has at least 10 components.
#
# coordinates: character vector (or list of character values) of coordinates
#   such as "1.2.3".
#
# Returns a character vector with one padded coordinate per input element,
# e.g. "1.2.3" becomes "1.2.3.0.0.0.0.0.0.0". Coordinates that already have
# 10 or more components are returned unchanged (nothing is truncated). Names
# on the input are carried over. Empty input yields character(0) rather than
# the NULL produced by the previous lapply()/unlist() form.
normalize_coordinates <- function(coordinates) {
  target_length <- 10L

  pad_one <- function(coordinate) {
    parts <- unlist(strsplit(coordinate, ".", fixed = TRUE))
    # max(0, .) guards against a negative repeat count for long coordinates.
    missing_parts <- max(0L, target_length - length(parts))
    paste(c(parts, rep("0", missing_parts)), collapse = ".")
  }

  # vapply() pins the result type; USE.NAMES = FALSE stops it from inventing
  # names from the values of an unnamed character vector.
  padded <- vapply(coordinates, pad_one, character(1), USE.NAMES = FALSE)
  names(padded) <- names(coordinates)
  padded
}
569+
570+
571+

0 commit comments

Comments
 (0)