Skip to content

Commit 8942485

Browse files
authored
Merge pull request #139 from mountainMath/v0.4.4
fix problem with metadata parsing and table names and add convenience functions for cache management
2 parents 9203126 + 1e248d0 commit 8942485

94 files changed

Lines changed: 752 additions & 295 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cansim
22
Type: Package
33
Title: Accessing Statistics Canada Data Table and Vectors
4-
Version: 0.4.3
4+
Version: 0.4.4
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut","cre")),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut")))

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ export(remove_cansim_cached_tables)
3939
export(remove_cansim_sqlite_cached_table)
4040
export(search_cansim_cubes)
4141
export(search_cansim_tables)
42+
export(set_cansim_cache_path)
43+
export(show_cansim_cache_path)
4244
export(view_cansim_webpage)
4345
import(dplyr)
4446
importFrom(purrr,map)

NEWS.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1+
# cansim 0.4.4
2+
## Minor changes
3+
* fix a problem with metadata parsing does not work properly for table names
4+
* make documentations more consistent wrt default langauge names
5+
* add convenience functions for setting cache paths for data accessed via get_cansim_connection
6+
17
# cansim 0.4.3
28
## Minor changes
39
* better handling of duplicated levels in metadata, ignore duplication for geography names of census tables but emit warning
410
* fix issue with accessing tables without footnotes
511

6-
712
# cansim 0.4.2
813
## Minor changes
914
* ensure proper ordering of levels even if StatCan metadata is not ordered

R/cansim.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ cansim_old_to_new <- function(oldCansimTableNumber){
254254
filter(.data$CANSIM_ID == as.integer(cleaned_number)) %>%
255255
pull(.data$PRODUCT_ID)
256256
if (identical(new_number, integer(0))) {
257-
stop(paste0("Unable to match old CANSIM table number ",cleaned_number))
257+
stop(paste0("Unable to match old CANSIM table number ",cleaned_number),call.=FALSE)
258258
}
259259
n=as.character(new_number)
260260
new_table <- paste0(substr(n,1,2),"-",substr(n,3,4),"-",substr(n,5,8))
@@ -746,7 +746,7 @@ get_cansim_column_categories <- function(cansimTableNumber, column, language="en
746746
dplyr::pull(!!as.name(dimension_id_column))
747747
data_path <- paste0(base_path_for_table_language(cleaned_number,language),".Rda_column_",column_index)
748748
if (!file.exists(data_path)){
749-
stop(paste0("Unkown column ",column))
749+
stop(paste0("Unkown column ",column),call.=FALSE)
750750
}
751751
result <- readRDS(file=data_path)
752752
} else {
@@ -781,7 +781,7 @@ get_cansim_column_categories <- function(cansimTableNumber, column, language="en
781781
exceeded_hierarchy_warning_message=exceeded_hierarchy_warning_message)
782782

783783
if (nrow(result)==0){
784-
stop(paste0("Unkown column ",column))
784+
stop(paste0("Unkown column ",column),call.=FALSE)
785785
}
786786
}
787787

@@ -930,7 +930,7 @@ get_cansim_table_url <- function(cansimTableNumber, language = "en"){
930930
url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/",naked_ndm_table_number(cansimTableNumber),"/",l)
931931
response <- httr::GET(url)
932932
if (response$status_code!=200) {
933-
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response))
933+
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
934934
}
935935
httr::content(response)$object
936936
}
@@ -956,7 +956,7 @@ get_cansim_changed_tables <- function(start_date,end_date=NULL){
956956
last_available_date = last_available_date -1
957957
}
958958
if (start_date>last_available_date) {
959-
stop(paste0("Last available date is ",last_available_date,", please try with a start date on or before that date."))
959+
stop(paste0("Last available date is ",last_available_date,", please try with a start date on or before that date."),call.=FALSE)
960960
}
961961
if (is.null(end_date)) end_date=start_date
962962
if (as.Date(end_date) > last_available_date) {
@@ -977,7 +977,7 @@ get_cansim_changed_tables <- function(start_date,end_date=NULL){
977977
url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getChangedCubeList/",strftime(date,"%Y-%m-%d"))
978978
response <- httr::GET(url)
979979
if (response$status_code!=200) {
980-
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response))
980+
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
981981
}
982982
httr::content(response)$object %>%
983983
map(function(o)tibble(productId=o$productId,releaseTime=o$releaseTime)) %>%

R/cansim_helpers.R

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ table_base_path <- function(cansimTableNumber) {
4141

4242
file_path_for_table_language <- function(cansimTableNumber, language){
4343
language <- cleaned_ndm_language(language)
44-
if (is.na(language)) stop(paste0("Unkown Lanaguage ",language))
44+
if (is.na(language)) stop(paste0("Unkown Lanaguage ",language),call.=FALSE)
4545
base_table <- naked_ndm_table_number(cansimTableNumber)
4646
file.path(paste0(base_table,"-",language))
4747
}
@@ -91,7 +91,7 @@ get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA,warn_only=FAL
9191
"Statistics Canada has a history of failty SSL certificats on their API,\n",
9292
"if you are reasonably sure that your connection is not getting hijacked you\n",
9393
"can disable peer checking for the duration of the R session by typing\n\n",
94-
"httr::set_config(httr::config(ssl_verifypeer=0,ssl_verifystatus=0))","\n\n","into the console.")
94+
"httr::set_config(httr::config(ssl_verifypeer=0,ssl_verifystatus=0))","\n\n","into the console.",call.=FALSE)
9595
}
9696
if (retry>0) {
9797
message("Got timeout from StatCan, trying again")
@@ -141,7 +141,7 @@ post_with_timeout_retry <- function(url,body,timeout=200,retry=3,warn_only=FALSE
141141
"Statistics Canada has a history of failty SSL certificats on their API,\n",
142142
"if you are reasonably sure that your connection is not getting hijacked you\n",
143143
"can disable peer checking for the duration of the R session by typing\n\n",
144-
"httr::set_config(httr::config(ssl_verifypeer=0,ssl_verifystatus=0))","\n\n","into the console.")
144+
"httr::set_config(httr::config(ssl_verifypeer=0,ssl_verifystatus=0))","\n\n","into the console.",call.=FALSE)
145145
}
146146
if (retry>0) {
147147
message("Got timeout from StatCan, trying again")
@@ -185,23 +185,6 @@ short_prov.en <- c(
185185
"Canada"="CAN"
186186
)
187187

188-
# short_prov.fr <- c(
189-
# "Colombie-Britannique"="BC",
190-
# "Alberta"="AB",
191-
# "Saskatchewan"="SK",
192-
# "Manitoba"="MB",
193-
# "Ontario"="ON",
194-
# "Qu\U00E9bec"="QC",
195-
# "Nouveau-Brunswick"="NB",
196-
# "\u00CEle-du-Prince-\U00C9douard"="PE",
197-
# "Nouvelle-\U00C9cosse"="NS",
198-
# "Terre-Neuve-et-Labrador"="NL",
199-
# "Yukon"="YT",
200-
# "Territoires du Nord-Ouest"="NT",
201-
# "Nunavut"="NU",
202-
# "Territoires du Nord-Ouest incluant Nunavut"="NTNU",
203-
# "Canada"="CAN"
204-
# )
205188

206189
short_prov.fr <- setNames(c(
207190
"BC",
@@ -290,7 +273,7 @@ get_cansim_code_set <- function(code_set=c("scalar", "frequency", "symbol", "sta
290273
refresh=FALSE){
291274
code_sets <- c("scalar", "frequency", "symbol", "status", "uom", "survey", "subject", "wdsResponseStatus")
292275
if (length(code_set)!=1 | !(code_set %in% code_sets)) {
293-
stop(paste0("Invalid code set, code_set must be one of ",paste0(code_sets,collapse=", ")))
276+
stop(paste0("Invalid code set, code_set must be one of ",paste0(code_sets,collapse=", ")),call.=FALSE)
294277
}
295278
path=file.path(tempdir(),"cansim_code_sets.Rmd")
296279
if (refresh | !file.exists(path)) {
@@ -301,7 +284,7 @@ get_cansim_code_set <- function(code_set=c("scalar", "frequency", "symbol", "sta
301284
saveRDS(content,path)
302285
} else {
303286
warning("Problem downloading code sets.")
304-
stop(httr::content(r))
287+
stop(httr::content(r),call.=FALSE)
305288
}
306289
} else {
307290
content <- readRDS(path)
@@ -410,7 +393,7 @@ format_file_size <- function (x, units = "b", standard = "auto", digits = 1L, ..
410393
else if (endsWith(units, "b"))
411394
standard <- "legacy"
412395
else if (units == "kB")
413-
stop("For SI units, specify 'standard = \"SI\"'")
396+
stop("For SI units, specify 'standard = \"SI\"'",call.=FALSE)
414397
}
415398
}
416399
base <- known_bases[[standard]]
@@ -426,7 +409,7 @@ format_file_size <- function (x, units = "b", standard = "auto", digits = 1L, ..
426409
1L
427410
if (is.na(power))
428411
stop(gettextf("Unit \"%s\" is not part of standard \"%s\"",
429-
sQuote(units), sQuote(standard)), domain = NA)
412+
sQuote(units), sQuote(standard)), domain = NA,call.=FALSE)
430413
}
431414
unit <- units_map[power + 1L]
432415
if (power == 0 && standard == "legacy")
@@ -579,5 +562,19 @@ normalize_coordinates <- function(coordinates){
579562

580563
}
581564

582-
565+
get_robust_cache_path <- function(cache_path) {
566+
if (is.null(cache_path) || cache_path=="") {
567+
cache_path <- Sys.getenv("CANSIM_CACHE_PATH")
568+
if (cache_path=="") cache_path <- getOption("cansim.cache_path",default="")
569+
if (cache_path=="") {
570+
cache_path <- file.path(tempdir(),"cansim_cache")
571+
if (!dir.exists(cache_path)) dir.create(cache_path)
572+
message(cansim_no_cache_path_message)
573+
}
574+
}
575+
if (!dir.exists(cache_path)) {
576+
stop("Cache path ",cache_path," does not exist, please create it first.",call.=FALSE)
577+
}
578+
cache_path
579+
}
583580

R/cansim_metadata.R

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,22 @@ parse_metadata <- function(meta,data_path){
3939
if (length(grep("\u201C|\u201D",meta_part))>0){
4040
meta_part <- meta_part %>% gsub("\u201C|\u201D",'"',x=.)
4141
}
42-
utils::read.delim(text=meta_part,sep=table_delim,header=TRUE,stringsAsFactors=FALSE,
42+
d<-utils::read.delim(text=meta_part,sep=table_delim,header=FALSE,stringsAsFactors=FALSE,
4343
quote="\"",na.strings="",
4444
colClasses="character",check.names=FALSE) %>%
4545
as_tibble()
46+
if (nrow(d>1)) {
47+
nn <- as.character(d[1,])
48+
d <- d %>%
49+
select(which(!is.na(nn))) %>%
50+
setNames(na.omit(nn)) %>%
51+
slice(-1)
52+
}
4653
} else {
47-
suppressWarnings(readr::read_delim(paste0(meta_part,collapse="\n"),
54+
d<- suppressWarnings(readr::read_delim(paste0(meta_part,collapse="\n"),
4855
delim=table_delim, col_types = readr::cols(.default="c")))
4956
}
57+
d
5058
}
5159

5260
read_notes <- function(meta_part) {
@@ -156,7 +164,7 @@ add_hierarchy <- function(meta_x,parent_member_id_column,member_id_column,hierar
156164
get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh=FALSE){
157165
type <- type[1]
158166
if (!(type %in% c("overview", "members", "notes", "corrections"))) {
159-
stop("type must be one of 'overview', 'members', 'notes', or 'corrections'")
167+
stop("type must be one of 'overview', 'members', 'notes', or 'corrections'",call.=FALSE)
160168
}
161169
tmp_base <- table_base_path(cansimTableNumber)
162170
if (!dir.exists(tmp_base)) dir.create(tmp_base)
@@ -172,7 +180,7 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh=
172180
httr::add_headers("Content-Type"="application/json")
173181
)
174182
if (response$status_code!=200) {
175-
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response))
183+
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
176184
}
177185
data <- httr::content(response)
178186
data1 <- Filter(function(x)x$status=="SUCCESS",data)
@@ -336,7 +344,7 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh=
336344
#' get_cansim_table_template("34-10-0013")
337345
#' }
338346
#' @export
339-
get_cansim_table_template <- function(cansimTableNumber, language="eng",refresh=FALSE){
347+
get_cansim_table_template <- function(cansimTableNumber, language="english",refresh=FALSE){
340348
cansimTableNumber <- cleaned_ndm_table_number(cansimTableNumber)
341349
member_info <- get_cansim_cube_metadata(cansimTableNumber, type="members", refresh=refresh)
342350

@@ -430,7 +438,7 @@ get_cansim_series_info_cube_coord <- function(cansimTableNumber,coordinates, tim
430438
httr::timeout(timeout)
431439
)
432440
if (response$status_code!=200) {
433-
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response))
441+
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
434442
}
435443
data <- httr::content(response)
436444
data1 <- Filter(function(x)x$status=="SUCCESS",data)
@@ -477,14 +485,14 @@ get_cansim_series_info_cube_coord <- function(cansimTableNumber,coordinates, tim
477485
add_cansim_vectors_to_template <- function(template, refresh=FALSE) {
478486

479487
if (!("cansimTableNumber" %in% names(template))) {
480-
stop("The template does not have a cansimTableNumber column.")
488+
stop("The template does not have a cansimTableNumber column.",call.=FALSE)
481489
}
482490
if (!("COORDINATE" %in% names(template))) {
483-
stop("The template does not have a COORDINATE column.")
491+
stop("The template does not have a COORDINATE column.",call.=FALSE)
484492
}
485493

486494
if (nrow(template)==0) {
487-
stop("No rows in the template.")
495+
stop("No rows in the template.",call.=FALSE)
488496
}
489497

490498
tnr <- unique(template$cansimTableNumber)

0 commit comments

Comments
 (0)