Skip to content

Commit 2cb48cf

Browse files
authored
Merge pull request #68 from mountainMath/use-ref-date
Fix issues with change in StatCan API limit.
2 parents 0082b1a + 4ae06b9 commit 2cb48cf

44 files changed

Lines changed: 175 additions & 133 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cansim
22
Type: Package
33
Title: Accessing Statistics Canada Data Table and Vectors
4-
Version: 0.3.0
4+
Version: 0.3.1
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("cre")),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut")))

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
## cansim 0.3.1
2+
3+
### Major changes
4+
- Fixes issues arising from StatCan changing their API row limit
5+
6+
### Minor changes
7+
- Optimize vector retrieval by REF_DATE
8+
19
## cansim 0.3.0
210

311
### Major changes

R/cansim_helpers.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ response_status_code_translation <- list(
4242
"8"="Invalid number of reference periods"
4343
)
4444

45+
response_error_translation <- list(
46+
"503"="StatCan website is currently unavailable"
47+
)
48+
4549
get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA){
4650
if (!is.na(path)) {
4751
response <- purrr::safely(httr::GET)(url,httr::timeout(timeout),httr::write_disk(path,overwrite = TRUE))
@@ -56,6 +60,10 @@ get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA){
5660
message("Got timeout from StatCan, giving up")
5761
response=response$result
5862
}
63+
} else if (response$result$status_code %in% names(response_error_translation)){
64+
stop(sprintf("%s\nReturned status code %s",response_error_translation[[as.character(response$result$status_code)]], response$result$status_code),call.=FALSE)
65+
} else if (response$result$status_code != 200){
66+
stop(sprintf("Problem downloading data, returned status code %s.",response$result$status_code),call.=FALSE)
5967
} else {
6068
response=response$result
6169
}

R/cansim_vectors.R

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
MAX_PERIODS = 6000
2+
13
extract_vector_data <- function(data1){
24
vf=list("DECIMALS"="decimals",
35
"VALUE"="value",
@@ -8,8 +10,10 @@ extract_vector_data <- function(data1){
810
"frequencyCode"="frequencyCode",
911
"SCALAR_ID"="scalarFactorCode")
1012
result <- purrr::map(data1,function(d){
13+
vdp <- d$object$vectorDataPoint
14+
if (length(vdp)==0) {return(NULL)}
1115
value_data = lapply(vf,function(f){
12-
x=purrr::map(d$object$vectorDataPoint,function(cc)cc[[f]])
16+
x=purrr::map(vdp,function(cc)cc[[f]])
1317
x[sapply(x, is.null)] <- NA
1418
unlist(x)
1519
}) %>%
@@ -75,37 +79,36 @@ rename_vectors <- function(data,vectors){
7579
#' @export
7680
get_cansim_vector<-function(vectors, start_time, end_time=Sys.Date(), use_ref_date=TRUE){
7781
start_time=as.Date(start_time)
78-
end_time=as.Date(end_time)
79-
if (!use_ref_date) {
80-
time_format="%Y-%m-%dT%H:%m"
81-
vectors=gsub("^v","",vectors) # allow for leading "v" by conditionally stripping it
82-
url="https://www150.statcan.gc.ca/t1/wds/rest/getBulkVectorDataByRange"
83-
vectors_string=paste0('"vectorIds":[',paste(purrr::map(as.character(vectors),function(x)paste0('"',x,'"')),collapse = ", "),"]")
84-
time_string=paste0('"startDataPointReleaseDate": "',strftime(start_time,time_format),
85-
'","endDataPointReleaseDate": "',strftime(end_time,time_format),'"')
86-
response <- post_with_timeout_retry(url, body=paste0("{",vectors_string,",",time_string,"}"))
87-
if (is.null(response)) return(response)
88-
if (response$status_code!=200) {
89-
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response))
90-
}
91-
data <- httr::content(response)
92-
data1 <- Filter(function(x)x$status=="SUCCESS",data)
93-
data2 <- Filter(function(x)x$status!="SUCCESS",data)
94-
if (length(data2)>0) {
95-
message(paste0("Failed to load data for ",length(data2)," vector(s)."))
96-
data2 %>% purrr::map(function(x){
97-
message(paste0("Problem downloading data: ",response_status_code_translation[as.character(x$object$responseStatusCode)]))
98-
})
99-
}
82+
original_end_time=as.Date(end_time)
83+
if (use_ref_date) end_time=Sys.Date() else end_time=original_end_time
84+
time_format="%Y-%m-%dT%H:%m"
85+
vectors=gsub("^v","",vectors) # allow for leading "v" by conditionally stripping it
86+
url="https://www150.statcan.gc.ca/t1/wds/rest/getBulkVectorDataByRange"
87+
vectors_string=paste0('"vectorIds":[',paste(purrr::map(as.character(vectors),function(x)paste0('"',x,'"')),collapse = ", "),"]")
88+
time_string=paste0('"startDataPointReleaseDate": "',strftime(start_time,time_format),
89+
'","endDataPointReleaseDate": "',strftime(end_time,time_format),'"')
90+
response <- post_with_timeout_retry(url, body=paste0("{",vectors_string,",",time_string,"}"))
91+
if (is.null(response)) return(response)
92+
if (response$status_code!=200) {
93+
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response))
94+
}
95+
data <- httr::content(response)
96+
data1 <- Filter(function(x)x$status=="SUCCESS",data)
97+
data2 <- Filter(function(x)x$status!="SUCCESS",data)
98+
if (length(data2)>0) {
99+
message(paste0("Failed to load data for ",length(data2)," vector(s)."))
100+
data2 %>% purrr::map(function(x){
101+
message(paste0("Problem downloading data: ",response_status_code_translation[as.character(x$object$responseStatusCode)]))
102+
})
103+
}
100104

101-
if (length(data1)>0)
102-
result <- extract_vector_data(data1) %>%
103-
rename_vectors(vectors)
104-
else
105-
result <- tibble::tibble()
106-
} else {
107-
result <- get_cansim_vector_for_latest_periods(vectors,periods=10000) %>%
108-
filter(as.Date(.data$REF_DATE)>=start_time,as.Date(.data$REF_DATE)<=end_time)
105+
if (length(data1)>0)
106+
result <- extract_vector_data(data1) %>% rename_vectors(vectors)
107+
else
108+
result <- tibble::tibble()
109+
if (use_ref_date) {
110+
result <- result %>%
111+
filter(as.Date(.data$REF_DATE)>=start_time,as.Date(.data$REF_DATE)<=original_end_time)
109112
}
110113
result
111114
}
@@ -124,6 +127,10 @@ get_cansim_vector<-function(vectors, start_time, end_time=Sys.Date(), use_ref_da
124127
#'
125128
#' @export
126129
get_cansim_vector_for_latest_periods<-function(vectors, periods=1){
130+
if (periods*length(vectors)>MAX_PERIODS) {
131+
periods=pmin(periods,floor(as.numeric(MAX_PERIODS)/length(vectors)))
132+
warning(paste0("Can access at most ",MAX_PERIODS," data points, capping value to ",periods," periods per vector."))
133+
}
127134
vectors=gsub("^v","",vectors) # allow for leading "v" by conditionally stripping it
128135
url="https://www150.statcan.gc.ca/t1/wds/rest/getDataFromVectorsAndLatestNPeriods"
129136
vectors_string=paste0("[",paste(purrr::map(as.character(vectors),function(x)paste0('{"vectorId":',x,',"latestN":',periods,'}')),collapse = ", "),"]")

docs/LICENSE-text.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/LICENSE.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/cansim.html

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/index.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/listing_cansim_tables.html

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
2.04 KB
Loading

0 commit comments

Comments
 (0)