Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ fix_meta.sh
^CRAN-SUBMISSION$
^\.github$
R-CMD-check-old.yaml
^\.claude$
^CLAUDE\.md$
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: cansim
Type: Package
Title: Accessing Statistics Canada Data Table and Vectors
Version: 0.4.4
Version: 0.4.5
Authors@R: c(
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut","cre")),
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut")))
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# cansim 0.4.5
## Minor changes
* better connection error handling

# cansim 0.4.4
## Minor changes
* fix a problem with metadata parsing does not work properly for table names
Expand Down
16 changes: 12 additions & 4 deletions R/cansim.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
for (field in fields) {
if (!is.null(getOption("cansim.debug"))) message(paste0('Converting ',field,' to factors'))
tryCatch({
# get_deduped_column_level_data now returns pre-sorted data, no need for arrange()
level_table <- get_deduped_column_level_data(cansimTableNumber = cansimTableNumber,
language=language,column=field) %>%
arrange(as.integer(.data$`...dim`),as.integer(.data$`...id`))
language=language,column=field)
if (!(field %in% names(data))) {
geography_column <- ifelse(cleaned_language=="eng","Geography|Geographic name",paste0("G",intToUtf8(0x00E9),"ographie|Nom g",intToUtf8(0x00E9),"ographique"))
data_geography_column <- ifelse(language=="eng","GEO",paste0("G",intToUtf8(0x00C9),"O"))
Expand Down Expand Up @@ -191,8 +191,8 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
"than with StatCan, or if this problem can't be resolved, please flag this as an issue in the\n",
"{cansim} repository at https://github.com/mountainMath/cansim/issues."))
} else {
data <- data %>%
mutate(!!field:=factor(!!as.name(field),levels=level_table$...name))
# Use base R for factor conversion - faster than dplyr's mutate for this operation
data[[field]] <- factor(data[[field]], levels = level_table$...name)
}

},
Expand Down Expand Up @@ -929,6 +929,10 @@ get_cansim_table_url <- function(cansimTableNumber, language = "en"){
l <- cleaned_ndm_language(language) %>% substr(1,2)
url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/",naked_ndm_table_number(cansimTableNumber),"/",l)
response <- httr::GET(url)
if (is.null(response)){return(response)}
if (is.null(response$status_code)) {
stop("Problem downloading data.\n",response$error,call.=FALSE)
}
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
Expand Down Expand Up @@ -976,6 +980,10 @@ get_cansim_changed_tables <- function(start_date,end_date=NULL){
lapply(function(date){
url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getChangedCubeList/",strftime(date,"%Y-%m-%d"))
response <- httr::GET(url)
if (is.null(response)){return(response)}
if (is.null(response$status_code)) {
stop("Problem downloading data.\n",response$error,call.=FALSE)
}
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
Expand Down
47 changes: 27 additions & 20 deletions R/cansim_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ get_cansim_code_set <- function(code_set=c("scalar", "frequency", "symbol", "sta
if (refresh | !file.exists(path)) {
url='https://www150.statcan.gc.ca/t1/wds/rest/getCodeSets'
r<-get_with_timeout_retry(url)
if (is.null(r)||is.null(r$status_code)){
warning("Problem downloading code sets.")
return(NULL)
}
if (r$status_code==200) {
content <- httr::content(r)
saveRDS(content,path)
Expand Down Expand Up @@ -434,20 +438,24 @@ get_deduped_column_level_data <- function(cansimTableNumber,language,column) {
column = column,
language = language)

# full level values from metadata
# full level values from metadata - combine mutates for efficiency
level_table <- columns %>%
select(...dim=!!as.name(dimension_id_column),
...id=!!as.name(member_id_column),
...name=!!as.name(member_name_column),
...parent_id=!!as.name(parent_member_id_column)) %>%
mutate(...n=as.integer(.data$...id)) %>%
arrange("...n") %>%
select(-"...n") %>%
mutate(...count=n(),.by=c("...dim","...name")) %>%
mutate(...duplicated=.data$...count>1) %>%
mutate(...original=!.data$...duplicated) %>%
mutate(...original_name=.data$...name) %>%
mutate(...last_parent_id=.data$...parent_id)
...parent_id=!!as.name(parent_member_id_column))

# Sort once using base R for efficiency
level_table <- level_table[order(as.integer(level_table$...id)), ]

# Compute duplicates in one pass
level_table <- level_table %>%
mutate(...count=n(),
...duplicated=.data$...count>1,
...original=.data$...count==1,
...original_name=.data$...name,
...last_parent_id=.data$...parent_id,
.by=c("...dim","...name"))

fixed_level_table <- NULL
# don't try to dedup census geographies, too messy
Expand All @@ -459,27 +467,26 @@ get_deduped_column_level_data <- function(cansimTableNumber,language,column) {
filter(.data$...dim!="1")
}

# try to dedup
# try to dedup - only if there are duplicates
max_run <- 30
while (sum(level_table$...duplicated)>0 && max_run>0) { # deals with 36-10-0580
max_run <- max_run - 1
# Use join-based approach for deduplication as it handles dynamic parent chains efficiently
level_table <- level_table %>%
left_join(level_table %>% select("...id","...dim",...parent_name="...original_name",...new_parent_id="...last_parent_id"),
by=c("...last_parent_id"="...id","...dim"="...dim")) %>%
mutate(...name=case_when(.data$...duplicated & is.na(.data$...parent_name) ~ paste0(.data$...name," [",.data$...id,"]"),
.data$...duplicated & !is.na(.data$...parent_name) ~ paste0(.data$...name," ==> ",.data$...parent_name),
TRUE ~ .data$...name)) %>%
mutate(...last_parent_id=ifelse(.data$...duplicated,
.data$...new_parent_id,
.data$...last_parent_id)) %>%
mutate(...count=n(),.by=c("...dim","...name")) %>%
mutate(...duplicated=.data$...count>1) %>%
TRUE ~ .data$...name),
...last_parent_id=ifelse(.data$...duplicated, .data$...new_parent_id, .data$...last_parent_id)) %>%
mutate(...count=n(), ...duplicated=.data$...count>1, .by=c("...dim","...name")) %>%
select(-any_of(c("...parent_name","...new_parent_id")))

}

bind_rows(fixed_level_table,level_table) %>%
arrange("...dim") %>%
result <- bind_rows(fixed_level_table,level_table)
# Sort by dim and id for final output
result <- result[order(as.integer(result$...dim), as.integer(result$...id)), ]
result %>%
select("...dim","...id","...name","...original","...original_name")
}

Expand Down
12 changes: 10 additions & 2 deletions R/cansim_tables_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ list_cansim_cubes <- function(lite=FALSE,refresh=FALSE,quiet=FALSE){
if (!quiet) message("Retrieving cube information from StatCan servers...")
url=ifelse(lite,"https://www150.statcan.gc.ca/t1/wds/rest/getAllCubesListLite","https://www150.statcan.gc.ca/t1/wds/rest/getAllCubesList")
r<-get_with_timeout_retry(url,retry=0,warn_only=TRUE)
if (r$status_code==200) {
if (is.null(r)||is.null(r$status_code)){
warning("Could not retrieve cube list from StatCan servers.")
return(NULL)
}
if (!is.null(r$status_code) && r$status_code==200) {
content <- httr::content(r)

header <- content[[1]] %>%
Expand Down Expand Up @@ -173,7 +177,11 @@ list_cansim_cubes <- function(lite=FALSE,refresh=FALSE,quiet=FALSE){
#'
#' @export
search_cansim_cubes <- function(search_term, refresh=FALSE){
list_cansim_cubes(refresh = refresh) %>%
cube_list <- list_cansim_cubes(refresh = refresh)
if (is.null(cube_list)) {
stop("Could not retrieve cube list from StatCan servers.",call.=FALSE)
}
cube_list %>%
filter(grepl(search_term,.data$cubeTitleEn,ignore.case = TRUE) |
grepl(search_term,.data$cubeTitleFr,ignore.case = TRUE) |
grepl(search_term,.data$surveyEn,ignore.case = TRUE) |
Expand Down
16 changes: 15 additions & 1 deletion R/cansim_vectors.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ get_cansim_vector<-function(vectors, start_time = as.Date("1800-01-01"), end_tim
timeout = timeout)
}
if (is.null(response)) return(response)
if (is.null(response$status_code)) {
stop("Problem downloading data.\n",response$error,call.=FALSE)
}
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
Expand Down Expand Up @@ -317,6 +320,9 @@ get_cansim_vector_for_latest_periods<-function(vectors, periods=NULL,
message(paste0("Accessing CANSIM NDM vectors from Statistics Canada",addition))
response <- post_with_timeout_retry(url, body=vectors_string, timeout = timeout)
if (is.null(response)) return(response)
if (is.null(response$status_code)) {
stop("Problem downloading data.\n",response$error,call.=FALSE)
}
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
Expand Down Expand Up @@ -411,7 +417,7 @@ get_cansim_data_for_table_coord_periods<-function(tableCoordinates, periods=NULL
if ("list" %in% class(tableCoordinates)) {
tableCoordinates <- tibble::enframe(tableCoordinates) %>%
setNames(c("cansimTableNumber","COORDINATE")) %>%
tidyr::unnest_longer(.data$COORDINATE)
tidyr::unnest_longer("COORDINATE")
}
tableCoordinates <- tableCoordinates %>%
mutate(cansimTableNumber=naked_ndm_table_number(.data$cansimTableNumber)) %>%
Expand Down Expand Up @@ -457,6 +463,10 @@ get_cansim_data_for_table_coord_periods<-function(tableCoordinates, periods=NULL
}
message(paste0("Accessing CANSIM NDM coordinates from Statistics Canada",addition))
response <- post_with_timeout_retry(url, body=body_string, timeout = timeout)
if (is.null(response)) {return(response)}
if (is.null(response$status_code)) {
stop("Problem downloading data.\n",response$error,call.=FALSE)
}
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
Expand Down Expand Up @@ -564,6 +574,10 @@ get_cansim_vector_info <- function(vectors){
url="https://www150.statcan.gc.ca/t1/wds/rest/getSeriesInfoFromVector"
vectors_string=paste0("[",paste(purrr::map(as.character(vectors),function(x)paste0('{"vectorId":',x,'}')),collapse = ", "),"]")
response <- post_with_timeout_retry(url, body=vectors_string)
if (is.null(response)){return(response)}
if (is.null(response$status_code)) {
stop("Problem downloading data.\n",response$error,call.=FALSE)
}
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
Expand Down
43 changes: 38 additions & 5 deletions R/hex_sticker.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#' Internal function to generate hex sticker
#' @keywords internal
generate_cansim_hex_sticker <- function (){
income_data <- cansim::get_cansim("11-10-0239") %>%
cansim::normalize_cansim_values()
income_age_groups <- c("16 to 24 years", "25 to 34 years" , "35 to 44 years" , "45 to 54 years" ,"55 to 64 years", "65 years and over")
income_plot_data <- income_data %>%
income_plot_data <- cansim::get_cansim_connection("11-10-0239") %>%
dplyr::filter(Sex=="Both sexes",
Statistics=="Median income (excluding zeros)",
`Income source`=="Total income",
`Age group` %in% income_age_groups) %>%
cansim::collect_and_normalize() %>%
dplyr::mutate(`Age group`=factor(`Age group`,levels=income_age_groups)) %>%
dplyr::group_by(GEO,`Age group`) %>%
dplyr::left_join(dplyr::filter(.,Date==min(Date)) %>%
Expand All @@ -19,15 +18,39 @@ generate_cansim_hex_sticker <- function (){
pd <- income_plot_data %>% dplyr::filter(GEO=="Canada")
ed <- pd %>% dplyr::filter(Date==max(Date))

ca_data <- cancensus::get_census("CA16",regions=list(C="01"),geo_format='sf') %>% sf::st_transform(102002)
crs <- 'PROJCS["Canada_Lambert_Conformal_Conic",
GEOGCS["NAD83",
DATUM["North_American_Datum_1983",
SPHEROID["GRS 1980",6378137,298.257222101,
AUTHORITY["EPSG","7019"]],
AUTHORITY["EPSG","6269"]],
PRIMEM["Greenwich",0,
AUTHORITY["EPSG","8901"]],
UNIT["degree",0.0174532925199433,
AUTHORITY["EPSG","9122"]],
AUTHORITY["EPSG","4269"]],
PROJECTION["Lambert_Conformal_Conic_2SP"],
PARAMETER["latitude_of_origin",40],
PARAMETER["central_meridian",-96],
PARAMETER["standard_parallel_1",50],
PARAMETER["standard_parallel_2",70],
PARAMETER["false_easting",0],
PARAMETER["false_northing",0],
UNIT["metre",1,
AUTHORITY["EPSG","9001"]],
AXIS["Easting",EAST],
AXIS["Northing",NORTH],
AUTHORITY["ESRI","102002"]]'

ca_data <- cancensus::get_census("CA16",regions=list(C="01"),geo_format='sf') %>% sf::st_transform(crs)
q <- ggplot2::ggplot(ca_data) +
ggplot2::geom_sf(fill="grey20",size=0.01) +
ggplot2::theme_void() +
hexSticker::theme_transparent()
bbox=sf::st_bbox(ca_data)
p<-ggplot2::ggplot(pd,ggplot2::aes(x=Date,y=VALUE,color=`Age group`)) +
ggplot2::geom_line() +
ggplot2::scale_color_brewer(palette="Dark2",guide=FALSE) +
ggplot2::scale_color_brewer(palette="Dark2",guide='none') +
ggplot2::labs(x="",y="") +
ggplot2::theme_void() +
hexSticker::theme_transparent()
Expand All @@ -45,6 +68,16 @@ generate_cansim_hex_sticker <- function (){
p_color="white",
filename=here::here("images/cansim-sticker.png"))

if (FALSE) {
hexSticker::sticker(pp, package="CanViz",
p_size=8, p_y=1.5,
s_x=1, s_y=0.78, s_width=1.5, s_height=1.5,
h_color="#FF0000",
h_fill= "grey40",
p_color="white",
filename=here::here("~/Downloads/canviz-sticker.svg"))
}

}

#' Internal function to update table list
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
[![CRAN status](https://www.r-pkg.org/badges/version/cansim)](https://CRAN.R-project.org/package=cansim)
[![CRAN_Downloads_Badge](https://cranlogs.r-pkg.org/badges/cansim)](https://cranlogs.r-pkg.org/badges/cansim)
[![R-CMD-check](https://github.com/mountainMath/cansim/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mountainMath/cansim/actions/workflows/R-CMD-check.yaml)
[![DOI](https://img.shields.io/badge/doi-10.32614/CRAN.package.cansim-#d2b24a.svg)](https://doi.org/10.32614/CRAN.package.cansim)
[![DOI](https://img.shields.io/badge/DOI-10.32614/CRAN.package.cansim-d2b24a.svg)](https://doi.org/10.32614/CRAN.package.cansim)
<!-- badges: end -->

<a href="https://mountainmath.github.io/cansim/index.html"><img src="https://raw.githubusercontent.com/mountainMath/cansim/master/images/cansim-sticker.png" alt="cansim logo" align="right" width = "25%" height = "25%"/></a>
<a href="https://mountainmath.github.io/cansim/index.html"><img id="readme-logo" src="https://raw.githubusercontent.com/mountainMath/cansim/master/images/cansim-sticker.png" alt="cansim logo" align="right" width = "25%" height = "25%"/></a>

An R package to retrieve and work with public Statistics Canada data tables.

Expand Down
5 changes: 5 additions & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,8 @@ There were no ERRORs or WARNINGs or NOTEs.
* fix a problem with metadata parsing does not work properly for table names
* make documentations more consistent wrt default langauge names
* add convenience functions for setting cache paths for data accessed via get_cansim_connection

# cansim 0.4.5
## Minor changes
* better connection error handling

2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/LICENSE.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/cansim.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/listing_cansim_tables.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/articles/partial_table_data_download.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading