Skip to content

Commit 9203126

Browse files
authored
Merge pull request #135 from mountainMath/v0.4.3
V0.4.3
2 parents 04ee217 + 6af649e commit 9203126

85 files changed

Lines changed: 273 additions & 170 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cansim
22
Type: Package
33
Title: Accessing Statistics Canada Data Table and Vectors
4-
Version: 0.4.2
4+
Version: 0.4.3
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut","cre")),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut")))

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# cansim 0.4.3
2+
## Minor changes
3+
* better handling of duplicated levels in metadata; duplicated geography names in census tables are not de-duplicated, and a warning is emitted instead
4+
* fix issue with accessing tables without footnotes
5+
6+
17
# cansim 0.4.2
28
## Minor changes
39
* ensure proper ordering of levels even if StatCan metadata is not ordered

R/cansim.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
152152
for (field in fields) {
153153
if (!is.null(getOption("cansim.debug"))) message(paste0('Converting ',field,' to factors'))
154154
tryCatch({
155-
level_table <- get_deduped_column_level_data(cansimTableNumber = cansimTableNumber,language=language,column=field) %>%
155+
level_table <- get_deduped_column_level_data(cansimTableNumber = cansimTableNumber,
156+
language=language,column=field) %>%
156157
arrange(as.integer(.data$`...dim`),as.integer(.data$`...id`))
157158
if (!(field %in% names(data))) {
158159
geography_column <- ifelse(cleaned_language=="eng","Geography|Geographic name",paste0("G",intToUtf8(0x00E9),"ographie|Nom g",intToUtf8(0x00E9),"ographique"))
@@ -171,7 +172,9 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
171172
column_position <- which(names(data)==field)
172173
column_before <- names(data)[column_position-1]
173174

174-
data$`...id` <- stringr::str_split(data[[coordinate_column]],"\\.") %>% lapply(\(x)x[dimension_id]) %>% unlist()
175+
data$`...id` <- stringr::str_split(data[[coordinate_column]],"\\.") %>%
176+
lapply(\(x)x[dimension_id]) %>%
177+
unlist()
175178

176179
data <- data %>%
177180
select(-all_of(field)) %>%

R/cansim_helpers.R

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -460,31 +460,43 @@ get_deduped_column_level_data <- function(cansimTableNumber,language,column) {
460460
mutate(...n=as.integer(.data$...id)) %>%
461461
arrange("...n") %>%
462462
select(-"...n") %>%
463-
mutate(...count=n(),.by="...name") %>%
463+
mutate(...count=n(),.by=c("...dim","...name")) %>%
464464
mutate(...duplicated=.data$...count>1) %>%
465465
mutate(...original=!.data$...duplicated) %>%
466466
mutate(...original_name=.data$...name) %>%
467-
mutate(...name=ifelse(.data$...duplicated & is.na(.data$...parent_id),
468-
paste0(.data$...name," [",.data$...id,"]"), # deals with 36-10-0108
469-
.data$...name)) %>%
470-
mutate(...count=n(),.by="...name") %>%
471-
mutate(...duplicated=.data$...count>1)
467+
mutate(...last_parent_id=.data$...parent_id)
468+
469+
fixed_level_table <- NULL
470+
# don't try to dedup census geographies, too messy
471+
if (substr(naked_ndm_table_number(cansimTableNumber),1,4)=="9810" && sum(filter(level_table,.data$...dim=="1")$...duplicated)>0) {
472+
warning(paste0("Table ",cansimTableNumber," is a census data table that has duplicate geography names, not converting to factors. Treat with caution when accessng geographies by name and check geographic identifiers."))
473+
fixed_level_table <- level_table %>%
474+
filter(.data$...dim=="1")
475+
level_table <- level_table %>%
476+
filter(.data$...dim!="1")
477+
}
472478

479+
# try to dedup
473480
max_run <- 30
474481
while (sum(level_table$...duplicated)>0 && max_run>0) { # deals with 36-10-0580
475482
max_run <- max_run - 1
476483
level_table <- level_table %>%
477-
left_join(level_table %>% select("...id",...parent_name="...name"),
478-
by=c("...parent_id"="...id")) %>%
479-
mutate(...name=ifelse(.data$...duplicated,
480-
paste0(.data$...name," ==> ",.data$...parent_name),
481-
.data$...name)) %>%
482-
select(-"...parent_name") %>%
483-
mutate(...count=n(),.by="...name") %>%
484-
mutate(...duplicated=.data$...count>1)
484+
left_join(level_table %>% select("...id","...dim",...parent_name="...original_name",...new_parent_id="...last_parent_id"),
485+
by=c("...last_parent_id"="...id","...dim"="...dim")) %>%
486+
mutate(...name=case_when(.data$...duplicated & is.na(.data$...parent_name) ~ paste0(.data$...name," [",.data$...id,"]"),
487+
.data$...duplicated & !is.na(.data$...parent_name) ~ paste0(.data$...name," ==> ",.data$...parent_name),
488+
TRUE ~ .data$...name)) %>%
489+
mutate(...last_parent_id=ifelse(.data$...duplicated,
490+
.data$...new_parent_id,
491+
.data$...last_parent_id)) %>%
492+
mutate(...count=n(),.by=c("...dim","...name")) %>%
493+
mutate(...duplicated=.data$...count>1) %>%
494+
select(-any_of(c("...parent_name","...new_parent_id")))
495+
485496
}
486497

487-
level_table %>%
498+
bind_rows(fixed_level_table,level_table) %>%
499+
arrange("...dim") %>%
488500
select("...dim","...id","...name","...original","...original_name")
489501
}
490502

R/cansim_metadata.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,11 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh=
233233
mutate(across(where(is.integer),as.character)) %>%
234234
arrange(as.integer(.data$footnoteId))
235235
}) %>%
236-
unique() %>%
236+
unique()
237+
if (nrow(m3)>0) {
238+
m3 <- m3 %>%
237239
arrange(as.integer(.data$footnoteId),as.integer(.data$dimensionPositionId),as.integer(.data$memberId))
240+
}
238241
saveRDS(m3, meta3_path)
239242
} else {
240243
m3 <- readRDS(meta3_path)
@@ -465,7 +468,8 @@ get_cansim_series_info_cube_coord <- function(cansimTableNumber,coordinates, tim
465468
#' @examples
466469
#' \dontrun{
467470
#' template <- get_cansim_table_template("34-10-0013")
468-
#' template |> filter(Geography=="Canada") |>
471+
#' template |>
472+
#' filter(Geography=="Canada") |>
469473
#' add_cansim_vectors_to_template()
470474
#' }
471475
#' @export

R/cansim_vectors.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ get_cansim_data_for_table_coord_periods<-function(tableCoordinates, periods=NULL
466466
new_failed_coordinates <- NULL
467467
if (length(data2)>0) {
468468
# message(paste0("Failed to load for ",length(data2)," coordinates "))
469-
new_failed_coordinates <- data2 %>% purrr::map(function(x){x$object$coordinate}) |> unlist()
469+
new_failed_coordinates <- data2 %>% purrr::map(function(x){x$object$coordinate}) %>% unlist()
470470
new_failed_coordinates <- tibble::tibble(cansimTableNumber=unique(working_data$cansimTableNumber),
471471
COORDINATE=new_failed_coordinates)
472472

README.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# cansim
22

33
<!-- badges: start -->
4-
[![R-CMD-check](https://github.com/mountainMath/cansim/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mountainMath/cansim/actions/workflows/R-CMD-check.yaml)
54
[![CRAN status](https://www.r-pkg.org/badges/version/cansim)](https://CRAN.R-project.org/package=cansim)
65
[![CRAN_Downloads_Badge](https://cranlogs.r-pkg.org/badges/cansim)](https://cranlogs.r-pkg.org/badges/cansim)
6+
[![R-CMD-check](https://github.com/mountainMath/cansim/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mountainMath/cansim/actions/workflows/R-CMD-check.yaml)
7+
[![DOI](https://img.shields.io/badge/doi-10.32614%2FCRAN.package.cansim-d2b24a.svg)](https://doi.org/10.32614/CRAN.package.cansim)
78
<!-- badges: end -->
89

910
<a href="https://mountainmath.github.io/cansim/index.html"><img src="https://raw.githubusercontent.com/mountainMath/cansim/master/images/cansim-sticker.png" alt="cansim logo" align="right" width = "25%" height = "25%"/></a>
@@ -219,19 +220,17 @@ If you want to get in touch, we are pretty good at responding via email or via t
219220

220221
### Related packages
221222

222-
* The [CANSIM2R package](https://CRAN.R-project.org/package=CANSIM2R) provides functionality to download entire StatCan tables.
223-
224-
* The [statcanR package](https://CRAN.R-project.org/package=statcanR) provides functionality to download entire StatCan tables and basic data discovery functionality.
225-
226-
* [CANSIM-dataviewer](https://github.com/bcgov/CANSIM-dataviewer) is another tool that depends on the existing *CANSIM2R* package with a focus on uses for the Province of British Columbia.
223+
* The [statcanR package](https://CRAN.R-project.org/package=statcanR) is an alternative package providing basic access to StatCan NDM tables and data discovery.
227224

228225
* [cancensus](https://github.com/mountainMath/cancensus) is a package designed to access, retrieve, and work with Canadian Census data and geography. The *cansim* package is designed to work in conjunction with *cancensus* and data can easily be joined on standard geographic identifiers exposed and harmonized by both packages.
229226

227+
* [cmhc](https://github.com/mountainMath/cmhc) is a package designed to access, retrieve, and work with CMHC data.
228+
230229
### Cite cansim
231230

232231
If you wish to cite the `cansim` package in your work:
233232

234-
von Bergmann, J., Dmitry Shkolnik (2024). cansim: functions and convenience tools for accessing Statistics Canada data tables. v0.4.2. DOI: 10.32614/CRAN.package.cansim
233+
von Bergmann, J., Shkolnik, D. (2025). cansim: functions and convenience tools for accessing Statistics Canada data tables. v0.4.3. DOI: 10.32614/CRAN.package.cansim
235234

236235
A BibTeX entry for LaTeX users is
237236

@@ -241,7 +240,7 @@ A BibTeX entry for LaTeX users is
241240
title = {cansim: functions and convenience tools for accessing Statistics Canada data tables},
242241
year = {2025},
243242
doi = {10.32614/CRAN.package.cansim},
244-
note = {R package version 0.4.2},
243+
note = {R package version 0.4.3},
245244
url = {https://mountainmath.github.io/cansim/}
246245
}
247246
```

cran-comments.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,8 @@ There were no ERRORs or WARNINGs or NOTEs.
141141
* generate table template and facilitate adding vector info to aid pinpointed data download
142142
* enable downloading of data by vector and multiple coordinates in get_cansim_data_for_table_coord_periods (breaking changes with change to parameter)
143143

144+
# cansim 0.4.3
145+
## Minor changes
146+
* better handling of duplicated levels in metadata; duplicated geography names in census tables are not de-duplicated, and a warning is emitted instead
147+
* fix issue with accessing tables without footnotes
148+

docs/404.html

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/LICENSE-text.html

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)