Skip to content

Commit 6289b12

Browse files
authored
Merge pull request #116 from mountainMath/v0.3.15
accommodate quirky census division table
2 parents 3400f7f + 2d1f1d5 commit 6289b12

66 files changed

Lines changed: 295 additions & 286 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: cansim
22
Type: Package
33
Title: Accessing Statistics Canada Data Table and Vectors
4-
Version: 0.3.14
4+
Version: 0.3.15
55
Authors@R: c(
66
person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("cre")),
77
person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut")))
@@ -30,7 +30,7 @@ Imports: digest (>= 0.1),
3030
utils,
3131
DBI,
3232
RSQLite
33-
RoxygenNote: 7.2.1
33+
RoxygenNote: 7.2.3
3434
Suggests: knitr,
3535
rmarkdown,
3636
scales,

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# cansim 0.3.15
2+
## Minor changes
3+
* accommodate quirks in table 98-10-0017
4+
15
# cansim 0.3.14
26
## Minor changes
37
* Better header parsing to avoid warning messages

R/cansim.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ fold_in_metadata_for_columns <- function(data,data_path,column_names){
414414

415415
meta2 <- readRDS(paste0(data_path,"2"))
416416

417+
417418
if (!is.null(getOption("cansim.debug"))) message('Generating base hierarchy')
418419
hierarchy_data <- tibble(X=pull(data,coordinate_column) %>% unique) %>%
419420
setNames(coordinate_column) %>%
@@ -532,11 +533,21 @@ get_cansim <- function(cansimTableNumber, language="english", refresh=FALSE, tim
532533
as.character()
533534

534535
symbols <- which(header=="Symbol")
536+
if (length(symbols)==0) {
537+
symbols <- which(header=="Symbols")
538+
}
535539

536540
if (length(symbols)>1) {
537541
header[symbols] <- paste0("Symbol ",seq(1,length(symbols)))
538542
}
539543

544+
coordinate_column <- ifelse(cleaned_language=="eng","COORDINATE",paste0("COORDONN",intToUtf8(0x00C9),"ES"))
545+
if (!(coordinate_column %in% header)) {
546+
ci <- which(grepl(coordinate_column,header,ignore.case = TRUE))
547+
if (length(ci)==1) {
548+
header[ci] <- coordinate_column
549+
}
550+
}
540551

541552

542553
data <- csv_reader(file.path(exdir, paste0(base_table, ".csv")),

R/cansim_helpers.R

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -266,16 +266,16 @@ transform_value_column <- function(data,value_column){
266266
symbols <- which(grepl("^Symbol( \\d+)*$",names(data)))
267267
if (!(value_column %in% names(data)) & length(symbols)>1) {
268268
#message("\nTransforming to long form.")
269-
dimension_grep_string <- paste0("^.+ \\(",length(symbols),"[A-Z]*\\):.+\\[\\d+\\]$")
269+
dimension_grep_string <- paste0("^.+ \\(",length(symbols),"[A-Za-z]*\\):.+\\[\\d+\\]$")
270270
dimensions <- which(grepl(dimension_grep_string,names(data)))
271271
if (sum(symbols!=dimensions+1)>0) {
272272
warning("Unable to identify dimensions")
273273
} else {
274-
count_type <- stringr::str_match(names(data)[dimensions][1],paste0("(\\(",length(symbols),"[A-Z]*\\))"))[1,2]
275-
dimension_members <- gsub(paste0("^.+ \\(",length(symbols),"[A-Z]*\\): *"),"",names(data)[dimensions]) %>%
274+
count_type <- stringr::str_match(names(data)[dimensions][1],paste0("(\\(",length(symbols),"[A-Za-z]*\\))"))[1,2]
275+
dimension_members <- gsub(paste0("^.+ \\(",length(symbols),"[A-Za-z]*\\): *"),"",names(data)[dimensions]) %>%
276276
gsub(" *\\[\\d+\\]$","",.)
277277
member_ids <- stringr::str_extract(names(data)[dimensions],"\\[\\d+\\]$") %>% gsub("\\[|\\]","",.)
278-
dimension_name <- gsub(paste0(" \\(",length(symbols),"[A-Z]*\\):.+\\[\\d+\\]"),"",names(data)[dimensions]) %>%
278+
dimension_name <- gsub(paste0(" \\(",length(symbols),"[A-Za-z]*\\):.+\\[\\d+\\]"),"",names(data)[dimensions]) %>%
279279
unique() %>% paste0(.," ",count_type)
280280

281281
if (length(dimension_name)>1) {
@@ -292,10 +292,9 @@ transform_value_column <- function(data,value_column){
292292
tidyr::pivot_longer(matches(" --- "), names_pattern="^(.+) --- (.+)$",
293293
names_to=c(paste0("Member ID: ",dimension_name),".value")) %>%
294294
dplyr::left_join(member_names,by=paste0("Member ID: ",dimension_name))
295-
if ("Coordinate" %in% names(data)) {
295+
if ("COORDINATE" %in% names(data)) {
296296
data <- data %>%
297-
dplyr::mutate(COORDINATE = paste0(.data$Coordinate,".",!!as.name(paste0("Member ID: ",dimension_name)))) %>%
298-
dplyr::select(-.data$Coordinate)
297+
dplyr::mutate(COORDINATE = paste0(.data$COORDINATE,".",!!as.name(paste0("Member ID: ",dimension_name))))
299298
}
300299

301300
data <- data %>%

R/cansim_sql.R

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -98,28 +98,11 @@ get_cansim_sqlite <- function(cansimTableNumber, language="english", refresh=FAL
9898
col_types = list(.default = "c")))
9999

100100

101-
headers <- readr::read_delim(file.path(exdir, paste0(base_table, ".csv")),
102-
delim=delim,
103-
col_types = list(.default = "c"),
104-
n_max = 1) %>%
105-
names()
106-
107-
hd <- headers[duplicated(toupper(headers))]
108-
109-
if (length(hd)>0) {
110-
dupes <- headers[toupper(headers) %in% hd]
111-
unlink(exdir, recursive=TRUE)
112-
stop(paste0("This table has duplicated columns names: ",paste0(dupes,collapse = ", "),
113-
".\nThis is not allowed for SQLite databases, please use the 'get_cansim' method for this table."))
114-
}
115101

116102
meta_base_path <- paste0(base_path_for_table_language(cansimTableNumber,language,cache_path),".Rda")
117103
parse_metadata(meta,data_path = meta_base_path)
118104

119105

120-
to_drop <- intersect(headers,"TERMINATED") # not in use yet
121-
122-
123106
scale_string <- ifelse(language=="fr","IDENTIFICATEUR SCALAIRE","SCALAR_ID")
124107
value_string <- ifelse(language=="fr","VALEUR","VALUE")
125108
# scale_string2 <- ifelse(language=="fr","FACTEUR SCALAIRE","SCALAR_FACTOR")
@@ -153,12 +136,34 @@ get_cansim_sqlite <- function(cansimTableNumber, language="english", refresh=FAL
153136
as.character()
154137

155138
symbols <- which(grepl("^Symbol( .+)*$",header,ignore.case = TRUE))
139+
if (length(symbols)==0) {
140+
symbols <- which(grepl("^Symbols( .+)*$",header,ignore.case = TRUE))
141+
}
142+
156143
sl <- length(symbols)
157144

158145
if (sl>1) {
159146
header[symbols] <- paste0("Symbol ",seq(1,sl))
160147
}
161148

149+
if (!(coordinate_column %in% header)) {
150+
ci <- which(grepl(coordinate_column,header,ignore.case = TRUE))
151+
if (length(ci)==1) {
152+
header[ci] <- coordinate_column
153+
}
154+
}
155+
156+
hd <- header[duplicated(toupper(header))]
157+
158+
if (length(hd)>0) {
159+
dupes <- header[toupper(header) %in% hd]
160+
unlink(exdir, recursive=TRUE)
161+
stop(paste0("This table has duplicated columns names: ",paste0(dupes,collapse = ", "),
162+
".\nThis is not allowed for SQLite databases, please use the 'get_cansim' method for this table."))
163+
}
164+
165+
166+
162167
chunk_size=ceiling(5000000/pmax(sl,1))
163168

164169
csv2sqlite(file.path(exdir, paste0(base_table, ".csv")),

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,15 +173,15 @@ If you want to get in touch, we are pretty good at responding via email or via t
173173

174174
If you wish to cite the `cansim` package in your work:
175175

176-
von Bergmann, J., Dmitry Shkolnik (2022). cansim: functions and convenience tools for accessing Statistics Canada data tables. v0.3.14.
176+
von Bergmann, J., Dmitry Shkolnik (2023). cansim: functions and convenience tools for accessing Statistics Canada data tables. v0.3.15.
177177

178178
A BibTeX entry for LaTeX users is
179179
```
180180
@Manual{cansim,
181181
author = {Jens {von Bergmann} and Dmitry Shkolnik},
182182
title = {cansim: functions and convenience tools for accessing Statistics Canada data tables},
183-
year = {2022},
184-
note = {R package version 0.3.14},
183+
year = {2023},
184+
note = {R package version 0.3.15},
185185
url = {https://mountainmath.github.io/cansim/}
186186
}
187187
```

cran-comments.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
## Test environments
2-
* local OS X install, R 4.2.2
2+
* local OS X install, R 4.3.1
33
* GitHub Action macOS-latest, windows-latest (3.6), ubuntu-20.04 (devel, release)
44

55
## R CMD check results
@@ -96,4 +96,8 @@ There were no ERRORs or WARNINGs or NOTEs.
9696
* Better header parsing to avoid warning messages
9797
* Fix problem with some semi-wide tables
9898

99+
# cansim 0.3.15
100+
## Minor changes
101+
* accommodate quirks in table 98-10-0017
102+
99103

docs/404.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/LICENSE-text.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/LICENSE.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)