Skip to content

Commit fe10ad4

Browse files
authored
123 derive language status (#130)
* update derive_language_status and the test for it * fix typo
1 parent 9a5542e commit fe10ad4

5 files changed

Lines changed: 39 additions & 27 deletions

File tree

R/derive_vars.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ derive_vars <- function(data) {
2121

2222
#' @title Derive language status
2323
#'
24-
#' @description Derive the status of each programming language as "access" (access only), "knowledge" (knowledge only), "both" or "neither".
24+
#' @description Derive the status of each programming language as "Use Only", "Knowledge Only", "Both" or "Neither".
2525
#'
2626
#' @param data tidied CARS wave 3 data (data.frame).
2727
#'
@@ -30,21 +30,21 @@ derive_vars <- function(data) {
3030

3131
derive_language_status <- function(data) {
3232

33-
lang_list <- colnames(data)[grepl("access_", colnames(data))]
33+
lang_list <- colnames(data)[grepl("use_", colnames(data))]
3434

3535
lang_list <- lang_list[!grepl("other", lang_list)]
3636

37-
lang_list <- gsub("access_", "", lang_list)
37+
lang_list <- gsub("use_", "", lang_list)
3838

3939
new_vars <- sapply(lang_list, function(lang) {
40-
access_col <- data[[paste0("access_", lang)]]
40+
use_col <- data[[paste0("use_", lang)]]
4141

4242
knowledge_col <- data[[paste0("knowledge_", lang)]]
4343

44-
dplyr::case_when(access_col == "Yes" & knowledge_col == "Yes" ~ "Both",
45-
access_col == "Yes" & knowledge_col != "Yes" ~ "Access Only",
46-
access_col != "Yes" & knowledge_col == "Yes" ~ "Knowledge Only",
47-
access_col != "Yes" & knowledge_col != "Yes" ~ "Neither")
44+
dplyr::case_when(use_col == "Yes" & knowledge_col == "Yes" ~ "Both",
45+
use_col == "Yes" & knowledge_col != "Yes" ~ "Use Only",
46+
use_col != "Yes" & knowledge_col == "Yes" ~ "Knowledge Only",
47+
use_col != "Yes" & knowledge_col != "Yes" ~ "Neither")
4848
})
4949

5050
colnames(new_vars) <- paste0("status_", lang_list)

R/frequency-tables.R

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ summarise_rap_opinions <- function(data, config, question, prop = TRUE, sample =
186186

187187
#' @title Summarise programming language status
188188
#'
189-
#' @description calculate counts of responents reporting access to, knowledge of, or both for each programming language.
189+
#' @description Calculate counts of respondents reporting knowledge only, use only, or both for each programming language.
190190
#'
191191
#' @param data full CARS dataset after pre-processing
192192
#'
@@ -203,9 +203,11 @@ summarise_language_status <- function(data) {
203203
"status_python",
204204
"status_SPSS",
205205
"status_stata",
206-
"status_matlab")
206+
"status_matlab",
207+
"status_dax",
208+
"status_spark")
207209

208-
levels <- c("Access Only", "Both", "Knowledge Only")
210+
levels <- c("Use Only", "Both", "Knowledge Only")
209211

210212
labels <- c("R",
211213
"SQL",
@@ -214,9 +216,13 @@ summarise_language_status <- function(data) {
214216
"Python",
215217
"SPSS",
216218
"Stata",
217-
"Matlab")
219+
"Matlab",
220+
"DAX",
221+
"Spark")
218222

219-
frequencies <- calculate_freqs(data, questions, levels, labels)
223+
data[] <- lapply(data, factor, levels = levels)
224+
225+
frequencies <- calculate_freqs(data, questions, labels, prop = TRUE, sample = FALSE)
220226

221227
return(frequencies)
222228

man/derive_language_status.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/summarise_language_status.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-derive_language_status.R

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,25 @@
33
# Input data should never include missing data.
44
dummy_data <- data.frame(
55
knowledge_R = c("Yes", "Yes"),
6-
access_R = c("No", "No"),
6+
use_R = c("No", "No"),
77
knowledge_SQL = c("Yes", "Yes"),
8-
access_SQL = c("Don't know", "No"),
8+
use_SQL = c("Don't know", "No"),
99
knowledge_SAS = c("Yes", "Yes"),
10-
access_SAS = c("Yes", "Yes"),
10+
use_SAS = c("Yes", "Yes"),
1111
knowledge_VBA = c("No", "No"),
12-
access_VBA = c("Yes", "Yes"),
12+
use_VBA = c("Yes", "Yes"),
1313
knowledge_python = c("No", "No"),
14-
access_python = c("Don't know", "Don't know"),
14+
use_python = c("Don't know", "Don't know"),
1515
knowledge_SPSS = c("No", "No"),
16-
access_SPSS = c("No", "Yes"),
16+
use_SPSS = c("No", "Yes"),
1717
knowledge_stata = c("Don't know", "No"),
18-
access_stata = c("Yes", "No"),
18+
use_stata = c("Yes", "No"),
1919
knowledge_matlab = c("Yes", "No"),
20-
access_matlab = c("Yes", "No")
20+
use_matlab = c("Yes", "No"),
21+
knowledge_dax = c("Yes", "No"),
22+
use_dax = c("No", "No"),
23+
use_spark = c("Don't know", "Don't know"),
24+
knowledge_spark = c("Don't know", "Don't know")
2125
)
2226

2327
test_that("derive_language_status output is as expected", {
@@ -29,11 +33,13 @@ test_that("derive_language_status output is as expected", {
2933
status_R = c("Knowledge Only", "Knowledge Only"),
3034
status_SQL = c("Knowledge Only", "Knowledge Only"),
3135
status_SAS = c("Both", "Both"),
32-
status_VBA = c("Access Only", "Access Only"),
36+
status_VBA = c("Use Only", "Use Only"),
3337
status_python = c("Neither", "Neither"),
34-
status_SPSS = c("Neither", "Access Only"),
35-
status_stata = c("Access Only", "Neither"),
36-
status_matlab = c("Both", "Neither")
38+
status_SPSS = c("Neither", "Use Only"),
39+
status_stata = c("Use Only", "Neither"),
40+
status_matlab = c("Both", "Neither"),
41+
status_dax = c("Knowledge Only", "Neither"),
42+
status_spark = c("Neither", "Neither")
3743
)
3844

3945
expect_equal(got, expected)

0 commit comments

Comments
 (0)