Skip to content

Commit 0e8fcd3

Browse files
ramiromagno and ShiyuC authored
0115 ct alert message (#121)
* Apply review comments
* style file
* lint issue
* Add function to check ISO8601 format
* Closes #101 and #113
* Closes #115

This update introduces an informative message about unmappable CT terms. All functions calling `ct_map()` benefit from this feature: `hardcode_ct()` and `assign_ct()`.

* Add unit test
* Address Ramm's review comments

`inform_on_ct_mappability()` is now lenient on empty string and `NA` terms.

* Fix typo

---------

Co-authored-by: Shiyu Chen <shiyu.chen@atorusresearch.com>
1 parent ce5b922 commit 0e8fcd3

File tree

5 files changed

+201
-0
lines changed

5 files changed

+201
-0
lines changed

R/ct.R

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,59 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars(
184184
ct_mappings
185185
}
186186

187+
#' Flag terms that can be recoded with a controlled terminology
#'
#' @description
#'
#' [is_ct_mappable()] tests, element by element, whether the terms in `x` have
#' a match among the `from` values of a controlled terminology mapping, as
#' determined by `index_for_recode()`.
#'
#' Missing values (`NA`) and empty strings (`""`) are deliberately reported as
#' mappable, regardless of whether they actually occur in `from`.
#'
#' Use it to find out, ahead of recoding, which terms a given controlled
#' terminology mapping covers.
#'
#' @param x A character vector of terms to be evaluated for recoding.
#' @param from A character vector of controlled terminology terms that `x` is
#'   compared against.
#'
#' @return A logical vector of the same length as `x`: `TRUE` where the term is
#'   matched in `from`, or is `NA` or an empty string; `FALSE` otherwise.
#'
#' @keywords internal
is_ct_mappable <- function(x, from) {
  recode_index <- index_for_recode(x = x, from = from)
  has_match <- !is.na(recode_index)

  # Blanks and missing values are never reported as unmappable.
  is_blank_or_missing <- is.na(x) | x %in% ""

  has_match | is_blank_or_missing
}
211+
212+
#' Report terms that cannot be mapped to controlled terminology
#'
#' [inform_on_ct_mappability()] uses [is_ct_mappable()] to check the values of
#' `x` against the controlled terminology terms in `from`. When at least one
#' value is not mappable, the distinct offending values are listed in an
#' informative message.
#'
#' @param x A character vector of terms to be checked.
#' @param from A character vector of valid controlled terminology terms.
#'
#' @returns `TRUE`, invisibly, if every term is mappable; otherwise an
#'   informative message is emitted and `FALSE` is returned invisibly.
#'
#' @keywords internal
inform_on_ct_mappability <- function(x, from) {
  is_mappable <- is_ct_mappable(x, from)
  all_mappable <- all(is_mappable)

  if (!all_mappable) {
    # The message template interpolates `unmappable` by name, so this local
    # variable must keep that exact name.
    unmappable <- unique(x[!is_mappable])

    cli::cli_alert_info(
      "These terms could not be mapped per the controlled terminology: {.val {unmappable}}.",
      wrap = FALSE
    )
  }

  invisible(all_mappable)
}
239+
187240
#' Recode according to controlled terminology
188241
#'
189242
#' [ct_map()] recodes a vector following a controlled terminology.
@@ -246,6 +299,7 @@ ct_map <-
246299
ct_spec <- dplyr::filter(ct_spec, .data[[ct_spec_vars("ct_clst")]] %in% ct_clst)
247300

248301
mappings <- ct_mappings(ct_spec, from = from, to = to)
302+
inform_on_ct_mappability(x, from = mappings$from)
249303
recode(
250304
x,
251305
from = mappings$from,

inst/WORDLIST

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,14 @@ RELREC
7272
SDTMIG
7373
SV
7474
GSK
75+
mappable
76+
mappability
77+
IDVAR
78+
QLABEL
79+
QNAM
80+
QORIG
81+
RFENDTC
82+
RFXENDTC
83+
suppquals
84+
tformat
85+
dformat

man/inform_on_ct_mappability.Rd

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/is_ct_mappable.Rd

Lines changed: 29 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-hardcode.R

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,87 @@ test_that("hardcode_ct works as expected", {
7979

8080
expect_identical(result, expected_result)
8181
})
82+
83+
test_that("hardcode_ct informs about unmappable terms", {
  # Raw dataset: one record (oak_id 3) has a missing raw value.
  md1 <- tibble::tibble(
    oak_id = c(1L, 2L, 3L, 4L),
    raw_source = c("MD1", "MD1", "MD1", "MD1"),
    patient_number = c(101L, 102L, 103L, 104L),
    MDRAW = c(
      "BABY ASPIRIN",
      "CORTISPORIN",
      NA_character_,
      "DIPHENHYDRAMINE HCL"
    )
  )

  # Target dataset to which the hardcoded variable is added.
  cm_inter <- tibble::tibble(
    oak_id = c(1L, 2L, 3L, 4L, 5L),
    raw_source = c("MD1", "MD1", "MD1", "MD1", "MD1"),
    patient_number = c(101L, 102L, 103L, 104L, 105L),
    CMTRT = c(
      "BABY ASPIRIN",
      "CORTISPORIN",
      "ASPIRIN",
      "DIPHENHYDRAMINE HCL",
      "PARACETAMOL"
    ),
    CMINDC = c(NA, "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA")
  )

  ct_spec <- read_ct_spec_example("ct-01-cm")

  # The hardcoded value is expected to be reported as unmappable for the
  # requested codelist.
  expect_message(
    hardcode_ct(
      tgt_dat = cm_inter,
      tgt_var = "CMCAT",
      raw_dat = md1,
      raw_var = "MDRAW",
      tgt_val = "GENERAL CONCOMITANT MEDICATIONS",
      ct_spec = ct_spec,
      ct_clst = "C66729"
    ),
    regexp = "These terms could not be mapped per the controlled terminology: \"GENERAL CONCOMITANT MEDICATIONS\"."
  )
})
119+
120+
test_that("hardcode_ct does not consider blanks or NAs to be CT unmappable", {
  # Raw dataset: one record (oak_id 3) has a missing raw value.
  md1 <- tibble::tibble(
    oak_id = c(1L, 2L, 3L, 4L),
    raw_source = c("MD1", "MD1", "MD1", "MD1"),
    patient_number = c(101L, 102L, 103L, 104L),
    MDRAW = c(
      "BABY ASPIRIN",
      "CORTISPORIN",
      NA_character_,
      "DIPHENHYDRAMINE HCL"
    )
  )

  # Target dataset to which the hardcoded variable is added.
  cm_inter <- tibble::tibble(
    oak_id = c(1L, 2L, 3L, 4L, 5L),
    raw_source = c("MD1", "MD1", "MD1", "MD1", "MD1"),
    patient_number = c(101L, 102L, 103L, 104L, 105L),
    CMTRT = c(
      "BABY ASPIRIN",
      "CORTISPORIN",
      "ASPIRIN",
      "DIPHENHYDRAMINE HCL",
      "PARACETAMOL"
    ),
    CMINDC = c(NA, "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA")
  )

  ct_spec <- read_ct_spec_example("ct-01-cm")

  # An empty string as the hardcoded value must not trigger the message.
  expect_silent(
    hardcode_ct(
      tgt_dat = cm_inter,
      tgt_var = "CMCAT",
      raw_dat = md1,
      raw_var = "MDRAW",
      tgt_val = "",
      ct_spec = ct_spec,
      ct_clst = "C66729"
    )
  )

  # Neither must a missing value.
  expect_silent(
    hardcode_ct(
      tgt_dat = cm_inter,
      tgt_var = "CMCAT",
      raw_dat = md1,
      raw_var = "MDRAW",
      tgt_val = NA_character_,
      ct_spec = ct_spec,
      ct_clst = "C66729"
    )
  )
})

0 commit comments

Comments
 (0)