diff --git a/DESCRIPTION b/DESCRIPTION index 6afa9987..b28ecb0b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,6 +50,7 @@ Imports: cli, dplyr, ggrepel, + gmp, magrittr, purrr, r2dii.analysis (>= 0.5.1), diff --git a/NEWS.md b/NEWS.md index 266f8ff5..78b1cffe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # pacta.loanbook (development version) * fixed bug in `pacta_loanbook_deps()` (#229) +* added `is_valid_lei()` (#150) # pacta.loanbook 0.1.0 diff --git a/R/is_valid_lei.R b/R/is_valid_lei.R new file mode 100644 index 00000000..0503a17d --- /dev/null +++ b/R/is_valid_lei.R @@ -0,0 +1,33 @@ +is_valid_lei <- function(x) { + if (is.data.frame(x) && identical(length(x), 1L)) { + x <- x[[1L]] + } + + x[is.na(x)] <- "XXX" # set NAs to something sure to return FALSE + x_factored <- as.factor(x) + x_levels <- levels(x_factored) + + x_levels <- toupper(x_levels) + x_levels <- gsub(pattern = "[[:blank:]]", replacement = "", x_levels) + valid_struct <- grepl(pattern = "^[[:alnum:][:digit:]]{20}$", x = x_levels) + + valid_lei <- + vapply( + X = x_levels, + FUN = function(x) { + x <- + pmatch( + x = unlist(strsplit(x, split = NULL, fixed = TRUE)), + table = c(0:9, LETTERS), + duplicates.ok = TRUE + ) + x <- paste(x - 1L, collapse = "") + x <- sub(pattern = "^0+", replacement = "", x = x) + gmp::mod.bigz(x, 97) == 1 + }, + FUN.VALUE = logical(1), + USE.NAMES = FALSE + ) + + valid_struct[as.numeric(x_factored)] & valid_lei[as.numeric(x_factored)] +} diff --git a/tests/testthat/test-is_valid_lei.R b/tests/testthat/test-is_valid_lei.R new file mode 100644 index 00000000..6ce50e37 --- /dev/null +++ b/tests/testthat/test-is_valid_lei.R @@ -0,0 +1,76 @@ +test_that("correctly identifies valid LEIs", { + expect_true(is_valid_lei("11111111111111111104")) # manufactured valid + expect_true(is_valid_lei("XXXXXXXXXXXXXXXXXX35")) # manufactured valid + expect_true(is_valid_lei("xxxxxxxxxxxxxxxxxx35")) # manufactured valid with lowercase + expect_true(is_valid_lei("54930084UKLVMY22DS16")) # known valid + expect_true(is_valid_lei("213800WSGIIZCXF1P572")) # known valid + expect_true(is_valid_lei("5493000IBP32UQZ0KL24")) # known valid + expect_true(is_valid_lei("L3I9ZG2KFGXZ61BMYR72")) # known valid + expect_true(is_valid_lei(" L3I9ZG2KFGXZ61BMYR72 ")) # with leading/trailing space + expect_true(is_valid_lei("L3I9 ZG2KFGXZ61BMYR 72")) # with internal space +}) + +test_that("correctly identifies invalid LEIs", { + expect_false(is_valid_lei("18500033")) # too short + expect_false(is_valid_lei("?8500033XH6RG332SX89")) # non-alphanumeric character + expect_false(is_valid_lei("XXXXXXXXXXXXXXXXXX00")) # manufactured invalid +}) + +test_that("validates multiple LEIs", { + leis <- c(invalid_lei = "XXX", valid_lei = "XXXXXXXXXXXXXXXXXX35") + expect_identical(is_valid_lei(leis), c(FALSE, TRUE)) + expect_identical(is_valid_lei(data.frame(leis)[1L]), c(FALSE, TRUE)) +}) + +test_that("always outputs a logical vector", { + # invalid and valid codes + leis <- c(invalid_lei = "XXX", valid_lei = "XXXXXXXXXXXXXXXXXX35") + expect_vector(is_valid_lei(leis[1L]), ptype = logical(), size = 1L) + expect_vector(is_valid_lei(leis[2L]), ptype = logical(), size = 1L) + expect_vector(is_valid_lei(leis), ptype = logical(), size = 2L) + + # expected possible uses + leis_df <- data.frame(lei = leis) + + out <- dplyr::mutate(leis_df, valid_lei = is_valid_lei(lei))$valid_lei + expect_vector(out, ptype = logical(), size = 2L) + + out <- is_valid_lei(leis_df$lei) + expect_vector(out, ptype = logical(), size = 2L) + + out <- is_valid_lei(leis_df["lei"]) + expect_vector(out, ptype = logical(), size = 2L) + + out <- is_valid_lei(leis_df[1L]) + expect_vector(out, ptype = logical(), size = 2L) + + out <- is_valid_lei(leis_df[["lei"]]) + expect_vector(out, ptype = logical(), size = 2L) + + out <- is_valid_lei(leis_df[[1L]]) + expect_vector(out, ptype = logical(), size = 2L) + + out <- is_valid_lei(tibble::as_tibble(leis_df)["lei"]) + expect_vector(out, ptype = logical(), size = 2L) + + # unexpected input types + expect_vector(is_valid_lei(1L), ptype = logical(), size = 1L) + expect_vector(is_valid_lei(1L:2L), ptype = logical(), size = 2L) + expect_vector(is_valid_lei(TRUE), ptype = logical(), size = 1L) + expect_vector(is_valid_lei(FALSE), ptype = logical(), size = 1L) + expect_vector(is_valid_lei(c(TRUE, FALSE)), ptype = logical(), size = 2L) + expect_vector(is_valid_lei(NA), ptype = logical(), size = 1L) + expect_vector(is_valid_lei(c(NA, NA)), ptype = logical(), size = 2L) +}) + +test_that("returns `FALSE` for unexpected values", { + expect_identical(is_valid_lei(NA), FALSE) + expect_identical(is_valid_lei(NaN), FALSE) + expect_identical(is_valid_lei(Inf), FALSE) + expect_identical(is_valid_lei(NA_integer_), FALSE) + expect_identical(is_valid_lei(NA_real_), FALSE) + expect_identical(is_valid_lei(NA_complex_), FALSE) + expect_identical(is_valid_lei(NA_character_), FALSE) + expect_identical(is_valid_lei(c(TRUE, FALSE)), c(FALSE, FALSE)) + expect_identical(is_valid_lei(1L:2L), c(FALSE, FALSE)) +})