From c63fd7ba72a5dd456874fb7de84f7cbdc808ffd1 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 2 Jan 2025 12:00:39 -0500 Subject: [PATCH 1/5] run styler + document --- R/clean_names.R | 15 +++++++-------- man/remove_empty.Rd | 4 ++-- tests/testthat/test-clean-names.R | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/R/clean_names.R b/R/clean_names.R index 388144d..abdcd73 100644 --- a/R/clean_names.R +++ b/R/clean_names.R @@ -33,7 +33,7 @@ #' support using `clean_names()` on `sf` and `tbl_graph` (from #' `tidygraph`) objects as well as on database connections through #' `dbplyr`. For cleaning other named objects like named lists -#' and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names, +#' and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names, #' stored as column labels, can be restored using `sjlabelled::label_to_colnames()`. #' #' @export @@ -83,14 +83,13 @@ clean_names.default <- function(dat, ..., set_labels = FALSE) { if (is.null(names(dat))) { dimnames(dat) <- lapply(dimnames(dat), make_clean_names, ...) } else { - if (set_labels){ + if (set_labels) { old_names <- names(dat) - for (i in seq_along(old_names)){ + for (i in seq_along(old_names)) { attr(dat[[i]], "label") <- old_names[[i]] } } names(dat) <- make_clean_names(names(dat), ...) - } dat } @@ -112,9 +111,9 @@ clean_names.sf <- function(dat, ..., set_labels = FALSE) { sf_cleaned <- make_clean_names(sf_names[cols_to_rename], ...) # rename original df names(dat)[cols_to_rename] <- sf_cleaned - - if(set_labels){ - for (i in seq_along(sf_names[cols_to_rename])){ + + if (set_labels) { + for (i in seq_along(sf_names[cols_to_rename])) { attr(dat[[i]], "label") <- sf_names[[i]] } } @@ -131,7 +130,7 @@ clean_names.tbl_graph <- function(dat, ...) { call. = FALSE ) } # nocov end - + dplyr::rename_all(dat, .funs = make_clean_names, ...) 
} diff --git a/man/remove_empty.Rd b/man/remove_empty.Rd index 66652c2..f9d14dc 100644 --- a/man/remove_empty.Rd +++ b/man/remove_empty.Rd @@ -13,8 +13,8 @@ remove_empty(dat, which = c("rows", "cols"), cutoff = 1, quiet = TRUE) value of which is provided, defaults to removing both empty rows and empty columns, declaring the behavior with a printed message.} -\item{cutoff}{Under what fraction (>0 to <=1) of non-empty rows or columns should -\code{which} be removed? Lower values keep more rows/columns, higher values drop more.} +\item{cutoff}{a row/col should have more than this fraction of non-NA values to be +retained. E.g., \code{cutoff = 0.8} means that rows/cols that are 20\% or more missing will be dropped.} \item{quiet}{Should messages be suppressed (\code{TRUE}) or printed (\code{FALSE}) indicating the summary of empty columns or rows removed?} diff --git a/tests/testthat/test-clean-names.R b/tests/testthat/test-clean-names.R index 15bb942..bc4e704 100644 --- a/tests/testthat/test-clean-names.R +++ b/tests/testthat/test-clean-names.R @@ -190,14 +190,14 @@ test_that("labels are created in default method (feature request #563)", { dat_df <- dplyr::tibble(`a a` = c(11, 22), `b b` = c(2, 3)) dat_df_clean_labels <- clean_names(dat_df, set_labels = TRUE) dat_df_clean <- clean_names(dat_df) - - for (i in seq_along(names(dat_df))){ + + for (i in seq_along(names(dat_df))) { # check that old names are saved as labels when set_labels is TRUE expect_equal(attr(dat_df_clean_labels[[i]], "label"), names(dat_df)[[i]]) # check that old names are not stored if set_labels is not TRUE expect_null(attr(dat_df_clean[[i]], "label")) - } - + } + # expect names are always cleaned expect_equal(names(dat_df_clean), c("a_a", "b_b")) expect_equal(names(dat_df_clean_labels), c("a_a", "b_b")) @@ -605,19 +605,19 @@ test_that("Tests for cases beyond default snake for sf objects", { test_that("labels are created in sf method (feature request #563)", { skip_if_not_installed("sf") - + dat_df 
<- dplyr::tibble(`a a` = c(11, 22), `b b` = c(2, 3)) dat_sf <- dat_df - dat_sf$x <- c(1,2) - dat_sf$y <- c(1,2) + dat_sf$x <- c(1, 2) + dat_sf$y <- c(1, 2) dat_sf <- sf::st_as_sf(dat_sf, coords = c("x", "y")) dat_sf_clean_labels <- clean_names(dat_sf, set_labels = TRUE) dat_sf_clean <- clean_names(dat_sf) - - for (i in seq_along(names(dat_df))){ + + for (i in seq_along(names(dat_df))) { # check that old names are saved as labels when set_labels is TRUE expect_equal(attr(dat_sf_clean_labels[[i]], "label"), names(dat_sf)[[i]]) - + # check that old names are not stored if set_labels is not TRUE expect_null(attr(dat_sf_clean[[i]], "label")) } From e86c99f2963a96ba3fd8455c8f536cfa954e1f59 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 2 Jan 2025 14:10:34 -0500 Subject: [PATCH 2/5] add example. #540 --- vignettes/janitor.Rmd | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/vignettes/janitor.Rmd b/vignettes/janitor.Rmd index 5e64c70..f50a20b 100644 --- a/vignettes/janitor.Rmd +++ b/vignettes/janitor.Rmd @@ -6,7 +6,7 @@ output: toc: true toc_depth: 3 vignette: > - %\VignetteIndexEntry{janitor} + %\VignetteIndexEntry{Overview of janitor functions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -15,7 +15,7 @@ vignette: > knitr::opts_chunk$set(collapse = TRUE, comment = "#>") library(janitor) ``` -The janitor functions expedite the initial data exploration and cleaning that comes with any new data set. This catalog describes the usage for each function. +The janitor functions expedite the initial data exploration and cleaning that comes with any new data set. This catalog describes the usage for each function. # Major functions Functions for everyday use. @@ -212,6 +212,17 @@ For instance, here a vector with a date and an Excel datetime sees both values s convert_to_date(c("2020-02-29", "40000.1")) ``` +### Paste strings together, skipping `NA` values 
+ +`paste_skip_na()` can be used to skip `NA` values when pasting multiple columns together. +This is useful since `paste()` turns `NA` into the string `"NA"` instead of skipping it. + +```{r} +paste("x", NA, "y") + +paste_skip_na("x", NA, "y") +``` + ### Elevate column names stored in a data.frame row If a data.frame has the intended variable names stored in one of its rows, `row_to_names()` will elevate the specified row to become the names of the data.frame and optionally (by default) remove the row in which names were stored and/or the rows above it. From 22051d227bc6781de761b7f6783bb0f270574da0 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 2 Jan 2025 14:13:55 -0500 Subject: [PATCH 3/5] update gh actions --- .github/workflows/R-CMD-check.yaml | 7 ++----- .github/workflows/pkgdown.yaml | 3 +-- .github/workflows/style.yaml | 2 +- .github/workflows/test-coverage.yaml | 11 ++++------- DESCRIPTION | 2 ++ README.Rmd | 2 +- README.md | 4 ++-- 7 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 008f9be..562fe0f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,9 +4,8 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: R-CMD-check +name: R-CMD-check.yaml permissions: read-all @@ -43,9 +42,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: | - any::sf - any::rcmdcheck + extra-packages: any::rcmdcheck needs: check - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index c9f0165..bfc9f4d 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -4,12 +4,11 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] release: types: [published] workflow_dispatch: -name: pkgdown +name: pkgdown.yaml permissions: read-all diff --git a/.github/workflows/style.yaml 
b/.github/workflows/style.yaml index fd16f69..a860006 100644 --- a/.github/workflows/style.yaml +++ b/.github/workflows/style.yaml @@ -4,7 +4,7 @@ on: push: paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] -name: Style +name: style.yaml permissions: read-all diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index e9da5d0..e050312 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,9 +4,8 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: test-coverage +name: test-coverage.yaml permissions: read-all @@ -25,10 +24,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: | - any::sf - any::covr - any::xml2 + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage @@ -43,7 +39,8 @@ jobs: - uses: codecov/codecov-action@v4 with: - fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} file: ./cobertura.xml plugin: noop disable_search: true diff --git a/DESCRIPTION b/DESCRIPTION index 43586ce..0ad030d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,6 +50,8 @@ Suggests: VignetteBuilder: knitr Config/testthat/edition: 3 +Config/Needs/check: sf +Config/Needs/coverage: sf Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) diff --git a/README.Rmd b/README.Rmd index ed66eec..b8e27cb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -26,7 +26,7 @@ options(width = 110) [![R-CMD-check](https://github.com/sfirke/janitor/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/sfirke/janitor/actions/workflows/R-CMD-check.yaml) -[![Coverage Status](https://img.shields.io/codecov/c/github/sfirke/janitor/main.svg)](https://app.codecov.io/github/sfirke/janitor?branch=main) +[![Codecov test 
coverage](https://codecov.io/gh/sfirke/janitor/graph/badge.svg)](https://app.codecov.io/gh/sfirke/janitor) [![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version-ago/janitor)](https://cran.r-project.org/package=janitor) ![!Monthly Downloads](https://cranlogs.r-pkg.org/badges/janitor) diff --git a/README.md b/README.md index 3b96b42..7231e20 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ [![R-CMD-check](https://github.com/sfirke/janitor/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/sfirke/janitor/actions/workflows/R-CMD-check.yaml) -[![Coverage -Status](https://img.shields.io/codecov/c/github/sfirke/janitor/main.svg)](https://app.codecov.io/github/sfirke/janitor?branch=main) +[![Codecov test +coverage](https://codecov.io/gh/sfirke/janitor/graph/badge.svg)](https://app.codecov.io/gh/sfirke/janitor) [![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version-ago/janitor)](https://cran.r-project.org/package=janitor) ![!Monthly Downloads](https://cranlogs.r-pkg.org/badges/janitor) From 0edcb1524a94f4d2fa9746611af967e4d8c67d74 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 2 Jan 2025 14:26:10 -0500 Subject: [PATCH 4/5] Add config to strip trailing whitespace and ensure correct line end --- janitor.Rproj | 3 +++ 1 file changed, 3 insertions(+) diff --git a/janitor.Rproj b/janitor.Rproj index e189768..99416ae 100644 --- a/janitor.Rproj +++ b/janitor.Rproj @@ -12,6 +12,9 @@ Encoding: UTF-8 RnwWeave: Sweave LaTeX: pdfLaTeX +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source From 6857a98f2a685aa25f82461b0337b9f68438b75e Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 2 Jan 
2025 14:27:00 -0500 Subject: [PATCH 5/5] Simplify a test --- DESCRIPTION | 2 +- tests/testthat/test-clean-names.R | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0ad030d..fc12cbf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,7 +44,7 @@ Suggests: rmarkdown, RSQLite, sf, - testthat (>= 3.0.0), + testthat (>= 3.2.0), tibble, tidygraph VignetteBuilder: diff --git a/tests/testthat/test-clean-names.R b/tests/testthat/test-clean-names.R index bc4e704..7fc0c77 100644 --- a/tests/testthat/test-clean-names.R +++ b/tests/testthat/test-clean-names.R @@ -482,9 +482,8 @@ test_that("Names are cleaned appropriately without attaching sf", { # last column name issue_578_sf <- readRDS("testdata/issue-578-sf.rds") issue_578_sf_clean <- clean_names(issue_578_sf) - expect_error( - print(issue_578_sf_clean), - NA + expect_no_error( + print(issue_578_sf_clean) ) }) @@ -724,5 +723,5 @@ test_that("groupings are preserved, #260", { df_grouped <- iris %>% dplyr::group_by(Sepal.Length, Sepal.Width) # nonsense for analysis but doesn't matter df_grouped_renamed <- df_grouped %>% clean_names(case = "lower_camel") expect_equal(dplyr::group_vars(df_grouped_renamed), c("sepalLength", "sepalWidth")) # group got renamed - expect_equal(names(df_grouped_renamed), c("sepalLength", "sepalWidth", "petalLength", "petalWidth", "species")) + expect_named(df_grouped_renamed, c("sepalLength", "sepalWidth", "petalLength", "petalWidth", "species")) })