Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: pathfindR
Type: Package
Title: Enrichment Analysis Utilizing Active Subnetworks
Version: 2.5.1.9001
Version: 2.5.1.9002
Authors@R: c(person("Ege", "Ulgen",
role = c("cre", "cph"),
email = "[email protected]",
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,9 @@ import(knitr)
import(parallel)
import(pathfindR.data)
import(rmarkdown)
importFrom(ggkegg,pathway)
importFrom(httr,GET)
importFrom(httr,content)
importFrom(httr,http_error)
importFrom(httr,status_code)
importFrom(httr,timeout)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Minor Changes and Bug Fixes
- fixed missing argument issue in `get_gene_sets_list`(#230)
- refactored to introduce `safe_get_content` so that URL access issues are handled more gracefully

# pathfindR 2.5.1

Expand Down
67 changes: 61 additions & 6 deletions R/data_generation.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,59 @@
#' Safely download and parse web content
#'
#' This helper function retrieves content from a given URL using \pkg{httr}.
#' It ensures that common issues (e.g. no internet, timeouts, HTTP errors,
#' or parsing errors) are handled gracefully with clear, informative error messages.
#'
#' @param url Character string. The URL of the resource to download.
#' @param ... Additional arguments passed to \code{\link[httr]{GET}}.
#' @param timeout_sec Numeric. Timeout in seconds for the request (default = 10).
#'
#' @return A character string containing the parsed content of the response
#' (UTF-8 encoded). On failure, an error is raised with a clear message.
#'
#' @details
#' This function is intended for use inside package functions.
#' For examples, vignettes, or tests, wrap calls in a connectivity check
#' (e.g. using \code{http_error(HEAD(url))}) to avoid CRAN failures
#' when the resource is temporarily unavailable.
#'
#' @examples
#' \dontrun{
#' # Retrieve the latest BioGRID release page
#' result <- safe_get_content("https://downloads.thebiogrid.org/BioGRID/Latest-Release/")
#' }
#'
#' @importFrom httr GET timeout http_error status_code content
safe_get_content <- function(url, ..., timeout_sec = 10) {
res <- tryCatch(
{
GET(url, timeout(timeout_sec), ...)
},
error = function(e) {
stop("Failed to retrieve resource from ", url,
". Error: ", conditionMessage(e), call. = FALSE)
}
)

# Check HTTP status
if (http_error(res)) {
stop("The resource at ", url, " is unavailable. HTTP status: ",
status_code(res), call. = FALSE)
}

# Return parsed content (default: text if HTML, raw if binary, etc.)
content <- tryCatch(
content(res, as = "text", encoding = "UTF-8"),
error = function(e) {
stop("Failed to parse content from ", url,
". Error: ", conditionMessage(e), call. = FALSE)
}
)

return(content)
}


#' Process Data frame of Protein-protein Interactions
#'
#' @param pin_df data frame of protein-protein interactions with 2 columns:
Expand Down Expand Up @@ -56,9 +112,8 @@ get_biogrid_pin <- function(org = "Homo_sapiens", path2pin, release = "latest")
}

if (release == "latest") {
result <- httr::GET("https://downloads.thebiogrid.org/BioGRID/Latest-Release/")
result <- httr::content(result, "text")

result <- safe_get_content("https://downloads.thebiogrid.org/BioGRID/Latest-Release/")

h2_matches <- regexpr("(?<=<h2>BioGRID Release\\s)(\\d\\.\\d\\.\\d+)", result, perl = TRUE)
release <- regmatches(result, h2_matches)
}
Expand Down Expand Up @@ -178,20 +233,20 @@ gset_list_from_gmt <- function(path2gmt, descriptions_idx = 2) {
#' \item{gene_sets - A list containing KEGG IDs for the genes involved in each KEGG pathway}
#' \item{descriptions - A named vector containing the descriptions for each KEGG pathway}
#' }
#' @importFrom ggkegg pathway
get_kegg_gsets <- function(org_code = "hsa") {

message("Grab a cup of coffee, this will take a while...")

all_pathways_url <- paste0("https://rest.kegg.jp/list/pathway/", org_code)
all_pathways_result <- httr::GET(all_pathways_url)
all_pathways_result <- httr::content(all_pathways_result, "text")
all_pathways_result <- safe_get_content(all_pathways_url)
parsed_all_pathways_result <- strsplit(all_pathways_result, "\n")[[1]]
pathway_ids <- vapply(parsed_all_pathways_result, function(x) unlist(strsplit(x, "\t"))[1], "id")
pathway_descriptons <- vapply(parsed_all_pathways_result, function(x) unlist(strsplit(x, "\t"))[2], "description")
names(pathway_descriptons) <- pathway_ids

genes_by_pathway <- lapply(pathway_ids, function(pw_id) {
pathways_graph <- ggkegg::pathway(pid = pw_id, directory = tempdir(), use_cache = FALSE, return_tbl_graph = FALSE)
pathways_graph <- pathway(pid = pw_id, directory = tempdir(), use_cache = FALSE, return_tbl_graph = FALSE)
all_pw_kegg_ids <- igraph::V(pathways_graph)$name[igraph::V(pathways_graph)$type == "gene"]
all_pw_kegg_ids <- unlist(strsplit(all_pw_kegg_ids, " "))
all_pw_kegg_ids <- unique(all_pw_kegg_ids)
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,4 @@ reference:
- gset_list_from_gmt
- create_HTML_report
- isColor
- safe_get_content
37 changes: 37 additions & 0 deletions man/safe_get_content.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

103 changes: 94 additions & 9 deletions tests/testthat/test-data_generation.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,62 @@
## Tests for functions related to data generation - June 2025
## Tests for functions related to data generation - September 2025
library(httr)
library(ggkegg)

test_that("safe_get_content handles GET error via mocking", {
fake_GET <- function(...) stop("Simulated connection failure")

with_mocked_bindings(
{
expect_error(
safe_get_content("http://example.com"),
regexp = "Failed to retrieve resource"
)
},
GET = fake_GET
)
})

test_that("safe_get_content handles HTTP error via mocking", {
fake_GET <- function(...) {
structure(
list(status_code = 500L),
class = "response"
)
}

with_mocked_bindings(
{
expect_error(
safe_get_content("http://example.com"),
regexp = "unavailable"
)
},
GET = fake_GET
)
})

test_that("safe_get_content handles content parsing failure via mocking", {
fake_GET <- function(...) {
structure(
list(status_code = 200L),
class = "response"
)
}

fake_content <- function(...) stop("Simulated parsing failure")

with_mocked_bindings(
{
expect_error(
safe_get_content("http://example.com"),
regexp = "Failed to parse content"
)
},
GET = fake_GET,
content = fake_content
)
})


set.seed(123)
gene_pool <- paste0("Gene", 1:100)
Expand Down Expand Up @@ -36,6 +94,7 @@ test_that("`get_biogrid_pin()` -- returns a path to a valid PIN file", {
})

test_that("`get_biogrid_pin()` -- determines and downloads the latest version", {
mockery::stub(get_biogrid_pin, "pathfindR:::safe_get_content", NULL)
mockery::stub(get_biogrid_pin, "utils::download.file", NULL)
mockery::stub(get_biogrid_pin, "utils::unzip", list(Name = "BIOGRID-ORGANISM-Homo_sapiens-X.X.X.tab3.txt"))
mockery::stub(get_biogrid_pin, "utils::read.delim", toy_biogrid_pin)
Expand Down Expand Up @@ -92,14 +151,40 @@ test_that("`gset_list_from_gmt()` -- works as expected", {

test_that("`get_kegg_gsets()` -- works as expected", {
skip_on_cran()
mock_response <- "eco00010\tdescription\neco00071\tdescription2"

# mocked binding to manage sequential responses
mock_response <- "pathway1\tdescription\npathway2\tdescription2"

mock_pw_graph1 <- igraph::graph_from_data_frame(
data.frame(from = c("A", "B"), to = c("B", "C")),
vertices = data.frame(
name = c("A", "B", "C"),
type = c("gene", "not_gene", "gene")
)
)

mock_pw_graph2 <- igraph::graph_from_data_frame(
data.frame(from = c("D", "F"), to = c("E", "G")),
vertices = data.frame(
name = c("D", "E", "F", "G"),
type = c("gene", "gene", "not_gene", "gene")
)
)

mock_pathway <- function(pid, ...) {
if (pid == "pathway1") {
return(mock_pw_graph1)
} else if (pid == "pathway2") {
return(mock_pw_graph2)
} else {
stop("Unknown pid")
}
}

with_mocked_bindings(
{
expect_is(toy_eco_kegg <- pathfindR:::get_kegg_gsets(), "list")
},
content = function(...) mock_response, .package = "httr"
safe_get_content = function(...) mock_response,
pathway = mock_pathway
)

expect_length(toy_eco_kegg, 2)
Expand All @@ -108,11 +193,11 @@ test_that("`get_kegg_gsets()` -- works as expected", {
expect_length(toy_eco_kegg[["gene_sets"]], 2)
expect_length(toy_eco_kegg[["descriptions"]], 2)

expect_true(toy_eco_kegg[["descriptions"]]["eco00010"] == "description")
expect_true(toy_eco_kegg[["descriptions"]]["eco00071"] == "description2")
expect_true(toy_eco_kegg[["descriptions"]]["pathway1"] == "description")
expect_true(toy_eco_kegg[["descriptions"]]["pathway2"] == "description2")

expect_length(toy_eco_kegg[["gene_sets"]][["eco00010"]], 47)
expect_length(toy_eco_kegg[["gene_sets"]][["eco00071"]], 15)
expect_identical(toy_eco_kegg[["gene_sets"]][["pathway1"]], c("A", "C"))
expect_identical(toy_eco_kegg[["gene_sets"]][["pathway2"]], c("D", "E", "G"))
})

test_that("`get_reactome_gsets()` -- works as expected", {
Expand Down
Loading