Skip to content

Commit b94f2e7

Browse files
committed
CIViC, NCI Thesaurus, MitelmanDB and DepMap update
1 parent a06cc33 commit b94f2e7

File tree

12 files changed

+64
-23
lines changed

12 files changed

+64
-23
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ Package: pharmOncoX
22
Type: Package
33
Title: Molecularly targeted cancer drugs and biomarkers
44
Version: 1.6.10
5+
URL: https://sigven.github.io/pharmOncoX
56
Authors@R:
67
c(person(given = "Sigve",
78
family = "Nakken",
@@ -19,7 +20,6 @@ Description: This data package collects anticancer drug information from
1920
also allows for the retrieval of curated biomarkers from multiple
2021
freely available resources (CIViC, CGI, Mitelman database).
2122
License: MIT + file LICENSE
22-
URL: https://github.com/sigven/pharmOncoX
2323
BugReports: https://github.com/sigven/pharmOncoX/issues
2424
Depends:
2525
R (>= 4.1.0)

NEWS.md

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Version 1.7.0
2+
3+
* CIViC update (20240807)
4+
* NCI Thesaurus update (24.07e)
5+
* MitelmanDB update (20240715)
6+
* New dataset: DepMap (cell line) RNA fusion events
7+
18
# Version 1.6.10
29

310
* Fixed some erroneous drug classifications
@@ -6,24 +13,24 @@
613

714
# Version 1.6.8 (June 7th 2024)
815

9-
* NCI update (24.05d)
16+
* NCI Thesaurus update (24.05d)
1017

1118
# Version 1.6.7 (May 23rd 2024)
1219

13-
* NCI update (24.04e)
14-
* Updated CIViC (20240523)
20+
* NCI Thesaurus update (24.04e)
21+
* CIViC update (20240523)
1522

1623
# Version 1.6.4 (April 30th 2024)
1724

1825
* Improved clinical (tumor site) annotations of fusions from MitelmanDB
1926

2027
# Version 1.6.3 (April 26th 2024)
2128

22-
* Updated CIViC (20240426)
29+
* CIViC update (20240426)
2330

2431
# Version 1.6.2 (April 12th 2024)
2532

26-
* Updated NCI Thesaurus (24.03d)
33+
* NCI Thesaurus update (24.03d)
2734

2835
# Version 1.6.1 (March 26th 2024)
2936

@@ -39,7 +46,7 @@
3946

4047
# Version 1.5.8 (February 6th 2024)
4148

42-
* NCI Thesaurus 24.01e
49+
* NCI Thesaurus update (24.01e)
4350

4451
# Version 1.5.7 (February 3rd 2024)
4552

R/sysdata.rda

-3 Bytes
Binary file not shown.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# pharmOncoX <a href="https://sigven.github.io/pharmOncoX/"><img src="man/figures/logo.png" align="right" height="104" width="90"/></a>
44

5-
**pharmOncoX** is an R package that provides access to targeted and non-targeted cancer drugs, and genomic cancer biomarkers. Cancer drugs include comprehensive annotations per target, drug mechanism-of-action, approval dates, clinical trial phases for various indications etc. Drugs are further classified according to the [Anatomical Therapeutic Chemical (ATC) Classification System](https://www.whocc.no/atc_ddd_index/), enabling a filtering of cancer drugs according to their main types of action.
5+
**pharmOncoX** is an R package that provides access to targeted and non-targeted cancer drugs, and genomic cancer biomarkers. Cancer drugs include comprehensive annotations per target, drug mechanism-of-action, approval dates, clinical trial phases for various indications, etc. Drugs are further classified according to the [Anatomical Therapeutic Chemical (ATC) Classification System](https://www.whocc.no/atc_ddd_index/), enabling filtering of cancer drugs according to their main types of action. The package also provides access to data on actionable genomic aberrations (i.e. molecular biomarkers), including gene fusions, mutations, copy number alterations, and expression biomarkers.
66

77

88
## Getting started

data-raw/biomarker_utilities.R

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ expand_hgvs_terms <- function(var, aa_dict, add_codon_markers = FALSE) {
430430
}
431431

432432
load_civic_biomarkers <- function(
433-
datestamp = '20240130',
433+
datestamp = '20240709',
434434
compound_synonyms = NULL,
435435
hg38_fasta =
436436
"/Users/sigven/research/DB/hg38/hg38.fa",
@@ -2630,8 +2630,26 @@ load_custom_fusion_db <- function() {
26302630
return(biomarker_items)
26312631
}
26322632

2633+
load_depmap_fusions <- function(db_datestamp = "24Q2"){
2634+
2635+
# Load DepMap fusions
2636+
depmap_data <- list()
2637+
depmap_data[['fusions']] <- as.data.frame(read.csv(
2638+
file = "data-raw/depmap/OmicsFusionFiltered.csv", header = T))
2639+
2640+
depmap_data[['models']] <- as.data.frame(read.csv(
2641+
file = "data-raw/depmap/Model.csv", header = T)) |>
2642+
dplyr::select(
2643+
ModelID, CellLineName, OncotreeLineage,
2644+
OncotreePrimaryDisease, OncotreeCode,
2645+
Age, Sex, PrimaryOrMetastasis, SampleCollectionSite,
2646+
SourceType
2647+
)
2648+
return(depmap_data)
2649+
}
2650+
26332651
load_mitelman_db <- function(cache_dir = NA,
2634-
db_datestamp = "20240415") {
2652+
db_datestamp = "20240715") {
26352653

26362654
# Load Mitelman database
26372655
# dos2unix -q -n MBCA.TXT.DATA MBCA.TXT
@@ -2672,7 +2690,7 @@ load_mitelman_db <- function(cache_dir = NA,
26722690

26732691
fusion_event_data <- as.data.frame(readr::read_tsv(
26742692
file = file.path(
2675-
cache_dir, "mitelmandb", "MBCA.TXT"),
2693+
cache_dir, "mitelmandb", "MBCA.TXT.DATA"),
26762694
show_col_types = F, guess_max = 100000)) |>
26772695
dplyr::filter(stringr::str_detect(GeneShort,"::")) |>
26782696
dplyr::rename(variant = GeneShort,

data-raw/custom_drug_target_regex_nci.tsv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
pattern symbol
22
OMX-0407 SIK3
3-
RMC-9085 KRAS
3+
RMC-9085|Olomorasib KRAS
44
3706674 KRAS
55
Rineterkib ERK1
66
Rineterkib ERK2

data-raw/data-raw.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ opentargets_version <-
2121
metadata$compounds[metadata$compounds$source_abbreviation == "opentargets",
2222
"source_version"]
2323
package_datestamp <- stringr::str_replace_all(Sys.Date(),"-","")
24-
chembl_pubchem_datestamp <- '20240429'
24+
chembl_pubchem_datestamp <- '20240708'
2525

2626
## set logging layout
2727
lgr::lgr$appenders$console$set_layout(
@@ -74,7 +74,7 @@ drug_sets <- list()
7474
## Get all anticancer drugs, NCI thesaurus + DGIdb
7575
drug_sets[['nci']] <- get_nci_drugs(
7676
nci_db_release = nci_db_release,
77-
overwrite = F,
77+
overwrite = T,
7878
path_data_raw = path_data_raw,
7979
path_data_processed = path_data_tmp_processed)
8080

@@ -188,6 +188,8 @@ raw_biomarkers[['mitelmandb']] <-
188188
cache_dir = file.path(path_data_raw, "biomarkers"))
189189
raw_biomarkers[['custom_fusions']] <-
190190
load_custom_fusion_db()
191+
raw_biomarkers[['depmap']] <-
192+
load_depmap_fusions()
191193

192194
raw_biomarkers[['custom_fusions']]$variant <-
193195
raw_biomarkers[['custom_fusions']]$variant |>

data-raw/drug_name_black_list.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ Butanilicaine Hydrochloride
44
Tesmilifene Hydrochloride
55
8H9 131I
66
ABC-294640
7+
Neladenoson Bialanate
8+
Sodium Caseinate
79
AXL-1717
810
AZD-7451
911
TAS-115

data-raw/drug_utilities.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,8 +1750,10 @@ map_curated_targets <- function(gene_info = NULL,
17501750
hit$drug_approved_noncancer <- FALSE
17511751

17521752
## set general indications for unknown cases
1753-
if(is.na(hit$disease_efo_id) & is.na(hit$disease_efo_label) &
1754-
is.na(hit$cui) & is.na(hit$cui_name)){
1753+
if(is.na(hit$disease_efo_id) &
1754+
is.na(hit$disease_efo_label) &
1755+
is.na(hit$cui) &
1756+
is.na(hit$cui_name)){
17551757
hit$disease_efo_id = "EFO:0000311"
17561758
hit$disease_efo_label = "cancer"
17571759
hit$cui = "C0006826"

data-raw/metadata_pharm_oncox.xlsx

119 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)