Skip to content

Commit d73894f

Browse files
bschilderclaude
andcommitted
Initial echoAI package: AI/ML variant-level predictions
Migrate all IMPACT functions from echoannot and echolocatoR/R_tmp: - IMPACT_query() and IMPACT_process() moved from echoannot - IMPACT_files.rda dataset moved from echoannot - 13 additional functions from R_tmp/IMPACT.R refactored into 7 modular files (enrichment, plotting, heatmap, postprocessing, annotation download, LD scores) - Tests moved from echoannot - README, Docker vignette, and main vignette added Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
0 parents  commit d73894f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3255
-0
lines changed

.Rbuildignore

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
.*\.Rproj$
2+
^\.Rproj\.user$
3+
^README.Rmd
4+
^\.github$
5+
^doc$
6+
^Meta$
7+
^codecov\.yml$
8+
^_pkgdown\.yml$
9+
^docs$
10+
^pkgdown$
11+
Dockerfile
12+
^LICENSE\.md$
13+
14+
node_modules$
15+
package-lock\.json$
16+
package\.json$

.github/workflows/rworkflows.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: rworkflows
2+
'on':
3+
push:
4+
branches:
5+
- master
6+
- main
7+
- devel
8+
- RELEASE_**
9+
pull_request:
10+
branches:
11+
- master
12+
- main
13+
- devel
14+
- RELEASE_**
15+
jobs:
16+
rworkflows:
17+
permissions: write-all
18+
runs-on: ${{ matrix.config.os }}
19+
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20+
container: ${{ matrix.config.cont }}
21+
strategy:
22+
fail-fast: ${{ false }}
23+
matrix:
24+
config:
25+
- os: ubuntu-latest
26+
bioc: devel
27+
r: auto
28+
cont: ghcr.io/bioconductor/bioconductor_docker:devel
29+
rspm: ~
30+
- os: macOS-latest
31+
bioc: release
32+
r: auto
33+
cont: ~
34+
rspm: ~
35+
- os: windows-latest
36+
bioc: release
37+
r: auto
38+
cont: ~
39+
rspm: ~
40+
steps:
41+
- uses: neurogenomics/rworkflows@master
42+
with:
43+
run_bioccheck: ${{ false }}
44+
run_rcmdcheck: ${{ true }}
45+
as_cran: ${{ true }}
46+
run_vignettes: ${{ true }}
47+
has_testthat: ${{ true }}
48+
run_covr: ${{ true }}
49+
run_pkgdown: ${{ true }}
50+
has_runit: ${{ false }}
51+
has_latex: ${{ false }}
52+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
53+
run_docker: ${{ true }}
54+
docker_registry: ghcr.io

.gitignore

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# R project files
2+
*.Rproj
3+
.Rproj.user
4+
.Ruserdata
5+
# History files
6+
.Rhistory
7+
.Rapp.history
8+
# Session Data files
9+
.RData
10+
# User-specific files
11+
.Ruserdata
12+
# .DS_Store
13+
# find . -name .DS_Store -print0 | xargs -0 git rm -f --ignore-unmatch
14+
.DS_Store
15+
./.DS_Store
16+
./**/.DS_Store
17+
./**/**/.DS_Store
18+
./**/**/**/.DS_Store
19+
./**/**/**/**/.DS_Store
20+
./**/**/**/**/**/.DS_Store
21+
./**/**/**/**/**/**/.DS_Store
22+
# Example code in package build process
23+
*-Ex.R
24+
# Output files from R CMD build
25+
/*.tar.gz
26+
# Output files from R CMD check
27+
/*.Rcheck/
28+
# RStudio files
29+
.Rproj.user/
30+
# produced vignettes
31+
vignettes/*.html
32+
vignettes/*.pdf
33+
vignettes/*.R
34+
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
35+
.httr-oauth
36+
# knitr and R markdown default cache directories
37+
*_cache/
38+
/cache/
39+
# Temporary files created by R markdown
40+
*.utf8.md
41+
*.knit.md
42+
# R Environment Variables
43+
.Renviron
44+
*.tbi
45+
Rplots.pdf

DESCRIPTION

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
Package: echoAI
2+
Type: Package
3+
Title: Echoverse Module: AI/ML Variant-Level Predictions
4+
Version: 0.99.0
5+
Date: 2026-03-14
6+
Authors@R:
7+
c(person(given = "Brian",
8+
family = "Schilder",
9+
role = c("aut","cre"),
10+
email = "brian_schilder@alumni.brown.edu",
11+
comment = c(ORCID = "0000-0001-5949-2191")),
12+
person(given = "Jack",
13+
family = "Humphrey",
14+
role = c("aut"),
15+
email = "Jack.Humphrey@mssm.edu",
16+
comment = c(ORCID = "0000-0002-6274-6620")),
17+
person(given = "Towfique",
18+
family = "Raj",
19+
role = c("aut"),
20+
email = "towfique.raj@mssm.edu",
21+
comment = c(ORCID = "0000-0002-9355-5704"))
22+
)
23+
Description: Echoverse module: API access to variant-level AI/ML predictions
24+
including IMPACT (Inference and Modeling of Phenotype-related ACtive
25+
Transcription) for immune cell annotations. Part of the echoverse suite
26+
for genomic fine-mapping.
27+
URL: https://github.com/RajLabMSSM/echoAI
28+
BugReports: https://github.com/RajLabMSSM/echoAI/issues
29+
Encoding: UTF-8
30+
Depends: R (>= 4.1)
31+
biocViews: Software, Genetics, FunctionalGenomics, SystemsBiology
32+
Imports:
33+
echotabix,
34+
echodata,
35+
downloadR,
36+
data.table,
37+
dplyr,
38+
stats,
39+
utils,
40+
methods,
41+
ggplot2
42+
Suggests:
43+
rworkflows,
44+
markdown,
45+
rmarkdown,
46+
remotes,
47+
knitr,
48+
BiocStyle,
49+
covr,
50+
testthat (>= 3.0.0),
51+
ggpubr,
52+
patchwork,
53+
ggrepel,
54+
ggridges,
55+
ComplexHeatmap,
56+
RColorBrewer,
57+
pals,
58+
shades,
59+
reshape2,
60+
stringr,
61+
R.utils,
62+
tidyr
63+
Remotes:
64+
github::RajLabMSSM/echodata,
65+
github::RajLabMSSM/echotabix,
66+
github::RajLabMSSM/downloadR
67+
RoxygenNote: 7.3.3
68+
VignetteBuilder: knitr
69+
License: GPL-3
70+
Config/testthat/edition: 3
71+
LazyData: true

NAMESPACE

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(IMPACT_compute_enrichment)
4+
export(IMPACT_get_annotation_key)
5+
export(IMPACT_get_annotations)
6+
export(IMPACT_get_ldscores)
7+
export(IMPACT_get_top_annotations)
8+
export(IMPACT_heatmap)
9+
export(IMPACT_iterate_enrichment)
10+
export(IMPACT_iterate_get_annotations)
11+
export(IMPACT_plot_enrichment)
12+
export(IMPACT_plot_impact_score)
13+
export(IMPACT_postprocess_annotations)
14+
export(IMPACT_query)
15+
export(IMPACT_snp_group_boxplot)
16+
importFrom(data.table,":=")
17+
importFrom(data.table,data.table)
18+
importFrom(data.table,dcast)
19+
importFrom(data.table,fread)
20+
importFrom(data.table,fwrite)
21+
importFrom(data.table,melt.data.table)
22+
importFrom(data.table,merge.data.table)
23+
importFrom(data.table,rbindlist)
24+
importFrom(downloadR,zenodo_upload)
25+
importFrom(dplyr,arrange)
26+
importFrom(dplyr,group_by)
27+
importFrom(dplyr,mutate)
28+
importFrom(dplyr,mutate_at)
29+
importFrom(dplyr,n_distinct)
30+
importFrom(dplyr,select)
31+
importFrom(dplyr,slice)
32+
importFrom(dplyr,slice_head)
33+
importFrom(dplyr,summarise)
34+
importFrom(dplyr,summarise_at)
35+
importFrom(dplyr,top_n)
36+
importFrom(echodata,find_consensus_snps_no_polyfun)
37+
importFrom(echodata,snp_group_colorDict)
38+
importFrom(echodata,snp_group_filters)
39+
importFrom(echotabix,construct_query)
40+
importFrom(echotabix,convert)
41+
importFrom(echotabix,query)
42+
importFrom(ggplot2,aes)
43+
importFrom(ggplot2,element_blank)
44+
importFrom(ggplot2,element_text)
45+
importFrom(ggplot2,facet_grid)
46+
importFrom(ggplot2,geom_boxplot)
47+
importFrom(ggplot2,geom_col)
48+
importFrom(ggplot2,geom_density)
49+
importFrom(ggplot2,geom_hline)
50+
importFrom(ggplot2,geom_jitter)
51+
importFrom(ggplot2,geom_point)
52+
importFrom(ggplot2,geom_violin)
53+
importFrom(ggplot2,geom_vline)
54+
importFrom(ggplot2,ggplot)
55+
importFrom(ggplot2,ggsave)
56+
importFrom(ggplot2,labs)
57+
importFrom(ggplot2,scale_color_viridis_c)
58+
importFrom(ggplot2,scale_fill_manual)
59+
importFrom(ggplot2,theme)
60+
importFrom(ggplot2,theme_bw)
61+
importFrom(ggplot2,ylim)
62+
importFrom(stats,median)
63+
importFrom(stats,setNames)
64+
importFrom(utils,combn)
65+
importFrom(utils,data)

R/IMPACT_enrichment.R

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#' Compute enrichment of IMPACT scores
#'
#' Conduct IMPACT enrichment between SNP groups and fine-mapping methods.
#' For each annotation (a unique combination of \code{TF}, \code{Tissue},
#' \code{Cell} and \code{CellDeriv}) and each SNP group, enrichment is
#' the fraction of the total IMPACT signal carried by the group's SNPs,
#' divided by the fraction of distinct SNPs that belong to the group.
#'
#' The SNP groups and the columns that define them are:
#' \itemize{
#'   \item \code{leadGWAS}: \code{leadSNP} is \code{TRUE}.
#'   \item \code{UCS}: \code{Support > 0}.
#'   \item \code{ABF_CS}: \code{ABF.CS > 0}.
#'   \item \code{FINEMAP_CS}: \code{FINEMAP.CS > 0}.
#'   \item \code{SUSIE_CS}: \code{SUSIE.CS > 0}.
#'   \item \code{POLYFUN_CS}: \code{POLYFUN_SUSIE.CS > 0}.
#'   \item \code{Consensus}: \code{Consensus_SNP} is \code{TRUE}.
#' }
#' Missing \code{IMPACT_score} values are treated as 0. A group with no
#' member SNPs (including a group whose defining column is absent from
#' \code{annot_melt}) has an enrichment of \code{NaN}.
#'
#' @param annot_melt A melted \code{data.table} of IMPACT annotations
#' with columns including IMPACT_score, SNP, TF, Tissue, Cell, CellDeriv,
#' and various fine-mapping result columns.
#' @param locus Optional locus name to add to the output.
#'
#' @return A \code{data.table} of enrichment results per SNP group and
#' annotation, sorted by decreasing enrichment, with \code{TF} and
#' \code{SNP.group} stored as ordered factors.
#'
#' @export
#' @family IMPACT
#' @importFrom dplyr group_by summarise arrange n_distinct
#' @importFrom data.table rbindlist
#' @examples
#' \dontrun{
#' enrich <- IMPACT_compute_enrichment(annot_melt = annot_melt,
#'                                     locus = "BST1")
#' }
IMPACT_compute_enrichment <- function(annot_melt,
                                      locus = NULL) {

    enrichment <- NULL;

    # Fail early: without these columns the summaries below would
    # silently evaluate to NaN instead of signalling the problem.
    required_cols <- c("TF", "Tissue", "Cell", "CellDeriv",
                       "IMPACT_score", "SNP")
    missing_cols <- setdiff(required_cols, names(annot_melt))
    if (length(missing_cols) > 0) {
        stop("annot_melt is missing required column(s): ",
             paste(missing_cols, collapse = ", "),
             call. = FALSE)
    }

    # Work on a plain data.frame copy so the caller's object is never
    # modified, whatever class it arrived as.
    annot_melt <- as.data.frame(annot_melt)
    annot_melt$IMPACT_score[is.na(annot_melt$IMPACT_score)] <- 0

    # One entry per SNP group: the column that defines membership and
    # whether membership means "value > 0" (credible-set style columns)
    # or "flag is TRUE" (logical columns).
    group_specs <- list(
        "leadGWAS"   = list(column = "leadSNP",          positive = FALSE),
        "UCS"        = list(column = "Support",          positive = TRUE),
        "ABF_CS"     = list(column = "ABF.CS",           positive = TRUE),
        "FINEMAP_CS" = list(column = "FINEMAP.CS",       positive = TRUE),
        "SUSIE_CS"   = list(column = "SUSIE.CS",         positive = TRUE),
        "POLYFUN_CS" = list(column = "POLYFUN_SUSIE.CS", positive = TRUE),
        "Consensus"  = list(column = "Consensus_SNP",    positive = FALSE)
    )
    SNP.groups <- lapply(group_specs, function(spec) {
        in_group <- .IMPACT_group_mask(annot_melt = annot_melt,
                                       column = spec$column,
                                       positive = spec$positive)
        .IMPACT_group_enrichment(annot_melt = annot_melt,
                                 in_group = in_group)
    })

    enrich <- data.table::rbindlist(SNP.groups, idcol = "SNP.group") |>
        dplyr::arrange(-enrichment)
    enrich <- cbind(Locus = locus, enrich)
    enrich$TF <- factor(enrich$TF, ordered = TRUE)
    enrich$SNP.group <- factor(enrich$SNP.group,
                               levels = names(SNP.groups),
                               ordered = TRUE)
    return(enrich)
}


#' Build a SNP-group membership mask
#'
#' Returns one \code{TRUE}/\code{FALSE} value per row of
#' \code{annot_melt}. \code{NA} entries count as non-members, and a
#' column that does not exist yields an all-\code{FALSE} mask.
#'
#' @param annot_melt A \code{data.frame} of melted IMPACT annotations.
#' @param column Name of the column that defines group membership.
#' @param positive If \code{TRUE}, membership is \code{column > 0};
#' otherwise the column is coerced to logical and used as a flag.
#'
#' @return A logical vector with no missing values.
#' @keywords internal
#' @noRd
.IMPACT_group_mask <- function(annot_melt,
                               column,
                               positive = TRUE) {
    values <- annot_melt[[column]]
    if (is.null(values)) {
        return(rep(FALSE, nrow(annot_melt)))
    }
    # Coerce flags to logical so 0/1-coded columns act as a mask rather
    # than as positional indices.
    flags <- if (positive) values > 0 else as.logical(values)
    # %in% never returns NA, so NA entries become FALSE.
    flags %in% TRUE
}


#' Per-annotation enrichment for one SNP group
#'
#' @param annot_melt A \code{data.frame} of melted IMPACT annotations
#' containing TF, Tissue, Cell, CellDeriv, IMPACT_score and SNP.
#' @param in_group Logical vector (one value per row) marking the rows
#' that belong to the SNP group.
#'
#' @return An ungrouped table with one row per annotation and an
#' \code{enrichment} column.
#' @keywords internal
#' @noRd
.IMPACT_group_enrichment <- function(annot_melt,
                                     in_group) {

    TF <- Tissue <- Cell <- CellDeriv <- IMPACT_score <- SNP <- NULL;
    .in_group <- NULL;

    annot_melt$.in_group <- in_group
    annot_melt |>
        dplyr::group_by(TF, Tissue, Cell, CellDeriv) |>
        dplyr::summarise(
            enrichment = (sum(IMPACT_score[.in_group], na.rm = TRUE) /
                              sum(IMPACT_score, na.rm = TRUE)) /
                (dplyr::n_distinct(SNP[.in_group], na.rm = TRUE) /
                     dplyr::n_distinct(SNP, na.rm = TRUE)),
            # Drop grouping explicitly: avoids the per-call
            # "grouped output by" message and leftover groups.
            .groups = "drop"
        )
}
98+
99+
100+
#' Iterate IMPACT enrichment tests
#'
#' Run \code{\link{IMPACT_compute_enrichment}} once per unique,
#' non-missing value of \code{ANNOT_MELT$Locus} and row-bind the
#' per-locus results.
#'
#' @param ANNOT_MELT A melted \code{data.table} of IMPACT annotations
#' that must include a \code{Locus} column.
#' @param verbose Print messages.
#'
#' @return A \code{data.table} of enrichment results across all loci.
#'
#' @export
#' @family IMPACT
#' @importFrom data.table rbindlist
#' @examples
#' \dontrun{
#' ENRICH <- IMPACT_iterate_enrichment(ANNOT_MELT = ANNOT_MELT)
#' }
IMPACT_iterate_enrichment <- function(ANNOT_MELT,
                                      verbose = TRUE) {

    if (!"Locus" %in% names(ANNOT_MELT)) {
        stop("ANNOT_MELT must contain a 'Locus' column.", call. = FALSE)
    }

    # Rows with a missing locus cannot be assigned to any locus, so
    # they are skipped rather than passed on as an empty subset.
    loci <- unique(ANNOT_MELT$Locus)
    loci <- loci[!is.na(loci)]

    ENRICH <- lapply(loci, function(locus) {
        messager("+ IMPACT:: Locus =", locus, v = verbose)
        # Explicit indexing instead of subset(): no non-standard
        # evaluation, so a column that happens to be called `locus`
        # cannot shadow the loop variable. which() drops NA matches.
        keep <- which(ANNOT_MELT$Locus == locus)
        annot_melt <- ANNOT_MELT[keep, ]
        IMPACT_compute_enrichment(annot_melt = annot_melt,
                                  locus = locus)
    })
    ENRICH <- data.table::rbindlist(ENRICH)
    return(ENRICH)
}

0 commit comments

Comments
 (0)