Skip to content

Commit b86c812

Browse files
authored
Merge pull request #21 from icbi-lab/copykat
copyKAT function
2 parents e8960f9 + bbac98e commit b86c812

File tree

9 files changed

+220
-9
lines changed

9 files changed

+220
-9
lines changed

.github/workflows/test.yml

+30-5
Original file line numberDiff line numberDiff line change
@@ -8,37 +8,62 @@ on:
88

99
jobs:
  test:
    # `matrix.os` is an object ({os, rspm}); the runner label lives in `.os`.
    runs-on: ${{ matrix.os.os }}
    name: ${{ matrix.os.os }} (R=${{ matrix.R }}, Python=${{ matrix.python-version }})
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.8]
        R: ['release']
        os:
          - {os: ubuntu-latest, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
          - {os: macos-latest}
          - {os: windows-latest}

    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      RSPM: ${{ matrix.os.rspm }}

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0 # required for setuptools-scm

      - uses: actions/cache@v1
        with:
          path: ~/.cache/pip
          # `runner.os` is a plain string (it has no `.os` property); using
          # `runner.os.os` yields an empty prefix and one cache shared by all OSes.
          key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install macOS system dependencies
        # Compare the label inside the matrix object; comparing the object
        # itself to a string is never true, so this step would be skipped.
        if: matrix.os.os == 'macos-latest'
        run: |
          brew install cairo pkg-config autoconf automake libtool

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup R
        uses: r-lib/actions/setup-r@v1
        with:
          r-version: ${{ matrix.R }}

      - name: Install dependencies
        run: |
          pip install .[test,copykat]

      - name: Install R dependencies
        run: |
          Rscript -e "install.packages('remotes')" -e "remotes::install_github('navinlabcode/copykat')"

      - name: Check black formatting
        run: |
          black --check .

      - name: Test with pytest
        run: |
          pytest

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
test_copykat*
12
*.code-workspace
23
.vscode/*
34
!.vscode/settings.json.default

README.rst

+16-3
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ but plays nicely with `scanpy <https://scanpy.readthedocs.io/en/stable/index.htm
2929
.. image:: img/infercnv_heatmap.png
3030
:align: center
3131
:alt: The main result of infercnv
32-
33-
32+
33+
3434
**WARNING**:
35-
35+
3636
**This package is still experimental. The results have not been validated,
3737
except in that they look similar, but not identical, to the results of InferCNV.**
3838

@@ -84,6 +84,19 @@ There are several alternative options to install infercnvpy:
8484
.. where `tag` is one of `these tags <https://quay.io/repository/biocontainers/infercnvpy?tab=tags>`_.
8585
8686
87+
To (optionally) run the :code:`copyKAT` algorithm, you need a working R installation
88+
and the `copykat <https://github.com/navinlabcode/copykat#step-1-installation>`_ package
89+
installed. Usually, if :code:`R` is in your :code:`PATH`, `rpy2 <https://rpy2.github.io/>`_ automatically
90+
detects your R installation. If you get an error message while importing :code:`infercnvpy`,
91+
try setting the :code:`R_HOME` environment variable before importing infercnvpy:
92+
93+
.. code-block:: python
94+
95+
import os
96+
os.environ["R_HOME"] = "/usr/lib/R"
97+
import infercnvpy
98+
99+
87100
Release notes
88101
^^^^^^^^^^^^^
89102
See the `release section <https://github.com/icbi-lab/infercnvpy/releases>`_.

docs/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ InferCNV
5555
:toctree: ./generated
5656

5757
infercnv
58+
copykat
5859
cnv_score
5960

6061
Embeddings

docs/references.bib

+12
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,15 @@ @article{Tirosh2016
2323
month = {11}
2424
}
2525

26+
@article{Gao2021,
27+
doi = {10.1038/s41587-020-00795-2},
28+
url = {https://doi.org/10.1038/s41587-020-00795-2},
29+
year = {2021},
30+
month = jan,
31+
publisher = {Nature},
32+
volume = {39},
33+
pages = {599--608},
34+
author = {Gao R. and Bai S. and Henderson YC and Lin Y. and Schalck A. and Yan Y. and Kumar T. and Hu M. and Sei E. and Davis A. and Wang F. and Shaitelman SF and Wang JR and Chen K. and Moulder S. and Lai SY and Navin NE},
35+
title = {Delineating copy number and clonal substructure in human tumors from single-cell transcriptomes},
36+
journal = {Nature Biotechnology}
37+
}

infercnvpy/tests/test_tools.py

+6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from infercnvpy.tl._infercnv import _get_reference
44
import pytest
55
import numpy as np
6+
import scanpy as sc
67
import numpy.testing as npt
78

89

@@ -50,6 +51,11 @@ def test_infercnv(adata_oligodendroma, reference_key, reference_cat):
5051
)
5152

5253

54+
def test_copykat(adata_oligodendroma):
    """Smoke test: `cnv.tl.copykat` runs on a 50-cell subsample of the fixture."""
    n_cells = 50
    # The subsample result is not reassigned, so the fixture object itself is
    # what gets passed on to copyKAT below.
    sc.pp.subsample(adata_oligodendroma, n_obs=n_cells)
    cnv.tl.copykat(adata_oligodendroma)
57+
58+
5359
def test_workflow(adata_oligodendroma):
5460
cnv.tl.infercnv(adata_oligodendroma)
5561
cnv.tl.pca(adata_oligodendroma)

infercnvpy/tl/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import Union
22
from ._infercnv import infercnv, cnv_score
3+
from ._copykat import copykat
34
import numpy as np
45
from anndata import AnnData
56
import scanpy as sc

infercnvpy/tl/_copykat.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
from typing import Optional
2+
import pandas as pd
3+
from scipy.sparse import issparse
4+
from anndata import AnnData
5+
from scanpy import logging
6+
import os
7+
from multiprocessing import cpu_count
8+
9+
10+
def copykat(
    adata: AnnData,
    gene_ids: str = "S",
    segmentation_cut: float = 0.1,
    distance: str = "euclidean",
    s_name: str = "copykat_result",
    min_genes_chr: int = 5,
    key_added: str = "cnv",
    inplace: bool = True,
    layer: Optional[str] = None,
    n_jobs: Optional[int] = None,
) -> pd.DataFrame:  # NOTE(review): the value actually returned is an array or None
    """Inference of genomic copy number and subclonal structure.

    Runs CopyKAT (Copynumber Karyotyping of Tumors) :cite:`Gao2021` based on integrative
    Bayesian approaches to identify genome-wide aneuploidy at 5MB resolution
    in single cells to separate tumor cells from normal cells, and tumor
    subclones using high-throughput sc-RNAseq data.

    Note on input data from the original authors:

        The matrix values are often the count of unique molecular identifier (UMI)
        from nowadays high throughput single cell RNAseq data. The early generation of
        scRNAseq data may be summarized as TPM values or total read counts,
        which should also work.

    This means that unlike for :func:`infercnvpy.tl.infercnv` the input data
    should not be log-transformed.

    CopyKAT also does NOT require running :func:`infercnvpy.io.genomic_position_from_gtf`,
    it infers the genomic position from the gene symbols in `adata.var_names`.

    You can find more info on GitHub: https://github.com/navinlabcode/copykat

    Parameters
    ----------
    adata
        annotated data matrix
    key_added
        Key under which the copyKAT scores will be stored in `adata.obsm` and `adata.uns`.
    inplace
        If True, store the result in adata, otherwise return it.
    layer
        Layer of `adata` to use as expression matrix. If `None`, `adata.X` is used.
    gene_ids
        gene id type: Symbol ("S") or Ensembl ("E").
    segmentation_cut
        segmentation parameters, input 0 to 1; larger looser criteria.
    distance
        distance methods include "euclidean", and correlation converted distance include "pearson" and "spearman".
    s_name
        sample (output file) name.
    min_genes_chr
        minimal number of genes per chromosome for cell filtering.
    n_jobs
        Number of cores to use for copyKAT analysis. Per default, uses all cores
        available on the system. Multithreading does not work on Windows and this
        value will be ignored.

    Returns
    -------
    Depending on the value of `inplace`, either returns `None` or an array
    with the copyKAT scores (one row per cell, one column per genomic bin).

    Raises
    ------
    ImportError
        If `rpy2` or the R package `copykat` cannot be imported.
    """
    if n_jobs is None:
        n_jobs = cpu_count()
    # Any non-POSIX platform (e.g. Windows) is forced to a single core.
    if os.name != "posix":
        n_jobs = 1

    # rpy2 is an optional dependency, therefore it is imported lazily here.
    try:
        from rpy2.robjects.packages import importr
        from rpy2.robjects import pandas2ri, numpy2ri
        from rpy2.robjects.conversion import localconverter
        from rpy2 import robjects as ro
    except ImportError as err:
        raise ImportError("copyKAT requires rpy2 to be installed. ") from err

    # Only the import side effect is needed; the R code below calls `copykat`
    # by name, so the returned package handle is not kept.
    try:
        importr("copykat")
    except ImportError as err:
        raise ImportError(
            "copyKAT requires a valid R installation with the following packages: "
            "copykat"
        ) from err

    logging.info("Preparing R objects")
    with localconverter(ro.default_converter + numpy2ri.converter):
        # Fix: the original read the layer from an undefined `tmp_adata`.
        expr = adata.X if layer is None else adata.layers[layer]
        # copyKAT gets genes in rows and cells in columns, hence the transpose.
        if issparse(expr):
            expr = expr.T.toarray()
        else:
            expr = expr.T
        ro.globalenv["expr_r"] = ro.conversion.py2rpy(expr)
    ro.globalenv["gene_names"] = ro.conversion.py2rpy(list(adata.var.index))
    ro.globalenv["cell_IDs"] = ro.conversion.py2rpy(list(adata.obs.index))
    ro.globalenv["n_jobs"] = ro.conversion.py2rpy(n_jobs)
    ro.globalenv["gene_ids"] = ro.conversion.py2rpy(gene_ids)
    ro.globalenv["segmentation_cut"] = ro.conversion.py2rpy(segmentation_cut)
    ro.globalenv["distance"] = ro.conversion.py2rpy(distance)
    ro.globalenv["s_name"] = ro.conversion.py2rpy(s_name)
    ro.globalenv["min_gene_chr"] = ro.conversion.py2rpy(min_genes_chr)

    logging.info("Running copyKAT")
    ro.r(
        """
        rownames(expr_r) <- gene_names
        colnames(expr_r) <- cell_IDs
        copyKAT_run <- copykat(rawmat = expr_r, id.type = gene_ids, ngene.chr = min_gene_chr, win.size = 25,
                                KS.cut = segmentation_cut, sam.name = s_name, distance = distance, norm.cell.names = "",
                                n.cores = n_jobs, output.seg = FALSE)
        copyKAT_result <- copyKAT_run$CNAmat
        colnames(copyKAT_result) <- c('chrom', 'chrompos', 'abspos', cell_IDs)
        """
    )

    with localconverter(
        ro.default_converter + numpy2ri.converter + pandas2ri.converter
    ):
        copyKAT_result = ro.conversion.rpy2py(ro.globalenv["copyKAT_result"])

    # Map each chromosome to the row label of its first row in the result:
    # `drop_duplicates` keeps the first occurrence and `itertuples` yields
    # (row label, chrom).
    # NOTE(review): the row labels come from the R data.frame and are
    # presumably 1-based - confirm against consumers of `chr_pos`.
    first_rows = copyKAT_result.loc[:, ["chrom"]].drop_duplicates()
    chrom_pos = {
        "chr_pos": {f"chr{chrom}": int(pos) for pos, chrom in first_rows.itertuples()}
    }

    # Drop the three position columns and transpose, so that rows are cells.
    cnv_matrix = copyKAT_result.drop(["chrom", "chrompos", "abspos"], axis=1).values.T

    if inplace:
        adata.uns[key_added] = chrom_pos
        adata.obsm[f"X_{key_added}"] = cnv_matrix
    else:
        return cnv_matrix

pyproject.toml

+5-1
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,13 @@ requires = [
3636
]
3737

3838
[tool.flit.metadata.requires-extra]
39+
copykat = [
40+
'rpy2'
41+
]
3942
test = [
4043
'pytest',
41-
'black'
44+
'black',
45+
'pre-commit',
4246
]
4347
doc = [
4448
'sphinx>=3.0.1,<3.1',

0 commit comments

Comments
 (0)