Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
126 commits
Select commit Hold shift + click to select a range
241208c
main script with more comments
Congm12 Aug 8, 2024
96e873f
add comment to hmrf main
Congm12 Aug 8, 2024
0cdbcc2
add versions in setup
Congm12 Aug 8, 2024
0b66b45
black formatted codes
Congm12 Aug 11, 2024
4901b56
add logging and tidy for calicost_main
Aug 12, 2024
00813b3
add logging info for hmrf_concatenate_pipeline.
Aug 12, 2024
0ecfca4
add logging for hmrfmix_concatenate_pipeline
Aug 12, 2024
7eb9ba1
add logging for hmm_NB_BB_nophasing
Aug 12, 2024
3b10e3f
adding logging for update params in utils_hmm
Aug 12, 2024
a5ddc39
add logging of hmm_NB_BB_nophasing_v2
Aug 12, 2024
4d23df4
add logging to hmm_NB_BB_phaseswitch
Aug 12, 2024
29235a2
add logging to hmrf and utils_IO.
Aug 12, 2024
9686939
add logging to hmrf_pipeline
Aug 12, 2024
1da81bf
add logging on Weighted_NegativeBinomial
Aug 12, 2024
46f692b
add logging of emission fitting.
Aug 12, 2024
b44b999
finish logging utils distribution fitting
Aug 12, 2024
a3a569d
set error on MKL thread setting in calicost_supervised.
Aug 12, 2024
7641681
fix == bug in setup.py
Aug 12, 2024
a08f2e7
fix errors with configuration of inner and outer loops
Aug 12, 2024
a51d402
fix issues around string and bracks from additional logging.
Aug 12, 2024
849c20f
fix errors around disp=False (statsmodels) and inconsistent conda/pip…
Aug 12, 2024
ea22b72
fix issue with multiple loggers.
Aug 12, 2024
b6f94b1
fix prefix bug
Aug 12, 2024
8eb653c
revert imports & prefix checks.
Aug 15, 2024
b657a9e
revert imports in calicost supervised.
Aug 15, 2024
e340a40
revert import for hmm_nophasing
Aug 15, 2024
25b35d1
revert imports for hmm_NB_BB_nophasing_v2
Aug 15, 2024
bdd5056
revert imports for hmm phaseswitch
Aug 15, 2024
6cc7e6b
revert imports for hmrf
Aug 15, 2024
3d7f0b7
revert imports for utils IO.
Aug 15, 2024
3b34e64
revert imports for utils hmm
Aug 15, 2024
627d6b9
revert imports for distribution fitting.
Aug 15, 2024
06a368b
add utils hmm njit import
Aug 15, 2024
d343620
fix calicost supervised imports.
Aug 15, 2024
91dfaa2
fix runtime logging.
Aug 15, 2024
2c7c9b5
import bug
Aug 15, 2024
8303c62
fix spelling mistake
Aug 15, 2024
9ed0df5
log comparative likelihoods.
Aug 15, 2024
96d6fa3
fix
Aug 15, 2024
9d3c141
fix
Aug 15, 2024
690fd30
improve runtime logging
Aug 16, 2024
c11b253
update utils hmm logging to show n spots.
Aug 16, 2024
0a3e3d0
fix logging error from multiple args
Aug 16, 2024
7118f8d
fix typo in hmrf logging
Aug 16, 2024
41e0740
update logging strings.
Aug 16, 2024
ffe9f5d
logging edits
Aug 16, 2024
c75ec89
log hmrfmix_reassignment_posterior_concatenate.
Aug 16, 2024
8f3eb26
more logging improvements. remove deprecated code.
Aug 16, 2024
32c4239
add use defaults statement
Aug 16, 2024
3463769
edit logging for ARI and bb diff.
Aug 16, 2024
b6a95ad
log neyman pearson.
Aug 16, 2024
79796c6
improved logging.
Aug 16, 2024
80f9c21
update logging.
Aug 16, 2024
bc44623
update logging
Aug 16, 2024
64c691d
improve logging
Aug 16, 2024
dddc8c9
improved logging
Aug 16, 2024
e807de0
fix logging
Aug 16, 2024
d1ae926
edit logging
Aug 16, 2024
3501640
fix logging
Aug 16, 2024
cfad541
fix logging
Aug 16, 2024
a321ff6
fix bugs
Aug 16, 2024
d4db18d
fix bugs
Aug 17, 2024
94c5a15
fix
Aug 17, 2024
776eeb0
logging fixes
Aug 18, 2024
4d5466a
fix
Aug 18, 2024
11f7c93
add Baum-Welch log lines.
Aug 18, 2024
9d70901
fix
Aug 18, 2024
c91591d
fix logging.
Aug 18, 2024
2003e04
log bandwidth
Aug 18, 2024
27603ec
fix logging
Aug 18, 2024
fafef1f
fix
Aug 18, 2024
ee1810e
fix logging
Aug 18, 2024
03db264
fix logging bug
Aug 18, 2024
cef4eb2
add TODO for gammas in phasing beta binomial
Aug 18, 2024
fbbb9b5
fix
Aug 18, 2024
3409014
fix
Aug 18, 2024
f3c06bc
fix
Aug 18, 2024
48a34a4
fix
Aug 18, 2024
c58ea89
add hmrf logging
Aug 18, 2024
c7d09d9
add cpas
Aug 18, 2024
fbfe56b
log phasing baum welch.
Aug 19, 2024
8969c35
fix
Aug 19, 2024
726bc11
log initial alpha and tau
Aug 19, 2024
6f85e89
fix
Aug 19, 2024
160ca9c
fix
Aug 19, 2024
4a8103f
fix
Aug 19, 2024
575ca76
fix
Aug 19, 2024
75a9132
fix
Aug 19, 2024
35ee342
fix
Aug 19, 2024
0b8720a
write Weighted Beta Binom chain file
Aug 20, 2024
137e4bb
fix
Aug 20, 2024
3d46a85
finishing touches
Aug 20, 2024
9ac416b
ABC for emission models
Aug 26, 2024
2bd629d
abc for emission models
Aug 26, 2024
d4a0599
fix
Aug 26, 2024
431936c
fixes
Aug 26, 2024
bed12f1
fix
Aug 26, 2024
ef27f84
fix chain logging
Aug 26, 2024
22bceb7
fix
Aug 26, 2024
5787061
gzip chains
Aug 26, 2024
16f10e1
fix
Aug 26, 2024
0424169
fix
Aug 26, 2024
207e873
update instance counts.
Aug 26, 2024
f945854
fix
Aug 26, 2024
1613259
fix
Aug 26, 2024
2218ebf
fix
Aug 26, 2024
37eee03
fix
Aug 26, 2024
9591ccc
cleanup before adding ARI for HMM states.
Aug 26, 2024
af2fa1a
precision on likelihood chain
Aug 26, 2024
8416be5
fix
Aug 26, 2024
9882e5e
add HMM ARI
Aug 26, 2024
0febc15
fix
Aug 26, 2024
e0c3712
fix
Aug 26, 2024
ec676e6
fix
Aug 26, 2024
1b56487
fix
Aug 26, 2024
2977647
fix
Aug 26, 2024
ea01b9e
make parent dirs for chains
Aug 26, 2024
890d4a7
fix
Aug 26, 2024
4a1039c
add hamming distances
Aug 26, 2024
ec8881b
fixes
Aug 26, 2024
d883c34
add # of states for hamming.
Aug 26, 2024
6f66d31
fix
Aug 26, 2024
28b0b81
fix increment bug.
Aug 26, 2024
3463cc3
fix
Aug 26, 2024
15de4a7
Merge pull request #10 from michaelJwilson/main_logged_verbose_abc
Congm12 Aug 26, 2024
5c95c47
remove examples
Sep 2, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
name: calicost_env
name: calicost
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This corresponds to the conda environment name, right? If so, shall we change it back, since the README and readthedocs documentation still have `conda activate calicost_env` in the installation instructions?

channels:
- conda-forge
- bioconda
- defaults
dependencies:
- python==3.10
- numpy==1.24.4
- scipy==1.11.3
- samtools==1.18
- bcftools==1.18
- cellsnp-lite
Expand Down
57 changes: 28 additions & 29 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,32 @@
import setuptools

setuptools.setup(
name='calicost',
version='v1.0.0',
python_requires='>=3.8',
packages=['calicost'],
package_dir={'': 'src'},
author='Cong Ma',
author_email='[email protected]',
description='Allele-specific CNAs and spatial cancer clone inference',
long_description='CalicoST infers allele-specific copy number aberrations and cancer clones in spatially resolved transcriptomics data',
url='https://github.com/raphael-group/CalicoST',
install_requires=[
'numpy',
'scipy',
'pandas',
'scikit-learn',
'scanpy',
'anndata',
'numba',
'tqdm',
'statsmodels',
'networkx',
'matplotlib',
'seaborn',
'pysam',
'ete3',
'ipykernel'
],
include_package_data=True
name="calicost",
version="v1.0.0",
python_requires=">=3.8",
packages=["calicost"],
package_dir={"": "src"},
author="Cong Ma",
author_email="[email protected]",
description="Allele-specific CNAs and spatial cancer clone inference",
long_description="CalicoST infers allele-specific copy number aberrations and cancer clones in spatially resolved transcriptomics data",
url="https://github.com/raphael-group/CalicoST",
install_requires=[
"numpy==1.24.4",
"scipy==1.11.3",
"pandas==2.1.1",
"scikit-learn==1.3.2",
"scanpy==1.9.6",
"anndata==0.10.3",
"numba==0.60.0",
"tqdm==4.66.1",
"statsmodels==0.14.0",
"networkx==3.2.1",
"matplotlib==3.7.3",
"seaborn==0.12.2",
"pysam==0.22.1",
"ete3==3.1.3",
"ipykernel",
],
include_package_data=True,
)

2 changes: 1 addition & 1 deletion src/calicost/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = 'v1.0.0'
__version__ = "v1.0.0"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about setting the version to v1.1?

220 changes: 112 additions & 108 deletions src/calicost/allele_starch_generateconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,117 +19,119 @@
def read_configuration_file(filename):
##### [Default settings] #####
config = {
"spaceranger_dir" : None,
"snp_dir" : None,
"output_dir" : None,
"spaceranger_dir": None,
"snp_dir": None,
"output_dir": None,
# supporting files and preprocessing arguments
"hgtable_file" : None,
"normalidx_file" : None,
"tumorprop_file" : None,
"supervision_clone_file" : None,
"filtergenelist_file" : None,
"filterregion_file" : None,
"binsize" : 1,
"rdrbinsize" : 1,
"hgtable_file": None,
"normalidx_file": None,
"tumorprop_file": None,
"supervision_clone_file": None,
"filtergenelist_file": None,
"filterregion_file": None,
"binsize": 1,
"rdrbinsize": 1,
# "secondbinning_min_umi" : 500,
"max_nbins" : 1200,
"avg_umi_perbinspot" : 1.5,
"bafonly" : True,
"max_nbins": 1200,
"avg_umi_perbinspot": 1.5,
"bafonly": True,
# phase switch probability
"nu" : 1,
"logphase_shift" : 1,
"npart_phasing" : 2,
"nu": 1,
"logphase_shift": 1,
"npart_phasing": 2,
# HMRF configurations
"n_clones" : None,
"n_clones_rdr" : 2,
"min_spots_per_clone" : 100,
"min_avgumi_per_clone" : 10,
"maxspots_pooling" : 7,
"tumorprop_threshold" : 0.5,
"max_iter_outer" : 20,
"nodepotential" : "max", # max or weighted_sum
"initialization_method" : "rectangle", # rectangle or datadrive
"num_hmrf_initialization_start" : 0,
"num_hmrf_initialization_end" : 10,
"spatial_weight" : 2.0,
"construct_adjacency_method" : "hexagon",
"construct_adjacency_w" : 1.0,
"n_clones": None,
"n_clones_rdr": 2,
"min_spots_per_clone": 100,
"min_avgumi_per_clone": 10,
"maxspots_pooling": 7,
"tumorprop_threshold": 0.5,
"max_iter_outer": 20,
"nodepotential": "max", # max or weighted_sum
"initialization_method": "rectangle", # rectangle or datadrive
"num_hmrf_initialization_start": 0,
"num_hmrf_initialization_end": 10,
"spatial_weight": 2.0,
"construct_adjacency_method": "hexagon",
"construct_adjacency_w": 1.0,
# HMM configurations
"n_states" : None,
"params" : None,
"t" : None,
"t_phaseing" : 1-1e-4,
"fix_NB_dispersion" : False,
"shared_NB_dispersion" : True,
"fix_BB_dispersion" : False,
"shared_BB_dispersion" : True,
"max_iter" : 30,
"tol" : 1e-3,
"gmm_random_state" : 0,
"np_threshold" : 2.0,
"np_eventminlen" : 10
"n_states": None,
"params": None,
"t": None,
"t_phaseing": 1 - 1e-4,
"fix_NB_dispersion": False,
"shared_NB_dispersion": True,
"fix_BB_dispersion": False,
"shared_BB_dispersion": True,
"max_iter": 30,
"tol": 1e-3,
"gmm_random_state": 0,
"np_threshold": 2.0,
"np_eventminlen": 10,
}

argument_type = {
"spaceranger_dir" : "str",
"snp_dir" : "str",
"output_dir" : "str",
"spaceranger_dir": "str",
"snp_dir": "str",
"output_dir": "str",
# supporting files and preprocessing arguments
"hgtable_file" : "str",
"normalidx_file" : "str",
"tumorprop_file" : "str",
"supervision_clone_file" : "str",
"filtergenelist_file" : "str",
"filterregion_file" : "str",
"binsize" : "int",
"rdrbinsize" : "int",
"hgtable_file": "str",
"normalidx_file": "str",
"tumorprop_file": "str",
"supervision_clone_file": "str",
"filtergenelist_file": "str",
"filterregion_file": "str",
"binsize": "int",
"rdrbinsize": "int",
# "secondbinning_min_umi" : "int",
"max_nbins" : "int",
"avg_umi_perbinspot" : "float",
"bafonly" : "bool",
"max_nbins": "int",
"avg_umi_perbinspot": "float",
"bafonly": "bool",
# phase switch probability
"nu" : "float",
"logphase_shift" : "float",
"npart_phasing" : "int",
"nu": "float",
"logphase_shift": "float",
"npart_phasing": "int",
# HMRF configurations
"n_clones" : "int",
"n_clones_rdr" : "int",
"min_spots_per_clone" : "int",
"min_avgumi_per_clone" : "int",
"maxspots_pooling" : "int",
"tumorprop_threshold" : "float",
"max_iter_outer" : "int",
"nodepotential" : "str",
"initialization_method" : "str",
"num_hmrf_initialization_start" : "int",
"num_hmrf_initialization_end" : "int",
"spatial_weight" : "float",
"construct_adjacency_method" : "str",
"construct_adjacency_w" : "float",
"n_clones": "int",
"n_clones_rdr": "int",
"min_spots_per_clone": "int",
"min_avgumi_per_clone": "int",
"maxspots_pooling": "int",
"tumorprop_threshold": "float",
"max_iter_outer": "int",
"nodepotential": "str",
"initialization_method": "str",
"num_hmrf_initialization_start": "int",
"num_hmrf_initialization_end": "int",
"spatial_weight": "float",
"construct_adjacency_method": "str",
"construct_adjacency_w": "float",
# HMM configurations
"n_states" : "int",
"params" : "str",
"t" : "eval",
"t_phaseing" : "eval",
"fix_NB_dispersion" : "bool",
"shared_NB_dispersion" : "bool",
"fix_BB_dispersion" : "bool",
"shared_BB_dispersion" : "bool",
"max_iter" : "int",
"tol" : "float",
"gmm_random_state" : "int",
"np_threshold" : "float",
"np_eventminlen" : "int"
"n_states": "int",
"params": "str",
"t": "eval",
"t_phaseing": "eval",
"fix_NB_dispersion": "bool",
"shared_NB_dispersion": "bool",
"fix_BB_dispersion": "bool",
"shared_BB_dispersion": "bool",
"max_iter": "int",
"tol": "float",
"gmm_random_state": "int",
"np_threshold": "float",
"np_eventminlen": "int",
}

##### [ read configuration file to update settings ] #####
with open(filename, 'r') as fp:
with open(filename, "r") as fp:
for line in fp:
if line.strip() == "" or line[0] == "#":
continue
# strs = [x.replace(" ", "") for x in line.strip().split(":") if x != ""]
strs = [x.strip() for x in line.strip().split(":") if x != ""]
assert strs[0] in config.keys(), f"{strs[0]} is not a valid configuration parameter! Configuration parameters are: {list(config.keys())}"
assert (
strs[0] in config.keys()
), f"{strs[0]} is not a valid configuration parameter! Configuration parameters are: {list(config.keys())}"
if strs[1].upper() == "NONE":
config[strs[0]] = None
elif argument_type[strs[0]] == "str":
Expand All @@ -141,7 +143,7 @@ def read_configuration_file(filename):
elif argument_type[strs[0]] == "eval":
config[strs[0]] = eval(strs[1])
elif argument_type[strs[0]] == "bool":
config[strs[0]] = (strs[1].upper() == "TRUE")
config[strs[0]] = strs[1].upper() == "TRUE"
elif argument_type[strs[0]] == "list_str":
config[strs[0]] = strs[1].split(" ")
# assertions
Expand All @@ -153,10 +155,9 @@ def read_configuration_file(filename):


def write_config_file(outputfilename, config):
list_argument_io = ["spaceranger_dir",
"snp_dir",
"output_dir"]
list_argument_sup = ["hgtable_file",
list_argument_io = ["spaceranger_dir", "snp_dir", "output_dir"]
list_argument_sup = [
"hgtable_file",
"normalidx_file",
"tumorprop_file",
"supervision_clone_file",
Expand All @@ -167,11 +168,11 @@ def write_config_file(outputfilename, config):
# "secondbinning_min_umi",
"max_nbins",
"avg_umi_perbinspot",
"bafonly"]
list_argument_phase = ["nu",
"logphase_shift",
"npart_phasing"]
list_argument_hmrf = ["n_clones",
"bafonly",
]
list_argument_phase = ["nu", "logphase_shift", "npart_phasing"]
list_argument_hmrf = [
"n_clones",
"n_clones_rdr",
"min_spots_per_clone",
"min_avgumi_per_clone",
Expand All @@ -180,12 +181,14 @@ def write_config_file(outputfilename, config):
"max_iter_outer",
"nodepotential",
"initialization_method",
"num_hmrf_initialization_start",
"num_hmrf_initialization_start",
"num_hmrf_initialization_end",
"spatial_weight",
"construct_adjacency_method",
"construct_adjacency_w"]
list_argument_hmm = ["n_states",
"construct_adjacency_w",
]
list_argument_hmm = [
"n_states",
"params",
"t",
"t_phaseing",
Expand All @@ -197,8 +200,9 @@ def write_config_file(outputfilename, config):
"tol",
"gmm_random_state",
"np_threshold",
"np_eventminlen"]
with open(outputfilename, 'w') as fp:
"np_eventminlen",
]
with open(outputfilename, "w") as fp:
#
for k in list_argument_io:
fp.write(f"{k} : {config[k]}\n")
Expand Down Expand Up @@ -232,10 +236,10 @@ def main(argv):
config = read_configuration_file(template_configuration_file)
for r in range(hmrf_seed_s, hmrf_seed_t):
config["num_hmrf_initialization_start"] = r
config["num_hmrf_initialization_end"] = r+1
config["num_hmrf_initialization_end"] = r + 1
write_config_file(f"{outputdir}/configfile{r}", config)


if __name__ == "__main__":
if len(sys.argv) > 1:
main(sys.argv)
main(sys.argv)
Loading