From 68f9ae799b1f3f895c67cb6d5d7c77c1e2575b4f Mon Sep 17 00:00:00 2001 From: siebrenf Date: Mon, 17 Nov 2025 14:17:14 +0100 Subject: [PATCH 1/4] update genomepy + fix install --- requirements.yaml | 8 +++++--- seq2science/util.py | 13 +++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/requirements.yaml b/requirements.yaml index ede648ee8..f3e380eaa 100644 --- a/requirements.yaml +++ b/requirements.yaml @@ -5,9 +5,9 @@ channels: dependencies: - conda-forge::python=3.10 - bioconda::snakemake-minimal=7.25.0 - - conda-forge::conda=24.1.2 - - conda-forge::mamba=1.5.9 - - bioconda::genomepy=0.16.2 + - conda-forge::conda=23.5.2 + - conda-forge::mamba=1.5.1 + - bioconda::genomepy=0.16.3 - conda-forge::biopython=1.81 - conda-forge::filelock=3.10.7 - conda-forge::pyyaml=6.0 @@ -20,3 +20,5 @@ dependencies: - conda-forge::argcomplete=3.0.5 - conda-forge::tabulate=0.9.0 - conda-forge::conda-ecosystem-user-package-isolation=1.0 + - conda-forge::setuptools=59.4.0 + - conda-forge::toml=0.10.2 diff --git a/seq2science/util.py b/seq2science/util.py index e7226945c..4d23c0f8a 100644 --- a/seq2science/util.py +++ b/seq2science/util.py @@ -1039,12 +1039,21 @@ def _get_current_version(package): """ Attempt to return a given package's version """ - # package-isolation is not a package + # conda-ecosystem-user-package-isolation is not a package # xdg keeps its version in a pyproject.toml (not included) # argcomplete keeps its version in a setup.py (not included) # trackhub versioning is weird # mamba is not a package - if package in ["conda-ecosystem-user-package-isolation", "xdg", "argcomplete", "trackhub", "mamba"]: + # setuptools and toml are for packaging only + if package in [ + "conda-ecosystem-user-package-isolation", + "xdg", + "argcomplete", + "trackhub", + "mamba", + "setuptools", + "toml", + ]: return None if package == "python": return sys.version.split()[0] From f60af62d961665e65c6cec8548f0938c8977fb56 Mon Sep 17 00:00:00 2001 From: siebrenf Date: Mon, 17 Nov 2025 17:08:17 +0100 Subject: [PATCH 2/4] update genomepy version everywhere --- seq2science/envs/assembly_stats.yaml | 2 +- seq2science/envs/pytxi.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/seq2science/envs/assembly_stats.yaml b/seq2science/envs/assembly_stats.yaml index 1fd0002df..9eecd16bd 100644 --- a/seq2science/envs/assembly_stats.yaml +++ b/seq2science/envs/assembly_stats.yaml @@ -5,5 +5,5 @@ channels: dependencies: - conda-forge::python=3.9 - conda-forge::matplotlib-base=3.7.1 - - bioconda::genomepy=0.15.0 + - bioconda::genomepy=0.16.3 - conda-forge::conda-ecosystem-user-package-isolation=1.0 diff --git a/seq2science/envs/pytxi.yaml b/seq2science/envs/pytxi.yaml index dffb516e8..1d9193a2f 100644 --- a/seq2science/envs/pytxi.yaml +++ b/seq2science/envs/pytxi.yaml @@ -3,7 +3,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::genomepy=0.15.0 + - bioconda::genomepy=0.16.3 - conda-forge::conda-ecosystem-user-package-isolation=1.0 - pip: - git+https://github.com/vanheeringen-lab/pytxi.git@v0.1.2 From b9953954ed819e29fb6d2bf1819d59b96b07d6b6 Mon Sep 17 00:00:00 2001 From: siebrenf Date: Mon, 17 Nov 2025 17:13:31 +0100 Subject: [PATCH 3/4] update gimme --- seq2science/envs/gimme.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seq2science/envs/gimme.yaml b/seq2science/envs/gimme.yaml index 0ebffbcb5..bac371a07 100644 --- a/seq2science/envs/gimme.yaml +++ b/seq2science/envs/gimme.yaml @@ -3,7 +3,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::gimmemotifs-minimal=0.18.0 + - bioconda::gimmemotifs-minimal=0.18.1 - bioconda::gffread=0.12.7 - bioconda::orthofinder=2.5.4 - conda-forge::xgboost=1.0.2 From 0a5093446ea2e39c9494a338138151b9bf16858d Mon Sep 17 00:00:00 2001 From: siebrenf Date: Sat, 22 Nov 2025 09:15:07 +0100 Subject: [PATCH 4/4] less searching --- seq2science/rules/configuration_generic.smk | 6 +++--- seq2science/rules/configuration_logging.smk | 2 ++ seq2science/rules/configuration_workflows.smk | 12 ++++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/seq2science/rules/configuration_generic.smk b/seq2science/rules/configuration_generic.smk index f64a9663f..afcb1c77c 100644 --- a/seq2science/rules/configuration_generic.smk +++ b/seq2science/rules/configuration_generic.smk @@ -336,10 +336,10 @@ if WORKFLOW != "download_fastq": used_assemblies = list(set(samples["assembly"])) # we make a temporary _used assemblies as gimme maelstrom might need more assemblies downloaded # and those assemblies need to be saved in the local/remote assemblies variable - if "motif2factors_reference" in config and config["run_gimme_maelstrom"]: + _used_assemblies = used_assemblies + if (config.get("run_gimme_maelstrom") and config.get("infer_motif2factors") and + set(samples["assembly"]) - set(config.get("motif2factors_database_references"))): _used_assemblies = used_assemblies + config["motif2factors_reference"] + config["motif2factors_database_references"] - else: - _used_assemblies = used_assemblies # dictionary with which providers to use per genome providers = PickleDict(os.path.join(CACHE_DIR, "providers.p")) diff --git a/seq2science/rules/configuration_logging.smk b/seq2science/rules/configuration_logging.smk index 11db16087..c4b32fba3 100644 --- a/seq2science/rules/configuration_logging.smk +++ b/seq2science/rules/configuration_logging.smk @@ -154,6 +154,8 @@ if not config.get("no_config_log"): ("min_mapping_quality", no_aligners), ("only_primary_align", no_aligners), ("remove_blacklist", no_aligners), + ("motif2factors_database_references", not config.get("motif2factors_database_references")), + ("motif2factors_reference", not config.get("motif2factors_reference")), ("tx2gene_from_gtf", config.get("quantifier") != "salmon"), ("tximeta", config.get("quantifier") != "salmon"), ("deseq2", not config.get("contrasts")), diff --git a/seq2science/rules/configuration_workflows.smk b/seq2science/rules/configuration_workflows.smk index 61aed28e5..75ffcacbb 100644 --- a/seq2science/rules/configuration_workflows.smk +++ b/seq2science/rules/configuration_workflows.smk @@ -71,6 +71,18 @@ if config.get("peak_caller", False): "To run gimme maelstrom you need more than one biological replicate!" ) + # only need additional assemblies when ortholog inference is truly needed + if config["infer_motif2factors"] is True: + if len(set(samples["assembly"]) - set(config["motif2factors_database_references"])) == 0: + config["infer_motif2factors"] = False + if config["infer_motif2factors"] is False: + config["motif2factors_database_references"] = [] + config["motif2factors_reference"] = [] + else: + config["infer_motif2factors"] = False + config["motif2factors_database_references"] = [] + config["motif2factors_reference"] = [] + # make sure that both maximum and minimum insert sizes are existing when one of them is used if config.get("min_template_length") and not config.get("max_template_length"): config["max_template_length"] = 1_000_000_000