diff --git a/README.md b/README.md index 56af976..5797632 100644 --- a/README.md +++ b/README.md @@ -83,8 +83,6 @@ log: run_one_path: /usr/bin/run-one -metadata_archive: /path/to/metadata/archive - transfer_details: user: username host: remote.host.com @@ -99,6 +97,7 @@ sequencers: NovaSeqXPlus: sequencing_path: /sequencing/NovaSeqXPlus remote_destination: /Illumina/NovaSeqXPlus + metadata_archive: /path/to/metadata/archive/NovaSeqXPlus_data metadata_for_statusdb: - RunInfo.xml - RunParameters.xml @@ -150,6 +149,7 @@ Run status is tracked in CouchDB with events including: - Remote storage is accessible via rsync over SSH - CouchDB is accessible and the database exists - Metadata files (e.g., RunInfo.xml) are present in run directories for status database updates and sync to metadata archive location +- The flowcell ID is set to correspond to the ID that is scanned with a barcode scanner during sequencing setup in the lab ### Status Files diff --git a/dataflow_transfer/run_classes/element_runs.py b/dataflow_transfer/run_classes/element_runs.py index a3ea109..2b543e2 100644 --- a/dataflow_transfer/run_classes/element_runs.py +++ b/dataflow_transfer/run_classes/element_runs.py @@ -9,9 +9,6 @@ class ElementRun(Run): def __init__(self, run_dir, configuration): super().__init__(run_dir, configuration) self.final_file = "RunUploaded.json" - self.flowcell_id = self.run_id.split("_")[ - -1 - ] # This is true for all except Teton runs @register_run_class @@ -22,6 +19,10 @@ class AVITIRun(ElementRun): def __init__(self, run_dir, configuration): self.run_id_format = ( - r"^\d{8}_AV\d{6}_(A|BP)\d{10}$" # 20251007_AV242106_A2507535225 + r"^\d{8}_AV\d{6}_(A|B)\d{10}$" # 20251007_AV242106_A2507535225 ) super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1][1:] # 2507535225 + + +# TODO: Add Teton run class diff --git a/dataflow_transfer/run_classes/generic_runs.py b/dataflow_transfer/run_classes/generic_runs.py index daf88dc..351bc87 100644 --- a/dataflow_transfer/run_classes/generic_runs.py +++ b/dataflow_transfer/run_classes/generic_runs.py @@ -25,8 +25,7 @@ def __init__(self, run_dir, configuration): self.run_dir, ".metadata_rsync_exitcode" ) self.metadata_destination = os.path.join( - self.configuration.get("metadata_archive"), - getattr(self, "run_type", None), + self.sequencer_config.get("metadata_archive"), self.run_id, ) self.final_rsync_exitcode_file = os.path.join( diff --git a/dataflow_transfer/run_classes/illumina_runs.py b/dataflow_transfer/run_classes/illumina_runs.py index 7eab9bb..12cfa11 100644 --- a/dataflow_transfer/run_classes/illumina_runs.py +++ b/dataflow_transfer/run_classes/illumina_runs.py @@ -23,6 +23,7 @@ def __init__(self, run_dir, configuration): r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]+$" # 20251010_LH00202_0284_B22CVHTLT1 ) super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1][1:] # 22CVHTLT1 @register_run_class @@ -60,3 +61,4 @@ class MiSeqi100Run(IlluminaRun): def __init__(self, run_dir, configuration): self.run_id_format = r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]{10}-SC3$" # 20260128_SH01140_0002_ASC2150561-SC3 super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1][1:] # SC2150561-SC3 diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 58238cb..e9bd993 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -12,7 +12,6 @@ def novaseqxplus_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -23,6 +22,7 @@ def novaseqxplus_testobj(tmp_path): "NovaSeqXPlus": { "remote_destination": "/data/NovaSeqXPlus", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/NovaSeqXPlus", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -44,7 +44,6 @@ def nextseq_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -55,6 +54,7 @@ def nextseq_testobj(tmp_path): "NextSeq": { "remote_destination": "/data/NextSeq", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/NextSeq", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -76,7 +76,6 @@ def miseqseq_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -87,6 +86,7 @@ def miseqseq_testobj(tmp_path): "MiSeq": { "remote_destination": "/data/MiSeq", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/MiSeq", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -108,7 +108,6 @@ def miseqseqi100_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -119,6 +118,7 @@ def miseqseqi100_testobj(tmp_path): "MiSeqi100": { "remote_destination": "/data/MiSeqi100", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/MiSeqi100", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -169,6 +169,20 @@ def test_confirm_run_type(run_fixture, expected_run_type, request): run_obj.confirm_run_type() +@pytest.mark.parametrize( + "run_fixture, expected_flowcell", + [ + ("novaseqxplus_testobj", "22CVHTLT1"), + ("nextseq_testobj", "AAHFHCCM5"), + ("miseqseq_testobj", "000000000-M6D7K"), + ("miseqseqi100_testobj", "SC2150561-SC3"), + ], +) +def test_flowcell_id_is_computed(run_fixture, expected_flowcell, request): + run_obj = request.getfixturevalue(run_fixture) + assert run_obj.flowcell_id == expected_flowcell + + @pytest.mark.parametrize( "run_fixture", [ diff --git a/pyproject.toml b/pyproject.toml index 675894c..c8c4f30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ ignore = [ [project] name = "dataflow_transfer" -version = "1.1.1" +version = "1.1.2" description = "Script for transferring sequencing data from sequencers to storage" authors = [ { name = "Sara Sjunnebo", email = "sara.sjunnebo@scilifelab.se" },