From 2e2905873ee5fc72a5285219b7a0fd2946313c20 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Sun, 8 Jun 2025 19:55:32 -0400 Subject: [PATCH 1/3] empty new variants test --- .../write_new_variants_parquet_test.py | 100 ++++++++++++------ 1 file changed, 70 insertions(+), 30 deletions(-) diff --git a/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py b/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py index 4951527bf..baf072224 100644 --- a/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py +++ b/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py @@ -32,11 +32,13 @@ class WriteNewVariantsParquetTest(MockedDatarootTestCase): - def setUp(self) -> None: - super().setUp() + def test_empty_write_new_variants_parquet_test( + self, + ) -> None: ht = hl.read_table( TEST_SNV_INDEL_ANNOTATIONS, ) + ht = ht.head(0) ht.write( new_variants_table_path( ReferenceGenome.GRCh38, @@ -44,48 +46,48 @@ def setUp(self) -> None: TEST_RUN_ID, ), ) - ht = hl.read_table( - TEST_GRCH37_SNV_INDEL_ANNOTATIONS, + worker = luigi.worker.Worker() + task = WriteNewVariantsParquetTask( + reference_genome=ReferenceGenome.GRCh38, + dataset_type=DatasetType.SNV_INDEL, + sample_type=SampleType.WGS, + callset_path='fake_callset', + project_guids=[ + 'fake_project', + ], + project_pedigree_paths=['fake_pedigree'], + skip_validation=True, + run_id=TEST_RUN_ID, ) - ht.write( - new_variants_table_path( - ReferenceGenome.GRCh37, + worker.add(task) + worker.run() + self.assertTrue(task.output().exists()) + self.assertTrue(task.complete()) + df = pd.read_parquet( + new_variants_parquet_path( + ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, TEST_RUN_ID, ), ) - ht = hl.read_table( - TEST_MITO_ANNOTATIONS, - ) - ht.write( - new_variants_table_path( - ReferenceGenome.GRCh38, - DatasetType.MITO, - TEST_RUN_ID, - ), + self.assertEqual( + len(df), + 0, ) + + def test_snv_indel_write_new_variants_parquet_test( + self, + ) -> None: ht = hl.read_table( - TEST_SV_ANNOTATIONS, - ) - ht.write( - new_variants_table_path( - ReferenceGenome.GRCh38, - DatasetType.SV, - TEST_RUN_ID, - ), + TEST_SNV_INDEL_ANNOTATIONS, ) - ht = hl.read_table(TEST_GCNV_ANNOTATIONS) ht.write( new_variants_table_path( ReferenceGenome.GRCh38, - DatasetType.GCNV, + DatasetType.SNV_INDEL, TEST_RUN_ID, ), ) - - def test_write_new_variants_parquet_test( - self, - ) -> None: worker = luigi.worker.Worker() task = WriteNewVariantsParquetTask( reference_genome=ReferenceGenome.GRCh38, @@ -211,6 +213,16 @@ def test_write_new_variants_parquet_test( def test_grch37_write_new_variants_parquet_test( self, ) -> None: + ht = hl.read_table( + TEST_GRCH37_SNV_INDEL_ANNOTATIONS, + ) + ht.write( + new_variants_table_path( + ReferenceGenome.GRCh37, + DatasetType.SNV_INDEL, + TEST_RUN_ID, + ), + ) worker = luigi.worker.Worker() task = WriteNewVariantsParquetTask( reference_genome=ReferenceGenome.GRCh37, @@ -322,6 +334,16 @@ def test_mito_write_new_variants_parquet_test( self, write_new_variants_table_task: Mock, ) -> None: + ht = hl.read_table( + TEST_MITO_ANNOTATIONS, + ) + ht.write( + new_variants_table_path( + ReferenceGenome.GRCh38, + DatasetType.MITO, + TEST_RUN_ID, + ), + ) write_new_variants_table_task.return_value = MockCompleteTask() worker = luigi.worker.Worker() task = WriteNewVariantsParquetTask( @@ -410,6 +432,16 @@ def test_sv_write_new_variants_parquet_test( self, write_new_variants_table_task: Mock, ) -> None: + ht = hl.read_table( + TEST_SV_ANNOTATIONS, + ) + ht.write( + new_variants_table_path( + ReferenceGenome.GRCh38, + DatasetType.SV, + TEST_RUN_ID, + ), + ) write_new_variants_table_task.return_value = MockCompleteTask() worker = luigi.worker.Worker() task = WriteNewVariantsParquetTask( @@ -478,6 +510,14 @@ def test_gcnv_write_new_variants_parquet_test( self, write_new_variants_table_task: Mock, ) -> None: + ht = hl.read_table(TEST_GCNV_ANNOTATIONS) + ht.write( + new_variants_table_path( + ReferenceGenome.GRCh38, + DatasetType.GCNV, + TEST_RUN_ID, + ), + ) write_new_variants_table_task.return_value = MockCompleteTask() worker = luigi.worker.Worker() task = WriteNewVariantsParquetTask( From dead12676fd28beeb7b784621b92d4d9511cfab4 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Tue, 10 Jun 2025 12:00:36 -0400 Subject: [PATCH 2/3] fix merge conflicts --- .../write_new_variants_parquet_test.py | 20 ++----------------- 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py b/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py index 278ea0a58..4e7bcb40f 100644 --- a/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py +++ b/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py @@ -87,19 +87,10 @@ def test_snv_indel_write_new_variants_parquet_test( TEST_SNV_INDEL_ANNOTATIONS, ) ht.write( - variant_annotations_table_path( + new_variants_table_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, TEST_RUN_ID, - DatasetType.GCNV, - ), - ) - ht.write( - remapped_and_subsetted_callset_path( - ReferenceGenome.GRCh38, - DatasetType.GCNV, - 'fake_callset', - 'fake_project', ), ) worker = luigi.worker.Worker() @@ -530,19 +521,12 @@ def test_gcnv_write_new_variants_parquet_test( ) -> None: ht = hl.read_table(TEST_GCNV_ANNOTATIONS) ht.write( - new_variants_table_path( - ReferenceGenome.GRCh38, - DatasetType.GCNV, - TEST_RUN_ID, - ), - ) - write_new_variants_table_task.return_value = MockCompleteTask() - get_callset_ht.return_value = hl.read_table( variant_annotations_table_path( ReferenceGenome.GRCh38, DatasetType.GCNV, ), ) + get_callset_ht.return_value = ht update_variant_annotations_task.return_value = MockCompleteTask() worker = luigi.worker.Worker() task = WriteNewVariantsParquetTask( From 212086954129510e088c945fd624df63275da935 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Tue, 10 Jun 2025 12:06:17 -0400 Subject: [PATCH 3/3] ruff --- .../lib/tasks/exports/write_new_variants_parquet_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py b/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py index 4e7bcb40f..938400162 100644 --- a/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py +++ b/v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py @@ -13,7 +13,6 @@ from v03_pipeline.lib.paths import ( new_variants_parquet_path, new_variants_table_path, - remapped_and_subsetted_callset_path, variant_annotations_table_path, ) from v03_pipeline.lib.tasks.exports.write_new_variants_parquet import (