Skip to content

Commit f0e8e60

Browse files
authored
minimal new variants changes (#1214)
* minimal new variants changes * missed one
1 parent fe65de1 commit f0e8e60

File tree

6 files changed

+31
-8
lines changed

6 files changed

+31
-8
lines changed

v03_pipeline/lib/core/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22

33
MIGRATION_RUN_ID = 'hail_search_to_clickhouse_migration'
4+
VARIANTS_MIGRATION_RUN_ID = 'annotations_to_variants_tables'
45
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH = (
56
'gs://hail-common/references/grch37_to_grch38.over.chain.gz'
67
if os.environ.get('HAIL_DATAPROC') == '1'

v03_pipeline/lib/core/dataset_type.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,10 @@ def export_vcf_annotation_fns(self) -> list[Callable[..., hl.Expression]]:
370370
def should_write_new_transcripts(self):
371371
return self == DatasetType.SNV_INDEL
372372

373+
@property
374+
def should_write_new_variant_details(self):
375+
return self == DatasetType.SNV_INDEL
376+
373377
@property
374378
def overwrite_male_non_par_calls(self) -> None:
375379
return self == DatasetType.SV

v03_pipeline/lib/paths.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,21 @@ def new_transcripts_parquet_path(
353353
)
354354

355355

356+
def new_variant_details_parquet_path(
357+
reference_genome: ReferenceGenome,
358+
dataset_type: DatasetType,
359+
run_id: str,
360+
) -> str:
361+
return os.path.join(
362+
runs_path(
363+
reference_genome,
364+
dataset_type,
365+
),
366+
run_id,
367+
'new_variant_details.parquet',
368+
)
369+
370+
356371
def new_variants_parquet_path(
357372
reference_genome: ReferenceGenome,
358373
dataset_type: DatasetType,

v03_pipeline/lib/tasks/dataproc/base_run_job_on_dataproc.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import time
23

34
import google.api_core.exceptions
@@ -45,7 +46,7 @@ def task(self):
4546

4647
@property
4748
def job_id(self):
48-
return f'{self.task.task_family}-{self.reference_genome[-2:]}-{self.run_id}-{self.attempt_id}'
49+
return f'{self.task.task_family}-{self.reference_genome[-2:]}-{re.sub("_", "-", self.dataset_type.value)}-{self.run_id}-{self.attempt_id}'
4950

5051
def requires(self) -> [luigi.Task]:
5152
return [self.clone(CreateDataprocClusterTask)]

v03_pipeline/lib/tasks/dataproc/misc.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33
import luigi
44

55
from v03_pipeline.lib.core import Env, ReferenceGenome
6-
from v03_pipeline.lib.core.constants import (
7-
MIGRATION_RUN_ID,
8-
)
6+
from v03_pipeline.lib.core.constants import MIGRATION_RUN_ID, VARIANTS_MIGRATION_RUN_ID
97

108
CLUSTER_NAME_PREFIX = 'pipeline-runner'
119

1210

1311
def get_cluster_name(reference_genome: ReferenceGenome, run_id: str):
1412
if MIGRATION_RUN_ID in run_id:
1513
return f'{Env.DEPLOYMENT_TYPE}-{CLUSTER_NAME_PREFIX}-{reference_genome.value.lower()}-hs-to-clckhse-mgrtn'
14+
if VARIANTS_MIGRATION_RUN_ID in run_id:
15+
return f'{Env.DEPLOYMENT_TYPE}-{CLUSTER_NAME_PREFIX}-{reference_genome.value.lower()}-vrnts-mgrtn'
1616
return f'{Env.DEPLOYMENT_TYPE}-{CLUSTER_NAME_PREFIX}-{reference_genome.value.lower()}-{run_id}'
1717

1818

v03_pipeline/lib/tasks/dataproc/run_pipeline_on_dataproc_test.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_job_already_exists_failed(
5454
mock_logger.error.assert_has_calls(
5555
[
5656
call(
57-
'Job RunPipelineTask-38-manual__2024-04-03-0 entered ERROR state',
57+
'Job RunPipelineTask-38-SNV-INDEL-manual__2024-04-03-0 entered ERROR state',
5858
),
5959
],
6060
)
@@ -128,7 +128,7 @@ def test_job_failed(
128128
mock_logger.info.assert_has_calls(
129129
[
130130
call(
131-
'Waiting for Job completion RunPipelineTask-38-manual__2024-04-05-1',
131+
'Waiting for Job completion RunPipelineTask-38-SNV-INDEL-manual__2024-04-05-1',
132132
),
133133
],
134134
)
@@ -175,8 +175,10 @@ def test_job_success(
175175
mock_logger.info.assert_has_calls(
176176
[
177177
call(
178-
'Waiting for Job completion RunPipelineTask-38-manual__2024-04-06-0',
178+
'Waiting for Job completion RunPipelineTask-38-SNV-INDEL-manual__2024-04-06-0',
179+
),
180+
call(
181+
'Job RunPipelineTask-38-SNV-INDEL-manual__2024-04-06-0 is complete',
179182
),
180-
call('Job RunPipelineTask-38-manual__2024-04-06-0 is complete'),
181183
],
182184
)

0 commit comments

Comments
 (0)