Skip to content

Commit fe65de1

Browse files
authored
Clickhouse Schema testcase (#1213)
* Clickhouse Schema testcase
* ruff
1 parent 250c34b commit fe65de1

File tree

3 files changed

+131
-105
lines changed

3 files changed

+131
-105
lines changed

v03_pipeline/bin/pipeline_worker_test.py

Lines changed: 2 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from v03_pipeline.lib.core import DatasetType, ReferenceGenome, SampleType
1212
from v03_pipeline.lib.core.environment import Env
1313
from v03_pipeline.lib.misc.clickhouse import (
14-
STAGING_CLICKHOUSE_DATABASE,
1514
ClickhouseReferenceDataset,
1615
get_clickhouse_client,
1716
)
@@ -20,6 +19,7 @@
2019
loading_pipeline_deadletter_queue_dir,
2120
loading_pipeline_queue_dir,
2221
)
22+
from v03_pipeline.lib.test.clickhouse_schema_testcase import ClickhouseSchemaTestCase
2323
from v03_pipeline.lib.test.misc import copy_project_pedigree_to_mocked_dir
2424
from v03_pipeline.lib.test.mocked_reference_datasets_testcase import (
2525
MockedReferenceDatasetsTestCase,
@@ -40,110 +40,7 @@ def output(self):
4040
return luigi.LocalTarget('output.txt')
4141

4242

43-
class PipelineWorkerTest(MockedReferenceDatasetsTestCase):
44-
def setUp(self):
45-
super().setUp()
46-
client = get_clickhouse_client()
47-
client.execute(
48-
f"""
49-
DROP DATABASE IF EXISTS {STAGING_CLICKHOUSE_DATABASE};
50-
""",
51-
)
52-
client.execute(
53-
f"""
54-
DROP DATABASE IF EXISTS {Env.CLICKHOUSE_DATABASE};
55-
""",
56-
)
57-
client.execute(
58-
f"""
59-
CREATE DATABASE {Env.CLICKHOUSE_DATABASE};
60-
""",
61-
)
62-
client = get_clickhouse_client(database=Env.CLICKHOUSE_DATABASE)
63-
client.execute(
64-
"""
65-
CREATE DICTIONARY seqrdb_gene_ids
66-
(
67-
`gene_id` String,
68-
`seqrdb_id` String,
69-
`affected` String
70-
)
71-
PRIMARY KEY gene_id
72-
SOURCE(NULL())
73-
LIFETIME(0)
74-
LAYOUT(HASHED())
75-
""",
76-
)
77-
client.execute(
78-
"""
79-
CREATE DICTIONARY seqrdb_affected_status_dict
80-
(
81-
`family_guid` String,
82-
`sampleId` String,
83-
`affected` String
84-
)
85-
PRIMARY KEY family_guid, sampleId
86-
SOURCE(NULL())
87-
LIFETIME(0)
88-
LAYOUT(COMPLEX_KEY_HASHED())
89-
""",
90-
)
91-
client.execute(
92-
"""
93-
CREATE DICTIONARY `GRCh38/SNV_INDEL/project_partitions_dict`
94-
(
95-
`project_guid` String,
96-
`n_partitions` UInt32
97-
)
98-
PRIMARY KEY project_guid
99-
SOURCE(NULL())
100-
LIFETIME(0)
101-
LAYOUT(COMPLEX_KEY_HASHED())
102-
""",
103-
)
104-
with open(TEST_SCHEMA) as f:
105-
sql = f.read()
106-
commands = [cmd.strip() for cmd in sql.split(';') if cmd.strip()]
107-
for cmd in commands:
108-
client.execute(cmd)
109-
client.execute(
110-
f"""
111-
CREATE DICTIONARY `GRCh38/SNV_INDEL/gt_stats_dict`
112-
(
113-
`key` UInt32,
114-
`ac_wes` UInt64,
115-
`ac_wgs` UInt64,
116-
`ac_affected` UInt64,
117-
`hom_wes` UInt64,
118-
`hom_wgs` UInt64,
119-
`hom_affected` UInt64
120-
)
121-
PRIMARY KEY key
122-
SOURCE(
123-
CLICKHOUSE(
124-
USER {Env.CLICKHOUSE_WRITER_USER} PASSWORD {Env.CLICKHOUSE_WRITER_PASSWORD}
125-
DB {Env.CLICKHOUSE_DATABASE} TABLE `GRCh38/SNV_INDEL/gt_stats`
126-
)
127-
)
128-
LIFETIME(0)
129-
LAYOUT(FLAT(MAX_ARRAY_SIZE 1000000000))
130-
""",
131-
)
132-
133-
def tearDown(self):
134-
super().tearDown()
135-
client = get_clickhouse_client()
136-
client.execute(
137-
f"""
138-
DROP DATABASE IF EXISTS {STAGING_CLICKHOUSE_DATABASE};
139-
""",
140-
)
141-
client.execute(
142-
f"""
143-
DROP DATABASE IF EXISTS {Env.CLICKHOUSE_DATABASE};
144-
""",
145-
)
146-
43+
class PipelineWorkerTest(MockedReferenceDatasetsTestCase, ClickhouseSchemaTestCase):
14744
@patch.object(
14845
ClickhouseReferenceDataset,
14946
'for_reference_genome_dataset_type',
v03_pipeline/lib/test/clickhouse_schema_testcase.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import unittest
2+
3+
from v03_pipeline.lib.core import Env
4+
from v03_pipeline.lib.misc.clickhouse import (
5+
STAGING_CLICKHOUSE_DATABASE,
6+
get_clickhouse_client,
7+
)
8+
9+
TEST_SCHEMA = 'v03_pipeline/var/test/test_clickhouse_schema.sql'
10+
11+
12+
class ClickhouseSchemaTestCase(unittest.TestCase):
13+
def setUp(self):
14+
super().setUp()
15+
client = get_clickhouse_client()
16+
client.execute(
17+
f"""
18+
DROP DATABASE IF EXISTS {STAGING_CLICKHOUSE_DATABASE};
19+
""",
20+
)
21+
client.execute(
22+
f"""
23+
DROP DATABASE IF EXISTS {Env.CLICKHOUSE_DATABASE};
24+
""",
25+
)
26+
client.execute(
27+
f"""
28+
CREATE DATABASE {Env.CLICKHOUSE_DATABASE};
29+
""",
30+
)
31+
client = get_clickhouse_client(database=Env.CLICKHOUSE_DATABASE)
32+
client.execute(
33+
"""
34+
CREATE DICTIONARY seqrdb_gene_ids
35+
(
36+
`gene_id` String,
37+
`seqrdb_id` String,
38+
`affected` String
39+
)
40+
PRIMARY KEY gene_id
41+
SOURCE(NULL())
42+
LIFETIME(0)
43+
LAYOUT(HASHED())
44+
""",
45+
)
46+
client.execute(
47+
"""
48+
CREATE DICTIONARY seqrdb_affected_status_dict
49+
(
50+
`family_guid` String,
51+
`sampleId` String,
52+
`affected` String
53+
)
54+
PRIMARY KEY family_guid, sampleId
55+
SOURCE(NULL())
56+
LIFETIME(0)
57+
LAYOUT(COMPLEX_KEY_HASHED())
58+
""",
59+
)
60+
client.execute(
61+
"""
62+
CREATE DICTIONARY `GRCh38/SNV_INDEL/project_partitions_dict`
63+
(
64+
`project_guid` String,
65+
`n_partitions` UInt32
66+
)
67+
PRIMARY KEY project_guid
68+
SOURCE(NULL())
69+
LIFETIME(0)
70+
LAYOUT(COMPLEX_KEY_HASHED())
71+
""",
72+
)
73+
client.execute(
74+
"""
75+
CREATE DICTIONARY `GRCh38/SNV_INDEL/reference_data/gnomad_genomes`
76+
(
77+
`key` UInt32,
78+
`filter_af` Decimal(9, 8)
79+
)
80+
PRIMARY KEY key
81+
SOURCE(NULL())
82+
LIFETIME(0)
83+
LAYOUT(COMPLEX_KEY_HASHED())
84+
""",
85+
)
86+
with open(TEST_SCHEMA) as f:
87+
sql = f.read()
88+
commands = [cmd.strip() for cmd in sql.split(';') if cmd.strip()]
89+
for cmd in commands:
90+
client.execute(cmd)
91+
client.execute(
92+
f"""
93+
CREATE DICTIONARY `GRCh38/SNV_INDEL/gt_stats_dict`
94+
(
95+
`key` UInt32,
96+
`ac_wes` UInt64,
97+
`ac_wgs` UInt64,
98+
`ac_affected` UInt64,
99+
`hom_wes` UInt64,
100+
`hom_wgs` UInt64,
101+
`hom_affected` UInt64
102+
)
103+
PRIMARY KEY key
104+
SOURCE(
105+
CLICKHOUSE(
106+
USER {Env.CLICKHOUSE_WRITER_USER} PASSWORD {Env.CLICKHOUSE_WRITER_PASSWORD}
107+
DB {Env.CLICKHOUSE_DATABASE} TABLE `GRCh38/SNV_INDEL/gt_stats`
108+
)
109+
)
110+
LIFETIME(0)
111+
LAYOUT(FLAT(MAX_ARRAY_SIZE 1000000000))
112+
""",
113+
)
114+
115+
def tearDown(self):
116+
super().tearDown()
117+
client = get_clickhouse_client()
118+
client.execute(
119+
f"""
120+
DROP DATABASE IF EXISTS {STAGING_CLICKHOUSE_DATABASE};
121+
""",
122+
)
123+
client.execute(
124+
f"""
125+
DROP DATABASE IF EXISTS {Env.CLICKHOUSE_DATABASE};
126+
""",
127+
)

v03_pipeline/lib/test/mocked_dataroot_testcase.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
class MockedDatarootTestCase(unittest.TestCase):
1111
def setUp(self) -> None:
12+
super().setUp()
1213
patcher = patch(
1314
'v03_pipeline.lib.paths.Env',
1415
wraps=Env,
@@ -20,6 +21,7 @@ def setUp(self) -> None:
2021
setattr(self.mock_env, field_name, tempfile.TemporaryDirectory().name)
2122

2223
def tearDown(self) -> None:
24+
super().tearDown()
2325
for field_name in Env.__dataclass_fields__:
2426
if os.path.isdir(getattr(self.mock_env, field_name)):
2527
shutil.rmtree(getattr(self.mock_env, field_name))

0 commit comments

Comments
 (0)