Skip to content

Commit c12b96e

Browse files
authored
Merge pull request #67 from Sage-Bionetworks-Workflows/DPE-1445
[DPE 1445] Add HTAN H5AD validation tool
2 parents cde0ff3 + 8d56b5b commit c12b96e

File tree

8 files changed

+126
-1
lines changed

8 files changed

+126
-1
lines changed

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# https://setuptools.pypa.io/en/latest/references/keywords.html
55

66
[metadata]
7-
version = 1.7.5
7+
version = 1.8.0
88
name = dcqc
99
description = Python package for performing quality control (QC) for data coordination (DC)
1010
author = Bruno Grande

src/dcqc/suites/suites.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,4 @@ class H5ADSuite(HDF5Suite):
8686
"""
8787

8888
file_type = FileType.get_file_type("H5AD")
89+
add_tests = (tests.H5adHtanValidatorTest,)

src/dcqc/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from dcqc.tests.bioformats_info_test import BioFormatsInfoTest
99
from dcqc.tests.file_extension_test import FileExtensionTest
1010
from dcqc.tests.grep_date_test import GrepDateTest
11+
from dcqc.tests.h5ad_htan_validator_test import H5adHtanValidatorTest
1112
from dcqc.tests.json_load_test import JsonLoadTest
1213
from dcqc.tests.jsonld_load_test import JsonLdLoadTest
1314
from dcqc.tests.libtiff_info_test import LibTiffInfoTest
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from dcqc.target import SingleTarget
2+
from dcqc.tests.base_test import ExternalBaseTest, Process, TestTier
3+
4+
5+
class H5adHtanValidatorTest(ExternalBaseTest):
    """Validate an H5AD file with the HTAN H5AD validator.

    Based on [HTAN H5AD Validator](https://github.com/ncihtan/h5ad).
    This is an h5ad validator for HTAN Phase 2 single
    cell/single nuclei RNA-sequencing data.

    The class declares exit code 0 as the passing code and exit code 1
    as the failing code, and points the failure reason at standard output.
    """

    tier = TestTier.INTERNAL_CONFORMANCE
    pass_code = 0
    fail_code = 1
    failure_reason_location = "std_out"
    target: SingleTarget

    def generate_process(self) -> Process:
        """Build the containerized validator command for the staged file.

        Returns:
            A ``Process`` that runs ``/usr/local/bin/h5ad.py`` with the
            staged file's name inside the HTAN H5AD validator container.
        """
        # Imported locally so the module-level import section is untouched.
        import shlex

        path = self.target.file.stage()
        command_args = [
            "python",
            "/usr/local/bin/h5ad.py",
            # shlex.quote escapes embedded single quotes and other shell
            # metacharacters, which hand-written '...' wrapping does not.
            shlex.quote(path.name),
        ]
        return Process(
            container="ghcr.io/sage-bionetworks-workflows/htan-h5ad-validator:0.1.1",
            command_args=command_args,
        )

tests/conftest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def test_files(get_data):
6565
tiff_dirty_datetime_path = get_data("test_image_dirty_datetime.tif")
6666
tiff_date_in_tag_path = get_data("date_tag.tif")
6767
invalid_xml_ome_tiff_path = get_data("invalid_xml.ome.tif")
68+
htan_good_h5ad_path = get_data("htan_good.h5ad")
69+
htan_bad_h5ad_path = get_data("htan_bad.h5ad")
6870
invalid_xml_metadata = {
6971
"file_type": "tiff",
7072
"md5_checksum": "a2550a887091d51351d547c8beae8f0c",
@@ -98,6 +100,14 @@ def test_files(get_data):
98100
"file_type": "tiff",
99101
"md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268",
100102
}
103+
htan_good_h5ad_metadata = {
104+
"file_type": "h5ad",
105+
"md5_checksum": "ba11278e2c19b10851a66587e17ec97f",
106+
}
107+
htan_bad_h5ad_metadata = {
108+
"file_type": "h5ad",
109+
"md5_checksum": "e6f5e1a812c699af489352652cf0238d",
110+
}
101111
test_files = {
102112
"date_in_tag_tiff": File(tiff_date_in_tag_path.as_posix(), tiff_metadata),
103113
"good_txt": File(txt_path.as_posix(), good_metadata),
@@ -115,6 +125,8 @@ def test_files(get_data):
115125
"dirty_datetime_in_tag_tiff": File(
116126
tiff_dirty_datetime_path.as_posix(), tiff_dirty_datetime_metadata
117127
),
128+
"htan_good_h5ad": File(htan_good_h5ad_path.as_posix(), htan_good_h5ad_metadata),
129+
"htan_bad_h5ad": File(htan_bad_h5ad_path.as_posix(), htan_bad_h5ad_metadata),
118130
}
119131

120132
# Create an in-memory remote file based on the good file

tests/data/htan_bad.h5ad

11.4 MB
Binary file not shown.

tests/data/htan_good.h5ad

10.1 MB
Binary file not shown.

tests/test_external_tests.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,3 +437,85 @@ def test_that_the_tiffdatetimetest_correctly_interprets_exit_code_0_and_1(
437437
)
438438
test_status = test.get_status()
439439
assert test_status == TestStatus.FAIL
440+
441+
442+
class TestH5adHtanValidatorTest:
    """Tests for ``H5adHtanValidatorTest``: command, container runs, status."""

    @pytest.fixture(scope="function", autouse=True)
    def setup_method(self, test_targets):
        """Create validator tests for a good H5AD, a bad H5AD and a text file."""
        self.good_h5ad_target = test_targets["htan_good_h5ad"]
        self.good_h5ad_test = tests.H5adHtanValidatorTest(self.good_h5ad_target)
        self.bad_h5ad_target = test_targets["htan_bad_h5ad"]
        self.bad_h5ad_test = tests.H5adHtanValidatorTest(self.bad_h5ad_target)
        self.txt_target = test_targets["good_txt"]
        self.txt_test = tests.H5adHtanValidatorTest(self.txt_target)

    def test_that_the_command_is_produced(self):
        """The generated command must invoke the validator script."""
        process = self.good_h5ad_test.generate_process()
        assert "/usr/local/bin/h5ad.py" in process.command

    @docker_enabled_test
    def test_that_the_exit_code_is_1_when_it_should_be(self):
        """An invalid HTAN H5AD file reports its errors and exits with 1."""
        process = self.bad_h5ad_test.generate_process()
        executor = DockerExecutor(
            process.container, process.command, self.bad_h5ad_target.file.url
        )
        executor.execute()
        assert "File: htan_bad.h5ad" in executor.std_out
        assert "Cellxgene run has errors. " in executor.std_out
        assert "HTAN Validation Failed." in executor.std_out
        assert "'cellxgene-schema output: Starting validation" in executor.std_out
        assert "HTAN-specific Validation Errors:" in executor.std_out
        assert executor.exit_code == "1"

    @docker_enabled_test
    def test_that_the_exit_code_is_0_when_it_should_be(self):
        """A valid HTAN H5AD file passes validation and exits with 0."""
        process = self.good_h5ad_test.generate_process()
        executor = DockerExecutor(
            process.container, process.command, self.good_h5ad_target.file.url
        )
        executor.execute()
        assert "File: htan_good.h5ad" in executor.std_out
        assert "Cellxgene run successful." in executor.std_out
        assert "Validation Passed!" in executor.std_out
        assert executor.exit_code == "0"

    @docker_enabled_test
    def test_that_wrong_file_type_has_1_exit_code(self):
        """A non-H5AD input cannot be opened and exits with 1."""
        process = self.txt_test.generate_process()
        executor = DockerExecutor(
            process.container, process.command, self.txt_target.file.url
        )
        executor.execute()
        assert executor.std_out == (
            "HTAN h5ad File Validator\n"
            "File: test.txt\n"
            "An error occurred while trying to open test.txt\n"
            "Unable to synchronously open file (file signature not found)\n"
            "\n"
        )
        assert executor.exit_code == "1"

    def test_that_the_test_correctly_interprets_exit_code_0_and_1(self, mocker):
        """Recorded exit codes 0 and 1 must map onto PASS and FAIL."""
        # H5adHtanValidatorTest declares pass_code = 0 and fail_code = 1,
        # so 0 is pass and 1 is fail (the reverse of the TIFF date test
        # this test was modelled on).
        with TemporaryDirectory() as tmp_dir:
            path_0 = Path(tmp_dir, "code_0.txt")
            path_1 = Path(tmp_dir, "code_1.txt")
            path_0.write_text("0")
            path_1.write_text("1")
            pass_outputs = {"std_out": path_0, "std_err": path_0, "exit_code": path_0}
            fail_outputs = {"std_out": path_1, "std_err": path_1, "exit_code": path_1}

            # Exercise the validator under test, not TiffDateTimeTest.
            test = tests.H5adHtanValidatorTest(self.good_h5ad_target)
            mocker.patch.object(
                test, "_find_process_outputs", return_value=pass_outputs
            )
            test_status = test.get_status()
            assert test_status == TestStatus.PASS

            test = tests.H5adHtanValidatorTest(self.bad_h5ad_target)
            mocker.patch.object(
                test, "_find_process_outputs", return_value=fail_outputs
            )
            test_status = test.get_status()
            assert test_status == TestStatus.FAIL

0 commit comments

Comments
 (0)