diff --git a/setup.cfg b/setup.cfg
index c4e7d23a..eb1ac410 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,7 +4,7 @@
 # https://setuptools.pypa.io/en/latest/references/keywords.html
 
 [metadata]
-version = 1.7.5
+version = 1.8.0
 name = dcqc
 description = Python package for performing quality control (QC) for data coordination (DC)
 author = Bruno Grande
diff --git a/src/dcqc/suites/suites.py b/src/dcqc/suites/suites.py
index 70445bb3..8b058be6 100644
--- a/src/dcqc/suites/suites.py
+++ b/src/dcqc/suites/suites.py
@@ -86,3 +86,4 @@ class H5ADSuite(HDF5Suite):
     """
 
     file_type = FileType.get_file_type("H5AD")
+    add_tests = (tests.H5adHtanValidatorTest,)
diff --git a/src/dcqc/tests/__init__.py b/src/dcqc/tests/__init__.py
index f775fd24..5e21e462 100644
--- a/src/dcqc/tests/__init__.py
+++ b/src/dcqc/tests/__init__.py
@@ -8,6 +8,7 @@
 from dcqc.tests.bioformats_info_test import BioFormatsInfoTest
 from dcqc.tests.file_extension_test import FileExtensionTest
 from dcqc.tests.grep_date_test import GrepDateTest
+from dcqc.tests.h5ad_htan_validator_test import H5adHtanValidatorTest
 from dcqc.tests.json_load_test import JsonLoadTest
 from dcqc.tests.jsonld_load_test import JsonLdLoadTest
 from dcqc.tests.libtiff_info_test import LibTiffInfoTest
diff --git a/src/dcqc/tests/h5ad_htan_validator_test.py b/src/dcqc/tests/h5ad_htan_validator_test.py
new file mode 100644
index 00000000..3aa5f629
--- /dev/null
+++ b/src/dcqc/tests/h5ad_htan_validator_test.py
@@ -0,0 +1,32 @@
+from dcqc.target import SingleTarget
+from dcqc.tests.base_test import ExternalBaseTest, Process, TestTier
+
+
+class H5adHtanValidatorTest(ExternalBaseTest):
+    """
+    Based on [HTAN H5AD Validator](https://github.com/ncihtan/h5ad)
+    This is an h5ad validator for HTAN Phase 2 single
+    cell/single nuclei RNA-sequencing data.
+    """
+
+    tier = TestTier.INTERNAL_CONFORMANCE
+    # Exit codes are strings: the test suite handles exit codes as text
+    # ("0"/"1"), so integer codes would presumably never compare equal.
+    pass_code = "0"
+    fail_code = "1"
+    failure_reason_location = "std_out"
+    target: SingleTarget
+
+    def generate_process(self) -> Process:
+        """Build the containerized validator command for the staged file."""
+        path = self.target.file.stage()
+        command_args = [
+            "python",
+            "/usr/local/bin/h5ad.py",
+            f"'{path.name}'",
+        ]
+        process = Process(
+            container="ghcr.io/sage-bionetworks-workflows/htan-h5ad-validator:0.1.1",
+            command_args=command_args,
+        )
+        return process
diff --git a/tests/conftest.py b/tests/conftest.py
index 731b5898..852b1a26 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -65,6 +65,8 @@ def test_files(get_data):
     tiff_dirty_datetime_path = get_data("test_image_dirty_datetime.tif")
     tiff_date_in_tag_path = get_data("date_tag.tif")
     invalid_xml_ome_tiff_path = get_data("invalid_xml.ome.tif")
+    htan_good_h5ad_path = get_data("htan_good.h5ad")
+    htan_bad_h5ad_path = get_data("htan_bad.h5ad")
     invalid_xml_metadata = {
         "file_type": "tiff",
         "md5_checksum": "a2550a887091d51351d547c8beae8f0c",
@@ -98,6 +100,14 @@ def test_files(get_data):
         "file_type": "tiff",
         "md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268",
     }
+    htan_good_h5ad_metadata = {
+        "file_type": "h5ad",
+        "md5_checksum": "ba11278e2c19b10851a66587e17ec97f",
+    }
+    htan_bad_h5ad_metadata = {
+        "file_type": "h5ad",
+        "md5_checksum": "e6f5e1a812c699af489352652cf0238d",
+    }
     test_files = {
         "date_in_tag_tiff": File(tiff_date_in_tag_path.as_posix(), tiff_metadata),
         "good_txt": File(txt_path.as_posix(), good_metadata),
@@ -115,6 +125,8 @@ def test_files(get_data):
         "dirty_datetime_in_tag_tiff": File(
             tiff_dirty_datetime_path.as_posix(), tiff_dirty_datetime_metadata
         ),
+        "htan_good_h5ad": File(htan_good_h5ad_path.as_posix(), htan_good_h5ad_metadata),
+        "htan_bad_h5ad": File(htan_bad_h5ad_path.as_posix(), htan_bad_h5ad_metadata),
     }
 
     # Create an in-memory remote file based on the good file
diff --git a/tests/data/htan_bad.h5ad b/tests/data/htan_bad.h5ad
new file mode 100644
index 00000000..cda3ba8e
Binary files /dev/null and b/tests/data/htan_bad.h5ad differ
diff --git a/tests/data/htan_good.h5ad b/tests/data/htan_good.h5ad
new file mode 100644
index 00000000..e8d4bfbf
Binary files /dev/null and b/tests/data/htan_good.h5ad differ
diff --git a/tests/test_external_tests.py b/tests/test_external_tests.py
index 05347b35..14432f4b 100644
--- a/tests/test_external_tests.py
+++ b/tests/test_external_tests.py
@@ -437,3 +437,86 @@ def test_that_the_tiffdatetimetest_correctly_interprets_exit_code_0_and_1(
         )
         test_status = test.get_status()
         assert test_status == TestStatus.FAIL
+
+
+class TestH5adHtanValidatorTest:
+    """Tests for the HTAN h5ad validator external test."""
+
+    @pytest.fixture(scope="function", autouse=True)
+    def setup_method(self, test_targets):
+        self.good_h5ad_target = test_targets["htan_good_h5ad"]
+        self.good_h5ad_test = tests.H5adHtanValidatorTest(self.good_h5ad_target)
+        self.bad_h5ad_target = test_targets["htan_bad_h5ad"]
+        self.bad_h5ad_test = tests.H5adHtanValidatorTest(self.bad_h5ad_target)
+        self.txt_target = test_targets["good_txt"]
+        self.txt_test = tests.H5adHtanValidatorTest(self.txt_target)
+
+    def test_that_the_command_is_produced(self):
+        process = self.good_h5ad_test.generate_process()
+        assert "/usr/local/bin/h5ad.py" in process.command
+
+    @docker_enabled_test
+    def test_that_the_exit_code_is_1_when_it_should_be(self):
+        process = self.bad_h5ad_test.generate_process()
+        executor = DockerExecutor(
+            process.container, process.command, self.bad_h5ad_target.file.url
+        )
+        executor.execute()
+        assert "File: htan_bad.h5ad" in executor.std_out
+        assert "Cellxgene run has errors. " in executor.std_out
+        assert "HTAN Validation Failed." in executor.std_out
+        assert "'cellxgene-schema output: Starting validation" in executor.std_out
+        assert "HTAN-specific Validation Errors:" in executor.std_out
+        assert executor.exit_code == "1"
+
+    @docker_enabled_test
+    def test_that_the_exit_code_is_0_when_it_should_be(self):
+        process = self.good_h5ad_test.generate_process()
+        executor = DockerExecutor(
+            process.container, process.command, self.good_h5ad_target.file.url
+        )
+        executor.execute()
+        assert "File: htan_good.h5ad" in executor.std_out
+        assert "Cellxgene run successful." in executor.std_out
+        assert "Validation Passed!" in executor.std_out
+        assert executor.exit_code == "0"
+
+    @docker_enabled_test
+    def test_that_wrong_file_type_has_1_exit_code(self):
+        process = self.txt_test.generate_process()
+        executor = DockerExecutor(
+            process.container, process.command, self.txt_target.file.url
+        )
+        executor.execute()
+        assert executor.std_out == (
+            "HTAN h5ad File Validator\n"
+            "File: test.txt\n"
+            "An error occurred while trying to open test.txt\n"
+            "Unable to synchronously open file (file signature not found)\n"
+            "\n"
+        )
+        assert executor.exit_code == "1"
+
+    def test_that_the_test_correctly_interprets_exit_code_0_and_1(self, mocker):
+        # 0 is pass, 1 is fail
+        with TemporaryDirectory() as tmp_dir:
+            path_0 = Path(tmp_dir, "code_0.txt")
+            path_1 = Path(tmp_dir, "code_1.txt")
+            path_0.write_text("0")
+            path_1.write_text("1")
+            pass_outputs = {"std_out": path_0, "std_err": path_0, "exit_code": path_0}
+            fail_outputs = {"std_out": path_1, "std_err": path_1, "exit_code": path_1}
+
+            test = tests.H5adHtanValidatorTest(self.good_h5ad_target)
+            mocker.patch.object(
+                test, "_find_process_outputs", return_value=pass_outputs
+            )
+            test_status = test.get_status()
+            assert test_status == TestStatus.PASS
+
+            test = tests.H5adHtanValidatorTest(self.bad_h5ad_target)
+            mocker.patch.object(
+                test, "_find_process_outputs", return_value=fail_outputs
+            )
+            test_status = test.get_status()
+            assert test_status == TestStatus.FAIL