|
23 | 23 | add_file_stats, |
24 | 24 | add_multiqc_stats, |
25 | 25 | add_samtools_stats, |
| 26 | + add_tataki_edam, |
26 | 27 | add_vcftools_stats, |
27 | 28 | add_workflow_entity, |
28 | 29 | compute_sha256, |
@@ -1716,6 +1717,174 @@ def test_generated_by_has_vcftools_software(self, tmp_path: Path, mocker: "Mocke |
1716 | 1717 | assert generated_by is not None |
1717 | 1718 |
|
1718 | 1719 |
|
| 1720 | +class TestAddTatakiEdam: |
| 1721 | + _TATAKI_JSON = json.dumps( |
| 1722 | + { |
| 1723 | + "/work/result.tsv": { |
| 1724 | + "id": "http://edamontology.org/format_3475", |
| 1725 | + "label": "TSV", |
| 1726 | + "decompressed": {"id": None, "label": None}, |
| 1727 | + }, |
| 1728 | + } |
| 1729 | + ).encode() |
| 1730 | + |
| 1731 | + def _make_run_dir_with_outputs(self, tmp_path: Path, output_files: dict[str, str | bytes]) -> Path: |
| 1732 | + """Create a run directory populated with the given output files.""" |
| 1733 | + return create_run_dir( |
| 1734 | + tmp_path, |
| 1735 | + RUN_ID, |
| 1736 | + exit_code="0", |
| 1737 | + end_time="2024-01-01T00:10:00", |
| 1738 | + wf_params="{}", |
| 1739 | + output_files=output_files, |
| 1740 | + outputs_json=[], |
| 1741 | + ) |
| 1742 | + |
| 1743 | + def test_skips_when_docker_not_available(self, tmp_path: Path) -> None: |
| 1744 | + """Should return immediately when docker binary is not found.""" |
| 1745 | + rd = self._make_run_dir_with_outputs(tmp_path, {"result.tsv": "a\tb\n"}) |
| 1746 | + crate = create_base_crate() |
| 1747 | + with patch("sapporo.ro_crate.shutil.which", return_value=None): |
| 1748 | + add_tataki_edam(crate, rd) |
| 1749 | + # No EDAM entities added |
| 1750 | + edam_entities = [e for e in crate.get_entities() if "edamontology.org" in str(e.id)] |
| 1751 | + assert edam_entities == [] |
| 1752 | + |
| 1753 | + def test_skips_when_outputs_dir_missing(self, tmp_path: Path) -> None: |
| 1754 | + """Should return immediately when outputs directory does not exist.""" |
| 1755 | + rd = create_run_dir(tmp_path, RUN_ID, exit_code="0", end_time="2024-01-01T00:10:00", wf_params="{}") |
| 1756 | + crate = create_base_crate() |
| 1757 | + add_tataki_edam(crate, rd) |
| 1758 | + edam_entities = [e for e in crate.get_entities() if "edamontology.org" in str(e.id)] |
| 1759 | + assert edam_entities == [] |
| 1760 | + |
| 1761 | + def test_skips_when_no_output_files(self, tmp_path: Path) -> None: |
| 1762 | + """Should return immediately when outputs directory is empty.""" |
| 1763 | + rd = create_run_dir(tmp_path, RUN_ID, exit_code="0", end_time="2024-01-01T00:10:00", wf_params="{}") |
| 1764 | + from sapporo.config import RUN_DIR_STRUCTURE |
| 1765 | + |
| 1766 | + rd.joinpath(RUN_DIR_STRUCTURE["outputs_dir"]).mkdir(parents=True, exist_ok=True) |
| 1767 | + crate = create_base_crate() |
| 1768 | + add_tataki_edam(crate, rd) |
| 1769 | + edam_entities = [e for e in crate.get_entities() if "edamontology.org" in str(e.id)] |
| 1770 | + assert edam_entities == [] |
| 1771 | + |
| 1772 | + def test_skips_on_docker_failure(self, tmp_path: Path, mocker: "MockerFixture") -> None: |
| 1773 | + """Should log warning and return when Docker command fails.""" |
| 1774 | + from subprocess import CompletedProcess |
| 1775 | + |
| 1776 | + rd = self._make_run_dir_with_outputs(tmp_path, {"result.tsv": "a\tb\n"}) |
| 1777 | + crate = create_base_crate() |
| 1778 | + mocker.patch("sapporo.ro_crate.shutil.which", return_value="/usr/bin/docker") |
| 1779 | + mocker.patch( |
| 1780 | + "sapporo.ro_crate.subprocess.run", |
| 1781 | + return_value=CompletedProcess(args=[], returncode=1, stdout=b"", stderr=b"image not found"), |
| 1782 | + ) |
| 1783 | + add_tataki_edam(crate, rd) |
| 1784 | + edam_entities = [e for e in crate.get_entities() if "edamontology.org" in str(e.id)] |
| 1785 | + assert edam_entities == [] |
| 1786 | + |
| 1787 | + def test_skips_on_timeout(self, tmp_path: Path, mocker: "MockerFixture") -> None: |
| 1788 | + """Should log warning and return when Docker command times out.""" |
| 1789 | + import subprocess as sp |
| 1790 | + |
| 1791 | + rd = self._make_run_dir_with_outputs(tmp_path, {"result.tsv": "a\tb\n"}) |
| 1792 | + crate = create_base_crate() |
| 1793 | + mocker.patch("sapporo.ro_crate.shutil.which", return_value="/usr/bin/docker") |
| 1794 | + mocker.patch("sapporo.ro_crate.subprocess.run", side_effect=sp.TimeoutExpired(cmd="docker", timeout=300)) |
| 1795 | + add_tataki_edam(crate, rd) |
| 1796 | + edam_entities = [e for e in crate.get_entities() if "edamontology.org" in str(e.id)] |
| 1797 | + assert edam_entities == [] |
| 1798 | + |
| 1799 | + def test_skips_on_invalid_json(self, tmp_path: Path, mocker: "MockerFixture") -> None: |
| 1800 | + """Should log warning and return when tataki output is not valid JSON.""" |
| 1801 | + from subprocess import CompletedProcess |
| 1802 | + |
| 1803 | + rd = self._make_run_dir_with_outputs(tmp_path, {"result.tsv": "a\tb\n"}) |
| 1804 | + crate = create_base_crate() |
| 1805 | + mocker.patch("sapporo.ro_crate.shutil.which", return_value="/usr/bin/docker") |
| 1806 | + mocker.patch( |
| 1807 | + "sapporo.ro_crate.subprocess.run", |
| 1808 | + return_value=CompletedProcess(args=[], returncode=0, stdout=b"not json", stderr=b""), |
| 1809 | + ) |
| 1810 | + add_tataki_edam(crate, rd) |
| 1811 | + edam_entities = [e for e in crate.get_entities() if "edamontology.org" in str(e.id)] |
| 1812 | + assert edam_entities == [] |
| 1813 | + |
| 1814 | + def test_enriches_encoding_format(self, tmp_path: Path, mocker: "MockerFixture") -> None: |
| 1815 | + """Should replace encodingFormat with EDAM entity from tataki.""" |
| 1816 | + from subprocess import CompletedProcess |
| 1817 | + |
| 1818 | + from sapporo.config import RUN_DIR_STRUCTURE |
| 1819 | + |
| 1820 | + rd = self._make_run_dir_with_outputs(tmp_path, {"result.tsv": "col1\tcol2\nval1\tval2\n"}) |
| 1821 | + mocker.patch("sapporo.ro_crate.shutil.which", return_value="/usr/bin/docker") |
| 1822 | + mocker.patch( |
| 1823 | + "sapporo.ro_crate.subprocess.run", |
| 1824 | + return_value=CompletedProcess(args=[], returncode=0, stdout=self._TATAKI_JSON, stderr=b""), |
| 1825 | + ) |
| 1826 | + |
| 1827 | + jsonld = generate_ro_crate_metadata(rd) |
| 1828 | + graph = jsonld["@graph"] |
| 1829 | + |
| 1830 | + # Find the output file entity |
| 1831 | + outputs_prefix = RUN_DIR_STRUCTURE["outputs_dir"] |
| 1832 | + file_entity = next( |
| 1833 | + (e for e in graph if e.get("@id", "").startswith(outputs_prefix) and "result.tsv" in e.get("@id", "")), None |
| 1834 | + ) |
| 1835 | + assert file_entity is not None |
| 1836 | + |
| 1837 | + # encodingFormat should be the EDAM entity reference |
| 1838 | + enc = file_entity["encodingFormat"] |
| 1839 | + assert enc == {"@id": "http://edamontology.org/format_3475"} |
| 1840 | + |
| 1841 | + # EDAM entity should exist in the graph |
| 1842 | + edam_entity = next((e for e in graph if e.get("@id") == "http://edamontology.org/format_3475"), None) |
| 1843 | + assert edam_entity is not None |
| 1844 | + assert edam_entity["@type"] == "Thing" |
| 1845 | + assert edam_entity["name"] == "TSV" |
| 1846 | + |
| 1847 | + def test_skips_undetected_files(self, tmp_path: Path, mocker: "MockerFixture") -> None: |
| 1848 | + """Should not modify encodingFormat when tataki returns null id.""" |
| 1849 | + from subprocess import CompletedProcess |
| 1850 | + |
| 1851 | + from sapporo.config import RUN_DIR_STRUCTURE |
| 1852 | + |
| 1853 | + tataki_null = json.dumps( |
| 1854 | + { |
| 1855 | + "/work/unknown.bin": { |
| 1856 | + "id": None, |
| 1857 | + "label": None, |
| 1858 | + "decompressed": {"id": None, "label": None}, |
| 1859 | + }, |
| 1860 | + } |
| 1861 | + ).encode() |
| 1862 | + |
| 1863 | + rd = self._make_run_dir_with_outputs(tmp_path, {"unknown.bin": "\x00\x01\x02"}) |
| 1864 | + mocker.patch("sapporo.ro_crate.shutil.which", return_value="/usr/bin/docker") |
| 1865 | + mocker.patch( |
| 1866 | + "sapporo.ro_crate.subprocess.run", |
| 1867 | + return_value=CompletedProcess(args=[], returncode=0, stdout=tataki_null, stderr=b""), |
| 1868 | + ) |
| 1869 | + |
| 1870 | + jsonld = generate_ro_crate_metadata(rd) |
| 1871 | + graph = jsonld["@graph"] |
| 1872 | + |
| 1873 | + # File entity should still be present in the graph (encodingFormat itself is not asserted here) |
| 1874 | + outputs_prefix = RUN_DIR_STRUCTURE["outputs_dir"] |
| 1875 | + file_entity = next( |
| 1876 | + (e for e in graph if e.get("@id", "").startswith(outputs_prefix) and "unknown.bin" in e.get("@id", "")), |
| 1877 | + None, |
| 1878 | + ) |
| 1879 | + assert file_entity is not None |
| 1880 | + |
| 1881 | + # No EDAM format entity lacking a name should be in the graph |
| 1882 | + tataki_edam = [ |
| 1883 | + e for e in graph if e.get("@id", "").startswith("http://edamontology.org/format_") and e.get("name") is None |
| 1884 | + ] |
| 1885 | + assert tataki_edam == [] |
| 1886 | + |
| 1887 | + |
1719 | 1888 | class TestAddFileStats: |
1720 | 1889 | def test_skips_when_docker_not_available(self, tmp_path: Path) -> None: |
1721 | 1890 | """Should return immediately when docker binary is not found.""" |
|
0 commit comments