Skip to content

Commit b9d016f

Browse files
authored
Merge pull request #39 from Sage-Bionetworks-Workflows/bwmac/orca-175/add_csv_updater
[ORCA-175] Add `CsvUpdater` and `update_csv` command
2 parents a46b491 + c21bc9b commit b9d016f

File tree

16 files changed

+570
-1
lines changed

16 files changed

+570
-1
lines changed

src/dcqc/main.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from dcqc.suites.suite_abc import SuiteABC
1414
from dcqc.target import SingleTarget
1515
from dcqc.tests.base_test import BaseTest, ExternalTestMixin
16+
from dcqc.updaters import CsvUpdater
1617

1718
# Make commands optional to allow for `dcqc --version`
1819
app = Typer(invoke_without_command=True)
@@ -204,3 +205,15 @@ def qc_file(
204205
report = JsonReport()
205206
suite_json = report.generate(suite)
206207
json.dump(suite_json, sys.stdout, indent=2)
208+
209+
210+
@app.command()
def update_csv(
    suites_file: Path = input_path_arg,
    input_file: Path = input_path_arg,
    output_file: Path = output_path_arg,
):
    """Update input CSV file with dcqc_status column"""
    # Parse the serialized suites, then let CsvUpdater rewrite the CSV
    # with one dcqc_status value per row.
    parsed_suites = JsonParser.parse_objects(suites_file, SuiteABC)
    CsvUpdater(input_file, output_file).update(parsed_suites)

src/dcqc/suites/suite_abc.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,3 +291,9 @@ def from_dict(cls, dictionary: SerializedObject) -> SuiteABC:
291291
def get_base_class(cls):
292292
"""Retrieve base class."""
293293
return SuiteABC
294+
295+
def get_status(self) -> SuiteStatus:
    """Compute (if applicable) and return the suite status."""
    # Lazily compute the status on first access and cache it;
    # subsequent calls return the stored value unchanged.
    status = self._status
    if status == SuiteStatus.NONE:
        status = self.compute_status()
        self._status = status
    return status

src/dcqc/updaters.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from collections import defaultdict
2+
from csv import DictWriter
3+
from dataclasses import dataclass
4+
from pathlib import Path
5+
from typing import List
6+
7+
from dcqc.parsers import CsvParser
8+
from dcqc.suites.suite_abc import SuiteABC
9+
10+
11+
class CsvUpdater:
    """Append a ``dcqc_status`` column to a CSV file based on QC suite results.

    NOTE(review): this was previously decorated with ``@dataclass`` while also
    defining ``__init__`` by hand and declaring a ``parser: CsvParser`` field
    that was never assigned — so the dataclass-generated ``__repr__`` and
    ``__eq__`` raised ``AttributeError`` whenever used. It is now a plain
    class with the same constructor signature and behavior.

    Attributes:
        input_path: Path of the CSV file to read.
        output_path: Path of the updated CSV file to write.
    """

    input_path: Path
    output_path: Path

    def __init__(self, input_path: Path, output_path: Path):
        """Store the input and output CSV paths."""
        self.input_path = input_path
        self.output_path = output_path

    def update(self, suites: List[SuiteABC]) -> None:
        """Rewrite the input CSV with a ``dcqc_status`` column appended.

        Each row's status is the worst status among all suites whose first
        target file matches the row's ``url`` (RED > AMBER > GREEN > NONE).

        Args:
            suites: QC suites whose statuses are collapsed per URL.

        Raises:
            ValueError: If the input CSV contains no data rows.
            KeyError: If a row's URL has no corresponding suite.
        """
        # {url: [list_of_statuses]} data structure to allow for multi-file targets
        # TODO add support for suites with multiple files in them (multi)
        statuses_by_url = defaultdict(list)
        for suite in suites:
            url = suite.target.files[0].url
            statuses_by_url[url].append(suite.get_status().value)

        # Collapse each URL's statuses to the worst one present,
        # in decreasing order of severity.
        severity_order = ("RED", "AMBER", "GREEN")
        collapsed_dict = {}
        for url, statuses in statuses_by_url.items():
            collapsed_dict[url] = next(
                (status for status in severity_order if status in statuses),
                "NONE",
            )

        # Create CSV data structure, tagging each input row with its status
        row_list = []
        parser = CsvParser(self.input_path)
        for _, csv_data in parser.list_rows():
            csv_data["dcqc_status"] = collapsed_dict[csv_data["url"]]
            row_list.append(csv_data)

        if not row_list:
            raise ValueError("No rows found in input CSV")

        # Export updated CSV ("w" instead of "w+": the file is only written)
        keys = row_list[0].keys()
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(
            str(self.output_path), "w", newline="", encoding="utf-8"
        ) as output_file:
            dict_writer = DictWriter(output_file, keys)
            dict_writer.writeheader()
            dict_writer.writerows(row_list)

tests/conftest.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
from datetime import datetime
1111
from getpass import getuser
1212
from pathlib import Path
13+
from unittest.mock import MagicMock
1314
from uuid import uuid4
1415

1516
import pytest
1617

1718
from dcqc.file import File
18-
from dcqc.suites.suite_abc import SuiteABC
19+
from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
1920
from dcqc.target import SingleTarget
2021

2122
CNFPATH = Path(__file__).resolve()
@@ -129,3 +130,36 @@ def _get_output(filename: str) -> Path:
129130
return output
130131

131132
yield _get_output
133+
134+
135+
@pytest.fixture
def mocked_suites_single_targets():
    """Mocked suites for single-file targets, one per status value."""
    status_by_url = {
        "syn://syn51585496": SuiteStatus.GREEN,
        "syn://syn51585494": SuiteStatus.RED,
        "syn://syn51585495": SuiteStatus.AMBER,
        "syn://syn51585493": SuiteStatus.NONE,
    }

    def _make_suite(url, status):
        # Stand-in for a SuiteABC whose single target file has `url`
        # and whose get_status() reports `status`.
        suite = MagicMock(cls=SuiteABC)
        suite.target.files[0].url = url
        suite.get_status.return_value = status
        return suite

    return [_make_suite(url, status) for url, status in status_by_url.items()]
150+
151+
152+
# @pytest.fixture
153+
# def mocked_suites_multi_targets():
154+
# mock_dict_multi = {
155+
# "syn://syn51585496": SuiteStatus.GREEN,
156+
# "syn://syn51585494": SuiteStatus.RED,
157+
# "syn://syn51585495": SuiteStatus.AMBER,
158+
# }
159+
# mocked_suites = []
160+
# for url, status in mock_dict_multi.items():
161+
# suite = MagicMock(cls=SuiteABC)
162+
# suite.target.files[0].url = url
163+
# suite.get_status.return_value = status
164+
# mocked_suites.append(suite)
165+
# return mocked_suites

tests/data/empty_input.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
url,file_type,md5_checksum

tests/data/generate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from dcqc import tests
1212
from dcqc.file import File
1313
from dcqc.mixins import SerializableMixin
14+
from dcqc.parsers import JsonParser
1415
from dcqc.reports import JsonReport
1516
from dcqc.suites.suite_abc import SuiteABC
1617
from dcqc.target import SingleTarget
@@ -60,3 +61,15 @@ def export(obj: SerializableMixin | Sequence[SerializableMixin], filename: str):
6061
skipped_tests = ["LibTiffInfoTest"]
6162
suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests)
6263
export(suite, "suite.json")
64+
65+
# suites.json
input_jsons = [
    Path(f"tests/data/suites_files/suites_{index}.json") for index in (1, 2, 3)
]
suites = [JsonParser.parse_object(json_path, SuiteABC) for json_path in input_jsons]
export(suites, "suites.json")

tests/data/input.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
url,file_type,md5_checksum
2+
syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9
3+
syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff
4+
syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9

tests/data/suites.json

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
[
2+
{
3+
"type": "TiffSuite",
4+
"target": {
5+
"id": "0001",
6+
"files": [
7+
{
8+
"url": "syn://syn51585496",
9+
"metadata": {
10+
"md5_checksum": "c7b08f6decb5e7572efbe6074926a843"
11+
},
12+
"type": "TIFF",
13+
"name": "circuit.tif",
14+
"local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif"
15+
}
16+
],
17+
"type": "SingleTarget"
18+
},
19+
"suite_status": {
20+
"required_tests": [
21+
"Md5ChecksumTest",
22+
"FileExtensionTest",
23+
"LibTiffInfoTest"
24+
],
25+
"skipped_tests": [],
26+
"status": "GREEN"
27+
},
28+
"tests": [
29+
{
30+
"type": "FileExtensionTest",
31+
"tier": 1,
32+
"is_external_test": false,
33+
"status": "passed"
34+
},
35+
{
36+
"type": "GrepDateTest",
37+
"tier": 4,
38+
"is_external_test": true,
39+
"status": "passed"
40+
},
41+
{
42+
"type": "LibTiffInfoTest",
43+
"tier": 2,
44+
"is_external_test": true,
45+
"status": "passed"
46+
},
47+
{
48+
"type": "Md5ChecksumTest",
49+
"tier": 1,
50+
"is_external_test": false,
51+
"status": "passed"
52+
},
53+
{
54+
"type": "TiffTag306DateTimeTest",
55+
"tier": 4,
56+
"is_external_test": true,
57+
"status": "passed"
58+
}
59+
]
60+
},
61+
{
62+
"type": "TiffSuite",
63+
"target": {
64+
"id": "0002",
65+
"files": [
66+
{
67+
"url": "syn://syn51585494",
68+
"metadata": {
69+
"md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404"
70+
},
71+
"type": "TIFF",
72+
"name": "test_contains_word_date.tif",
73+
"local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif"
74+
}
75+
],
76+
"type": "SingleTarget"
77+
},
78+
"suite_status": {
79+
"required_tests": [
80+
"Md5ChecksumTest",
81+
"FileExtensionTest",
82+
"LibTiffInfoTest"
83+
],
84+
"skipped_tests": [],
85+
"status": "RED"
86+
},
87+
"tests": [
88+
{
89+
"type": "FileExtensionTest",
90+
"tier": 1,
91+
"is_external_test": false,
92+
"status": "passed"
93+
},
94+
{
95+
"type": "GrepDateTest",
96+
"tier": 4,
97+
"is_external_test": true,
98+
"status": "failed"
99+
},
100+
{
101+
"type": "LibTiffInfoTest",
102+
"tier": 2,
103+
"is_external_test": true,
104+
"status": "failed"
105+
},
106+
{
107+
"type": "Md5ChecksumTest",
108+
"tier": 1,
109+
"is_external_test": false,
110+
"status": "passed"
111+
},
112+
{
113+
"type": "TiffTag306DateTimeTest",
114+
"tier": 4,
115+
"is_external_test": true,
116+
"status": "passed"
117+
}
118+
]
119+
},
120+
{
121+
"type": "TiffSuite",
122+
"target": {
123+
"id": "0003",
124+
"files": [
125+
{
126+
"url": "syn://syn51585495",
127+
"metadata": {
128+
"md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268"
129+
},
130+
"type": "TIFF",
131+
"name": "test_image_dirty_datetime.tif",
132+
"local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif"
133+
}
134+
],
135+
"type": "SingleTarget"
136+
},
137+
"suite_status": {
138+
"required_tests": [
139+
"Md5ChecksumTest",
140+
"FileExtensionTest",
141+
"LibTiffInfoTest"
142+
],
143+
"skipped_tests": [],
144+
"status": "AMBER"
145+
},
146+
"tests": [
147+
{
148+
"type": "FileExtensionTest",
149+
"tier": 1,
150+
"is_external_test": false,
151+
"status": "passed"
152+
},
153+
{
154+
"type": "GrepDateTest",
155+
"tier": 4,
156+
"is_external_test": true,
157+
"status": "passed"
158+
},
159+
{
160+
"type": "LibTiffInfoTest",
161+
"tier": 2,
162+
"is_external_test": true,
163+
"status": "passed"
164+
},
165+
{
166+
"type": "Md5ChecksumTest",
167+
"tier": 1,
168+
"is_external_test": false,
169+
"status": "passed"
170+
},
171+
{
172+
"type": "TiffTag306DateTimeTest",
173+
"tier": 4,
174+
"is_external_test": true,
175+
"status": "failed"
176+
}
177+
]
178+
}
179+
]

0 commit comments

Comments
 (0)