Skip to content

Commit c123c35

Browse files
committed
thread tool profile into verify and gate csv sep default
1 parent 1c987b0 commit c123c35

File tree

7 files changed

+96
-16
lines changed

7 files changed

+96
-16
lines changed

lib/galaxy/tool_util/verify/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
tifffile = None # type: ignore[assignment, unused-ignore]
3939

4040

41+
from packaging.version import Version
42+
4143
from galaxy.tool_util.parser.util import (
4244
DEFAULT_DELTA,
4345
DEFAULT_DELTA_FRAC,
@@ -75,6 +77,7 @@ def verify(
7577
keep_outputs_dir: Optional[str] = None,
7678
verify_extra_files: Optional[Callable] = None,
7779
mode="file",
80+
profile: Optional[str] = None,
7881
):
7982
"""Verify the content of a test output using test definitions described by attributes.
8083
@@ -99,8 +102,10 @@ def get_filename(filename: str) -> str:
99102
assertions = attributes.get("assert_list", None)
100103
if assertions is not None:
101104
try:
102-
# Auto-detect separator based on file type
103-
sep = "," if attributes.get("ftype") == "csv" else "\t"
105+
# Auto-detect separator based on file type for profile >= 26.0
106+
sep: Optional[str] = None
107+
if profile and Version(profile) >= Version("26.0"):
108+
sep = "," if attributes.get("ftype") == "csv" else "\t"
104109
verify_assertions(output_content, attributes["assert_list"], attributes.get("decompress", False), sep=sep)
105110
except AssertionError as err:
106111
errmsg = f"{item_label} different than expected\n"

lib/galaxy/tool_util/verify/_types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
class ToolTestDescriptionDict(TypedDict):
3939
tool_id: str
4040
tool_version: Optional[str]
41+
profile: NotRequired[Optional[str]]
4142
name: str
4243
test_index: int
4344
inputs: ExpandedToolInputsJsonified

lib/galaxy/tool_util/verify/asserts/tabular.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
)
1414
from ._util import _assert_number
1515

16-
Sep = Annotated[str, AssertionParameter("Separator defining columns, default: tab (or comma for csv)")]
16+
Sep = Annotated[
17+
str, AssertionParameter("Separator defining columns, default: tab (or comma for csv with profile >= 26.0)")
18+
]
1719
Comment = Annotated[
1820
str,
1921
AssertionParameter(
@@ -55,8 +57,8 @@ def assert_has_n_columns(
5557
5658
Optionally a column separator (``sep``) and comment character(s)
5759
can be specified (``comment``, default is empty string). The first non-comment
58-
line is used for determining the number of columns. The default separator is
59-
tab for most tabular data types, but comma for csv files.
60+
line is used for determining the number of columns. For tools with profile >= 26.0,
61+
the default separator is tab for most tabular data types, but comma for csv files.
6062
"""
6163
first_line = get_first_line(output, comment)
6264
n_columns = len(first_line.split(sep))

lib/galaxy/tool_util/verify/interactor.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,15 @@ class ValidToolTestDict(TypedDict):
140140
error: Literal[False]
141141
tool_id: str
142142
tool_version: str
143+
profile: NotRequired[Optional[str]]
143144
test_index: int
144145

145146

146147
class InvalidToolTestDict(TypedDict):
147148
error: Literal[True]
148149
tool_id: str
149150
tool_version: str
151+
profile: NotRequired[Optional[str]]
150152
test_index: int
151153
inputs: Any
152154
exception: str
@@ -303,7 +305,13 @@ def get_tool_tests(self, tool_id: str, tool_version: Optional[str] = None) -> Li
303305
return response.json()
304306

305307
def verify_output_collection(
306-
self, output_collection_def, output_collection_id, history, tool_id, tool_version=None
308+
self,
309+
output_collection_def,
310+
output_collection_id,
311+
history,
312+
tool_id,
313+
tool_version=None,
314+
profile: Optional[str] = None,
307315
):
308316
data_collection = self._get(
309317
f"dataset_collections/{output_collection_id}", data={"instance_type": "history"}
@@ -319,6 +327,7 @@ def verify_dataset(element, element_attrib, element_outfile):
319327
attributes=element_attrib,
320328
tool_id=tool_id,
321329
tool_version=tool_version,
330+
profile=profile,
322331
)
323332
except AssertionError as e:
324333
raise AssertionError(
@@ -327,7 +336,17 @@ def verify_dataset(element, element_attrib, element_outfile):
327336

328337
verify_collection(output_collection_def, data_collection, verify_dataset)
329338

330-
def verify_output(self, history_id, jobs, output_data, output_testdef, tool_id, maxseconds, tool_version=None):
339+
def verify_output(
340+
self,
341+
history_id,
342+
jobs,
343+
output_data,
344+
output_testdef,
345+
tool_id,
346+
maxseconds,
347+
tool_version=None,
348+
profile: Optional[str] = None,
349+
):
331350
outfile = output_testdef.outfile
332351
attributes = output_testdef.attributes
333352
name = output_testdef.name
@@ -342,6 +361,7 @@ def verify_output(self, history_id, jobs, output_data, output_testdef, tool_id,
342361
attributes=attributes,
343362
tool_id=tool_id,
344363
tool_version=tool_version,
364+
profile=profile,
345365
)
346366
except AssertionError as e:
347367
raise AssertionError(f"Output {name}: {str(e)}")
@@ -378,6 +398,7 @@ def verify_output(self, history_id, jobs, output_data, output_testdef, tool_id,
378398
primary_attributes,
379399
tool_id=tool_id,
380400
tool_version=tool_version,
401+
profile=profile,
381402
)
382403
except AssertionError as e:
383404
raise AssertionError(f"Primary output {name}: {str(e)}")
@@ -386,7 +407,9 @@ def wait_for_jobs(self, history_id, jobs, maxseconds):
386407
for job in jobs:
387408
self.wait_for_job(job["id"], history_id, maxseconds)
388409

389-
def verify_output_dataset(self, history_id, hda_id, outfile, attributes, tool_id, tool_version=None):
410+
def verify_output_dataset(
411+
self, history_id, hda_id, outfile, attributes, tool_id, tool_version=None, profile: Optional[str] = None
412+
):
390413
fetcher = self.__dataset_fetcher(history_id)
391414
test_data_downloader = self.__test_data_downloader(tool_id, tool_version, attributes)
392415
verify_hid(
@@ -396,6 +419,7 @@ def verify_output_dataset(self, history_id, hda_id, outfile, attributes, tool_id
396419
dataset_fetcher=fetcher,
397420
test_data_downloader=test_data_downloader,
398421
keep_outputs_dir=self.keep_outputs_dir,
422+
profile=profile,
399423
)
400424
self._verify_metadata(history_id, hda_id, attributes)
401425

@@ -1300,6 +1324,7 @@ def verify_hid(
13001324
test_data_downloader,
13011325
dataset_fetcher=None,
13021326
keep_outputs_dir: Optional[str] = None,
1327+
profile: Optional[str] = None,
13031328
):
13041329
assert dataset_fetcher is not None
13051330

@@ -1322,6 +1347,7 @@ def verify_extra_files(extra_files):
13221347
get_filecontent=test_data_downloader,
13231348
keep_outputs_dir=keep_outputs_dir,
13241349
verify_extra_files=verify_extra_files,
1350+
profile=profile,
13251351
)
13261352

13271353

@@ -1757,6 +1783,7 @@ def register_exception(e: Exception):
17571783
tool_id=job["tool_id"],
17581784
maxseconds=maxseconds,
17591785
tool_version=testdef.tool_version,
1786+
profile=testdef.profile,
17601787
)
17611788
except Exception as e:
17621789
register_exception(e)
@@ -1816,7 +1843,11 @@ def register_exception(e: Exception):
18161843
# the job completed so re-hit the API for more information.
18171844
data_collection_id = data_collection_list[name]["id"]
18181845
galaxy_interactor.verify_output_collection(
1819-
output_collection_def, data_collection_id, history, job["tool_id"]
1846+
output_collection_def,
1847+
data_collection_id,
1848+
history,
1849+
job["tool_id"],
1850+
profile=testdef.profile,
18201851
)
18211852
except Exception as e:
18221853
register_exception(e)
@@ -1993,6 +2024,7 @@ class ToolTestDescription:
19932024
name: str
19942025
tool_id: str
19952026
tool_version: Optional[str]
2027+
profile: Optional[str]
19962028
test_index: int
19972029
num_outputs: Optional[int]
19982030
stdout: Optional[AssertionList]
@@ -2041,6 +2073,7 @@ def __init__(self, json_dict: ToolTestDescriptionDict):
20412073
self.request_schema = json_dict.get("request_schema", None)
20422074
self.tool_id = json_dict["tool_id"]
20432075
self.tool_version = json_dict.get("tool_version")
2076+
self.profile = json_dict.get("profile")
20442077
self.maxseconds = json_dict.get("maxseconds")
20452078

20462079
def test_data(self):
@@ -2067,6 +2100,7 @@ def to_dict(self) -> ToolTestDescriptionDict:
20672100
"test_index": self.test_index,
20682101
"tool_id": self.tool_id,
20692102
"tool_version": self.tool_version,
2103+
"profile": self.profile,
20702104
"required_files": self.required_files,
20712105
"required_data_tables": self.required_data_tables,
20722106
"required_loc_files": self.required_loc_files,

lib/galaxy/tool_util/verify/parse.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def _description_from_tool_source(
144144
required_data_tables,
145145
required_loc_files,
146146
)
147+
profile = tool_source.parse_profile()
147148
processed_test_dict = ValidToolTestDict(
148149
{
149150
"inputs": processed_inputs,
@@ -164,16 +165,19 @@ def _description_from_tool_source(
164165
"required_loc_files": required_loc_files,
165166
"tool_id": tool_id,
166167
"tool_version": tool_version,
168+
"profile": profile,
167169
"test_index": test_index,
168170
"maxseconds": maxseconds,
169171
"error": False,
170172
}
171173
)
172174
except Exception:
175+
profile = tool_source.parse_profile()
173176
processed_test_dict = InvalidToolTestDict(
174177
{
175178
"tool_id": tool_id,
176179
"tool_version": tool_version,
180+
"profile": profile,
177181
"test_index": test_index,
178182
"inputs": {},
179183
"error": True,

lib/galaxy/tool_util/xsd/galaxy.xsd

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,8 +2608,8 @@ range of expected occurences can be specified by ``min`` and/or ``max``.
26082608
26092609
Optionally a column separator (``sep``) and comment character(s)
26102610
can be specified (``comment``, default is empty string). The first non-comment
2611-
line is used for determining the number of columns. The default separator is
2612-
tab for most tabular data types, but comma for csv files.
2611+
line is used for determining the number of columns. For tools with profile >= 26.0,
2612+
the default separator is tab for most tabular data types, but comma for csv files.
26132613
26142614
$attribute_list::5]]></xs:documentation>
26152615
</xs:annotation>
@@ -2636,7 +2636,7 @@ $attribute_list::5]]></xs:documentation>
26362636
</xs:attribute>
26372637
<xs:attribute name="sep" type="xs:string" use="optional">
26382638
<xs:annotation>
2639-
<xs:documentation xml:lang="en"><![CDATA[Separator defining columns, default: tab (or comma for csv)]]></xs:documentation>
2639+
<xs:documentation xml:lang="en"><![CDATA[Separator defining columns, default: tab (or comma for csv with profile >= 26.0)]]></xs:documentation>
26402640
</xs:annotation>
26412641
</xs:attribute>
26422642
<xs:attribute name="comment" type="xs:string" use="optional">

test/unit/tool_util/test_verify_function.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ def test_sim_size_failure_still_updates(tmp_path):
9595
assert (tmp_path / filename).open("rb").read() == b"expected"
9696

9797

98-
def test_csv_ftype_auto_sep():
99-
"""test that ftype='csv' automatically sets separator for has_n_columns assertion"""
100-
item_label = "csv test"
98+
def test_csv_ftype_auto_sep_profile_26():
99+
"""For profile >= 26.0, ftype='csv' automatically sets separator for has_n_columns."""
100+
item_label = "csv test profile 26.0"
101101
output_content = b"col1,col2,col3\n"
102102
attributes = {
103103
"ftype": "csv",
@@ -110,16 +110,50 @@ def test_csv_ftype_auto_sep():
110110
],
111111
}
112112

113-
# This should pass because ftype="csv" triggers sep="," auto-detection
113+
# With profile >= 26.0, ftype="csv" triggers sep="," auto-detection
114114
verify(
115115
item_label,
116116
output_content,
117117
attributes=attributes,
118118
filename=None,
119119
get_filecontent=t_data_downloader_for(output_content),
120+
profile="26.0",
120121
)
121122

122123

124+
def test_csv_ftype_auto_sep_legacy_profile():
125+
"""Without profile, default behavior still uses tab separator for has_n_columns."""
126+
item_label = "csv test legacy profile"
127+
output_content = b"col1,col2,col3\n"
128+
attributes = {
129+
"ftype": "csv",
130+
"assert_list": [
131+
{
132+
"tag": "has_n_columns",
133+
"attributes": {"n": "3"},
134+
"children": [],
135+
}
136+
],
137+
}
138+
139+
# Without a profile, sep auto-detection is not applied, so the default
140+
# separator remains a tab character. Splitting a comma-separated line on
141+
# tabs yields 1 column instead of 3, so this should raise an AssertionError.
142+
raised = False
143+
try:
144+
verify(
145+
item_label,
146+
output_content,
147+
attributes=attributes,
148+
filename=None,
149+
get_filecontent=t_data_downloader_for(output_content),
150+
)
151+
except AssertionError:
152+
raised = True
153+
154+
assert raised
155+
156+
123157
def test_tabular_ftype_auto_sep():
124158
"""test that ftype='tabular' uses tab separator for has_n_columns assertion"""
125159
item_label = "tabular test"

0 commit comments

Comments
 (0)