Thread tool profile into verify and gate the csv separator default on profile >= 26.0

d-callan · d-callan · commit c123c3556616 · 2025-12-08T23:43:16.000-05:00
diff --git a/lib/galaxy/tool_util/verify/__init__.py b/lib/galaxy/tool_util/verify/__init__.py
@@ -38,6 +38,8 @@
     tifffile = None  # type: ignore[assignment, unused-ignore]
 
 
+from packaging.version import Version
+
 from galaxy.tool_util.parser.util import (
     DEFAULT_DELTA,
     DEFAULT_DELTA_FRAC,
@@ -75,6 +77,7 @@ def verify(
     keep_outputs_dir: Optional[str] = None,
     verify_extra_files: Optional[Callable] = None,
     mode="file",
+    profile: Optional[str] = None,
 ):
     """Verify the content of a test output using test definitions described by attributes.
 
@@ -99,8 +102,10 @@ def get_filename(filename: str) -> str:
     assertions = attributes.get("assert_list", None)
     if assertions is not None:
         try:
-            # Auto-detect separator based on file type
-            sep = "," if attributes.get("ftype") == "csv" else "\t"
+            # Auto-detect separator based on file type for profile >= 26.0
+            sep: Optional[str] = None
+            if profile and Version(profile) >= Version("26.0"):
+                sep = "," if attributes.get("ftype") == "csv" else "\t"
             verify_assertions(output_content, attributes["assert_list"], attributes.get("decompress", False), sep=sep)
         except AssertionError as err:
             errmsg = f"{item_label} different than expected\n"
diff --git a/lib/galaxy/tool_util/verify/_types.py b/lib/galaxy/tool_util/verify/_types.py
@@ -38,6 +38,7 @@
 class ToolTestDescriptionDict(TypedDict):
     tool_id: str
     tool_version: Optional[str]
+    profile: NotRequired[Optional[str]]
     name: str
     test_index: int
     inputs: ExpandedToolInputsJsonified
diff --git a/lib/galaxy/tool_util/verify/asserts/tabular.py b/lib/galaxy/tool_util/verify/asserts/tabular.py
@@ -13,7 +13,9 @@
 )
 from ._util import _assert_number
 
-Sep = Annotated[str, AssertionParameter("Separator defining columns, default: tab (or comma for csv)")]
+Sep = Annotated[
+    str, AssertionParameter("Separator defining columns, default: tab (or comma for csv with profile >= 26.0)")
+]
 Comment = Annotated[
     str,
     AssertionParameter(
@@ -55,8 +57,8 @@ def assert_has_n_columns(
 
     Optionally a column separator (``sep``) and comment character(s)
     can be specified (``comment``, default is empty string). The first non-comment
-    line is used for determining the number of columns. The default separator is
-    tab for most tabular data types, but comma for csv files.
+    line is used for determining the number of columns. The default separator is tab;
+    for tools with profile >= 26.0, csv files default to comma instead.
     """
     first_line = get_first_line(output, comment)
     n_columns = len(first_line.split(sep))
diff --git a/lib/galaxy/tool_util/verify/interactor.py b/lib/galaxy/tool_util/verify/interactor.py
@@ -140,13 +140,15 @@ class ValidToolTestDict(TypedDict):
     error: Literal[False]
     tool_id: str
     tool_version: str
+    profile: NotRequired[Optional[str]]
     test_index: int
 
 
 class InvalidToolTestDict(TypedDict):
     error: Literal[True]
     tool_id: str
     tool_version: str
+    profile: NotRequired[Optional[str]]
     test_index: int
     inputs: Any
     exception: str
@@ -303,7 +305,13 @@ def get_tool_tests(self, tool_id: str, tool_version: Optional[str] = None) -> Li
         return response.json()
 
     def verify_output_collection(
-        self, output_collection_def, output_collection_id, history, tool_id, tool_version=None
+        self,
+        output_collection_def,
+        output_collection_id,
+        history,
+        tool_id,
+        tool_version=None,
+        profile: Optional[str] = None,
     ):
         data_collection = self._get(
             f"dataset_collections/{output_collection_id}", data={"instance_type": "history"}
@@ -319,6 +327,7 @@ def verify_dataset(element, element_attrib, element_outfile):
                     attributes=element_attrib,
                     tool_id=tool_id,
                     tool_version=tool_version,
+                    profile=profile,
                 )
             except AssertionError as e:
                 raise AssertionError(
@@ -327,7 +336,17 @@ def verify_dataset(element, element_attrib, element_outfile):
 
         verify_collection(output_collection_def, data_collection, verify_dataset)
 
-    def verify_output(self, history_id, jobs, output_data, output_testdef, tool_id, maxseconds, tool_version=None):
+    def verify_output(
+        self,
+        history_id,
+        jobs,
+        output_data,
+        output_testdef,
+        tool_id,
+        maxseconds,
+        tool_version=None,
+        profile: Optional[str] = None,
+    ):
         outfile = output_testdef.outfile
         attributes = output_testdef.attributes
         name = output_testdef.name
@@ -342,6 +361,7 @@ def verify_output(self, history_id, jobs, output_data, output_testdef, tool_id,
                 attributes=attributes,
                 tool_id=tool_id,
                 tool_version=tool_version,
+                profile=profile,
             )
         except AssertionError as e:
             raise AssertionError(f"Output {name}: {str(e)}")
@@ -378,6 +398,7 @@ def verify_output(self, history_id, jobs, output_data, output_testdef, tool_id,
                     primary_attributes,
                     tool_id=tool_id,
                     tool_version=tool_version,
+                    profile=profile,
                 )
             except AssertionError as e:
                 raise AssertionError(f"Primary output {name}: {str(e)}")
@@ -386,7 +407,9 @@ def wait_for_jobs(self, history_id, jobs, maxseconds):
         for job in jobs:
             self.wait_for_job(job["id"], history_id, maxseconds)
 
-    def verify_output_dataset(self, history_id, hda_id, outfile, attributes, tool_id, tool_version=None):
+    def verify_output_dataset(
+        self, history_id, hda_id, outfile, attributes, tool_id, tool_version=None, profile: Optional[str] = None
+    ):
         fetcher = self.__dataset_fetcher(history_id)
         test_data_downloader = self.__test_data_downloader(tool_id, tool_version, attributes)
         verify_hid(
@@ -396,6 +419,7 @@ def verify_output_dataset(self, history_id, hda_id, outfile, attributes, tool_id
             dataset_fetcher=fetcher,
             test_data_downloader=test_data_downloader,
             keep_outputs_dir=self.keep_outputs_dir,
+            profile=profile,
         )
         self._verify_metadata(history_id, hda_id, attributes)
 
@@ -1300,6 +1324,7 @@ def verify_hid(
     test_data_downloader,
     dataset_fetcher=None,
     keep_outputs_dir: Optional[str] = None,
+    profile: Optional[str] = None,
 ):
     assert dataset_fetcher is not None
 
@@ -1322,6 +1347,7 @@ def verify_extra_files(extra_files):
         get_filecontent=test_data_downloader,
         keep_outputs_dir=keep_outputs_dir,
         verify_extra_files=verify_extra_files,
+        profile=profile,
     )
 
 
@@ -1757,6 +1783,7 @@ def register_exception(e: Exception):
                     tool_id=job["tool_id"],
                     maxseconds=maxseconds,
                     tool_version=testdef.tool_version,
+                    profile=testdef.profile,
                 )
             except Exception as e:
                 register_exception(e)
@@ -1816,7 +1843,11 @@ def register_exception(e: Exception):
             # the job completed so re-hit the API for more information.
             data_collection_id = data_collection_list[name]["id"]
             galaxy_interactor.verify_output_collection(
-                output_collection_def, data_collection_id, history, job["tool_id"]
+                output_collection_def,
+                data_collection_id,
+                history,
+                job["tool_id"],
+                profile=testdef.profile,
             )
         except Exception as e:
             register_exception(e)
@@ -1993,6 +2024,7 @@ class ToolTestDescription:
     name: str
     tool_id: str
     tool_version: Optional[str]
+    profile: Optional[str]
     test_index: int
     num_outputs: Optional[int]
     stdout: Optional[AssertionList]
@@ -2041,6 +2073,7 @@ def __init__(self, json_dict: ToolTestDescriptionDict):
         self.request_schema = json_dict.get("request_schema", None)
         self.tool_id = json_dict["tool_id"]
         self.tool_version = json_dict.get("tool_version")
+        self.profile = json_dict.get("profile")
         self.maxseconds = json_dict.get("maxseconds")
 
     def test_data(self):
@@ -2067,6 +2100,7 @@ def to_dict(self) -> ToolTestDescriptionDict:
             "test_index": self.test_index,
             "tool_id": self.tool_id,
             "tool_version": self.tool_version,
+            "profile": self.profile,
             "required_files": self.required_files,
             "required_data_tables": self.required_data_tables,
             "required_loc_files": self.required_loc_files,
diff --git a/lib/galaxy/tool_util/verify/parse.py b/lib/galaxy/tool_util/verify/parse.py
@@ -144,6 +144,7 @@ def _description_from_tool_source(
             required_data_tables,
             required_loc_files,
         )
+        profile = tool_source.parse_profile()
         processed_test_dict = ValidToolTestDict(
             {
                 "inputs": processed_inputs,
@@ -164,16 +165,19 @@ def _description_from_tool_source(
                 "required_loc_files": required_loc_files,
                 "tool_id": tool_id,
                 "tool_version": tool_version,
+                "profile": profile,
                 "test_index": test_index,
                 "maxseconds": maxseconds,
                 "error": False,
             }
         )
     except Exception:
+        profile = tool_source.parse_profile()
         processed_test_dict = InvalidToolTestDict(
             {
                 "tool_id": tool_id,
                 "tool_version": tool_version,
+                "profile": profile,
                 "test_index": test_index,
                 "inputs": {},
                 "error": True,
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -2608,8 +2608,8 @@ range of expected occurences can be specified by ``min`` and/or ``max``.
 
 Optionally a column separator (``sep``) and comment character(s)
 can be specified (``comment``, default is empty string). The first non-comment
-line is used for determining the number of columns. The default separator is
-tab for most tabular data types, but comma for csv files.
+line is used for determining the number of columns. The default separator is tab;
+for tools with profile >= 26.0, csv files default to comma instead.
 
 $attribute_list::5]]></xs:documentation>
         </xs:annotation>
@@ -2636,7 +2636,7 @@ $attribute_list::5]]></xs:documentation>
           </xs:attribute>
           <xs:attribute name="sep" type="xs:string" use="optional">
             <xs:annotation>
-              <xs:documentation xml:lang="en"><![CDATA[Separator defining columns, default: tab (or comma for csv)]]></xs:documentation>
+              <xs:documentation xml:lang="en"><![CDATA[Separator defining columns, default: tab (or comma for csv with profile >= 26.0)]]></xs:documentation>
             </xs:annotation>
           </xs:attribute>
           <xs:attribute name="comment" type="xs:string" use="optional">
diff --git a/test/unit/tool_util/test_verify_function.py b/test/unit/tool_util/test_verify_function.py
@@ -95,9 +95,9 @@ def test_sim_size_failure_still_updates(tmp_path):
     assert (tmp_path / filename).open("rb").read() == b"expected"
 
 
-def test_csv_ftype_auto_sep():
-    """test that ftype='csv' automatically sets separator for has_n_columns assertion"""
-    item_label = "csv test"
+def test_csv_ftype_auto_sep_profile_26():
+    """For profile >= 26.0, ftype='csv' automatically sets separator for has_n_columns."""
+    item_label = "csv test profile 26.0"
     output_content = b"col1,col2,col3\n"
     attributes = {
         "ftype": "csv",
@@ -110,16 +110,50 @@ def test_csv_ftype_auto_sep():
         ],
     }
 
-    # This should pass because ftype="csv" triggers sep="," auto-detection
+    # With profile >= 26.0, ftype="csv" triggers sep="," auto-detection
     verify(
         item_label,
         output_content,
         attributes=attributes,
         filename=None,
         get_filecontent=t_data_downloader_for(output_content),
+        profile="26.0",
     )
 
 
+def test_csv_ftype_auto_sep_legacy_profile():
+    """Without profile, default behavior still uses tab separator for has_n_columns."""
+    item_label = "csv test legacy profile"
+    output_content = b"col1,col2,col3\n"
+    attributes = {
+        "ftype": "csv",
+        "assert_list": [
+            {
+                "tag": "has_n_columns",
+                "attributes": {"n": "3"},
+                "children": [],
+            }
+        ],
+    }
+
+    # Without a profile, sep auto-detection is not applied, so the default
+    # separator remains a tab character. Splitting a comma-separated line on
+    # tabs yields 1 column instead of 3, so this should raise an AssertionError.
+    raised = False
+    try:
+        verify(
+            item_label,
+            output_content,
+            attributes=attributes,
+            filename=None,
+            get_filecontent=t_data_downloader_for(output_content),
+        )
+    except AssertionError:
+        raised = True
+
+    assert raised
+
+
 def test_tabular_ftype_auto_sep():
     """test that ftype='tabular' uses tab separator for has_n_columns assertion"""
     item_label = "tabular test"

Original file line number	Diff line number	Diff line change
`@@ -144,6 +144,7 @@ def _description_from_tool_source(`
`144`	`144`	`required_data_tables,`
`145`	`145`	`required_loc_files,`
`146`	`146`	`)`
	`147`	`+ profile = tool_source.parse_profile()`
`147`	`148`	`processed_test_dict = ValidToolTestDict(`
`148`	`149`	`{`
`149`	`150`	`"inputs": processed_inputs,`
`@@ -164,16 +165,19 @@ def _description_from_tool_source(`
`164`	`165`	`"required_loc_files": required_loc_files,`
`165`	`166`	`"tool_id": tool_id,`
`166`	`167`	`"tool_version": tool_version,`
	`168`	`+ "profile": profile,`
`167`	`169`	`"test_index": test_index,`
`168`	`170`	`"maxseconds": maxseconds,`
`169`	`171`	`"error": False,`
`170`	`172`	`}`
`171`	`173`	`)`
`172`	`174`	`except Exception:`
	`175`	`+ profile = tool_source.parse_profile()`
`173`	`176`	`processed_test_dict = InvalidToolTestDict(`
`174`	`177`	`{`
`175`	`178`	`"tool_id": tool_id,`
`176`	`179`	`"tool_version": tool_version,`
	`180`	`+ "profile": profile,`
`177`	`181`	`"test_index": test_index,`
`178`	`182`	`"inputs": {},`
`179`	`183`	`"error": True,`