New implementation of _sort_field_names based on Python list sorting and unit test (#682)

Damian-AI · web-flow · commit dd2b98a1f7d6 · 2025-07-02T14:02:52.000-06:00
diff --git a/packages/seacas/scripts/exomerge3.py b/packages/seacas/scripts/exomerge3.py
@@ -473,31 +473,27 @@ class ExodusModel(object):
     ELEMENT_ORDER["line3"] = 2
     ELEMENT_ORDER["point"] = 1
 
-    # define components of multi-component fields
-    MULTI_COMPONENT_FIELD_SUBSCRIPTS = dict()
-    MULTI_COMPONENT_FIELD_SUBSCRIPTS["vector"] = ("x", "y", "z")
-    MULTI_COMPONENT_FIELD_SUBSCRIPTS["symmetric_3x3_tensor"] = (
-        "xx",
-        "yy",
-        "zz",
-        "xy",
-        "yz",
-        "zx",
-    )
-    MULTI_COMPONENT_FIELD_SUBSCRIPTS["full_3x3_tensor"] = (
-        "xx",
-        "yy",
-        "zz",
-        "xy",
-        "yz",
-        "zx",
-        "yx",
-        "zy",
-        "xz",
-    )
-    ALL_MULTI_COMPONENT_FIELD_SUBSCRIPTS = set(
-        itertools.chain(*list(MULTI_COMPONENT_FIELD_SUBSCRIPTS.values()))
-    )
+    # A dictionary defining the order of components in multi-component fields.
+    # See "_sort_field_names" method for details.
+    _FIELD_NAME_SUBSCRIPT_ORDER = {
+                    "xx": 1,
+                    "yy": 2,
+                    "zz": 3,
+                    "xy": 4,
+                    "yz": 5,
+                    "zx": 6,
+                    "yx": 7,
+                    "zy": 8,
+                    "xz": 9,
+                    "x": 10,
+                    "y": 11,
+                    "z": 12,
+                }
+
+    # Regular expression used to parse field names. It splits the name into three named groups: base_name, component, and integration_point.
+    # See "_sort_field_names" method for details.
+    _FIELD_NAME_REGEX = re.compile(fr"^(?P<base_name>.*?)(?:[_]?)(?P<component>{'|'.join(_FIELD_NAME_SUBSCRIPT_ORDER.keys())})?(?:[_]?(?P<integration_point>\d+))?$")
+
 
     def __init__(self):
         """Initialize the model."""
@@ -6915,123 +6911,54 @@ def create_timestep(self, timestep):
         for name, values in list(self.global_variables.items()):
             values.insert(timestep_index, self._get_default_field_value(name))
 
-    def _replace_name_case(self, new_list, original_list):
-        """
-        Return the lowercase version of all strings in the given list.
-
-        Example:
-        >>> model._replace_name_case(['x', 'z', 'fred'], ['X', 'Fred', 'Z'])
-        ['X', 'Z', 'Fred']
-
-        """
-        original_case = dict((x.lower(), x) for x in original_list)
-        if len(original_case) != len(original_list):
-            self._warning(
-                "Ambiguous string case.",
-                "There are multiple strings in the list which have "
-                "identical lowercase representations.  One will be "
-                "chosen at random.",
-            )
-        for item in new_list:
-            if item.lower() not in original_case:
-                self._bug(
-                    "Unrecognized string.",
-                    'The string "%s" appears in the new list but '
-                    "not in the original list." % item,
-                )
-        return [original_case[x.lower()] for x in new_list]
-
-    def _sort_field_names(self, original_field_names):
+    def _sort_field_names(self, original_field_names: list[str]) -> list[str]:
         """
         Return field names sorted in a SIERRA-friendly manner.
 
         In order for SIERRA to recognize vectors, tensors, and element fields
         with multiple integration points, fields must be sorted in a specific
-        order.  This function provides that sort order.
+        order. This function provides that sort order.
 
         As fields within exomerge are stored in a set, exomerge has no internal
-        or natural field order.  This routine is only necessary for writing to
+        or natural field order. This routine is only necessary for writing to
         ExodusII files.
 
+        This method recognizes the following field naming patterns:
+
+        - <base_name>_<component>_<integration_point>. E.g. "unrotated_stress_xx_1"
+        - <base_name>_<integration_point>. E.g. "ln_strain_1"
+        - <base_name>_<component>. E.g. "Displacement_X" or "SIGMA_XX"
+        - <base_name>. E.g. "temperature"
+
+        The same patterns with the underscores omitted are also recognized:
+
+        - <base_name><component><integration_point>. E.g. "unrotated_stressxx1"
+        - <base_name><integration_point>. E.g. "ln_strain1"
+        - <base_name><component>. E.g. "DisplacementX" or "SIGMAXX"
+        - <base_name>. E.g. "temperature"
+
+        Names are sorted first by lowercased base name (alphabetically), then by
+        integration point (numerically; a missing one is treated as 0), and finally
+        by component (according to the "_FIELD_NAME_SUBSCRIPT_ORDER" dictionary).
         """
-        field_names = [x.lower() for x in original_field_names]
-        # Look through all fields to find multi-component fields and store
-        # these as tuples of the following form:
-        # ('base_name', 'component', integration_points)
-        multicomponent_fields = set()
-        for name in field_names:
-            # see if it has an integration point
-            if re.match(".*_[0-9]+$", name):
-                (name, integration_point) = name.rsplit("_", 1)
-                integration_point = int(integration_point)
-            else:
-                integration_point = None
-            # see if it possibly has a component
-            if re.match(".*_.+$", name):
-                component = name.rsplit("_", 1)[1]
-                if component in self.ALL_MULTI_COMPONENT_FIELD_SUBSCRIPTS:
-                    name = name.rsplit("_", 1)[0]
-                    multicomponent_fields.add((name, component, integration_point))
-        # now sort multi-component fields
-        base_names = set(x for x, _, _ in multicomponent_fields)
-        sorted_field_names = dict()
-        field_names = set(field_names)
-        for base_name in base_names:
-            # find all components of this form
-            components = set(
-                x for name, x, _ in multicomponent_fields if name == base_name
-            )
-            # find max integration point value
-            integration_points = set(
-                x
-                for name, _, x in multicomponent_fields
-                if name == base_name and x is not None
-            )
-            if integration_points:
-                integration_point_count = max(
-                    x
-                    for name, _, x in multicomponent_fields
-                    if name == base_name and x is not None
-                )
-            else:
-                integration_point_count = None
 
-            # see if the components match the form of something
-            matching_form = None
-            for form, included_components in list(
-                self.MULTI_COMPONENT_FIELD_SUBSCRIPTS.items()
-            ):
-                if set(included_components) == components:
-                    matching_form = form
-            if not matching_form:
-                continue
-            # see if all components and integration points are present
-            mid = [
-                "_" + x for x in self.MULTI_COMPONENT_FIELD_SUBSCRIPTS[matching_form]
-            ]
-            if integration_point_count is None:
-                last = [""]
-            else:
-                last = ["_" + str(x + 1) for x in range(integration_point_count)]
-            all_names = [base_name + m + s for s in last for m in mid]
-            if set(all_names).issubset(field_names):
-                sorted_field_names[all_names[0]] = all_names
-                field_names = field_names - set(all_names)
-        # sort field names which are not part of multicomponent fields
-        field_names = sorted(field_names)
-        # for each list of field names, find place to splice into list
-        place_to_insert = dict()
-        for name in list(sorted_field_names.keys()):
-            place = bisect.bisect_left(field_names, name)
-            if place not in place_to_insert:
-                place_to_insert[place] = [name]
-            else:
-                place_to_insert[place].append(name)
-        # splice them in
-        for place in sorted(list(place_to_insert.keys()), reverse=True):
-            for name in place_to_insert[place]:
-                field_names[place:place] = sorted_field_names[name]
-        return self._replace_name_case(field_names, original_field_names)
+        def _sorting_key(elem: str) -> tuple[str, int, int]:
+            """Return the sort key for one field name: a tuple of the lowercased base
+            name, the integration point (0 if absent), and the component rank (0 if absent).
+            """
+
+            match = self._FIELD_NAME_REGEX.match(elem.lower()).groupdict()  # type: ignore
+
+            base_name = str(match["base_name"])
+            integration_point = int(match["integration_point"]) if match["integration_point"] is not None else 0
+
+            # Map the component to its integer sort rank according to _FIELD_NAME_SUBSCRIPT_ORDER
+            component = self._FIELD_NAME_SUBSCRIPT_ORDER[match["component"]] if match["component"] is not None else 0
+
+            return (base_name, integration_point, component)
+
+        original_field_names.sort(key=_sorting_key)
+        return original_field_names
 
     def _reorder_list(self, the_list, new_index):
         """
diff --git a/packages/seacas/scripts/tests/exomerge_unit_test.py b/packages/seacas/scripts/tests/exomerge_unit_test.py
@@ -603,7 +603,7 @@ def _topology_test(self):
     # Tests should return None if successful (no return statement needed)
     # Tests should return False if the test was unable to be run.
     # Tests should raise an exception or exit(1) if unsuccessful.
-
+    
     def _test_calculate_element_volumes(self):
         ids = self.model._get_standard_element_block_ids()
         if not ids:
@@ -1874,8 +1874,47 @@ def test(self):
         print("\nSuccess")
 
 
+    # The following functions are unit tests for private functions of exomerge.
+    def _test_sort_field_names(self):
+        """Unit test for the _sort_field_names method.
+
+        In this test, we will create a list of field names that are sorted according to
+        SIERRA conventions, then randomly shuffle them to simulate unsorted input.
+
+        Both naming conventions with and without underscores will be tested.
+        """
+
+        # List of all possible field names sorted according to SIERRA conventions.
+        sorted_names = [
+            "Displacement_X", "Displacement_Y", "Displacement_Z",
+            "ln_strain_1", "ln_strain_2", "ln_strain_3", "ln_strain_4",  # scalar field defined in integration points
+            "SIGMA_XX", "SIGMA_YY", "SIGMA_ZZ", "SIGMA_XY", "SIGMA_YZ", "SIGMA_ZX", "SIGMA_YX", "SIGMA_ZY", "SIGMA_XZ", # asymmetric tensor
+            "unrotated_stress_xx_1", "unrotated_stress_yy_1", "unrotated_stress_zz_1", "unrotated_stress_xy_1", "unrotated_stress_yz_1", "unrotated_stress_zx_1",  # Symmetric tensor with integration points
+            "unrotated_stress_xx_2", "unrotated_stress_yy_2", "unrotated_stress_zz_2", "unrotated_stress_xy_2", "unrotated_stress_yz_2", "unrotated_stress_zx_2",
+            "unrotated_stress_xx_3", "unrotated_stress_yy_3", "unrotated_stress_zz_3", "unrotated_stress_xy_3", "unrotated_stress_yz_3", "unrotated_stress_zx_3",
+            "unrotated_stress_xx_12", "unrotated_stress_yy_12", "unrotated_stress_zz_12", "unrotated_stress_xy_12", "unrotated_stress_yz_12", "unrotated_stress_zx_12",  # Try with a number bigger than 9
+            "velocity"  # scalar field
+        ]
+
+        # Randomly shuffle the names to simulate unsorted input
+        unsorted_names = sorted_names.copy()
+        random.shuffle(unsorted_names)
+        assert sorted_names == self.model._sort_field_names(unsorted_names), "Failed to sort names with underscores.\nExpected: {}\nGot: {}".format(
+            sorted_names, self.model._sort_field_names(unsorted_names)
+        )
+
+        # Test sorting names without underscores
+        sorted_names_no_underscores = [name.replace("_", "") for name in sorted_names]
+        unsorted_names_no_underscores = sorted_names_no_underscores.copy()
+        random.shuffle(unsorted_names_no_underscores)
+        assert sorted_names_no_underscores == self.model._sort_field_names(unsorted_names_no_underscores), "Failed to sort names without underscores. \nExpected: {}\nGot: {}".format(
+            sorted_names_no_underscores, self.model._sort_field_names(unsorted_names_no_underscores)
+        )
+
+
 # if this module is executed (as opposed to imported), run the tests
-if __name__ == "__main__":
+if __name__ == "__main__": 
+
     if len(sys.argv) > 2:
         sys.stderr.write("Invalid syntax.\n")
         exit(1)
@@ -1885,3 +1924,12 @@ def test(self):
         tester.min_tests = int(sys.argv[1])
         tester.max_tests = tester.min_tests
     tester.test()
+
+    # Run unit tests for private functions
+    print("\nRunning unittest for private functions in exomerge.py...")
+    input_dir = os.path.dirname(__file__)
+    temp_exo_path = os.path.join(input_dir, "exomerge_unit_test.e")
+    tester = ExomergeUnitTester()
+    tester.model = exomerge.import_model(temp_exo_path)
+    print("[1]_test_sort_field_names")
+    tester._test_sort_field_names()