Merge branch 'experimental' into kant/update-packaging

pgleeson · web-flow · commit 67032ddecb8a · 2026-04-29T13:04:58.000+01:00
diff --git a/.github/workflows/ci_python.yml b/.github/workflows/ci_python.yml
@@ -0,0 +1,46 @@
+name: Test Python  # CI: install the package from src/Python and run its tests on each Python in the matrix
+
+on:
+  push:
+    branches: [ master, development, experimental, test* ]  # test* is a glob: any branch whose name starts with "test"
+  pull_request:
+    branches: [ master, development, experimental, test* ]  # for pull requests this filters on the target (base) branch
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # let the remaining matrix jobs finish when one Python version fails
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12"]  # quoted so YAML keeps "3.10" a string rather than the float 3.1
+
+    steps:
+
+    - uses: actions/checkout@v6
+    - name: Set up Python  ${{ matrix.python-version }}
+      uses: actions/setup-python@v6
+      with:
+        python-version:  ${{ matrix.python-version }}
+
+    - name: Print refs  # debugging aid: echoes the ref that triggered the run and the base ref
+      run: |
+        echo "github.ref is: ${{ github.ref }}"
+        echo "github.base_ref is: ${{ github.base_ref }}"
+
+    - name: Install Python package  # pip-installs from src/Python, the directory that holds setup.py
+      run: |
+        cd src/Python
+        pip install .
+
+    - name: Run tests  # tests.py, then diagnostic_test.py against the repo-level tests/minimax.wcon
+      run: |
+        cd src/Python/tests
+        python tests.py 
+        python diagnostic_test.py ../../../tests/minimax.wcon 
+
+    - name: Final version info  # prints the installed package versions into the job log
+      run: |
+        pip list
+        
+
diff --git a/.gitignore b/.gitignore
@@ -19,3 +19,5 @@ openworm/
 src/scala/target/*
 src/scala/**/target/*
 
+/src/Python/example_saved_file.WCON
+/src/Python/wcon/wcon_schema.json
diff --git a/src/Python/examples/view_wcon.py b/src/Python/examples/view_wcon.py
@@ -4,5 +4,5 @@
 sys.path.append('..')
 from wcon import WCONWorms, MeasurementUnit
 
-file_name = 'asic-1 (ok415) on food L_2010_07_08__11_46_40___7___5.wcon'
+file_name = '../../../tests/minimax.wcon'
 w = WCONWorms.load_from_file(file_name)
diff --git a/src/Python/setup.py b/src/Python/setup.py
@@ -12,7 +12,8 @@
 from codecs import open
 from os import path
 import os
-from wcon.version import __version__
+import shutil
+exec(open('wcon/version.py').read())
 
 here = path.abspath(path.dirname(__file__))
 readme_path = path.join(here, 'README.md')
@@ -24,7 +25,13 @@
     with open(readme_path, encoding='utf-8') as f:
         long_description += f.read()
 
-print(os.listdir('.'))  # DEBUG
+# The canonical wcon_schema.json lives at the repository root so it can be
+# shared by every language implementation. setuptools cannot package files
+# from outside the package directory, so copy it into wcon/ at build time.
+repo_schema = path.join(here, '..', '..', 'wcon_schema.json')
+pkg_schema = path.join(here, 'wcon', 'wcon_schema.json')
+if path.exists(repo_schema):
+    shutil.copyfile(repo_schema, pkg_schema)
 
 setup(
     name='wcon',
@@ -51,8 +58,9 @@
     ],
     keywords='C. elegans worm tracking',
     packages=['wcon'],
-    package_data={'': ['../../wcon_schema.json']},
-    install_requires=['jsonschema']
+    package_data={'wcon': ['wcon_schema.json']},
+    include_package_data=True,
+    install_requires=['jsonschema', 'six', 'scipy', 'pandas', 'psutil'],
     # Also requires numpy, which is not listed explicitly because pip has
     # historically been bad at installing it.
 )
diff --git a/src/Python/tests/diagnostic_test.py b/src/Python/tests/diagnostic_test.py
@@ -18,6 +18,7 @@
 idx = pd.IndexSlice
 import numpy as np
 import time
+import pprint as pp
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(os.path.join(dir_path, '..'))
@@ -52,6 +53,12 @@ def timing_function():
                                           validate_against_schema=False)
             print("Time to load w1: " + str(timing_function() - start_time))
 
+            print(" ------- W1 has " + str(len(w1.data)) + " rows and " +
+                  str(len(w1.data.columns)) + " columns")
+
+            print(pp.pformat(w1.data_as_odict))
+            print (' -------- ')
+
             # Save these worm tracks to a file, then load that file
             test_path = 'test.wcon'
             start_time = timing_function()
@@ -63,6 +70,12 @@ def timing_function():
                                           validate_against_schema=False)
             print("Time to load w2: " + str(timing_function() - start_time))
 
+            print(" ------- W2 has " + str(len(w2.data)) + " rows and " +
+                  str(len(w2.data.columns)) + " columns")
+
+            print(pp.pformat(w2.data_as_odict))
+            print (' -------- ')
+
             # x1 = w1.data.loc[:, idx[0, 'x', 0]].fillna(0)
             # x2 = w2.data.loc[:, idx[0, 'x', 0]].fillna(0)
             # cmm = np.flatnonzero(x1 != x2)
@@ -77,6 +90,14 @@ def timing_function():
             # "id" first in a data segment, etc.)
             w3 = WCONWorms.load_from_file(test_path,
                                           validate_against_schema=False)
+            
+
+            print(" ------- W3 has " + str(len(w3.data)) + " rows and " +
+                  str(len(w3.data.columns)) + " columns")
+
+            print(pp.pformat(w3.data_as_odict))
+            print (' -------- ')
+
             assert(w2 == w3)
             assert(w1 == w2)
             assert(w1 == w3)
diff --git a/src/Python/wcon/measurement_unit.py b/src/Python/wcon/measurement_unit.py
@@ -39,6 +39,18 @@ def C2C(x):
     """
     return x
 
+def F2C(x):  # Fahrenheit -> Celsius (assuming 'F'/'C'/'K' are the usual temperature-scale codes)
+    return convert_temperature(x,'F','C')  # convert_temperature is not visible in this hunk; presumably scipy.constants.convert_temperature - TODO confirm
+def C2F(x):  # Celsius -> Fahrenheit
+    return convert_temperature(x,'C','F')
+def K2F(x):  # Kelvin -> Fahrenheit
+    return convert_temperature(x,'K','F')
+def C2K(x):  # Celsius -> Kelvin
+    return convert_temperature(x,'C','K')
+def K2C(x):  # Kelvin -> Celsius
+    return convert_temperature(x,'K','C')
+def F2K(x):  # Fahrenheit -> Kelvin
+    return convert_temperature(x,'F','K')
 
 class MeasurementUnitAtom():
     """
@@ -633,11 +645,14 @@ def _create_from_atomic(cls, unit_string):
     @classmethod
     def _create_from_node(cls, node):
         """
-        node: is ast.Num or ast.BinOp or ast.UnaryOp or ast.Str or ast.Name
+        node: is ast.Constant (numeric) or ast.BinOp or ast.UnaryOp or ast.Name
             The expression to be transformed into a MeasurementUnit
 
         """
-        if isinstance(node, ast.Constant):  # <number>
+        # ast.Num was deprecated in Python 3.8 and removed in 3.14;
+        # ast.Constant now represents all literal values.
+        if isinstance(node, ast.Constant) and isinstance(
+                node.value, (int, float)):  # <number>
             n = node.value
             assert(n != 0)  # A unit cannot have zero in the expression
 
diff --git a/src/Python/wcon/version.py b/src/Python/wcon/version.py
@@ -6,4 +6,4 @@
 # 2) we can import it in setup.py for the same reason
 # 3) we can import it into your module
 # (from http://stackoverflow.com/questions/458550/)
-__version__ = '1.1.0'
+__version__ = '1.2.1'
diff --git a/src/Python/wcon/wcon_data.py b/src/Python/wcon/wcon_data.py
@@ -119,6 +119,13 @@ def df_upsert(src, dest):
             dest_sliced.sort_index(axis=1, inplace=True)
             src_sliced.sort_index(axis=1, inplace=True)
 
+            # Align src_sliced's row/column labels to dest_sliced. The two
+            # were built with independent .isin() masks so column order may
+            # differ; pandas >=1.x refuses to compare DataFrames whose
+            # labels are not identical.
+            src_sliced = src_sliced.reindex(index=dest_sliced.index,
+                                            columns=dest_sliced.columns)
+
             # Obtain a mask of the conflicts in the current segment
             # as compared with all previously loaded data.  That is:
             # NaN NaN = False
@@ -189,24 +196,30 @@ def convert_origin(df):
             # `for` loop loops through both `x` and `y`.
 
             if offset in cur_worm.columns.get_level_values(0):
-                # Consider offset as 0 if not available in a certain frame
-                ox_column = cur_worm.loc[:, (offset)].fillna(0).astype('float64')
+                # Consider offset as 0 if not available in a certain frame.
+                # Coerce to numeric: the parser can leave the offset column
+                # with object dtype (mixed str/int entries) when offsets
+                # are present in some segments but not others.
+                ox_column = cur_worm.loc[:, (offset)].apply(
+                    pd.to_numeric, errors='coerce').fillna(0)
 
                 # Shift our 'x' values by offset
-                all_x_columns = cur_worm.loc[:, (coord)].fillna(0).astype('float64')
-                ox_affine_change = (np.array(ox_column) *
+                all_x_columns = cur_worm.loc[:, (coord)].apply(
+                    pd.to_numeric, errors='coerce')
+                ox_affine_change = (np.array(ox_column, dtype=float) *
                                     np.ones(all_x_columns.shape))
                 all_x_columns += ox_affine_change
 
                 if centroid in cur_worm.columns.get_level_values(0):
-                    cx_column = cur_worm.loc[:, (centroid)]
+                    cx_column = cur_worm.loc[:, (centroid)].apply(
+                        pd.to_numeric, errors='coerce')
                     # Shift the centroid by the offset
                     cx_column += ox_column
 
                     # Now make the centroid our new offset, since the rule
                     # is that if the offset exists, the centroid is not
                     # the offset, but we want it to be.
-                    cx_affine_change = (np.array(cx_column) *
+                    cx_affine_change = (np.array(cx_column, dtype=float) *
                                         np.ones(all_x_columns.shape))
                     all_x_columns -= cx_affine_change
 
@@ -227,7 +240,8 @@ def convert_origin(df):
     # This is so DataFrames with and without offsets
     # will show as comparing identically.
     for offset_key in offset_keys:
-        df.drop(offset_key, axis=1, level='key', inplace=True, errors='ignore')
+        df.drop(offset_key, axis=1, level='key', inplace=True,
+                errors='ignore')
 
     # Because of a known issue in Pandas
     # (https://github.com/pydata/pandas/issues/2770), the dropped columns
@@ -405,7 +419,7 @@ def _obtain_time_series_data_frame(time_series_data):
         cur_df = pd.DataFrame(cur_data, columns=cur_columns)
 
         cur_df.index = cur_timeframes
-        cur_df.index.name = 't'
+        cur_df.index.names = ['t']
 
         # We want the index (time) to be in order.
         cur_df.sort_index(axis=0, inplace=True)
@@ -469,7 +483,7 @@ def _obtain_time_series_data_frame(time_series_data):
         with warnings.catch_warnings():
             warnings.filterwarnings(action="ignore", category=FutureWarning)
             df_odict[worm_id] = \
-                df_odict[worm_id].convert_dtypes(convert_floating=True)
+                df_odict[worm_id].infer_objects()
 
         # If 'head' or 'ventral' is NaN, we must specify '?' since
         # otherwise, when saving this object, to specify "no value" we would
@@ -481,21 +495,27 @@ def _obtain_time_series_data_frame(time_series_data):
 
         # We must replace NaN with None, otherwise the JSON encoder will
         # save 'NaN' as the string and this will get rejected by our schema
-        # on any subsequent loads
-        # Note we can't use .fillna(None) due to this issue:
-        # https://github.com/pydata/pandas/issues/1972
+        # on any subsequent loads.
+        # Pandas 3.0 infers 'str' dtype for these columns, and assigning
+        # NaN on a str-dtype column coerces to the string 'nan'. Force
+        # object dtype and map both real NaN and stringified 'nan' back
+        # to None so downstream JSON serialization writes null.
         df_keys = set(df_odict[worm_id].columns.get_level_values('key'))
         for k in ['head', 'ventral']:
             if k in df_keys:
-                cur_slice = df_odict[worm_id].loc[:, idx[:, k, :]]
-                df_odict[worm_id].loc[:, idx[:, k, :]] = \
-                    cur_slice.fillna(value=np.nan)
-
-        # Make sure aspect_size is a float, since only floats are nullable:
+                df = df_odict[worm_id]
+                for col in [c for c in df.columns if c[1] == k]:
+                    s = df[col].astype(object)
+                    df[col] = s.where(s.notna() & (s != 'nan'), None)
+
+        # Make sure aspect_size is a float, since only floats are nullable.
+        # Replace the column whole rather than assigning via .loc[]; pandas
+        # 2.x preserves the parent column's existing (object/str) dtype on
+        # .loc[] assignment and raises TypeError on non-string values.
         if 'aspect_size' in df_keys:
-            df_odict[worm_id].loc[:, idx[:, 'aspect_size', :]] = \
-                df_odict[worm_id].loc[:, idx[:, 'aspect_size', :]] \
-                .astype(float)
+            df = df_odict[worm_id]
+            for col in [c for c in df.columns if c[1] == 'aspect_size']:
+                df[col] = df[col].astype(float)
 
     return sort_odict(df_odict)
 
diff --git a/src/Python/wcon/wcon_parser.py b/src/Python/wcon/wcon_parser.py
@@ -385,21 +385,30 @@ def to_canon(self):
         for data_key in self.units:
             mu = self.units[data_key]
 
-            # Don't bother to "convert" units that are already in their
-            # canonical form.
-            if mu.unit_string == mu.canonical_unit_string:
-                continue
-
             tmu = self.units['t']
+            already_canonical = (mu.unit_string == mu.canonical_unit_string)
             for worm_id in w.worm_ids:
 
                 try:
-                    # Apply across all worm ids and all aspects
-                    mu_slice = \
-                        w._data[worm_id].loc[:, idx[:, data_key, :]].copy().astype('float64')
-
-                    w._data[worm_id].loc[:, idx[:, data_key, :]] = \
-                        mu_slice.map(mu.to_canon)
+                    df = w._data[worm_id]
+                    target_cols = [c for c in df.columns
+                                   if c[1] == data_key]
+                    if not target_cols:
+                        raise KeyError(data_key)
+
+                    # The parser can leave numeric columns with object
+                    # dtype (e.g. mixed int/str entries from how segments
+                    # are merged). Coerce so downstream arithmetic and
+                    # JSON serialization treat them as numbers, even when
+                    # the unit is already canonical and no conversion is
+                    # otherwise required. Replace each column whole rather
+                    # than via .loc[] assignment, which would preserve the
+                    # parent column's existing (object) dtype.
+                    for col in target_cols:
+                        new_col = pd.to_numeric(df[col], errors='coerce')
+                        if not already_canonical:
+                            new_col = new_col.apply(mu.to_canon)
+                        df[col] = new_col
                 except KeyError:
                     # Just ignore cases where there are "units" entries but no
                     # corresponding data
@@ -822,6 +831,7 @@ def pd_equals(df1, df2):
         return False
 
     try:
+        # pd.util.testing was removed in pandas 2.0; use pd.testing.
         pd.testing.assert_frame_equal(df1, df2)
     except AssertionError:
         return False