Skip to content

Commit 67032dd

Browse files
authored
Merge branch 'experimental' into kant/update-packaging
2 parents 620d3cd + 0d2fc2d commit 67032dd

9 files changed

Lines changed: 161 additions & 39 deletions

File tree

.github/workflows/ci_python.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
name: Test Python
2+
3+
on:
4+
push:
5+
branches: [ master, development, experimental, test* ]
6+
pull_request:
7+
branches: [ master, development, experimental, test* ]
8+
9+
jobs:
10+
build:
11+
12+
runs-on: ubuntu-latest
13+
strategy:
14+
fail-fast: false
15+
matrix:
16+
python-version: ["3.9", "3.10", "3.11", "3.12"]
17+
18+
steps:
19+
20+
- uses: actions/checkout@v6
21+
- name: Set up Python ${{ matrix.python-version }}
22+
uses: actions/setup-python@v6
23+
with:
24+
python-version: ${{ matrix.python-version }}
25+
26+
- name: Print refs
27+
run: |
28+
echo "github.ref is: ${{ github.ref }}"
29+
echo "github.base_ref is: ${{ github.base_ref }}"
30+
31+
- name: Install Python package
32+
run: |
33+
cd src/Python
34+
pip install .
35+
36+
- name: Run tests
37+
run: |
38+
cd src/Python/tests
39+
python tests.py
40+
python diagnostic_test.py ../../../tests/minimax.wcon
41+
42+
- name: Final version info
43+
run: |
44+
pip list
45+
46+

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,5 @@ openworm/
1919
src/scala/target/*
2020
src/scala/**/target/*
2121

22+
/src/Python/example_saved_file.WCON
23+
/src/Python/wcon/wcon_schema.json

src/Python/examples/view_wcon.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@
44
sys.path.append('..')
55
from wcon import WCONWorms, MeasurementUnit
66

7-
file_name = 'asic-1 (ok415) on food L_2010_07_08__11_46_40___7___5.wcon'
7+
file_name = '../../../tests/minimax.wcon'
88
w = WCONWorms.load_from_file(file_name)

src/Python/setup.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from codecs import open
1313
from os import path
1414
import os
15-
from wcon.version import __version__
15+
import shutil
16+
exec(open('wcon/version.py').read())
1617

1718
here = path.abspath(path.dirname(__file__))
1819
readme_path = path.join(here, 'README.md')
@@ -24,7 +25,13 @@
2425
with open(readme_path, encoding='utf-8') as f:
2526
long_description += f.read()
2627

27-
print(os.listdir('.')) # DEBUG
28+
# The canonical wcon_schema.json lives at the repository root so it can be
29+
# shared by every language implementation. setuptools cannot package files
30+
# from outside the package directory, so copy it into wcon/ at build time.
31+
repo_schema = path.join(here, '..', '..', 'wcon_schema.json')
32+
pkg_schema = path.join(here, 'wcon', 'wcon_schema.json')
33+
if path.exists(repo_schema):
34+
shutil.copyfile(repo_schema, pkg_schema)
2835

2936
setup(
3037
name='wcon',
@@ -51,8 +58,9 @@
5158
],
5259
keywords='C. elegans worm tracking',
5360
packages=['wcon'],
54-
package_data={'': ['../../wcon_schema.json']},
55-
install_requires=['jsonschema']
61+
package_data={'wcon': ['wcon_schema.json']},
62+
include_package_data=True,
63+
install_requires=['jsonschema', 'six', 'scipy', 'pandas', 'psutil'],
5664
# Actually also requires numpy, scipy and pandas but I don't want to force
5765
# pip to install these since pip is bad at that for those packages.
5866
)

src/Python/tests/diagnostic_test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
idx = pd.IndexSlice
1919
import numpy as np
2020
import time
21+
import pprint as pp
2122

2223
dir_path = os.path.dirname(os.path.realpath(__file__))
2324
sys.path.append(os.path.join(dir_path, '..'))
@@ -52,6 +53,12 @@ def timing_function():
5253
validate_against_schema=False)
5354
print("Time to load w1: " + str(timing_function() - start_time))
5455

56+
print(" ------- W1 has " + str(len(w1.data)) + " rows and " +
57+
str(len(w1.data.columns)) + " columns")
58+
59+
print(pp.pformat(w1.data_as_odict))
60+
print (' -------- ')
61+
5562
# Save these worm tracks to a file, then load that file
5663
test_path = 'test.wcon'
5764
start_time = timing_function()
@@ -63,6 +70,12 @@ def timing_function():
6370
validate_against_schema=False)
6471
print("Time to load w2: " + str(timing_function() - start_time))
6572

73+
print(" ------- W2 has " + str(len(w2.data)) + " rows and " +
74+
str(len(w2.data.columns)) + " columns")
75+
76+
print(pp.pformat(w2.data_as_odict))
77+
print (' -------- ')
78+
6679
# x1 = w1.data.loc[:, idx[0, 'x', 0]].fillna(0)
6780
# x2 = w2.data.loc[:, idx[0, 'x', 0]].fillna(0)
6881
# cmm = np.flatnonzero(x1 != x2)
@@ -77,6 +90,14 @@ def timing_function():
7790
# "id" first in a data segment, etc.)
7891
w3 = WCONWorms.load_from_file(test_path,
7992
validate_against_schema=False)
93+
94+
95+
print(" ------- W3 has " + str(len(w3.data)) + " rows and " +
96+
str(len(w3.data.columns)) + " columns")
97+
98+
print(pp.pformat(w3.data_as_odict))
99+
print (' -------- ')
100+
80101
assert(w2 == w3)
81102
assert(w1 == w2)
82103
assert(w1 == w3)

src/Python/wcon/measurement_unit.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ def C2C(x):
3939
"""
4040
return x
4141

42+
def F2C(x):
43+
return convert_temperature(x,'F','C')
44+
def C2F(x):
45+
return convert_temperature(x,'C','F')
46+
def K2F(x):
47+
return convert_temperature(x,'K','F')
48+
def C2K(x):
49+
return convert_temperature(x,'C','K')
50+
def K2C(x):
51+
return convert_temperature(x,'K','C')
52+
def F2K(x):
53+
return convert_temperature(x,'F','K')
4254

4355
class MeasurementUnitAtom():
4456
"""
@@ -633,11 +645,14 @@ def _create_from_atomic(cls, unit_string):
633645
@classmethod
634646
def _create_from_node(cls, node):
635647
"""
636-
node: is ast.Num or ast.BinOp or ast.UnaryOp or ast.Str or ast.Name
648+
node: is ast.Constant (numeric) or ast.BinOp or ast.UnaryOp or ast.Name
637649
The expression to be transformed into a MeasurementUnit
638650
639651
"""
640-
if isinstance(node, ast.Constant): # <number>
652+
# ast.Num was deprecated in Python 3.8 and removed in 3.14;
653+
# ast.Constant now represents all literal values.
654+
if isinstance(node, ast.Constant) and isinstance(
655+
node.value, (int, float)): # <number>
641656
n = node.value
642657
assert(n != 0) # A unit cannot have zero in the expression
643658

src/Python/wcon/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
# 2) we can import it in setup.py for the same reason
77
# 3) we can import it into your module
88
# (from http://stackoverflow.com/questions/458550/)
9-
__version__ = '1.1.0'
9+
__version__ = '1.2.1'

src/Python/wcon/wcon_data.py

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,13 @@ def df_upsert(src, dest):
119119
dest_sliced.sort_index(axis=1, inplace=True)
120120
src_sliced.sort_index(axis=1, inplace=True)
121121

122+
# Align src_sliced's row/column labels to dest_sliced. The two
123+
# were built with independent .isin() masks so column order may
124+
# differ; pandas >=1.x refuses to compare DataFrames whose
125+
# labels are not identical.
126+
src_sliced = src_sliced.reindex(index=dest_sliced.index,
127+
columns=dest_sliced.columns)
128+
122129
# Obtain a mask of the conflicts in the current segment
123130
# as compared with all previously loaded data. That is:
124131
# NaN NaN = False
@@ -189,24 +196,30 @@ def convert_origin(df):
189196
# `for` loop loops through both `x` and `y`.
190197

191198
if offset in cur_worm.columns.get_level_values(0):
192-
# Consider offset as 0 if not available in a certain frame
193-
ox_column = cur_worm.loc[:, (offset)].fillna(0).astype('float64')
199+
# Consider offset as 0 if not available in a certain frame.
200+
# Coerce to numeric: the parser can leave the offset column
201+
# with object dtype (mixed str/int entries) when offsets
202+
# are present in some segments but not others.
203+
ox_column = cur_worm.loc[:, (offset)].apply(
204+
pd.to_numeric, errors='coerce').fillna(0)
194205

195206
# Shift our 'x' values by offset
196-
all_x_columns = cur_worm.loc[:, (coord)].fillna(0).astype('float64')
197-
ox_affine_change = (np.array(ox_column) *
207+
all_x_columns = cur_worm.loc[:, (coord)].apply(
208+
pd.to_numeric, errors='coerce')
209+
ox_affine_change = (np.array(ox_column, dtype=float) *
198210
np.ones(all_x_columns.shape))
199211
all_x_columns += ox_affine_change
200212

201213
if centroid in cur_worm.columns.get_level_values(0):
202-
cx_column = cur_worm.loc[:, (centroid)]
214+
cx_column = cur_worm.loc[:, (centroid)].apply(
215+
pd.to_numeric, errors='coerce')
203216
# Shift the centroid by the offset
204217
cx_column += ox_column
205218

206219
# Now make the centroid our new offset, since the rule
207220
# is that if the offset exists, the centroid is not
208221
# the offset, but we want it to be.
209-
cx_affine_change = (np.array(cx_column) *
222+
cx_affine_change = (np.array(cx_column, dtype=float) *
210223
np.ones(all_x_columns.shape))
211224
all_x_columns -= cx_affine_change
212225

@@ -227,7 +240,8 @@ def convert_origin(df):
227240
# This is so DataFrames with and without offsets
228241
# will show as comparing identically.
229242
for offset_key in offset_keys:
230-
df.drop(offset_key, axis=1, level='key', inplace=True, errors='ignore')
243+
df.drop(offset_key, axis=1, level='key', inplace=True,
244+
errors='ignore')
231245

232246
# Because of a known issue in Pandas
233247
# (https://github.com/pydata/pandas/issues/2770), the dropped columns
@@ -405,7 +419,7 @@ def _obtain_time_series_data_frame(time_series_data):
405419
cur_df = pd.DataFrame(cur_data, columns=cur_columns)
406420

407421
cur_df.index = cur_timeframes
408-
cur_df.index.name = 't'
422+
cur_df.index.names = ['t']
409423

410424
# We want the index (time) to be in order.
411425
cur_df.sort_index(axis=0, inplace=True)
@@ -469,7 +483,7 @@ def _obtain_time_series_data_frame(time_series_data):
469483
with warnings.catch_warnings():
470484
warnings.filterwarnings(action="ignore", category=FutureWarning)
471485
df_odict[worm_id] = \
472-
df_odict[worm_id].convert_dtypes(convert_floating=True)
486+
df_odict[worm_id].infer_objects()
473487

474488
# If 'head' or 'ventral' is NaN, we must specify '?' since
475489
# otherwise, when saving this object, to specify "no value" we would
@@ -481,21 +495,27 @@ def _obtain_time_series_data_frame(time_series_data):
481495

482496
# We must replace NaN with None, otherwise the JSON encoder will
483497
# save 'NaN' as the string and this will get rejected by our schema
484-
# on any subsequent loads
485-
# Note we can't use .fillna(None) due to this issue:
486-
# https://github.com/pydata/pandas/issues/1972
498+
# on any subsequent loads.
499+
# Pandas 3.0 infers 'str' dtype for these columns, and assigning
500+
# NaN on a str-dtype column coerces to the string 'nan'. Force
501+
# object dtype and map both real NaN and stringified 'nan' back
502+
# to None so downstream JSON serialization writes null.
487503
df_keys = set(df_odict[worm_id].columns.get_level_values('key'))
488504
for k in ['head', 'ventral']:
489505
if k in df_keys:
490-
cur_slice = df_odict[worm_id].loc[:, idx[:, k, :]]
491-
df_odict[worm_id].loc[:, idx[:, k, :]] = \
492-
cur_slice.fillna(value=np.nan)
493-
494-
# Make sure aspect_size is a float, since only floats are nullable:
506+
df = df_odict[worm_id]
507+
for col in [c for c in df.columns if c[1] == k]:
508+
s = df[col].astype(object)
509+
df[col] = s.where(s.notna() & (s != 'nan'), None)
510+
511+
# Make sure aspect_size is a float, since only floats are nullable.
512+
# Replace the column whole rather than assigning via .loc[]; pandas
513+
# 2.x preserves the parent column's existing (object/str) dtype on
514+
# .loc[] assignment and raises TypeError on non-string values.
495515
if 'aspect_size' in df_keys:
496-
df_odict[worm_id].loc[:, idx[:, 'aspect_size', :]] = \
497-
df_odict[worm_id].loc[:, idx[:, 'aspect_size', :]] \
498-
.astype(float)
516+
df = df_odict[worm_id]
517+
for col in [c for c in df.columns if c[1] == 'aspect_size']:
518+
df[col] = df[col].astype(float)
499519

500520
return sort_odict(df_odict)
501521

src/Python/wcon/wcon_parser.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -385,21 +385,30 @@ def to_canon(self):
385385
for data_key in self.units:
386386
mu = self.units[data_key]
387387

388-
# Don't bother to "convert" units that are already in their
389-
# canonical form.
390-
if mu.unit_string == mu.canonical_unit_string:
391-
continue
392-
393388
tmu = self.units['t']
389+
already_canonical = (mu.unit_string == mu.canonical_unit_string)
394390
for worm_id in w.worm_ids:
395391

396392
try:
397-
# Apply across all worm ids and all aspects
398-
mu_slice = \
399-
w._data[worm_id].loc[:, idx[:, data_key, :]].copy().astype('float64')
400-
401-
w._data[worm_id].loc[:, idx[:, data_key, :]] = \
402-
mu_slice.map(mu.to_canon)
393+
df = w._data[worm_id]
394+
target_cols = [c for c in df.columns
395+
if c[1] == data_key]
396+
if not target_cols:
397+
raise KeyError(data_key)
398+
399+
# The parser can leave numeric columns with object
400+
# dtype (e.g. mixed int/str entries from how segments
401+
# are merged). Coerce so downstream arithmetic and
402+
# JSON serialization treat them as numbers, even when
403+
# the unit is already canonical and no conversion is
404+
# otherwise required. Replace each column whole rather
405+
# than via .loc[] assignment, which would preserve the
406+
# parent column's existing (object) dtype.
407+
for col in target_cols:
408+
new_col = pd.to_numeric(df[col], errors='coerce')
409+
if not already_canonical:
410+
new_col = new_col.apply(mu.to_canon)
411+
df[col] = new_col
403412
except KeyError:
404413
# Just ignore cases where there are "units" entries but no
405414
# corresponding data
@@ -822,6 +831,7 @@ def pd_equals(df1, df2):
822831
return False
823832

824833
try:
834+
# pd.util.testing was removed in pandas 2.0; use pd.testing.
825835
pd.testing.assert_frame_equal(df1, df2)
826836
except AssertionError:
827837
return False

0 commit comments

Comments
 (0)