Skip to content

Commit a179581

Browse files
authored
Merge pull request #180 from openworm/experimental
Update dependencies and fix issues with pandas and json schema
2 parents 6ebd846 + 0d2fc2d commit a179581

11 files changed

Lines changed: 123 additions & 49 deletions

File tree

.github/workflows/ci_python.yml

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: Test Python
22

33
on:
44
push:
5-
branches: [ master, development ]
5+
branches: [ master, development, experimental, test* ]
66
pull_request:
7-
branches: [ master, development ]
7+
branches: [ master, development, experimental, test* ]
88

99
jobs:
1010
build:
@@ -13,13 +13,13 @@ jobs:
1313
strategy:
1414
fail-fast: false
1515
matrix:
16-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
16+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1717

1818
steps:
1919

20-
- uses: actions/checkout@v2
20+
- uses: actions/checkout@v6
2121
- name: Set up Python ${{ matrix.python-version }}
22-
uses: actions/setup-python@v2
22+
uses: actions/setup-python@v6
2323
with:
2424
python-version: ${{ matrix.python-version }}
2525

@@ -31,6 +31,16 @@ jobs:
3131
- name: Install Python package
3232
run: |
3333
cd src/Python
34-
python setup.py install
34+
pip install .
35+
36+
- name: Run tests
37+
run: |
38+
cd src/Python/tests
39+
python tests.py
40+
python diagnostic_test.py ../../../tests/minimax.wcon
41+
42+
- name: Final version info
43+
run: |
44+
pip list
3545
3646

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ src/Python/wcon.egg*
1515

1616
# Scala
1717
src/scala/target/*
18+
/src/Python/example_saved_file.WCON
19+
/src/Python/wcon/wcon_schema.json

src/Python/examples/view_wcon.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@
44
sys.path.append('..')
55
from wcon import WCONWorms, MeasurementUnit
66

7-
file_name = 'asic-1 (ok415) on food L_2010_07_08__11_46_40___7___5.wcon'
7+
file_name = '../../../tests/minimax.wcon'
88
w = WCONWorms.load_from_file(file_name)

src/Python/setup.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from codecs import open
1313
from os import path
1414
import os
15+
import shutil
1516
exec(open('wcon/version.py').read())
1617

1718
here = path.abspath(path.dirname(__file__))
@@ -24,7 +25,13 @@
2425
with open(readme_path, encoding='utf-8') as f:
2526
long_description += f.read()
2627

27-
print(os.listdir('.')) # DEBUG
28+
# The canonical wcon_schema.json lives at the repository root so it can be
29+
# shared by every language implementation. setuptools cannot package files
30+
# from outside the package directory, so copy it into wcon/ at build time.
31+
repo_schema = path.join(here, '..', '..', 'wcon_schema.json')
32+
pkg_schema = path.join(here, 'wcon', 'wcon_schema.json')
33+
if path.exists(repo_schema):
34+
shutil.copyfile(repo_schema, pkg_schema)
2835

2936
setup(
3037
name='wcon',
@@ -51,8 +58,9 @@
5158
],
5259
keywords='C. elegans worm tracking',
5360
packages=['wcon'],
54-
package_data={'': ['../../wcon_schema.json']},
55-
install_requires=['jsonschema', 'six', 'numpy', 'scipy<=0.17.1']
61+
package_data={'wcon': ['wcon_schema.json']},
62+
include_package_data=True,
63+
install_requires=['jsonschema', 'six', 'scipy', 'pandas', 'psutil'],
5664
# Also requires numpy, which is deliberately not listed above: I don't want to
5765
# force pip to install it, since pip has been bad at installing that package.
5866
)

src/Python/tests/diagnostic_test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
idx = pd.IndexSlice
1919
import numpy as np
2020
import time
21+
import pprint as pp
2122

2223
sys.path.append('..')
2324
from wcon import WCONWorms, MeasurementUnit
@@ -51,6 +52,12 @@ def timing_function():
5152
validate_against_schema=False)
5253
print("Time to load w1: " + str(timing_function() - start_time))
5354

55+
print(" ------- W1 has " + str(len(w1.data)) + " rows and " +
56+
str(len(w1.data.columns)) + " columns")
57+
58+
print(pp.pformat(w1.data_as_odict))
59+
print (' -------- ')
60+
5461
# Save these worm tracks to a file, then load that file
5562
test_path = 'test.wcon'
5663
start_time = timing_function()
@@ -62,6 +69,12 @@ def timing_function():
6269
validate_against_schema=False)
6370
print("Time to load w2: " + str(timing_function() - start_time))
6471

72+
print(" ------- W2 has " + str(len(w2.data)) + " rows and " +
73+
str(len(w2.data.columns)) + " columns")
74+
75+
print(pp.pformat(w2.data_as_odict))
76+
print (' -------- ')
77+
6578
# x1 = w1.data.loc[:, idx[0, 'x', 0]].fillna(0)
6679
# x2 = w2.data.loc[:, idx[0, 'x', 0]].fillna(0)
6780
# cmm = np.flatnonzero(x1 != x2)
@@ -76,6 +89,14 @@ def timing_function():
7689
# "id" first in a data segment, etc.)
7790
w3 = WCONWorms.load_from_file(test_path,
7891
validate_against_schema=False)
92+
93+
94+
print(" ------- W3 has " + str(len(w3.data)) + " rows and " +
95+
str(len(w3.data.columns)) + " columns")
96+
97+
print(pp.pformat(w3.data_as_odict))
98+
print (' -------- ')
99+
79100
assert(w2 == w3)
80101
assert(w1 == w2)
81102
assert(w1 == w3)

src/Python/tests/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def flatten(list_of_lists):
4141
for element in list_of_lists:
4242
# If it's iterable but not a string or bytes, then recurse, otherwise
4343
# we are at a "leaf" node of our traversal
44-
if(isinstance(element, collections.Iterable) and
44+
if(isinstance(element, collections.abc.Iterable) and
4545
not isinstance(element, (str, bytes))):
4646
for sub_element in flatten(element):
4747
yield sub_element

src/Python/wcon/measurement_unit.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -637,12 +637,15 @@ def _create_from_atomic(cls, unit_string):
637637
@classmethod
638638
def _create_from_node(cls, node):
639639
"""
640-
node: is ast.Num or ast.BinOp or ast.UnaryOp or ast.Str or ast.Name
640+
node: is ast.Constant (numeric) or ast.BinOp or ast.UnaryOp or ast.Name
641641
The expression to be transformed into a MeasurementUnit
642642
643643
"""
644-
if isinstance(node, ast.Num): # <number>
645-
n = node.n
644+
# ast.Num was deprecated in Python 3.8 and removed in 3.14;
645+
# ast.Constant now represents all literal values.
646+
if isinstance(node, ast.Constant) and isinstance(
647+
node.value, (int, float)): # <number>
648+
n = node.value
646649
assert(n != 0) # A unit cannot have zero in the expression
647650

648651
u = cls()

src/Python/wcon/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
# 2) we can import it in setup.py for the same reason
77
# 3) we can import it into your own module
88
# (from http://stackoverflow.com/questions/458550/)
9-
__version__ = '1.1.0'
9+
__version__ = '1.2.1'

src/Python/wcon/wcon_data.py

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,13 @@ def df_upsert(src, dest):
119119
dest_sliced.sort_index(inplace=True)
120120
src_sliced.sort_index(inplace=True)
121121

122+
# Align src_sliced's row/column labels to dest_sliced. The two
123+
# were built with independent .isin() masks so column order may
124+
# differ; pandas >=1.x refuses to compare DataFrames whose
125+
# labels are not identical.
126+
src_sliced = src_sliced.reindex(index=dest_sliced.index,
127+
columns=dest_sliced.columns)
128+
122129
# Obtain a mask of the conflicts in the current segment
123130
# as compared with all previously loaded data. That is:
124131
# NaN NaN = False
@@ -189,24 +196,30 @@ def convert_origin(df):
189196
# `for` loop loops through both `x` and `y`.
190197

191198
if offset in cur_worm.columns.get_level_values(0):
192-
# Consider offset as 0 if not available in a certain frame
193-
ox_column = cur_worm.loc[:, (offset)].fillna(0)
199+
# Consider offset as 0 if not available in a certain frame.
200+
# Coerce to numeric: the parser can leave the offset column
201+
# with object dtype (mixed str/int entries) when offsets
202+
# are present in some segments but not others.
203+
ox_column = cur_worm.loc[:, (offset)].apply(
204+
pd.to_numeric, errors='coerce').fillna(0)
194205

195206
# Shift our 'x' values by offset
196-
all_x_columns = cur_worm.loc[:, (coord)]
197-
ox_affine_change = (np.array(ox_column) *
207+
all_x_columns = cur_worm.loc[:, (coord)].apply(
208+
pd.to_numeric, errors='coerce')
209+
ox_affine_change = (np.array(ox_column, dtype=float) *
198210
np.ones(all_x_columns.shape))
199211
all_x_columns += ox_affine_change
200212

201213
if centroid in cur_worm.columns.get_level_values(0):
202-
cx_column = cur_worm.loc[:, (centroid)]
214+
cx_column = cur_worm.loc[:, (centroid)].apply(
215+
pd.to_numeric, errors='coerce')
203216
# Shift the centroid by the offset
204217
cx_column += ox_column
205218

206219
# Now make the centroid our new offset, since the rule
207220
# is that if the offset exists, the centroid is not
208221
# the offset, but we want it to be.
209-
cx_affine_change = (np.array(cx_column) *
222+
cx_affine_change = (np.array(cx_column, dtype=float) *
210223
np.ones(all_x_columns.shape))
211224
all_x_columns -= cx_affine_change
212225

@@ -224,7 +237,8 @@ def convert_origin(df):
224237
# This is so DataFrames with and without offsets
225238
# will show as comparing identically.
226239
for offset_key in offset_keys:
227-
df.drop(offset_key, axis=1, level='key', inplace=True)
240+
df.drop(offset_key, axis=1, level='key', inplace=True,
241+
errors='ignore')
228242

229243
# Because of a known issue in Pandas
230244
# (https://github.com/pydata/pandas/issues/2770), the dropped columns
@@ -389,7 +403,7 @@ def _obtain_time_series_data_frame(time_series_data):
389403
for i in range(len(cur_timeframes)):
390404
data_segment[k][i] = (
391405
data_segment[k][i] +
392-
[np.NaN] * (max_aspect_size - len(data_segment[k][i])))
406+
[np.nan] * (max_aspect_size - len(data_segment[k][i])))
393407

394408
num_timeframes = len(cur_timeframes)
395409

@@ -402,7 +416,7 @@ def _obtain_time_series_data_frame(time_series_data):
402416
cur_df = pd.DataFrame(cur_data, columns=cur_columns)
403417

404418
cur_df.index = cur_timeframes
405-
cur_df.index.names = 't'
419+
cur_df.index.names = ['t']
406420

407421
# We want the index (time) to be in order.
408422
cur_df.sort_index(axis=0, inplace=True)
@@ -466,7 +480,7 @@ def _obtain_time_series_data_frame(time_series_data):
466480
with warnings.catch_warnings():
467481
warnings.filterwarnings(action="ignore", category=FutureWarning)
468482
df_odict[worm_id] = \
469-
df_odict[worm_id].convert_objects(convert_numeric=True)
483+
df_odict[worm_id].infer_objects()
470484

471485
# If 'head' or 'ventral' is NaN, we must specify '?' since
472486
# otherwise, when saving this object, to specify "no value" we would
@@ -478,21 +492,27 @@ def _obtain_time_series_data_frame(time_series_data):
478492

479493
# We must replace NaN with None, otherwise the JSON encoder will
480494
# save 'NaN' as the string and this will get rejected by our schema
481-
# on any subsequent loads
482-
# Note we can't use .fillna(None) due to this issue:
483-
# https://github.com/pydata/pandas/issues/1972
495+
# on any subsequent loads.
496+
# Pandas 3.0 infers 'str' dtype for these columns, and assigning
497+
# NaN on a str-dtype column coerces to the string 'nan'. Force
498+
# object dtype and map both real NaN and stringified 'nan' back
499+
# to None so downstream JSON serialization writes null.
484500
df_keys = set(df_odict[worm_id].columns.get_level_values('key'))
485501
for k in ['head', 'ventral']:
486502
if k in df_keys:
487-
cur_slice = df_odict[worm_id].loc[:, idx[:, k, :]]
488-
df_odict[worm_id].loc[:, idx[:, k, :]] = \
489-
cur_slice.fillna(value=np.nan)
490-
491-
# Make sure aspect_size is a float, since only floats are nullable:
503+
df = df_odict[worm_id]
504+
for col in [c for c in df.columns if c[1] == k]:
505+
s = df[col].astype(object)
506+
df[col] = s.where(s.notna() & (s != 'nan'), None)
507+
508+
# Make sure aspect_size is a float, since only floats are nullable.
509+
# Replace the column whole rather than assigning via .loc[]; pandas
510+
# 2.x preserves the parent column's existing (object/str) dtype on
511+
# .loc[] assignment and raises TypeError on non-string values.
492512
if 'aspect_size' in df_keys:
493-
df_odict[worm_id].loc[:, idx[:, 'aspect_size', :]] = \
494-
df_odict[worm_id].loc[:, idx[:, 'aspect_size', :]] \
495-
.astype(float)
513+
df = df_odict[worm_id]
514+
for col in [c for c in df.columns if c[1] == 'aspect_size']:
515+
df[col] = df[col].astype(float)
496516

497517
return sort_odict(df_odict)
498518

src/Python/wcon/wcon_parser.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -385,21 +385,30 @@ def to_canon(self):
385385
for data_key in self.units:
386386
mu = self.units[data_key]
387387

388-
# Don't bother to "convert" units that are already in their
389-
# canonical form.
390-
if mu.unit_string == mu.canonical_unit_string:
391-
continue
392-
393388
tmu = self.units['t']
389+
already_canonical = (mu.unit_string == mu.canonical_unit_string)
394390
for worm_id in w.worm_ids:
395391

396392
try:
397-
# Apply across all worm ids and all aspects
398-
mu_slice = \
399-
w._data[worm_id].loc[:, idx[:, data_key, :]].copy()
400-
401-
w._data[worm_id].loc[:, idx[:, data_key, :]] = \
402-
mu_slice.applymap(mu.to_canon)
393+
df = w._data[worm_id]
394+
target_cols = [c for c in df.columns
395+
if c[1] == data_key]
396+
if not target_cols:
397+
raise KeyError(data_key)
398+
399+
# The parser can leave numeric columns with object
400+
# dtype (e.g. mixed int/str entries from how segments
401+
# are merged). Coerce so downstream arithmetic and
402+
# JSON serialization treat them as numbers, even when
403+
# the unit is already canonical and no conversion is
404+
# otherwise required. Replace each column whole rather
405+
# than via .loc[] assignment, which would preserve the
406+
# parent column's existing (object) dtype.
407+
for col in target_cols:
408+
new_col = pd.to_numeric(df[col], errors='coerce')
409+
if not already_canonical:
410+
new_col = new_col.apply(mu.to_canon)
411+
df[col] = new_col
403412
except KeyError:
404413
# Just ignore cases where there are "units" entries but no
405414
# corresponding data
@@ -822,7 +831,8 @@ def pd_equals(df1, df2):
822831
return False
823832

824833
try:
825-
pd.util.testing.assert_frame_equal(df1, df2)
834+
# pd.util.testing was removed in pandas 2.0; use pd.testing.
835+
pd.testing.assert_frame_equal(df1, df2)
826836
except AssertionError:
827837
return False
828838

0 commit comments

Comments
 (0)