Skip to content

Commit ba1345f

Browse files
authored
allow string columns as extra columns (#229)
* allow string columns + check attribute_types
* Update __about__.py: bump version to `0.1.5`
1 parent b099d39 commit ba1345f

File tree

3 files changed

+95
-1
lines changed

3 files changed

+95
-1
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.1.4"
1+
__version__ = "0.1.5"

python/src/intracktive/_tests/test_convert.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,26 @@ def test_convert_file_with_overwrite_zarr_true(
149149
)
150150

151151

152+
def test_datframe_to_browser_categorical_strings(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Check that a string-valued extra column can be sent to the browser.

    A string column is added to the sample data and passed to
    ``dataframe_to_browser`` as a ``"categorical"`` attribute. The call must
    complete without raising and must open the browser exactly once.
    """
    # Work on a copy so the fixture's DataFrame is not mutated in place.
    df = make_sample_data.copy()
    df["string_col"] = ["A", "B", "A", "C", "B"]

    # Patch webbrowser.open so no real browser window is launched. Any
    # exception is left to propagate: pytest then reports the original
    # traceback instead of a generic, misleading failure message.
    with patch.object(webbrowser, "open", return_value=True) as mock_browser:
        dataframe_to_browser(
            df,
            tmp_path,
            extra_cols=["string_col"],
            attribute_types=["categorical"],
        )

    mock_browser.assert_called_once()
170+
171+
152172
def test_dataframe_to_browser_with_attributes(
153173
tmp_path: Path,
154174
make_sample_data: pd.DataFrame,
@@ -524,3 +544,50 @@ def test_convert_dataframe_to_zarr_with_mixed_inf_nan_values(tmp_path):
524544
print(
525545
"✅ convert_dataframe_to_zarr handles mixed infinite and NaN values correctly!"
526546
)
547+
548+
549+
def test_convert_with_invalid_attribute_type(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Ensure convert_dataframe_to_zarr rejects unknown attribute types with ValueError."""
    frame = make_sample_data
    frame["intensity"] = [100.0, 105.0, 110.0, 95.0, 98.0]

    target = tmp_path / "sample_data_bundle.zarr"
    expected_message = r"Invalid attribute type\(s\):.*Valid types are:"

    # (extra_cols, attribute_types) pairs, exercised in this order.
    rejected_cases = (
        # a single invalid type
        (["intensity"], ["invalid_type"]),
        # several invalid types at once
        (["x", "y"], ["foo", "bar"]),
        # one valid type mixed with one invalid type
        (["x", "y"], ["continuous", "invalid"]),
    )

    for columns, types in rejected_cases:
        with pytest.raises(ValueError, match=expected_message):
            convert_dataframe_to_zarr(
                df=frame,
                zarr_path=target,
                extra_cols=columns,
                attribute_types=types,
            )

python/src/intracktive/convert.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
REQUIRED_COLUMNS = ["track_id", "t", "z", "y", "x", "parent_track_id"]
2020
INF_SPACE = -9999.9
21+
VALID_ATTRIBUTE_TYPES = ["continuous", "categorical", "hex"]
2122

2223
LOG = logging.getLogger(__name__)
2324
LOG.setLevel(logging.INFO)
@@ -304,6 +305,14 @@ def convert_dataframe_to_zarr(
304305
attribute_types = [get_col_type(df[c]) for c in extra_cols]
305306
LOG.info("column types: %s", attribute_types)
306307

308+
# Validate attribute types
309+
invalid_types = [t for t in attribute_types if t not in VALID_ATTRIBUTE_TYPES]
310+
if invalid_types:
311+
raise ValueError(
312+
f"Invalid attribute type(s): {invalid_types}. "
313+
f"Valid types are: {VALID_ATTRIBUTE_TYPES}"
314+
)
315+
307316
start = time.monotonic()
308317

309318
n_time_points = len(df["t"].unique())
@@ -363,6 +372,22 @@ def convert_dataframe_to_zarr(
363372

364373
points_to_tracks[points_ids, group["track_id"] - 1] = 1
365374

375+
# Encode string categorical columns to integers
376+
string_mappings = {}
377+
for col in extra_cols:
378+
if pd.api.types.is_string_dtype(df[col]) or pd.api.types.is_object_dtype(
379+
df[col]
380+
):
381+
# Check if actually contains strings
382+
if df[col].dropna().apply(lambda x: isinstance(x, str)).any():
383+
LOG.info(f"Encoding string column '{col}' to integers")
384+
# Convert to categorical and get codes
385+
df[col] = df[col].astype("category")
386+
string_mappings[col] = {
387+
i: cat for i, cat in enumerate(df[col].cat.categories)
388+
}
389+
df[col] = df[col].cat.codes.astype(float)
390+
366391
for col in extra_cols:
367392
attribute_array = attribute_array_empty.copy()
368393
for t, group in df.groupby("t"):
@@ -503,6 +528,8 @@ def convert_dataframe_to_zarr(
503528
attributes.attrs["pre_normalized"] = (
504529
True # Always True since normalization is handled here
505530
)
531+
if string_mappings:
532+
attributes.attrs["string_mappings"] = string_mappings
506533

507534
mean = df[["z", "y", "x"]].mean()
508535
extent = (df[["z", "y", "x"]] - mean).abs().max()

0 commit comments

Comments
 (0)