Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/src/intracktive/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.4"
__version__ = "0.1.5"
67 changes: 67 additions & 0 deletions python/src/intracktive/_tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,26 @@ def test_convert_file_with_overwrite_zarr_true(
)


def test_datframe_to_browser_categorical_strings(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Check that a string-valued column is accepted as a categorical attribute.

    A column of plain strings is added to the sample data and passed to
    ``dataframe_to_browser`` with ``attribute_types=["categorical"]``. The
    test passes when the call completes and the (patched) browser is opened
    exactly once.

    Any exception raised by ``dataframe_to_browser`` is deliberately left to
    propagate: pytest then reports the real exception type and traceback
    instead of a generic failure message.
    """
    # NOTE(review): the function name contains a typo ("datframe"); it is kept
    # unchanged so existing test IDs / selections keep working.
    df = make_sample_data
    # One value per row of the sample data.
    df["string_col"] = ["A", "B", "A", "C", "B"]

    # Patch webbrowser.open so no real browser window is launched.
    with patch.object(webbrowser, "open", return_value=True) as mock_browser:
        dataframe_to_browser(
            df,
            tmp_path,
            extra_cols=["string_col"],
            attribute_types=["categorical"],
        )

    # The mock keeps its call history after the patch is undone.
    mock_browser.assert_called_once()


def test_dataframe_to_browser_with_attributes(
tmp_path: Path,
make_sample_data: pd.DataFrame,
Expand Down Expand Up @@ -524,3 +544,50 @@ def test_convert_dataframe_to_zarr_with_mixed_inf_nan_values(tmp_path):
print(
"✅ convert_dataframe_to_zarr handles mixed infinite and NaN values correctly!"
)


def test_convert_with_invalid_attribute_type(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Unknown attribute types must make convert_dataframe_to_zarr raise ValueError.

    Three scenarios are exercised in order: a single invalid type, several
    invalid types, and a mix of one valid and one invalid type. Each must
    raise a ``ValueError`` whose message names the invalid type(s) and lists
    the valid ones.
    """
    frame = make_sample_data
    frame["intensity"] = [100.0, 105.0, 110.0, 95.0, 98.0]

    target = tmp_path / "sample_data_bundle.zarr"
    expected_message = r"Invalid attribute type\(s\):.*Valid types are:"

    # (extra_cols, attribute_types) pairs that must all be rejected.
    rejected_cases = (
        (["intensity"], ["invalid_type"]),  # single invalid type
        (["x", "y"], ["foo", "bar"]),  # multiple invalid types
        (["x", "y"], ["continuous", "invalid"]),  # mixed valid and invalid
    )

    for columns, types in rejected_cases:
        with pytest.raises(ValueError, match=expected_message):
            convert_dataframe_to_zarr(
                df=frame,
                zarr_path=target,
                extra_cols=columns,
                attribute_types=types,
            )
27 changes: 27 additions & 0 deletions python/src/intracktive/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

# Column names the converter treats as required.
# NOTE(review): presumably every input DataFrame must contain all of these —
# the check itself is not visible here; confirm against the validation code.
REQUIRED_COLUMNS = ["track_id", "t", "z", "y", "x", "parent_track_id"]
# NOTE(review): looks like a sentinel/placeholder value; its use is not
# visible in this part of the file — confirm before changing.
INF_SPACE = -9999.9
# Attribute type names accepted by convert_dataframe_to_zarr; any other entry
# in `attribute_types` makes it raise ValueError.
VALID_ATTRIBUTE_TYPES = ["continuous", "categorical", "hex"]

# Module-level logger; its level is set to INFO at import time.
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)
Expand Down Expand Up @@ -304,6 +305,14 @@ def convert_dataframe_to_zarr(
attribute_types = [get_col_type(df[c]) for c in extra_cols]
LOG.info("column types: %s", attribute_types)

# Validate attribute types
invalid_types = [t for t in attribute_types if t not in VALID_ATTRIBUTE_TYPES]
if invalid_types:
raise ValueError(
f"Invalid attribute type(s): {invalid_types}. "
f"Valid types are: {VALID_ATTRIBUTE_TYPES}"
)

start = time.monotonic()

n_time_points = len(df["t"].unique())
Expand Down Expand Up @@ -363,6 +372,22 @@ def convert_dataframe_to_zarr(

points_to_tracks[points_ids, group["track_id"] - 1] = 1

# Encode string categorical columns to integers
string_mappings = {}
for col in extra_cols:
if pd.api.types.is_string_dtype(df[col]) or pd.api.types.is_object_dtype(
df[col]
):
# Check if actually contains strings
if df[col].dropna().apply(lambda x: isinstance(x, str)).any():
LOG.info(f"Encoding string column '{col}' to integers")
# Convert to categorical and get codes
df[col] = df[col].astype("category")
string_mappings[col] = {
i: cat for i, cat in enumerate(df[col].cat.categories)
}
df[col] = df[col].cat.codes.astype(float)

for col in extra_cols:
attribute_array = attribute_array_empty.copy()
for t, group in df.groupby("t"):
Expand Down Expand Up @@ -503,6 +528,8 @@ def convert_dataframe_to_zarr(
attributes.attrs["pre_normalized"] = (
True # Always True since normalization is handled here
)
if string_mappings:
attributes.attrs["string_mappings"] = string_mappings

mean = df[["z", "y", "x"]].mean()
extent = (df[["z", "y", "x"]] - mean).abs().max()
Expand Down
Loading