royerlab · TeunHuijben · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/python/src/intracktive/__about__.py b/python/src/intracktive/__about__.py
@@ -1 +1 @@
-__version__ = "0.1.4"
+__version__ = "0.1.5"
diff --git a/python/src/intracktive/_tests/test_convert.py b/python/src/intracktive/_tests/test_convert.py
@@ -149,6 +149,26 @@ def test_convert_file_with_overwrite_zarr_true(
     )
 
 
+def test_datframe_to_browser_categorical_strings(
+    tmp_path: Path,
+    make_sample_data: pd.DataFrame,
+) -> None:
+    """Check that dataframe_to_browser accepts a string-valued categorical column."""
+    df = make_sample_data
+    df["string_col"] = ["A", "B", "A", "C", "B"]
+
+    # Patch webbrowser.open so no real browser window is launched.
+    with patch.object(webbrowser, "open", return_value=True) as mock_browser:
+        # No try/except: a failure should surface with its own traceback.
+        dataframe_to_browser(
+            df,
+            tmp_path,
+            extra_cols=["string_col"],
+            attribute_types=["categorical"],
+        )
+        mock_browser.assert_called_once()
+
+
 def test_dataframe_to_browser_with_attributes(
     tmp_path: Path,
     make_sample_data: pd.DataFrame,
@@ -524,3 +544,50 @@ def test_convert_dataframe_to_zarr_with_mixed_inf_nan_values(tmp_path):
     print(
         "✅ convert_dataframe_to_zarr handles mixed infinite and NaN values correctly!"
     )
+
+
+def test_convert_with_invalid_attribute_type(
+    tmp_path: Path,
+    make_sample_data: pd.DataFrame,
+) -> None:
+    """Verify convert_dataframe_to_zarr rejects unknown attribute types.
+
+    Each of the three calls below is expected to raise a ValueError whose
+    message matches ``rejection``:
+
+    * a single unknown type,
+    * several unknown types,
+    * a known type combined with an unknown one.
+    """
+    frame = make_sample_data
+    frame["intensity"] = [100.0, 105.0, 110.0, 95.0, 98.0]
+
+    target = tmp_path / "sample_data_bundle.zarr"
+    rejection = r"Invalid attribute type\(s\):.*Valid types are:"
+
+    # A single unknown type.
+    with pytest.raises(ValueError, match=rejection):
+        convert_dataframe_to_zarr(
+            df=frame,
+            zarr_path=target,
+            extra_cols=["intensity"],
+            attribute_types=["invalid_type"],
+        )
+
+    # Several unknown types at once.
+    with pytest.raises(ValueError, match=rejection):
+        convert_dataframe_to_zarr(
+            df=frame,
+            zarr_path=target,
+            extra_cols=["x", "y"],
+            attribute_types=["foo", "bar"],
+        )
+
+    # A known type ("continuous") paired with an unknown one.
+    with pytest.raises(ValueError, match=rejection):
+        convert_dataframe_to_zarr(
+            df=frame,
+            zarr_path=target,
+            extra_cols=["x", "y"],
+            attribute_types=["continuous", "invalid"],
+        )
diff --git a/python/src/intracktive/convert.py b/python/src/intracktive/convert.py
@@ -18,6 +18,7 @@
 
 REQUIRED_COLUMNS = ["track_id", "t", "z", "y", "x", "parent_track_id"]
 INF_SPACE = -9999.9
+VALID_ATTRIBUTE_TYPES = ["continuous", "categorical", "hex"]  # allowed attribute types
 
 LOG = logging.getLogger(__name__)
 LOG.setLevel(logging.INFO)
@@ -304,6 +305,14 @@ def convert_dataframe_to_zarr(
         attribute_types = [get_col_type(df[c]) for c in extra_cols]
     LOG.info("column types: %s", attribute_types)
 
+    # Reject attribute types outside VALID_ATTRIBUTE_TYPES (given or inferred above)
+    invalid_types = [t for t in attribute_types if t not in VALID_ATTRIBUTE_TYPES]
+    if invalid_types:
+        raise ValueError(
+            f"Invalid attribute type(s): {invalid_types}. "
+            f"Valid types are: {VALID_ATTRIBUTE_TYPES}"
+        )
+
     start = time.monotonic()
 
     n_time_points = len(df["t"].unique())
@@ -363,6 +372,22 @@ def convert_dataframe_to_zarr(
 
         points_to_tracks[points_ids, group["track_id"] - 1] = 1
 
+    # Encode string-valued extra columns as integer category codes (stored as
+    # floats); string_mappings records {code: original string} per column.
+    string_mappings = {}
+    for col in extra_cols:
+        if pd.api.types.is_string_dtype(df[col]) or pd.api.types.is_object_dtype(
+            df[col]
+        ):
+            # Object columns may hold non-strings; encode only if a str is present
+            if df[col].dropna().apply(lambda x: isinstance(x, str)).any():
+                LOG.info("Encoding string column '%s' to integers", col)
+                # NOTE(review): missing values get code -1 (-1.0 here) — confirm
+                # that downstream consumers expect this rather than NaN.
+                df[col] = df[col].astype("category")
+                string_mappings[col] = dict(enumerate(df[col].cat.categories))
+                df[col] = df[col].cat.codes.astype(float)
+
     for col in extra_cols:
         attribute_array = attribute_array_empty.copy()
         for t, group in df.groupby("t"):
@@ -503,6 +528,8 @@ def convert_dataframe_to_zarr(
         attributes.attrs["pre_normalized"] = (
             True  # Always True since normalization is handled here
         )
+        if string_mappings:  # skip the attr when no column was string-encoded
+            attributes.attrs["string_mappings"] = string_mappings
 
     mean = df[["z", "y", "x"]].mean()
     extent = (df[["z", "y", "x"]] - mean).abs().max()