Add private inplace subset method (#160)

eroell · web-flow · commit 780ff26c4d29 · 2025-09-16T19:14:41.000+02:00
* add inplace subset method

* remove omop dependency

* bump version, fix doc nitpick, add changelog
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,23 @@ and this project adheres to [Semantic Versioning][].
 [keep a changelog]: https://keepachangelog.com/en/1.0.0/
 [semantic versioning]: https://semver.org/spec/v2.0.0.html
 
+## [0.0.8]
+
+### Fixed
+- Update duckdb imports for future (#157) @eroell
+
+### Maintenance
+- Add private in-place subset methods `_inplace_subset_obs` and `_inplace_subset_var` to `EHRData` (#160) @eroell
+- Remove `omop` package dependency (#160) @eroell
+
+## [0.0.7]
+
+### Fixed
+- Fix tests and Getting Started Notebook (#155) @eroell
+
+### Maintenance
+- Update duckdb imports for future (#155) @eroell
+
 ## [0.0.6]
 
 ### Fixed
diff --git a/docs/conf.py b/docs/conf.py
@@ -131,6 +131,7 @@
     ("py:class", "types.EllipsisType"),
     # https://github.com/duckdb/duckdb-web/issues/3806
     ("py:class", "duckdb.duckdb.DuckDBPyConnection"),
+    ("py:class", "_duckdb.DuckDBPyConnection"),
     # Is documented as a py:attribute instead
     ("py:class", "numpy.int64"),
     # For now not in public facing API
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
 
 [project]
 name = "ehrdata"
-version = "0.0.8a1"
+version = "0.0.8"
 description = "A Python package for EHR data"
 readme = "README.md"
 license = "Apache-2.0"
@@ -62,8 +62,8 @@ optional-dependencies.ehrapy = [
   "ehrapy",
 ]
 optional-dependencies.lamin = [
-  "lamindb[bionty,jupyter]",
-  "omop",
+  "lamindb",
+  # "omop" removed: not needed and causing an issue in the readthedocs build, see https://github.com/laminlabs/lamindb/issues/3081
   "rich",
 ]
 optional-dependencies.test = [
diff --git a/src/ehrdata/core/ehrdata.py b/src/ehrdata/core/ehrdata.py
@@ -393,6 +393,30 @@ def _unpack_index(self, index: Index) -> tuple[Index1D, Index1D, Index1D]:
             msg = "invalid number of indices"
             raise IndexError(msg)
 
+    def _inplace_subset_obs(self, index: Index1D) -> None:
+        """Subset this object in place along the observations dimension.
+
+        Same as `edata = edata[index, :]`, but inplace.
+
+        Args:
+            index: Index to subset along observations dimension.
+        """
+        edata_subset = self[index].copy()  # copy first; assumed to detach the subset from `self` - confirm
+
+        self._init_as_actual(edata_subset)
+
+    def _inplace_subset_var(self, index: Index1D) -> None:
+        """Subset this object in place along the variables dimension.
+
+        Same as `edata = edata[:, index]`, but inplace.
+
+        Args:
+            index: Index to subset along variables dimension.
+        """
+        edata_subset = self[:, index].copy()  # copy first; assumed to detach the subset from `self` - confirm
+
+        self._init_as_actual(edata_subset)
+
     def copy(self) -> EHRData:
         """Returns a copy of the EHRData object."""
         return EHRData.from_adata(
diff --git a/tests/core/test_core.py b/tests/core/test_core.py
@@ -472,3 +472,58 @@ def test_copy_of_obsvar_names(edata_333, adata_33):
     edata_obsvar_subset = edata_obsvar_subset.copy()
     assert not edata_obsvar_subset.is_view
     _assert_shape_matches(edata_obsvar_subset, (1, 2, 3))
+
+
+def test_inplace_subset_obs(edata_333):
+    edata_333_copy = edata_333.copy()
+
+    # simple subset: keeping observations 0 and 2 must shrink X and R but leave tem unchanged
+    edata_333._inplace_subset_obs([0, 2])
+
+    _assert_shape_matches(edata_333, (2, 3, 3))
+
+    assert np.allclose(edata_333_copy.X[[0, 2], :], edata_333.X)
+    assert np.allclose(edata_333_copy.R[[0, 2], :, :], edata_333.R)
+    assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
+
+    # repeated subset: position 1 of the already-subset data is original observation 2
+    edata_333._inplace_subset_obs([1])
+
+    _assert_shape_matches(edata_333, (1, 3, 3))
+    assert np.allclose(edata_333_copy.X[[2], :], edata_333.X)
+    assert np.allclose(edata_333_copy.R[[2], :, :], edata_333.R)
+    assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
+
+    # mixed subset: variables 0 and 2 of the remaining observation (original observation 2)
+    edata_333._inplace_subset_var([0, 2])
+    _assert_shape_matches(edata_333, (1, 2, 3))
+    assert np.allclose(edata_333_copy.X[[2], [0, 2]], edata_333.X)
+    assert np.allclose(edata_333_copy.R[[2], [0, 2], :], edata_333.R)
+    assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
+
+
+def test_inplace_subset_var(edata_333):
+    edata_333_copy = edata_333.copy()
+    # simple subset: keeping variables 0 and 2 must shrink X and R but leave tem unchanged
+    edata_333._inplace_subset_var([0, 2])
+
+    _assert_shape_matches(edata_333, (3, 2, 3))
+
+    assert np.allclose(edata_333_copy.X[:, [0, 2]], edata_333.X)
+    assert np.allclose(edata_333_copy.R[:, [0, 2], :], edata_333.R)
+    assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
+
+    # repeated subset: position 1 of the already-subset data is original variable 2
+    edata_333._inplace_subset_var([1])
+
+    _assert_shape_matches(edata_333, (3, 1, 3))
+    assert np.allclose(edata_333_copy.X[:, [2]], edata_333.X)
+    assert np.allclose(edata_333_copy.R[:, [2], :], edata_333.R)
+    assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
+
+    # mixed subset: observations 0 and 2 of the remaining variable (original variable 2)
+    edata_333._inplace_subset_obs([0, 2])
+    _assert_shape_matches(edata_333, (2, 1, 3))
+    assert np.allclose(edata_333_copy.X[[0, 2], [2]].reshape(-1, 1), edata_333.X)
+    assert np.allclose(edata_333_copy.R[[0, 2], [2], :].reshape(-1, 1, 3), edata_333.R)
+    assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)