Skip to content

Commit 780ff26

Browse files
authored
Add private inplace subset method (#160)
* add inplace subset method * remove omop dependency * bump version, fix doc nitpick, add changelog
1 parent ec0f164 commit 780ff26

5 files changed

Lines changed: 100 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,23 @@ and this project adheres to [Semantic Versioning][].
88
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
99
[semantic versioning]: https://semver.org/spec/v2.0.0.html
1010

11+
## [0.0.8]
12+
13+
### Fixed
14+
- Update duckdb imports for future (#157) @eroell
15+
16+
### Maintenance
17+
- Private subset method for `EHRData` (#160) @eroell
18+
- Remove `omop` package dependency (#160) @eroell
19+
20+
## [0.0.7]
21+
22+
### Fixed
23+
- Fix tests and Getting Started Notebook (#155) @eroell
24+
25+
### Maintenance
26+
- Update duckdb imports for future (#155) @eroell
27+
1128
## [0.0.6]
1229

1330
### Fixed

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@
131131
("py:class", "types.EllipsisType"),
132132
# https://github.com/duckdb/duckdb-web/issues/3806
133133
("py:class", "duckdb.duckdb.DuckDBPyConnection"),
134+
("py:class", "_duckdb.DuckDBPyConnection"),
134135
# Is documented as a py:attribute instead
135136
("py:class", "numpy.int64"),
136137
# For now not in public facing API

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
44

55
[project]
66
name = "ehrdata"
7-
version = "0.0.8a1"
7+
version = "0.0.8"
88
description = "A Python package for EHR data"
99
readme = "README.md"
1010
license = "Apache-2.0"
@@ -62,8 +62,8 @@ optional-dependencies.ehrapy = [
6262
"ehrapy",
6363
]
6464
optional-dependencies.lamin = [
65-
"lamindb[bionty,jupyter]",
66-
"omop",
65+
"lamindb",
66+
# "omop",  # Not needed, and it causes an issue in the Read the Docs build: https://github.com/laminlabs/lamindb/issues/3081
6767
"rich",
6868
]
6969
optional-dependencies.test = [

src/ehrdata/core/ehrdata.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,30 @@ def _unpack_index(self, index: Index) -> tuple[Index1D, Index1D, Index1D]:
393393
msg = "invalid number of indices"
394394
raise IndexError(msg)
395395

396+
def _inplace_subset_obs(self, index: Index1D):
397+
"""Inplace subsetting along observations dimension.
398+
399+
Same as `edata = edata[index, :]`, but inplace.
400+
401+
Args:
402+
index: Index to subset along observations dimension.
403+
"""
404+
edata_subset = self[index].copy()
405+
406+
self._init_as_actual(edata_subset)
407+
408+
def _inplace_subset_var(self, index: Index1D):
409+
"""Inplace subsetting along variables dimension.
410+
411+
Same as `edata = edata[:, index]`, but inplace.
412+
413+
Args:
414+
index: Index to subset along variables dimension.
415+
"""
416+
edata_subset = self[:, index].copy()
417+
418+
self._init_as_actual(edata_subset)
419+
396420
def copy(self) -> EHRData:
397421
"""Returns a copy of the EHRData object."""
398422
return EHRData.from_adata(

tests/core/test_core.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,3 +472,58 @@ def test_copy_of_obsvar_names(edata_333, adata_33):
472472
edata_obsvar_subset = edata_obsvar_subset.copy()
473473
assert not edata_obsvar_subset.is_view
474474
_assert_shape_matches(edata_obsvar_subset, (1, 2, 3))
475+
476+
477+
def test_inplace_subset_obs(edata_333):
478+
edata_333_copy = edata_333.copy()
479+
480+
# simple subset
481+
edata_333._inplace_subset_obs([0, 2])
482+
483+
_assert_shape_matches(edata_333, (2, 3, 3))
484+
485+
assert np.allclose(edata_333_copy.X[[0, 2], :], edata_333.X)
486+
assert np.allclose(edata_333_copy.R[[0, 2], :, :], edata_333.R)
487+
assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
488+
489+
# repeated subset
490+
edata_333._inplace_subset_obs([1])
491+
492+
_assert_shape_matches(edata_333, (1, 3, 3))
493+
assert np.allclose(edata_333_copy.X[[2], :], edata_333.X)
494+
assert np.allclose(edata_333_copy.R[[2], :, :], edata_333.R)
495+
assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
496+
497+
# mixed subset
498+
edata_333._inplace_subset_var([0, 2])
499+
_assert_shape_matches(edata_333, (1, 2, 3))
500+
assert np.allclose(edata_333_copy.X[[2], [0, 2]], edata_333.X)
501+
assert np.allclose(edata_333_copy.R[[2], [0, 2], :], edata_333.R)
502+
assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
503+
504+
505+
def test_inplace_subset_var(edata_333):
506+
edata_333_copy = edata_333.copy()
507+
# simple subset
508+
edata_333._inplace_subset_var([0, 2])
509+
510+
_assert_shape_matches(edata_333, (3, 2, 3))
511+
512+
assert np.allclose(edata_333_copy.X[:, [0, 2]], edata_333.X)
513+
assert np.allclose(edata_333_copy.R[:, [0, 2], :], edata_333.R)
514+
assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
515+
516+
# repeated subset
517+
edata_333._inplace_subset_var([1])
518+
519+
_assert_shape_matches(edata_333, (3, 1, 3))
520+
assert np.allclose(edata_333_copy.X[:, [2]], edata_333.X)
521+
assert np.allclose(edata_333_copy.R[:, [2], :], edata_333.R)
522+
assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)
523+
524+
# mixed subset
525+
edata_333._inplace_subset_obs([0, 2])
526+
_assert_shape_matches(edata_333, (2, 1, 3))
527+
assert np.allclose(edata_333_copy.X[[0, 2], [2]].reshape(-1, 1), edata_333.X)
528+
assert np.allclose(edata_333_copy.R[[0, 2], [2], :].reshape(-1, 1, 3), edata_333.R)
529+
assert pd.DataFrame.equals(edata_333.tem, edata_333_copy.tem)

0 commit comments

Comments
 (0)