Skip to content

Commit 106a362

Browse files
committed
deduplicate reordering of data_mod and attrm
1 parent c1ba851 commit 106a362

2 files changed

Lines changed: 36 additions & 35 deletions

File tree

src/mudata/_core/mudata.py

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535
_maybe_coerce_to_int,
3636
_restore_index,
3737
_update_and_concat,
38-
fix_attrmap_col,
38+
update_fix_attrmap_col,
39+
update_reorder_df_and_attrm_index,
3940
)
4041
from .views import DictView
4142

@@ -645,27 +646,16 @@ def _update_attr(
645646
else:
646647
data_mod = _maybe_coerce_to_bool(pd.concat(dfs, join="outer", axis=0, sort=False))
647648
for mod in self.mod.keys():
648-
fix_attrmap_col(data_mod, mod, rowcol)
649+
update_fix_attrmap_col(data_mod, mod, rowcol)
649650

650651
data_mod = _make_index_unique(data_mod, force=attr_intersecting)
651652
data_global = _make_index_unique(data_global, force=attr_intersecting)
652653
if data_global.shape[1] > 0:
653654
data_mod = data_mod.join(data_global, how="left", sort=False)
654655

655656
if data_global.shape[0] > 0:
656-
# reorder new index to conform to the old index as much as possible
657-
kept_idx = data_global.index[data_global.index.isin(data_mod.index)]
658-
new_idx = data_mod.index[~data_mod.index.isin(data_global.index)]
659-
data_mod = data_mod.loc[kept_idx.append(new_idx), :]
660-
661-
index_order = data_global.index.get_indexer(data_mod.index)
662-
can_update = (
663-
new_idx.shape[0] == 0 # filtered or reordered
664-
or kept_idx.shape[0] == data_global.shape[0] # new rows only
665-
or data_mod.shape[0]
666-
== data_global.shape[
667-
0
668-
] # renamed (since new_idx.shape[0] > 0 and kept_idx.shape[0] < data_global.shape[0])
657+
data_mod, index_order, can_update = update_reorder_df_and_attrm_index(
658+
data_mod, data_global, axis, self.axis
669659
)
670660

671661
data_mod = _restore_index(data_mod)
@@ -684,7 +674,7 @@ def _update_attr(
684674
data_mod.index.set_names(rowcol, inplace=True)
685675
data_global.index.set_names(rowcol, inplace=True)
686676
for mod, amod in self.mod.items():
687-
colname = fix_attrmap_col(data_mod, mod, rowcol)
677+
colname = update_fix_attrmap_col(data_mod, mod, rowcol)
688678
if mod in attrmap:
689679
modmap = attrmap[mod].ravel()
690680
modmask = modmap > 0
@@ -717,24 +707,8 @@ def _update_attr(
717707
data_mod = _make_index_unique(data_mod, force=need_unique)
718708
data_mod = data_mod.join(data_global, how="left", sort=False)
719709

720-
# reorder new index to conform to the old index as much as possible
721-
kept_idx = data_global.index[data_global.index.isin(data_mod.index)]
722-
new_idx = data_mod.index[~data_mod.index.isin(data_global.index)]
723-
data_mod = data_mod.loc[kept_idx.append(new_idx), :]
724-
725-
index_order = data_global.index.get_indexer(data_mod.index)
726-
can_update = (
727-
new_idx.shape[0] == 0 # filtered or reordered
728-
or kept_idx.shape[0] == data_global.shape[0] # new rows only
729-
or data_mod.shape[0]
730-
== data_global.shape[
731-
0
732-
] # renamed (since new_idx.shape[0] > 0 and kept_idx.shape[0] < data_global.shape[0])
733-
or (
734-
axis == self._axis
735-
and axis != -1
736-
and data_mod.shape[0] > data_global.shape[0]
737-
) # new modality added and concatenated
710+
data_mod, index_order, can_update = update_reorder_df_and_attrm_index(
711+
data_mod, data_global, axis, self.axis
738712
)
739713

740714
if need_unique:

src/mudata/_core/utils.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _maybe_coerce_to_int(df: T) -> T:
159159
return df
160160

161161

162-
def fix_attrmap_col(data_mod: pd.DataFrame, mod: str, rowcol: str) -> str:
162+
def update_fix_attrmap_col(data_mod: pd.DataFrame, mod: str, rowcol: str) -> str:
163163
colname = mod + ":" + rowcol
164164
# use 0 as special value for missing
165165
# we could use a pandas.array, which has missing values support, but then we get an Exception upon hdf5 write
@@ -168,3 +168,30 @@ def fix_attrmap_col(data_mod: pd.DataFrame, mod: str, rowcol: str) -> str:
168168
col.replace(np.nan, 0, inplace=True)
169169
data_mod[colname] = col.astype(np.uint32)
170170
return colname
171+
172+
173+
def update_reorder_df_and_attrm_index(
174+
data_mod: pd.DataFrame,
175+
data_global: pd.DataFrame,
176+
axis: Literal[-1, 0, 1],
177+
mdaxis: Literal[-1, 0, 1],
178+
) -> tuple[pd.DataFrame, np.ndarray[np.intp], bool]:
179+
# reorder new index to conform to the old index as much as possible
180+
kept_idx = data_global.index[data_global.index.isin(data_mod.index)]
181+
new_idx = data_mod.index[~data_mod.index.isin(data_global.index)]
182+
data_mod = data_mod.loc[kept_idx.append(new_idx), :]
183+
184+
index_order = data_global.index.get_indexer(data_mod.index)
185+
can_update = (
186+
new_idx.shape[0] == 0 # filtered or reordered
187+
or kept_idx.shape[0] == data_global.shape[0] # new rows only
188+
or data_mod.shape[0]
189+
== data_global.shape[
190+
0
191+
] # renamed (since new_idx.shape[0] > 0 and kept_idx.shape[0] < data_global.shape[0])
192+
or (
193+
axis == mdaxis and axis != -1 and data_mod.shape[0] > data_global.shape[0]
194+
) # new modality added and concatenated
195+
)
196+
197+
return data_mod, index_order, can_update

0 commit comments

Comments
 (0)