Skip to content

Commit 1e398f6

Browse files
committed
refactoring
1 parent df51c56 commit 1e398f6

9 files changed

Lines changed: 175 additions & 293 deletions

File tree

anndata-test-utils/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ pub fn test_save<B: Backend>() {
3737
)
3838
});
3939
proptest!(ProptestConfig::with_cases(100), |((adata, slice_obs, slice_var) in anndatas)| {
40-
adata.write::<B, _>(&output).unwrap();
40+
adata.write::<B, _>(&output, None).unwrap();
4141
let adata_in = AnnData::<B>::open(B::open(&output).unwrap()).unwrap();
4242
prop_assert!(anndata_eq(&adata, &adata_in).unwrap());
4343
adata_in.close().unwrap();

anndata-test-utils/tests/tests.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ fn test_complex_dataframe() {
1414
with_tmp_dir(|dir| {
1515
let file = dir.join("test.h5");
1616
let adata = AnnData::<H5>::open(H5::open(&input).unwrap()).unwrap();
17-
adata.write::<H5, _>(file).unwrap();
17+
adata.write::<H5, _>(file, None).unwrap();
1818
})
1919
}
2020

anndata/src/anndata.rs

Lines changed: 94 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
mod dataset;
22

33
pub use dataset::{AnnDataSet, StackedAnnData};
4-
use smallvec::SmallVec;
54

65
use crate::{
6+
ArrayElemOp, AxisArraysOp, ElemCollectionOp,
77
backend::{Backend, DataContainer, GroupOp, StoreOp},
88
container::{ArrayElem, Axis, AxisArrays, DataFrameElem, Dim, ElemCollection, Slot},
99
data::*,
1010
traits::AnnDataOp,
1111
};
1212

13-
use anyhow::{anyhow, ensure, Result};
13+
use anyhow::{Result, ensure};
1414
use itertools::Itertools;
1515
use std::path::{Path, PathBuf};
1616

@@ -128,7 +128,11 @@ pub(crate) fn new_varp<B: Backend>(group: B::Group, n_vars: &Dim) -> Result<Axis
128128
}
129129

130130
// Helper function to create new layers of data
131-
pub(crate) fn new_layers<B: Backend>(group: B::Group, n_obs: &Dim, n_vars: &Dim) -> Result<AxisArrays<B>> {
131+
pub(crate) fn new_layers<B: Backend>(
132+
group: B::Group,
133+
n_obs: &Dim,
134+
n_vars: &Dim,
135+
) -> Result<AxisArrays<B>> {
132136
AxisArrays::new(group, Axis::RowColumn, n_obs, Some(n_vars))
133137
}
134138

@@ -249,57 +253,48 @@ impl<B: Backend> AnnData<B> {
249253
}
250254

251255
/// Write the AnnData object to a new file.
252-
pub fn write<O: Backend, P: AsRef<Path>>(&self, filename: P) -> Result<()> {
253-
let file = O::new(filename)?;
254-
let _obs_lock = self.n_obs.lock();
255-
let _vars_lock = self.n_vars.lock();
256-
self.get_x()
257-
.lock()
258-
.as_mut()
259-
.map(|x| x.export::<O, _>(&file, "X"))
260-
.transpose()?;
261-
self.get_obs()
262-
.lock()
263-
.as_mut()
264-
.map(|x| x.export::<O, _>(&file, "obs"))
265-
.transpose()?;
266-
self.get_var()
267-
.lock()
268-
.as_mut()
269-
.map(|x| x.export::<O, _>(&file, "var"))
270-
.transpose()?;
271-
self.obsm()
272-
.lock()
273-
.as_mut()
274-
.map(|x| x.export::<O, _>(&file, "obsm"))
275-
.transpose()?;
276-
self.obsp()
277-
.lock()
278-
.as_mut()
279-
.map(|x| x.export::<O, _>(&file, "obsp"))
280-
.transpose()?;
281-
self.varm()
282-
.lock()
283-
.as_mut()
284-
.map(|x| x.export::<O, _>(&file, "varm"))
285-
.transpose()?;
286-
self.varp()
287-
.lock()
288-
.as_mut()
289-
.map(|x| x.export::<O, _>(&file, "varp"))
290-
.transpose()?;
291-
self.uns()
292-
.lock()
293-
.as_mut()
294-
.map(|x| x.export::<O, _>(&file, "uns"))
295-
.transpose()?;
296-
self.layers()
297-
.lock()
298-
.as_mut()
299-
.map(|x| x.export::<O, _>(&file, "layers"))
300-
.transpose()?;
301-
file.close()?;
302-
Ok(())
256+
///
257+
/// # Arguments
258+
///
259+
/// * `filename` - The path to the output file.
260+
/// * `chunk_size` - If None, writes the entire data matrix at once. Otherwise,
261+
/// writes the data matrix in chunks of the specified size.
262+
/// This can be useful for saving large datasets that do not fit into memory.
263+
pub fn write<O: Backend, P: AsRef<Path>>(
264+
&self,
265+
filename: P,
266+
chunk_size: Option<usize>,
267+
) -> Result<()> {
268+
let adata = AnnData::<O>::new(filename)?;
269+
270+
adata.set_n_obs(self.n_obs())?;
271+
adata.set_n_vars(self.n_vars())?;
272+
273+
if !self.get_obs().is_none() {
274+
adata.set_obs_names(self.obs_names())?;
275+
adata.set_obs(self.read_obs()?)?;
276+
}
277+
if !self.get_var().is_none() {
278+
adata.set_var_names(self.var_names())?;
279+
adata.set_var(self.read_var()?)?;
280+
}
281+
282+
if !self.x().is_none() {
283+
if let Some(chunk_size) = chunk_size {
284+
adata.set_x_from_iter(self.x().iter::<ArrayData>(chunk_size).map(|x| x.0))?;
285+
} else {
286+
adata.set_x(self.x().get::<ArrayData>()?.unwrap())?;
287+
}
288+
}
289+
290+
adata.set_obsm(self.obsm().iter_item::<ArrayData>())?;
291+
adata.set_obsp(self.obsp().iter_item::<ArrayData>())?;
292+
adata.set_varm(self.varm().iter_item::<ArrayData>())?;
293+
adata.set_varp(self.varp().iter_item::<ArrayData>())?;
294+
adata.set_uns(self.uns().iter_item::<Data>())?;
295+
adata.set_layers(self.layers().iter_item::<ArrayData>())?;
296+
297+
adata.close()
303298
}
304299

305300
/// Write a subset of the AnnData object to a new file.
@@ -309,64 +304,51 @@ impl<B: Backend> AnnData<B> {
309304
S: AsRef<[SelectInfoElem]>,
310305
P: AsRef<Path>,
311306
{
312-
selection.as_ref()[0]
313-
.bound_check(self.n_obs())
314-
.map_err(|e| anyhow!("AnnData obs {}", e))?;
315-
selection.as_ref()[1]
316-
.bound_check(self.n_vars())
317-
.map_err(|e| anyhow!("AnnData var {}", e))?;
318-
let slice: SmallVec<[_; 3]> = selection.as_ref().iter().collect();
319-
let file = O::new(filename)?;
320-
let _obs_lock = self.n_obs.lock();
321-
let _vars_lock = self.n_vars.lock();
322-
self.get_x()
323-
.lock()
324-
.as_mut()
325-
.map(|x| x.export_select::<O, _>(slice.as_slice(), &file, "X"))
326-
.transpose()?;
307+
let adata = AnnData::<O>::new(filename)?;
308+
let obs_idx = &selection.as_ref()[0];
309+
let var_idx = &selection.as_ref()[1];
310+
let full = SelectInfoElem::full();
311+
312+
let n_obs = SelectInfoElemBounds::new(&obs_idx, self.n_obs()).len();
313+
let n_vars = SelectInfoElemBounds::new(&var_idx, self.n_vars()).len();
314+
adata.set_n_obs(n_obs)?;
315+
adata.set_n_vars(n_vars)?;
316+
317+
if !self.get_obs().is_none() {
318+
adata.set_obs_names(self.obs_names().select(obs_idx))?;
319+
let obs = Selectable::select_axis(&self.read_obs()?, 0, obs_idx);
320+
adata.set_obs(obs)?;
321+
}
322+
if !self.get_var().is_none() {
323+
adata.set_var_names(self.var_names().select(var_idx))?;
324+
let var = Selectable::select_axis(&self.read_var()?, 0, var_idx);
325+
adata.set_var(var)?;
326+
}
327327

328-
self.get_obs()
329-
.lock()
330-
.as_mut()
331-
.map(|x| x.export_axis(0, slice[0], &file, "obs"))
332-
.transpose()?;
333-
self.get_var()
334-
.lock()
335-
.as_mut()
336-
.map(|x| x.export_axis(0, slice[1], &file, "var"))
337-
.transpose()?;
338-
self.uns()
339-
.lock()
340-
.as_mut()
341-
.map(|x| x.export(&file, "uns"))
342-
.transpose()?;
343-
self.obsm()
344-
.lock()
345-
.as_mut()
346-
.map(|x| x.export_select(&[slice[0]], &file, "obsm"))
347-
.transpose()?;
348-
self.obsp()
349-
.lock()
350-
.as_mut()
351-
.map(|x| x.export_select(&[slice[0]], &file, "obsp"))
352-
.transpose()?;
353-
self.varm()
354-
.lock()
355-
.as_mut()
356-
.map(|x| x.export_select(&[slice[1]], &file, "varm"))
357-
.transpose()?;
358-
self.varp()
359-
.lock()
360-
.as_mut()
361-
.map(|x| x.export_select(&[slice[1]], &file, "varp"))
362-
.transpose()?;
363-
self.layers()
364-
.lock()
365-
.as_mut()
366-
.map(|x| x.export_select(slice.as_slice(), &file, "layers"))
367-
.transpose()?;
368-
file.close()?;
369-
Ok(())
328+
if let Some(x) = self.x().slice::<ArrayData, _>(&selection)? {
329+
adata.set_x(x)?;
330+
}
331+
332+
adata.set_obsm(
333+
self.obsm()
334+
.iter_item_slice::<ArrayData, _>(&[obs_idx.clone(), full.clone()]),
335+
)?;
336+
adata.set_obsp(
337+
self.obsp()
338+
.iter_item_slice::<ArrayData, _>(&[obs_idx.clone(), obs_idx.clone()]),
339+
)?;
340+
adata.set_varm(
341+
self.varm()
342+
.iter_item_slice::<ArrayData, _>(&[var_idx.clone(), full]),
343+
)?;
344+
adata.set_varp(
345+
self.varp()
346+
.iter_item_slice::<ArrayData, _>(&[var_idx.clone(), var_idx.clone()]),
347+
)?;
348+
adata.set_uns(self.uns().iter_item::<Data>())?;
349+
adata.set_layers(self.layers().iter_item_slice::<ArrayData, _>(&selection))?;
350+
351+
adata.close()
370352
}
371353

372354
/// Get the filename of the AnnData file.
@@ -459,4 +441,4 @@ impl<B: Backend> AnnData<B> {
459441

460442
Ok(())
461443
}
462-
}
444+
}

anndata/src/anndata/dataset.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,7 @@ impl<B: Backend> AnnDataSet<B> {
302302

303303
/// Convert AnnDataSet to AnnData object
304304
pub fn to_adata<O: Backend, P: AsRef<Path>>(&self, out: P, copy_x: bool) -> Result<AnnData<O>> {
305-
self.annotation.write::<O, _>(&out)?;
305+
self.annotation.write::<O, _>(&out, None)?;
306306
let adata = AnnData::open(O::open_rw(&out)?)?;
307307
if copy_x {
308308
adata.set_x_from_iter::<_, ArrayData>(

anndata/src/concat.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,14 @@ pub enum JoinType {
2020
Outer,
2121
}
2222

23+
/// Concatenate multiple AnnData objects into one.
24+
///
25+
/// # Arguments
26+
/// - `adatas`: A slice of AnnData objects to concatenate.
27+
/// - `join`: The type of join to perform on the variables (`var`).
28+
/// - `label`: An optional label for the keys column in `obs`.
29+
/// - `keys`: An optional slice of keys to label each AnnData object in `obs`.
30+
/// - `out`: The output AnnData object to store the concatenated result.
2331
pub fn concat<A, O, S>(
2432
adatas: &[A],
2533
join: JoinType,

0 commit comments

Comments
 (0)