Skip to content

Commit 8d1c63f

Browse files
committed
fix 3D array chunking
1 parent 36c6f58 commit 8d1c63f

3 files changed

Lines changed: 39 additions & 10 deletions

File tree

anndata/src/container/collection.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
data::*,
66
};
77

8-
use anyhow::{Result, bail, ensure};
8+
use anyhow::{Context, Result, bail, ensure};
99
use itertools::Itertools;
1010
use log::warn;
1111
use parking_lot::{Mutex, MutexGuard};
@@ -351,7 +351,10 @@ impl<B: Backend> InnerAxisArrays<B> {
351351
if let Some(elem) = self.get(key) {
352352
elem.clear()?;
353353
}
354-
let elem = ArrayElem::try_from(ArrayChunk::write_by_chunk(data, &self.container, key)?)?;
354+
let elem = ArrayChunk::write_by_chunk(data, &self.container, key).with_context(|| {
355+
format!("failed to write data to AxisArrays with key: '{}'", key)
356+
})?;
357+
let elem = ArrayElem::try_from(elem)?;
355358

356359
let shape = { elem.inner().shape().clone() };
357360
match self.axis {

anndata/src/data/array/chunks.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::{ArrayElem, Selectable};
21
use crate::backend::{AttributeOp, Backend, BackendData, DataContainer, GroupOp, ScalarType};
32
use crate::data::{ArrayData, array::DynArray, array::utils::ExtendableDataset};
3+
use crate::{ArrayElem, Selectable};
44

55
use super::{CsrNonCanonical, DynCscMatrix, DynCsrMatrix, DynCsrNonCanonical};
66
use anyhow::{Context, Result, bail};
@@ -329,12 +329,13 @@ impl<D: RemoveAxis, T: BackendData> ArrayChunk for Array<T, D> {
329329
G: GroupOp<B>,
330330
{
331331
let mut iter = iter.peekable();
332-
let chunk_size = if let Some(n) = D::NDIM {
333-
vec![1000; n].into()
334-
} else {
335-
let n = iter.peek().unwrap().ndim();
336-
vec![1000; n].into()
337-
};
332+
let chunk_size = iter
333+
.peek()
334+
.unwrap()
335+
.shape()
336+
.iter()
337+
.map(|&x| x.min(1000))
338+
.collect();
338339
let mut data: ExtendableDataset<B, T> =
339340
ExtendableDataset::with_capacity(location, name, chunk_size)?;
340341

anndata/src/data/array/utils.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ impl<B: Backend, T: BackendData> ExtendableDataset<B, T> {
2525
where
2626
G: GroupOp<B>,
2727
{
28-
let block_size = vec![1000; capacity.ndim()].into();
28+
let block_size = alloc_block_size_with_shape(&capacity, 16384);
2929
let dataset = group.new_empty_dataset::<T>(
3030
name,
3131
&capacity,
@@ -584,4 +584,29 @@ pub(crate) fn array_major_minor_index_default<T: Default + Clone>(
584584
) -> Array2<T>
585585
{
586586
array_major_minor_index(major_idx, minor_idx, data, &T::default())
587+
}
588+
589+
/// Picks a per-dimension block (chunk) extent for a dataset of the given `shape`
/// so that a single block holds roughly `total` elements.
///
/// The element budget is initially split evenly across all dimensions (the
/// `n`-th root of `total`). Dimensions are then visited from smallest to largest:
/// a dimension shorter than its even share is covered entirely by the block, and
/// the budget it leaves unused is redistributed among the dimensions that have
/// not been visited yet. Every other dimension receives the current even share.
///
/// Every returned extent is at least 1, including for zero-length dimensions.
/// Because shares are rounded up, the product of the returned extents may
/// slightly exceed `total`.
pub(crate) fn alloc_block_size_with_shape(shape: &Shape, total: usize) -> Shape {
    let ndim = shape.ndim();
    let mut block_size = vec![0; ndim];

    // `n` counts the dimensions still sharing the budget; `remaining` is the
    // number of elements left to distribute among them.
    let mut n = ndim;
    let mut remaining = total;
    let mut bs = get_block_size(n, remaining).max(1);

    let mut visit_order: Vec<_> = (0..ndim).collect();
    visit_order.sort_by_key(|&i| shape[i]);
    for i in visit_order {
        // Treat a zero-length dimension as length 1: it avoids dividing the
        // budget by zero below and keeps the block extent non-zero.
        // NOTE(review): assumes backends reject zero-sized chunk extents — confirm.
        let s = shape[i].max(1);
        if s < bs {
            block_size[i] = s;
            n -= 1;
            // Carry the budget forward cumulatively so that earlier small
            // dimensions keep reducing what the larger ones may take.
            remaining /= s;
            if n > 0 {
                bs = get_block_size(n, remaining).max(1);
            }
        } else {
            block_size[i] = bs;
        }
    }

    block_size.into()
}
609+
610+
/// Returns the `n`-th root of `total`, rounded up to the next integer.
///
/// This is the even per-dimension share when `total` elements are spread over
/// `n` dimensions. The computation is done in `f64`, so the result can be off
/// by one for inputs whose exact root is an integer. With `n == 0` the exponent
/// is infinite and the float-to-integer cast saturates.
fn get_block_size(n: usize, total: usize) -> usize {
    let exponent = 1.0 / n as f64;
    let root = (total as f64).powf(exponent);
    root.ceil() as usize
}

0 commit comments

Comments
 (0)