Skip to content

Commit 5fc406c

Browse files
committed
cleaning up code
1 parent f482a33 commit 5fc406c

9 files changed

Lines changed: 398 additions & 292 deletions

File tree

src/valor_lite/cache/ephemeral.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any
1+
from typing import Any, Generator
22

33
import numpy as np
44
import pyarrow as pa
@@ -36,15 +36,71 @@ def iterate_tables(
3636
self,
3737
columns: list[str] | None = None,
3838
filter: pc.Expression | None = None,
39-
):
40-
"""Iterate over tables within the cache."""
39+
) -> Generator[pa.Table, None, None]:
40+
"""
41+
Iterate over tables within the cache.
42+
43+
Parameters
44+
----------
45+
columns : list[str], optional
46+
Optionally select columns to be returned.
47+
filter : pyarrow.compute.Expression, optional
48+
Optionally filter table before returning.
49+
50+
Yields
51+
------
52+
pa.Table
53+
"""
4154
table = self._table
4255
if filter is not None:
4356
table = table.filter(filter)
4457
if columns is not None:
4558
table = table.select(columns)
4659
yield table
4760

61+
def iterate_pairs(
    self,
    columns: list[str] | None = None,
) -> Generator[np.ndarray, None, None]:
    """
    Yield every table in the cache converted to a NumPy array.

    Parameters
    ----------
    columns : list[str], optional
        Column selection forwarded to ``iterate_tables``.

    Yields
    ------
    np.ndarray
        The table's columns stacked side by side with ``np.column_stack``.
    """
    for table in self.iterate_tables(columns=columns):
        # Convert each column on its own, then join them column-wise.
        arrays = [column.to_numpy() for column in table.columns]
        yield np.column_stack(arrays)
81+
82+
def iterate_pairs_with_table(
    self,
    columns: list[str] | None = None,
) -> Generator[tuple[pa.Table, np.ndarray], None, None]:
    """
    Iterate over chunks within the cache returning both tables and arrays.

    Parameters
    ----------
    columns : list[str], optional
        Names of the columns to stack into the array. When omitted (or
        empty), every column of the table is used. The yielded table is
        not restricted by this argument.

    Yields
    ------
    tuple[pa.Table, np.ndarray]
        The table as produced by ``iterate_tables()`` together with the
        selected columns stacked side by side with ``np.column_stack``.
    """
    for tbl in self.iterate_tables():
        # ``Table.columns`` holds ChunkedArray objects, which cannot be used
        # to index the table; the fallback must be the column *names*.
        names = columns if columns else tbl.column_names
        yield tbl, np.column_stack([tbl[name].to_numpy() for name in names])
103+
48104

49105
class MemoryCacheWriter(MemoryCache):
50106
def __init__(

src/valor_lite/cache/persistent.py

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import os
55
from pathlib import Path
6-
from typing import Any
6+
from typing import Any, Generator
77

88
import numpy as np
99
import pyarrow as pa
@@ -157,8 +157,21 @@ def iterate_tables(
157157
self,
158158
columns: list[str] | None = None,
159159
filter: pc.Expression | None = None,
160-
):
161-
"""Iterate over tables within the cache."""
160+
) -> Generator[pa.Table, None, None]:
161+
"""
162+
Iterate over tables within the cache.
163+
164+
Parameters
165+
----------
166+
columns : list[str], optional
167+
Optionally select columns to be returned.
168+
filter : pyarrow.compute.Expression, optional
169+
Optionally filter table before returning.
170+
171+
Yields
172+
------
173+
pa.Table
174+
"""
162175
dataset = ds.dataset(
163176
source=self._path,
164177
schema=self._schema,
@@ -167,8 +180,62 @@ def iterate_tables(
167180
for fragment in dataset.get_fragments():
168181
yield fragment.to_table(columns=columns, filter=filter)
169182

170-
def iterate_fragments(self):
171-
"""Iterate over fragments within the file-based cache."""
183+
def iterate_pairs(
    self,
    columns: list[str] | None = None,
) -> Generator[np.ndarray, None, None]:
    """
    Yield every table in the cache converted to a NumPy array.

    Parameters
    ----------
    columns : list[str], optional
        Column selection forwarded to ``iterate_tables``.

    Yields
    ------
    np.ndarray
        The table's columns stacked side by side with ``np.column_stack``.
    """
    for table in self.iterate_tables(columns=columns):
        # Convert each column on its own, then join them column-wise.
        arrays = [column.to_numpy() for column in table.columns]
        yield np.column_stack(arrays)
203+
204+
def iterate_pairs_with_table(
    self,
    columns: list[str] | None = None,
) -> Generator[tuple[pa.Table, np.ndarray], None, None]:
    """
    Iterate over chunks within the cache returning both tables and arrays.

    Parameters
    ----------
    columns : list[str], optional
        Names of the columns to stack into the array. When omitted (or
        empty), every column of the table is used. The yielded table is
        not restricted by this argument.

    Yields
    ------
    tuple[pa.Table, np.ndarray]
        The table as produced by ``iterate_tables()`` together with the
        selected columns stacked side by side with ``np.column_stack``.
    """
    for tbl in self.iterate_tables():
        # ``Table.columns`` holds ChunkedArray objects, which cannot be used
        # to index the table; the fallback must be the column *names*.
        names = columns if columns else tbl.column_names
        yield tbl, np.column_stack([tbl[name].to_numpy() for name in names])
225+
226+
def iterate_fragments(self) -> Generator[ds.Fragment, None, None]:
227+
"""
228+
Iterate over fragments within the file-based cache.
229+
230+
Parameters
231+
----------
232+
None
233+
This method takes no arguments besides ``self``.
234+
235+
Yields
236+
------
237+
ds.Fragment
238+
"""
172239
dataset = ds.dataset(
173240
source=self._path,
174241
schema=self._schema,

src/valor_lite/object_detection/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from .annotation import Bitmask, BoundingBox, Detection, Polygon
2-
from .evaluator import DataType, Evaluator, Filter
2+
from .evaluator import Evaluator, Filter
33
from .loader import Loader
44
from .metric import Metric, MetricType
5+
from .shared import DataType, EvaluatorInfo
56

67
__all__ = [
78
"Bitmask",
@@ -14,4 +15,5 @@
1415
"Evaluator",
1516
"Filter",
1617
"DataType",
18+
"EvaluatorInfo",
1719
]

0 commit comments

Comments
 (0)