Skip to content

Commit 22e4833

Browse files
authored
Merge pull request #1 from zachspar/feature/iter
Feature/iter WIP
2 parents a1e42a0 + cd01bb1 commit 22e4833

File tree

5 files changed

+68
-2
lines changed

5 files changed

+68
-2
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ __pycache__/
66
.idea/
77

88
*.parquet
9+
*.parq
910

1011
# C extensions
1112
*.so

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "parquet-py"
3-
version = "0.0.2-alpha"
3+
version = "0.0.3-alpha"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

default.nix

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Development shell: puts Python 3.12 on the path and bootstraps a local
# virtualenv in ./env when the shell is entered.
{ pkgs ? import <nixpkgs> {} }:

pkgs.mkShell {
  nativeBuildInputs = [ pkgs.buildPackages.python312 ];

  # First entry: create ./env, activate it, upgrade pip and install maturin.
  # Later entries: only activate the existing environment.
  shellHook = ''
    if [ ! -d env ]; then
    python3 -m venv env
    source env/bin/activate
    pip install -U pip
    pip install 'maturin[patchelf]'
    else
    source env/bin/activate
    fi
  '';
}

src/lib.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use pyo3::prelude::*;
22
use pyo3::types::{PyDict, PyList, PyBool};
33
use parquet::file::reader::{FileReader, SerializedFileReader};
44
use std::{fs::File, path::Path};
5+
use parquet::record::Row;
6+
use pyo3::exceptions::PyStopIteration;
57
use serde_json::Value;
68

79
struct PyValue(Value);
@@ -74,6 +76,48 @@ fn to_json_str(path: &str) -> PyResult<String> {
7476
}
7577
}
7678

79+
#[pyclass]
80+
struct ParquetRowIterator {
81+
// FIXME: This should be a RowIter instead
82+
iter: std::vec::IntoIter<Row>
83+
}
84+
85+
#[pymethods]
86+
impl ParquetRowIterator {
87+
#[new]
88+
fn new(path: &str) -> PyResult<Self> {
89+
let file_path = Path::new(path);
90+
let file = File::open(&file_path).map_err(|e| pyo3::exceptions::PyIOError::new_err(e.to_string()))?;
91+
let reader = SerializedFileReader::new(file).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?;
92+
93+
// FIXME: This should be a RowIter instead of rendering vec and then providing an iterator!!!
94+
Ok(Self { iter: reader.get_row_iter(None).unwrap().map(|r| r.unwrap()).collect::<Vec<_>>().into_iter()})
95+
}
96+
97+
fn __iter__(slf: PyRef<Self>) -> PyRef<Self> {
98+
slf
99+
}
100+
101+
fn __next__(mut slf: PyRefMut<Self>) -> PyResult<PyObject> {
102+
let row = slf.iter.next().ok_or_else(|| PyErr::new::<PyStopIteration, _>("End of iterator"))?;
103+
let row_dict = row.to_json_value();
104+
let dict = PyDict::new_bound(slf.py());
105+
for (key, value) in row_dict.as_object().unwrap() {
106+
dict.set_item(key, PyValue(value.clone()))?;
107+
}
108+
Ok(dict.into())
109+
}
110+
}
111+
112+
#[pyfunction]
113+
fn to_iter(path: &str) -> PyResult<ParquetRowIterator> {
114+
let file_path = Path::new(path);
115+
let file = File::open(&file_path).map_err(|e| pyo3::exceptions::PyIOError::new_err(e.to_string()))?;
116+
let reader = SerializedFileReader::new(file).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?;
117+
118+
Ok(ParquetRowIterator { iter: reader.get_row_iter(None).unwrap().map(|r| r.unwrap()).collect::<Vec<_>>().into_iter()})
119+
}
120+
77121
#[pyfunction]
78122
fn to_list(path: &str, py: Python) -> PyResult<PyObject> {
79123
let file_path = Path::new(path);
@@ -101,5 +145,7 @@ fn to_list(path: &str, py: Python) -> PyResult<PyObject> {
101145
fn parq(m: &Bound<'_, PyModule>) -> PyResult<()> {
102146
m.add_function(wrap_pyfunction!(to_json_str, m)?)?;
103147
m.add_function(wrap_pyfunction!(to_list, m)?)?;
148+
m.add_function(wrap_pyfunction!(to_iter, m)?)?;
149+
m.add_class::<ParquetRowIterator>()?;
104150
Ok(())
105151
}

0 commit comments

Comments
 (0)