Skip to content

Commit a1e42a0

Browse files
committed
add to_list api, some conversion logic
1 parent 67a2066 commit a1e42a0

File tree

3 files changed

+76
-4
lines changed

3 files changed

+76
-4
lines changed

Cargo.lock

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "parquet-py"
3-
version = "0.0.1-alpha"
3+
version = "0.0.2-alpha"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -11,3 +11,4 @@ crate-type = ["cdylib"]
1111
[dependencies]
1212
parquet = { version = "52.0.0", features = ["json"] }
1313
pyo3 = "0.22.0"
14+
serde_json = "1.0.120"

src/lib.rs

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,53 @@
11
use pyo3::prelude::*;
2+
use pyo3::types::{PyDict, PyList, PyBool};
23
use parquet::file::reader::{FileReader, SerializedFileReader};
34
use std::{fs::File, path::Path};
5+
use serde_json::Value;
6+
7+
struct PyValue(Value);
8+
9+
impl ToPyObject for PyValue {
10+
fn to_object(&self, py: Python) -> PyObject {
11+
match value_to_py_object(py, &self.0) {
12+
Ok(obj) => obj,
13+
Err(_) => py.None(), // Fallback to None in case of error, adjust as needed
14+
}
15+
}
16+
}
17+
18+
/// Converts a serde_json `Value` to a PyObject.
19+
fn value_to_py_object(py: Python, value: &Value) -> PyResult<PyObject> {
20+
match value {
21+
Value::Null => Ok(py.None()),
22+
Value::Bool(b) => Ok(PyBool::new_bound(py, *b).into_py(py)), // Adjusted for PyBool
23+
Value::Number(num) => {
24+
if let Some(i) = num.as_i64() {
25+
Ok(i.into_py(py))
26+
} else if let Some(f) = num.as_f64() {
27+
Ok(f.into_py(py))
28+
} else {
29+
Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
30+
"Unsupported number type",
31+
))
32+
}
33+
},
34+
Value::String(s) => Ok(s.into_py(py)),
35+
Value::Array(arr) => {
36+
let py_list = PyList::empty_bound(py);
37+
for item in arr {
38+
py_list.append(value_to_py_object(py, item)?)?;
39+
}
40+
Ok(py_list.into_py(py))
41+
},
42+
Value::Object(obj) => {
43+
let py_dict = PyDict::new_bound(py); // Correct usage of PyDict
44+
for (k, v) in obj {
45+
py_dict.set_item(k, value_to_py_object(py, v)?)?;
46+
}
47+
Ok(py_dict.into_py(py))
48+
},
49+
}
50+
}
451

552
// convert parquet file to json string
653
#[pyfunction]
@@ -21,15 +68,38 @@ fn to_json_str(path: &str) -> PyResult<String> {
2168

2269
// return json string
2370
return Ok(json_str);
71+
} else {
72+
// return ValueError if file not found
73+
Err(PyErr::new::<pyo3::exceptions::PyValueError, _>("Unable to open parquet file"))
2474
}
75+
}
2576

26-
// return ValueError if file not found
27-
Err(PyErr::new::<pyo3::exceptions::PyValueError, _>("Unable to open parquet file"))
77+
#[pyfunction]
78+
fn to_list(path: &str, py: Python) -> PyResult<PyObject> {
79+
let file_path = Path::new(path);
80+
if let Ok(file) = File::open(&file_path) {
81+
let reader = SerializedFileReader::new(file).unwrap();
82+
83+
let list = PyList::empty_bound(py);
84+
for row in reader.get_row_iter(None).unwrap() {
85+
let row_dict = row.unwrap().to_json_value();
86+
let dict = PyDict::new_bound(py);
87+
for (key, value) in row_dict.as_object().unwrap() {
88+
dict.set_item(key, PyValue(value.clone()))?;
89+
list.append(&dict)?;
90+
}
91+
}
92+
93+
Ok(list.into())
94+
} else {
95+
Err(PyErr::new::<pyo3::exceptions::PyValueError, _>("Unable to open parquet file"))
96+
}
2897
}
2998

3099
// python module
31100
#[pymodule]
32101
fn parq(m: &Bound<'_, PyModule>) -> PyResult<()> {
33102
m.add_function(wrap_pyfunction!(to_json_str, m)?)?;
103+
m.add_function(wrap_pyfunction!(to_list, m)?)?;
34104
Ok(())
35105
}

0 commit comments

Comments
 (0)