11use pyo3:: prelude:: * ;
2+ use pyo3:: types:: { PyDict , PyList , PyBool } ;
23use parquet:: file:: reader:: { FileReader , SerializedFileReader } ;
34use std:: { fs:: File , path:: Path } ;
5+ use serde_json:: Value ;
6+
7+ struct PyValue ( Value ) ;
8+
9+ impl ToPyObject for PyValue {
10+ fn to_object ( & self , py : Python ) -> PyObject {
11+ match value_to_py_object ( py, & self . 0 ) {
12+ Ok ( obj) => obj,
13+ Err ( _) => py. None ( ) , // Fallback to None in case of error, adjust as needed
14+ }
15+ }
16+ }
17+
18+ /// Converts a serde_json `Value` to a PyObject.
19+ fn value_to_py_object ( py : Python , value : & Value ) -> PyResult < PyObject > {
20+ match value {
21+ Value :: Null => Ok ( py. None ( ) ) ,
22+ Value :: Bool ( b) => Ok ( PyBool :: new_bound ( py, * b) . into_py ( py) ) , // Adjusted for PyBool
23+ Value :: Number ( num) => {
24+ if let Some ( i) = num. as_i64 ( ) {
25+ Ok ( i. into_py ( py) )
26+ } else if let Some ( f) = num. as_f64 ( ) {
27+ Ok ( f. into_py ( py) )
28+ } else {
29+ Err ( PyErr :: new :: < pyo3:: exceptions:: PyValueError , _ > (
30+ "Unsupported number type" ,
31+ ) )
32+ }
33+ } ,
34+ Value :: String ( s) => Ok ( s. into_py ( py) ) ,
35+ Value :: Array ( arr) => {
36+ let py_list = PyList :: empty_bound ( py) ;
37+ for item in arr {
38+ py_list. append ( value_to_py_object ( py, item) ?) ?;
39+ }
40+ Ok ( py_list. into_py ( py) )
41+ } ,
42+ Value :: Object ( obj) => {
43+ let py_dict = PyDict :: new_bound ( py) ; // Correct usage of PyDict
44+ for ( k, v) in obj {
45+ py_dict. set_item ( k, value_to_py_object ( py, v) ?) ?;
46+ }
47+ Ok ( py_dict. into_py ( py) )
48+ } ,
49+ }
50+ }
451
552// convert parquet file to json string
653#[ pyfunction]
@@ -21,15 +68,38 @@ fn to_json_str(path: &str) -> PyResult<String> {
2168
2269 // return json string
2370 return Ok ( json_str) ;
71+ } else {
72+ // return ValueError if file not found
73+ Err ( PyErr :: new :: < pyo3:: exceptions:: PyValueError , _ > ( "Unable to open parquet file" ) )
2474 }
75+ }
2576
26- // return ValueError if file not found
27- Err ( PyErr :: new :: < pyo3:: exceptions:: PyValueError , _ > ( "Unable to open parquet file" ) )
77+ #[ pyfunction]
78+ fn to_list ( path : & str , py : Python ) -> PyResult < PyObject > {
79+ let file_path = Path :: new ( path) ;
80+ if let Ok ( file) = File :: open ( & file_path) {
81+ let reader = SerializedFileReader :: new ( file) . unwrap ( ) ;
82+
83+ let list = PyList :: empty_bound ( py) ;
84+ for row in reader. get_row_iter ( None ) . unwrap ( ) {
85+ let row_dict = row. unwrap ( ) . to_json_value ( ) ;
86+ let dict = PyDict :: new_bound ( py) ;
87+ for ( key, value) in row_dict. as_object ( ) . unwrap ( ) {
88+ dict. set_item ( key, PyValue ( value. clone ( ) ) ) ?;
89+ list. append ( & dict) ?;
90+ }
91+ }
92+
93+ Ok ( list. into ( ) )
94+ } else {
95+ Err ( PyErr :: new :: < pyo3:: exceptions:: PyValueError , _ > ( "Unable to open parquet file" ) )
96+ }
2897}
2998
3099// python module
31100#[ pymodule]
32101fn parq ( m : & Bound < ' _ , PyModule > ) -> PyResult < ( ) > {
33102 m. add_function ( wrap_pyfunction ! ( to_json_str, m) ?) ?;
103+ m. add_function ( wrap_pyfunction ! ( to_list, m) ?) ?;
34104 Ok ( ( ) )
35105}
0 commit comments