Skip to content

Commit 93cd947

Browse files
committed
feat(geodatafusion-python): FlatGeobuf TableProvider
1 parent 61a535b commit 93cd947

7 files changed

Lines changed: 99 additions & 0 deletions

File tree

python/Cargo.lock

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

python/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ geoarrow-cast = { path = "../rust/geoarrow-cast" }
2727
geoarrow-flatgeobuf = { path = "../rust/geoarrow-flatgeobuf" }
2828
geoarrow-schema = { path = "../rust/geoarrow-schema" }
2929
geodatafusion = { path = "../rust/geodatafusion" }
30+
geodatafusion-flatgeobuf = { path = "../rust/geodatafusion-flatgeobuf" }
3031
geoparquet = { path = "../rust/geoparquet" }
3132
geozero = "0.14"
3233
http-range-client = { version = "0.9.0", default-features = false }
@@ -35,6 +36,7 @@ numpy = "0.25"
3536
object_store = "0.12"
3637
parquet = "56"
3738
pyo3 = { version = "0.25", features = ["hashbrown", "serde", "anyhow"] }
39+
pyo3-async-runtimes = { version = "0.25", features = ["tokio-runtime"] }
3840
# https://github.com/kylebarron/arro3/pull/354
3941
pyo3-arrow = { git = "https://github.com/kylebarron/arro3", rev = "fda03cebe9cfbf8ad292f8070ea21c22b32af1a3" }
4042
pyo3-geoarrow = { path = "../rust/pyo3-geoarrow" }

python/geodatafusion/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,7 @@ crate-type = ["cdylib"]
2323
datafusion = { workspace = true }
2424
datafusion-ffi = { workspace = true }
2525
geodatafusion = { workspace = true }
26+
geodatafusion-flatgeobuf = { workspace = true }
2627
pyo3 = { workspace = true }
28+
pyo3-async-runtimes = { workspace = true }
2729
pyo3-geoarrow = { workspace = true, features = ["geozero"] }

python/geodatafusion/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#![cfg_attr(not(test), warn(unused_crate_dependencies))]
22

33
pub(crate) mod constants;
4+
mod table_provider;
45
mod udf;
56
mod utils;
67

@@ -55,5 +56,10 @@ fn _rust(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
5556
.getattr(intern!(py, "modules"))?
5657
.set_item("geodatafusion.geo", geo_mod)?;
5758

59+
m.add_function(wrap_pyfunction!(
60+
table_provider::flatgeobuf::new_flatgeobuf,
61+
m
62+
)?)?;
63+
5864
Ok(())
5965
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
use datafusion::catalog::TableProvider;
2+
use datafusion::datasource::listing::{
3+
ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
4+
};
5+
use datafusion::prelude::SessionContext;
6+
use datafusion_ffi::table_provider::FFI_TableProvider;
7+
use geodatafusion_flatgeobuf::FlatGeobufFormat;
8+
use pyo3::prelude::*;
9+
use pyo3::types::PyCapsule;
10+
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};
11+
use pyo3_async_runtimes::tokio::get_runtime;
12+
use std::sync::Arc;
13+
14+
#[pyfunction]
15+
pub(crate) fn new_flatgeobuf(path: &str) -> PyFlatGeobufTableProvider {
16+
let format = Arc::new(FlatGeobufFormat::default());
17+
18+
let options = ListingOptions::new(format).with_file_extension(".fgb");
19+
20+
let table_path = ListingTableUrl::parse(path).unwrap();
21+
22+
let state = SessionContext::new().state();
23+
let runtime = get_runtime();
24+
let inferred_schema =
25+
runtime.block_on(async { options.infer_schema(&state, &table_path).await.unwrap() });
26+
27+
let config = ListingTableConfig::new(table_path)
28+
.with_listing_options(options)
29+
.with_schema(inferred_schema);
30+
31+
let table = ListingTable::try_new(config).unwrap();
32+
PyFlatGeobufTableProvider(Arc::new(table))
33+
}
34+
35+
#[pyclass(module = "geodatafusion", name = "FlatGeobufTableProvider", frozen)]
36+
pub(crate) struct PyFlatGeobufTableProvider(Arc<dyn TableProvider + Send>);
37+
38+
#[pymethods]
39+
impl PyFlatGeobufTableProvider {
40+
pub fn __datafusion_table_provider__<'py>(
41+
&self,
42+
py: Python<'py>,
43+
) -> PyResult<Bound<'py, PyCapsule>> {
44+
let name = cr"datafusion_table_provider".into();
45+
46+
let provider = FFI_TableProvider::new(self.0.clone(), false, None);
47+
48+
PyCapsule::new(py, provider, Some(name))
49+
}
50+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub(crate) mod flatgeobuf;

python/tests/geodatafusion/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,18 @@ def test_simple():
1010
sql = "SELECT ST_AsText(ST_GeomFromText('POINT(1 2)'));"
1111
df = ctx.sql(sql)
1212
assert df.to_arrow_table().columns[0][0].as_py() == "POINT(1 2)"
13+
14+
15+
def test_flatgeobuf():
    """Register a FlatGeobuf table provider and query it through DataFusion.

    The FlatGeobuf fixture is looked up from the ``GEODATAFUSION_TEST_FGB``
    environment variable, falling back to the original developer-local path.
    When the file is not present the test is skipped instead of failing on
    machines that do not have that fixture.
    """
    import os
    import unittest

    from geodatafusion import new_flatgeobuf

    path = os.environ.get(
        "GEODATAFUSION_TEST_FGB", "/Users/kyle/Downloads/countries.fgb"
    )
    if not os.path.exists(path):
        # NOTE(review): relies on the test runner reporting unittest.SkipTest
        # as a skip (pytest does this for plain test functions) — confirm.
        raise unittest.SkipTest(f"FlatGeobuf fixture not found: {path}")

    test = new_flatgeobuf(path)

    ctx = SessionContext()
    register_all(ctx)
    ctx.register_table_provider("countries", test)

    sql = "SELECT * FROM countries;"
    df = ctx.sql(sql)
    df.show()

0 commit comments

Comments
 (0)