Skip to content

Commit a636f01

Browse files
authored
feat(rust/sedona-raster-gdal): add RS_MetaData (#833)
## Summary

- add `RS_MetaData` backed by the GDAL dataset provider
- expose raster dimensions, transform, SRID, band count, and GDAL block size metadata
- keep the change limited to the incremental metadata surface on top of the current GDAL raster work

## Dependency

- Depends on #831 (`RS_FromPath`)
- This branch currently includes the parent `RS_FromPath` commit because GitHub upstream PRs cannot target a fork-only base branch.
- Please review/merge this after #831, or compare this PR commit-by-commit with that dependency in mind.

## Testing

- `cargo test -p sedona-raster-gdal`
- `cargo clippy -p sedona-raster-gdal -- -D warnings`
- `cargo test -p sedona-gdal`
1 parent 4c96963 commit a636f01

7 files changed

Lines changed: 620 additions & 0 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/sql/rs_metadata.qmd

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
---
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
title: RS_MetaData
20+
description: Returns raster metadata as a struct.
21+
kernels:
22+
- returns: struct
23+
args: [raster]
24+
---
25+
26+
## Description
27+
28+
`RS_MetaData()` returns a struct containing:
29+
30+
- `upperLeftX`, `upperLeftY`: origin of the raster geotransform
31+
- `gridWidth`, `gridHeight`: raster dimensions in pixels
32+
- `scaleX`, `scaleY`: pixel scale
33+
- `skewX`, `skewY`: geotransform skew terms
34+
- `srid`: raster SRID if available, otherwise `0`
35+
- `numSampleDimensions`: band count
36+
- `tileWidth`, `tileHeight`: tile dimensions derived from the GDAL block size
37+
38+
For rasters with no bands, `tileWidth` and `tileHeight` are `0`.
39+
40+
## Examples
41+
42+
```sql
43+
SELECT RS_MetaData(RS_Example());
44+
```
45+
46+
```sql
47+
SELECT meta."gridWidth", meta."gridHeight"
48+
FROM (
49+
SELECT RS_MetaData(
50+
RS_FromPath('https://download.osgeo.org/geotiff/samples/gdal_eg/cea.tif')
51+
) AS meta
52+
) q;
53+
```

rust/sedona-raster-gdal/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ sedona-expr = { workspace = true }
4343
sedona-functions = { workspace = true }
4444
sedona-gdal = { workspace = true }
4545
sedona-raster = { workspace = true }
46+
sedona-raster-functions = { workspace = true }
4647
sedona-schema = { workspace = true }
4748
tokio = { workspace = true }
4849

@@ -57,3 +58,8 @@ tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
5758
harness = false
5859
name = "rs_frompath"
5960
path = "benches/rs_frompath.rs"
61+
62+
[[bench]]
63+
harness = false
64+
name = "rs_metadata"
65+
path = "benches/rs_metadata.rs"
rust/sedona-raster-gdal/benches/rs_metadata.rs

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Benchmarks for RS_MetaData UDF.
19+
20+
use std::{hint::black_box, sync::Arc};
21+
22+
use arrow_array::{ArrayRef, StringArray};
23+
use arrow_schema::DataType;
24+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
25+
use datafusion_expr::ScalarUDF;
26+
use sedona_schema::datatypes::{SedonaType, RASTER};
27+
use sedona_testing::{data::test_raster, testers::ScalarUdfTester};
28+
29+
/// File names of the raster fixtures used to build benchmark inputs.
///
/// Each name is resolved to a path with `test_raster` inside `raster_path_array`.
const SMALL_RASTER_FIXTURES: &[&str] = &[
30+
"test1.tiff",
31+
"test2.tif",
32+
"test3.tif",
33+
"test4.tiff",
34+
"test5.tiff",
35+
];
36+
37+
fn raster_path_array(names: &[&str], rows: usize) -> ArrayRef {
38+
assert!(
39+
!names.is_empty(),
40+
"benchmark fixture list must not be empty"
41+
);
42+
43+
let paths = names
44+
.iter()
45+
.map(|name| test_raster(name).unwrap())
46+
.collect::<Vec<_>>();
47+
48+
let values = (0..rows)
49+
.map(|index| paths[index % paths.len()].as_str())
50+
.collect::<Vec<_>>();
51+
52+
Arc::new(StringArray::from(values))
53+
}
54+
55+
fn build_raster_input(names: &[&str], rows: usize) -> ArrayRef {
56+
let frompath_udf: ScalarUDF = sedona_raster_gdal::rs_frompath_udf().into();
57+
let frompath_tester =
58+
ScalarUdfTester::new(frompath_udf, vec![SedonaType::Arrow(DataType::Utf8)]);
59+
frompath_tester
60+
.invoke_array(raster_path_array(names, rows))
61+
.unwrap()
62+
}
63+
64+
fn bench_rs_metadata(c: &mut Criterion) {
65+
let udf: ScalarUDF = sedona_raster_gdal::rs_metadata_udf().into();
66+
let tester = ScalarUdfTester::new(udf, vec![RASTER]);
67+
68+
let single_small = build_raster_input(&["test4.tiff"], 1);
69+
let mixed_small = build_raster_input(SMALL_RASTER_FIXTURES, SMALL_RASTER_FIXTURES.len());
70+
let batched_small = build_raster_input(SMALL_RASTER_FIXTURES, 256);
71+
72+
let mut group = c.benchmark_group("rs_metadata");
73+
74+
group.throughput(Throughput::Elements(single_small.len() as u64));
75+
group.bench_with_input(
76+
BenchmarkId::new("fixtures", "single_small"),
77+
&single_small,
78+
|b, input| b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap())),
79+
);
80+
81+
group.throughput(Throughput::Elements(mixed_small.len() as u64));
82+
group.bench_with_input(
83+
BenchmarkId::new("fixtures", "mixed_small"),
84+
&mixed_small,
85+
|b, input| b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap())),
86+
);
87+
88+
group.throughput(Throughput::Elements(batched_small.len() as u64));
89+
group.bench_with_input(
90+
BenchmarkId::new("fixtures", "batched_small"),
91+
&batched_small,
92+
|b, input| b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap())),
93+
);
94+
95+
group.finish();
96+
}
97+
98+
criterion_group!(benches, bench_rs_metadata);
99+
criterion_main!(benches);

rust/sedona-raster-gdal/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ mod gdal_dataset_provider;
3434

3535
mod raster_loader;
3636
mod rs_frompath;
37+
mod rs_metadata;
3738
mod source_uri;
3839
mod utils;
3940

@@ -44,6 +45,7 @@ pub use gdal_common::{
4445
};
4546
pub use raster_loader::{GdalLoader, GDAL_FORMAT};
4647
pub use rs_frompath::rs_frompath_udf;
48+
pub use rs_metadata::rs_metadata_udf;
4749
pub use utils::{
4850
append_as_indb_raster, append_as_outdb_raster, append_nd_from_dataset, dataset_to_indb_raster,
4951
gdal_dataset_to_nd_raster,

rust/sedona-raster-gdal/src/register.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ use sedona_expr::function_set::FunctionSet;
2121
pub fn default_function_set() -> FunctionSet {
    // Start from an empty set and register the scalar UDFs defined in this crate's
    // `rs_frompath` and `rs_metadata` modules.
    let mut udfs = FunctionSet::new();
    udfs.insert_scalar_udf(crate::rs_frompath::rs_frompath_udf());
    udfs.insert_scalar_udf(crate::rs_metadata::rs_metadata_udf());
    udfs
}

0 commit comments

Comments
 (0)