Skip to content

Commit f020db4

Browse files
authored
feat(rust/sedona-raster-gdal): Add parse_outdb_source helper for the GDAL format driver (apache#812)
1 parent 6c7cd96 commit f020db4

2 files changed

Lines changed: 225 additions & 0 deletions

File tree

rust/sedona-raster-gdal/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ mod gdal_common;
3131
// Temporary until https://github.com/apache/sedona-db/issues/804 is resolved.
3232
#[allow(dead_code)]
3333
mod gdal_dataset_provider;
34+
#[cfg(test)]
35+
mod source_uri;
3436

3537
// Re-export main dataset conversion functions
3638
pub use gdal_common::{
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! GDAL-format-driver-internal parser for out-db raster source URIs.
19+
//!
20+
//! When a band's `outdb_format` dispatches to the GDAL driver, the loader
21+
//! uses this helper to extract a 1-based source band index from `outdb_uri`
22+
//! via the SedonaDB convention `<uri>#band=N`. The convention is private to
23+
//! the GDAL driver — the schema and format-agnostic surfaces (e.g.
24+
//! `RS_BandPath`) treat `outdb_uri` as opaque. Other format drivers handle
25+
//! their own URIs however they like.
26+
27+
use datafusion_common::{error::Result, exec_err};
28+
29+
/// Parse a SedonaDB out-db source URI into the GDAL-side URI and 1-based
30+
/// source band index.
31+
///
32+
/// Behaviour:
33+
///
34+
/// - `<uri>#band=N` where `N` parses as a `u32` in `1..=u32::MAX`: strips
35+
/// the fragment and returns `(<uri>, N)`.
36+
/// - `<uri>#band=...` with a value that is not a positive `u32` (zero,
37+
/// negative, non-numeric, empty, or overflowing `u32`): returns an
38+
/// `Execution` error. The user explicitly asked for a band; we refuse to
39+
/// silently substitute a default.
40+
/// - GDAL-native subdataset URIs (e.g. `HDF5:"x.h5":/var`,
41+
/// `NETCDF:"x.nc":var`, `GTIFF_DIR:1:multi.tif`) and any URI whose
42+
/// fragment is not `band=...`: pass through verbatim with default band
43+
/// index 1.
44+
/// - Plain URIs without any fragment: pass through verbatim with default
45+
/// band index 1.
46+
pub(crate) fn parse_outdb_source(uri: &str) -> Result<(String, u32)> {
47+
// rsplit lets a trailing `#band=N` win over any earlier `#anchor` in the
48+
// URI — useful for users who append the SedonaDB convention to a URI
49+
// that already carries a fragment.
50+
if let Some((prefix, fragment)) = uri.rsplit_once('#') {
51+
if let Some(band_str) = fragment.strip_prefix("band=") {
52+
return match band_str.parse::<u32>() {
53+
Ok(band) if band >= 1 => Ok((prefix.to_string(), band)),
54+
_ => exec_err!(
55+
"Invalid band index in outdb URI fragment '#band={band_str}': expected a positive integer in 1..=u32::MAX"
56+
),
57+
};
58+
}
59+
}
60+
Ok((uri.to_string(), 1))
61+
}
62+
63+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses successfully to `(expected_uri, expected_band)`.
    #[track_caller]
    fn assert_parsed(input: &str, expected_uri: &str, expected_band: u32) {
        let parsed = parse_outdb_source(input).unwrap();
        assert_eq!(parsed, (expected_uri.to_string(), expected_band));
    }

    /// Asserts that `input` comes back verbatim with the default band index 1.
    #[track_caller]
    fn assert_passthrough(input: &str) {
        assert_parsed(input, input, 1);
    }

    /// Asserts that `input` is rejected and that the error text contains every
    /// entry of `needles`.
    #[track_caller]
    fn assert_rejected(input: &str, needles: &[&str]) {
        let msg = parse_outdb_source(input).unwrap_err().to_string();
        for &needle in needles {
            assert!(msg.contains(needle), "msg was: {msg}");
        }
    }

    // ---- Successful parses: plain URIs and `#band=N` fragments ----

    #[test]
    fn no_fragment_defaults_to_band_one() {
        assert_parsed("s3://bucket/file.tif", "s3://bucket/file.tif", 1);
    }

    #[test]
    fn band_fragment_extracts_index() {
        assert_parsed("s3://bucket/file.tif#band=42", "s3://bucket/file.tif", 42);
    }

    #[test]
    fn band_one_fragment_round_trips() {
        assert_parsed("s3://bucket/file.tif#band=1", "s3://bucket/file.tif", 1);
    }

    #[test]
    fn band_max_u32_accepted() {
        let max = u32::MAX;
        let uri = format!("s3://bucket/file.tif#band={max}");
        assert_parsed(&uri, "s3://bucket/file.tif", max);
    }

    // ---- Rejected band values ----

    #[test]
    fn band_zero_errors() {
        assert_rejected("s3://bucket/file.tif#band=0", &["band=0", "positive integer"]);
    }

    #[test]
    fn negative_band_errors() {
        assert_rejected("s3://bucket/file.tif#band=-2", &["band=-2"]);
    }

    #[test]
    fn band_overflow_errors() {
        // 4294967296 = u32::MAX + 1
        assert_rejected("s3://bucket/file.tif#band=4294967296", &["band=4294967296"]);
    }

    #[test]
    fn non_numeric_band_errors() {
        assert_rejected("s3://bucket/file.tif#band=abc", &["band=abc"]);
    }

    #[test]
    fn empty_band_value_errors() {
        assert_rejected("s3://bucket/file.tif#band=", &["band="]);
    }

    // ---- Fragments that are not band selectors ----

    #[test]
    fn non_band_fragment_passes_through() {
        assert_passthrough("s3://bucket/file.tif#section");
    }

    #[test]
    fn empty_fragment_passes_through() {
        assert_passthrough("s3://bucket/file.tif#");
    }

    // ---- URLs with query strings ----

    #[test]
    fn url_query_string_preserved_with_band_fragment() {
        assert_parsed(
            "https://example.com/r.tif?token=abc#band=3",
            "https://example.com/r.tif?token=abc",
            3,
        );
    }

    #[test]
    fn url_query_string_with_non_band_fragment_passes_through() {
        assert_passthrough("https://example.com/r.tif?token=abc#anchor");
    }

    // ---- Local filesystem paths ----

    #[test]
    fn local_path_with_band_fragment() {
        assert_parsed("/tmp/file.tif#band=5", "/tmp/file.tif", 5);
    }

    #[test]
    fn local_path_without_fragment() {
        assert_parsed("/tmp/file.tif", "/tmp/file.tif", 1);
    }

    // ---- GDAL-native subdataset strings ----

    #[test]
    fn gdal_subdataset_hdf5_passthrough() {
        assert_passthrough(r#"HDF5:"/path/x.h5"://temperature"#);
    }

    #[test]
    fn gdal_subdataset_netcdf_passthrough() {
        assert_passthrough(r#"NETCDF:"/path/file.nc":variable"#);
    }

    #[test]
    fn gdal_subdataset_gtiff_dir_passthrough() {
        assert_passthrough("GTIFF_DIR:1:/path/multi.tif");
    }

    #[test]
    fn gdal_subdataset_with_band_fragment_extracts_band() {
        let (gdal_uri, band) = parse_outdb_source(r#"HDF5:"/path/x.h5":/var#band=3"#).unwrap();
        assert_eq!(gdal_uri, r#"HDF5:"/path/x.h5":/var"#);
        assert_eq!(band, 3);
    }

    // ---- Edge cases ----

    #[test]
    fn trailing_band_wins_over_earlier_anchor() {
        assert_parsed(
            "https://example.com/r.tif#anchor#band=7",
            "https://example.com/r.tif#anchor",
            7,
        );
    }

    #[test]
    fn empty_uri() {
        assert_passthrough("");
    }
}

0 commit comments

Comments
 (0)