Skip to content

Commit 1468dd2

Browse files
authored
Security fixes and improvements (spiceai#9666)
* fix: update copyright year and enhance API key validation logic * style: format code for better readability in load and store modules * Address review comments and fix runtime compile errors * Address remaining PR review comments * Refactor Cayenne IDs to use UUIDv7 strings (spiceai#9667) * Refactor table and partition identifiers to use UUIDv7 strings - Changed `table_id`, `data_file_id`, `delete_file_id`, and `partition_id` fields in various structs from `i64` to `String` to accommodate UUIDv7 format. - Updated related methods and implementations to handle the new string-based identifiers. - Removed `table_uuid` from the `TableMetadata` struct and adjusted SQL schemas accordingly. - Modified tests and catalog operations to work with the new identifier types. (cherry picked from commit a36ebe4) * Fix UUIDv7 ID refactor on trunk * Refactor test_insert_overwrite to use dynamic table_id for directory paths * Enhance metadata catalog to handle concurrent table creation and validate UUIDs in commit compaction * Fix test for oversized total payload in StoreMemoryParams validation
1 parent 06c4c73 commit 1468dd2

14 files changed

Lines changed: 514 additions & 89 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/data_components/src/databricks/sql_warehouse/datatypes.rs

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright 2024-2025 The Spice.ai OSS Authors
2+
Copyright 2024-2026 The Spice.ai OSS Authors
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -19,6 +19,8 @@ use logos::{Lexer, Logos};
1919
use std::collections::HashMap;
2020
use std::sync::Arc;
2121

22+
const MAX_RECURSION_DEPTH: usize = 100;
23+
2224
#[derive(Logos, Debug, PartialEq, Clone)]
2325
#[logos(skip r"[ \t\n\f]+")] // Skip whitespace
2426
pub enum Token<'input> {
@@ -111,7 +113,7 @@ impl<'input> Parser<'input> {
111113
}
112114

113115
pub fn parse(&mut self) -> Result<ArrowDataType, String> {
114-
self.parse_data_type()
116+
self.parse_data_type_with_depth(0)
115117
}
116118

117119
fn parse_decimal(&mut self) -> Result<ArrowDataType, String> {
@@ -145,7 +147,13 @@ impl<'input> Parser<'input> {
145147
})
146148
}
147149

148-
fn parse_data_type(&mut self) -> Result<ArrowDataType, String> {
150+
fn parse_data_type_with_depth(&mut self, depth: usize) -> Result<ArrowDataType, String> {
151+
if depth > MAX_RECURSION_DEPTH {
152+
return Err(format!(
153+
"Maximum schema recursion depth exceeded ({MAX_RECURSION_DEPTH})"
154+
));
155+
}
156+
149157
match self.current.clone() {
150158
Some(Ok(Token::BigInt)) => {
151159
self.advance();
@@ -206,23 +214,23 @@ impl<'input> Parser<'input> {
206214
Some(Ok(Token::Array)) => {
207215
self.advance();
208216
self.expect(&Token::LAngle)?;
209-
let inner_type = self.parse_data_type()?;
217+
let inner_type = self.parse_data_type_with_depth(depth + 1)?;
210218
self.expect(&Token::RAngle)?;
211219
let field = ArrowField::new("item", inner_type, true);
212220
Ok(ArrowDataType::List(Arc::new(field)))
213221
}
214-
Some(Ok(Token::Map)) => self.parse_map(),
215-
Some(Ok(Token::Struct)) => self.parse_struct(),
222+
Some(Ok(Token::Map)) => self.parse_map_with_depth(depth),
223+
Some(Ok(Token::Struct)) => self.parse_struct_with_depth(depth),
216224
_ => Err(format!("Unexpected token: {:?}", self.current)),
217225
}
218226
}
219227

220-
fn parse_map(&mut self) -> Result<ArrowDataType, String> {
228+
fn parse_map_with_depth(&mut self, depth: usize) -> Result<ArrowDataType, String> {
221229
self.advance();
222230
self.expect(&Token::LAngle)?;
223-
let key_type = self.parse_data_type()?;
231+
let key_type = self.parse_data_type_with_depth(depth + 1)?;
224232
self.expect(&Token::Comma)?;
225-
let value_type = self.parse_data_type()?;
233+
let value_type = self.parse_data_type_with_depth(depth + 1)?;
226234
self.expect(&Token::RAngle)?;
227235
let key_field = Arc::new(ArrowField::new("key", key_type, false));
228236
let value_field = Arc::new(ArrowField::new("value", value_type, true));
@@ -234,13 +242,13 @@ impl<'input> Parser<'input> {
234242
Ok(ArrowDataType::Map(entry_struct, false))
235243
}
236244

237-
fn parse_struct(&mut self) -> Result<ArrowDataType, String> {
245+
fn parse_struct_with_depth(&mut self, depth: usize) -> Result<ArrowDataType, String> {
238246
self.advance();
239247
self.expect(&Token::LAngle)?;
240248
let mut fields = Vec::new();
241249
if self.current != Some(Ok(Token::RAngle)) {
242250
loop {
243-
let field = self.parse_field()?;
251+
let field = self.parse_field_with_depth(depth + 1)?;
244252
fields.push(field);
245253
if self.current == Some(Ok(Token::Comma)) {
246254
self.advance();
@@ -283,7 +291,7 @@ impl<'input> Parser<'input> {
283291
)
284292
}
285293

286-
fn parse_field(&mut self) -> Result<ArrowField, String> {
294+
fn parse_field_with_depth(&mut self, depth: usize) -> Result<ArrowField, String> {
287295
let name = match self.current.clone() {
288296
Some(Ok(Token::Identifier(name))) => {
289297
self.advance();
@@ -297,7 +305,7 @@ impl<'input> Parser<'input> {
297305
_ => return Err("Expected identifier for field name".to_string()),
298306
};
299307
self.expect(&Token::Colon)?;
300-
let data_type = self.parse_data_type()?;
308+
let data_type = self.parse_data_type_with_depth(depth)?;
301309
let nullable = if self.current == Some(Ok(Token::Not)) {
302310
self.advance();
303311
self.expect(&Token::Null)?;
@@ -422,6 +430,20 @@ mod tests {
422430
}
423431
}
424432

433+
#[test]
434+
fn test_parse_rejects_excessive_recursion_depth() {
435+
let input = format!(
436+
"{}INT{}",
437+
"ARRAY<".repeat(MAX_RECURSION_DEPTH + 1),
438+
">".repeat(MAX_RECURSION_DEPTH + 1)
439+
);
440+
441+
let mut parser = Parser::new(&input);
442+
let err = parser.parse().expect_err("must reject excessive recursion");
443+
444+
assert!(err.contains("Maximum schema recursion depth exceeded"));
445+
}
446+
425447
#[test]
426448
fn test_struct_reserved_field_names() {
427449
let input = "STRUCT<date: STRING, value: INT>";

crates/runtime-auth/src/api_key/mod.rs

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright 2024-2025 The Spice.ai OSS Authors
2+
Copyright 2024-2026 The Spice.ai OSS Authors
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -28,7 +28,27 @@ pub struct ApiKeyAuth {
2828
impl ApiKeyAuth {
2929
#[must_use]
3030
pub fn new(api_keys: Vec<ApiKey>) -> Self {
31-
Self { api_keys }
31+
let mut filtered_keys = Vec::with_capacity(api_keys.len());
32+
let mut dropped_empty_keys = 0usize;
33+
34+
for api_key in api_keys {
35+
if api_key.is_empty() {
36+
dropped_empty_keys += 1;
37+
} else {
38+
filtered_keys.push(api_key);
39+
}
40+
}
41+
42+
if dropped_empty_keys > 0 {
43+
tracing::warn!(
44+
dropped_empty_keys,
45+
"Ignoring empty API key values to prevent insecure authentication bypass"
46+
);
47+
}
48+
49+
Self {
50+
api_keys: filtered_keys,
51+
}
3252
}
3353
}
3454

@@ -41,6 +61,10 @@ impl HttpAuth for ApiKeyAuth {
4161
.and_then(|value| value.to_str().ok())
4262
.unwrap_or_default();
4363

64+
if api_key.is_empty() {
65+
return Ok(AuthVerdict::Deny);
66+
}
67+
4468
if let Some(api_key) = self.api_keys.iter().find(|key| *key == api_key) {
4569
Ok(AuthVerdict::Allow(Arc::new(api_key.clone())))
4670
} else {
@@ -51,6 +75,10 @@ impl HttpAuth for ApiKeyAuth {
5175

5276
impl FlightBasicAuth for ApiKeyAuth {
5377
fn validate(&self, _username: &str, password: &str) -> Result<String, Error> {
78+
if password.is_empty() {
79+
return Err(Error::InvalidCredentials);
80+
}
81+
5482
if self.api_keys.iter().any(|key| key == password) {
5583
Ok(password.to_string())
5684
} else {
@@ -59,6 +87,10 @@ impl FlightBasicAuth for ApiKeyAuth {
5987
}
6088

6189
fn is_valid(&self, bearer_token: &str) -> Result<AuthVerdict, Error> {
90+
if bearer_token.is_empty() {
91+
return Ok(AuthVerdict::Deny);
92+
}
93+
6294
if let Some(api_key) = self.api_keys.iter().find(|key| *key == bearer_token) {
6395
Ok(AuthVerdict::Allow(Arc::new(api_key.clone())))
6496
} else {
@@ -77,6 +109,10 @@ impl GrpcAuth for ApiKeyAuth {
77109
return Ok(AuthVerdict::Deny);
78110
};
79111

112+
if api_key.is_empty() {
113+
return Ok(AuthVerdict::Deny);
114+
}
115+
80116
if let Some(api_key) = self.api_keys.iter().find(|key| *key == api_key) {
81117
Ok(AuthVerdict::Allow(Arc::new(api_key.clone())))
82118
} else {
@@ -140,4 +176,16 @@ mod tests {
140176
let result = auth.http_verify(&parts);
141177
assert!(matches!(result, Ok(AuthVerdict::Allow(_))));
142178
}
179+
180+
#[test]
181+
fn test_empty_configured_key_is_ignored() {
182+
let auth = ApiKeyAuth::new(vec![ApiKey::parse_str(""), ApiKey::parse_str("valid-key")]);
183+
let empty_key_parts = create_request_parts(Some(""));
184+
let empty_key_result = auth.http_verify(&empty_key_parts);
185+
assert!(matches!(empty_key_result, Ok(AuthVerdict::Deny)));
186+
187+
let valid_key_parts = create_request_parts(Some("valid-key"));
188+
let valid_key_result = auth.http_verify(&valid_key_parts);
189+
assert!(matches!(valid_key_result, Ok(AuthVerdict::Allow(_))));
190+
}
143191
}

crates/runtime-auth/src/layer/flight.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright 2024-2025 The Spice.ai OSS Authors
2+
Copyright 2024-2026 The Spice.ai OSS Authors
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.

crates/runtime-request-context/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ opentelemetry.workspace = true
1919
percent-encoding.workspace = true
2020
regex.workspace = true
2121
runtime-auth = { path = "../runtime-auth" }
22+
sha2.workspace = true
2223
spicepod = { path = "../spicepod" }
2324
tokio.workspace = true
2425
tracing.workspace = true

crates/runtime-request-context/src/context.rs

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright 2024-2025 The Spice.ai OSS Authors
2+
Copyright 2024-2026 The Spice.ai OSS Authors
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@ use http::HeaderMap;
2323
use opentelemetry::KeyValue;
2424
use regex::Regex;
2525
use runtime_auth::{AuthPrincipalRef, AuthRequestContext};
26+
use sha2::{Digest, Sha256};
2627
use spicepod::component::runtime::UserAgentCollection;
2728
use std::sync::atomic::Ordering;
2829
use std::{
@@ -81,6 +82,8 @@ static CLIENT_CACHE_KEY_REGEX: LazyLock<Regex> =
8182
Err(e) => unreachable!("Unable to compile regexp: {}", e),
8283
});
8384

85+
const AUTHORIZATION_SCOPE_FINGERPRINT_BYTES: usize = 16;
86+
8487
#[derive(Copy, Clone)]
8588
pub struct AsyncMarker {
8689
marker: PhantomData<()>,
@@ -244,6 +247,17 @@ impl RequestContext {
244247
self.authorization_header.as_deref()
245248
}
246249

250+
#[must_use]
251+
pub fn scoped_client_supplied_cache_key(&self) -> Option<String> {
252+
self.client_supplied_cache_key.as_deref().map(|cache_key| {
253+
let auth_scope = self
254+
.authorization_header
255+
.as_deref()
256+
.map_or_else(|| "anonymous".to_string(), authorization_scope_fingerprint);
257+
format!("{auth_scope}:{cache_key}")
258+
})
259+
}
260+
247261
pub fn extension<T>(&self) -> Option<T>
248262
where
249263
T: Extension + Clone,
@@ -412,6 +426,12 @@ impl RequestContextBuilder {
412426
self
413427
}
414428

429+
#[must_use]
430+
pub fn with_authorization_header(mut self, authorization_header: Option<String>) -> Self {
431+
self.authorization_header = authorization_header;
432+
self
433+
}
434+
415435
#[must_use]
416436
pub fn baggage_mut(&mut self) -> &mut Vec<KeyValue> {
417437
&mut self.baggage
@@ -520,6 +540,25 @@ impl RequestContextBuilder {
520540
}
521541
}
522542

543+
fn authorization_scope_fingerprint(authorization_header: &str) -> String {
544+
let digest = Sha256::digest(authorization_header.as_bytes());
545+
let mut fingerprint = String::with_capacity(5 + (AUTHORIZATION_SCOPE_FINGERPRINT_BYTES * 2));
546+
fingerprint.push_str("auth:");
547+
548+
for byte in &digest[..AUTHORIZATION_SCOPE_FINGERPRINT_BYTES] {
549+
push_hex_byte(&mut fingerprint, *byte);
550+
}
551+
552+
fingerprint
553+
}
554+
555+
fn push_hex_byte(buf: &mut String, byte: u8) {
556+
const HEX: &[u8; 16] = b"0123456789abcdef";
557+
558+
buf.push(HEX[usize::from(byte >> 4)] as char);
559+
buf.push(HEX[usize::from(byte & 0x0f)] as char);
560+
}
561+
523562
#[cfg(test)]
524563
mod tests {
525564
use http::{HeaderMap, HeaderValue};
@@ -560,4 +599,58 @@ mod tests {
560599
);
561600
assert_eq!(ctx_bad_user_key.client_supplied_cache_key, None);
562601
}
602+
603+
#[test]
604+
fn test_scoped_client_supplied_cache_key_hashes_authorization_header() {
605+
let auth_header_1 = "Bearer alice".to_string();
606+
let auth_header_2 = "Bearer bob".to_string();
607+
608+
let ctx1 = RequestContextBuilder::new(Protocol::Internal)
609+
.with_cache_control(CacheControl::Cache(CacheKeyType::ClientSupplied))
610+
.with_client_supplied_cache_key(Some("shared-key".to_string()))
611+
.with_authorization_header(Some(auth_header_1.clone()))
612+
.build();
613+
614+
let ctx2 = RequestContextBuilder::new(Protocol::Internal)
615+
.with_cache_control(CacheControl::Cache(CacheKeyType::ClientSupplied))
616+
.with_client_supplied_cache_key(Some("shared-key".to_string()))
617+
.with_authorization_header(Some(auth_header_1.clone()))
618+
.build();
619+
620+
let ctx3 = RequestContextBuilder::new(Protocol::Internal)
621+
.with_cache_control(CacheControl::Cache(CacheKeyType::ClientSupplied))
622+
.with_client_supplied_cache_key(Some("shared-key".to_string()))
623+
.with_authorization_header(Some(auth_header_2))
624+
.build();
625+
626+
let key1 = ctx1
627+
.scoped_client_supplied_cache_key()
628+
.expect("scoped key should be present");
629+
let key2 = ctx2
630+
.scoped_client_supplied_cache_key()
631+
.expect("scoped key should be present");
632+
let key3 = ctx3
633+
.scoped_client_supplied_cache_key()
634+
.expect("scoped key should be present");
635+
636+
assert!(key1.ends_with(":shared-key"));
637+
assert!(key1.starts_with("auth:"));
638+
assert_eq!(key1.len(), "auth:".len() + 32 + ":shared-key".len());
639+
assert_eq!(key1, key2);
640+
assert_ne!(key1, key3);
641+
assert!(!key1.contains(&auth_header_1));
642+
}
643+
644+
#[test]
645+
fn test_scoped_client_supplied_cache_key_defaults_to_anonymous() {
646+
let ctx = RequestContextBuilder::new(Protocol::Internal)
647+
.with_cache_control(CacheControl::Cache(CacheKeyType::ClientSupplied))
648+
.with_client_supplied_cache_key(Some("shared-key".to_string()))
649+
.build();
650+
651+
assert_eq!(
652+
ctx.scoped_client_supplied_cache_key().as_deref(),
653+
Some("anonymous:shared-key")
654+
);
655+
}
563656
}

0 commit comments

Comments
 (0)