Skip to content

Commit 9b05067

Browse files
authored
fix(sql): preserve quote information during identifier normalization (#18033)
This commit fixes an issue where quoted identifiers lost their quote information during normalization. Added unit tests to verify the fix and prevent regression: - Test SQL to AST to SQL conversion preserves quotes - Test normalize_identifier function preserves quotes - Test multiple normalizations preserve quotes
1 parent 82337da commit 9b05067

File tree

4 files changed

+213
-1
lines changed

4 files changed

+213
-1
lines changed

โ€Žsrc/query/sql/src/planner/semantic/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ mod udf_rewriter;
2525
mod view_rewriter;
2626
mod window_check;
2727

28+
#[cfg(test)]
29+
mod tests;
30+
2831
pub use aggregate_rewriter::AggregateRewriter;
2932
pub use aggregating_index_visitor::AggregatingIndexChecker;
3033
pub use aggregating_index_visitor::AggregatingIndexRewriter;

โ€Žsrc/query/sql/src/planner/semantic/name_resolution.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,13 @@ pub fn normalize_identifier(ident: &Identifier, context: &NameResolutionContext)
8787
{
8888
ident.clone()
8989
} else {
90-
Identifier::from_name(ident.span, ident.name.to_lowercase())
90+
// Preserve the quote information when creating a new identifier
91+
Identifier {
92+
span: ident.span,
93+
name: ident.name.to_lowercase(),
94+
quote: ident.quote,
95+
ident_type: ident.ident_type,
96+
}
9197
}
9298
}
9399

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
#[cfg(test)]
15+
mod tests {
16+
17+
use databend_common_ast::ast::Identifier;
18+
use databend_common_ast::parser::parse_sql;
19+
use databend_common_ast::parser::tokenize_sql;
20+
use databend_common_ast::parser::Dialect;
21+
use databend_common_exception::Result;
22+
23+
use crate::planner::semantic::name_resolution::normalize_identifier;
24+
use crate::planner::semantic::name_resolution::NameResolutionContext;
25+
26+
#[test]
27+
fn test_sql_to_ast_to_sql_quoted_identifiers() -> Result<()> {
28+
// Test case 1: Simple WITH clause with quoted identifiers
29+
let sql = r#"WITH "QuotedCTE" AS (SELECT * FROM "QuotedTable" WHERE id = 1) SELECT * FROM "QuotedCTE""#;
30+
31+
// Parse SQL to AST
32+
// let tokens = sql.to_string();
33+
let tokens = tokenize_sql(sql)?;
34+
let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL)?;
35+
36+
// Convert AST back to SQL
37+
let regenerated_sql = stmt.to_string();
38+
39+
// The regenerated SQL should still contain the quoted identifiers
40+
assert!(
41+
regenerated_sql.contains(r#""QuotedCTE""#),
42+
"Quoted CTE name lost in regenerated SQL"
43+
);
44+
assert!(
45+
regenerated_sql.contains(r#""QuotedTable""#),
46+
"Quoted table name lost in regenerated SQL"
47+
);
48+
49+
// Test case 2: More complex WITH clause with mixed case identifiers
50+
let sql2 = r#"WITH "MixedCaseCTE" AS (SELECT id, "Value" FROM "MixedCaseTable" WHERE id = 1) SELECT * FROM "MixedCaseCTE""#;
51+
52+
// Parse SQL to AST
53+
// let tokens2 = sql2.to_string();
54+
let tokens2 = tokenize_sql(sql2)?;
55+
let (stmt2, _) = parse_sql(&tokens2, Dialect::PostgreSQL)?;
56+
57+
// Convert AST back to SQL
58+
let regenerated_sql2 = stmt2.to_string();
59+
60+
// The regenerated SQL should still contain the quoted identifiers with correct case
61+
assert!(
62+
regenerated_sql2.contains(r#""MixedCaseCTE""#),
63+
"Quoted CTE name lost in regenerated SQL"
64+
);
65+
assert!(
66+
regenerated_sql2.contains(r#""Value""#),
67+
"Quoted column name lost in regenerated SQL"
68+
);
69+
assert!(
70+
regenerated_sql2.contains(r#""MixedCaseTable""#),
71+
"Quoted table name lost in regenerated SQL"
72+
);
73+
74+
Ok(())
75+
}
76+
77+
#[test]
78+
fn test_normalize_identifier_preserves_quotes() -> Result<()> {
79+
// Create a context where case sensitivity is off for both quoted and unquoted identifiers
80+
let context = NameResolutionContext {
81+
unquoted_ident_case_sensitive: false,
82+
quoted_ident_case_sensitive: false,
83+
deny_column_reference: false,
84+
};
85+
86+
// Test with a quoted identifier
87+
let quoted_ident = Identifier {
88+
span: Default::default(),
89+
name: "MixedCase".to_string(),
90+
quote: Some('"'),
91+
ident_type: Default::default(),
92+
};
93+
94+
// Normalize the identifier
95+
let normalized = normalize_identifier(&quoted_ident, &context);
96+
97+
// Check that the quote information is preserved
98+
assert_eq!(
99+
normalized.quote,
100+
Some('"'),
101+
"Quote information was lost during normalization"
102+
);
103+
assert_eq!(
104+
normalized.name, "mixedcase",
105+
"Name should be lowercase after normalization"
106+
);
107+
108+
// Test with an unquoted identifier
109+
let unquoted_ident = Identifier {
110+
span: Default::default(),
111+
name: "MixedCase".to_string(),
112+
quote: None,
113+
ident_type: Default::default(),
114+
};
115+
116+
// Normalize the identifier
117+
let normalized = normalize_identifier(&unquoted_ident, &context);
118+
119+
// Check that it remains unquoted
120+
assert_eq!(
121+
normalized.quote, None,
122+
"Unquoted identifier should remain unquoted"
123+
);
124+
assert_eq!(
125+
normalized.name, "mixedcase",
126+
"Name should be lowercase after normalization"
127+
);
128+
129+
Ok(())
130+
}
131+
132+
#[test]
133+
fn test_multiple_normalizations() -> Result<()> {
134+
// This test simulates the scenario where an identifier is normalized multiple times
135+
// which could happen during multiple parsing/planning phases
136+
137+
// Create contexts with different case sensitivity settings
138+
let case_insensitive = NameResolutionContext {
139+
unquoted_ident_case_sensitive: false,
140+
quoted_ident_case_sensitive: false,
141+
deny_column_reference: false,
142+
};
143+
144+
let mixed_sensitivity = NameResolutionContext {
145+
unquoted_ident_case_sensitive: false,
146+
quoted_ident_case_sensitive: true,
147+
deny_column_reference: false,
148+
};
149+
150+
// Start with a quoted identifier
151+
let original = Identifier {
152+
span: Default::default(),
153+
name: "MixedCaseIdentifier".to_string(),
154+
quote: Some('"'),
155+
ident_type: Default::default(),
156+
};
157+
158+
// First normalization (case insensitive for all)
159+
let first_norm = normalize_identifier(&original, &case_insensitive);
160+
161+
// Second normalization (case sensitive for quoted)
162+
let second_norm = normalize_identifier(&first_norm, &mixed_sensitivity);
163+
164+
// Third normalization (back to case insensitive)
165+
let third_norm = normalize_identifier(&second_norm, &case_insensitive);
166+
167+
// Check that the quote information is preserved through all normalizations
168+
assert_eq!(
169+
first_norm.quote,
170+
Some('"'),
171+
"Quote lost after first normalization"
172+
);
173+
assert_eq!(
174+
second_norm.quote,
175+
Some('"'),
176+
"Quote lost after second normalization"
177+
);
178+
assert_eq!(
179+
third_norm.quote,
180+
Some('"'),
181+
"Quote lost after third normalization"
182+
);
183+
184+
// The name should be lowercase due to the case insensitive context
185+
assert_eq!(third_norm.name, "mixedcaseidentifier");
186+
187+
Ok(())
188+
}
189+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
mod identifier_quote_test;

0 commit comments

Comments
ย (0)