Skip to content

Commit ff4e873

Browse files
tiancaiamao and claude committed
planner, privilege: fix CTE masking policy to use original values in WHERE/HAVING/GROUP BY
This commit fixes a critical bug where masking policies in CTEs were incorrectly applied while the CTE definition was being built, causing WHERE/HAVING/GROUP BY/ORDER BY clauses to see masked values instead of original values.

Changes:
- buildSelect/buildSetOpr: Skip masking when building CTE definitions by checking !b.isCTE && !b.buildingCTE
- buildDataSourceFromCTEMerge: Set both b.isCTE and b.buildingCTE flags to ensure CTE context is maintained during inline merge
- Preserve OrigTblName and OrigColName for masking policy lookup

AT RESULT semantics: Masking is now correctly applied only at final output, not during intermediate operations.

Issue: #67341

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 359263e commit ff4e873

3 files changed

Lines changed: 404 additions & 2 deletions

File tree

pkg/planner/core/logical_plan_builder.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2385,8 +2385,9 @@ func (b *PlanBuilder) buildSetOpr(ctx context.Context, setOpr *ast.SetOprStmt) (
23852385

23862386
// Apply masking at the final result stage (AT RESULT semantics).
23872387
// This ensures set operators (UNION/INTERSECT/EXCEPT) use original values.
2388+
// For CTEs, we skip masking here because CTE definitions should preserve original values.
23882389
// For nested set-op operands, masking is deferred to the outermost set-op result.
2389-
if b.buildingSetOprOperand == 0 {
2390+
if b.buildingSetOprOperand == 0 && !b.isCTE && !b.buildingCTE {
23902391
setOprPlan, err = b.buildFinalProjectionWithMasking(ctx, setOprPlan, oldLen)
23912392
if err != nil {
23922393
return nil, err
@@ -4822,8 +4823,11 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p b
48224823

48234824
// Apply masking at the final result stage (AT RESULT semantics).
48244825
// This ensures HAVING, ORDER BY, set operators, etc. all use original values.
4826+
// For CTEs, we skip masking here because:
4827+
// 1. CTE definitions should preserve original values for correct filtering/joining
4828+
// 2. Masking is applied when CTE results are materialized to the final output
48254829
// For set-operator operands, masking is deferred to the outer set-op final stage.
4826-
if b.buildingSetOprOperand == 0 {
4830+
if b.buildingSetOprOperand == 0 && !b.isCTE && !b.buildingCTE {
48274831
p, err = b.buildFinalProjectionWithMasking(ctx, p, oldLen)
48284832
if err != nil {
48294833
return nil, err
@@ -5126,13 +5130,32 @@ func (b *PlanBuilder) computeCTEInlineFlag(cte *cteInfo) {
51265130
}
51275131

51285132
func (b *PlanBuilder) buildDataSourceFromCTEMerge(ctx context.Context, cte *ast.CommonTableExpression) (base.LogicalPlan, error) {
5133+
// Set b.isCTE and b.buildingCTE to true to ensure masking is skipped during CTE definition building
5134+
// This preserves original values in CTE for correct WHERE/HAVING/GROUP BY behavior
5135+
oldIsCTE := b.isCTE
5136+
oldBuildingCTE := b.buildingCTE
5137+
b.isCTE = true
5138+
b.buildingCTE = true
5139+
defer func() {
5140+
b.isCTE = oldIsCTE
5141+
b.buildingCTE = oldBuildingCTE
5142+
}()
5143+
51295144
p, err := b.buildResultSetNode(ctx, cte.Query.Query, true)
51305145
if err != nil {
51315146
return nil, err
51325147
}
51335148
b.handleHelper.popMap()
51345149
outPutNames := p.OutputNames()
51355150
for _, name := range outPutNames {
5151+
// Preserve OrigTblName and OrigColName for masking policy lookup
5152+
// If they are not set, copy from TblName and ColName before overwriting
5153+
if name.OrigTblName.L == "" {
5154+
name.OrigTblName = name.TblName
5155+
}
5156+
if name.OrigColName.L == "" {
5157+
name.OrigColName = name.ColName
5158+
}
51365159
name.TblName = cte.Name
51375160
name.DBName = ast.NewCIStr(b.ctx.GetSessionVars().CurrentDB)
51385161
}
@@ -5142,6 +5165,10 @@ func (b *PlanBuilder) buildDataSourceFromCTEMerge(ctx context.Context, cte *ast.
51425165
return nil, errors.New("CTE columns length is not consistent")
51435166
}
51445167
for i, n := range cte.ColNameList {
5168+
// Preserve original column name before overwriting
5169+
if outPutNames[i].OrigColName.L == "" {
5170+
outPutNames[i].OrigColName = outPutNames[i].ColName
5171+
}
51455172
outPutNames[i].ColName = n
51465173
}
51475174
}
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# Test basic CTE with masking policy
2+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_src;
3+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_join;
4+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_dst;
5+
CREATE TABLE privilege__column_masking_cte.t_src(id INT PRIMARY KEY, c VARCHAR(20));
6+
CREATE TABLE privilege__column_masking_cte.t_join(id INT PRIMARY KEY, name VARCHAR(20));
7+
CREATE TABLE privilege__column_masking_cte.t_dst(c VARCHAR(20));
8+
INSERT INTO privilege__column_masking_cte.t_src VALUES (1, 'secret'), (2, 'public');
9+
INSERT INTO privilege__column_masking_cte.t_join VALUES (1, 'alice'), (2, 'bob');
10+
CREATE MASKING POLICY p_cte ON privilege__column_masking_cte.t_src(c)
11+
AS MASK_FULL(c, '*') ENABLE;
12+
# Test 1: Basic CTE SELECT - should show masked values
13+
WITH cte AS (SELECT c FROM privilege__column_masking_cte.t_src)
14+
SELECT c FROM cte ORDER BY c;
15+
c
16+
******
17+
******
18+
# Test 2: CTE with COUNT(*) - WHERE clause uses original values
19+
# CTE definitions preserve original values, WHERE uses original 'secret'
20+
WITH cte AS (SELECT c FROM privilege__column_masking_cte.t_src)
21+
SELECT COUNT(*) FROM cte WHERE c = 'secret';
22+
COUNT(*)
23+
1
24+
# Test 3: CTE with JOIN - should show masked values in CTE columns
25+
WITH cte AS (SELECT id, c FROM privilege__column_masking_cte.t_src)
26+
SELECT j.name, cte.c FROM privilege__column_masking_cte.t_join j JOIN cte ON j.id = cte.id ORDER BY j.id;
27+
name c
28+
alice ******
29+
bob ******
30+
# Test 4: Nested CTE - WHERE uses original values
31+
WITH cte1 AS (SELECT c FROM privilege__column_masking_cte.t_src),
32+
cte2 AS (SELECT c FROM cte1)
33+
SELECT COUNT(*) FROM cte2 WHERE c = 'secret';
34+
COUNT(*)
35+
1
36+
# Test 5: CTE with GROUP BY in subquery
37+
WITH cte AS (SELECT c FROM privilege__column_masking_cte.t_src)
38+
SELECT COUNT(*) FROM (SELECT c FROM cte GROUP BY c) g;
39+
COUNT(*)
40+
2
41+
# Test CTE with RESTRICT ON (INSERT_INTO_SELECT)
42+
ALTER TABLE privilege__column_masking_cte.t_src DROP MASKING POLICY p_cte;
43+
CREATE MASKING POLICY p_cte_restrict ON privilege__column_masking_cte.t_src(c)
44+
AS MASK_FULL(c, '*') RESTRICT ON (INSERT_INTO_SELECT) ENABLE;
45+
WITH cte AS (SELECT c FROM privilege__column_masking_cte.t_src)
46+
SELECT c FROM cte ORDER BY c;
47+
c
48+
******
49+
******
50+
INSERT INTO privilege__column_masking_cte.t_dst WITH cte AS (SELECT c FROM privilege__column_masking_cte.t_src) SELECT c FROM cte;
51+
Error 8274 (HY000): Access denied to masked column 'c'. Obtain the required privileges and retry.
52+
# Test that HAVING, ORDER BY use original values while output is masked
53+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_having;
54+
CREATE TABLE privilege__column_masking_cte.t_having(category VARCHAR(50), amount INT);
55+
INSERT INTO privilege__column_masking_cte.t_having VALUES ('A', 100), ('B', 200), ('C', 300);
56+
CREATE MASKING POLICY p_having ON privilege__column_masking_cte.t_having(category)
57+
AS MASK_FULL(category, '*') ENABLE;
58+
# Test: HAVING clause should use original values for comparison
59+
WITH cte AS (SELECT category, amount FROM privilege__column_masking_cte.t_having)
60+
SELECT category, SUM(amount) FROM cte GROUP BY category HAVING category > 'A';
61+
category SUM(amount)
62+
* 300
63+
* 200
64+
# Test: ORDER BY should use original values for sorting
65+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_orderby;
66+
CREATE TABLE privilege__column_masking_cte.t_orderby(id INT, val VARCHAR(20));
67+
INSERT INTO privilege__column_masking_cte.t_orderby VALUES (1, 'apple'), (2, 'banana'), (3, 'cherry');
68+
CREATE MASKING POLICY p_orderby ON privilege__column_masking_cte.t_orderby(val)
69+
AS MASK_FULL(val, '*') ENABLE;
70+
WITH cte AS (SELECT id, val FROM privilege__column_masking_cte.t_orderby)
71+
SELECT id FROM cte ORDER BY val;
72+
id
73+
1
74+
2
75+
3
76+
# Test CTE referenced multiple times - should work consistently
77+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_multi;
78+
CREATE TABLE privilege__column_masking_cte.t_multi(id INT, value VARCHAR(20));
79+
INSERT INTO privilege__column_masking_cte.t_multi VALUES (1, 'data1'), (2, 'data2');
80+
CREATE MASKING POLICY p_multi ON privilege__column_masking_cte.t_multi(value)
81+
AS MASK_FULL(value, '*') ENABLE;
82+
# Reference CTE twice in same query
83+
WITH cte AS (SELECT value FROM privilege__column_masking_cte.t_multi)
84+
SELECT a.value, b.value FROM cte a JOIN cte b ON a.value = b.value ORDER BY a.value;
85+
value value
86+
***** *****
87+
***** *****
88+
# Test CTE with current_role() in masking expression
89+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_role;
90+
DROP USER IF EXISTS cte_role_user;
91+
DROP ROLE IF EXISTS cte_unmask_role;
92+
CREATE TABLE privilege__column_masking_cte.t_role(id INT, data VARCHAR(20));
93+
INSERT INTO privilege__column_masking_cte.t_role VALUES (1, 'sensitive'), (2, 'public');
94+
CREATE USER cte_role_user;
95+
CREATE ROLE cte_unmask_role;
96+
GRANT CREATE, DROP, SELECT, UPDATE, DELETE, INSERT ON privilege__column_masking_cte.* TO cte_role_user;
97+
GRANT cte_unmask_role TO cte_role_user;
98+
CREATE MASKING POLICY p_role_cte ON privilege__column_masking_cte.t_role(data)
99+
AS CASE WHEN current_role() != 'NONE' THEN data ELSE MASK_FULL(data, '*') END ENABLE;
100+
# Test 1: Default role (NONE) - should show masked
101+
WITH cte AS (SELECT id, data FROM privilege__column_masking_cte.t_role)
102+
SELECT data FROM cte ORDER BY id;
103+
data
104+
*********
105+
******
106+
# Test 2: After SET ROLE - should show unmasked
107+
SET ROLE cte_unmask_role;
108+
WITH cte AS (SELECT id, data FROM privilege__column_masking_cte.t_role)
109+
SELECT data FROM cte ORDER BY id;
110+
data
111+
sensitive
112+
public
113+
# Test 3: Back to NONE - should show masked again
114+
SET ROLE NONE;
115+
WITH cte AS (SELECT id, data FROM privilege__column_masking_cte.t_role)
116+
SELECT data FROM cte ORDER BY id;
117+
data
118+
*********
119+
******
120+
# Test CTE with MASK_PARTIAL function
121+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_partial;
122+
CREATE TABLE privilege__column_masking_cte.t_partial(id INT, email VARCHAR(100));
123+
INSERT INTO privilege__column_masking_cte.t_partial
124+
VALUES (1, 'alice@example.com'), (2, 'bob@example.com'), (3, 'charlie@example.com');
125+
CREATE MASKING POLICY p_partial ON privilege__column_masking_cte.t_partial(email)
126+
AS MASK_PARTIAL(email, 1, 2, '*') ENABLE;
127+
# MASK_PARTIAL(email, 1, 2, '*') keeps the first character and the last 2 characters, masking everything in between with '*'
128+
WITH cte AS (SELECT id, email FROM privilege__column_masking_cte.t_partial)
129+
SELECT email FROM cte ORDER BY id;
130+
email
131+
a**************om
132+
b************om
133+
c****************om
134+
# ORDER BY should use original email for sorting
135+
WITH cte AS (SELECT id, email FROM privilege__column_masking_cte.t_partial)
136+
SELECT id FROM cte ORDER BY email;
137+
id
138+
1
139+
2
140+
3
141+
# Test CTE with CONCAT masking expression
142+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_concat;
143+
CREATE TABLE privilege__column_masking_cte.t_concat(id INT, val VARCHAR(20));
144+
INSERT INTO privilege__column_masking_cte.t_concat VALUES (1, 'a'), (2, 'b'), (3, 'c');
145+
CREATE MASKING POLICY p_concat ON privilege__column_masking_cte.t_concat(val)
146+
AS CONCAT('***', val) ENABLE;
147+
WITH cte AS (SELECT id, val FROM privilege__column_masking_cte.t_concat)
148+
SELECT val FROM cte ORDER BY id;
149+
val
150+
***a
151+
***b
152+
***c
153+
# ORDER BY should use original val for sorting
154+
WITH cte AS (SELECT id, val FROM privilege__column_masking_cte.t_concat)
155+
SELECT id FROM cte ORDER BY val DESC;
156+
id
157+
3
158+
2
159+
1
160+
# Test that masking works with recursive CTE
161+
# NOTE: Recursive CTE with masking is currently not supported in TiDB
162+
# This test case is disabled until the issue is resolved
163+
DROP USER IF EXISTS cte_role_user;
164+
DROP ROLE IF EXISTS cte_unmask_role;
165+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_src;
166+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_join;
167+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_dst;
168+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_having;
169+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_orderby;
170+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_multi;
171+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_role;
172+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_partial;
173+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_concat;
174+
DROP TABLE IF EXISTS privilege__column_masking_cte.t_tree;

0 commit comments

Comments
 (0)