Skip to content

Commit 6891f2a

Browse files
authored
[receiver/mysql] Tune query plan obfuscation (#46760)
This PR addresses over-obfuscation of MySQL query plans and simplifies the logic for determining whether a query should be EXPLAINed. **Fix over-obfuscation of query plans** — The query plan obfuscator was previously redacting structural plan metadata (access types, key names, table names, etc.) that is needed for query plan analysis. The obfuscation settings have been tuned to target only sensitive values (query text fragments, cost estimates, row estimates, condition expressions) while preserving the structural fields that describe the execution strategy. Support has been added for both MySQL EXPLAIN FORMAT=JSON v1 and v2 output formats. **Simplify explainability detection** — The decision of whether to EXPLAIN a query is now based on the `digestText` (the normalized query template) rather than the raw sample statement, and truncation detection uses a `HasSuffix("...")` check on the sample statement instead of a byte-length comparison against `@@max_digest_length`. This removes a dependency on querying the DB at connection time. #### Link to tracking issue Fixes #46629 #### Testing - Added `TestIsQueryExplainable` covering all supported DML keywords, unsupported statements, case-insensitivity, leading whitespace, and edge cases. - Added `TestBuildExplainStatement` covering plain queries, whitespace trimming, and statements with comments. - Updated `TestScrape` expected output to reflect revised obfuscation settings. - Obfuscation behavior validated against both v1 and v2 MySQL EXPLAIN FORMAT=JSON output via test fixtures. #### Documentation No documentation changes — this fixes existing behavior rather than introducing new configuration options.
1 parent 0da43fb commit 6891f2a

15 files changed

+351
-76
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type:
5+
enhancement
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
7+
component:
8+
receiver/mysql
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note:
11+
Add and tune obfuscation of sensitive properties in both V1 and V2 JSON query plans.
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [46629,46587]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
Configure and test obfuscation for V1 and V2 plans, including tests of queries retrieved from the performance schema that are truncated and cannot be obfuscated.
20+
The importance of obfuscation can be very context dependent; sensitive PII, banking, and authorization data may reside in the same database as less sensitive data, and it can be vital to ensure that what is expected to be obfuscated is always obfuscated. Significant additional testing has been added around query plan obfuscation to ensure that this is enforced and to provide assurance and reference to users about what specifically is obfuscated and what is not.
21+
# If your change doesn't affect end users or the exported elements of any package,
22+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
23+
# Optional: The change log or logs in which this entry should be included.
24+
# e.g. '[user]' or '[user, api]'
25+
# Include 'user' if the change is relevant to end users.
26+
# Include 'api' if there is a change to a library API.
27+
# Default: '[user]'
28+
change_logs: [user]

receiver/mysqlreceiver/client.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type client interface {
3232
getReplicaStatusStats() ([]replicaStatusStats, error)
3333
getQuerySamples(uint64) ([]querySample, error)
3434
getTopQueries(uint64, uint64) ([]topQuery, error)
35-
explainQuery(statement, schema string, logger *zap.Logger) string
35+
explainQuery(digestText, sampleStatement, schema, digest string, logger *zap.Logger) string
3636
Close() error
3737
}
3838

@@ -791,30 +791,32 @@ func (c *mySQLClient) getQuerySamples(limit uint64) ([]querySample, error) {
791791
return samples, nil
792792
}
793793

794-
func (c *mySQLClient) explainQuery(statement, schema string, logger *zap.Logger) string {
795-
if strings.HasSuffix(statement, "...") {
796-
logger.Warn("statement is truncated, skipping explain", zap.String("statement", statement))
794+
func (c *mySQLClient) explainQuery(digestText, sampleStatement, schema, digest string, logger *zap.Logger) string {
795+
if strings.HasSuffix(sampleStatement, "...") {
796+
logger.Debug("statement is truncated, skipping explain", zap.String("digest_text", digestText))
797797
return ""
798798
}
799-
800-
if !isQueryExplainable(statement) {
801-
logger.Debug("statement is not explainable, skipping explain query", zap.String("statement", statement))
799+
if !isQueryExplainable(digestText) {
800+
logger.Debug("statement is not explainable, skipping explain query", zap.String("digest", digest))
802801
return ""
803802
}
804803

805804
if schema != "" {
806-
if _, err := c.client.Exec(fmt.Sprintf("/* otel-collector-ignore */ USE %s;", schema)); err != nil {
807-
logger.Error(fmt.Sprintf("unable to use schema: %s", schema), zap.Error(err))
805+
if _, err := c.client.Exec(fmt.Sprintf("/* otel-collector-ignore */ USE `%s`;", strings.ReplaceAll(schema, "`", "``"))); err != nil {
806+
logger.Warn(fmt.Sprintf("unable to use schema: %s", schema), zap.String("digest", digest), zap.Error(err))
808807
return ""
809808
}
810809
}
811810

812811
var plan string
813-
err := c.client.QueryRow(fmt.Sprintf("EXPLAIN FORMAT=json %s", statement)).Scan(&plan)
812+
err := c.client.QueryRow("EXPLAIN FORMAT=json " + strings.TrimSpace(sampleStatement)).Scan(&plan)
814813
if err != nil {
815-
logger.Error("unable to execute explain statement", zap.Error(err))
814+
logger.Warn("unable to execute explain statement", zap.String("digest", digest), zap.Error(err))
816815
return ""
817816
}
817+
if plan == "" {
818+
logger.Warn("explain query returned empty plan", zap.String("digest", digest))
819+
}
818820
return plan
819821
}
820822

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package mysqlreceiver
5+
6+
import (
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
"go.uber.org/zap"
11+
)
12+
13+
func TestIsQueryExplainable(t *testing.T) {
14+
tests := []struct {
15+
name string
16+
input string
17+
expected bool
18+
}{
19+
// Supported keywords — plain queries
20+
{
21+
name: "select is explainable",
22+
input: "SELECT * FROM t",
23+
expected: true,
24+
},
25+
{
26+
name: "delete is explainable",
27+
input: "DELETE FROM t WHERE id = 1",
28+
expected: true,
29+
},
30+
{
31+
name: "insert is explainable",
32+
input: "INSERT INTO t VALUES (1)",
33+
expected: true,
34+
},
35+
{
36+
name: "replace is explainable",
37+
input: "REPLACE INTO t VALUES (1)",
38+
expected: true,
39+
},
40+
{
41+
name: "update is explainable",
42+
input: "UPDATE t SET col = 1",
43+
expected: true,
44+
},
45+
// Case-insensitive matching
46+
{
47+
name: "mixed-case SELECT is explainable",
48+
input: "Select * FROM t",
49+
expected: true,
50+
},
51+
// Leading whitespace
52+
{
53+
name: "leading whitespace before SELECT is explainable",
54+
input: " SELECT * FROM t",
55+
expected: true,
56+
},
57+
// Unsupported statements
58+
{
59+
name: "show is not explainable",
60+
input: "SHOW TABLES",
61+
expected: false,
62+
},
63+
{
64+
name: "create is not explainable",
65+
input: "CREATE TABLE t (id INT)",
66+
expected: false,
67+
},
68+
{
69+
name: "drop is not explainable",
70+
input: "DROP TABLE t",
71+
expected: false,
72+
},
73+
{
74+
name: "empty string is not explainable",
75+
input: "",
76+
expected: false,
77+
},
78+
// Truncated statements (handled upstream, but isQueryExplainable itself
79+
// should not crash; the trailing "..." doesn’t match any keyword)
80+
{
81+
name: "truncated statement that starts with SELECT is still type-explainable",
82+
input: "SELECT * FROM very_long_table_na...",
83+
expected: true, // still starts with SELECT
84+
},
85+
// Any leading block comment makes the query not explainable; digest_text
86+
// from performance_schema does not include them, so this won't arise in practice.
87+
{
88+
name: "leading block comment before SELECT is not explainable",
89+
input: "/* a comment */ SELECT * FROM t",
90+
expected: false,
91+
},
92+
}
93+
94+
for _, tt := range tests {
95+
t.Run(tt.name, func(t *testing.T) {
96+
assert.Equal(t, tt.expected, isQueryExplainable(tt.input))
97+
})
98+
}
99+
}
100+
101+
// TestExplainQueryEarlyExits verifies that explainQuery returns "" without
102+
// hitting the database when the sample statement is truncated or the digest
103+
// text is not an explainable statement type.
104+
func TestExplainQueryEarlyExits(t *testing.T) {
105+
// mySQLClient with a nil DB — safe because both early-exit paths return
106+
// before any database call is made.
107+
c := &mySQLClient{}
108+
logger := zap.NewNop()
109+
110+
t.Run("truncated sample statement returns empty", func(t *testing.T) {
111+
result := c.explainQuery("SELECT * FROM t", "SELECT * FROM very_long_table_na...", "", "digest1", logger)
112+
assert.Empty(t, result)
113+
})
114+
115+
t.Run("non-explainable digest text returns empty", func(t *testing.T) {
116+
result := c.explainQuery("SHOW TABLES", "SHOW TABLES", "", "digest2", logger)
117+
assert.Empty(t, result)
118+
})
119+
}

receiver/mysqlreceiver/metadata.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,8 @@ metrics:
827827

828828
tests:
829829
config:
830+
tls:
831+
insecure: true
830832
goleak:
831833
ignore:
832834
any:

receiver/mysqlreceiver/obfuscate.go

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ import (
1010
var (
1111
obfuscateSQLConfig = obfuscate.SQLConfig{DBMS: "mysql"}
1212
obfuscatorConfig = obfuscate.Config{
13-
SQLExecPlan: defaultSQLPlanObfuscateSettings,
14-
SQLExecPlanNormalize: defaultSQLPlanNormalizeSettings,
13+
SQLExecPlan: defaultSQLPlanObfuscateSettings,
1514
}
1615
)
1716

@@ -30,57 +29,57 @@ func (o *obfuscator) obfuscateSQLString(sql string) (string, error) {
3029
}
3130

3231
func (o *obfuscator) obfuscatePlan(plan string) (string, error) {
33-
obfuscated, err := (*obfuscate.Obfuscator)(o).ObfuscateSQLExecPlan(plan, true)
32+
obfuscated, err := (*obfuscate.Obfuscator)(o).ObfuscateSQLExecPlan(plan, false)
3433
if err != nil {
3534
return "", err
3635
}
3736
return obfuscated, nil
3837
}
3938

40-
// defaultSQLPlanNormalizeSettings are the default JSON obfuscator settings for both obfuscating and normalizing SQL
41-
// execution plans
42-
var defaultSQLPlanNormalizeSettings = obfuscate.JSONConfig{
39+
// For further information, see https://dev.mysql.com/doc/refman/8.4/en/explain.html
40+
// MySQL 8.4 EXPLAIN FORMAT=JSON produces two formats depending on explain_json_format_version:
41+
// - Version 1 (default): query_block → ordering_operation → table → attached_condition
42+
// - Version 2: top-level query + inputs array, each node has condition/operation/access_type etc.
43+
var defaultSQLPlanObfuscateSettings = obfuscate.JSONConfig{
4344
Enabled: true,
4445
ObfuscateSQLValues: []string{
45-
// mysql
46+
// v2: the full query text
47+
"query",
48+
// v2: SQL condition expression on a filter node
49+
"condition",
50+
// v2: human-readable description of a plan node (e.g. "Filter: (...)", "Table scan on ...")
51+
"operation",
52+
// v1: SQL condition expression attached to a table scan
4653
"attached_condition",
4754
},
4855
KeepValues: []string{
49-
// mysql
50-
"access_type",
51-
"backward_index_scan",
52-
"cacheable",
53-
"delete",
54-
"dependent",
55-
"first_match",
56-
"key",
57-
"key_length",
58-
"possible_keys",
59-
"ref",
56+
// v1 structural fields
57+
"cost_info",
58+
"ordering_operation",
59+
"query_block",
60+
"query_plan",
61+
"query_type",
6062
"select_id",
61-
"table_name",
62-
"update",
63+
"table",
6364
"used_columns",
64-
"used_key_parts",
65-
"using_MRR",
6665
"using_filesort",
67-
"using_index",
68-
"using_join_buffer",
69-
"using_temporary_table",
66+
// v2 structural fields
67+
"access_type",
68+
"covering",
69+
"estimated_rows",
70+
"estimated_total_cost",
71+
"filter_columns",
72+
"index_access_type",
73+
"index_name",
74+
"inputs",
75+
"json_schema_version",
76+
"limit",
77+
"limit_offset",
78+
"per_chunk_limit",
79+
"ranges",
80+
"row_ids",
81+
"schema_name",
82+
"sort_fields",
83+
"table_name",
7084
},
7185
}
72-
73-
// defaultSQLPlanObfuscateSettings builds upon sqlPlanNormalizeSettings by including cost & row estimates in the keep
74-
// list
75-
var defaultSQLPlanObfuscateSettings = obfuscate.JSONConfig{
76-
Enabled: true,
77-
KeepValues: append([]string{
78-
// mysql
79-
"cost_info",
80-
"filtered",
81-
"rows_examined_per_join",
82-
"rows_examined_per_scan",
83-
"rows_produced_per_join",
84-
}, defaultSQLPlanNormalizeSettings.KeepValues...),
85-
ObfuscateSQLValues: defaultSQLPlanNormalizeSettings.ObfuscateSQLValues,
86-
}

0 commit comments

Comments
 (0)