Merge pull request cockroachdb#165571 from DrewKimball/backport24.3-165260

DrewKimball · web-flow · commit cd3d59d43f43 · 2026-03-13T10:12:02.000-05:00
release-24.3: sql: partially fix data race in CollationEnvironment
diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_collated_string b/pkg/sql/logictest/testdata/logic_test/distsql_collated_string
@@ -0,0 +1,131 @@
+# LogicTest: 5node
+
+subtest outbox_race
+
+# Regression test for #110322: data race in CollationEnvironment when multiple
+# outboxes share the same evalCtx on the gateway and evaluate COLLATE
+# expressions concurrently.
+
+statement ok
+CREATE TABLE t1 (c0 STRING PRIMARY KEY)
+
+statement ok
+INSERT INTO t1 VALUES ('a'), ('b'), ('c'), ('d'), ('e')
+
+statement ok
+ALTER TABLE t1 SPLIT AT VALUES ('b'), ('c'), ('d'), ('e')
+
+retry
+statement ok
+ALTER TABLE t1 EXPERIMENTAL_RELOCATE VALUES
+  (ARRAY[1], 'a'),
+  (ARRAY[2], 'b'),
+  (ARRAY[3], 'c'),
+  (ARRAY[4], 'd'),
+  (ARRAY[5], 'e')
+
+# Force DistSQL so the query creates multiple outboxes sharing the same
+# evalCtx on the gateway.
+statement ok
+SET distsql = on
+
+# This TLP-style UNION query creates three filter processors, each evaluating
+# a COLLATE expression. When distributed, the gateway runs multiple outbox
+# goroutines concurrently that all call NewDCollatedString through the shared
+# CollationEnvironment, triggering a data race.
+query T rowsort
+SELECT * FROM t1 WHERE (t1.c0 COLLATE en) > ('_' COLLATE en)
+UNION
+SELECT * FROM t1 WHERE NOT ((t1.c0 COLLATE en) > ('_' COLLATE en))
+UNION
+SELECT * FROM t1 WHERE ((t1.c0 COLLATE en) > ('_' COLLATE en)) IS NULL
+----
+a
+b
+c
+d
+e
+
+# Run the same query pattern two more times (with different constants) to
+# increase the chance of triggering the race under the race detector.
+query T rowsort
+SELECT * FROM t1 WHERE (t1.c0 COLLATE en) > ('b' COLLATE en)
+UNION
+SELECT * FROM t1 WHERE NOT ((t1.c0 COLLATE en) > ('b' COLLATE en))
+UNION
+SELECT * FROM t1 WHERE ((t1.c0 COLLATE en) > ('b' COLLATE en)) IS NULL
+----
+a
+b
+c
+d
+e
+
+query T rowsort
+SELECT * FROM t1 WHERE (t1.c0 COLLATE en) > ('c' COLLATE en)
+UNION
+SELECT * FROM t1 WHERE NOT ((t1.c0 COLLATE en) > ('c' COLLATE en))
+UNION
+SELECT * FROM t1 WHERE ((t1.c0 COLLATE en) > ('c' COLLATE en)) IS NULL
+----
+a
+b
+c
+d
+e
+
+subtest end
+
+subtest sampler_race
+
+# Regression test for a data race in CollationEnvironment when the sampler and
+# sample aggregator processors share the same FlowCtx.EvalCtx on the gateway.
+# Both processors call SampleRow -> copyRow -> truncateDatum ->
+# NewDCollatedString concurrently for collated strings exceeding the 400-byte
+# sample size limit, racing on the shared CollationEnv.
+#
+# The race window: on the gateway, the local sampler processes rows (calling
+# truncateDatum) while the SampleAggregator concurrently processes sample rows
+# arriving from remote nodes' samplers (also calling truncateDatum). Both use
+# the shared FlowCtx.EvalCtx.CollationEnv.
+#
+# To maximize overlap, we put many rows on the gateway (so its sampler is slow)
+# and fewer rows on a remote node (so its sampler finishes first and sends
+# sample rows to the SampleAggregator while the gateway's sampler is still
+# running).
+
+statement ok
+CREATE TABLE t2 (c0 STRING COLLATE en PRIMARY KEY)
+
+# Insert long collated strings (> 400 bytes) to trigger the truncateDatum path,
+# which calls NewDCollatedString using the shared CollationEnv. We put many
+# rows on node 1 (gateway) and fewer on node 2.
+statement ok
+INSERT INTO t2
+  SELECT (chr(97 + (i % 26)) || repeat('x', 500) || i::STRING) COLLATE en
+  FROM generate_series(1, 100) AS g(i)
+
+# Split into two ranges: the first ~half on node 1 (gateway) and the rest on
+# node 2.
+statement ok
+ALTER TABLE t2 SPLIT AT VALUES ((chr(97 + 13) || repeat('x', 500) || '50') COLLATE en)
+
+retry
+statement ok
+ALTER TABLE t2 EXPERIMENTAL_RELOCATE VALUES
+  (ARRAY[1], (chr(97) || repeat('x', 500) || '1') COLLATE en),
+  (ARRAY[2], (chr(97 + 13) || repeat('x', 500) || '50') COLLATE en)
+
+# Run CREATE STATISTICS several times to increase the chance of triggering the
+# race between the sampler and sample aggregator on the gateway node. Both
+# processors run in separate goroutines sharing the same FlowCtx.EvalCtx.
+statement ok
+CREATE STATISTICS s1 ON c0 FROM t2
+
+statement ok
+CREATE STATISTICS s2 ON c0 FROM t2
+
+statement ok
+CREATE STATISTICS s3 ON c0 FROM t2
+
+subtest end
diff --git a/pkg/sql/logictest/tests/5node/BUILD.bazel b/pkg/sql/logictest/tests/5node/BUILD.bazel
@@ -12,7 +12,7 @@ go_test(
         "//build/toolchains:is_heavy": {"test.Pool": "heavy"},
         "//conditions:default": {"test.Pool": "large"},
     }),
-    shard_count = 20,
+    shard_count = 21,
     tags = ["cpu:3"],
     deps = [
         "//pkg/base",
diff --git a/pkg/sql/logictest/tests/5node/generated_test.go b/pkg/sql/logictest/tests/5node/generated_test.go
diff --git a/pkg/sql/rowexec/sample_aggregator.go b/pkg/sql/rowexec/sample_aggregator.go
@@ -41,10 +41,11 @@ import (
 type sampleAggregator struct {
 	execinfra.ProcessorBase
 
-	spec    *execinfrapb.SampleAggregatorSpec
-	input   execinfra.RowSource
-	inTypes []*types.T
-	sr      stats.SampleReservoir
+	spec         *execinfrapb.SampleAggregatorSpec
+	input        execinfra.RowSource
+	inTypes      []*types.T
+	sr           stats.SampleReservoir
+	collationEnv tree.CollationEnvironment
 
 	// memAcc accounts for memory accumulated throughout the life of the
 	// sampleAggregator.
@@ -422,7 +423,7 @@ func (s *sampleAggregator) sampleRow(
 	ctx context.Context, sr *stats.SampleReservoir, sampleRow rowenc.EncDatumRow, rank uint64,
 ) error {
 	prevCapacity := sr.Cap()
-	if err := sr.SampleRow(ctx, s.FlowCtx.EvalCtx, sampleRow, rank); err != nil {
+	if err := sr.SampleRow(ctx, &s.collationEnv, sampleRow, rank); err != nil {
 		if code := pgerror.GetPGCode(err); code != pgcode.OutOfMemory {
 			return err
 		}
diff --git a/pkg/sql/rowexec/sampler.go b/pkg/sql/rowexec/sampler.go
@@ -47,6 +47,7 @@ type samplerProcessor struct {
 	input           execinfra.RowSource
 	memAcc          mon.BoundAccount
 	sr              stats.SampleReservoir
+	collationEnv    tree.CollationEnvironment
 	sketches        []sketchInfo
 	outTypes        []*types.T
 	maxFractionIdle float64
@@ -452,7 +453,7 @@ func (s *samplerProcessor) sampleRow(
 	// Use Int63 so we don't have headaches converting to DInt.
 	rank := uint64(rng.Int63())
 	prevCapacity := sr.Cap()
-	if err := sr.SampleRow(ctx, s.FlowCtx.EvalCtx, row, rank); err != nil {
+	if err := sr.SampleRow(ctx, &s.collationEnv, row, rank); err != nil {
 		if !sqlerrors.IsOutOfMemoryError(err) {
 			return false, err
 		}
diff --git a/pkg/sql/sem/eval/context.go b/pkg/sql/sem/eval/context.go
@@ -512,9 +512,12 @@ func (ec *Context) SessionData() *sessiondata.SessionData {
 	return ec.SessionDataStack.Top()
 }
 
-// Copy returns a deep copy of ctx.
+// Copy returns a copy of the EvalCtx that can safely be used concurrently with
+// the original.
 func (ec *Context) Copy() *Context {
 	ctxCopy := *ec
+	// CollationEnvironment is not thread safe, so the copy gets a fresh one.
+	ctxCopy.CollationEnv = tree.CollationEnvironment{}
 	ctxCopy.iVarContainerStack = make([]tree.IndexedVarContainer, len(ec.iVarContainerStack), cap(ec.iVarContainerStack))
 	copy(ctxCopy.iVarContainerStack, ec.iVarContainerStack)
 	return &ctxCopy
diff --git a/pkg/sql/stats/row_sampling.go b/pkg/sql/stats/row_sampling.go
@@ -11,7 +11,6 @@ import (
 
 	"github.com/cockroachdb/cockroach/pkg/sql/memsize"
 	"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
-	"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
 	"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
@@ -166,7 +165,7 @@ func (sr *SampleReservoir) retryMaybeResize(ctx context.Context, op func() error
 // SampleRow returns an error (any type of error), no additional calls to
 // SampleRow should be made as the failed samples will have introduced bias.
 func (sr *SampleReservoir) SampleRow(
-	ctx context.Context, evalCtx *eval.Context, row rowenc.EncDatumRow, rank uint64,
+	ctx context.Context, collationEnv *tree.CollationEnvironment, row rowenc.EncDatumRow, rank uint64,
 ) error {
 	return sr.retryMaybeResize(ctx, func() error {
 		if len(sr.samples) < cap(sr.samples) {
@@ -180,7 +179,7 @@ func (sr *SampleReservoir) SampleRow(
 					return err
 				}
 			}
-			if err := sr.copyRow(ctx, evalCtx, rowCopy, row); err != nil {
+			if err := sr.copyRow(ctx, collationEnv, rowCopy, row); err != nil {
 				return err
 			}
 			sr.samples = append(sr.samples, SampledRow{Row: rowCopy, Rank: rank})
@@ -192,7 +191,7 @@ func (sr *SampleReservoir) SampleRow(
 		}
 		// Replace the max rank if ours is smaller.
 		if len(sr.samples) > 0 && rank < sr.samples[0].Rank {
-			if err := sr.copyRow(ctx, evalCtx, sr.samples[0].Row, row); err != nil {
+			if err := sr.copyRow(ctx, collationEnv, sr.samples[0].Row, row); err != nil {
 				return err
 			}
 			sr.samples[0].Rank = rank
@@ -238,7 +237,7 @@ func (sr *SampleReservoir) GetNonNullDatums(
 }
 
 func (sr *SampleReservoir) copyRow(
-	ctx context.Context, evalCtx *eval.Context, dst, src rowenc.EncDatumRow,
+	ctx context.Context, collationEnv *tree.CollationEnvironment, dst, src rowenc.EncDatumRow,
 ) error {
 	// First, we calculate how much memory has already been accounted for the
 	// "before" row (row that we're about to overwrite) as well as how much
@@ -263,7 +262,7 @@ func (sr *SampleReservoir) copyRow(
 
 		// If the datum is too large, truncate it.
 		if afterSize > uintptr(maxBytesPerSample) {
-			sr.scratch[i].Datum = truncateDatum(evalCtx, sr.scratch[i].Datum, maxBytesPerSample)
+			sr.scratch[i].Datum = truncateDatum(collationEnv, sr.scratch[i].Datum, maxBytesPerSample)
 			afterSize = sr.scratch[i].Size()
 		}
 		afterRowSize += int64(afterSize)
@@ -289,7 +288,7 @@ const maxBytesPerSample = 400
 //
 // For example, if maxBytes=10, "Cockroach Labs" would be truncated to
 // "Cockroach ".
-func truncateDatum(evalCtx *eval.Context, d tree.Datum, maxBytes int) tree.Datum {
+func truncateDatum(collationEnv *tree.CollationEnvironment, d tree.Datum, maxBytes int) tree.Datum {
 	switch t := d.(type) {
 	case *tree.DBitArray:
 		b := tree.DBitArray{BitArray: t.ToWidth(uint(maxBytes * 8))}
@@ -310,15 +309,15 @@ func truncateDatum(evalCtx *eval.Context, d tree.Datum, maxBytes int) tree.Datum
 
 		// Note: this will end up being larger than maxBytes due to the key and
 		// locale, so this is just a best-effort attempt to limit the size.
-		res, err := tree.NewDCollatedString(contents, t.Locale, &evalCtx.CollationEnv)
+		res, err := tree.NewDCollatedString(contents, t.Locale, collationEnv)
 		if err != nil {
 			return d
 		}
 		return res
 
 	case *tree.DOidWrapper:
 		return &tree.DOidWrapper{
-			Wrapped: truncateDatum(evalCtx, t.Wrapped, maxBytes),
+			Wrapped: truncateDatum(collationEnv, t.Wrapped, maxBytes),
 			Oid:     t.Oid,
 		}
 
diff --git a/pkg/sql/stats/row_sampling_test.go b/pkg/sql/stats/row_sampling_test.go
@@ -29,19 +29,16 @@ import (
 // runSampleTest feeds rows with the given ranks through a reservoir
 // of a given size and verifies the results are correct.
 func runSampleTest(
-	t *testing.T,
-	evalCtx *eval.Context,
-	numSamples, expectedNumSamples int,
-	ranks []int,
-	memAcc *mon.BoundAccount,
+	t *testing.T, numSamples, expectedNumSamples int, ranks []int, memAcc *mon.BoundAccount,
 ) {
 	ctx := context.Background()
 	var sr SampleReservoir
+	var collationEnv tree.CollationEnvironment
 	sr.Init(numSamples, 1, []*types.T{types.Int}, memAcc, intsets.MakeFast(0))
 	for _, r := range ranks {
 		d := rowenc.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(r)))
 		prevCapacity := sr.Cap()
-		if err := sr.SampleRow(ctx, evalCtx, rowenc.EncDatumRow{d}, uint64(r)); err != nil {
+		if err := sr.SampleRow(ctx, &collationEnv, rowenc.EncDatumRow{d}, uint64(r)); err != nil {
 			t.Fatal(err)
 		} else if sr.Cap() != prevCapacity {
 			t.Logf(
@@ -92,7 +89,6 @@ func runSampleTest(
 func TestSampleReservoir(t *testing.T) {
 	ctx := context.Background()
 	st := cluster.MakeTestingClusterSettings()
-	evalCtx := eval.MakeTestingEvalContext(st)
 
 	for _, n := range []int{10, 100, 1000, 10000} {
 		rng, _ := randutil.NewTestRand()
@@ -102,7 +98,7 @@ func TestSampleReservoir(t *testing.T) {
 		}
 		for _, k := range []int{1, 5, 10, 100} {
 			t.Run(fmt.Sprintf("n=%d/k=%d/mem=nolimit", n, k), func(t *testing.T) {
-				runSampleTest(t, &evalCtx, k, k, ranks, nil)
+				runSampleTest(t, k, k, ranks, mon.NewStandaloneUnlimitedAccount())
 			})
 			for _, mem := range []int64{1 << 8, 1 << 10, 1 << 12} {
 				t.Run(fmt.Sprintf("n=%d/k=%d/mem=%d", n, k, mem), func(t *testing.T) {
@@ -122,7 +118,7 @@ func TestSampleReservoir(t *testing.T) {
 					} else if mem == 1<<12 && n > 10 && k > 10 {
 						expectedK = 25
 					}
-					runSampleTest(t, &evalCtx, k, expectedK, ranks, &memAcc)
+					runSampleTest(t, k, expectedK, ranks, &memAcc)
 				})
 			}
 		}
@@ -132,8 +128,9 @@ func TestSampleReservoir(t *testing.T) {
 func TestTruncateDatum(t *testing.T) {
 	ctx := context.Background()
 	evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
+	var collationEnv tree.CollationEnvironment
 	runTest := func(d, expected tree.Datum) {
-		actual := truncateDatum(&evalCtx, d, 10 /* maxBytes */)
+		actual := truncateDatum(&collationEnv, d, 10 /* maxBytes */)
 		if cmp, err := actual.Compare(ctx, &evalCtx, expected); err != nil {
 			t.Fatal(err)
 		} else if cmp != 0 {
@@ -185,7 +182,7 @@ corn, the green oats, and the haystacks piled up in the meadows looked beautiful
 func TestSampleReservoirMemAccounting(t *testing.T) {
 	ctx := context.Background()
 	st := cluster.MakeTestingClusterSettings()
-	evalCtx := eval.MakeTestingEvalContext(st)
+	var collationEnv tree.CollationEnvironment
 
 	getStringDatum := func(l int) rowenc.EncDatum {
 		d := tree.DString(strings.Repeat("a", l))
@@ -210,9 +207,9 @@ func TestSampleReservoirMemAccounting(t *testing.T) {
 	memAcc := monitor.MakeBoundAccount()
 	var sr SampleReservoir
 	sr.Init(2, 1, []*types.T{types.String, types.String}, &memAcc, intsets.MakeFast(0, 1))
-	require.NoError(t, sr.SampleRow(ctx, &evalCtx, rows[0], 3))
-	require.NoError(t, sr.SampleRow(ctx, &evalCtx, rows[1], 2))
-	err := sr.SampleRow(ctx, &evalCtx, rows[2], 1)
+	require.NoError(t, sr.SampleRow(ctx, &collationEnv, rows[0], 3))
+	require.NoError(t, sr.SampleRow(ctx, &collationEnv, rows[1], 2))
+	err := sr.SampleRow(ctx, &collationEnv, rows[2], 1)
 	require.Error(t, err)
 	require.True(t, testutils.IsError(err, "memory budget exceeded"))
 }