From 186e39ce151e929bf41b3c6e81ea1a82bdf72464 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 13 May 2026 05:12:47 -0700 Subject: [PATCH 01/10] feat: clustered segments DataSegment changes and PartialLoadMatcher --- .../input/table/DataSegmentWithLocation.java | 7 +- .../loading/PartialClusterGroupLoadSpec.java | 140 ++++++++ .../druid/timeline/ClusterGroupTuples.java | 187 ++++++++++ .../apache/druid/timeline/DataSegment.java | 48 +++ .../PartialClusterGroupLoadSpecTest.java | 222 ++++++++++++ .../timeline/ClusterGroupTuplesTest.java | 243 +++++++++++++ .../DataSegmentClusterGroupsTest.java | 175 ++++++++++ .../druid/guice/PartialLoadSpecModule.java | 10 +- .../coordination/LoadableDataSegment.java | 3 + .../rules/ClusterGroupPartialLoadMatcher.java | 85 +++++ .../ExactClusterGroupPartialLoadMatcher.java | 159 +++++++++ .../druid/server/coordinator/rules/Globs.java | 127 +++++++ .../coordinator/rules/PartialLoadMatcher.java | 4 +- ...ildcardClusterGroupPartialLoadMatcher.java | 238 +++++++++++++ .../WildcardProjectionPartialLoadMatcher.java | 91 +---- .../client/CachingClusteredClientTest.java | 1 + .../coordinator/CreateDataSegments.java | 1 + ...actClusterGroupPartialLoadMatcherTest.java | 250 ++++++++++++++ .../server/coordinator/rules/GlobsTest.java | 111 ++++++ ...ardClusterGroupPartialLoadMatcherTest.java | 322 ++++++++++++++++++ 20 files changed, 2331 insertions(+), 93 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java create mode 100644 processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java create mode 100644 processing/src/test/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpecTest.java create mode 100644 processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java create mode 100644 processing/src/test/java/org/apache/druid/timeline/DataSegmentClusterGroupsTest.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java create mode 100644 server/src/test/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcherTest.java create mode 100644 server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java create mode 100644 server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/table/DataSegmentWithLocation.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/table/DataSegmentWithLocation.java index 72e765d620dc..860a9d656d4b 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/table/DataSegmentWithLocation.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/table/DataSegmentWithLocation.java @@ -27,6 +27,7 @@ import org.apache.druid.jackson.CommaListJoinDeserializer; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.server.coordination.DruidServerMetadata; +import org.apache.druid.timeline.ClusterGroupTuples; import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.ShardSpec; @@ -54,8 +55,8 @@ private DataSegmentWithLocation( @JsonProperty("dimensions") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List dimensions, @JsonProperty("metrics") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List metrics, - @JsonProperty("projections") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable - List projections, + @JsonProperty("projections") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List projections, + @JsonProperty("clusterGroups") @Nullable ClusterGroupTuples clusterGroups, @JsonProperty("shardSpec") @Nullable ShardSpec shardSpec, @JsonProperty("lastCompactionState") @Nullable CompactionState lastCompactionState, @JsonProperty("binaryVersion") Integer binaryVersion, @@ -74,6 +75,7 @@ private DataSegmentWithLocation( dimensions, metrics, projections, + clusterGroups, shardSpec, lastCompactionState, binaryVersion, @@ -98,6 +100,7 @@ public DataSegmentWithLocation( dataSegment.getDimensions(), dataSegment.getMetrics(), dataSegment.getProjections(), + dataSegment.getClusterGroups(), dataSegment.getShardSpec(), null, dataSegment.getBinaryVersion(), diff --git a/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java b/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java new file mode 100644 index 000000000000..be749e4910e6 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; +import org.apache.druid.utils.CollectionUtils; + +import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * A {@link LoadSpec} wrapper that carries partial-cluster-group metadata from the coordinator to a historical + * alongside the original backend-specific load spec. The wrapped {@code delegate} is held as a raw {@link Map} so + * that the concrete backend type (e.g. {@code s3}, {@code local}, {@code hdfs}) is materialized only when needed; + * this avoids pulling backend-specific dependencies onto every node that touches the wire form. + *

+ * Both {@link #loadSegment(File)} and {@link #openRangeReader()} delegate verbatim to the inner load spec. The + * historical-side partial-load path inspects this wrapper at mount time to learn which cluster-group indices (into + * the segment's {@code clusterGroups.tuples} list) to range-read and the fingerprint identifying the request the + * coordinator made. + */ +@JsonTypeName(PartialClusterGroupLoadSpec.TYPE) +public class PartialClusterGroupLoadSpec implements LoadSpec +{ + public static final String TYPE = "partialClusterGroup"; + + private final Map delegate; + private final List clusterGroupIndices; + private final String fingerprint; + private final Supplier materializedDelegateSupplier; + + @JsonCreator + public PartialClusterGroupLoadSpec( + @JsonProperty("delegate") Map delegate, + @JsonProperty("clusterGroupIndices") List clusterGroupIndices, + @JsonProperty("fingerprint") String fingerprint, + @JacksonInject ObjectMapper jsonMapper + ) + { + Preconditions.checkNotNull(jsonMapper, "jsonMapper"); + this.delegate = Preconditions.checkNotNull(delegate, "delegate"); + Preconditions.checkArgument( + !CollectionUtils.isNullOrEmpty(clusterGroupIndices), + "clusterGroupIndices must not be null or empty" + ); + this.clusterGroupIndices = List.copyOf(clusterGroupIndices); + this.fingerprint = Preconditions.checkNotNull(fingerprint, "fingerprint"); + this.materializedDelegateSupplier = Suppliers.memoize(() -> jsonMapper.convertValue(delegate, LoadSpec.class)); + } + + @JsonProperty + public Map getDelegate() + { + return delegate; + } + + @JsonProperty + public List getClusterGroupIndices() + { + return clusterGroupIndices; + } + + @JsonProperty + public String getFingerprint() + { + return fingerprint; + } + + @Override + public LoadSpecResult loadSegment(File destDir) throws SegmentLoadingException + { + return materializedDelegateSupplier.get().loadSegment(destDir); + } + + @Override + @Nullable + public SegmentRangeReader openRangeReader() throws IOException + { + return materializedDelegateSupplier.get().openRangeReader(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PartialClusterGroupLoadSpec that = (PartialClusterGroupLoadSpec) o; + return Objects.equals(delegate, that.delegate) + && Objects.equals(clusterGroupIndices, that.clusterGroupIndices) + && Objects.equals(fingerprint, that.fingerprint); + } + + @Override + public int hashCode() + { + return Objects.hash(delegate, clusterGroupIndices, fingerprint); + } + + @Override + public String toString() + { + return "PartialClusterGroupLoadSpec{" + + "delegate=" + delegate + + ", clusterGroupIndices=" + clusterGroupIndices + + ", fingerprint=" + fingerprint + + '}'; + } +} diff --git a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java new file mode 100644 index 000000000000..ab942e2da5cd --- /dev/null +++ b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.timeline; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.InvalidInput; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * Typed clustering tuples carried on {@link DataSegment#getClusterGroups()} for clustered base-table segments. Each + * entry in {@link #getTuples()} is one cluster group's clustering-column values, in the order declared by + * {@link #getClusteringColumns()}. + */ +public class ClusterGroupTuples +{ + private final RowSignature clusteringColumns; + private final List> tuples; + + @JsonCreator + public ClusterGroupTuples( + @JsonProperty("clusteringColumns") RowSignature clusteringColumns, + @JsonProperty("tuples") @Nullable List> tuples + ) + { + if (clusteringColumns == null || clusteringColumns.size() == 0) { + throw InvalidInput.exception("clusteringColumns must not be null or empty"); + } + this.clusteringColumns = clusteringColumns; + + final List> source = tuples == null ? Collections.emptyList() : tuples; + final int numCols = clusteringColumns.size(); + final List> coerced = new ArrayList<>(source.size()); + for (int t = 0; t < source.size(); t++) { + final List tuple = source.get(t); + if (tuple == null || tuple.size() != numCols) { + throw InvalidInput.exception( + "tuple[%s] has size [%s] but clusteringColumns size is [%s]", + t, + tuple == null ? "null" : tuple.size(), + numCols + ); + } + final Object[] out = new Object[numCols]; + for (int i = 0; i < numCols; i++) { + final String name = clusteringColumns.getColumnName(i); + final ColumnType type = clusteringColumns.getColumnType(i).orElseThrow( + () -> InvalidInput.exception("clusteringColumn[%s] has no declared type", name) + ); + out[i] = coerceValue(name, type, tuple.get(i)); + } + coerced.add(Collections.unmodifiableList(Arrays.asList(out))); + } + this.tuples = Collections.unmodifiableList(coerced); + } + + @JsonProperty + public RowSignature getClusteringColumns() + { + return clusteringColumns; + } + + @JsonProperty + public List> getTuples() + { + return tuples; + } + + /** + * Coerce {@code raw} into the canonical Java representation of {@code type}. Null in, null out. STRING uses + * {@link String#valueOf}; LONG / DOUBLE / FLOAT accept any {@link Number} (down/up-cast via the matching primitive + * accessor) or a parseable numeric {@link String}. Throws on unsupported types or unparseable strings. + *

+ * Used by: + *

    + *
  • {@link ClusterGroupTuples}'s constructor to canonicalize segment-side tuples (strict).
  • + *
  • Operator-supplied rule tuples in the cluster-group partial-load matchers, which catch the exception and + * treat it as "no match for this segment" rather than a hard failure.
  • + *
+ */ + public static Object coerceValue(String columnName, ColumnType type, @Nullable Object raw) + { + if (raw == null) { + return null; + } + if (ColumnType.STRING.equals(type)) { + return raw instanceof String ? raw : String.valueOf(raw); + } + if (ColumnType.LONG.equals(type)) { + if (raw instanceof Number) { + return ((Number) raw).longValue(); + } + if (raw instanceof String) { + try { + return Long.parseLong((String) raw); + } + catch (NumberFormatException e) { + throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to LONG", raw, columnName); + } + } + throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to LONG", raw, columnName); + } + if (ColumnType.DOUBLE.equals(type)) { + if (raw instanceof Number) { + return ((Number) raw).doubleValue(); + } + if (raw instanceof String) { + try { + return Double.parseDouble((String) raw); + } + catch (NumberFormatException e) { + throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to DOUBLE", raw, columnName); + } + } + throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to DOUBLE", raw, columnName); + } + if (ColumnType.FLOAT.equals(type)) { + if (raw instanceof Number) { + return ((Number) raw).floatValue(); + } + if (raw instanceof String) { + try { + return Float.parseFloat((String) raw); + } + catch (NumberFormatException e) { + throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to FLOAT", raw, columnName); + } + } + throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to FLOAT", raw, columnName); + } + throw InvalidInput.exception( + "Unsupported clustering column type [%s] for column [%s]; supported types are STRING, LONG, DOUBLE, FLOAT", + type, + columnName + ); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (!(o instanceof ClusterGroupTuples)) { + return false; + } + ClusterGroupTuples that = (ClusterGroupTuples) o; + return Objects.equals(clusteringColumns, that.clusteringColumns) && Objects.equals(tuples, that.tuples); + } + + @Override + public int hashCode() + { + return Objects.hash(clusteringColumns, tuples); + } + + @Override + public String toString() + { + return "ClusterGroupTuples{clusteringColumns=" + clusteringColumns + ", tuples=" + tuples + '}'; + } +} diff --git a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java index 601ea7613c53..0b2fd8baf87f 100644 --- a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java +++ b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java @@ -93,6 +93,7 @@ public static class PruneSpecsHolder private static final Interner> DIMENSIONS_INTERNER = Interners.newWeakInterner(); private static final Interner> METRICS_INTERNER = Interners.newWeakInterner(); private static final Interner> PROJECTIONS_INTERNER = Interners.newWeakInterner(); + private static final Interner CLUSTER_GROUPS_INTERNER = Interners.newWeakInterner(); private static final Interner COMPACTION_STATE_INTERNER = Interners.newWeakInterner(); private static final Map PRUNED_LOAD_SPEC = ImmutableMap.of( "load spec is pruned, because it's not needed on Brokers, but eats a lot of heap space", @@ -106,6 +107,13 @@ public static class PruneSpecsHolder private final List dimensions; private final List metrics; private final List projections; + /** + * Typed clustering tuples for clustered base-table segments, or {@code null} for non-clustered segments. Each tuple + * is one cluster group's clustering-column values in the order declared by + * {@link ClusterGroupTuples#getClusteringColumns()}. Consumed by cluster-group partial-load matchers. + */ + @Nullable + private final ClusterGroupTuples clusterGroups; private final ShardSpec shardSpec; /** @@ -154,6 +162,7 @@ public DataSegment( dimensions, metrics, null, + null, shardSpec, null, binaryVersion, @@ -189,6 +198,7 @@ public DataSegment( dimensions, metrics, null, + null, shardSpec, lastCompactionState, binaryVersion, @@ -212,6 +222,7 @@ private DataSegment( @JsonProperty("metrics") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List metrics, @JsonProperty("projections") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List projections, + @JsonProperty("clusterGroups") @Nullable ClusterGroupTuples clusterGroups, @JsonProperty("shardSpec") @Nullable ShardSpec shardSpec, @JsonProperty("lastCompactionState") @Nullable CompactionState lastCompactionState, @JsonProperty("binaryVersion") Integer binaryVersion, @@ -229,6 +240,7 @@ private DataSegment( dimensions, metrics, projections, + clusterGroups, shardSpec, lastCompactionState, binaryVersion, @@ -247,6 +259,7 @@ public DataSegment( @Nullable List dimensions, @Nullable List metrics, @Nullable List projections, + @Nullable ClusterGroupTuples clusterGroups, @Nullable ShardSpec shardSpec, @Nullable CompactionState lastCompactionState, Integer binaryVersion, @@ -264,6 +277,7 @@ public DataSegment( // A null value for projections means that this segment is not aware of projections (launched in druid 32). // An empty list means that this segment is projection-aware, but has no projections. this.projections = projections == null ? null : prepareWithInterner(projections, PROJECTIONS_INTERNER); + this.clusterGroups = prepareClusterGroups(clusterGroups); this.shardSpec = (shardSpec == null) ? new NumberedShardSpec(0, 1) : shardSpec; this.lastCompactionState = pruneSpecsHolder.pruneLastCompactionState ? null @@ -331,6 +345,14 @@ public List getProjections() return projections; } + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public ClusterGroupTuples getClusterGroups() + { + return clusterGroups; + } + @JsonProperty public ShardSpec getShardSpec() { @@ -461,6 +483,11 @@ public DataSegment withProjections(List projections) return builder(this).projections(projections).build(); } + public DataSegment withClusterGroups(@Nullable ClusterGroupTuples clusterGroups) + { + return builder(this).clusterGroups(clusterGroups).build(); + } + public DataSegment withShardSpec(ShardSpec newSpec) { return builder(this).shardSpec(newSpec).build(); @@ -527,6 +554,7 @@ public String toString() ", dimensions=" + dimensions + ", metrics=" + metrics + ", projections=" + projections + + ", clusterGroups=" + clusterGroups + ", shardSpec=" + shardSpec + ", lastCompactionState=" + lastCompactionState + ", size=" + size + @@ -559,6 +587,15 @@ private static CompactionState prepareCompactionState(@Nullable CompactionState return COMPACTION_STATE_INTERNER.intern(lastCompactionState); } + @Nullable + private static ClusterGroupTuples prepareClusterGroups(@Nullable ClusterGroupTuples clusterGroups) + { + if (clusterGroups == null) { + return null; + } + return CLUSTER_GROUPS_INTERNER.intern(clusterGroups); + } + /** * Returns a list of strings with all empty strings removed and all strings interned. *

@@ -606,6 +643,8 @@ public static class Builder private List dimensions; private List metrics; private List projections; + @Nullable + private ClusterGroupTuples clusterGroups; private ShardSpec shardSpec; private CompactionState lastCompactionState; private Integer binaryVersion; @@ -651,6 +690,7 @@ private Builder(DataSegment segment) this.dimensions = segment.getDimensions(); this.metrics = segment.getMetrics(); this.projections = segment.getProjections(); + this.clusterGroups = segment.getClusterGroups(); this.shardSpec = segment.getShardSpec(); this.lastCompactionState = segment.getLastCompactionState(); this.binaryVersion = segment.getBinaryVersion(); @@ -668,6 +708,7 @@ private Builder(DataSegment.Builder segmentBuilder) this.dimensions = segmentBuilder.dimensions; this.metrics = segmentBuilder.metrics; this.projections = segmentBuilder.projections; + this.clusterGroups = segmentBuilder.clusterGroups; this.shardSpec = segmentBuilder.shardSpec; this.lastCompactionState = segmentBuilder.lastCompactionState; this.binaryVersion = segmentBuilder.binaryVersion; @@ -718,6 +759,12 @@ public Builder projections(List projections) return this; } + public Builder clusterGroups(@Nullable ClusterGroupTuples clusterGroups) + { + this.clusterGroups = clusterGroups; + return this; + } + public Builder shardSpec(ShardSpec shardSpec) { this.shardSpec = shardSpec; @@ -770,6 +817,7 @@ public DataSegment build() dimensions, metrics, projections, + clusterGroups, shardSpec, lastCompactionState, binaryVersion, diff --git a/processing/src/test/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpecTest.java b/processing/src/test/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpecTest.java new file mode 100644 index 000000000000..5474617ea323 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpecTest.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import javax.annotation.Nullable; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +class PartialClusterGroupLoadSpecTest +{ + private static final Map DELEGATE = ImmutableMap.of( + "type", "stub", + "path", "/var/druid/segments/foo" + ); + private static final String FINGERPRINT = "v1:abcdef0123456789"; + + private static ObjectMapper configuredMapper() + { + final ObjectMapper m = new DefaultObjectMapper(); + final SimpleModule module = new SimpleModule(); + module.registerSubtypes(PartialClusterGroupLoadSpec.class, StubLoadSpec.class); + m.registerModule(module); + m.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, m)); + return m; + } + + private final ObjectMapper jsonMapper = configuredMapper(); + + @Test + void testJsonRoundTrip() throws Exception + { + PartialClusterGroupLoadSpec spec = new PartialClusterGroupLoadSpec( + DELEGATE, + List.of(0, 2, 5), + FINGERPRINT, + jsonMapper + ); + String json = jsonMapper.writeValueAsString(spec); + LoadSpec reread = jsonMapper.readValue(json, LoadSpec.class); + Assertions.assertInstanceOf(PartialClusterGroupLoadSpec.class, reread); + Assertions.assertEquals(spec, reread); + } + + @Test + void testWireFormHasPartialClusterGroupType() throws Exception + { + PartialClusterGroupLoadSpec spec = new PartialClusterGroupLoadSpec( + DELEGATE, + List.of(0, 1), + FINGERPRINT, + jsonMapper + ); + Map wireForm = jsonMapper.readValue( + jsonMapper.writeValueAsString(spec), + new TypeReference<>() + { + } + ); + Assertions.assertEquals("partialClusterGroup", wireForm.get("type")); + Assertions.assertEquals(DELEGATE, wireForm.get("delegate")); + Assertions.assertEquals(List.of(0, 1), wireForm.get("clusterGroupIndices")); + Assertions.assertEquals(FINGERPRINT, wireForm.get("fingerprint")); + } + + @Test + void testLoadSegmentDelegatesToInner() throws Exception + { + PartialClusterGroupLoadSpec spec = new PartialClusterGroupLoadSpec( + DELEGATE, + List.of(0), + FINGERPRINT, + jsonMapper + ); + StubLoadSpec.LOAD_CALLS.set(0); + LoadSpec.LoadSpecResult result = spec.loadSegment(new File("/tmp/dest")); + Assertions.assertEquals(1, StubLoadSpec.LOAD_CALLS.get()); + Assertions.assertEquals(42L, result.getSize()); + } + + @Test + void testOpenRangeReaderDelegatesToInner() throws Exception + { + PartialClusterGroupLoadSpec spec = new PartialClusterGroupLoadSpec( + DELEGATE, + List.of(0), + FINGERPRINT, + jsonMapper + ); + StubLoadSpec.RANGE_CALLS.set(0); + SegmentRangeReader reader = spec.openRangeReader(); + Assertions.assertNotNull(reader); + Assertions.assertEquals(1, StubLoadSpec.RANGE_CALLS.get()); + } + + @Test + void testOpenRangeReaderReturnsNullWhenInnerDoesNotSupport() throws Exception + { + PartialClusterGroupLoadSpec spec = new PartialClusterGroupLoadSpec( + ImmutableMap.of("type", "stub", "path", "/", "supportsRange", false), + List.of(0), + FINGERPRINT, + jsonMapper + ); + Assertions.assertNull(spec.openRangeReader()); + } + + @Test + void testRejectsNullDelegate() + { + Assertions.assertThrows( + NullPointerException.class, + () -> new PartialClusterGroupLoadSpec(null, List.of(0), "v1:x", jsonMapper) + ); + } + + @Test + void testRejectsNullOrEmptyClusterGroupIndices() + { + Assertions.assertThrows( + IllegalArgumentException.class, + () -> new PartialClusterGroupLoadSpec(DELEGATE, null, "v1:x", jsonMapper) + ); + Assertions.assertThrows( + IllegalArgumentException.class, + () -> new PartialClusterGroupLoadSpec(DELEGATE, List.of(), "v1:x", jsonMapper) + ); + } + + @Test + void testRejectsNullFingerprint() + { + Assertions.assertThrows( + NullPointerException.class, + () -> new PartialClusterGroupLoadSpec(DELEGATE, List.of(0), null, jsonMapper) + ); + } + + /** + * Stub LoadSpec used to verify delegation. Uses the same JSON "type"=="stub" key as the test {@link #DELEGATE}. + */ + @JsonTypeName("stub") + public static class StubLoadSpec implements LoadSpec + { + static final AtomicInteger LOAD_CALLS = new AtomicInteger(0); + static final AtomicInteger RANGE_CALLS = new AtomicInteger(0); + + private final String path; + private final boolean supportsRange; + + @JsonCreator + public StubLoadSpec( + @JsonProperty("path") String path, + @JsonProperty("supportsRange") @Nullable Boolean supportsRange + ) + { + this.path = path; + this.supportsRange = supportsRange == null || supportsRange; + } + + @JsonProperty + public String getPath() + { + return path; + } + + @JsonProperty + public boolean isSupportsRange() + { + return supportsRange; + } + + @Override + public LoadSpecResult loadSegment(File destDir) + { + LOAD_CALLS.incrementAndGet(); + return new LoadSpecResult(42L); + } + + @Override + @Nullable + public SegmentRangeReader openRangeReader() + { + if (!supportsRange) { + return null; + } + RANGE_CALLS.incrementAndGet(); + return (filename, offset, length) -> new ByteArrayInputStream(new byte[0]); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java new file mode 100644 index 000000000000..626be79fb3f7 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.timeline; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.error.DruidException; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; + +class ClusterGroupTuplesTest +{ + private static final ObjectMapper MAPPER = new DefaultObjectMapper(); + + private static RowSignature tenantRegion() + { + return RowSignature.builder() + .add("tenant", ColumnType.STRING) + .add("region", ColumnType.STRING) + .build(); + } + + private static RowSignature tenantPriority() + { + return RowSignature.builder() + .add("tenant", ColumnType.STRING) + .add("priority", ColumnType.LONG) + .build(); + } + + @Test + void testConstructorRejectsNullClusteringColumns() + { + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(null, List.of(List.of("acme", "us-east-1"))) + ); + } + + @Test + void testConstructorRejectsEmptyClusteringColumns() + { + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(RowSignature.empty(), List.of()) + ); + } + + @Test + void testConstructorAllowsEmptyTuples() + { + final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), List.of()); + Assertions.assertTrue(groups.getTuples().isEmpty()); + } + + @Test + void testConstructorAllowsNullTuplesList() + { + final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), null); + Assertions.assertTrue(groups.getTuples().isEmpty()); + } + + @Test + void testConstructorRejectsTupleLengthMismatch() + { + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantRegion(), List.of(List.of("acme"))) + ); + } + + @Test + void testConstructorRejectsNullTuple() + { + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantRegion(), Arrays.asList(Arrays.asList("acme", "us-east-1"), null)) + ); + } + + @Test + void testConstructorRejectsUntypedClusteringColumn() + { + final RowSignature untyped = RowSignature.builder().add("tenant", null).build(); + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(untyped, List.of(List.of("acme"))) + ); + } + + @Test + void testConstructorRejectsUnsupportedColumnType() + { + final RowSignature arraySig = RowSignature.builder().add("arr", ColumnType.STRING_ARRAY).build(); + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(arraySig, List.of(List.of(List.of("a")))) + ); + } + + @Test + void testNullsAllowedAtAnyTuplePosition() + { + final ClusterGroupTuples groups = new ClusterGroupTuples( + tenantRegion(), + Arrays.asList( + Arrays.asList(null, "us-east-1"), + Arrays.asList("acme", null), + Arrays.asList(null, null) + ) + ); + Assertions.assertEquals(3, groups.getTuples().size()); + Assertions.assertNull(groups.getTuples().get(0).get(0)); + Assertions.assertNull(groups.getTuples().get(1).get(1)); + Assertions.assertNull(groups.getTuples().get(2).get(0)); + Assertions.assertNull(groups.getTuples().get(2).get(1)); + } + + @Test + void testEqualsAndHashCodeNullSafe() + { + final ClusterGroupTuples a = new ClusterGroupTuples( + tenantRegion(), + Arrays.asList(Arrays.asList("acme", null), Arrays.asList(null, "us-east-1")) + ); + final ClusterGroupTuples b = new ClusterGroupTuples( + tenantRegion(), + Arrays.asList(Arrays.asList("acme", null), Arrays.asList(null, "us-east-1")) + ); + Assertions.assertEquals(a, b); + Assertions.assertEquals(a.hashCode(), b.hashCode()); + } + + @Test + void testCoercionIntegerToLong() + { + final ClusterGroupTuples groups = new ClusterGroupTuples( + tenantPriority(), + List.of(List.of("acme", (Object) Integer.valueOf(5))) + ); + Assertions.assertEquals(Long.class, groups.getTuples().get(0).get(1).getClass()); + Assertions.assertEquals(5L, groups.getTuples().get(0).get(1)); + } + + @Test + void testCoercionStringToLong() + { + final ClusterGroupTuples groups = new ClusterGroupTuples( + tenantPriority(), + List.of(List.of("acme", "42")) + ); + Assertions.assertEquals(42L, groups.getTuples().get(0).get(1)); + } + + @Test + void testCoercionUnparseableStringToLongThrows() + { + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantPriority(), List.of(List.of("acme", "notanumber"))) + ); + } + + @Test + void testCoercionDoubleToFloat() + { + final RowSignature sig = RowSignature.builder().add("temp", ColumnType.FLOAT).build(); + final ClusterGroupTuples groups = new ClusterGroupTuples(sig, List.of(List.of((Object) Double.valueOf(98.6)))); + Assertions.assertEquals(Float.class, groups.getTuples().get(0).get(0).getClass()); + Assertions.assertEquals(98.6f, (Float) groups.getTuples().get(0).get(0), 0.0001f); + } + + @Test + void testCoercionStringToDouble() + { + final RowSignature sig = RowSignature.builder().add("v", ColumnType.DOUBLE).build(); + final ClusterGroupTuples groups = new ClusterGroupTuples(sig, List.of(List.of("3.14"))); + Assertions.assertEquals(Double.class, groups.getTuples().get(0).get(0).getClass()); + Assertions.assertEquals(3.14d, (Double) groups.getTuples().get(0).get(0), 0.0001d); + } + + @Test + void testCoercionAcceptsAnyTypeForString() + { + final RowSignature sig = RowSignature.builder().add("v", ColumnType.STRING).build(); + final ClusterGroupTuples groups = new ClusterGroupTuples(sig, List.of(List.of((Object) Long.valueOf(7)))); + Assertions.assertEquals("7", groups.getTuples().get(0).get(0)); + } + + @Test + void testJsonRoundTripPreservesCoercedTypes() throws Exception + { + final ClusterGroupTuples groups = new ClusterGroupTuples( + tenantPriority(), + List.of(List.of("acme", (Object) 5), List.of("globex", (Object) "10")) + ); + final String json = MAPPER.writeValueAsString(groups); + final ClusterGroupTuples back = MAPPER.readValue(json, ClusterGroupTuples.class); + Assertions.assertEquals(groups, back); + // Round-tripped tuples must end up with the same canonical types as the in-memory original. + Assertions.assertEquals(Long.class, back.getTuples().get(0).get(1).getClass()); + Assertions.assertEquals(Long.class, back.getTuples().get(1).get(1).getClass()); + } + + @Test + void testTuplesAreImmutable() + { + final ClusterGroupTuples groups = new ClusterGroupTuples( + tenantRegion(), + List.of(List.of("acme", "us-east-1")) + ); + Assertions.assertThrows( + UnsupportedOperationException.class, + () -> groups.getTuples().add(List.of("globex", "us-east-1")) + ); + Assertions.assertThrows( + UnsupportedOperationException.class, + () -> groups.getTuples().get(0).set(0, "hijacked") + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentClusterGroupsTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentClusterGroupsTest.java new file mode 100644 index 000000000000..a0f54db18656 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/timeline/DataSegmentClusterGroupsTest.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.timeline; + +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.List; + +/** + * Coverage for {@link DataSegment#getClusterGroups()} and the associated builder / interner plumbing. + */ +class DataSegmentClusterGroupsTest +{ + private final ObjectMapper mapper = new DefaultObjectMapper(); + + @BeforeEach + void setUp() + { + final InjectableValues.Std injectables = new InjectableValues.Std(); + injectables.addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT); + mapper.setInjectableValues(injectables); + } + + private static RowSignature tenantRegion() + { + return RowSignature.builder() + .add("tenant", ColumnType.STRING) + .add("region", ColumnType.STRING) + .build(); + } + + private static ClusterGroupTuples sampleGroups() + { + return new ClusterGroupTuples( + tenantRegion(), + List.of(List.of("acme", "us-east-1"), List.of("globex", "us-east-1")) + ); + } + + private DataSegment.Builder baseBuilder() + { + return DataSegment.builder(SegmentId.of( + "ds", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).size(1L); + } + + @Test + void testNullByDefault() + { + final DataSegment segment = baseBuilder().build(); + Assertions.assertNull(segment.getClusterGroups()); + } + + @Test + void testBuilderAttachesClusterGroups() + { + final ClusterGroupTuples groups = sampleGroups(); + final DataSegment segment = baseBuilder().clusterGroups(groups).build(); + Assertions.assertEquals(groups, segment.getClusterGroups()); + } + + @Test + void testBuilderCopyPreservesClusterGroups() + { + final ClusterGroupTuples groups = sampleGroups(); + final DataSegment original = baseBuilder().clusterGroups(groups).build(); + final DataSegment copy = DataSegment.builder(original).build(); + Assertions.assertSame(original.getClusterGroups(), copy.getClusterGroups()); + } + + @Test + void testWithClusterGroupsCanClearField() + { + final DataSegment original = baseBuilder().clusterGroups(sampleGroups()).build(); + final DataSegment cleared = original.withClusterGroups(null); + Assertions.assertNull(cleared.getClusterGroups()); + } + + @Test + void testJsonRoundTripWithClusterGroups() throws Exception + { + final DataSegment segment = baseBuilder().clusterGroups(sampleGroups()).build(); + final String json = mapper.writeValueAsString(segment); + Assertions.assertTrue(json.contains("\"clusterGroups\""), () -> "expected clusterGroups in JSON: " + json); + final DataSegment back = mapper.readValue(json, DataSegment.class); + Assertions.assertEquals(segment.getClusterGroups(), back.getClusterGroups()); + } + + @Test + void testJsonRoundTripOmitsNullClusterGroups() throws Exception + { + final DataSegment segment = baseBuilder().build(); + final String json = mapper.writeValueAsString(segment); + Assertions.assertFalse(json.contains("clusterGroups"), () -> "did not expect clusterGroups in JSON: " + json); + final DataSegment back = mapper.readValue(json, DataSegment.class); + Assertions.assertNull(back.getClusterGroups()); + } + + @Test + void testOlderJsonWithoutFieldDeserializes() throws Exception + { + // A representative pre-clusterGroups JSON shape; the field is simply absent. Older readers (and segments published + // before this PR) don't carry it. + final String legacyJson = + "{\"dataSource\":\"ds\"," + + "\"interval\":\"2026-01-01T00:00:00.000Z/2026-01-02T00:00:00.000Z\"," + + "\"version\":\"v\"," + + "\"loadSpec\":{}," + + "\"dimensions\":\"\",\"metrics\":\"\"," + + "\"shardSpec\":{\"type\":\"numbered\",\"partitionNum\":0,\"partitions\":1}," + + "\"binaryVersion\":0,\"size\":1}"; + final DataSegment back = mapper.readValue(legacyJson, DataSegment.class); + Assertions.assertNull(back.getClusterGroups()); + } + + @Test + void testInternerReturnsSameInstanceAcrossSegments() + { + final DataSegment a = baseBuilder().clusterGroups(sampleGroups()).build(); + // sampleGroups() builds a fresh instance each call; the interner should dedupe identical content. + final DataSegment b = DataSegment.builder(SegmentId.of( + "ds-other", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).size(1L).clusterGroups(sampleGroups()).build(); + Assertions.assertSame(a.getClusterGroups(), b.getClusterGroups()); + } + + @Test + void testInternerDistinguishesDifferentContent() + { + final DataSegment a = baseBuilder().clusterGroups(sampleGroups()).build(); + final ClusterGroupTuples otherGroups = new ClusterGroupTuples( + tenantRegion(), + List.of(List.of("globex", "us-west-2")) + ); + final DataSegment b = DataSegment.builder(SegmentId.of( + "ds-other", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).size(1L).clusterGroups(otherGroups).build(); + Assertions.assertNotSame(a.getClusterGroups(), b.getClusterGroups()); + } +} diff --git a/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java b/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java index d1bc533269cd..c83f1b83b172 100644 --- a/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java +++ b/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java @@ -24,13 +24,15 @@ import com.google.inject.Binder; import org.apache.druid.initialization.DruidModule; import org.apache.druid.segment.loading.LoadSpec; +import org.apache.druid.segment.loading.PartialClusterGroupLoadSpec; import org.apache.druid.segment.loading.PartialProjectionLoadSpec; import java.util.List; /** - * Registers {@link PartialProjectionLoadSpec} as a {@link LoadSpec} subtype for serde of partial load rules. This - * module is added to the always-loaded core list so it is available alongside any other deep-storage load spec modules. + * Registers {@link PartialProjectionLoadSpec} and {@link PartialClusterGroupLoadSpec} as {@link LoadSpec} subtypes + * for serde of partial load rules. This module is added to the always-loaded core list so they are available + * alongside any other deep-storage load spec modules. */ public class PartialLoadSpecModule implements DruidModule { @@ -43,6 +45,8 @@ public void configure(Binder binder) @Override public List getJacksonModules() { - return List.of(new SimpleModule().registerSubtypes(PartialProjectionLoadSpec.class)); + return List.of( + new SimpleModule().registerSubtypes(PartialProjectionLoadSpec.class, PartialClusterGroupLoadSpec.class) + ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java b/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java index 427c670cf4cf..d853c675890a 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java +++ b/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import org.apache.druid.jackson.CommaListJoinDeserializer; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.timeline.ClusterGroupTuples; import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.ShardSpec; @@ -50,6 +51,7 @@ private LoadableDataSegment( @JsonProperty("dimensions") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List dimensions, @JsonProperty("metrics") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List metrics, @JsonProperty("projections") @JsonDeserialize(using = CommaListJoinDeserializer.class) @Nullable List projections, + @JsonProperty("clusterGroups") @Nullable ClusterGroupTuples clusterGroups, @JsonProperty("shardSpec") @Nullable ShardSpec shardSpec, @JsonProperty("lastCompactionState") @Nullable CompactionState lastCompactionState, @JsonProperty("binaryVersion") Integer binaryVersion, @@ -66,6 +68,7 @@ private LoadableDataSegment( dimensions, metrics, projections, + clusterGroups, shardSpec, lastCompactionState, binaryVersion, diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java new file mode 100644 index 000000000000..f69dc902cab6 --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import com.google.common.io.BaseEncoding; +import org.apache.druid.timeline.DataSegment; + +import javax.annotation.Nullable; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Base for {@link PartialLoadMatcher} implementations that decide which of a clustered segment's cluster groups to + * partially load. Subclasses supply the resolution policy via {@link #resolveClusterGroupIndices(DataSegment)} — the + * sorted, deduped indices into {@code segment.getClusterGroups().getTuples()} — and this base handles fingerprint + * computation and wraps the result into the {@code partialClusterGroup} load-spec wire form consumed by the + * historical-side partial loader. + *

+ * The fingerprint is a hash of the resolved indices for a segment; the data node includes this value in the segment + * announcement so the coordinator can detect rule changes between runs and reconcile loaded replicas. + */ +public abstract class ClusterGroupPartialLoadMatcher implements PartialLoadMatcher +{ + static final String LOAD_SPEC_TYPE = "partialClusterGroup"; + static final String FINGERPRINT_VERSION = "v1"; + + /** + * Returns the sorted, deduped list of indices into {@code segment.getClusterGroups().getTuples()} selected by this + * matcher. Returns an empty list when nothing matches (the segment is not clustered, or no configured pattern / + * tuple intersects what the segment has). + */ + protected abstract List resolveClusterGroupIndices(DataSegment segment); + + @Override + @Nullable + public MatchResult match(DataSegment segment, Map baseLoadSpec) + { + if (segment.getClusterGroups() == null) { + return null; + } + final List resolved = resolveClusterGroupIndices(segment); + if (resolved.isEmpty()) { + return null; + } + final String fingerprint = computeFingerprint(resolved); + final Map wrapped = Map.of( + "type", LOAD_SPEC_TYPE, + "delegate", baseLoadSpec, + "clusterGroupIndices", resolved, + "fingerprint", fingerprint + ); + return new MatchResult(wrapped, fingerprint); + } + + static String computeFingerprint(List sortedDedupedIndices) + { + final Hasher hasher = Hashing.sha256().newHasher(); + for (Integer idx : sortedDedupedIndices) { + hasher.putInt(idx); + } + final String hex = BaseEncoding.base16().encode(hasher.hash().asBytes()).toLowerCase(Locale.ROOT); + // should be good enough without dragging the whole thing around for every segment + return FINGERPRINT_VERSION + ":" + hex.substring(0, 16); + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java new file mode 100644 index 000000000000..e6ba0be03826 --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.timeline.ClusterGroupTuples; +import org.apache.druid.timeline.DataSegment; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.TreeSet; + +/** + * Selects cluster groups whose clustering tuples exactly match one of the configured tuples, by typed equality. + * Tuples are positional (operator authors them in the segment's clustering-column order) and may include nulls. Each + * operator-supplied tuple is coerced to the segment's clustering-column types at match time via + * {@link ClusterGroupTuples#coerceValue}; coercion failures (e.g., a string in a LONG column) skip the offending + * tuple for that segment rather than failing the rule. Tuples whose length doesn't match the segment's clustering + * column count are likewise skipped — safe failure mode when segments with different clustering schemas appear under + * the same rule. + *

+ * Complement to {@link WildcardClusterGroupPartialLoadMatcher}: use the wildcard form for pattern-based partial loads + * and use this form when you need to load specific tuples verbatim, especially those containing null clustering + * values which the wildcard form can't constrain. + */ +public class ExactClusterGroupPartialLoadMatcher extends ClusterGroupPartialLoadMatcher +{ + public static final String TYPE = "exactClusterGroup"; + + private final List> tuples; + + @JsonCreator + public ExactClusterGroupPartialLoadMatcher(@JsonProperty("tuples") List> tuples) + { + if (tuples == null || tuples.isEmpty()) { + throw InvalidInput.exception("tuples must not be null or empty for exactClusterGroup matcher"); + } + final List> copied = new ArrayList<>(tuples.size()); + for (int i = 0; i < tuples.size(); i++) { + final List tuple = tuples.get(i); + if (tuple == null) { + throw InvalidInput.exception("tuples[%s] must not be null", i); + } + // ArrayList copy here (not List.copyOf) so nulls inside the tuple are preserved. + copied.add(Collections.unmodifiableList(new ArrayList<>(tuple))); + } + this.tuples = Collections.unmodifiableList(copied); + } + + @JsonProperty + public List> getTuples() + { + return tuples; + } + + @Override + protected List resolveClusterGroupIndices(DataSegment segment) + { + final ClusterGroupTuples clusterGroups = segment.getClusterGroups(); + if (clusterGroups == null) { + return Collections.emptyList(); + } + final RowSignature clusteringColumns = clusterGroups.getClusteringColumns(); + final int numCols = clusteringColumns.size(); + final List> segmentTuples = clusterGroups.getTuples(); + + final TreeSet matched = new TreeSet<>(); + for (List ruleTuple : tuples) { + if (ruleTuple.size() != numCols) { + // Length mismatch — operator's tuples are for a different clustering schema; no match against this segment. + continue; + } + final List coerced = tryCoerce(ruleTuple, clusteringColumns); + if (coerced == null) { + continue; + } + for (int i = 0; i < segmentTuples.size(); i++) { + if (segmentTuples.get(i).equals(coerced)) { + matched.add(i); + } + } + } + return new ArrayList<>(matched); + } + + /** + * Coerce an operator-supplied tuple to the segment's clustering-column types. Returns null if any value can't be + * coerced — caller treats that as "skip this tuple for this segment" rather than failing. + */ + private static List tryCoerce(List ruleTuple, RowSignature clusteringColumns) + { + final int numCols = clusteringColumns.size(); + final Object[] out = new Object[numCols]; + for (int i = 0; i < numCols; i++) { + final String name = clusteringColumns.getColumnName(i); + final ColumnType type = clusteringColumns.getColumnType(i).orElse(null); + if (type == null) { + return null; + } + try { + out[i] = ClusterGroupTuples.coerceValue(name, type, ruleTuple.get(i)); + } + catch (DruidException e) { + return null; + } + } + return Arrays.asList(out); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ExactClusterGroupPartialLoadMatcher that = (ExactClusterGroupPartialLoadMatcher) o; + return Objects.equals(tuples, that.tuples); + } + + @Override + public int hashCode() + { + return Objects.hash(tuples); + } + + @Override + public String toString() + { + return "ExactClusterGroupPartialLoadMatcher{tuples=" + tuples + "}"; + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java new file mode 100644 index 000000000000..b29e9b0cedda --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import org.apache.druid.error.InvalidInput; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +/** + * Shared glob compilation + matching helpers used by partial-load matchers that match operator-supplied glob patterns + * against strings (projection names, stringified cluster-group values, etc.). Supported metacharacters: + *
    + *
  • {@code *} — any sequence of characters (including empty)
  • + *
  • {@code ?} — any single character
  • + *
  • {@code \} — escapes the following character so it is treated literally; use {@code \*}, {@code \?}, or + * {@code \\} to match a literal {@code *}, {@code ?}, or {@code \}. A trailing unescaped {@code \} is + * rejected.
  • + *
+ * Other characters are literal; regex metacharacters in literal positions are escaped automatically. + */ +final class Globs +{ + private Globs() + { + } + + /** + * Translates a glob pattern into a regex pattern that matches the entire input string. + * + * @throws org.apache.druid.error.DruidException if {@code glob} ends with an unescaped backslash + */ + static String globToRegex(String glob) + { + final StringBuilder sb = new StringBuilder(glob.length() + 4); + boolean escaping = false; + for (int i = 0; i < glob.length(); i++) { + final char c = glob.charAt(i); + if (escaping) { + appendLiteral(sb, c); + escaping = false; + continue; + } + switch (c) { + case '\\': + escaping = true; + break; + case '*': + sb.append(".*"); + break; + case '?': + sb.append('.'); + break; + default: + appendLiteral(sb, c); + } + } + if (escaping) { + throw InvalidInput.exception("Glob pattern [%s] ends with an unescaped backslash", glob); + } + return sb.toString(); + } + + static List compileAll(List globs) + { + if (globs.isEmpty()) { + return List.of(); + } + final List compiled = new ArrayList<>(globs.size()); + for (String glob : globs) { + compiled.add(Pattern.compile(globToRegex(glob))); + } + return List.copyOf(compiled); + } + + static boolean matchesAny(String name, List patterns) + { + for (Pattern pattern : patterns) { + if (pattern.matcher(name).matches()) { + return true; + } + } + return false; + } + + private static void appendLiteral(StringBuilder sb, char c) + { + switch (c) { + case '.': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '+': + case '|': + case '^': + case '$': + case '\\': + case '*': + case '?': + sb.append('\\').append(c); + break; + default: + sb.append(c); + } + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java index b1c9c78ace11..00eadd91f99e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java @@ -37,7 +37,9 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = UnknownPartialLoadMatcher.class) @JsonSubTypes({ @JsonSubTypes.Type(name = ExactProjectionPartialLoadMatcher.TYPE, value = ExactProjectionPartialLoadMatcher.class), - @JsonSubTypes.Type(name = WildcardProjectionPartialLoadMatcher.TYPE, value = WildcardProjectionPartialLoadMatcher.class) + @JsonSubTypes.Type(name = WildcardProjectionPartialLoadMatcher.TYPE, value = WildcardProjectionPartialLoadMatcher.class), + @JsonSubTypes.Type(name = ExactClusterGroupPartialLoadMatcher.TYPE, value = ExactClusterGroupPartialLoadMatcher.class), + @JsonSubTypes.Type(name = WildcardClusterGroupPartialLoadMatcher.TYPE, value = WildcardClusterGroupPartialLoadMatcher.class) }) public interface PartialLoadMatcher { diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java new file mode 100644 index 000000000000..7b6882bdf21a --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.InvalidInput; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.timeline.ClusterGroupTuples; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.utils.CollectionUtils; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeSet; +import java.util.regex.Pattern; + +/** + * Selects cluster groups whose clustering tuples match any of the configured per-column glob patterns, minus any + * groups matched by an entry in {@code excludePatterns}. Each pattern is a {@code Map} where keys are + * clustering column names and values are glob patterns matched against the rendered tuple value at that column. + * Columns omitted from a pattern are treated as wildcards. Glob syntax (including escape semantics) is shared with + * {@link WildcardProjectionPartialLoadMatcher} via {@link Globs}. + *

+ * Null clustering values are matched only by omitting the column entirely or using the literal {@code "*"} glob; both + * have explicit "match-anything-including-null" semantics. Any other glob (including {@code "null"}) does not match a + * null clustering value; for those cases use {@link ExactClusterGroupPartialLoadMatcher}. + */ +public class WildcardClusterGroupPartialLoadMatcher extends ClusterGroupPartialLoadMatcher +{ + public static final String TYPE = "globClusterGroup"; + + private final List> patterns; + private final List> excludePatterns; + private final List> compiledPatterns; + private final List> compiledExcludePatterns; + + @JsonCreator + public WildcardClusterGroupPartialLoadMatcher( + @JsonProperty("patterns") List> patterns, + @JsonProperty("excludePatterns") @Nullable List> excludePatterns + ) + { + if (patterns == null || patterns.isEmpty()) { + throw InvalidInput.exception("patterns must not be null or empty for globClusterGroup matcher"); + } + this.patterns = copyAndValidatePatterns(patterns, "patterns"); + this.excludePatterns = excludePatterns == null + ? List.of() + : copyAndValidatePatterns(excludePatterns, "excludePatterns"); + this.compiledPatterns = compileAll(this.patterns); + this.compiledExcludePatterns = compileAll(this.excludePatterns); + } + + @JsonProperty + public List> getPatterns() + { + return patterns; + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public List> getExcludePatterns() + { + return excludePatterns; + } + + @Override + protected List resolveClusterGroupIndices(DataSegment segment) + { + final ClusterGroupTuples clusterGroups = segment.getClusterGroups(); + if (clusterGroups == null) { + return Collections.emptyList(); + } + final RowSignature clusteringColumns = clusterGroups.getClusteringColumns(); + final List> tuples = clusterGroups.getTuples(); + final TreeSet matched = new TreeSet<>(); + for (int i = 0; i < tuples.size(); i++) { + final List tuple = tuples.get(i); + if (!matchesAnyPattern(tuple, clusteringColumns, compiledPatterns)) { + continue; + } + if (matchesAnyPattern(tuple, clusteringColumns, compiledExcludePatterns)) { + continue; + } + matched.add(i); + } + return new ArrayList<>(matched); + } + + private static boolean matchesAnyPattern( + List tuple, + RowSignature clusteringColumns, + List> patterns + ) + { + for (Map pattern : patterns) { + if (matchesPattern(tuple, clusteringColumns, pattern)) { + return true; + } + } + return false; + } + + private static boolean matchesPattern( + List tuple, + RowSignature clusteringColumns, + Map pattern + ) + { + for (Map.Entry entry : pattern.entrySet()) { + final int idx = clusteringColumns.indexOf(entry.getKey()); + if (idx < 0) { + // Pattern references a column the segment doesn't have. Defensive: a typo shouldn't silently broaden the + // match, so treat the whole pattern as non-matching for this segment. + return false; + } + final CompiledGlob glob = entry.getValue(); + if (glob.matchAny) { + // Literal "*" — matches every value, including null. + continue; + } + final Object value = tuple.get(idx); + if (value == null) { + // Any non-"*" glob cannot match a null clustering value. Use ExactClusterGroupPartialLoadMatcher for that. + return false; + } + if (!glob.pattern.matcher(value.toString()).matches()) { + return false; + } + } + return true; + } + + private static List> copyAndValidatePatterns(List> input, String fieldName) + { + final List> out = new ArrayList<>(input.size()); + for (int i = 0; i < input.size(); i++) { + final Map pattern = input.get(i); + if (pattern == null || pattern.isEmpty()) { + throw InvalidInput.exception("%s[%s] must not be null or empty", fieldName, i); + } + out.add(Map.copyOf(pattern)); + } + return List.copyOf(out); + } + + private static List> compileAll(List> patterns) + { + if (patterns.isEmpty()) { + return List.of(); + } + final List> out = new ArrayList<>(patterns.size()); + for (Map pattern : patterns) { + final Map compiled = CollectionUtils.newLinkedHashMapWithExpectedSize(pattern.size()); + for (Map.Entry entry : pattern.entrySet()) { + compiled.put(entry.getKey(), CompiledGlob.of(entry.getValue())); + } + out.add(Collections.unmodifiableMap(compiled)); + } + return List.copyOf(out); + } + + /** + * Compiled per-column glob. The literal {@code "*"} short-circuits to a "match any value, including null" flag; + * any other glob compiles to a regex matched against the rendered tuple value (and never matches null). + */ + private static final class CompiledGlob + { + static final CompiledGlob MATCH_ANY = new CompiledGlob(true, null); + + final boolean matchAny; + @Nullable + final Pattern pattern; + + private CompiledGlob(boolean matchAny, @Nullable Pattern pattern) + { + this.matchAny = matchAny; + this.pattern = pattern; + } + + static CompiledGlob of(String glob) + { + if ("*".equals(glob)) { + return MATCH_ANY; + } + return new CompiledGlob(false, Pattern.compile(Globs.globToRegex(glob))); + } + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + WildcardClusterGroupPartialLoadMatcher that = (WildcardClusterGroupPartialLoadMatcher) o; + return Objects.equals(patterns, that.patterns) && Objects.equals(excludePatterns, that.excludePatterns); + } + + @Override + public int hashCode() + { + return Objects.hash(patterns, excludePatterns); + } + + @Override + public String toString() + { + return "WildcardClusterGroupPartialLoadMatcher{patterns=" + patterns + + ", excludePatterns=" + excludePatterns + "}"; + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardProjectionPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardProjectionPartialLoadMatcher.java index 934eaf6f9589..5adf3c09642f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardProjectionPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardProjectionPartialLoadMatcher.java @@ -72,8 +72,8 @@ public WildcardProjectionPartialLoadMatcher( } this.patterns = List.copyOf(patterns); this.excludePatterns = excludePatterns == null ? List.of() : List.copyOf(excludePatterns); - this.compiledPatterns = compileAll(this.patterns); - this.compiledExcludePatterns = compileAll(this.excludePatterns); + this.compiledPatterns = Globs.compileAll(this.patterns); + this.compiledExcludePatterns = Globs.compileAll(this.excludePatterns); } @JsonProperty @@ -98,99 +98,16 @@ protected List resolveProjectionNames(DataSegment segment) } final TreeSet matched = new TreeSet<>(); for (String name : segmentProjections) { - if (matchesAny(name, compiledExcludePatterns)) { + if (Globs.matchesAny(name, compiledExcludePatterns)) { continue; } - if (matchesAny(name, compiledPatterns)) { + if (Globs.matchesAny(name, compiledPatterns)) { matched.add(name); } } return new ArrayList<>(matched); } - private static List compileAll(List globs) - { - if (globs.isEmpty()) { - return List.of(); - } - final List compiled = new ArrayList<>(globs.size()); - for (String glob : globs) { - compiled.add(Pattern.compile(globToRegex(glob))); - } - return List.copyOf(compiled); - } - - private static boolean matchesAny(String name, List patterns) - { - for (Pattern pattern : patterns) { - if (pattern.matcher(name).matches()) { - return true; - } - } - return false; - } - - /** - * Translates a glob pattern with {@code *}, {@code ?}, and {@code \} escape semantics into an equivalent regex - * pattern that matches the entire input string. Regex metacharacters in literal positions are escaped. - * - * @throws org.apache.druid.error.DruidException if {@code glob} ends with an unescaped backslash - */ - static String globToRegex(String glob) - { - final StringBuilder sb = new StringBuilder(glob.length() + 4); - boolean escaping = false; - for (int i = 0; i < glob.length(); i++) { - final char c = glob.charAt(i); - if (escaping) { - appendLiteral(sb, c); - escaping = false; - continue; - } - switch (c) { - case '\\': - escaping = true; - break; - case '*': - sb.append(".*"); - break; - case '?': - sb.append('.'); - break; - default: - appendLiteral(sb, c); - } - } - if (escaping) { - throw InvalidInput.exception("Glob pattern [%s] ends with an unescaped backslash", glob); - } - return sb.toString(); - } - - private static void appendLiteral(StringBuilder sb, char c) - { - switch (c) { - case '.': - case '(': - case ')': - case '[': - case ']': - case '{': - case '}': - case '+': - case '|': - case '^': - case '$': - case '\\': - case '*': - case '?': - sb.append('\\').append(c); - break; - default: - sb.append(c); - } - } - @Override public boolean equals(Object o) { diff --git a/server/src/test/java/org/apache/druid/client/CachingClusteredClientTest.java b/server/src/test/java/org/apache/druid/client/CachingClusteredClientTest.java index fa5971e96570..4f76b7b52c17 100644 --- a/server/src/test/java/org/apache/druid/client/CachingClusteredClientTest.java +++ b/server/src/test/java/org/apache/druid/client/CachingClusteredClientTest.java @@ -2761,6 +2761,7 @@ private MyDataSegment() null, null, null, + null, NoneShardSpec.instance(), null, -1, diff --git a/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java b/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java index 1546eb76cdfb..afee36e530b0 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java @@ -248,6 +248,7 @@ private NumberedDataSegment( Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + null, shardSpec, compactionState, IndexIO.CURRENT_VERSION_ID, diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcherTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcherTest.java new file mode 100644 index 000000000000..677009900439 --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcherTest.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.error.DruidException; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.timeline.ClusterGroupTuples; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +class ExactClusterGroupPartialLoadMatcherTest +{ + private static final Map BASE_LOAD_SPEC = Map.of("type", "local", "path", "/seg"); + + private final ObjectMapper mapper = new DefaultObjectMapper(); + + @BeforeEach + void setUp() + { + final InjectableValues.Std injectables = new InjectableValues.Std(); + injectables.addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT); + mapper.setInjectableValues(injectables); + } + + private static RowSignature tenantRegion() + { + return RowSignature.builder() + .add("tenant", ColumnType.STRING) + .add("region", ColumnType.STRING) + .build(); + } + + private static RowSignature tenantPriority() + { + return RowSignature.builder() + .add("tenant", ColumnType.STRING) + .add("priority", ColumnType.LONG) + .build(); + } + + private static DataSegment segmentWithGroups(ClusterGroupTuples groups) + { + return DataSegment.builder(SegmentId.of( + "ds", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).loadSpec(BASE_LOAD_SPEC).size(0).clusterGroups(groups).build(); + } + + private static DataSegment threeGroupSegment() + { + return segmentWithGroups(new ClusterGroupTuples( + tenantRegion(), + Arrays.asList( + Arrays.asList("acme", "us-east-1"), + Arrays.asList("acme", "us-west-2"), + Arrays.asList("globex", null) + ) + )); + } + + @Test + void testConstructorRejectsNullTuples() + { + Assertions.assertThrows(DruidException.class, () -> new ExactClusterGroupPartialLoadMatcher(null)); + } + + @Test + void testConstructorRejectsEmptyTuples() + { + Assertions.assertThrows(DruidException.class, () -> new ExactClusterGroupPartialLoadMatcher(List.of())); + } + + @Test + void testConstructorRejectsNullTupleEntry() + { + Assertions.assertThrows( + DruidException.class, + () -> new ExactClusterGroupPartialLoadMatcher(Arrays.asList(Arrays.asList("acme", "us-east-1"), null)) + ); + } + + @Test + void testNonClusteredSegmentReturnsNull() + { + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", "us-east-1")) + ); + final DataSegment segment = DataSegment.builder(SegmentId.of( + "ds", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).loadSpec(BASE_LOAD_SPEC).size(0).build(); + Assertions.assertNull(matcher.match(segment, BASE_LOAD_SPEC)); + } + + @Test + void testSingleTupleMatch() + { + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", "us-east-1")) + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testMultiTupleMatch() + { + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", "us-east-1"), List.of("acme", "us-west-2")) + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0, 1), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testNullValueInRuleTupleMatchesNullInSegmentTuple() + { + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + Arrays.asList(Arrays.asList("globex", null)) + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(2), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testTupleLengthMismatchSkippedForSegment() + { + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme")) // segment has 2 clustering columns; this 1-col tuple won't match. + ); + Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); + } + + @Test + void testCoercionAtMatchTime() + { + // Operator wrote Integer 5 (Jackson default for small ints); segment stores Long 5 (coerced at construction). + final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( + tenantPriority(), + List.of(List.of("acme", (Object) 5)) + )); + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", (Object) 5)) // Integer here; should coerce to Long for equality. + ); + final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testCoercionFailureSkipsTuple() + { + // String "acme" cannot be coerced to LONG for the priority column. The matcher silently skips that tuple + // rather than throwing; other tuples in the matcher remain effective. + final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( + tenantPriority(), + List.of(List.of("acme", (Object) 5L), List.of("globex", (Object) 7L)) + )); + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of( + List.of("acme", "not_a_number"), // bad type for priority — skipped. + List.of("globex", (Object) 7) + ) + ); + final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(1), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testNoMatchReturnsNull() + { + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("nobody", "us-east-1")) + ); + Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); + } + + @Test + void testIndicesAreSortedAndDeduped() + { + // Same tuple twice in the matcher config — deduped in the output. + final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", "us-east-1"), List.of("acme", "us-east-1")) + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testFingerprintStableAcrossTupleOrder() + { + final ExactClusterGroupPartialLoadMatcher a = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", "us-east-1"), List.of("acme", "us-west-2")) + ); + final ExactClusterGroupPartialLoadMatcher b = new ExactClusterGroupPartialLoadMatcher( + List.of(List.of("acme", "us-west-2"), List.of("acme", "us-east-1")) + ); + Assertions.assertEquals( + a.match(threeGroupSegment(), BASE_LOAD_SPEC).fingerprint(), + b.match(threeGroupSegment(), BASE_LOAD_SPEC).fingerprint() + ); + } + + @Test + void testJsonRoundTrip() throws Exception + { + final ExactClusterGroupPartialLoadMatcher original = new ExactClusterGroupPartialLoadMatcher( + Arrays.asList( + Arrays.asList("acme", "us-east-1"), + Arrays.asList("globex", null) + ) + ); + final String json = mapper.writeValueAsString(original); + final PartialLoadMatcher back = mapper.readValue(json, PartialLoadMatcher.class); + Assertions.assertEquals(original, back); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java new file mode 100644 index 000000000000..5ea42b7345ea --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import org.apache.druid.error.DruidException; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.regex.Pattern; + +/** + * Focused unit coverage for the shared {@link Globs} helpers. Integration semantics (e.g., how patterns interact with + * excludes inside a matcher) live with the matcher tests; this file pins the translation rules so future refactors + * don't drift them silently. + */ +class GlobsTest +{ + @Test + void testLiteralGlobMatchesItself() + { + Assertions.assertEquals("user_daily", Globs.globToRegex("user_daily")); + } + + @Test + void testStarTranslatesToDotStar() + { + Assertions.assertEquals("user_.*", Globs.globToRegex("user_*")); + } + + @Test + void testQuestionMarkTranslatesToDot() + { + Assertions.assertEquals("user_.", Globs.globToRegex("user_?")); + } + + @Test + void testRegexMetacharactersInLiteralPositionsAreEscaped() + { + // Each of these has special regex meaning; the translator must escape them so they match literally. + Assertions.assertEquals("user\\.daily", Globs.globToRegex("user.daily")); + Assertions.assertEquals("a\\+b", Globs.globToRegex("a+b")); + Assertions.assertEquals("\\(foo\\)", Globs.globToRegex("(foo)")); + Assertions.assertEquals("a\\|b", Globs.globToRegex("a|b")); + Assertions.assertEquals("\\^start", Globs.globToRegex("^start")); + Assertions.assertEquals("end\\$", Globs.globToRegex("end$")); + } + + @Test + void testBackslashEscapesGlobMetacharacters() + { + // Escaped glob metacharacters become literal regex characters (themselves escaped). + Assertions.assertEquals("a\\*b", Globs.globToRegex("a\\*b")); + Assertions.assertEquals("a\\?b", Globs.globToRegex("a\\?b")); + Assertions.assertEquals("a\\\\b", Globs.globToRegex("a\\\\b")); + } + + @Test + void testTrailingUnescapedBackslashIsRejected() + { + Assertions.assertThrows(DruidException.class, () -> Globs.globToRegex("foo\\")); + } + + @Test + void testCompileAllReturnsEmptyListForEmptyInput() + { + Assertions.assertTrue(Globs.compileAll(List.of()).isEmpty()); + } + + @Test + void testCompileAllCompilesEachPattern() + { + final List compiled = Globs.compileAll(List.of("user_*", "report_?")); + Assertions.assertEquals(2, compiled.size()); + Assertions.assertTrue(compiled.get(0).matcher("user_daily").matches()); + Assertions.assertTrue(compiled.get(1).matcher("report_x").matches()); + Assertions.assertFalse(compiled.get(1).matcher("report_xy").matches()); + } + + @Test + void testMatchesAnyReturnsTrueOnFirstHit() + { + final List compiled = Globs.compileAll(List.of("user_*", "report_*")); + Assertions.assertTrue(Globs.matchesAny("user_daily", compiled)); + Assertions.assertTrue(Globs.matchesAny("report_hourly", compiled)); + Assertions.assertFalse(Globs.matchesAny("other", compiled)); + } + + @Test + void testMatchesAnyEmptyPatternListNeverMatches() + { + Assertions.assertFalse(Globs.matchesAny("anything", List.of())); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java new file mode 100644 index 000000000000..00b149285d4b --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.error.DruidException; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.timeline.ClusterGroupTuples; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +class WildcardClusterGroupPartialLoadMatcherTest +{ + private static final Map BASE_LOAD_SPEC = Map.of("type", "local", "path", "/seg"); + + private final ObjectMapper mapper = new DefaultObjectMapper(); + + @BeforeEach + void setUp() + { + final InjectableValues.Std injectables = new InjectableValues.Std(); + injectables.addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT); + mapper.setInjectableValues(injectables); + } + + private static RowSignature tenantRegion() + { + return RowSignature.builder() + .add("tenant", ColumnType.STRING) + .add("region", ColumnType.STRING) + .build(); + } + + /** A 3-group fixture: (acme, us-east-1), (acme, us-west-2), (globex, us-east-1). */ + private static DataSegment threeGroupSegment() + { + return segmentWithGroups(new ClusterGroupTuples( + tenantRegion(), + List.of( + List.of("acme", "us-east-1"), + List.of("acme", "us-west-2"), + List.of("globex", "us-east-1") + ) + )); + } + + private static DataSegment segmentWithGroups(ClusterGroupTuples groups) + { + return DataSegment.builder(SegmentId.of( + "ds", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).loadSpec(BASE_LOAD_SPEC).size(0).clusterGroups(groups).build(); + } + + @Test + void testConstructorRejectsNullPatterns() + { + Assertions.assertThrows(DruidException.class, () -> new WildcardClusterGroupPartialLoadMatcher(null, null)); + } + + @Test + void testConstructorRejectsEmptyPatterns() + { + Assertions.assertThrows(DruidException.class, () -> new WildcardClusterGroupPartialLoadMatcher(List.of(), null)); + } + + @Test + void testConstructorRejectsEmptyPatternMap() + { + Assertions.assertThrows( + DruidException.class, + () -> new WildcardClusterGroupPartialLoadMatcher(List.of(Map.of()), null) + ); + } + + @Test + void testConstructorRejectsEmptyExcludePatternMap() + { + Assertions.assertThrows( + DruidException.class, + () -> new WildcardClusterGroupPartialLoadMatcher(List.of(Map.of("tenant", "*")), List.of(Map.of())) + ); + } + + @Test + void testNonClusteredSegmentReturnsNull() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null + ); + final DataSegment segment = DataSegment.builder(SegmentId.of( + "ds", + Intervals.of("2026-01-01/2026-01-02"), + "v", + new NumberedShardSpec(0, 1) + )).loadSpec(BASE_LOAD_SPEC).size(0).build(); + Assertions.assertNull(matcher.match(segment, BASE_LOAD_SPEC)); + } + + @Test + void testSingleColumnExactMatch() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertNotNull(result); + Assertions.assertEquals(List.of(0, 1), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testSingleColumnGlob() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("region", "us-east-*")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertNotNull(result); + Assertions.assertEquals(List.of(0, 2), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testMultiColumnPatternWithExactAndGlob() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "globex", "region", "us-east-*")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(2), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testOmittedColumnIsWildcard() + { + // No region constraint -> all acme rows match regardless of region. + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0, 1), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testExcludePatternRemovesGroups() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + List.of(Map.of("region", "us-west-2")) + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testStarMatchesAllIncludingNullValuesAtThatColumn() + { + final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( + tenantRegion(), + Arrays.asList( + Arrays.asList("acme", "us-east-1"), + Arrays.asList("acme", null), + Arrays.asList(null, "us-east-1") + ) + )); + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("region", "*")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); + // The "*" glob matches every value including null at the region position. + Assertions.assertEquals(List.of(0, 1, 2), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testOmittedColumnMatchesNullAtThatPosition() + { + final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( + tenantRegion(), + Arrays.asList( + Arrays.asList("acme", "us-east-1"), + Arrays.asList("acme", null) + ) + )); + // No constraint on region; the null-region tuple is still picked up. + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0, 1), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testNonStarGlobDoesNotMatchNull() + { + final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( + tenantRegion(), + Arrays.asList( + Arrays.asList("acme", "us-east-1"), + Arrays.asList("acme", null) + ) + )); + // "us-*" must not match a null region (the only way to match null is "*" or omission). + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("region", "us-*")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); + Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testLiteralStringNullDoesNotMatchNullClusteringValue() + { + final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( + tenantRegion(), + Arrays.asList(Arrays.asList("acme", null)) + )); + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("region", "null")), + null + ); + Assertions.assertNull(matcher.match(segment, BASE_LOAD_SPEC)); + } + + @Test + void testUnknownColumnInPatternNoMatch() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("not_a_clustering_column", "anything")), + null + ); + Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); + } + + @Test + void testNoMatchReturnsNull() + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "nobody")), + null + ); + Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); + } + + @Test + void testIndicesAreSortedAndDeduped() + { + // Two patterns that both match the same group should not duplicate the index in the output. + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme"), Map.of("region", "us-east-1")), + null + ); + final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); + // acme matches 0, 1; us-east-1 matches 0, 2 — union sorted = [0, 1, 2]. + Assertions.assertEquals(List.of(0, 1, 2), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testFingerprintStableAcrossPatternOrder() + { + final WildcardClusterGroupPartialLoadMatcher a = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme"), Map.of("region", "us-east-1")), + null + ); + final WildcardClusterGroupPartialLoadMatcher b = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("region", "us-east-1"), Map.of("tenant", "acme")), + null + ); + final PartialLoadMatcher.MatchResult ra = a.match(threeGroupSegment(), BASE_LOAD_SPEC); + final PartialLoadMatcher.MatchResult rb = b.match(threeGroupSegment(), BASE_LOAD_SPEC); + Assertions.assertEquals(ra.fingerprint(), rb.fingerprint()); + } + + @Test + void testJsonRoundTrip() throws Exception + { + final WildcardClusterGroupPartialLoadMatcher original = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme"), Map.of("tenant", "globex", "region", "us-east-*")), + List.of(Map.of("region", "us-west-2")) + ); + final String json = mapper.writeValueAsString(original); + final PartialLoadMatcher back = mapper.readValue(json, PartialLoadMatcher.class); + Assertions.assertEquals(original, back); + } +} From e76596393c045af33fdeba9ef5c0b0717f7a9b12 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 13 May 2026 10:49:18 -0700 Subject: [PATCH 02/10] add virtualColumns to ClusterGroupTuples and WildcardClusterGroupPartialLoadMatcher, remove exact matcher for now --- .../druid/timeline/ClusterGroupTuples.java | 54 +++- .../apache/druid/timeline/DataSegment.java | 14 + .../BaseDimensionRangeShardSpec.java | 7 +- .../timeline/ClusterGroupTuplesTest.java | 81 ++++++ .../ExactClusterGroupPartialLoadMatcher.java | 159 ----------- .../coordinator/rules/PartialLoadMatcher.java | 1 - ...ildcardClusterGroupPartialLoadMatcher.java | 170 ++++++++++-- ...actClusterGroupPartialLoadMatcherTest.java | 250 ------------------ ...ardClusterGroupPartialLoadMatcherTest.java | 128 +++++++++ 9 files changed, 423 insertions(+), 441 deletions(-) delete mode 100644 server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java delete mode 100644 server/src/test/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcherTest.java diff --git a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java index ab942e2da5cd..5ed37910195f 100644 --- a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java +++ b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java @@ -20,8 +20,10 @@ package org.apache.druid.timeline; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.error.InvalidInput; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -36,22 +38,33 @@ * Typed clustering tuples carried on {@link DataSegment#getClusterGroups()} for clustered base-table segments. Each * entry in {@link #getTuples()} is one cluster group's clustering-column values, in the order declared by * {@link #getClusteringColumns()}. + *

+ * Optionally carries the clustering {@link VirtualColumns} when the segment was clustered on a virtual-column + * expression (e.g., {@code lower(tenant)}). The matchers don't require it today, but it's persisted so future + * broker-side consumers (tier-routing helpers, {@code SegmentPruner} / {@code FilterSegmentPruner}) can resolve a + * query VC against the clustering side via {@link VirtualColumns#findEquivalent}. Defaults to + * {@link VirtualColumns#EMPTY} when absent; constituent VC instances are interned through + * {@link DataSegment#virtualColumnInterner()} so identical clustering VCs across segments collapse to a single + * reference in broker/coordinator memory. */ public class ClusterGroupTuples { private final RowSignature clusteringColumns; private final List> tuples; + private final VirtualColumns virtualColumns; @JsonCreator public ClusterGroupTuples( @JsonProperty("clusteringColumns") RowSignature clusteringColumns, - @JsonProperty("tuples") @Nullable List> tuples + @JsonProperty("tuples") @Nullable List> tuples, + @JsonProperty("virtualColumns") @Nullable VirtualColumns virtualColumns ) { if (clusteringColumns == null || clusteringColumns.size() == 0) { throw InvalidInput.exception("clusteringColumns must not be null or empty"); } this.clusteringColumns = clusteringColumns; + this.virtualColumns = internVirtualColumns(virtualColumns); final List> source = tuples == null ? Collections.emptyList() : tuples; final int numCols = clusteringColumns.size(); @@ -79,6 +92,15 @@ public ClusterGroupTuples( this.tuples = Collections.unmodifiableList(coerced); } + /** + * Convenience constructor for callers that don't carry clustering virtual columns. Equivalent to passing + * {@code null} for the virtual columns argument. + */ + public ClusterGroupTuples(RowSignature clusteringColumns, @Nullable List> tuples) + { + this(clusteringColumns, tuples, null); + } + @JsonProperty public RowSignature getClusteringColumns() { @@ -91,6 +113,25 @@ public List> getTuples() return tuples; } + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public VirtualColumns getVirtualColumns() + { + return virtualColumns; + } + + private static VirtualColumns internVirtualColumns(@Nullable VirtualColumns virtualColumns) + { + if (virtualColumns == null || virtualColumns.isEmpty()) { + return VirtualColumns.EMPTY; + } + return VirtualColumns.create( + Arrays.stream(virtualColumns.getVirtualColumns()) + .map(DataSegment.virtualColumnInterner()::intern) + .toList() + ); + } + /** * Coerce {@code raw} into the canonical Java representation of {@code type}. Null in, null out. STRING uses * {@link String#valueOf}; LONG / DOUBLE / FLOAT accept any {@link Number} (down/up-cast via the matching primitive @@ -170,18 +211,23 @@ public boolean equals(Object o) return false; } ClusterGroupTuples that = (ClusterGroupTuples) o; - return Objects.equals(clusteringColumns, that.clusteringColumns) && Objects.equals(tuples, that.tuples); + return Objects.equals(clusteringColumns, that.clusteringColumns) + && Objects.equals(tuples, that.tuples) + && Objects.equals(virtualColumns, that.virtualColumns); } @Override public int hashCode() { - return Objects.hash(clusteringColumns, tuples); + return Objects.hash(clusteringColumns, tuples, virtualColumns); } @Override public String toString() { - return "ClusterGroupTuples{clusteringColumns=" + clusteringColumns + ", tuples=" + tuples + '}'; + return "ClusterGroupTuples{clusteringColumns=" + clusteringColumns + + ", tuples=" + tuples + + ", virtualColumns=" + virtualColumns + + '}'; } } diff --git a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java index 0b2fd8baf87f..7451d8acf5f2 100644 --- a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java +++ b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java @@ -39,6 +39,7 @@ import org.apache.druid.jackson.CommaListJoinSerializer; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.segment.VirtualColumn; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.ShardSpec; import org.joda.time.Interval; @@ -65,6 +66,18 @@ public static Interner stringInterner() return STRING_INTERNER; } + /** + * Shared canonical interner for {@link org.apache.druid.segment.VirtualColumn} instances embedded in segment-side + * metadata that lives in broker/coordinator memory. Used by callers that carry virtual columns through + * {@link DataSegment}'s wire form (clustering virtual columns on {@link ClusterGroupTuples}, + * {@link org.apache.druid.timeline.partition.BaseDimensionRangeShardSpec}'s range-clustering virtual columns) so + * identical VC definitions across segments collapse to a single instance in memory. + */ + public static Interner virtualColumnInterner() + { + return VIRTUAL_COLUMN_INTERNER; + } + /* * The difference between this class and org.apache.druid.segment.Segment is that this class contains the segment * metadata only, while org.apache.druid.segment.Segment represents the actual body of segment data, queryable. @@ -94,6 +107,7 @@ public static class PruneSpecsHolder private static final Interner> METRICS_INTERNER = Interners.newWeakInterner(); private static final Interner> PROJECTIONS_INTERNER = Interners.newWeakInterner(); private static final Interner CLUSTER_GROUPS_INTERNER = Interners.newWeakInterner(); + private static final Interner VIRTUAL_COLUMN_INTERNER = Interners.newWeakInterner(); private static final Interner COMPACTION_STATE_INTERNER = Interners.newWeakInterner(); private static final Map PRUNED_LOAD_SPEC = ImmutableMap.of( "load spec is pruned, because it's not needed on Brokers, but eats a lot of heap space", diff --git a/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java b/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java index d7a9d2018b08..36a761a0955b 100644 --- a/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java +++ b/processing/src/main/java/org/apache/druid/timeline/partition/BaseDimensionRangeShardSpec.java @@ -19,14 +19,11 @@ package org.apache.druid.timeline.partition; -import com.google.common.collect.Interner; -import com.google.common.collect.Interners; import com.google.common.collect.Ordering; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.StringTuple; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.guava.Comparators; -import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.timeline.DataSegment; @@ -37,8 +34,6 @@ public abstract class BaseDimensionRangeShardSpec implements ShardSpec { - private static final Interner VIRTUAL_COLUMN_INTERNER = Interners.newWeakInterner(); - protected final List dimensions; protected final VirtualColumns virtualColumns; @Nullable @@ -57,7 +52,7 @@ protected BaseDimensionRangeShardSpec( this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : VirtualColumns.create(Arrays.stream(virtualColumns.getVirtualColumns()) - .map(VIRTUAL_COLUMN_INTERNER::intern) + .map(DataSegment.virtualColumnInterner()::intern) .toList()); this.start = start; this.end = end; diff --git a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java index 626be79fb3f7..5e0777dc165e 100644 --- a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java @@ -22,8 +22,12 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -240,4 +244,81 @@ void testTuplesAreImmutable() () -> groups.getTuples().get(0).set(0, "hijacked") ); } + + private static VirtualColumns lowerTenantVcs() + { + return VirtualColumns.create(new ExpressionVirtualColumn( + "tenant_lower", + "lower(tenant)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + )); + } + + @Test + void testVirtualColumnsDefaultEmpty() + { + final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), List.of()); + Assertions.assertSame(VirtualColumns.EMPTY, groups.getVirtualColumns()); + } + + @Test + void testVirtualColumnsAreStored() + { + final ClusterGroupTuples groups = new ClusterGroupTuples( + RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), + List.of(List.of("acme")), + lowerTenantVcs() + ); + Assertions.assertNotNull(groups.getVirtualColumns().getVirtualColumn("tenant_lower")); + } + + @Test + void testVirtualColumnsJsonRoundTrip() throws Exception + { + final ClusterGroupTuples original = new ClusterGroupTuples( + RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), + List.of(List.of("acme")), + lowerTenantVcs() + ); + // Round-trip needs an injectable ExprMacroTable for ExpressionVirtualColumn deserialization. + final ObjectMapper mapper = new DefaultObjectMapper(); + mapper.setInjectableValues( + new com.fasterxml.jackson.databind.InjectableValues.Std() + .addValue(ExprMacroTable.class, TestExprMacroTable.INSTANCE) + ); + final String json = mapper.writeValueAsString(original); + Assertions.assertTrue(json.contains("\"virtualColumns\""), () -> "expected virtualColumns in JSON: " + json); + final ClusterGroupTuples back = mapper.readValue(json, ClusterGroupTuples.class); + Assertions.assertEquals(original, back); + } + + @Test + void testVirtualColumnsOmittedFromJsonWhenEmpty() throws Exception + { + final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), List.of(List.of("acme", "us-east-1"))); + final String json = MAPPER.writeValueAsString(groups); + Assertions.assertFalse(json.contains("virtualColumns"), () -> "did not expect virtualColumns in JSON: " + json); + } + + @Test + void testVirtualColumnInternerSharesAcrossInstances() + { + // Two ClusterGroupTuples built from independent (but equal) VC inputs should share their VC instances via the + // shared interner on DataSegment, so identical clustering VCs dedupe across segments held in memory. + final ClusterGroupTuples a = new ClusterGroupTuples( + RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), + List.of(List.of("acme")), + lowerTenantVcs() + ); + final ClusterGroupTuples b = new ClusterGroupTuples( + RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), + List.of(List.of("globex")), + lowerTenantVcs() + ); + Assertions.assertSame( + a.getVirtualColumns().getVirtualColumns()[0], + b.getVirtualColumns().getVirtualColumns()[0] + ); + } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java deleted file mode 100644 index e6ba0be03826..000000000000 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/ExactClusterGroupPartialLoadMatcher.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.server.coordinator.rules; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.error.DruidException; -import org.apache.druid.error.InvalidInput; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.timeline.ClusterGroupTuples; -import org.apache.druid.timeline.DataSegment; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import java.util.TreeSet; - -/** - * Selects cluster groups whose clustering tuples exactly match one of the configured tuples, by typed equality. - * Tuples are positional (operator authors them in the segment's clustering-column order) and may include nulls. Each - * operator-supplied tuple is coerced to the segment's clustering-column types at match time via - * {@link ClusterGroupTuples#coerceValue}; coercion failures (e.g., a string in a LONG column) skip the offending - * tuple for that segment rather than failing the rule. Tuples whose length doesn't match the segment's clustering - * column count are likewise skipped — safe failure mode when segments with different clustering schemas appear under - * the same rule. - *

- * Complement to {@link WildcardClusterGroupPartialLoadMatcher}: use the wildcard form for pattern-based partial loads - * and use this form when you need to load specific tuples verbatim, especially those containing null clustering - * values which the wildcard form can't constrain. - */ -public class ExactClusterGroupPartialLoadMatcher extends ClusterGroupPartialLoadMatcher -{ - public static final String TYPE = "exactClusterGroup"; - - private final List> tuples; - - @JsonCreator - public ExactClusterGroupPartialLoadMatcher(@JsonProperty("tuples") List> tuples) - { - if (tuples == null || tuples.isEmpty()) { - throw InvalidInput.exception("tuples must not be null or empty for exactClusterGroup matcher"); - } - final List> copied = new ArrayList<>(tuples.size()); - for (int i = 0; i < tuples.size(); i++) { - final List tuple = tuples.get(i); - if (tuple == null) { - throw InvalidInput.exception("tuples[%s] must not be null", i); - } - // ArrayList copy here (not List.copyOf) so nulls inside the tuple are preserved. - copied.add(Collections.unmodifiableList(new ArrayList<>(tuple))); - } - this.tuples = Collections.unmodifiableList(copied); - } - - @JsonProperty - public List> getTuples() - { - return tuples; - } - - @Override - protected List resolveClusterGroupIndices(DataSegment segment) - { - final ClusterGroupTuples clusterGroups = segment.getClusterGroups(); - if (clusterGroups == null) { - return Collections.emptyList(); - } - final RowSignature clusteringColumns = clusterGroups.getClusteringColumns(); - final int numCols = clusteringColumns.size(); - final List> segmentTuples = clusterGroups.getTuples(); - - final TreeSet matched = new TreeSet<>(); - for (List ruleTuple : tuples) { - if (ruleTuple.size() != numCols) { - // Length mismatch — operator's tuples are for a different clustering schema; no match against this segment. - continue; - } - final List coerced = tryCoerce(ruleTuple, clusteringColumns); - if (coerced == null) { - continue; - } - for (int i = 0; i < segmentTuples.size(); i++) { - if (segmentTuples.get(i).equals(coerced)) { - matched.add(i); - } - } - } - return new ArrayList<>(matched); - } - - /** - * Coerce an operator-supplied tuple to the segment's clustering-column types. Returns null if any value can't be - * coerced — caller treats that as "skip this tuple for this segment" rather than failing. - */ - private static List tryCoerce(List ruleTuple, RowSignature clusteringColumns) - { - final int numCols = clusteringColumns.size(); - final Object[] out = new Object[numCols]; - for (int i = 0; i < numCols; i++) { - final String name = clusteringColumns.getColumnName(i); - final ColumnType type = clusteringColumns.getColumnType(i).orElse(null); - if (type == null) { - return null; - } - try { - out[i] = ClusterGroupTuples.coerceValue(name, type, ruleTuple.get(i)); - } - catch (DruidException e) { - return null; - } - } - return Arrays.asList(out); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - ExactClusterGroupPartialLoadMatcher that = (ExactClusterGroupPartialLoadMatcher) o; - return Objects.equals(tuples, that.tuples); - } - - @Override - public int hashCode() - { - return Objects.hash(tuples); - } - - @Override - public String toString() - { - return "ExactClusterGroupPartialLoadMatcher{tuples=" + tuples + "}"; - } -} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java index 00eadd91f99e..65c69113b627 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/PartialLoadMatcher.java @@ -38,7 +38,6 @@ @JsonSubTypes({ @JsonSubTypes.Type(name = ExactProjectionPartialLoadMatcher.TYPE, value = ExactProjectionPartialLoadMatcher.class), @JsonSubTypes.Type(name = WildcardProjectionPartialLoadMatcher.TYPE, value = WildcardProjectionPartialLoadMatcher.class), - @JsonSubTypes.Type(name = ExactClusterGroupPartialLoadMatcher.TYPE, value = ExactClusterGroupPartialLoadMatcher.class), @JsonSubTypes.Type(name = WildcardClusterGroupPartialLoadMatcher.TYPE, value = WildcardClusterGroupPartialLoadMatcher.class) }) public interface PartialLoadMatcher diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index 7b6882bdf21a..7406a7fd3abf 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.error.InvalidInput; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.timeline.ClusterGroupTuples; import org.apache.druid.timeline.DataSegment; @@ -30,6 +32,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -40,13 +43,26 @@ /** * Selects cluster groups whose clustering tuples match any of the configured per-column glob patterns, minus any * groups matched by an entry in {@code excludePatterns}. Each pattern is a {@code Map} where keys are - * clustering column names and values are glob patterns matched against the rendered tuple value at that column. - * Columns omitted from a pattern are treated as wildcards. Glob syntax (including escape semantics) is shared with - * {@link WildcardProjectionPartialLoadMatcher} via {@link Globs}. + * clustering column names (or operator-side virtual column names — see below) and values are glob patterns matched + * against the rendered tuple value at that column. Columns omitted from a pattern are treated as wildcards. Glob + * syntax (including escape semantics) is shared with {@link WildcardProjectionPartialLoadMatcher} via {@link Globs}. + *

+ * If the operator supplies {@link #getVirtualColumns()}, a pattern key may also reference one of those virtual + * columns. At match time, the matcher resolves such a key to a clustering column on the segment via + * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} between the operator's VCs and the segment's clustering + * VCs (carried on {@link ClusterGroupTuples#getVirtualColumns()}). This lets operators author portable rules — they + * write their preferred VC name and expression, and the matcher resolves to whatever name the segment happens to use + * for the equivalent clustering VC. + *

+ * If the operator-side VC for a pattern key has no equivalent clustering VC on the segment, the pattern is treated as + * non-matching for that segment (defensive against typos or schema drift). The operator-VC-first ordering also + * disambiguates the shadowing case where an operator-VC and a clustering column share a name: the operator-VC + * interpretation wins, and a pattern is only matchable when the VCs are actually equivalent. *

* Null clustering values are matched only by omitting the column entirely or using the literal {@code "*"} glob; both * have explicit "match-anything-including-null" semantics. Any other glob (including {@code "null"}) does not match a - * null clustering value; for those cases use {@link ExactClusterGroupPartialLoadMatcher}. + * null clustering value. Matching specifically by null clustering values (e.g., load only the null-keyed group) is + * not supported by this matcher and is left for a future typed-tuple matcher variant. */ public class WildcardClusterGroupPartialLoadMatcher extends ClusterGroupPartialLoadMatcher { @@ -54,13 +70,15 @@ public class WildcardClusterGroupPartialLoadMatcher extends ClusterGroupPartialL private final List> patterns; private final List> excludePatterns; + private final VirtualColumns virtualColumns; private final List> compiledPatterns; private final List> compiledExcludePatterns; @JsonCreator public WildcardClusterGroupPartialLoadMatcher( @JsonProperty("patterns") List> patterns, - @JsonProperty("excludePatterns") @Nullable List> excludePatterns + @JsonProperty("excludePatterns") @Nullable List> excludePatterns, + @JsonProperty("virtualColumns") @Nullable VirtualColumns virtualColumns ) { if (patterns == null || patterns.isEmpty()) { @@ -70,10 +88,23 @@ public WildcardClusterGroupPartialLoadMatcher( this.excludePatterns = excludePatterns == null ? List.of() : copyAndValidatePatterns(excludePatterns, "excludePatterns"); + this.virtualColumns = internVirtualColumns(virtualColumns); this.compiledPatterns = compileAll(this.patterns); this.compiledExcludePatterns = compileAll(this.excludePatterns); } + /** + * Convenience constructor for callers that don't carry operator-side virtual columns. Equivalent to passing + * {@code null} for the virtual columns argument. + */ + public WildcardClusterGroupPartialLoadMatcher( + List> patterns, + @Nullable List> excludePatterns + ) + { + this(patterns, excludePatterns, null); + } + @JsonProperty public List> getPatterns() { @@ -87,6 +118,13 @@ public List> getExcludePatterns() return excludePatterns; } + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_EMPTY) + public VirtualColumns getVirtualColumns() + { + return virtualColumns; + } + @Override protected List resolveClusterGroupIndices(DataSegment segment) { @@ -95,14 +133,22 @@ protected List resolveClusterGroupIndices(DataSegment segment) return Collections.emptyList(); } final RowSignature clusteringColumns = clusterGroups.getClusteringColumns(); + final VirtualColumns segmentVcs = clusterGroups.getVirtualColumns(); final List> tuples = clusterGroups.getTuples(); + + // Per-pattern resolution: which clustering column does each pattern key map to? Resolution is segment-scoped + // (depends only on the segment's clustering signature + VCs), so compute it once up front and reuse it across + // tuples. A null entry in the resolved list marks the pattern as non-matching for this segment. + final List> resolvedPatterns = resolveAll(compiledPatterns, clusteringColumns, segmentVcs); + final List> resolvedExcludes = resolveAll(compiledExcludePatterns, clusteringColumns, segmentVcs); + final TreeSet matched = new TreeSet<>(); for (int i = 0; i < tuples.size(); i++) { final List tuple = tuples.get(i); - if (!matchesAnyPattern(tuple, clusteringColumns, compiledPatterns)) { + if (!matchesAnyPattern(tuple, clusteringColumns, compiledPatterns, resolvedPatterns)) { continue; } - if (matchesAnyPattern(tuple, clusteringColumns, compiledExcludePatterns)) { + if (matchesAnyPattern(tuple, clusteringColumns, compiledExcludePatterns, resolvedExcludes)) { continue; } matched.add(i); @@ -110,14 +156,83 @@ protected List resolveClusterGroupIndices(DataSegment segment) return new ArrayList<>(matched); } + /** + * For each compiled pattern, compute the {@code patternKey -> segment clustering column name} map. A {@code null} + * entry in the returned list marks a pattern as non-matching for this segment (some pattern key couldn't be + * resolved to a clustering column via either direct name or operator-VC equivalence). + */ + private List> resolveAll( + List> compiled, + RowSignature clusteringColumns, + VirtualColumns segmentVcs + ) + { + if (compiled.isEmpty()) { + return List.of(); + } + final List> out = new ArrayList<>(compiled.size()); + for (Map pattern : compiled) { + out.add(resolvePattern(pattern.keySet(), clusteringColumns, segmentVcs)); + } + return out; + } + + @Nullable + private Map resolvePattern( + java.util.Set patternKeys, + RowSignature clusteringColumns, + VirtualColumns segmentVcs + ) + { + final Map resolved = CollectionUtils.newLinkedHashMapWithExpectedSize(patternKeys.size()); + for (String key : patternKeys) { + final String target = resolveKey(key, clusteringColumns, segmentVcs); + if (target == null) { + return null; // pattern unresolvable for this segment + } + resolved.put(key, target); + } + return resolved; + } + + /** + * Resolve a pattern key to a clustering column name on the segment. Three cases: + *
    + *
  1. The matcher carries an operator-VC by this name → resolve via + * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} against the segment's clustering VCs. The + * operator-VC interpretation wins regardless of any same-name clustering column (shadowing).
  2. + *
  3. Otherwise, if the key is directly a clustering column name → identity.
  4. + *
  5. Otherwise → unresolvable (returns {@code null}).
  6. + *
+ */ + @Nullable + private String resolveKey(String key, RowSignature clusteringColumns, VirtualColumns segmentVcs) + { + final VirtualColumns.Node operatorVcNode = virtualColumns.getNode(key); + if (operatorVcNode != null) { + final VirtualColumn equivalent = segmentVcs.findEquivalent(operatorVcNode); + if (equivalent == null) { + return null; + } + final String equivalentName = equivalent.getOutputName(); + return clusteringColumns.indexOf(equivalentName) >= 0 ? equivalentName : null; + } + return clusteringColumns.indexOf(key) >= 0 ? key : null; + } + private static boolean matchesAnyPattern( List tuple, RowSignature clusteringColumns, - List> patterns + List> compiledPatterns, + List> resolvedPatterns ) { - for (Map pattern : patterns) { - if (matchesPattern(tuple, clusteringColumns, pattern)) { + for (int p = 0; p < compiledPatterns.size(); p++) { + final Map resolved = resolvedPatterns.get(p); + if (resolved == null) { + continue; + } + if (matchesPattern(tuple, clusteringColumns, compiledPatterns.get(p), resolved)) { return true; } } @@ -127,16 +242,14 @@ private static boolean matchesAnyPattern( private static boolean matchesPattern( List tuple, RowSignature clusteringColumns, - Map pattern + Map pattern, + Map resolved ) { for (Map.Entry entry : pattern.entrySet()) { - final int idx = clusteringColumns.indexOf(entry.getKey()); - if (idx < 0) { - // Pattern references a column the segment doesn't have. Defensive: a typo shouldn't silently broaden the - // match, so treat the whole pattern as non-matching for this segment. - return false; - } + final String resolvedColumn = resolved.get(entry.getKey()); + final int idx = clusteringColumns.indexOf(resolvedColumn); + // resolved is guaranteed to map every patternKey to a real clustering column (else the pattern was skipped). final CompiledGlob glob = entry.getValue(); if (glob.matchAny) { // Literal "*" — matches every value, including null. @@ -144,7 +257,7 @@ private static boolean matchesPattern( } final Object value = tuple.get(idx); if (value == null) { - // Any non-"*" glob cannot match a null clustering value. Use ExactClusterGroupPartialLoadMatcher for that. + // Any non-"*" glob cannot match a null clustering value. return false; } if (!glob.pattern.matcher(value.toString()).matches()) { @@ -183,6 +296,18 @@ private static List> compileAll(List BASE_LOAD_SPEC = Map.of("type", "local", "path", "/seg"); - - private final ObjectMapper mapper = new DefaultObjectMapper(); - - @BeforeEach - void setUp() - { - final InjectableValues.Std injectables = new InjectableValues.Std(); - injectables.addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT); - mapper.setInjectableValues(injectables); - } - - private static RowSignature tenantRegion() - { - return RowSignature.builder() - .add("tenant", ColumnType.STRING) - .add("region", ColumnType.STRING) - .build(); - } - - private static RowSignature tenantPriority() - { - return RowSignature.builder() - .add("tenant", ColumnType.STRING) - .add("priority", ColumnType.LONG) - .build(); - } - - private static DataSegment segmentWithGroups(ClusterGroupTuples groups) - { - return DataSegment.builder(SegmentId.of( - "ds", - Intervals.of("2026-01-01/2026-01-02"), - "v", - new NumberedShardSpec(0, 1) - )).loadSpec(BASE_LOAD_SPEC).size(0).clusterGroups(groups).build(); - } - - private static DataSegment threeGroupSegment() - { - return segmentWithGroups(new ClusterGroupTuples( - tenantRegion(), - Arrays.asList( - Arrays.asList("acme", "us-east-1"), - Arrays.asList("acme", "us-west-2"), - Arrays.asList("globex", null) - ) - )); - } - - @Test - void testConstructorRejectsNullTuples() - { - Assertions.assertThrows(DruidException.class, () -> new ExactClusterGroupPartialLoadMatcher(null)); - } - - @Test - void testConstructorRejectsEmptyTuples() - { - Assertions.assertThrows(DruidException.class, () -> new ExactClusterGroupPartialLoadMatcher(List.of())); - } - - @Test - void testConstructorRejectsNullTupleEntry() - { - Assertions.assertThrows( - DruidException.class, - () -> new ExactClusterGroupPartialLoadMatcher(Arrays.asList(Arrays.asList("acme", "us-east-1"), null)) - ); - } - - @Test - void testNonClusteredSegmentReturnsNull() - { - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", "us-east-1")) - ); - final DataSegment segment = DataSegment.builder(SegmentId.of( - "ds", - Intervals.of("2026-01-01/2026-01-02"), - "v", - new NumberedShardSpec(0, 1) - )).loadSpec(BASE_LOAD_SPEC).size(0).build(); - Assertions.assertNull(matcher.match(segment, BASE_LOAD_SPEC)); - } - - @Test - void testSingleTupleMatch() - { - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", "us-east-1")) - ); - final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); - Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); - } - - @Test - void testMultiTupleMatch() - { - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", "us-east-1"), List.of("acme", "us-west-2")) - ); - final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); - Assertions.assertEquals(List.of(0, 1), result.wrappedLoadSpec().get("clusterGroupIndices")); - } - - @Test - void testNullValueInRuleTupleMatchesNullInSegmentTuple() - { - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - Arrays.asList(Arrays.asList("globex", null)) - ); - final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); - Assertions.assertEquals(List.of(2), result.wrappedLoadSpec().get("clusterGroupIndices")); - } - - @Test - void testTupleLengthMismatchSkippedForSegment() - { - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme")) // segment has 2 clustering columns; this 1-col tuple won't match. - ); - Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); - } - - @Test - void testCoercionAtMatchTime() - { - // Operator wrote Integer 5 (Jackson default for small ints); segment stores Long 5 (coerced at construction). - final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( - tenantPriority(), - List.of(List.of("acme", (Object) 5)) - )); - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", (Object) 5)) // Integer here; should coerce to Long for equality. - ); - final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); - Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); - } - - @Test - void testCoercionFailureSkipsTuple() - { - // String "acme" cannot be coerced to LONG for the priority column. The matcher silently skips that tuple - // rather than throwing; other tuples in the matcher remain effective. - final DataSegment segment = segmentWithGroups(new ClusterGroupTuples( - tenantPriority(), - List.of(List.of("acme", (Object) 5L), List.of("globex", (Object) 7L)) - )); - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of( - List.of("acme", "not_a_number"), // bad type for priority — skipped. - List.of("globex", (Object) 7) - ) - ); - final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); - Assertions.assertEquals(List.of(1), result.wrappedLoadSpec().get("clusterGroupIndices")); - } - - @Test - void testNoMatchReturnsNull() - { - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("nobody", "us-east-1")) - ); - Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); - } - - @Test - void testIndicesAreSortedAndDeduped() - { - // Same tuple twice in the matcher config — deduped in the output. - final ExactClusterGroupPartialLoadMatcher matcher = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", "us-east-1"), List.of("acme", "us-east-1")) - ); - final PartialLoadMatcher.MatchResult result = matcher.match(threeGroupSegment(), BASE_LOAD_SPEC); - Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); - } - - @Test - void testFingerprintStableAcrossTupleOrder() - { - final ExactClusterGroupPartialLoadMatcher a = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", "us-east-1"), List.of("acme", "us-west-2")) - ); - final ExactClusterGroupPartialLoadMatcher b = new ExactClusterGroupPartialLoadMatcher( - List.of(List.of("acme", "us-west-2"), List.of("acme", "us-east-1")) - ); - Assertions.assertEquals( - a.match(threeGroupSegment(), BASE_LOAD_SPEC).fingerprint(), - b.match(threeGroupSegment(), BASE_LOAD_SPEC).fingerprint() - ); - } - - @Test - void testJsonRoundTrip() throws Exception - { - final ExactClusterGroupPartialLoadMatcher original = new ExactClusterGroupPartialLoadMatcher( - Arrays.asList( - Arrays.asList("acme", "us-east-1"), - Arrays.asList("globex", null) - ) - ); - final String json = mapper.writeValueAsString(original); - final PartialLoadMatcher back = mapper.readValue(json, PartialLoadMatcher.class); - Assertions.assertEquals(original, back); - } -} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java index 00b149285d4b..9ef2e8ce72b9 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java @@ -24,8 +24,12 @@ import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.timeline.ClusterGroupTuples; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; @@ -319,4 +323,128 @@ void testJsonRoundTrip() throws Exception final PartialLoadMatcher back = mapper.readValue(json, PartialLoadMatcher.class); Assertions.assertEquals(original, back); } + + // --- Operator-side virtual columns (findEquivalent resolution) --- + + private static VirtualColumns lowerTenantVcs(String outputName) + { + return VirtualColumns.create(new ExpressionVirtualColumn( + outputName, + "lower(tenant)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + )); + } + + /** Segment clusters on a VC named "tenant_lower" with expression lower(tenant). */ + private static DataSegment vcClusteredSegment(String... lowerTenants) + { + final RowSignature clusteringColumns = RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(); + final List> tuples = new java.util.ArrayList<>(lowerTenants.length); + for (String t : lowerTenants) { + tuples.add(java.util.Collections.singletonList(t)); + } + return segmentWithGroups(new ClusterGroupTuples(clusteringColumns, tuples, lowerTenantVcs("tenant_lower"))); + } + + @Test + void testOperatorVcResolvesToClusteringVcByEquivalence() + { + // Operator names their VC "queryLower" with lower(tenant); the segment's clustering VC is "tenant_lower" with + // the same expression. The matcher should find equivalence and match the segment's tuples. + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("queryLower", "acme")), + null, + lowerTenantVcs("queryLower") + ); + final DataSegment segment = vcClusteredSegment("acme", "globex"); + final PartialLoadMatcher.MatchResult result = matcher.match(segment, BASE_LOAD_SPEC); + Assertions.assertNotNull(result); + Assertions.assertEquals(List.of(0), result.wrappedLoadSpec().get("clusterGroupIndices")); + } + + @Test + void testOperatorVcWithoutEquivalenceIsNonMatching() + { + // Operator-VC computes upper(tenant); segment's clustering VC computes lower(tenant). Not equivalent → the + // pattern is non-matching for this segment. + final VirtualColumns operatorVcs = VirtualColumns.create(new ExpressionVirtualColumn( + "queryUpper", + "upper(tenant)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + )); + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("queryUpper", "ACME")), + null, + operatorVcs + ); + Assertions.assertNull(matcher.match(vcClusteredSegment("acme"), BASE_LOAD_SPEC)); + } + + @Test + void testOperatorVcShadowsClusteringColumnName() + { + // Operator declares a VC named "tenant" with an unrelated expression. The pattern key "tenant" resolves through + // the operator's VC (operator-VC interpretation wins). Without an equivalent on the segment side, the pattern + // is non-matching even though "tenant" happens to also be a clustering column name on a different segment. + final VirtualColumns operatorVcs = VirtualColumns.create(new ExpressionVirtualColumn( + "tenant", + "lower(otherCol)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + )); + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null, + operatorVcs + ); + // Segment that clusters directly on physical "tenant" (no segment VCs). The operator-VC shadowing rule means + // we don't silently treat the pattern's "tenant" as the clustering column "tenant" — it's interpreted as the + // operator-VC, which has no equivalent on the segment → non-matching. + Assertions.assertNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); + } + + @Test + void testNoOperatorVcsKeepsDirectNameMatching() + { + // Sanity check: when no operator VCs are configured, the pattern key path falls through to direct clustering + // column name resolution, unchanged from the pre-VC behavior. + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null + ); + Assertions.assertNotNull(matcher.match(threeGroupSegment(), BASE_LOAD_SPEC)); + } + + @Test + void testOperatorVcJsonRoundTrip() throws Exception + { + // Round-trip needs an injectable ExprMacroTable for ExpressionVirtualColumn deserialization. + mapper.setInjectableValues( + new InjectableValues.Std() + .addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT) + .addValue(ExprMacroTable.class, TestExprMacroTable.INSTANCE) + ); + final WildcardClusterGroupPartialLoadMatcher original = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("queryLower", "acme")), + null, + lowerTenantVcs("queryLower") + ); + final String json = mapper.writeValueAsString(original); + Assertions.assertTrue(json.contains("\"virtualColumns\""), () -> "expected virtualColumns in JSON: " + json); + final PartialLoadMatcher back = mapper.readValue(json, PartialLoadMatcher.class); + Assertions.assertEquals(original, back); + } + + @Test + void testVirtualColumnsOmittedFromJsonWhenEmpty() throws Exception + { + final WildcardClusterGroupPartialLoadMatcher matcher = new WildcardClusterGroupPartialLoadMatcher( + List.of(Map.of("tenant", "acme")), + null + ); + final String json = mapper.writeValueAsString(matcher); + Assertions.assertFalse(json.contains("virtualColumns"), () -> "did not expect virtualColumns in JSON: " + json); + } } From 11637842a7e9879d00cacde1fa0fa78fb57afb96 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 13 May 2026 12:35:42 -0700 Subject: [PATCH 03/10] tidy up --- .../druid/timeline/ClusterGroupTuples.java | 75 ++++----- .../timeline/ClusterGroupTuplesTest.java | 142 +++++++++++------- .../rules/ClusterGroupPartialLoadMatcher.java | 4 +- .../druid/server/coordinator/rules/Globs.java | 1 + ...ildcardClusterGroupPartialLoadMatcher.java | 17 ++- ...ardClusterGroupPartialLoadMatcherTest.java | 45 +++--- 6 files changed, 156 insertions(+), 128 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java index 5ed37910195f..5f5f59c3cc1e 100644 --- a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java +++ b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; @@ -37,15 +38,9 @@ /** * Typed clustering tuples carried on {@link DataSegment#getClusterGroups()} for clustered base-table segments. Each * entry in {@link #getTuples()} is one cluster group's clustering-column values, in the order declared by - * {@link #getClusteringColumns()}. - *

- * Optionally carries the clustering {@link VirtualColumns} when the segment was clustered on a virtual-column - * expression (e.g., {@code lower(tenant)}). The matchers don't require it today, but it's persisted so future - * broker-side consumers (tier-routing helpers, {@code SegmentPruner} / {@code FilterSegmentPruner}) can resolve a - * query VC against the clustering side via {@link VirtualColumns#findEquivalent}. Defaults to - * {@link VirtualColumns#EMPTY} when absent; constituent VC instances are interned through - * {@link DataSegment#virtualColumnInterner()} so identical clustering VCs across segments collapse to a single - * reference in broker/coordinator memory. + * {@link #getClusteringColumns()}. Optionally carries the clustering {@link VirtualColumns} when the segment was + * clustered on a virtual-column expression, so that matching for things like partial load rules and query time + * segment pruning can make use of this information. */ public class ClusterGroupTuples { @@ -133,66 +128,51 @@ private static VirtualColumns internVirtualColumns(@Nullable VirtualColumns virt } /** - * Coerce {@code raw} into the canonical Java representation of {@code type}. Null in, null out. STRING uses - * {@link String#valueOf}; LONG / DOUBLE / FLOAT accept any {@link Number} (down/up-cast via the matching primitive - * accessor) or a parseable numeric {@link String}. Throws on unsupported types or unparseable strings. + * Canonicalize {@code raw} for the declared clustering column {@code type}. This is intentionally narrow: its job + * is to unbreak Jackson's number-type narrowing (e.g., an Integer arriving for a LONG column gets normalized to a + * Long), not to do general value coercion. Rules: + *

    + *
  • {@code null} → {@code null}.
  • + *
  • STRING: {@link Objects#toString} on any non-null value (stringifying numeric operator input is benign).
  • + *
  • LONG / DOUBLE / FLOAT: require {@link Number}; return via the matching primitive accessor. Strings, + * Booleans, etc. are rejected — typed rule authoring should produce typed JSON, and silently parsing + * strings risks accepting operator typos that change the matched set.
  • + *
+ * Unsupported column types (anything that isn't STRING/LONG/DOUBLE/FLOAT) are rejected. *

* Used by: *

    *
  • {@link ClusterGroupTuples}'s constructor to canonicalize segment-side tuples (strict).
  • - *
  • Operator-supplied rule tuples in the cluster-group partial-load matchers, which catch the exception and - * treat it as "no match for this segment" rather than a hard failure.
  • + *
  • Operator-supplied rule tuples in future cluster-group partial-load matchers, which can catch the + * exception and treat it as "no match for this segment" rather than a hard failure.
  • *
*/ + @Nullable public static Object coerceValue(String columnName, ColumnType type, @Nullable Object raw) { if (raw == null) { return null; } if (ColumnType.STRING.equals(type)) { - return raw instanceof String ? raw : String.valueOf(raw); + return raw instanceof String ? raw : Objects.toString(raw); } if (ColumnType.LONG.equals(type)) { if (raw instanceof Number) { return ((Number) raw).longValue(); } - if (raw instanceof String) { - try { - return Long.parseLong((String) raw); - } - catch (NumberFormatException e) { - throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to LONG", raw, columnName); - } - } - throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to LONG", raw, columnName); + throw cannotCoerce(raw, columnName, "LONG"); } if (ColumnType.DOUBLE.equals(type)) { if (raw instanceof Number) { return ((Number) raw).doubleValue(); } - if (raw instanceof String) { - try { - return Double.parseDouble((String) raw); - } - catch (NumberFormatException e) { - throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to DOUBLE", raw, columnName); - } - } - throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to DOUBLE", raw, columnName); + throw cannotCoerce(raw, columnName, "DOUBLE"); } if (ColumnType.FLOAT.equals(type)) { if (raw instanceof Number) { return ((Number) raw).floatValue(); } - if (raw instanceof String) { - try { - return Float.parseFloat((String) raw); - } - catch (NumberFormatException e) { - throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to FLOAT", raw, columnName); - } - } - throw InvalidInput.exception("Cannot coerce value [%s] for column [%s] to FLOAT", raw, columnName); + throw cannotCoerce(raw, columnName, "FLOAT"); } throw InvalidInput.exception( "Unsupported clustering column type [%s] for column [%s]; supported types are STRING, LONG, DOUBLE, FLOAT", @@ -201,6 +181,17 @@ public static Object coerceValue(String columnName, ColumnType type, @Nullable O ); } + private static DruidException cannotCoerce(Object raw, String columnName, String targetType) + { + return InvalidInput.exception( + "Cannot coerce value [%s] of type [%s] for column [%s] to %s", + raw, + raw.getClass().getName(), + columnName, + targetType + ); + } + @Override public boolean equals(Object o) { diff --git a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java index 5e0777dc165e..f8d01bc5375e 100644 --- a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java @@ -19,8 +19,10 @@ package org.apache.druid.timeline; +import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.expression.TestExprMacroTable; @@ -28,6 +30,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; +import org.hamcrest.MatcherAssert; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -37,6 +40,14 @@ class ClusterGroupTuplesTest { private static final ObjectMapper MAPPER = new DefaultObjectMapper(); + private static final VirtualColumns VIRTUAL_COLUMNS = VirtualColumns.create( + new ExpressionVirtualColumn( + "tenant_lower", + "lower(tenant)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ) + ); private static RowSignature tenantRegion() { @@ -57,18 +68,24 @@ private static RowSignature tenantPriority() @Test void testConstructorRejectsNullClusteringColumns() { - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(null, List.of(List.of("acme", "us-east-1"))) + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(null, List.of(List.of("acme", "us-east-1"))) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains("clusteringColumns must not be null or empty") ); } @Test void testConstructorRejectsEmptyClusteringColumns() { - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(RowSignature.empty(), List.of()) + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(RowSignature.empty(), List.of()) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains("clusteringColumns must not be null or empty") ); } @@ -89,18 +106,26 @@ void testConstructorAllowsNullTuplesList() @Test void testConstructorRejectsTupleLengthMismatch() { - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(tenantRegion(), List.of(List.of("acme"))) + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantRegion(), List.of(List.of("acme"))) + ), + DruidExceptionMatcher.invalidInput() + .expectMessageContains("tuple[0] has size [1] but clusteringColumns size is [2]") ); } @Test void testConstructorRejectsNullTuple() { - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(tenantRegion(), Arrays.asList(Arrays.asList("acme", "us-east-1"), null)) + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantRegion(), Arrays.asList(Arrays.asList("acme", "us-east-1"), null)) + ), + DruidExceptionMatcher.invalidInput() + .expectMessageContains("tuple[1] has size [null] but clusteringColumns size is [2]") ); } @@ -108,9 +133,12 @@ void testConstructorRejectsNullTuple() void testConstructorRejectsUntypedClusteringColumn() { final RowSignature untyped = RowSignature.builder().add("tenant", null).build(); - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(untyped, List.of(List.of("acme"))) + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(untyped, List.of(List.of("acme"))) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains("clusteringColumn[tenant] has no declared type") ); } @@ -118,9 +146,12 @@ void testConstructorRejectsUntypedClusteringColumn() void testConstructorRejectsUnsupportedColumnType() { final RowSignature arraySig = RowSignature.builder().add("arr", ColumnType.STRING_ARRAY).build(); - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(arraySig, List.of(List.of(List.of("a")))) + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(arraySig, List.of(List.of(List.of("a")))) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains("Unsupported clustering column type") ); } @@ -162,28 +193,24 @@ void testCoercionIntegerToLong() { final ClusterGroupTuples groups = new ClusterGroupTuples( tenantPriority(), - List.of(List.of("acme", (Object) Integer.valueOf(5))) + List.of(List.of("acme", Integer.valueOf(5))) ); Assertions.assertEquals(Long.class, groups.getTuples().get(0).get(1).getClass()); Assertions.assertEquals(5L, groups.getTuples().get(0).get(1)); } @Test - void testCoercionStringToLong() + void testCoercionStringRejectedForLong() { - final ClusterGroupTuples groups = new ClusterGroupTuples( - tenantPriority(), - List.of(List.of("acme", "42")) - ); - Assertions.assertEquals(42L, groups.getTuples().get(0).get(1)); - } - - @Test - void testCoercionUnparseableStringToLongThrows() - { - Assertions.assertThrows( - DruidException.class, - () -> new ClusterGroupTuples(tenantPriority(), List.of(List.of("acme", "notanumber"))) + // Coercion is intentionally narrow: only Number-family inputs are normalized. A String numeric value is rejected + // rather than parsed, so operator typos don't silently broaden the matched set in future rule consumers. + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantPriority(), List.of(List.of("acme", "42"))) + ), + DruidExceptionMatcher.invalidInput() + .expectMessageContains("Cannot coerce value [42] of type [java.lang.String] for column [priority] to LONG") ); } @@ -197,12 +224,30 @@ void testCoercionDoubleToFloat() } @Test - void testCoercionStringToDouble() + void testCoercionStringRejectedForDouble() { final RowSignature sig = RowSignature.builder().add("v", ColumnType.DOUBLE).build(); - final ClusterGroupTuples groups = new ClusterGroupTuples(sig, List.of(List.of("3.14"))); - Assertions.assertEquals(Double.class, groups.getTuples().get(0).get(0).getClass()); - Assertions.assertEquals(3.14d, (Double) groups.getTuples().get(0).get(0), 0.0001d); + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(sig, List.of(List.of("3.14"))) + ), + DruidExceptionMatcher.invalidInput() + .expectMessageContains("Cannot coerce value [3.14] of type [java.lang.String] for column [v] to DOUBLE") + ); + } + + @Test + void testCoercionBooleanRejectedForLong() + { + MatcherAssert.assertThat( + Assertions.assertThrows( + DruidException.class, + () -> new ClusterGroupTuples(tenantPriority(), List.of(List.of("acme", (Object) Boolean.TRUE))) + ), + DruidExceptionMatcher.invalidInput() + .expectMessageContains("Cannot coerce value [true] of type [java.lang.Boolean] for column [priority] to LONG") + ); } @Test @@ -216,9 +261,10 @@ void testCoercionAcceptsAnyTypeForString() @Test void testJsonRoundTripPreservesCoercedTypes() throws Exception { + // Both small Integer (Jackson default) and large Long pass through coercion to canonical Long. final ClusterGroupTuples groups = new ClusterGroupTuples( tenantPriority(), - List.of(List.of("acme", (Object) 5), List.of("globex", (Object) "10")) + List.of(List.of("acme", (Object) 5), List.of("globex", (Object) 5_000_000_000L)) ); final String json = MAPPER.writeValueAsString(groups); final ClusterGroupTuples back = MAPPER.readValue(json, ClusterGroupTuples.class); @@ -245,16 +291,6 @@ void testTuplesAreImmutable() ); } - private static VirtualColumns lowerTenantVcs() - { - return VirtualColumns.create(new ExpressionVirtualColumn( - "tenant_lower", - "lower(tenant)", - ColumnType.STRING, - TestExprMacroTable.INSTANCE - )); - } - @Test void testVirtualColumnsDefaultEmpty() { @@ -268,7 +304,7 @@ void testVirtualColumnsAreStored() final ClusterGroupTuples groups = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), List.of(List.of("acme")), - lowerTenantVcs() + VIRTUAL_COLUMNS ); Assertions.assertNotNull(groups.getVirtualColumns().getVirtualColumn("tenant_lower")); } @@ -279,12 +315,12 @@ void testVirtualColumnsJsonRoundTrip() throws Exception final ClusterGroupTuples original = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), List.of(List.of("acme")), - lowerTenantVcs() + VIRTUAL_COLUMNS ); // Round-trip needs an injectable ExprMacroTable for ExpressionVirtualColumn deserialization. final ObjectMapper mapper = new DefaultObjectMapper(); mapper.setInjectableValues( - new com.fasterxml.jackson.databind.InjectableValues.Std() + new InjectableValues.Std() .addValue(ExprMacroTable.class, TestExprMacroTable.INSTANCE) ); final String json = mapper.writeValueAsString(original); @@ -309,12 +345,12 @@ void testVirtualColumnInternerSharesAcrossInstances() final ClusterGroupTuples a = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), List.of(List.of("acme")), - lowerTenantVcs() + VIRTUAL_COLUMNS ); final ClusterGroupTuples b = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), List.of(List.of("globex")), - lowerTenantVcs() + VIRTUAL_COLUMNS ); Assertions.assertSame( a.getVirtualColumns().getVirtualColumns()[0], diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java index f69dc902cab6..879611de5a6d 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java @@ -31,8 +31,8 @@ /** * Base for {@link PartialLoadMatcher} implementations that decide which of a clustered segment's cluster groups to - * partially load. Subclasses supply the resolution policy via {@link #resolveClusterGroupIndices(DataSegment)} — the - * sorted, deduped indices into {@code segment.getClusterGroups().getTuples()} — and this base handles fingerprint + * partially load. Subclasses supply the resolution policy via {@link #resolveClusterGroupIndices(DataSegment)}; the + * sorted, deduped indices into {@code segment.getClusterGroups().getTuples()}, and this base handles fingerprint * computation and wraps the result into the {@code partialClusterGroup} load-spec wire form consumed by the * historical-side partial loader. *

diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java index b29e9b0cedda..eed92436e879 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java @@ -41,6 +41,7 @@ final class Globs { private Globs() { + // no instantiation } /** diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index 7406a7fd3abf..68f2e50bd993 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.TreeSet; import java.util.regex.Pattern; @@ -50,7 +51,7 @@ * If the operator supplies {@link #getVirtualColumns()}, a pattern key may also reference one of those virtual * columns. At match time, the matcher resolves such a key to a clustering column on the segment via * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} between the operator's VCs and the segment's clustering - * VCs (carried on {@link ClusterGroupTuples#getVirtualColumns()}). This lets operators author portable rules — they + * VCs (carried on {@link ClusterGroupTuples#getVirtualColumns()}). This lets operators author portable rules, they * write their preferred VC name and expression, and the matcher resolves to whatever name the segment happens to use * for the equivalent clustering VC. *

@@ -179,7 +180,7 @@ private List> resolveAll( @Nullable private Map resolvePattern( - java.util.Set patternKeys, + Set patternKeys, RowSignature clusteringColumns, VirtualColumns segmentVcs ) @@ -198,11 +199,11 @@ private Map resolvePattern( /** * Resolve a pattern key to a clustering column name on the segment. Three cases: *

    - *
  1. The matcher carries an operator-VC by this name → resolve via + *
  2. The matcher carries an virtual column by this name, resolve via * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} against the segment's clustering VCs. The - * operator-VC interpretation wins regardless of any same-name clustering column (shadowing).
  3. - *
  4. Otherwise, if the key is directly a clustering column name → identity.
  5. - *
  6. Otherwise → unresolvable (returns {@code null}).
  7. + * matcher VC interpretation wins regardless of any same-name clustering column (shadowing). + *
  8. Otherwise, if the key is directly a clustering column name -> identity.
  9. + *
  10. Otherwise -> unresolvable (returns {@code null}).
  11. *
*/ @Nullable @@ -215,9 +216,9 @@ private String resolveKey(String key, RowSignature clusteringColumns, VirtualCol return null; } final String equivalentName = equivalent.getOutputName(); - return clusteringColumns.indexOf(equivalentName) >= 0 ? equivalentName : null; + return clusteringColumns.contains(equivalentName) ? equivalentName : null; } - return clusteringColumns.indexOf(key) >= 0 ? key : null; + return clusteringColumns.contains(key) ? key : null; } private static boolean matchesAnyPattern( diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java index 9ef2e8ce72b9..716a111d0a7f 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java @@ -38,7 +38,9 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -324,29 +326,6 @@ void testJsonRoundTrip() throws Exception Assertions.assertEquals(original, back); } - // --- Operator-side virtual columns (findEquivalent resolution) --- - - private static VirtualColumns lowerTenantVcs(String outputName) - { - return VirtualColumns.create(new ExpressionVirtualColumn( - outputName, - "lower(tenant)", - ColumnType.STRING, - TestExprMacroTable.INSTANCE - )); - } - - /** Segment clusters on a VC named "tenant_lower" with expression lower(tenant). */ - private static DataSegment vcClusteredSegment(String... lowerTenants) - { - final RowSignature clusteringColumns = RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(); - final List> tuples = new java.util.ArrayList<>(lowerTenants.length); - for (String t : lowerTenants) { - tuples.add(java.util.Collections.singletonList(t)); - } - return segmentWithGroups(new ClusterGroupTuples(clusteringColumns, tuples, lowerTenantVcs("tenant_lower"))); - } - @Test void testOperatorVcResolvesToClusteringVcByEquivalence() { @@ -447,4 +426,24 @@ void testVirtualColumnsOmittedFromJsonWhenEmpty() throws Exception final String json = mapper.writeValueAsString(matcher); Assertions.assertFalse(json.contains("virtualColumns"), () -> "did not expect virtualColumns in JSON: " + json); } + + private static VirtualColumns lowerTenantVcs(String outputName) + { + return VirtualColumns.create(new ExpressionVirtualColumn( + outputName, + "lower(tenant)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + )); + } + + private static DataSegment vcClusteredSegment(String... lowerTenants) + { + final RowSignature clusteringColumns = RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(); + final List> tuples = new ArrayList<>(lowerTenants.length); + for (String t : lowerTenants) { + tuples.add(Collections.singletonList(t)); + } + return segmentWithGroups(new ClusterGroupTuples(clusteringColumns, tuples, lowerTenantVcs("tenant_lower"))); + } } From beaaa0e61b5ea58997128402d771a430d31576ea Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 13 May 2026 12:42:36 -0700 Subject: [PATCH 04/10] adjust --- .../druid/timeline/ClusterGroupTuples.java | 76 +++++++++---------- .../druid/guice/PartialLoadSpecModule.java | 5 +- ...ildcardClusterGroupPartialLoadMatcher.java | 62 +++++++-------- 3 files changed, 73 insertions(+), 70 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java index 5f5f59c3cc1e..ed60a706689a 100644 --- a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java +++ b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java @@ -115,16 +115,34 @@ public VirtualColumns getVirtualColumns() return virtualColumns; } - private static VirtualColumns internVirtualColumns(@Nullable VirtualColumns virtualColumns) + @Override + public boolean equals(Object o) { - if (virtualColumns == null || virtualColumns.isEmpty()) { - return VirtualColumns.EMPTY; + if (this == o) { + return true; } - return VirtualColumns.create( - Arrays.stream(virtualColumns.getVirtualColumns()) - .map(DataSegment.virtualColumnInterner()::intern) - .toList() - ); + if (!(o instanceof ClusterGroupTuples)) { + return false; + } + ClusterGroupTuples that = (ClusterGroupTuples) o; + return Objects.equals(clusteringColumns, that.clusteringColumns) + && Objects.equals(tuples, that.tuples) + && Objects.equals(virtualColumns, that.virtualColumns); + } + + @Override + public int hashCode() + { + return Objects.hash(clusteringColumns, tuples, virtualColumns); + } + + @Override + public String toString() + { + return "ClusterGroupTuples{clusteringColumns=" + clusteringColumns + + ", tuples=" + tuples + + ", virtualColumns=" + virtualColumns + + '}'; } /** @@ -181,6 +199,18 @@ public static Object coerceValue(String columnName, ColumnType type, @Nullable O ); } + private static VirtualColumns internVirtualColumns(@Nullable VirtualColumns virtualColumns) + { + if (virtualColumns == null || virtualColumns.isEmpty()) { + return VirtualColumns.EMPTY; + } + return VirtualColumns.create( + Arrays.stream(virtualColumns.getVirtualColumns()) + .map(DataSegment.virtualColumnInterner()::intern) + .toList() + ); + } + private static DruidException cannotCoerce(Object raw, String columnName, String targetType) { return InvalidInput.exception( @@ -191,34 +221,4 @@ private static DruidException cannotCoerce(Object raw, String columnName, String targetType ); } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (!(o instanceof ClusterGroupTuples)) { - return false; - } - ClusterGroupTuples that = (ClusterGroupTuples) o; - return Objects.equals(clusteringColumns, that.clusteringColumns) - && Objects.equals(tuples, that.tuples) - && Objects.equals(virtualColumns, that.virtualColumns); - } - - @Override - public int hashCode() - { - return Objects.hash(clusteringColumns, tuples, virtualColumns); - } - - @Override - public String toString() - { - return "ClusterGroupTuples{clusteringColumns=" + clusteringColumns - + ", tuples=" + tuples - + ", virtualColumns=" + virtualColumns - + '}'; - } } diff --git a/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java b/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java index c83f1b83b172..d60ca44c12fc 100644 --- a/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java +++ b/server/src/main/java/org/apache/druid/guice/PartialLoadSpecModule.java @@ -46,7 +46,10 @@ public void configure(Binder binder) public List getJacksonModules() { return List.of( - new SimpleModule().registerSubtypes(PartialProjectionLoadSpec.class, PartialClusterGroupLoadSpec.class) + new SimpleModule().registerSubtypes( + PartialProjectionLoadSpec.class, + PartialClusterGroupLoadSpec.class + ) ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index 68f2e50bd993..16d295a70589 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -44,13 +44,13 @@ /** * Selects cluster groups whose clustering tuples match any of the configured per-column glob patterns, minus any * groups matched by an entry in {@code excludePatterns}. Each pattern is a {@code Map} where keys are - * clustering column names (or operator-side virtual column names — see below) and values are glob patterns matched + * clustering column names (or operator-side virtual column names, see below) and values are glob patterns matched * against the rendered tuple value at that column. Columns omitted from a pattern are treated as wildcards. Glob * syntax (including escape semantics) is shared with {@link WildcardProjectionPartialLoadMatcher} via {@link Globs}. *

* If the operator supplies {@link #getVirtualColumns()}, a pattern key may also reference one of those virtual * columns. At match time, the matcher resolves such a key to a clustering column on the segment via - * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} between the operator's VCs and the segment's clustering + * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} between the matchers VCs and the segment's clustering * VCs (carried on {@link ClusterGroupTuples#getVirtualColumns()}). This lets operators author portable rules, they * write their preferred VC name and expression, and the matcher resolves to whatever name the segment happens to use * for the equivalent clustering VC. @@ -157,6 +157,35 @@ protected List resolveClusterGroupIndices(DataSegment segment) return new ArrayList<>(matched); } + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + WildcardClusterGroupPartialLoadMatcher that = (WildcardClusterGroupPartialLoadMatcher) o; + return Objects.equals(patterns, that.patterns) + && Objects.equals(excludePatterns, that.excludePatterns) + && Objects.equals(virtualColumns, that.virtualColumns); + } + + @Override + public int hashCode() + { + return Objects.hash(patterns, excludePatterns, virtualColumns); + } + + @Override + public String toString() + { + return "WildcardClusterGroupPartialLoadMatcher{patterns=" + patterns + + ", excludePatterns=" + excludePatterns + + ", virtualColumns=" + virtualColumns + "}"; + } + /** * For each compiled pattern, compute the {@code patternKey -> segment clustering column name} map. A {@code null} * entry in the returned list marks a pattern as non-matching for this segment (some pattern key couldn't be @@ -335,33 +364,4 @@ static CompiledGlob of(String glob) return new CompiledGlob(false, Pattern.compile(Globs.globToRegex(glob))); } } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - WildcardClusterGroupPartialLoadMatcher that = (WildcardClusterGroupPartialLoadMatcher) o; - return Objects.equals(patterns, that.patterns) - && Objects.equals(excludePatterns, that.excludePatterns) - && Objects.equals(virtualColumns, that.virtualColumns); - } - - @Override - public int hashCode() - { - return Objects.hash(patterns, excludePatterns, virtualColumns); - } - - @Override - public String toString() - { - return "WildcardClusterGroupPartialLoadMatcher{patterns=" + patterns - + ", excludePatterns=" + excludePatterns - + ", virtualColumns=" + virtualColumns + "}"; - } } From e883536c0a47e81e7c76213ced4727cb34e38a5f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 14 May 2026 04:20:20 -0700 Subject: [PATCH 05/10] cleanup --- .../druid/server/coordinator/rules/Globs.java | 40 +++++++++++++-- ...ildcardClusterGroupPartialLoadMatcher.java | 51 +++++-------------- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java index eed92436e879..b22ce12ee64f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java @@ -21,6 +21,7 @@ import org.apache.druid.error.InvalidInput; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.List; import java.util.regex.Pattern; @@ -37,7 +38,7 @@ * * Other characters are literal; regex metacharacters in literal positions are escaped automatically. */ -final class Globs +public final class Globs { private Globs() { @@ -49,7 +50,7 @@ private Globs() * * @throws org.apache.druid.error.DruidException if {@code glob} ends with an unescaped backslash */ - static String globToRegex(String glob) + public static String globToRegex(String glob) { final StringBuilder sb = new StringBuilder(glob.length() + 4); boolean escaping = false; @@ -80,7 +81,7 @@ static String globToRegex(String glob) return sb.toString(); } - static List compileAll(List globs) + public static List compileAll(List globs) { if (globs.isEmpty()) { return List.of(); @@ -92,7 +93,7 @@ static List compileAll(List globs) return List.copyOf(compiled); } - static boolean matchesAny(String name, List patterns) + public static boolean matchesAny(String name, List patterns) { for (Pattern pattern : patterns) { if (pattern.matcher(name).matches()) { @@ -102,6 +103,18 @@ static boolean matchesAny(String name, List patterns) return false; } + /** + * Compile a glob, special-casing the literal {@code "*"} to a {@link CompiledGlob#matchAny} + * marker that matches any value including null — mirrors the matcher's {@code CompiledGlob}. + */ + static CompiledGlob compile(String glob) + { + if ("*".equals(glob)) { + return CompiledGlob.MATCH_ANY; + } + return new CompiledGlob(false, Pattern.compile(globToRegex(glob))); + } + private static void appendLiteral(StringBuilder sb, char c) { switch (c) { @@ -125,4 +138,23 @@ private static void appendLiteral(StringBuilder sb, char c) sb.append(c); } } + + /** + * Compiled, per-column glob. The literal {@code "*"} short-circuits to a "match any value, including null" flag; + * any other glob compiles to a regex matched against the rendered tuple value (and never matches null). + */ + public static final class CompiledGlob + { + static final CompiledGlob MATCH_ANY = new CompiledGlob(true, null); + + final boolean matchAny; + @Nullable + final Pattern pattern; + + private CompiledGlob(boolean matchAny, @Nullable Pattern pattern) + { + this.matchAny = matchAny; + this.pattern = pattern; + } + } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index 16d295a70589..07a831d64e30 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -72,8 +72,8 @@ public class WildcardClusterGroupPartialLoadMatcher extends ClusterGroupPartialL private final List> patterns; private final List> excludePatterns; private final VirtualColumns virtualColumns; - private final List> compiledPatterns; - private final List> compiledExcludePatterns; + private final List> compiledPatterns; + private final List> compiledExcludePatterns; @JsonCreator public WildcardClusterGroupPartialLoadMatcher( @@ -192,7 +192,7 @@ public String toString() * resolved to a clustering column via either direct name or operator-VC equivalence). */ private List> resolveAll( - List> compiled, + List> compiled, RowSignature clusteringColumns, VirtualColumns segmentVcs ) @@ -201,7 +201,7 @@ private List> resolveAll( return List.of(); } final List> out = new ArrayList<>(compiled.size()); - for (Map pattern : compiled) { + for (Map pattern : compiled) { out.add(resolvePattern(pattern.keySet(), clusteringColumns, segmentVcs)); } return out; @@ -253,7 +253,7 @@ private String resolveKey(String key, RowSignature clusteringColumns, VirtualCol private static boolean matchesAnyPattern( List tuple, RowSignature clusteringColumns, - List> compiledPatterns, + List> compiledPatterns, List> resolvedPatterns ) { @@ -272,15 +272,15 @@ private static boolean matchesAnyPattern( private static boolean matchesPattern( List tuple, RowSignature clusteringColumns, - Map pattern, + Map pattern, Map resolved ) { - for (Map.Entry entry : pattern.entrySet()) { + for (Map.Entry entry : pattern.entrySet()) { final String resolvedColumn = resolved.get(entry.getKey()); final int idx = clusteringColumns.indexOf(resolvedColumn); // resolved is guaranteed to map every patternKey to a real clustering column (else the pattern was skipped). - final CompiledGlob glob = entry.getValue(); + final Globs.CompiledGlob glob = entry.getValue(); if (glob.matchAny) { // Literal "*" — matches every value, including null. continue; @@ -310,16 +310,16 @@ private static List> copyAndValidatePatterns(List> compileAll(List> patterns) + private static List> compileAll(List> patterns) { if (patterns.isEmpty()) { return List.of(); } - final List> out = new ArrayList<>(patterns.size()); + final List> out = new ArrayList<>(patterns.size()); for (Map pattern : patterns) { - final Map compiled = CollectionUtils.newLinkedHashMapWithExpectedSize(pattern.size()); + final Map compiled = CollectionUtils.newLinkedHashMapWithExpectedSize(pattern.size()); for (Map.Entry entry : pattern.entrySet()) { - compiled.put(entry.getKey(), CompiledGlob.of(entry.getValue())); + compiled.put(entry.getKey(), Globs.compile(entry.getValue())); } out.add(Collections.unmodifiableMap(compiled)); } @@ -337,31 +337,4 @@ private static VirtualColumns internVirtualColumns(@Nullable VirtualColumns virt .toList() ); } - - /** - * Compiled per-column glob. The literal {@code "*"} short-circuits to a "match any value, including null" flag; - * any other glob compiles to a regex matched against the rendered tuple value (and never matches null). - */ - private static final class CompiledGlob - { - static final CompiledGlob MATCH_ANY = new CompiledGlob(true, null); - - final boolean matchAny; - @Nullable - final Pattern pattern; - - private CompiledGlob(boolean matchAny, @Nullable Pattern pattern) - { - this.matchAny = matchAny; - this.pattern = pattern; - } - - static CompiledGlob of(String glob) - { - if ("*".equals(glob)) { - return MATCH_ANY; - } - return new CompiledGlob(false, Pattern.compile(Globs.globToRegex(glob))); - } - } } From 2fa00e9af2b9f0513b45737f13a4403419721cbb Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 14 May 2026 16:38:44 -0700 Subject: [PATCH 06/10] add constructor back, etc --- .../apache/druid/timeline/DataSegment.java | 40 +++++++++++++++++++ ...ildcardClusterGroupPartialLoadMatcher.java | 1 - .../server/coordinator/rules/GlobsTest.java | 33 +++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java index 7451d8acf5f2..f9375db5ae95 100644 --- a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java +++ b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java @@ -223,6 +223,46 @@ public DataSegment( ); } + /** + * @deprecated use {@link #builder(SegmentId)} or {@link #builder(DataSegment)} instead. + */ + @Deprecated + public DataSegment( + String dataSource, + Interval interval, + String version, + @Nullable Map loadSpec, + @Nullable List dimensions, + @Nullable List metrics, + @Nullable List projections, + @Nullable ShardSpec shardSpec, + @Nullable CompactionState lastCompactionState, + Integer binaryVersion, + long size, + Integer totalRows, + String indexingStateFingerprint, + PruneSpecsHolder pruneSpecsHolder + ) + { + this( + dataSource, + interval, + version, + loadSpec, + dimensions, + metrics, + projections, + null, + shardSpec, + lastCompactionState, + binaryVersion, + size, + totalRows, + indexingStateFingerprint, + pruneSpecsHolder + ); + } + @JsonCreator private DataSegment( @JsonProperty("dataSource") String dataSource, diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index 07a831d64e30..ae3f68794e7c 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -39,7 +39,6 @@ import java.util.Objects; import java.util.Set; import java.util.TreeSet; -import java.util.regex.Pattern; /** * Selects cluster groups whose clustering tuples match any of the configured per-column glob patterns, minus any diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java index 5ea42b7345ea..04b71158bd7a 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java @@ -108,4 +108,37 @@ void testMatchesAnyEmptyPatternListNeverMatches() { Assertions.assertFalse(Globs.matchesAny("anything", List.of())); } + + @Test + void testCompileLiteralStarReturnsMatchAny() + { + // The literal "*" is special-cased to MATCH_ANY so callers can short-circuit a "matches any value, including + // null" branch without paying a regex match. Any other glob takes the regex path. + final Globs.CompiledGlob compiled = Globs.compile("*"); + Assertions.assertSame(Globs.CompiledGlob.MATCH_ANY, compiled); + Assertions.assertTrue(compiled.matchAny); + Assertions.assertNull(compiled.pattern); + } + + @Test + void testCompileGlobReturnsCompiledRegex() + { + final Globs.CompiledGlob compiled = Globs.compile("us-*"); + Assertions.assertFalse(compiled.matchAny); + Assertions.assertNotNull(compiled.pattern); + Assertions.assertTrue(compiled.pattern.matcher("us-east-1").matches()); + Assertions.assertFalse(compiled.pattern.matcher("eu-west").matches()); + } + + @Test + void testCompileLiteralNullStringHasNoSpecialTreatment() + { + // The string "null" goes through the regex path like any other literal glob, the helper does not give the + // literal-string "null" any special meaning + final Globs.CompiledGlob compiled = Globs.compile("null"); + Assertions.assertFalse(compiled.matchAny); + Assertions.assertNotNull(compiled.pattern); + Assertions.assertTrue(compiled.pattern.matcher("null").matches()); + Assertions.assertFalse(compiled.pattern.matcher("nullx").matches()); + } } From cb0d0f183dc854714655c1022db83f59755582e8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 14 May 2026 17:08:17 -0700 Subject: [PATCH 07/10] cleanup --- .../druid/server/coordinator/rules/Globs.java | 24 ++++++++++++++++++- ...ildcardClusterGroupPartialLoadMatcher.java | 6 ++--- .../server/coordinator/rules/GlobsTest.java | 8 +++---- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java index b22ce12ee64f..867218746869 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java @@ -19,6 +19,7 @@ package org.apache.druid.server.coordinator.rules; +import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import javax.annotation.Nullable; @@ -145,7 +146,7 @@ private static void appendLiteral(StringBuilder sb, char c) */ public static final class CompiledGlob { - static final CompiledGlob MATCH_ANY = new CompiledGlob(true, null); + public static final CompiledGlob MATCH_ANY = new CompiledGlob(true, null); final boolean matchAny; @Nullable @@ -153,8 +154,29 @@ public static final class CompiledGlob private CompiledGlob(boolean matchAny, @Nullable Pattern pattern) { + if (matchAny) { + DruidException.conditionalDefensive(pattern == null, "matchAny=true must not carry a pattern"); + } else { + DruidException.conditionalDefensive(pattern != null, "pattern must be non-null when matchAny=false"); + } this.matchAny = matchAny; this.pattern = pattern; } + + public boolean matches(@Nullable String value) + { + if (matchAny) { + return true; + } + if (value == null) { + return false; + } + return pattern.matcher(value).matches(); + } + + public boolean isMatchAny() + { + return matchAny; + } } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index ae3f68794e7c..7aeb0a3ca898 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -280,8 +280,8 @@ private static boolean matchesPattern( final int idx = clusteringColumns.indexOf(resolvedColumn); // resolved is guaranteed to map every patternKey to a real clustering column (else the pattern was skipped). final Globs.CompiledGlob glob = entry.getValue(); - if (glob.matchAny) { - // Literal "*" — matches every value, including null. + if (glob.isMatchAny()) { + // Literal "*" matches every value, including null. continue; } final Object value = tuple.get(idx); @@ -289,7 +289,7 @@ private static boolean matchesPattern( // Any non-"*" glob cannot match a null clustering value. return false; } - if (!glob.pattern.matcher(value.toString()).matches()) { + if (!glob.matches(value.toString())) { return false; } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java index 04b71158bd7a..b8ad7aa9180a 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/GlobsTest.java @@ -126,8 +126,8 @@ void testCompileGlobReturnsCompiledRegex() final Globs.CompiledGlob compiled = Globs.compile("us-*"); Assertions.assertFalse(compiled.matchAny); Assertions.assertNotNull(compiled.pattern); - Assertions.assertTrue(compiled.pattern.matcher("us-east-1").matches()); - Assertions.assertFalse(compiled.pattern.matcher("eu-west").matches()); + Assertions.assertTrue(compiled.matches("us-east-1")); + Assertions.assertFalse(compiled.matches("eu-west")); } @Test @@ -138,7 +138,7 @@ void testCompileLiteralNullStringHasNoSpecialTreatment() final Globs.CompiledGlob compiled = Globs.compile("null"); Assertions.assertFalse(compiled.matchAny); Assertions.assertNotNull(compiled.pattern); - Assertions.assertTrue(compiled.pattern.matcher("null").matches()); - Assertions.assertFalse(compiled.pattern.matcher("nullx").matches()); + Assertions.assertTrue(compiled.matches("null")); + Assertions.assertFalse(compiled.matches("nullx")); } } From 00b8dc3c099ba7c76a98c7fb867112c320f9baed Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 14 May 2026 17:10:56 -0700 Subject: [PATCH 08/10] oops --- .../org/apache/druid/server/coordinator/rules/Globs.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java index 867218746869..b0f81b98a563 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/Globs.java @@ -105,10 +105,10 @@ public static boolean matchesAny(String name, List patterns) } /** - * Compile a glob, special-casing the literal {@code "*"} to a {@link CompiledGlob#matchAny} - * marker that matches any value including null — mirrors the matcher's {@code CompiledGlob}. + * Compile a glob, special-casing the literal {@code "*"} to a {@link CompiledGlob#matchAny} marker that matches any + * value including null */ - static CompiledGlob compile(String glob) + public static CompiledGlob compile(String glob) { if ("*".equals(glob)) { return CompiledGlob.MATCH_ANY; From de77cf87e82d8a8e0bf3015224b52dac6814f957 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 14 May 2026 20:04:34 -0700 Subject: [PATCH 09/10] use PartialLoadSpec as base for PartialClusterGroupLoadSpec --- .../loading/PartialClusterGroupLoadSpec.java | 82 +++++++------------ .../loading/PartialProjectionLoadSpec.java | 5 -- .../rules/ClusterGroupPartialLoadMatcher.java | 10 +-- 3 files changed, 32 insertions(+), 65 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java b/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java index be749e4910e6..86e562010c00 100644 --- a/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/loading/PartialClusterGroupLoadSpec.java @@ -25,37 +25,43 @@ import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; -import com.google.common.base.Suppliers; import org.apache.druid.utils.CollectionUtils; -import javax.annotation.Nullable; -import java.io.File; -import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Objects; /** - * A {@link LoadSpec} wrapper that carries partial-cluster-group metadata from the coordinator to a historical - * alongside the original backend-specific load spec. The wrapped {@code delegate} is held as a raw {@link Map} so - * that the concrete backend type (e.g. {@code s3}, {@code local}, {@code hdfs}) is materialized only when needed; - * this avoids pulling backend-specific dependencies onto every node that touches the wire form. - *

- * Both {@link #loadSegment(File)} and {@link #openRangeReader()} delegate verbatim to the inner load spec. The - * historical-side partial-load path inspects this wrapper at mount time to learn which cluster-group indices (into - * the segment's {@code clusterGroups.tuples} list) to range-read and the fingerprint identifying the request the - * coordinator made. + * A {@link PartialLoadSpec} that requests partial loading of a clustered segment's cluster groups. The base class + * carries the common {@code fingerprint} and {@code delegate} wire fields; this subtype adds the resolved + * {@code clusterGroupIndices} (positions into {@link org.apache.druid.timeline.ClusterGroupTuples#getTuples()}) that + * the historical should range-read into the local segment. */ @JsonTypeName(PartialClusterGroupLoadSpec.TYPE) -public class PartialClusterGroupLoadSpec implements LoadSpec +public class PartialClusterGroupLoadSpec extends PartialLoadSpec { public static final String TYPE = "partialClusterGroup"; - private final Map delegate; + /** + * Builds the raw wire-form {@link Map} representation of a {@link PartialClusterGroupLoadSpec} request. Used by the + * coordinator-side matcher (which doesn't instantiate the typed class because doing so would require plumbing an + * {@link ObjectMapper} through every matcher just to satisfy the constructor's lazy-delegate supplier). + */ + public static Map wireForm( + Map delegate, + List clusterGroupIndices, + String fingerprint + ) + { + return Map.of( + "type", TYPE, + "delegate", delegate, + "clusterGroupIndices", clusterGroupIndices, + "fingerprint", fingerprint + ); + } + private final List clusterGroupIndices; - private final String fingerprint; - private final Supplier materializedDelegateSupplier; @JsonCreator public PartialClusterGroupLoadSpec( @@ -65,21 +71,12 @@ public PartialClusterGroupLoadSpec( @JacksonInject ObjectMapper jsonMapper ) { - Preconditions.checkNotNull(jsonMapper, "jsonMapper"); - this.delegate = Preconditions.checkNotNull(delegate, "delegate"); + super(delegate, fingerprint, jsonMapper); Preconditions.checkArgument( !CollectionUtils.isNullOrEmpty(clusterGroupIndices), "clusterGroupIndices must not be null or empty" ); this.clusterGroupIndices = List.copyOf(clusterGroupIndices); - this.fingerprint = Preconditions.checkNotNull(fingerprint, "fingerprint"); - this.materializedDelegateSupplier = Suppliers.memoize(() -> jsonMapper.convertValue(delegate, LoadSpec.class)); - } - - @JsonProperty - public Map getDelegate() - { - return delegate; } @JsonProperty @@ -88,25 +85,6 @@ public List getClusterGroupIndices() return clusterGroupIndices; } - @JsonProperty - public String getFingerprint() - { - return fingerprint; - } - - @Override - public LoadSpecResult loadSegment(File destDir) throws SegmentLoadingException - { - return materializedDelegateSupplier.get().loadSegment(destDir); - } - - @Override - @Nullable - public SegmentRangeReader openRangeReader() throws IOException - { - return materializedDelegateSupplier.get().openRangeReader(); - } - @Override public boolean equals(Object o) { @@ -117,24 +95,24 @@ public boolean equals(Object o) return false; } PartialClusterGroupLoadSpec that = (PartialClusterGroupLoadSpec) o; - return Objects.equals(delegate, that.delegate) + return Objects.equals(getDelegate(), that.getDelegate()) && Objects.equals(clusterGroupIndices, that.clusterGroupIndices) - && Objects.equals(fingerprint, that.fingerprint); + && Objects.equals(getFingerprint(), that.getFingerprint()); } @Override public int hashCode() { - return Objects.hash(delegate, clusterGroupIndices, fingerprint); + return Objects.hash(getDelegate(), clusterGroupIndices, getFingerprint()); } @Override public String toString() { return "PartialClusterGroupLoadSpec{" + - "delegate=" + delegate + + "delegate=" + getDelegate() + ", clusterGroupIndices=" + clusterGroupIndices + - ", fingerprint=" + fingerprint + + ", fingerprint=" + getFingerprint() + '}'; } } diff --git a/processing/src/main/java/org/apache/druid/segment/loading/PartialProjectionLoadSpec.java b/processing/src/main/java/org/apache/druid/segment/loading/PartialProjectionLoadSpec.java index 07c642a28879..737a10655898 100644 --- a/processing/src/main/java/org/apache/druid/segment/loading/PartialProjectionLoadSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/loading/PartialProjectionLoadSpec.java @@ -35,11 +35,6 @@ * A {@link PartialLoadSpec} that requests partial loading of a segment's projections. The base class carries the * common {@code fingerprint} and {@code delegate} wire fields; this subtype adds the resolved projection names that * the historical should range-read into the local segment. - *

- * The historical-side partial-load path inspects this wrapper at mount time. Until that path exists, the base - * class's default {@link #loadSegment} performs a full download via the inner delegate, and the announcement layer - * stamps the fingerprint + full size on the response so the coordinator's reconciler counts the replica as a - * satisfying full-fallback rather than re-queuing the load. */ @JsonTypeName(PartialProjectionLoadSpec.TYPE) public class PartialProjectionLoadSpec extends PartialLoadSpec diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java index 879611de5a6d..2dd50818def8 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/ClusterGroupPartialLoadMatcher.java @@ -22,6 +22,7 @@ import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import com.google.common.io.BaseEncoding; +import org.apache.druid.segment.loading.PartialClusterGroupLoadSpec; import org.apache.druid.timeline.DataSegment; import javax.annotation.Nullable; @@ -41,7 +42,6 @@ */ public abstract class ClusterGroupPartialLoadMatcher implements PartialLoadMatcher { - static final String LOAD_SPEC_TYPE = "partialClusterGroup"; static final String FINGERPRINT_VERSION = "v1"; /** @@ -63,13 +63,7 @@ public MatchResult match(DataSegment segment, Map baseLoadSpec) return null; } final String fingerprint = computeFingerprint(resolved); - final Map wrapped = Map.of( - "type", LOAD_SPEC_TYPE, - "delegate", baseLoadSpec, - "clusterGroupIndices", resolved, - "fingerprint", fingerprint - ); - return new MatchResult(wrapped, fingerprint); + return new MatchResult(PartialClusterGroupLoadSpec.wireForm(baseLoadSpec, resolved, fingerprint), fingerprint); } static String computeFingerprint(List sortedDedupedIndices) From 3d80665a1b66cceeb724a9576cb312f09259c4ff Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 14 May 2026 21:30:02 -0700 Subject: [PATCH 10/10] switch ClusterGroupTuples to record class --- .../druid/timeline/ClusterGroupTuples.java | 137 ++++++------------ .../timeline/ClusterGroupTuplesTest.java | 56 +++---- ...ildcardClusterGroupPartialLoadMatcher.java | 8 +- ...ardClusterGroupPartialLoadMatcherTest.java | 2 +- 4 files changed, 80 insertions(+), 123 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java index ed60a706689a..7b16295d3a1a 100644 --- a/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java +++ b/processing/src/main/java/org/apache/druid/timeline/ClusterGroupTuples.java @@ -37,54 +37,29 @@ /** * Typed clustering tuples carried on {@link DataSegment#getClusterGroups()} for clustered base-table segments. Each - * entry in {@link #getTuples()} is one cluster group's clustering-column values, in the order declared by - * {@link #getClusteringColumns()}. Optionally carries the clustering {@link VirtualColumns} when the segment was + * entry in {@link #tuples()} is one cluster group's clustering-column values, in the order declared by + * {@link #clusteringColumns()}. Optionally carries the clustering {@link VirtualColumns} when the segment was * clustered on a virtual-column expression, so that matching for things like partial load rules and query time * segment pruning can make use of this information. + *

+ * The compact constructor validates {@code clusteringColumns}, interns the virtual columns through + * {@link DataSegment#virtualColumnInterner()}, and canonicalizes every tuple value to its declared + * {@link ColumnType} via {@link #coerceValue} so {@link Object#equals} works across the JSON/programmatic boundary. */ -public class ClusterGroupTuples +public record ClusterGroupTuples( + @JsonProperty("clusteringColumns") RowSignature clusteringColumns, + @JsonProperty("virtualColumns") @JsonInclude(JsonInclude.Include.NON_EMPTY) VirtualColumns virtualColumns, + @JsonProperty("tuples") List> tuples +) { - private final RowSignature clusteringColumns; - private final List> tuples; - private final VirtualColumns virtualColumns; - @JsonCreator - public ClusterGroupTuples( - @JsonProperty("clusteringColumns") RowSignature clusteringColumns, - @JsonProperty("tuples") @Nullable List> tuples, - @JsonProperty("virtualColumns") @Nullable VirtualColumns virtualColumns - ) + public ClusterGroupTuples { if (clusteringColumns == null || clusteringColumns.size() == 0) { throw InvalidInput.exception("clusteringColumns must not be null or empty"); } - this.clusteringColumns = clusteringColumns; - this.virtualColumns = internVirtualColumns(virtualColumns); - - final List> source = tuples == null ? Collections.emptyList() : tuples; - final int numCols = clusteringColumns.size(); - final List> coerced = new ArrayList<>(source.size()); - for (int t = 0; t < source.size(); t++) { - final List tuple = source.get(t); - if (tuple == null || tuple.size() != numCols) { - throw InvalidInput.exception( - "tuple[%s] has size [%s] but clusteringColumns size is [%s]", - t, - tuple == null ? "null" : tuple.size(), - numCols - ); - } - final Object[] out = new Object[numCols]; - for (int i = 0; i < numCols; i++) { - final String name = clusteringColumns.getColumnName(i); - final ColumnType type = clusteringColumns.getColumnType(i).orElseThrow( - () -> InvalidInput.exception("clusteringColumn[%s] has no declared type", name) - ); - out[i] = coerceValue(name, type, tuple.get(i)); - } - coerced.add(Collections.unmodifiableList(Arrays.asList(out))); - } - this.tuples = Collections.unmodifiableList(coerced); + virtualColumns = internVirtualColumns(virtualColumns); + tuples = canonicalizeTuples(clusteringColumns, tuples); } /** @@ -93,56 +68,7 @@ public ClusterGroupTuples( */ public ClusterGroupTuples(RowSignature clusteringColumns, @Nullable List> tuples) { - this(clusteringColumns, tuples, null); - } - - @JsonProperty - public RowSignature getClusteringColumns() - { - return clusteringColumns; - } - - @JsonProperty - public List> getTuples() - { - return tuples; - } - - @JsonProperty - @JsonInclude(JsonInclude.Include.NON_EMPTY) - public VirtualColumns getVirtualColumns() - { - return virtualColumns; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (!(o instanceof ClusterGroupTuples)) { - return false; - } - ClusterGroupTuples that = (ClusterGroupTuples) o; - return Objects.equals(clusteringColumns, that.clusteringColumns) - && Objects.equals(tuples, that.tuples) - && Objects.equals(virtualColumns, that.virtualColumns); - } - - @Override - public int hashCode() - { - return Objects.hash(clusteringColumns, tuples, virtualColumns); - } - - @Override - public String toString() - { - return "ClusterGroupTuples{clusteringColumns=" + clusteringColumns - + ", tuples=" + tuples - + ", virtualColumns=" + virtualColumns - + '}'; + this(clusteringColumns, null, tuples); } /** @@ -160,7 +86,7 @@ public String toString() *

* Used by: *

    - *
  • {@link ClusterGroupTuples}'s constructor to canonicalize segment-side tuples (strict).
  • + *
  • {@link ClusterGroupTuples}'s compact constructor to canonicalize segment-side tuples (strict).
  • *
  • Operator-supplied rule tuples in future cluster-group partial-load matchers, which can catch the * exception and treat it as "no match for this segment" rather than a hard failure.
  • *
@@ -211,6 +137,37 @@ private static VirtualColumns internVirtualColumns(@Nullable VirtualColumns virt ); } + private static List> canonicalizeTuples( + RowSignature clusteringColumns, + @Nullable List> tuples + ) + { + final List> source = tuples == null ? Collections.emptyList() : tuples; + final int numCols = clusteringColumns.size(); + final List> coerced = new ArrayList<>(source.size()); + for (int t = 0; t < source.size(); t++) { + final List tuple = source.get(t); + if (tuple == null || tuple.size() != numCols) { + throw InvalidInput.exception( + "tuple[%s] has size [%s] but clusteringColumns size is [%s]", + t, + tuple == null ? "null" : tuple.size(), + numCols + ); + } + final Object[] out = new Object[numCols]; + for (int i = 0; i < numCols; i++) { + final String name = clusteringColumns.getColumnName(i); + final ColumnType type = clusteringColumns.getColumnType(i).orElseThrow( + () -> InvalidInput.exception("clusteringColumn[%s] has no declared type", name) + ); + out[i] = coerceValue(name, type, tuple.get(i)); + } + coerced.add(Collections.unmodifiableList(Arrays.asList(out))); + } + return Collections.unmodifiableList(coerced); + } + private static DruidException cannotCoerce(Object raw, String columnName, String targetType) { return InvalidInput.exception( diff --git a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java index f8d01bc5375e..674ad78c502e 100644 --- a/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/ClusterGroupTuplesTest.java @@ -93,14 +93,14 @@ void testConstructorRejectsEmptyClusteringColumns() void testConstructorAllowsEmptyTuples() { final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), List.of()); - Assertions.assertTrue(groups.getTuples().isEmpty()); + Assertions.assertTrue(groups.tuples().isEmpty()); } @Test void testConstructorAllowsNullTuplesList() { final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), null); - Assertions.assertTrue(groups.getTuples().isEmpty()); + Assertions.assertTrue(groups.tuples().isEmpty()); } @Test @@ -166,11 +166,11 @@ void testNullsAllowedAtAnyTuplePosition() Arrays.asList(null, null) ) ); - Assertions.assertEquals(3, groups.getTuples().size()); - Assertions.assertNull(groups.getTuples().get(0).get(0)); - Assertions.assertNull(groups.getTuples().get(1).get(1)); - Assertions.assertNull(groups.getTuples().get(2).get(0)); - Assertions.assertNull(groups.getTuples().get(2).get(1)); + Assertions.assertEquals(3, groups.tuples().size()); + Assertions.assertNull(groups.tuples().get(0).get(0)); + Assertions.assertNull(groups.tuples().get(1).get(1)); + Assertions.assertNull(groups.tuples().get(2).get(0)); + Assertions.assertNull(groups.tuples().get(2).get(1)); } @Test @@ -195,8 +195,8 @@ void testCoercionIntegerToLong() tenantPriority(), List.of(List.of("acme", Integer.valueOf(5))) ); - Assertions.assertEquals(Long.class, groups.getTuples().get(0).get(1).getClass()); - Assertions.assertEquals(5L, groups.getTuples().get(0).get(1)); + Assertions.assertEquals(Long.class, groups.tuples().get(0).get(1).getClass()); + Assertions.assertEquals(5L, groups.tuples().get(0).get(1)); } @Test @@ -219,8 +219,8 @@ void testCoercionDoubleToFloat() { final RowSignature sig = RowSignature.builder().add("temp", ColumnType.FLOAT).build(); final ClusterGroupTuples groups = new ClusterGroupTuples(sig, List.of(List.of((Object) Double.valueOf(98.6)))); - Assertions.assertEquals(Float.class, groups.getTuples().get(0).get(0).getClass()); - Assertions.assertEquals(98.6f, (Float) groups.getTuples().get(0).get(0), 0.0001f); + Assertions.assertEquals(Float.class, groups.tuples().get(0).get(0).getClass()); + Assertions.assertEquals(98.6f, (Float) groups.tuples().get(0).get(0), 0.0001f); } @Test @@ -255,7 +255,7 @@ void testCoercionAcceptsAnyTypeForString() { final RowSignature sig = RowSignature.builder().add("v", ColumnType.STRING).build(); final ClusterGroupTuples groups = new ClusterGroupTuples(sig, List.of(List.of((Object) Long.valueOf(7)))); - Assertions.assertEquals("7", groups.getTuples().get(0).get(0)); + Assertions.assertEquals("7", groups.tuples().get(0).get(0)); } @Test @@ -270,8 +270,8 @@ void testJsonRoundTripPreservesCoercedTypes() throws Exception final ClusterGroupTuples back = MAPPER.readValue(json, ClusterGroupTuples.class); Assertions.assertEquals(groups, back); // Round-tripped tuples must end up with the same canonical types as the in-memory original. - Assertions.assertEquals(Long.class, back.getTuples().get(0).get(1).getClass()); - Assertions.assertEquals(Long.class, back.getTuples().get(1).get(1).getClass()); + Assertions.assertEquals(Long.class, back.tuples().get(0).get(1).getClass()); + Assertions.assertEquals(Long.class, back.tuples().get(1).get(1).getClass()); } @Test @@ -283,11 +283,11 @@ void testTuplesAreImmutable() ); Assertions.assertThrows( UnsupportedOperationException.class, - () -> groups.getTuples().add(List.of("globex", "us-east-1")) + () -> groups.tuples().add(List.of("globex", "us-east-1")) ); Assertions.assertThrows( UnsupportedOperationException.class, - () -> groups.getTuples().get(0).set(0, "hijacked") + () -> groups.tuples().get(0).set(0, "hijacked") ); } @@ -295,7 +295,7 @@ void testTuplesAreImmutable() void testVirtualColumnsDefaultEmpty() { final ClusterGroupTuples groups = new ClusterGroupTuples(tenantRegion(), List.of()); - Assertions.assertSame(VirtualColumns.EMPTY, groups.getVirtualColumns()); + Assertions.assertSame(VirtualColumns.EMPTY, groups.virtualColumns()); } @Test @@ -303,10 +303,10 @@ void testVirtualColumnsAreStored() { final ClusterGroupTuples groups = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), - List.of(List.of("acme")), - VIRTUAL_COLUMNS + VIRTUAL_COLUMNS, + List.of(List.of("acme")) ); - Assertions.assertNotNull(groups.getVirtualColumns().getVirtualColumn("tenant_lower")); + Assertions.assertNotNull(groups.virtualColumns().getVirtualColumn("tenant_lower")); } @Test @@ -314,8 +314,8 @@ void testVirtualColumnsJsonRoundTrip() throws Exception { final ClusterGroupTuples original = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), - List.of(List.of("acme")), - VIRTUAL_COLUMNS + VIRTUAL_COLUMNS, + List.of(List.of("acme")) ); // Round-trip needs an injectable ExprMacroTable for ExpressionVirtualColumn deserialization. final ObjectMapper mapper = new DefaultObjectMapper(); @@ -344,17 +344,17 @@ void testVirtualColumnInternerSharesAcrossInstances() // shared interner on DataSegment, so identical clustering VCs dedupe across segments held in memory. final ClusterGroupTuples a = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), - List.of(List.of("acme")), - VIRTUAL_COLUMNS + VIRTUAL_COLUMNS, + List.of(List.of("acme")) ); final ClusterGroupTuples b = new ClusterGroupTuples( RowSignature.builder().add("tenant_lower", ColumnType.STRING).build(), - List.of(List.of("globex")), - VIRTUAL_COLUMNS + VIRTUAL_COLUMNS, + List.of(List.of("globex")) ); Assertions.assertSame( - a.getVirtualColumns().getVirtualColumns()[0], - b.getVirtualColumns().getVirtualColumns()[0] + a.virtualColumns().getVirtualColumns()[0], + b.virtualColumns().getVirtualColumns()[0] ); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java index 7aeb0a3ca898..bf88d2952d0c 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcher.java @@ -50,7 +50,7 @@ * If the operator supplies {@link #getVirtualColumns()}, a pattern key may also reference one of those virtual * columns. At match time, the matcher resolves such a key to a clustering column on the segment via * {@link VirtualColumns#findEquivalent(VirtualColumns.Node)} between the matchers VCs and the segment's clustering - * VCs (carried on {@link ClusterGroupTuples#getVirtualColumns()}). This lets operators author portable rules, they + * VCs (carried on {@link ClusterGroupTuples#virtualColumns()}). This lets operators author portable rules, they * write their preferred VC name and expression, and the matcher resolves to whatever name the segment happens to use * for the equivalent clustering VC. *

@@ -132,9 +132,9 @@ protected List resolveClusterGroupIndices(DataSegment segment) if (clusterGroups == null) { return Collections.emptyList(); } - final RowSignature clusteringColumns = clusterGroups.getClusteringColumns(); - final VirtualColumns segmentVcs = clusterGroups.getVirtualColumns(); - final List> tuples = clusterGroups.getTuples(); + final RowSignature clusteringColumns = clusterGroups.clusteringColumns(); + final VirtualColumns segmentVcs = clusterGroups.virtualColumns(); + final List> tuples = clusterGroups.tuples(); // Per-pattern resolution: which clustering column does each pattern key map to? Resolution is segment-scoped // (depends only on the segment's clustering signature + VCs), so compute it once up front and reuse it across diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java index 716a111d0a7f..0ff1bdc804ed 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/WildcardClusterGroupPartialLoadMatcherTest.java @@ -444,6 +444,6 @@ private static DataSegment vcClusteredSegment(String... lowerTenants) for (String t : lowerTenants) { tuples.add(Collections.singletonList(t)); } - return segmentWithGroups(new ClusterGroupTuples(clusteringColumns, tuples, lowerTenantVcs("tenant_lower"))); + return segmentWithGroups(new ClusterGroupTuples(clusteringColumns, lowerTenantVcs("tenant_lower"), tuples)); } }