Skip to content

Commit 2887ad3

Browse files
authored
fix bug with NestedCommonFormatColumnFormatSpec default value resolution (apache#18638)
changes: * fix a bug that prevented job and system level auto column format specs from overriding `objectFieldsDictionaryEncoding` and `objectStorageEncoding` even if the user specified no format spec on the column itself
1 parent 73bdc19 commit 2887ad3

3 files changed

Lines changed: 147 additions & 99 deletions

File tree

processing/src/main/java/org/apache/druid/segment/IndexSpec.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,13 @@ public IndexSpec getEffectiveSpec()
267267
}
268268

269269
if (autoColumnFormatSpec != null) {
270-
bob.withAutoColumnFormatSpec(autoColumnFormatSpec.getEffectiveSpec(this));
270+
bob.withAutoColumnFormatSpec(
271+
NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(autoColumnFormatSpec, this)
272+
);
271273
} else if (defaultSpec.autoColumnFormatSpec != null) {
272-
bob.withAutoColumnFormatSpec(defaultSpec.autoColumnFormatSpec.getEffectiveSpec(this));
274+
bob.withAutoColumnFormatSpec(
275+
NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(defaultSpec.autoColumnFormatSpec, this)
276+
);
273277
}
274278

275279
return bob.build();

processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java

Lines changed: 93 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,9 @@
4141
public class NestedCommonFormatColumnFormatSpec
4242
{
4343
private static final NestedCommonFormatColumnFormatSpec DEFAULT =
44-
NestedCommonFormatColumnFormatSpec.builder()
45-
.setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY)
46-
.setObjectStorageEncoding(ObjectStorageEncoding.SMILE)
47-
.build();
44+
builder().setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY)
45+
.setObjectStorageEncoding(ObjectStorageEncoding.SMILE)
46+
.build();
4847

4948
public static Builder builder()
5049
{
@@ -56,102 +55,27 @@ public static Builder builder(NestedCommonFormatColumnFormatSpec spec)
5655
return new Builder(spec);
5756
}
5857

58+
/**
59+
* Create a {@link NestedCommonFormatColumnFormatSpec} with all fields fully populated. Values from the supplied
60+
* column format spec take priority, any null values are then populated by checking
61+
* {@link IndexSpec#getAutoColumnFormatSpec()}, then falling back to fields on {@link IndexSpec} itself if applicable,
62+
* and finally resorting to hard coded defaults.
63+
*/
5964
public static NestedCommonFormatColumnFormatSpec getEffectiveFormatSpec(
6065
@Nullable NestedCommonFormatColumnFormatSpec columnFormatSpec,
6166
IndexSpec indexSpec
6267
)
6368
{
64-
return Objects.requireNonNullElse(columnFormatSpec, DEFAULT).getEffectiveSpec(indexSpec);
65-
}
66-
67-
@Nullable
68-
private final StringEncodingStrategy objectFieldsDictionaryEncoding;
69-
@Nullable
70-
private final ObjectStorageEncoding objectStorageEncoding;
71-
@Nullable
72-
private final CompressionStrategy objectStorageCompression;
73-
@Nullable
74-
private final StringEncodingStrategy stringDictionaryEncoding;
75-
@Nullable
76-
private final CompressionStrategy dictionaryEncodedColumnCompression;
77-
@Nullable
78-
private final CompressionFactory.LongEncodingStrategy longColumnEncoding;
79-
@Nullable
80-
private final CompressionStrategy longColumnCompression;
81-
@Nullable
82-
private final CompressionStrategy doubleColumnCompression;
83-
@Nullable
84-
private final BitmapSerdeFactory bitmapEncoding;
85-
86-
@JsonCreator
87-
public NestedCommonFormatColumnFormatSpec(
88-
@JsonProperty("objectFieldsDictionaryEncoding") @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding,
89-
@JsonProperty("objectStorageEncoding") @Nullable ObjectStorageEncoding objectStorageEncoding,
90-
@JsonProperty("objectStorageCompression") @Nullable CompressionStrategy objectStorageCompression,
91-
@JsonProperty("stringDictionaryEncoding") @Nullable StringEncodingStrategy stringDictionaryEncoding,
92-
@JsonProperty("dictionaryEncodedColumnCompression") @Nullable CompressionStrategy dictionaryEncodedColumnCompression,
93-
@JsonProperty("longColumnEncoding") @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding,
94-
@JsonProperty("longColumnCompression") @Nullable CompressionStrategy longColumnCompression,
95-
@JsonProperty("doubleColumnCompression") @Nullable CompressionStrategy doubleColumnCompression
96-
)
97-
{
98-
this(
99-
objectFieldsDictionaryEncoding,
100-
objectStorageEncoding,
101-
objectStorageCompression,
102-
stringDictionaryEncoding,
103-
dictionaryEncodedColumnCompression,
104-
longColumnEncoding,
105-
longColumnCompression,
106-
doubleColumnCompression,
107-
null
108-
);
109-
}
110-
111-
/**
112-
* Internal constructor used by {@link Builder} to set {@link #bitmapEncoding} during the process of resolving values
113-
* for {@link #getEffectiveSpec(IndexSpec)}. {@link #bitmapEncoding} cannot vary per column, and is always set from
114-
* {@link IndexSpec#getBitmapSerdeFactory()}.
115-
*/
116-
protected NestedCommonFormatColumnFormatSpec(
117-
@Nullable StringEncodingStrategy objectFieldsDictionaryEncoding,
118-
@Nullable ObjectStorageEncoding objectStorageEncoding,
119-
@Nullable CompressionStrategy objectStorageCompression,
120-
@Nullable StringEncodingStrategy stringDictionaryEncoding,
121-
@Nullable CompressionStrategy dictionaryEncodedColumnCompression,
122-
@Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding,
123-
@Nullable CompressionStrategy longColumnCompression,
124-
@Nullable CompressionStrategy doubleColumnCompression,
125-
@Nullable BitmapSerdeFactory bitmapEncoding
126-
)
127-
{
128-
this.objectFieldsDictionaryEncoding = objectFieldsDictionaryEncoding;
129-
this.objectStorageEncoding = objectStorageEncoding;
130-
this.objectStorageCompression = objectStorageCompression;
131-
this.stringDictionaryEncoding = stringDictionaryEncoding;
132-
this.dictionaryEncodedColumnCompression = dictionaryEncodedColumnCompression;
133-
this.longColumnEncoding = longColumnEncoding;
134-
this.longColumnCompression = longColumnCompression;
135-
this.doubleColumnCompression = doubleColumnCompression;
136-
this.bitmapEncoding = bitmapEncoding;
137-
}
69+
final Builder builder = columnFormatSpec == null ? builder() : builder(columnFormatSpec);
13870

139-
/**
140-
* Fully populate all fields of {@link NestedCommonFormatColumnFormatSpec}. Null values are populated first checking
141-
* {@link IndexSpec#getAutoColumnFormatSpec()}, then falling back to fields on {@link IndexSpec} itself if applicable,
142-
* and finally resorting to hard coded defaults.
143-
*/
144-
public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec)
145-
{
146-
// this is a defensive check, the json spec can't set this, only the builder can
147-
if (bitmapEncoding != null && !bitmapEncoding.equals(indexSpec.getBitmapSerdeFactory())) {
71+
// this is a defensive check, the json spec of the column can't set this, only the builder can
72+
if (builder.bitmapEncoding != null && !builder.bitmapEncoding.equals(indexSpec.getBitmapSerdeFactory())) {
14873
throw new ISE(
14974
"bitmapEncoding[%s] does not match indexSpec.bitmap[%s]",
150-
bitmapEncoding,
75+
builder.bitmapEncoding,
15176
indexSpec.getBitmapSerdeFactory()
15277
);
15378
}
154-
Builder builder = new Builder(this);
15579
builder.setBitmapEncoding(indexSpec.getBitmapSerdeFactory());
15680

15781
final NestedCommonFormatColumnFormatSpec defaultSpec;
@@ -161,23 +85,23 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec)
16185
defaultSpec = DEFAULT;
16286
}
16387

164-
if (objectFieldsDictionaryEncoding == null) {
88+
if (builder.objectFieldsDictionaryEncoding == null) {
16589
if (defaultSpec.getObjectFieldsDictionaryEncoding() != null) {
16690
builder.setObjectFieldsDictionaryEncoding(defaultSpec.getObjectFieldsDictionaryEncoding());
16791
} else {
16892
builder.setObjectFieldsDictionaryEncoding(StringEncodingStrategy.DEFAULT);
16993
}
17094
}
17195

172-
if (objectStorageEncoding == null) {
96+
if (builder.objectStorageEncoding == null) {
17397
if (defaultSpec.getObjectStorageEncoding() != null) {
17498
builder.setObjectStorageEncoding(defaultSpec.getObjectStorageEncoding());
17599
} else {
176100
builder.setObjectStorageEncoding(ObjectStorageEncoding.SMILE);
177101
}
178102
}
179103

180-
if (objectStorageCompression == null) {
104+
if (builder.objectStorageCompression == null) {
181105
if (defaultSpec.getObjectStorageCompression() != null) {
182106
builder.setObjectStorageCompression(defaultSpec.getObjectStorageCompression());
183107
} else if (indexSpec.getJsonCompression() != null) {
@@ -187,39 +111,39 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec)
187111
}
188112
}
189113

190-
if (stringDictionaryEncoding == null) {
114+
if (builder.stringDictionaryEncoding == null) {
191115
if (defaultSpec.getStringDictionaryEncoding() != null) {
192116
builder.setStringDictionaryEncoding(defaultSpec.getStringDictionaryEncoding());
193117
} else {
194118
builder.setStringDictionaryEncoding(indexSpec.getStringDictionaryEncoding());
195119
}
196120
}
197121

198-
if (dictionaryEncodedColumnCompression == null) {
122+
if (builder.dictionaryEncodedColumnCompression == null) {
199123
if (defaultSpec.getDictionaryEncodedColumnCompression() != null) {
200124
builder.setDictionaryEncodedColumnCompression(defaultSpec.getDictionaryEncodedColumnCompression());
201125
} else {
202126
builder.setDictionaryEncodedColumnCompression(indexSpec.getDimensionCompression());
203127
}
204128
}
205129

206-
if (longColumnEncoding == null) {
130+
if (builder.longColumnEncoding == null) {
207131
if (defaultSpec.getLongColumnEncoding() != null) {
208132
builder.setLongColumnEncoding(defaultSpec.getLongColumnEncoding());
209133
} else {
210134
builder.setLongColumnEncoding(indexSpec.getLongEncoding());
211135
}
212136
}
213137

214-
if (longColumnCompression == null) {
138+
if (builder.longColumnCompression == null) {
215139
if (defaultSpec.getLongColumnCompression() != null) {
216140
builder.setLongColumnCompression(defaultSpec.getLongColumnCompression());
217141
} else {
218142
builder.setLongColumnCompression(indexSpec.getMetricCompression());
219143
}
220144
}
221145

222-
if (doubleColumnCompression == null) {
146+
if (builder.doubleColumnCompression == null) {
223147
if (defaultSpec.getDoubleColumnCompression() != null) {
224148
builder.setDoubleColumnCompression(defaultSpec.getDoubleColumnCompression());
225149
} else {
@@ -230,6 +154,78 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec)
230154
return builder.build();
231155
}
232156

157+
@Nullable
158+
private final StringEncodingStrategy objectFieldsDictionaryEncoding;
159+
@Nullable
160+
private final ObjectStorageEncoding objectStorageEncoding;
161+
@Nullable
162+
private final CompressionStrategy objectStorageCompression;
163+
@Nullable
164+
private final StringEncodingStrategy stringDictionaryEncoding;
165+
@Nullable
166+
private final CompressionStrategy dictionaryEncodedColumnCompression;
167+
@Nullable
168+
private final CompressionFactory.LongEncodingStrategy longColumnEncoding;
169+
@Nullable
170+
private final CompressionStrategy longColumnCompression;
171+
@Nullable
172+
private final CompressionStrategy doubleColumnCompression;
173+
@Nullable
174+
private final BitmapSerdeFactory bitmapEncoding;
175+
176+
@JsonCreator
177+
public NestedCommonFormatColumnFormatSpec(
178+
@JsonProperty("objectFieldsDictionaryEncoding") @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding,
179+
@JsonProperty("objectStorageEncoding") @Nullable ObjectStorageEncoding objectStorageEncoding,
180+
@JsonProperty("objectStorageCompression") @Nullable CompressionStrategy objectStorageCompression,
181+
@JsonProperty("stringDictionaryEncoding") @Nullable StringEncodingStrategy stringDictionaryEncoding,
182+
@JsonProperty("dictionaryEncodedColumnCompression") @Nullable CompressionStrategy dictionaryEncodedColumnCompression,
183+
@JsonProperty("longColumnEncoding") @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding,
184+
@JsonProperty("longColumnCompression") @Nullable CompressionStrategy longColumnCompression,
185+
@JsonProperty("doubleColumnCompression") @Nullable CompressionStrategy doubleColumnCompression
186+
)
187+
{
188+
this(
189+
objectFieldsDictionaryEncoding,
190+
objectStorageEncoding,
191+
objectStorageCompression,
192+
stringDictionaryEncoding,
193+
dictionaryEncodedColumnCompression,
194+
longColumnEncoding,
195+
longColumnCompression,
196+
doubleColumnCompression,
197+
null
198+
);
199+
}
200+
201+
/**
202+
* Internal constructor used by {@link Builder} to set {@link #bitmapEncoding} during the process of resolving values
203+
* for {@link #getEffectiveFormatSpec(NestedCommonFormatColumnFormatSpec, IndexSpec)}. {@link #bitmapEncoding} cannot
204+
* vary per column, and is always set from {@link IndexSpec#getBitmapSerdeFactory()}.
205+
*/
206+
protected NestedCommonFormatColumnFormatSpec(
207+
@Nullable StringEncodingStrategy objectFieldsDictionaryEncoding,
208+
@Nullable ObjectStorageEncoding objectStorageEncoding,
209+
@Nullable CompressionStrategy objectStorageCompression,
210+
@Nullable StringEncodingStrategy stringDictionaryEncoding,
211+
@Nullable CompressionStrategy dictionaryEncodedColumnCompression,
212+
@Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding,
213+
@Nullable CompressionStrategy longColumnCompression,
214+
@Nullable CompressionStrategy doubleColumnCompression,
215+
@Nullable BitmapSerdeFactory bitmapEncoding
216+
)
217+
{
218+
this.objectFieldsDictionaryEncoding = objectFieldsDictionaryEncoding;
219+
this.objectStorageEncoding = objectStorageEncoding;
220+
this.objectStorageCompression = objectStorageCompression;
221+
this.stringDictionaryEncoding = stringDictionaryEncoding;
222+
this.dictionaryEncodedColumnCompression = dictionaryEncodedColumnCompression;
223+
this.longColumnEncoding = longColumnEncoding;
224+
this.longColumnCompression = longColumnCompression;
225+
this.doubleColumnCompression = doubleColumnCompression;
226+
this.bitmapEncoding = bitmapEncoding;
227+
}
228+
233229
@Nullable
234230
@JsonProperty
235231
public StringEncodingStrategy getObjectFieldsDictionaryEncoding()

processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,54 @@ public void testGetEffectiveSpecDefaults()
9494
);
9595
}
9696

97+
@Test
98+
public void testEffectiveSpecIndexSpecOverrides()
99+
{
100+
StringEncodingStrategy frontcoded = new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V1);
101+
NestedCommonFormatColumnFormatSpec defaults = NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(
102+
null,
103+
IndexSpec.builder()
104+
.withAutoColumnFormatSpec(
105+
NestedCommonFormatColumnFormatSpec.builder()
106+
.setObjectFieldsDictionaryEncoding(frontcoded)
107+
.setObjectStorageEncoding(ObjectStorageEncoding.NONE)
108+
.build()
109+
)
110+
.withMetricCompression(CompressionStrategy.LZF)
111+
.build()
112+
.getEffectiveSpec()
113+
);
114+
115+
Assert.assertEquals(
116+
frontcoded,
117+
defaults.getObjectFieldsDictionaryEncoding()
118+
);
119+
Assert.assertEquals(
120+
ObjectStorageEncoding.NONE,
121+
defaults.getObjectStorageEncoding()
122+
);
123+
Assert.assertEquals(
124+
CompressionStrategy.LZ4,
125+
defaults.getObjectStorageCompression()
126+
);
127+
Assert.assertEquals(
128+
IndexSpec.getDefault().getEffectiveSpec().getDimensionCompression(),
129+
defaults.getDictionaryEncodedColumnCompression()
130+
);
131+
Assert.assertEquals(
132+
IndexSpec.getDefault().getEffectiveSpec().getStringDictionaryEncoding(),
133+
defaults.getStringDictionaryEncoding()
134+
);
135+
Assert.assertEquals(
136+
CompressionStrategy.LZF,
137+
defaults.getLongColumnCompression()
138+
);
139+
Assert.assertEquals(
140+
CompressionStrategy.LZF,
141+
defaults.getDoubleColumnCompression()
142+
);
143+
}
144+
97145
@Test
98146
public void testGetEffectiveSpecMerge()
99147
{

0 commit comments

Comments
 (0)