Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
/** Helper methods to create distributions to pass into Apache Gravitino. */
public class Distributions {

/**
* AUTO indicates that the number of buckets is automatically determined by the system (without
* the need for manual specification).
*/
public static final int AUTO = -1;

/** NONE is used to indicate that there is no distribution. */
public static final Distribution NONE =
new DistributionImpl(Strategy.NONE, 0, Expression.EMPTY_EXPRESSION);
Expand Down Expand Up @@ -74,6 +80,17 @@ public static Distribution of(Strategy strategy, int number, Expression... expre
return new DistributionImpl(strategy, number, expressions);
}

/**
* Create a distribution by the given strategy, the number of buckets is not used.
*
* @param strategy The strategy to use
* @param expressions The expressions to distribute by
* @return The created distribution
*/
public static Distribution auto(Strategy strategy, Expression... expressions) {
return new DistributionImpl(strategy, AUTO, expressions);
}

/**
* Create a distribution on columns. Like distribute by (a) or (a, b), for complex like
* distributing by (func(a), b) or (func(a), func(b)), please use {@link DistributionImpl.Builder}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ protected String generateCreateTableSql(
}

if (distribution.number() != 0) {
sqlBuilder.append(" BUCKETS ").append(distribution.number());
sqlBuilder.append(" BUCKETS ").append(DorisUtils.toBucketNumberString(distribution.number()));
}

properties = appendNecessaryProperties(properties);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.stream.Collectors;
import org.apache.gravitino.rel.expressions.NamedReference;
import org.apache.gravitino.rel.expressions.distributions.Distribution;
import org.apache.gravitino.rel.expressions.distributions.Distributions;
import org.apache.gravitino.rel.expressions.distributions.Distributions.DistributionImpl;
import org.apache.gravitino.rel.expressions.distributions.Strategy;
import org.apache.gravitino.rel.expressions.literals.Literal;
Expand All @@ -48,7 +49,7 @@ public final class DorisUtils {

private static final Pattern DISTRIBUTION_INFO_PATTERN =
Pattern.compile(
"DISTRIBUTED BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+))?");
"DISTRIBUTED BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+|AUTO))?");

private static final String LIST_PARTITION = "LIST";
private static final String RANGE_PARTITION = "RANGE";
Expand Down Expand Up @@ -202,11 +203,8 @@ public static Distribution extractDistributionInfoFromSql(String createTableSql)
.map(f -> f.substring(1, f.length() - 1))
.toArray(String[]::new);

// Default bucket number is 1.
int bucketNum = 1;
if (matcher.find(5)) {
bucketNum = Integer.valueOf(matcher.group(5));
}
// Default bucket number is 1, auto is -1.
int bucketNum = extractBucketNum(matcher);

return new DistributionImpl.Builder()
.withStrategy(Strategy.getByName(distributionType))
Expand All @@ -220,4 +218,21 @@ public static Distribution extractDistributionInfoFromSql(String createTableSql)

throw new RuntimeException("Failed to extract distribution info in sql:" + createTableSql);
}

private static int extractBucketNum(Matcher matcher) {
int bucketNum = 1;
if (matcher.find(5)) {
String bucketValue = matcher.group(5);
// Use -1 to indicate auto bucket.
bucketNum =
bucketValue.trim().toUpperCase().equals("AUTO")
? Distributions.AUTO
: Integer.valueOf(bucketValue);
}
return bucketNum;
}

public static String toBucketNumberString(int number) {
return number == Distributions.AUTO ? "AUTO" : String.valueOf(number);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import org.apache.gravitino.rel.expressions.NamedReference;
import org.apache.gravitino.rel.expressions.distributions.Distribution;
import org.apache.gravitino.rel.expressions.distributions.Distributions;
import org.apache.gravitino.rel.expressions.distributions.Strategy;
import org.apache.gravitino.rel.expressions.literals.Literal;
import org.apache.gravitino.rel.expressions.literals.Literals;
import org.apache.gravitino.rel.expressions.sorts.SortOrder;
Expand Down Expand Up @@ -1004,4 +1005,35 @@ void testAllDistribution() {
tableCatalog.dropTable(tableIdentifier);
}
}

@Test
void testAllDistributionWithAuto() {
Distribution distribution =
Distributions.auto(Strategy.HASH, NamedReference.field(DORIS_COL_NAME1));

String tableName = GravitinoITUtils.genRandomName("test_distribution_table");
NameIdentifier tableIdentifier = NameIdentifier.of(schemaName, tableName);
Column[] columns = createColumns();
Index[] indexes = Indexes.EMPTY_INDEXES;
Map<String, String> properties = createTableProperties();
Transform[] partitioning = Transforms.EMPTY_TRANSFORM;
TableCatalog tableCatalog = catalog.asTableCatalog();
tableCatalog.createTable(
tableIdentifier,
columns,
table_comment,
properties,
partitioning,
distribution,
null,
indexes);
// load table
Table loadTable = tableCatalog.loadTable(tableIdentifier);

Assertions.assertEquals(distribution.strategy(), loadTable.distribution().strategy());
Assertions.assertEquals(distribution.number(), loadTable.distribution().number());
Assertions.assertArrayEquals(
distribution.expressions(), loadTable.distribution().expressions());
tableCatalog.dropTable(tableIdentifier);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.gravitino.rel.expressions.distributions.Distribution;
import org.apache.gravitino.rel.expressions.literals.Literal;
import org.apache.gravitino.rel.expressions.literals.Literals;
import org.apache.gravitino.rel.expressions.transforms.Transform;
Expand Down Expand Up @@ -171,4 +172,17 @@ public void testGeneratePartitionSqlFragment() {
partitionSqlFragment = DorisUtils.generatePartitionSqlFragment(partition);
assertEquals("PARTITION `p7` VALUES IN ((\"1\",\"2\"),(\"3\",\"4\"))", partitionSqlFragment);
}

@Test
public void testDistributedInfoPattern() {
String createTableSql =
"CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS 2";
Distribution distribution = DorisUtils.extractDistributionInfoFromSql(createTableSql);
assertEquals(distribution.number(), 2);

String createTableSqlWithAuto =
"CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS AUTO";
Distribution distribution2 = DorisUtils.extractDistributionInfoFromSql(createTableSqlWithAuto);
assertEquals(distribution2.number(), -1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ public DistributionDTO build() {
strategy = strategy == null ? Strategy.HASH : strategy;

Preconditions.checkState(args != null, "expressions cannot be null");
Preconditions.checkState(number >= 0, "bucketNum must be greater than 0");
// Check if the number of buckets is greater than -1, -1 is auto.
Preconditions.checkState(number >= -1, "bucketNum must be greater than -1");
return new DistributionDTO(strategy, number, args);
}
}
Expand Down
12 changes: 12 additions & 0 deletions docs/table-partitioning-distribution-sort-order-indexes.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,21 @@ Distributions.of(Strategy.HASH, 4, NamedReference.field("score"));
```

</TabItem>

<TabItem value="java" label="Java">
if you want to use auto distribution, you can use the following code, it will set the number is -1.

```java
// Auto distribution with strategy and fields
Distributions.auto(Strategy.HASH, NamedReference.field("score"));
```
</TabItem>

</Tabs>




## Sort ordering

To define a sorted order table, you should use the following three components to construct a valid sorted order table.
Expand Down