prestodb · anandamideShakyan · May 18, 2025 · Jun 23, 2025 · Jun 25, 2025 · Jun 27, 2025
@@ -476,9 +476,6 @@ private WriterParameters getWriterParametersForExistingUnpartitionedTable(Option
     {
         // Note: temporary table is always empty at this step
         if (!table.getTableType().equals(TEMPORARY_TABLE)) {
-            if (bucketNumber.isPresent()) {
-                throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into bucketed unpartitioned Hive table");
-            }
             if (immutablePartitions) {
                 throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Unpartitioned Hive tables are immutable");
             }

@@ -863,6 +863,117 @@ public void testCreateTableNonSupportedVarcharColumn()
         assertUpdate("CREATE TABLE test_create_table_non_supported_varchar_column (apple varchar(65536))");
     }
 
+    @Test
+    public void testEmptyBucketedTable()
+    {
+        // Exercise every storage format; the scenario is meant to confirm that empty buckets are created correctly for each one
+        testWithAllStorageFormats((session, storageFormat) -> testEmptyBucketedTable(session, storageFormat));
+    }
+
+    private void testEmptyBucketedTable(Session session, HiveStorageFormat storageFormat)
+    {
+        // Cover all four combinations of (optimized partition update serialization, create_empty_bucket_files),
+        // in the order: (true, true), (true, false), (false, true), (false, false)
+        for (boolean optimizedSerialization : new boolean[] {true, false}) {
+            for (boolean createEmpty : new boolean[] {true, false}) {
+                testEmptyBucketedTable(session, storageFormat, optimizedSerialization, createEmpty);
+            }
+        }
+    }
+
+    /**
+     * Creates an empty bucketed, unpartitioned table in the given storage format, checks its
+     * metadata, and verifies that two successive single-row INSERTs both end up in the table.
+     * The table is dropped at the end.
+     *
+     * @param session session used to drop the table and to check that it no longer exists
+     * @param storageFormat value of the table's {@code format} property
+     * @param optimizedPartitionUpdateSerializationEnabled passed to {@code getTableWriteTestingSession} when building the writing session
+     * @param createEmpty value of the {@code create_empty_bucket_files} catalog session property used for the INSERTs
+     */
+    private void testEmptyBucketedTable(Session session, HiveStorageFormat storageFormat, boolean optimizedPartitionUpdateSerializationEnabled, boolean createEmpty)
+    {
+        String tableName = "test_empty_bucketed_table";
+
+        // Column names follow the col_N pattern (col_1, col_2), matching testBucketedTable
+        @Language("SQL") String createTable = "" +
+                "CREATE TABLE " + tableName + " " +
+                "(bucket_key VARCHAR, col_1 VARCHAR, col_2 VARCHAR) " +
+                "WITH (" +
+                "format = '" + storageFormat + "', " +
+                "bucketed_by = ARRAY[ 'bucket_key' ], " +
+                "bucket_count = 11 " +
+                ") ";
+
+        assertUpdate(createTable);
+
+        TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
+        assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
+
+        // The table must be bucketed but not partitioned
+        assertNull(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY));
+        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("bucket_key"));
+        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);
+
+        // Freshly created table holds no rows
+        assertEquals(computeActual("SELECT * from " + tableName).getRowCount(), 0);
+
+        // make sure that we will get one file per bucket regardless of writer count configured
+        Session parallelWriter = Session.builder(getTableWriteTestingSession(optimizedPartitionUpdateSerializationEnabled))
+                .setCatalogSessionProperty(catalog, "create_empty_bucket_files", String.valueOf(createEmpty))
+                .build();
+        // Two separate INSERTs: the second one must append to, not replace, the first
+        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a0', 'b0', 'c0')", 1);
+        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a1', 'b1', 'c1')", 1);
+
+        assertQuery("SELECT * from " + tableName, "VALUES ('a0', 'b0', 'c0'), ('a1', 'b1', 'c1')");
+
+        assertUpdate(session, "DROP TABLE " + tableName);
+        assertFalse(getQueryRunner().tableExists(session, tableName));
+    }
+
+    @Test
+    public void testBucketedTable()
+    {
+        // Repeat the bucketed-table scenario for each storage format, so bucket creation (including empty buckets) is covered for all of them
+        testWithAllStorageFormats((session, storageFormat) -> testBucketedTable(session, storageFormat));
+    }
+
+    private void testBucketedTable(Session session, HiveStorageFormat storageFormat)
+    {
+        // Cover all four combinations of (optimized partition update serialization, create_empty_bucket_files),
+        // in the order: (true, true), (true, false), (false, true), (false, false)
+        for (boolean optimizedSerialization : new boolean[] {true, false}) {
+            for (boolean createEmpty : new boolean[] {true, false}) {
+                testBucketedTable(session, storageFormat, optimizedSerialization, createEmpty);
+            }
+        }
+    }
+
+    /**
+     * Creates a bucketed, unpartitioned table via CREATE TABLE AS with three rows, checks its
+     * metadata and contents, then verifies that two further single-row INSERTs are appended to
+     * the existing data. The table is dropped at the end.
+     */
+    private void testBucketedTable(Session session, HiveStorageFormat storageFormat, boolean optimizedPartitionUpdateSerializationEnabled, boolean createEmpty)
+    {
+        String tableName = "test_bucketed_table";
+        String selectAll = "SELECT * from " + tableName;
+        String initialRows = "('a', 'b', 'c'), ('aa', 'bb', 'cc'), ('aaa', 'bbb', 'ccc')";
+
+        // Writing session with create_empty_bucket_files set explicitly; the stated intent is
+        // one file per bucket regardless of the configured writer count
+        Session writeSession = Session.builder(getTableWriteTestingSession(optimizedPartitionUpdateSerializationEnabled))
+                .setCatalogSessionProperty(catalog, "create_empty_bucket_files", String.valueOf(createEmpty))
+                .build();
+
+        @Language("SQL") String createTableAsSelect =
+                "CREATE TABLE " + tableName +
+                " WITH (format = '" + storageFormat +
+                "', bucketed_by = ARRAY[ 'bucket_key' ], bucket_count = 11 ) AS SELECT * FROM (VALUES " +
+                "  (VARCHAR 'a', VARCHAR 'b', VARCHAR 'c'), " +
+                "  ('aa', 'bb', 'cc'), " +
+                "  ('aaa', 'bbb', 'ccc')" +
+                ") t (bucket_key, col_1, col_2)";
+        assertUpdate(writeSession, createTableAsSelect, 3);
+
+        // The table must carry the requested format and bucketing, and no partitioning
+        TableMetadata metadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
+        assertEquals(metadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
+        assertNull(metadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY));
+        assertEquals(metadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("bucket_key"));
+        assertEquals(metadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);
+
+        assertQuery(selectAll, "VALUES " + initialRows);
+
+        // Each INSERT adds one row on top of what is already stored
+        assertUpdate(writeSession, "INSERT INTO " + tableName + " VALUES ('a0', 'b0', 'c0')", 1);
+        assertUpdate(writeSession, "INSERT INTO " + tableName + " VALUES ('a1', 'b1', 'c1')", 1);
+
+        assertQuery(selectAll, "VALUES " + initialRows + ", ('a0', 'b0', 'c0'), ('a1', 'b1', 'c1')");
+
+        assertUpdate(session, "DROP TABLE " + tableName);
+        assertFalse(getQueryRunner().tableExists(session, tableName));
+    }
+
     @Test
     public void testCreatePartitionedBucketedTableAsFewRows()
     {

@@ -156,6 +156,11 @@
             <version>${project.version}</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>io.prestodb.tempto</groupId>
+            <artifactId>tempto-core</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>

@@ -0,0 +1,86 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.nativetests;
+
+import com.facebook.presto.testing.QueryRunner;
+import com.facebook.presto.tests.AbstractTestQueryFramework;
+import org.intellij.lang.annotations.Language;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import static io.prestodb.tempto.fulfillment.table.hive.tpch.TpchTableDefinitions.NATION;
+import static java.lang.Boolean.parseBoolean;
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Native-execution test for inserting into a bucketed, unpartitioned Hive table.
+ * <p>
+ * The storage format and the sidecar flag are read from the {@code storageFormat} and
+ * {@code sidecarEnabled} system properties (defaults: {@code PARQUET} and {@code true}).
+ */
+public class TestHivePartitionedInsertNative
+        extends AbstractTestQueryFramework
+{
+    private String storageFormat;
+    private boolean sidecarEnabled;
+    QueryRunner queryRunner;
+
+    @BeforeClass
+    @Override
+    public void init() throws Exception
+    {
+        // Read the configuration first: createQueryRunner() and createTables() use these fields
+        // (presumably they are invoked from super.init() -- confirm against AbstractTestQueryFramework)
+        storageFormat = System.getProperty("storageFormat", "PARQUET");
+        sidecarEnabled = parseBoolean(System.getProperty("sidecarEnabled", "true"));
+        super.init();
+    }
+
+    @Override
+    protected QueryRunner createQueryRunner() throws Exception
+    {
+        // Keep a reference so the test can issue statements directly against the runner
+        queryRunner = NativeTestsUtils.createNativeQueryRunner(storageFormat, sidecarEnabled);
+        return queryRunner;
+    }
+
+    @Override
+    protected void createTables()
+    {
+        NativeTestsUtils.createTables(storageFormat);
+    }
+
+    /**
+     * Inserts the nation table twice into a bucketed, unpartitioned table and checks that the
+     * second insert appended to the first (50 rows in total, 10 of them with n_regionkey = 0).
+     */
+    @Test
+    public void testInsertIntoBucketedTables()
+    {
+        String tableName = "hive.tpch.bucketed_nation";
+        String sourceTable = NATION.getName();
+
+        // Clean up anything left behind by a previous run
+        queryRunner.execute("DROP TABLE IF EXISTS " + tableName);
+
+        // Create the bucketed table in the storage format this test run is configured for
+        @Language("SQL") String createTableSql = "CREATE TABLE " + tableName + " (\n" +
+                "    n_nationkey BIGINT,\n" +
+                "    n_name VARCHAR,\n" +
+                "    n_regionkey BIGINT,\n" +
+                "    n_comment VARCHAR\n" +
+                ")\n" +
+                "WITH (\n" +
+                "    format = '" + storageFormat + "',\n" +
+                "    bucketed_by = ARRAY['n_regionkey'],\n" +
+                "    bucket_count = 2\n" +
+                ")";
+        queryRunner.execute(createTableSql);
+
+        try {
+            // Insert the same data twice; the second insert must not overwrite the first
+            queryRunner.execute("INSERT INTO " + tableName + " SELECT * FROM " + sourceTable);
+            queryRunner.execute("INSERT INTO " + tableName + " SELECT * FROM " + sourceTable);
+
+            // Compare the scalar values rather than the toString() form of the result
+            assertEquals(queryRunner.execute("SELECT count(*) FROM " + tableName).getOnlyValue(), 50L);
+            assertEquals(queryRunner.execute("SELECT count(*) FROM " + tableName + " WHERE n_regionkey = 0").getOnlyValue(), 10L);
+        }
+        finally {
+            // Do not leave the table behind for other tests sharing the catalog
+            queryRunner.execute("DROP TABLE IF EXISTS " + tableName);
+        }
+    }
+}
@@ -278,13 +278,18 @@ public void testInsertBucketed()
             assertThat(statisticsAfterCreate.getNumRows().getAsLong()).isEqualTo(25);
             assertThat(statisticsAfterCreate.getNumFiles().getAsLong()).isEqualTo(50);
 
-            // Insert into bucketed unpartitioned table is unsupported
-            assertThatThrownBy(() -> insertNationData(onPresto(), tableName))
-                    .hasMessageContaining("Cannot insert into bucketed unpartitioned Hive table");
+            insertNationData(onPresto(), tableName);
 
             BasicStatistics statisticsAfterInsert = getBasicStatisticsForTable(onHive(), tableName);
-            assertThat(statisticsAfterInsert.getNumRows().getAsLong()).isEqualTo(25);
-            assertThat(statisticsAfterCreate.getNumFiles().getAsLong()).isEqualTo(50);
+
+            assertThat(statisticsAfterInsert.getNumRows().getAsLong()).isEqualTo(50);
+            assertThat(statisticsAfterInsert.getNumFiles().getAsLong()).isEqualTo(100);
+
+            insertNationData(onPresto(), tableName);
+
+            BasicStatistics statisticsAfterInsert2 = getBasicStatisticsForTable(onHive(), tableName);
+            assertThat(statisticsAfterInsert2.getNumRows().getAsLong()).isEqualTo(75);
+            assertThat(statisticsAfterInsert2.getNumFiles().getAsLong()).isEqualTo(150);
         }
         finally {
             onPresto().executeQuery(format("DROP TABLE IF EXISTS %s", tableName));

@@ -44,35 +44,35 @@ public class TestHiveBucketedTables
         implements RequirementsProvider
 {
     @TableDefinitionsRepository.RepositoryTableDefinition
-    public static final HiveTableDefinition BUCKETED_PARTITIONED_NATION = HiveTableDefinition.builder("bucket_partition_nation")
-            .setCreateTableDDLTemplate("CREATE TABLE %NAME%(" +
-                    "n_nationkey     BIGINT," +
-                    "n_name          STRING," +
-                    "n_regionkey     BIGINT," +
-                    "n_comment       STRING) " +
-                    "PARTITIONED BY (part_key STRING) " +
-                    "CLUSTERED BY (n_regionkey) " +
-                    "INTO 2 BUCKETS " +
-                    "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'")
-            .setNoData()
-            .build();
+    public static final HiveTableDefinition BUCKETED_NATION = bucketTableDefinition("bucket_nation", false, true);
 
     @TableDefinitionsRepository.RepositoryTableDefinition
-    public static final HiveTableDefinition PARTITIONED_NATION = HiveTableDefinition.builder("partitioned_nation")
-            .setCreateTableDDLTemplate("CREATE TABLE %NAME%(" +
-                    "n_nationkey     BIGINT," +
-                    "n_name          STRING," +
-                    "n_regionkey     BIGINT," +
-                    "n_comment       STRING) " +
-                    "PARTITIONED BY (part_key STRING) " +
-                    "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'")
-            .setNoData()
-            .build();
+    public static final HiveTableDefinition BUCKETED_PARTITIONED_NATION = bucketTableDefinition("bucket_partitioned_nation", true, true);
+
+    @TableDefinitionsRepository.RepositoryTableDefinition
+    public static final HiveTableDefinition PARTITIONED_NATION = bucketTableDefinition("partitioned_nation", true, false);
+
+    /**
+     * Builds an empty (no data) nation-shaped Hive table definition.
+     *
+     * @param tableName name of the table definition
+     * @param partitioned when true, the DDL includes {@code PARTITIONED BY (part_key STRING)}
+     * @param bucketed when true, the DDL includes {@code CLUSTERED BY (n_regionkey) INTO 2 BUCKETS}
+     */
+    private static HiveTableDefinition bucketTableDefinition(String tableName, boolean partitioned, boolean bucketed)
+    {
+        return HiveTableDefinition.builder(tableName)
+                .setCreateTableDDLTemplate("CREATE TABLE %NAME%(" +
+                        "n_nationkey     BIGINT," +
+                        "n_name          STRING," +
+                        "n_regionkey     BIGINT," +
+                        "n_comment       STRING) " +
+                        (partitioned ? "PARTITIONED BY (part_key STRING) " : " ") +
+                        // CLUSTERED BY is only valid together with INTO n BUCKETS, so both are emitted or omitted as a unit
+                        (bucketed ? "CLUSTERED BY (n_regionkey) INTO 2 BUCKETS " : " ") +
+                        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'")
+                .setNoData()
+                .build();
+    }
 
     @Override
     public Requirement getRequirements(Configuration configuration)
     {
         return Requirements.compose(
+                MutableTableRequirement.builder(BUCKETED_NATION).withState(CREATED).build(),
                 MutableTableRequirement.builder(BUCKETED_PARTITIONED_NATION).withState(CREATED).build(),
                 immutableTable(NATION));
     }
@@ -167,4 +167,17 @@ private static void disableBucketedExecution()
             throw new RuntimeException(e);
         }
     }
+
+    @Test
+    public void testInsertIntoBucketedTables()
+    {
+        String bucketedNation = mutableTablesState().get(BUCKETED_NATION).getNameInDatabase();
+        String insertNation = format("INSERT INTO %s SELECT * FROM %s", bucketedNation, NATION.getName());
+
+        query(insertNation);
+        // a second, identical insert must add rows rather than replace the first batch
+        query(insertNation);
+
+        // 25 nation rows inserted twice; 5 of them per insert have n_regionkey = 0
+        String countAll = format("SELECT count(*) FROM %s", bucketedNation);
+        assertThat(query(countAll)).containsExactly(row(50));
+        assertThat(query(countAll + " WHERE n_regionkey=0")).containsExactly(row(10));
+    }
 }