
Commit c237522

maomaodev authored and pan3793 committed
[KYUUBI #7369] KSHC support the PURGE TABLE
### Why are the changes needed?

1. Support the `DROP TABLE ... PURGE` syntax in KSHC, so users can explicitly purge table data.
2. Add a compatibility configuration `spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable` (default: false) in KSHC. When set to true, `DROP TABLE` behaves like `PURGE`: data is removed immediately and the HDFS Trash is bypassed (preserving the previous behavior). When set to false, `DROP TABLE` moves table data to the HDFS Trash instead.

### How was this patch tested?

1. Unit test
2. Manual test

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #7370 from maomaodev/kyuubi-7369.

Closes #7369

db989d1 [lifumao] Revert "change code"
cbb70e0 [lifumao] change code
7ad375f [lifumao] change style code
fd2c6ae [lifumao] change code
18a7207 [lifumao] add migration guide
971f000 [lifumao] KSHC support the PURGE TABLE
f6ef135 [lifumao] KSHC support the PURGE TABLE

Authored-by: lifumao <lifumao@tencent.com>
Signed-off-by: Cheng Pan <chengpan@apache.org>
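As a usage sketch (not part of the patch): assuming KSHC is registered as a session catalog named `hive_catalog` (the catalog name, registration key, and table names below are illustrative), the two behaviors surface to an end user like this:

```scala
// Illustrative only: catalog name `hive_catalog`, the registration below, and
// the table names are assumptions, not taken from this patch.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .config(
    "spark.sql.catalog.hive_catalog",
    "org.apache.kyuubi.spark.connector.hive.HiveTableCatalog")
  .getOrCreate()

// Explicit purge: table data is removed immediately, bypassing the HDFS Trash.
spark.sql("DROP TABLE hive_catalog.db.tbl PURGE")

// With the new default (false), a plain DROP TABLE moves data to the HDFS Trash.
spark.sql("DROP TABLE hive_catalog.db.tbl2")

// Restore the pre-1.12 purge-on-drop behavior for the current session.
spark.conf.set("spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable", "true")
```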
1 parent e0e822d commit c237522

4 files changed

Lines changed: 33 additions & 2 deletions


docs/deployment/migration-guide.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -21,6 +21,7 @@
 
 * Since Kyuubi 1.12, the support of variable `<KYUUBI_HOME>` substitution in config `kyuubi.metadata.store.jdbc.url` is deprecated, use `{{KYUUBI_HOME}}` instead.
 * Since Kyuubi 1.12, default value of `kyuubi.metrics.json.location` is changed to `{{KYUUBI_HOME}}/metrics`, to restore previous behavior, change it to `{{KYUUBI_WORK_DIR_ROOT}}/metrics`.
+* Since Kyuubi 1.12, the configuration `spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable` is introduced by the Kyuubi Spark Hive Connector (KSHC) to control whether the DROP TABLE command completely removes its data by skipping the HDFS trash. The default value is false. To restore the legacy behavior, set it to true.
 
 ## Upgrading from Kyuubi 1.10 to 1.11
```
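A minimal sketch of the restore-legacy-behavior step this note describes (the builder usage is illustrative; the same key can equally be set in `spark-defaults.conf`):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative: restore the pre-1.12 purge-on-drop behavior for this
// application; equivalently, set the key in spark-defaults.conf.
val spark = SparkSession.builder()
  .config("spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable", "true")
  .getOrCreate()
```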

extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/HiveTableCatalog.scala

Lines changed: 12 additions & 2 deletions
```diff
@@ -48,6 +48,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 import org.apache.kyuubi.spark.connector.hive.HiveConnectorUtils.withSparkSQLConf
 import org.apache.kyuubi.spark.connector.hive.HiveTableCatalog.{getStorageFormatAndProvider, toCatalogDatabase, CatalogDatabaseHelper, IdentifierHelper, NamespaceHelper}
+import org.apache.kyuubi.spark.connector.hive.KyuubiHiveConnectorConf.DROP_TABLE_AS_PURGE_TABLE
 import org.apache.kyuubi.spark.connector.hive.KyuubiHiveConnectorDelegationTokenProvider.metastoreTokenSignature
 import org.apache.kyuubi.util.reflect.{DynClasses, DynConstructors}
 
@@ -392,14 +393,23 @@ class HiveTableCatalog(sparkSession: SparkSession)
     loadTable(ident)
   }
 
-  override def dropTable(ident: Identifier): Boolean =
+  override def purgeTable(ident: Identifier): Boolean = {
+    dropTableInternal(ident, purge = true)
+  }
+
+  override def dropTable(ident: Identifier): Boolean = {
+    val purge = sessionState.conf.getConf(DROP_TABLE_AS_PURGE_TABLE)
+    dropTableInternal(ident, purge)
+  }
+
+  private def dropTableInternal(ident: Identifier, purge: Boolean): Boolean =
     withSparkSQLConf(LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME -> "true") {
       try {
         if (loadTable(ident) != null) {
           catalog.dropTable(
             ident.asTableIdentifier,
             ignoreIfNotExists = true,
-            purge = true /* skip HDFS trash */ )
+            purge /* whether to skip HDFS trash */ )
           true
         } else {
           false
```
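For context, Spark's DataSource V2 planner routes a plain `DROP TABLE` to `TableCatalog.dropTable` and `DROP TABLE ... PURGE` to `TableCatalog.purgeTable`, which is why the patch overrides both and funnels them into one internal helper. A simplified sketch of the caller side (assumed, not the actual Spark source):

```scala
// Simplified dispatch sketch, assuming a Spark 3.1+ TableCatalog.
import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}

def executeDrop(catalog: TableCatalog, ident: Identifier, purge: Boolean): Boolean =
  if (purge) {
    catalog.purgeTable(ident) // DROP TABLE ... PURGE
  } else {
    catalog.dropTable(ident) // DROP TABLE, now gated by the new config in KSHC
  }
```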

extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/KyuubiHiveConnectorConf.scala

Lines changed: 8 additions & 0 deletions
```diff
@@ -57,4 +57,12 @@ object KyuubiHiveConnectorConf {
     .version("1.11.0")
     .booleanConf
     .createWithDefault(true)
+
+  val DROP_TABLE_AS_PURGE_TABLE =
+    buildConf("spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable")
+      .doc("When enabled, the DROP TABLE command will completely remove its data " +
+        "by skipping HDFS trash, equivalent to the PURGE TABLE command.")
+      .version("1.12.0")
+      .booleanConf
+      .createWithDefault(false)
 }
```
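The entry follows the object's existing `buildConf` pattern, so it is session-scoped and read through `SQLConf`. A hedged sketch of the read path (mirroring the `HiveTableCatalog` change above; the helper name is illustrative):

```scala
// Sketch: reading the new entry from a session's SQLConf, as HiveTableCatalog
// does via sessionState.conf.getConf(DROP_TABLE_AS_PURGE_TABLE).
import org.apache.spark.sql.internal.SQLConf
import org.apache.kyuubi.spark.connector.hive.KyuubiHiveConnectorConf.DROP_TABLE_AS_PURGE_TABLE

def purgeOnDrop(conf: SQLConf): Boolean = conf.getConf(DROP_TABLE_AS_PURGE_TABLE)
```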

extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveCatalogSuite.scala

Lines changed: 12 additions & 0 deletions
```diff
@@ -221,6 +221,18 @@ class HiveCatalogSuite extends KyuubiHiveTest {
     assert(!catalog.tableExists(testIdent))
   }
 
+  test("purgeTable") {
+    assert(!catalog.tableExists(testIdent))
+
+    catalog.createTable(testIdent, schema, Array.empty[Transform], emptyProps)
+
+    assert(catalog.tableExists(testIdent))
+
+    catalog.purgeTable(testIdent)
+
+    assert(!catalog.tableExists(testIdent))
+  }
+
   test("createTable: location") {
     val properties = new util.HashMap[String, String]()
     properties.put(TableCatalog.PROP_PROVIDER, "parquet")
```
