
Commit c237522

maomaodev authored and pan3793 committed
[KYUUBI #7369] KSHC support the PURGE TABLE
### Why are the changes needed?

1. Support the `DROP TABLE ... PURGE` syntax in KSHC, so users can explicitly purge table data.
2. Add a compatibility configuration `spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable` (default: false) in KSHC. When set to true, `DROP TABLE` behaves like `PURGE`: data is removed immediately and the HDFS Trash is bypassed (preserving the previous behavior). When set to false, `DROP TABLE` moves table data to the HDFS Trash instead.

### How was this patch tested?

1. Unit test
2. Manual test

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #7370 from maomaodev/kyuubi-7369.

Closes #7369

db989d1 [lifumao] Revert "change code"
cbb70e0 [lifumao] change code
7ad375f [lifumao] change style code
fd2c6ae [lifumao] change code
18a7207 [lifumao] add migration guide
971f000 [lifumao] KSHC support the PURGE TABLE
f6ef135 [lifumao] KSHC support the PURGE TABLE

Authored-by: lifumao <lifumao@tencent.com>
Signed-off-by: Cheng Pan <chengpan@apache.org>
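As a usage sketch (not part of the patch): assuming KSHC is registered as a session catalog named `hive_catalog` (the catalog name, registration key, and table names below are illustrative), the two behaviors surface to an end user like this:

```scala
// Illustrative only: catalog name `hive_catalog`, the registration below, and
// the table names are assumptions, not taken from this patch.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .config(
    "spark.sql.catalog.hive_catalog",
    "org.apache.kyuubi.spark.connector.hive.HiveTableCatalog")
  .getOrCreate()

// Explicit purge: table data is removed immediately, bypassing the HDFS Trash.
spark.sql("DROP TABLE hive_catalog.db.tbl PURGE")

// With the new default (false), a plain DROP TABLE moves data to the HDFS Trash.
spark.sql("DROP TABLE hive_catalog.db.tbl2")

// Restore the pre-1.12 purge-on-drop behavior for the current session.
spark.conf.set("spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable", "true")
```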
1 parent e0e822d commit c237522

4 files changed

Lines changed: 33 additions & 2 deletions


docs/deployment/migration-guide.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -21,6 +21,7 @@
 
 * Since Kyuubi 1.12, the support of variable `<KYUUBI_HOME>` substitution in config `kyuubi.metadata.store.jdbc.url` is deprecated, use `{{KYUUBI_HOME}}` instead.
 * Since Kyuubi 1.12, default value of `kyuubi.metrics.json.location` is changed to `{{KYUUBI_HOME}}/metrics`, to restore previous behavior, change it to `{{KYUUBI_WORK_DIR_ROOT}}/metrics`.
+* Since Kyuubi 1.12, the configuration `spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable` is introduced by the Kyuubi Spark Hive Connector (KSHC) to control whether the DROP TABLE command completely removes its data by skipping the HDFS trash. The default value is false. To restore the legacy behavior, set it to true.
 
 ## Upgrading from Kyuubi 1.10 to 1.11
```
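A minimal sketch of the restore-legacy-behavior step this note describes (the builder usage is illustrative; the same key can equally be set in `spark-defaults.conf`):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative: restore the pre-1.12 purge-on-drop behavior for this
// application; equivalently, set the key in spark-defaults.conf.
val spark = SparkSession.builder()
  .config("spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable", "true")
  .getOrCreate()
```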

extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/HiveTableCatalog.scala

Lines changed: 12 additions & 2 deletions
```diff
@@ -48,6 +48,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 import org.apache.kyuubi.spark.connector.hive.HiveConnectorUtils.withSparkSQLConf
 import org.apache.kyuubi.spark.connector.hive.HiveTableCatalog.{getStorageFormatAndProvider, toCatalogDatabase, CatalogDatabaseHelper, IdentifierHelper, NamespaceHelper}
+import org.apache.kyuubi.spark.connector.hive.KyuubiHiveConnectorConf.DROP_TABLE_AS_PURGE_TABLE
 import org.apache.kyuubi.spark.connector.hive.KyuubiHiveConnectorDelegationTokenProvider.metastoreTokenSignature
 import org.apache.kyuubi.util.reflect.{DynClasses, DynConstructors}
 
@@ -392,14 +393,23 @@ class HiveTableCatalog(sparkSession: SparkSession)
     loadTable(ident)
   }
 
-  override def dropTable(ident: Identifier): Boolean =
+  override def purgeTable(ident: Identifier): Boolean = {
+    dropTableInternal(ident, purge = true)
+  }
+
+  override def dropTable(ident: Identifier): Boolean = {
+    val purge = sessionState.conf.getConf(DROP_TABLE_AS_PURGE_TABLE)
+    dropTableInternal(ident, purge)
+  }
+
+  private def dropTableInternal(ident: Identifier, purge: Boolean): Boolean =
     withSparkSQLConf(LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME -> "true") {
       try {
         if (loadTable(ident) != null) {
           catalog.dropTable(
             ident.asTableIdentifier,
             ignoreIfNotExists = true,
-            purge = true /* skip HDFS trash */ )
+            purge /* whether to skip HDFS trash */ )
           true
         } else {
           false
```
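For context, Spark's DataSource V2 planner routes a plain `DROP TABLE` to `TableCatalog.dropTable` and `DROP TABLE ... PURGE` to `TableCatalog.purgeTable`, which is why the patch overrides both and funnels them into one internal helper. A simplified sketch of the caller side (assumed, not the actual Spark source):

```scala
// Simplified dispatch sketch, assuming a Spark 3.1+ TableCatalog.
import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}

def executeDrop(catalog: TableCatalog, ident: Identifier, purge: Boolean): Boolean =
  if (purge) {
    catalog.purgeTable(ident) // DROP TABLE ... PURGE
  } else {
    catalog.dropTable(ident) // DROP TABLE, now gated by the new config in KSHC
  }
```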

extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/KyuubiHiveConnectorConf.scala

Lines changed: 8 additions & 0 deletions
```diff
@@ -57,4 +57,12 @@ object KyuubiHiveConnectorConf {
     .version("1.11.0")
     .booleanConf
     .createWithDefault(true)
+
+  val DROP_TABLE_AS_PURGE_TABLE =
+    buildConf("spark.sql.kyuubi.hive.connector.dropTableAsPurgeTable")
+      .doc("When enabled, the DROP TABLE command will completely remove its data " +
+        "by skipping HDFS trash, equivalent to the PURGE TABLE command.")
+      .version("1.12.0")
+      .booleanConf
+      .createWithDefault(false)
 }
```
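The entry follows the object's existing `buildConf` pattern, so it is session-scoped and read through `SQLConf`. A hedged sketch of the read path (mirroring the `HiveTableCatalog` change above; the helper name is illustrative):

```scala
// Sketch: reading the new entry from a session's SQLConf, as HiveTableCatalog
// does via sessionState.conf.getConf(DROP_TABLE_AS_PURGE_TABLE).
import org.apache.spark.sql.internal.SQLConf
import org.apache.kyuubi.spark.connector.hive.KyuubiHiveConnectorConf.DROP_TABLE_AS_PURGE_TABLE

def purgeOnDrop(conf: SQLConf): Boolean = conf.getConf(DROP_TABLE_AS_PURGE_TABLE)
```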

extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveCatalogSuite.scala

Lines changed: 12 additions & 0 deletions
```diff
@@ -221,6 +221,18 @@ class HiveCatalogSuite extends KyuubiHiveTest {
     assert(!catalog.tableExists(testIdent))
   }
 
+  test("purgeTable") {
+    assert(!catalog.tableExists(testIdent))
+
+    catalog.createTable(testIdent, schema, Array.empty[Transform], emptyProps)
+
+    assert(catalog.tableExists(testIdent))
+
+    catalog.purgeTable(testIdent)
+
+    assert(!catalog.tableExists(testIdent))
+  }
+
   test("createTable: location") {
     val properties = new util.HashMap[String, String]()
     properties.put(TableCatalog.PROP_PROVIDER, "parquet")
```
