Skip to content

Commit b18fe40

Browse files
tdas and cloud-fan authored
[3.2] DeltaCatalog.createTable should respect PROP_IS_MANAGED_LOCATION (#3663)
#### Which Delta project/connector is this regarding?

<!-- Please add the component selected below to the beginning of the pull request title For example: [Spark] Title of my pull request -->

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

<!-- - Describe what this PR changes. - Describe why we need the change. If this PR resolves an issue be sure to include "Resolves #XXX" to correctly link and close the issue upon merge. -->

Even if a table has a location field, it should still be a managed table if `PROP_IS_MANAGED_LOCATION` is set to `true` in the table properties.

Note: this case cannot occur with the Spark integration alone. It is only an issue for third-party catalogs that delegate requests to `DeltaCatalog`, such as Unity Catalog.

## How was this patch tested?

<!-- If tests were added, say they were added here. Please make sure to test the changes thoroughly including negative and positive cases if possible. If the changes were tested in any way other than unit tests, please clarify how you tested step by step (ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future). If the changes were not tested, please explain why. -->

new test

## Does this PR introduce _any_ user-facing changes?

<!-- If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible. If possible, please also clarify if this is a user-facing change compared to the released Delta Lake versions or within the unreleased branches such as master. If no, write 'No'. -->

no

Co-authored-by: Wenchen Fan <[email protected]>
1 parent 8fef464 commit b18fe40

File tree

2 files changed

+42
-5
lines changed

2 files changed

+42
-5
lines changed

spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalog.scala

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,19 @@ class DeltaCatalog extends DelegatingCatalogExtension
146146
.getOrElse(spark.sessionState.catalog.defaultTablePath(id))
147147
val storage = DataSource.buildStorageFormatFromOptions(writeOptions)
148148
.copy(locationUri = Option(loc))
149-
val tableType =
150-
if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
149+
// PROP_IS_MANAGED_LOCATION indicates that the table location is not user-specified but
150+
// system-generated. The table should be created as a managed table in this case.
151+
val isManagedLocation = Option(allTableProperties.get(TableCatalog.PROP_IS_MANAGED_LOCATION))
152+
.exists(_.equalsIgnoreCase("true"))
153+
// Note: Spark generates the table location for managed tables in
154+
// `DeltaCatalog#delegate#createTable`, so `isManagedLocation` should never be true if
155+
// Unity Catalog is not involved. For safety we also check `isUnityCatalog` here.
156+
val respectManagedLoc = isUnityCatalog || org.apache.spark.util.Utils.isTesting
157+
val tableType = if (location.isEmpty || (isManagedLocation && respectManagedLoc)) {
158+
CatalogTableType.MANAGED
159+
} else {
160+
CatalogTableType.EXTERNAL
161+
}
151162
val commentOpt = Option(allTableProperties.get("comment"))
152163

153164

spark/src/test/scala/org/apache/spark/sql/delta/CustomCatalogSuite.scala

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,22 @@ class CustomCatalogSuite extends QueryTest with SharedSparkSession
296296
}
297297
}
298298
}
299+
300+
test("custom catalog that generates location for managed tables") {
301+
// Reset catalog manager so that the new `spark_catalog` implementation can apply.
302+
spark.sessionState.catalogManager.reset()
303+
withSQLConf("spark.sql.catalog.spark_catalog" -> classOf[DummySessionCatalog].getName) {
304+
withTable("t") {
305+
withTempPath { path =>
306+
sql(s"CREATE TABLE t (id LONG) USING delta TBLPROPERTIES (fakeLoc='$path')")
307+
val t = spark.sessionState.catalogManager.v2SessionCatalog.asInstanceOf[TableCatalog]
308+
.loadTable(Identifier.of(Array("default"), "t"))
309+
// It should be a managed table.
310+
assert(!t.properties().containsKey(TableCatalog.PROP_EXTERNAL))
311+
}
312+
}
313+
}
314+
}
299315
}
300316

301317
class DummyCatalog extends TableCatalog {
@@ -396,9 +412,10 @@ class DummySessionCatalogInner extends DelegatingCatalogExtension {
396412
}
397413

398414
// A dummy catalog that adds a layer between DeltaCatalog and the Spark SessionCatalog,
399-
// to attach additional table storage properties after the table is loaded.
415+
// to attach additional table storage properties after the table is loaded, and generates location
416+
// for managed tables.
400417
class DummySessionCatalog extends TableCatalog {
401-
private var deltaCatalog: DelegatingCatalogExtension = null
418+
private var deltaCatalog: DeltaCatalog = null
402419

403420
override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
404421
val inner = new DummySessionCatalogInner()
@@ -421,7 +438,16 @@ class DummySessionCatalog extends TableCatalog {
421438
schema: StructType,
422439
partitions: Array[Transform],
423440
properties: java.util.Map[String, String]): Table = {
424-
deltaCatalog.createTable(ident, schema, partitions, properties)
441+
if (!properties.containsKey(TableCatalog.PROP_EXTERNAL) &&
442+
!properties.containsKey(TableCatalog.PROP_LOCATION)) {
443+
val newProps = new java.util.HashMap[String, String]
444+
newProps.putAll(properties)
445+
newProps.put(TableCatalog.PROP_LOCATION, properties.get("fakeLoc"))
446+
newProps.put(TableCatalog.PROP_IS_MANAGED_LOCATION, "true")
447+
deltaCatalog.createTable(ident, schema, partitions, newProps)
448+
} else {
449+
deltaCatalog.createTable(ident, schema, partitions, properties)
450+
}
425451
}
426452

427453
override def alterTable(ident: Identifier, changes: TableChange*): Table = {

0 commit comments

Comments
 (0)