Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4657,6 +4657,12 @@
},
"sqlState" : "42823"
},
"INVALID_TABLE_STATS_VALUE" : {
"message" : [
"The value <value> for table statistics property <key> is not a valid numeric value."
],
"sqlState" : "22023"
},
"INVALID_TEMP_OBJ_QUALIFIER" : {
"message" : [
"Temporary <objectType> <objectName> cannot be qualified with <qualifier>. Temporary objects can only be qualified with SESSION or SYSTEM.SESSION."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,16 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
}
case _ =>
}
// SPARK-47444: Validate that stats-related properties have numeric values.
case SetTableProperties(_, properties) =>
val numericStatsKeys = Set("numRows", "totalSize", "rawDataSize")
properties.foreach { case (key, value) =>
if (numericStatsKeys.contains(key) && scala.util.Try(BigInt(value)).isFailure) {
throw new AnalysisException(
errorClass = "INVALID_TABLE_STATS_VALUE",
messageParameters = Map("key" -> toSQLId(key), "value" -> s"'$value'"))
}
}
case _ =>
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, JobConf}

import org.apache.spark.internal.{Logging, LogKeys}
import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, Resolver}
import org.apache.spark.sql.catalyst.catalog._
Expand Down Expand Up @@ -321,6 +321,15 @@ case class AlterTableSetPropertiesCommand(
override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
val table = catalog.getTableRawMetadata(tableName)
// SPARK-47444: Validate that stats-related properties have numeric values.
val numericStatsKeys = Set("numRows", "totalSize", "rawDataSize")
properties.foreach { case (key, value) =>
if (numericStatsKeys.contains(key) && scala.util.Try(BigInt(value)).isFailure) {
throw new AnalysisException(
errorClass = "INVALID_TABLE_STATS_VALUE",
messageParameters = Map("key" -> toSQLId(key), "value" -> s"'$value'"))
}
}
// This overrides old properties and update the comment parameter of CatalogTable
// with the newly added/modified comment since CatalogTable also holds comment as its
// direct property.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,29 @@ trait AlterTableSetTblPropertiesSuiteBase extends QueryTest with DDLCommandTestU
}
}
}

test("SPARK-47444: reject non-numeric values for table stats properties") {
  withNamespaceAndTable("ns", "tbl") { t =>
    sql(s"CREATE TABLE $t (col1 int) $defaultUsing")
    // Every stats-related key must reject both an empty string and a
    // non-numeric string; the cross product preserves the original order
    // (per key: '' first, then 'abc').
    for {
      key <- Seq("numRows", "totalSize", "rawDataSize")
      badValue <- Seq("", "abc")
    } {
      val e = intercept[AnalysisException] {
        sql(s"ALTER TABLE $t SET TBLPROPERTIES ('$key'='$badValue')")
      }
      checkError(
        exception = e,
        condition = "INVALID_TABLE_STATS_VALUE",
        // `key` is rendered as a quoted SQL identifier, `value` single-quoted.
        parameters = Map("key" -> s"`$key`", "value" -> s"'$badValue'")
      )
    }
    // Well-formed numeric values must still be accepted.
    sql(s"ALTER TABLE $t SET TBLPROPERTIES ('numRows'='100', 'totalSize'='5000')")
  }
}

}