apache
diff --git a/‎benchmarks/tpc/engines/comet-hashjoin.toml‎
Lines changed: 0 additions & 1 deletion b/‎benchmarks/tpc/engines/comet-hashjoin.toml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎benchmarks/tpc/engines/comet.toml‎
Lines changed: 0 additions & 1 deletion b/‎benchmarks/tpc/engines/comet.toml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎spark/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 0 additions & 19 deletions b/‎spark/src/main/scala/org/apache/comet/CometConf.scala‎
Lines changed: 0 additions & 19 deletions
diff --git a/‎spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala‎
Lines changed: 1 addition & 1 deletion b/‎spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala‎
Lines changed: 16 additions & 26 deletions b/‎spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala‎
Lines changed: 16 additions & 26 deletions
diff --git a/‎spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala‎
Lines changed: 11 additions & 30 deletions b/‎spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala‎
Lines changed: 11 additions & 30 deletions
diff --git a/‎spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala‎
Lines changed: 10 additions & 12 deletions b/‎spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala‎
Lines changed: 10 additions & 12 deletions
diff --git a/‎spark/src/test/scala/org/apache/comet/CometCastSuite.scala‎
Lines changed: 2 additions & 7 deletions b/‎spark/src/test/scala/org/apache/comet/CometCastSuite.scala‎
Lines changed: 2 additions & 7 deletions
diff --git a/‎spark/src/test/scala/org/apache/comet/CometCsvExpressionSuite.scala‎
Lines changed: 1 addition & 3 deletions b/‎spark/src/test/scala/org/apache/comet/CometCsvExpressionSuite.scala‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala‎
Lines changed: 0 additions & 15 deletions b/‎spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala‎
Lines changed: 0 additions & 15 deletions
@@ -30,6 +30,5 @@ driver_class_path = ["$COMET_JAR"]
 "spark.executor.extraClassPath" = "$COMET_JAR"
 "spark.plugins" = "org.apache.spark.CometPlugin"
 "spark.shuffle.manager" = "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager"
-"spark.comet.scan.impl" = "native_datafusion"
 "spark.comet.exec.replaceSortMergeJoin" = "true"
 "spark.comet.expression.Cast.allowIncompatible" = "true"
@@ -30,5 +30,4 @@ driver_class_path = ["$COMET_JAR"]
 "spark.executor.extraClassPath" = "$COMET_JAR"
 "spark.plugins" = "org.apache.spark.CometPlugin"
 "spark.shuffle.manager" = "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager"
-"spark.comet.scan.impl" = "native_datafusion"
 "spark.comet.expression.Cast.allowIncompatible" = "true"
@@ -111,25 +111,6 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithEnvVarOrDefault("ENABLE_COMET_WRITE", false)
 
-  @deprecated
-  val SCAN_NATIVE_DATAFUSION = "native_datafusion"
-
-  @deprecated
-  val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat"
-
-  @deprecated
-  val SCAN_AUTO = "auto"
-
-  @deprecated
-  val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl")
-    .category(CATEGORY_TESTING)
-    .internal()
-    .doc("This configuration option is deprecated and has no effect on Comet behavior.")
-    .stringConf
-    .transform(_.toLowerCase(Locale.ROOT))
-    .checkValues(Set(SCAN_NATIVE_DATAFUSION, SCAN_AUTO))
-    .createWithEnvVarOrDefault("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO)
-
   val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.scan.icebergNative.enabled")
       .category(CATEGORY_SCAN)
 
@@ -258,7 +258,7 @@ case class CometExecRule(session: SparkSession)
   private def transform(plan: SparkPlan): SparkPlan = {
     def convertNode(op: SparkPlan): SparkPlan = op match {
       // Fully native scan for V1
-      case scan: CometScanExec if scan.scanImpl == CometConf.SCAN_NATIVE_DATAFUSION =>
+      case scan: CometScanExec =>
         convertToComet(scan, CometNativeScan).getOrElse(scan)
 
       // Fully native Iceberg scan for V2 (iceberg-rust path)
 
@@ -197,9 +197,7 @@ case class CometScanRule(session: SparkSession)
       r: HadoopFsRelation,
       hadoopConf: Configuration): Option[SparkPlan] = {
     if (!COMET_EXEC_ENABLED.get()) {
-      withInfo(
-        scanExec,
-        s"$SCAN_NATIVE_DATAFUSION scan requires ${COMET_EXEC_ENABLED.key} to be enabled")
+      withInfo(scanExec, s"Native Parquet scan requires ${COMET_EXEC_ENABLED.key} to be enabled")
       return None
     }
     // Disabling the vectorized reader opts into parquet-mr's permissive behavior
@@ -210,7 +208,7 @@ case class CometScanRule(session: SparkSession)
       !COMET_SCAN_ALLOW_DISABLED_PARQUET_VECTORIZED_READER.get()) {
       withInfo(
         scanExec,
-        s"$SCAN_NATIVE_DATAFUSION scan is incompatible with " +
+        s"Native Parquet scan is incompatible with " +
           s"${SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key}=false; set " +
           s"${COMET_SCAN_ALLOW_DISABLED_PARQUET_VECTORIZED_READER.key}=true to opt in")
       return None
@@ -219,7 +217,7 @@ case class CometScanRule(session: SparkSession)
       return None
     }
     if (encryptionEnabled(hadoopConf) && !isEncryptionConfigSupported(hadoopConf)) {
-      withInfo(scanExec, s"$SCAN_NATIVE_DATAFUSION does not support encryption")
+      withInfo(scanExec, "Native Parquet scan does not support encryption")
       return None
     }
     if (scanExec.fileConstantMetadataColumns.nonEmpty) {
@@ -244,10 +242,10 @@ case class CometScanRule(session: SparkSession)
       withInfo(scanExec, "Native DataFusion scan does not support row index generation")
       return None
     }
-    if (!isSchemaSupported(scanExec, SCAN_NATIVE_DATAFUSION, r)) {
+    if (!isSchemaSupported(scanExec, r)) {
       return None
     }
-    Some(CometScanExec(scanExec, session, SCAN_NATIVE_DATAFUSION))
+    Some(CometScanExec(scanExec, session))
   }
 
   private def transformV2Scan(scanExec: BatchScanExec): SparkPlan = {
@@ -313,7 +311,7 @@ case class CometScanRule(session: SparkSession)
           return withInfos(scanExec, fallbackReasons.toSet)
         }
 
-        val typeChecker = CometScanTypeChecker(SCAN_NATIVE_DATAFUSION)
+        val typeChecker = CometScanTypeChecker()
         val schemaSupported =
           typeChecker.isSchemaSupported(scanExec.scan.readSchema(), fallbackReasons)
 
@@ -670,48 +668,40 @@ case class CometScanRule(session: SparkSession)
       case _ => false
     }
 
-  private def isSchemaSupported(
-      scanExec: FileSourceScanExec,
-      scanImpl: String,
-      r: HadoopFsRelation): Boolean = {
+  private def isSchemaSupported(scanExec: FileSourceScanExec, r: HadoopFsRelation): Boolean = {
     val fallbackReasons = new ListBuffer[String]()
-    val typeChecker = CometScanTypeChecker(scanImpl)
+    val typeChecker = CometScanTypeChecker()
     val schemaSupported =
       typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
     if (!schemaSupported) {
       withInfo(
         scanExec,
-        s"Unsupported schema ${scanExec.requiredSchema} " +
-          s"for $scanImpl: ${fallbackReasons.mkString(", ")}")
+        s"Unsupported schema ${scanExec.requiredSchema}: ${fallbackReasons.mkString(", ")}")
       return false
     }
     val partitionSchemaSupported =
       typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons)
     if (!partitionSchemaSupported) {
       withInfo(
         scanExec,
-        s"Unsupported partitioning schema ${scanExec.requiredSchema} " +
-          s"for $scanImpl: ${fallbackReasons
-              .mkString(", ")}")
+        s"Unsupported partitioning schema ${r.partitionSchema}: " +
+          fallbackReasons.mkString(", "))
       return false
     }
     true
   }
 }
 
-case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with CometTypeShim {
-
-  // this class is intended to be used with a specific scan impl
-  assert(scanImpl != CometConf.SCAN_AUTO)
+case class CometScanTypeChecker() extends DataTypeSupport with CometTypeShim {
 
   override def isTypeSupported(
       dt: DataType,
       name: String,
       fallbackReasons: ListBuffer[String]): Boolean = {
     dt match {
       case ShortType if CometConf.COMET_PARQUET_UNSIGNED_SMALL_INT_CHECK.get() =>
-        fallbackReasons += s"$scanImpl scan may not handle unsigned UINT_8 correctly for $dt. " +
-          s"Set ${CometConf.COMET_PARQUET_UNSIGNED_SMALL_INT_CHECK.key}=false to allow " +
+        fallbackReasons += s"Native Parquet scan may not handle unsigned UINT_8 correctly for " +
+          s"$dt. Set ${CometConf.COMET_PARQUET_UNSIGNED_SMALL_INT_CHECK.key}=false to allow " +
           "native execution if your data does not contain unsigned small integers. " +
           CometConf.COMPAT_GUIDE
         false
@@ -722,9 +712,9 @@ case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with C
       case s: StructType if isVariantStruct(s) =>
         // Spark 4.0's PushVariantIntoScan rewrites a VariantType column into a struct of typed
         // fields plus per-field VariantMetadata, expecting the scan to honor Parquet variant
-        // shredding semantics. Comet's native scans don't, so fall back to Spark.
+        // shredding semantics. Comet's native scan does not, so fall back to Spark.
         fallbackReasons +=
-          s"Unsupported $name of type VariantType (shredded; not supported by $scanImpl scan)"
+          s"Unsupported $name of type VariantType (shredded; not supported by native scan)"
         false
       case s: StructType if s.fields.isEmpty =>
         false
 
@@ -42,22 +42,15 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.collection._
 
-import org.apache.comet.{CometConf, MetricsSupport}
+import org.apache.comet.MetricsSupport
 import org.apache.comet.parquet.CometParquetFileFormat
 
 /**
- * Comet physical scan node for DataSource V1. Most of the code here follow Spark's
- * [[FileSourceScanExec]].
- *
- * This is a hybrid scan where the native plan will contain a `ScanExec` that reads batches of
- * data from the JVM via JNI. The ultimate source of data may be a JVM implementation such as
- * Spark readers, or could be the `native_iceberg_compat` native scan.
- *
- * Note that scanImpl can only be `native_datafusion` after CometScanRule runs and before
- * CometExecRule runs. It will never be set to `native_datafusion` at execution time
+ * Comet physical scan node for DataSource V1. Most of the code here follows Spark's
+ * [[FileSourceScanExec]]. CometScanRule creates this node and CometExecRule then converts it
+ * into a fully native scan, so it is not expected to survive to execution time.
  */
 case class CometScanExec(
-    scanImpl: String,
     @transient relation: HadoopFsRelation,
     output: Seq[Attribute],
     requiredSchema: StructType,
@@ -72,10 +65,8 @@ case class CometScanExec(
     with ShimCometScanExec
     with CometPlan {
 
-  assert(scanImpl != CometConf.SCAN_AUTO)
-
   override val nodeName: String =
-    s"CometScan [$scanImpl] $relation ${tableIdentifier.map(_.unquotedString).getOrElse("")}"
+    s"CometScan $relation ${tableIdentifier.map(_.unquotedString).getOrElse("")}"
 
   // FIXME: ideally we should reuse wrapped.supportsColumnar, however that fails many tests
   override lazy val supportsColumnar: Boolean =
@@ -154,18 +145,13 @@ case class CometScanExec(
   }
 
   /**
-   * Returns the data filters that are supported for this scan implementation. For
-   * native_datafusion scans, this excludes dynamic pruning filters (subqueries) and null checks
-   * on array columns (see [[isNullCheckOnArrayColumn]]).
+   * Returns the data filters that are supported for this scan. Excludes dynamic pruning filters
+   * (subqueries) and null checks on array columns (see [[isNullCheckOnArrayColumn]]).
    */
   lazy val supportedDataFilters: Seq[Expression] = {
-    if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION) {
-      dataFilters
-        .filterNot(isDynamicPruningFilter)
-        .filterNot(isNullCheckOnArrayColumn)
-    } else {
-      dataFilters
-    }
+    dataFilters
+      .filterNot(isDynamicPruningFilter)
+      .filterNot(isNullCheckOnArrayColumn)
   }
 
   /**
@@ -516,7 +502,6 @@ case class CometScanExec(
 
   override def doCanonicalize(): CometScanExec = {
     CometScanExec(
-      scanImpl,
       relation,
       output.map(QueryPlan.normalizeExpressions(_, output)),
       requiredSchema,
@@ -534,10 +519,7 @@ case class CometScanExec(
 
 object CometScanExec {
 
-  def apply(
-      scanExec: FileSourceScanExec,
-      session: SparkSession,
-      scanImpl: String): CometScanExec = {
+  def apply(scanExec: FileSourceScanExec, session: SparkSession): CometScanExec = {
     // TreeNode.mapProductIterator is protected method.
     def mapProductIterator[B: ClassTag](product: Product, f: Any => B): Array[B] = {
       val arr = Array.ofDim[B](product.productArity)
@@ -563,7 +545,6 @@ object CometScanExec {
     val newArgs = mapProductIterator(scanExec, transform)
     val wrapped = scanExec.makeCopy(newArgs).asInstanceOf[FileSourceScanExec]
     val batchScanExec = CometScanExec(
-      scanImpl,
       wrapped.relation,
       wrapped.output,
       wrapped.requiredSchema,
 
@@ -985,18 +985,16 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
 
   test("size - respect to legacySizeOfNull") {
     val table = "t1"
-    withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION) {
-      withTable(table) {
-        sql(s"create table $table(col array<string>) using parquet")
-        sql(s"insert into $table values(null)")
-        withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
-          checkSparkAnswerAndOperator(sql(s"select size(col) from $table"))
-        }
-        withSQLConf(
-          SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
-          SQLConf.ANSI_ENABLED.key -> "false") {
-          checkSparkAnswerAndOperator(sql(s"select size(col) from $table"))
-        }
+    withTable(table) {
+      sql(s"create table $table(col array<string>) using parquet")
+      sql(s"insert into $table values(null)")
+      withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
+        checkSparkAnswerAndOperator(sql(s"select size(col) from $table"))
+      }
+      withSQLConf(
+        SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+        SQLConf.ANSI_ENABLED.key -> "false") {
+        checkSparkAnswerAndOperator(sql(s"select size(col) from $table"))
       }
     }
   }
 
@@ -1540,13 +1540,8 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast ArrayType to StringType") {
-    val hasIncompatibleType = (dt: DataType) =>
-      if (CometConf.COMET_NATIVE_SCAN_IMPL.get() == "auto") {
-        true
-      } else {
-        !CometScanTypeChecker(CometConf.COMET_NATIVE_SCAN_IMPL.get())
-          .isTypeSupported(dt, "a", ListBuffer.empty)
-      }
+    val hasIncompatibleType =
+      (dt: DataType) => !CometScanTypeChecker().isTypeSupported(dt, "a", ListBuffer.empty)
     Seq(
       BooleanType,
       StringType,
 
@@ -70,9 +70,7 @@ class CometCsvExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper
 
   test("to_csv - with configurable formatting options") {
     val table = "t1"
-    withSQLConf(
-      CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION,
-      CometConf.getExprAllowIncompatConfigKey(classOf[StructsToCsv]) -> "true") {
+    withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[StructsToCsv]) -> "true") {
       withTable(table) {
         val newLinesStr =
           """ abc
 
@@ -23,9 +23,6 @@ import java.time.{Duration, Period}
 
 import scala.util.Random
 
-import org.scalactic.source.Position
-import org.scalatest.Tag
-
 import org.apache.hadoop.fs.Path
 import org.apache.spark.sql.{CometTestBase, DataFrame, Row}
 import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, FromUnixTime, Literal, StructsToJson, TruncDate, TruncTimestamp}
@@ -44,15 +41,6 @@ import org.apache.comet.testing.{DataGenOptions, FuzzDataGenerator}
 class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   import testImplicits._
 
-  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
-      pos: Position): Unit = {
-    super.test(testName, testTags: _*) {
-      withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_AUTO) {
-        testFun
-      }
-    }
-  }
-
   val ARITHMETIC_OVERFLOW_EXCEPTION_MSG =
     """[ARITHMETIC_OVERFLOW] integer overflow. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error"""
   val DIVIDE_BY_ZERO_EXCEPTION_MSG =
@@ -2516,7 +2504,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION,
         CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "true") {
 
         val df = spark.read.parquet(dir.toString())
@@ -2546,7 +2533,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
       withSQLConf(
         CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION,
         CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "true") {
 
         val df = spark.read.parquet(dir.toString())
@@ -3014,7 +3000,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
         CometConf.COMET_EXEC_ENABLED.key -> "true",
         CometConf.COMET_ENABLED.key -> "true",
         CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "false",
-        CometConf.COMET_NATIVE_SCAN_IMPL.key -> "native_datafusion",
         SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true",
         SQLConf.COLUMN_VECTOR_OFFHEAP_ENABLED.key -> offheapEnabled.toString,
         // SPARK-53535 (Spark 4.1+) flipped the default to "false", which preserves the parent