apache
diff --git a/‎.github/workflows/pr_build_linux.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/pr_build_linux.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/pr_build_macos.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/pr_build_macos.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎dev/diffs/4.1.1.diff‎
Lines changed: 13 additions & 3 deletions b/‎dev/diffs/4.1.1.diff‎
Lines changed: 13 additions & 3 deletions
diff --git a/‎docs/source/contributor-guide/spark_expressions_support.md‎
Lines changed: 3 additions & 3 deletions b/‎docs/source/contributor-guide/spark_expressions_support.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/source/user-guide/latest/compatibility/expressions/index.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/user-guide/latest/compatibility/expressions/index.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/user-guide/latest/compatibility/expressions/url.md‎
Lines changed: 23 additions & 0 deletions b/‎docs/source/user-guide/latest/compatibility/expressions/url.md‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎docs/source/user-guide/latest/expressions.md‎
Lines changed: 5 additions & 0 deletions b/‎docs/source/user-guide/latest/expressions.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/source/user-guide/latest/iceberg.md‎
Lines changed: 18 additions & 0 deletions b/‎docs/source/user-guide/latest/iceberg.md‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎docs/source/user-guide/latest/index.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/user-guide/latest/index.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/user-guide/latest/scala_java_udfs.md‎
Lines changed: 61 additions & 0 deletions b/‎docs/source/user-guide/latest/scala_java_udfs.md‎
Lines changed: 61 additions & 0 deletions
@@ -302,6 +302,7 @@ jobs:
               org.apache.comet.CometFuzzAggregateSuite
               org.apache.comet.CometFuzzIcebergSuite
               org.apache.comet.CometFuzzMathSuite
+              org.apache.comet.CometCodegenFuzzSuite
               org.apache.comet.DataGeneratorSuite
           - name: "shuffle"
             value: |
@@ -380,6 +381,9 @@ jobs:
               org.apache.comet.expressions.conditional.CometIfSuite
               org.apache.comet.expressions.conditional.CometCoalesceSuite
               org.apache.comet.expressions.conditional.CometCaseWhenSuite
+              org.apache.comet.CometCodegenSuite
+              org.apache.comet.CometCodegenSourceSuite
+              org.apache.comet.CometCodegenHOFSuite
           - name: "sql"
             value: |
               org.apache.spark.sql.CometToPrettyStringSuite
 
@@ -155,6 +155,7 @@ jobs:
               org.apache.comet.CometFuzzAggregateSuite
               org.apache.comet.CometFuzzIcebergSuite
               org.apache.comet.CometFuzzMathSuite
+              org.apache.comet.CometCodegenFuzzSuite
               org.apache.comet.DataGeneratorSuite
           - name: "shuffle"
             value: |
@@ -232,6 +233,9 @@ jobs:
               org.apache.comet.expressions.conditional.CometIfSuite
               org.apache.comet.expressions.conditional.CometCoalesceSuite
               org.apache.comet.expressions.conditional.CometCaseWhenSuite
+              org.apache.comet.CometCodegenSuite
+              org.apache.comet.CometCodegenSourceSuite
+              org.apache.comet.CometCodegenHOFSuite
           - name: "sql"
             value: |
               org.apache.spark.sql.CometToPrettyStringSuite
 
@@ -695,10 +695,20 @@ index e1a2fd33c7c..632f4b695df 100644
              }
            assert(scanOption.isDefined)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
-index b27122a8de2..a4c5aac8212 100644
+index b27122a8de2..3c690dbe788 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
-@@ -470,7 +470,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
+@@ -267,7 +267,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
+     }
+   }
+ 
+-  test("SPARK-33853: explain codegen - check presence of subquery") {
++  test("SPARK-33853: explain codegen - check presence of subquery",
++      IgnoreComet("Comet plan has a different WholeStageCodegen subtree count")) {
+     withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
+       withTempView("df") {
+         val df1 = spark.range(1, 100)
+@@ -470,7 +471,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
      }
    }
 
@@ -708,7 +718,7 @@ index b27122a8de2..a4c5aac8212 100644
      withTempDir { dir =>
        Seq("parquet", "orc", "csv", "json").foreach { fmt =>
          val basePath = dir.getCanonicalPath + "/" + fmt
-@@ -548,7 +549,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
+@@ -548,7 +550,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
    }
  }
 
 
@@ -215,7 +215,7 @@
 ### datetime_funcs
 
 - [ ] add_months
-- [ ] convert_timezone
+- [x] convert_timezone
 - [ ] curdate
 - [ ] current_date
 - [ ] current_time
@@ -413,7 +413,7 @@
 - [ ] randstr
 - [ ] rint
 - [x] round
-- [ ] sec
+- [x] sec
 - [x] shiftleft
 - [x] sign
 - [x] signum
@@ -596,7 +596,7 @@
 
 ### url_funcs
 
-- [ ] parse_url
+- [x] parse_url (Incompatible: native diverges from Spark on edge cases)
 - [x] try_url_decode
   - 4.0.1, 2026-05-05
 - [x] url_decode
 
@@ -36,5 +36,6 @@ math
 misc
 string
 struct
+url
 cast
 ```
@@ -0,0 +1,23 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# URL Expressions
+
+<!--BEGIN:EXPR_COMPAT[url]-->
+<!--END:EXPR_COMPAT-->
@@ -101,6 +101,7 @@ of expressions that be disabled.
 
 | Expression       | SQL                          |
 | ---------------- | ---------------------------- |
+| ConvertTimezone  | `convert_timezone`           |
 | CurrentTimeZone  | `current_timezone`           |
 | DateAdd          | `date_add`                   |
 | DateDiff         | `datediff`                   |
@@ -115,6 +116,7 @@ of expressions that be disabled.
 | LastDay          | `last_day`                   |
 | LocalTimestamp   | `localtimestamp`             |
 | MakeDate         | `make_date`                  |
+| MakeTime         | `make_time`                  |
 | Minute           | `minute`                     |
 | NextDay          | `next_day`                   |
 | Second           | `second`                     |
@@ -131,6 +133,8 @@ of expressions that be disabled.
 | DayOfYear        | `dayofyear`                  |
 | WeekOfYear       | `weekofyear`                 |
 | Quarter          | `quarter`                    |
+| ToTime           | `to_time`                    |
+| TryToTime        | `try_to_time`                |
 
 ## Math Expressions
 
@@ -171,6 +175,7 @@ of expressions that be disabled.
 | Randn          | `randn`        |
 | Remainder      | `%`            |
 | Round          | `round`        |
+| Sec            | `sec`          |
 | Signum         | `signum`       |
 | Sin            | `sin`          |
 | Sinh           | `sinh`         |
 
@@ -146,6 +146,24 @@ The following scenarios will fall back to Spark's native Iceberg reader:
 - Dynamic Partition Pruning under Adaptive Query Execution (non-AQE DPP is supported);
   see [#3510](https://github.com/apache/datafusion-comet/issues/3510)
 
+### Iceberg UDFs
+
+Iceberg ships several `ScalaUDF`s that surface in user queries and maintenance actions:
+
+- `IcebergSpark.registerBucketUDF` and `registerTruncateUDF` register `bucket(N, col)` and
+  `truncate(W, col)` for use in `SELECT` / `JOIN` / `WHERE` predicates that align with hidden
+  partitioning.
+- `RewriteDataFiles` with `sort-strategy=zorder` builds a tree of per-type ordered-bytes UDFs
+  (`INT_ORDERED_BYTES`, `LONG_ORDERED_BYTES`, ..., `INTERLEAVE_BYTES`) over the sort key columns
+  during compaction.
+
+By default these UDFs cause the enclosing operator to fall back to Spark, which forces a
+columnar-to-row roundtrip and demotes the surrounding shuffle from `CometExchange` to
+`CometColumnarExchange`. Enabling the experimental
+[Scala UDF and Java UDF Support](scala_java_udfs.md) feature
+(`spark.comet.exec.scalaUDF.codegen.enabled=true`) routes these UDFs through native execution so
+the project, exchange, and sort operators around them stay on the Comet path end-to-end.
+
 ### Task input metrics
 
 The native Iceberg reader populates Spark's task-level `inputMetrics.bytesRead` (visible in the Spark UI Stages tab) using the `bytes_read` counter from iceberg-rust's `ScanMetrics`. This counter includes bytes read from both data files and delete files.
 
@@ -43,6 +43,7 @@ to read more.
    Supported Data Types <datatypes>
    Supported Operators <operators>
    Supported Expressions <expressions>
+   Scala UDF and Java UDF Support <scala_java_udfs>
    Configuration Settings <configs>
    Compatibility Guide <compatibility/index>
    Understanding Comet Plans <understanding-comet-plans>
 
@@ -0,0 +1,61 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Scala UDF and Java UDF Support
+
+When enabled, Comet executes Spark's Scala and Java [scalar user-defined functions (UDFs)](https://spark.apache.org/docs/latest/sql-ref-functions-udf-scalar.html) on the native Comet path, so the presence of an eligible UDF does not force the enclosing operator off the native path; surrounding native operators stay native.
+
+This page covers Spark's `ScalaUDF` (Scala `udf(...)`, `spark.udf.register(...)` over Scala or Java functional interfaces, and SQL `CREATE FUNCTION ... AS 'com.example.MyUDF'`). Other UDF kinds (Python / Pandas, Hive, aggregate) are out of scope and continue to fall back to Spark.
+
+This feature is experimental and disabled by default.
+
+## Configuration
+
+| Key                                         | Default | Description                                                                                                        |
+| ------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------ |
+| `spark.comet.exec.scalaUDF.codegen.enabled` | `false` | When `true`, eligible `ScalaUDF`s run on the Comet path. When `false`, the enclosing operator falls back to Spark. |
+
+## Supported
+
+- User functions registered via `udf(...)`, `spark.udf.register(...)` (Scala or Java functional interfaces), or SQL `CREATE FUNCTION ... AS 'com.example.MyUDF'`.
+- Scalar input/output types: `Boolean`, `Byte`, `Short`, `Int`, `Long`, `Float`, `Double`, `Decimal`, `String`, `Binary`, `Date`, `Timestamp`, `TimestampNTZ`.
+- Complex input/output types with arbitrary nesting: `ArrayType`, `StructType`, `MapType`.
+- Composition with other Catalyst expressions inside the argument tree (e.g. `myUdf(upper(s))` runs as one native unit).
+- Higher-order functions (`transform`, `filter`, `exists`, `aggregate`, `zip_with`, `map_filter`, `map_zip_with`, etc.) inside the argument tree.
+
+## Not supported
+
+- Aggregate UDFs (`ScalaAggregator`, `TypedImperativeAggregate`, the legacy `UserDefinedAggregateFunction`).
+- Table UDFs and generators.
+- Python `@udf` and Pandas `@pandas_udf`.
+- Hive `GenericUDF` and `SimpleUDF`.
+- `CalendarIntervalType`, `NullType`, and `UserDefinedType` arguments and return types. UDT-typed columns fall back to Spark; for native execution, store and read the underlying representation directly (e.g. write MLlib `Vector` outputs as `Struct<type: Byte, size: Int, indices: Array<Int>, values: Array<Double>>` rather than `VectorUDT`).
+- Trees whose total nested-field count (output plus all input columns the UDF tree references) exceeds `spark.sql.codegen.maxFields` (default 100). Comet refuses these at plan time and the operator falls back to Spark.
+
+When a UDF is rejected, the reason surfaces through Comet's standard fallback diagnostics; the query still runs on Spark.
+
+## Behavior
+
+- Non-deterministic expressions referenced from the argument tree (`rand`, `uuid`, `monotonically_increasing_id`) produce per-partition sequences consistent with Spark.
+- `TaskContext.get()` inside the user function returns the driving Spark task's context.
+- The user function must be closure-serializable; any function that already works when Spark runs it on executors works here.
+
+## Known limitations
+
+- Each query containing a ScalaUDF pays a one-time codegen cost on its first batch and reuses the compiled kernel for subsequent batches, matching Spark's whole-stage codegen behavior. Bytecode is deduped JVM-wide via the same `CodeGenerator` cache, so structurally identical queries across a session share the compiled class.
Original file line number	Diff line number	Diff line change
`@@ -695,10 +695,20 @@ index e1a2fd33c7c..632f4b695df 100644`
`695`	`695`	`}`
`696`	`696`	`assert(scanOption.isDefined)`
`697`	`697`	`diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala`
`698`		`-index b27122a8de2..a4c5aac8212 100644`
	`698`	`+index b27122a8de2..3c690dbe788 100644`
`699`	`699`	`--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala`
`700`	`700`	`+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala`
`701`		`-@@ -470,7 +470,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite`
	`701`	`+@@ -267,7 +267,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite`
	`702`	`+ }`
	`703`	`+ }`
	`704`	`+`
	`705`	`+- test("SPARK-33853: explain codegen - check presence of subquery") {`
	`706`	`++ test("SPARK-33853: explain codegen - check presence of subquery",`
	`707`	`++ IgnoreComet("Comet plan has a different WholeStageCodegen subtree count")) {`
	`708`	`+ withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {`
	`709`	`+ withTempView("df") {`
	`710`	`+ val df1 = spark.range(1, 100)`
	`711`	`+@@ -470,7 +471,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite`
`702`	`712`	`}`
`703`	`713`	`}`
`704`	`714`
`@@ -708,7 +718,7 @@ index b27122a8de2..a4c5aac8212 100644`
`708`	`718`	`withTempDir { dir =>`
`709`	`719`	`Seq("parquet", "orc", "csv", "json").foreach { fmt =>`
`710`	`720`	`val basePath = dir.getCanonicalPath + "/" + fmt`
`711`		`-@@ -548,7 +549,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite`
	`721`	`+@@ -548,7 +550,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite`
`712`	`722`	`}`
`713`	`723`	`}`
`714`	`724`
-Original file line number
+Diff line change
 misc
 string
 struct
 +url
 cast
 ```