@@ -35,18 +35,18 @@ import org.apache.comet.shims.CometExprTraitShim
3535 * fuses Arrow input reads, Spark expression evaluation, and Arrow output writes into one
3636 * Janino-compiled method per `(expression, schema)` pair.
3737 *
38- * The kernel compiles any bound Catalyst expression; the tree need not be rooted at a `ScalaUDF`.
38+ * The kernel compiles any bound Catalyst expression. The tree need not be rooted at a `ScalaUDF`.
3939 * Today's only consumer is [[org.apache.comet.udf.codegen.CometScalaUDFCodegen]].
4040 *
41- * Constraints: one output vector per kernel; per-row scalar evaluation only (aggregate, window,
41+ * Constraints: one output vector per kernel, per-row scalar evaluation only (aggregate, window,
4242 * and generator expressions are rejected by [[canHandle]]).
4343 *
4444 * Input- and output-side emission live in [[CometBatchKernelCodegenInput]] and
4545 * [[CometBatchKernelCodegenOutput]]. This file owns the [[ArrowColumnSpec]] vocabulary, the
4646 * [[canHandle]] / [[allocateOutput]] / [[compile]] / [[generateSource]] entry points, and
4747 * cross-cutting kernel-shape decisions (NullIntolerant short-circuit, CSE variant).
4848 *
49- * The generated kernel is the `InternalRow` that Spark's `BoundReference.genCode` reads from; see
49+ * The generated kernel is the `InternalRow` that Spark's `BoundReference.genCode` reads from. See
5050 * [[generateSource]] for how the wiring is set up.
5151 */
5252object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
@@ -128,7 +128,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
128128 s " spark.sql.codegen.maxFields= $maxFields) " )
129129 }
130130 // HOFs are `CodegenFallback` but admitted: `CodegenFallback.doGenCode` emits one
131- // `((Expression) references[N]).eval(row)` call site per HOF; the kernel dispatches to the
131+ // `((Expression) references[N]).eval(row)` call site per HOF. The kernel dispatches to the
132132 // HOF's interpreted `eval`, which mutates `NamedLambdaVariable.value` per element and reads
133133 // the input array through the kernel's typed Arrow getters. Per-task `boundExpr` isolation
134134 // in `CometScalaUDFCodegen.kernelCache` prevents concurrent partitions from racing on the
@@ -140,8 +140,8 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
140140 //
141141 // `ExecSubqueryExpression` (`ScalarSubquery`, `InSubqueryExec`) is accepted: the surrounding
142142 // Comet operator's inherited `SparkPlan.waitForSubqueries` populates the subquery's
143- // `result` field before evaluation; the closure serializer captures that value into the
144- // arg-0 bytes; the dispatcher keys its compile cache on those bytes, so distinct subquery
143+ // `result` field before evaluation. The closure serializer captures that value into the
144+ // arg-0 bytes, and the dispatcher keys its compile cache on those bytes, so distinct subquery
145145 // results produce distinct cache entries.
146146 //
147147 // `Unevaluable`: rejected by default. `isCodegenInertUnevaluable` exempts version-specific
@@ -207,7 +207,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
207207 .mkString(" ," ))
208208 // ScalaUDF embeds stateful `ExpressionEncoder` serializers via `ctx.addReferenceObj` that
209209 // reuse internal `UnsafeRow` / `byte[]` buffers per `apply`. Each kernel instance needs its
210- // own copy; the closure regenerates the references array per call so the dispatcher can hand
210+ // own copy. The closure regenerates the references array per call so the dispatcher can hand
211211 // a fresh array to every kernel it allocates from this `CompiledKernel`.
212212 val freshReferences : () => Array [Any ] = () =>
213213 generateSource(boundExpr, inputSchema).references
@@ -245,12 +245,12 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
245245 // empty string here.
246246 //
247247 // TODO(method-size): perRowBody is inlined inside process's for-loop and not split.
248- // Sufficiently deep trees can exceed Janino's 64KB method size; wrap in
248+ // Sufficiently deep trees can exceed Janino's 64KB method size. Wrap in
249249 // ctx.splitExpressionsWithCurrentInputs when hit.
250250 val (concreteOutClass, outputSetup, perRowBody) = {
251251 // Class-field CSE. `generateExpressions` runs `subexpressionElimination` under the hood,
252252 // populating `ctx.subexprFunctions` with per-row helper calls that write common subtree
253- // results into `addMutableState` fields; the returned `ExprCode` references those fields.
253+ // results into `addMutableState` fields. The returned `ExprCode` references those fields.
254254 // `subexprFunctionsCode` is the concatenated helper invocation block, spliced into the
255255 // per-row body by `defaultBody`.
256256 val ev = if (SQLConf .get.subexpressionEliminationEnabled) {
@@ -338,7 +338,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
338338 * skipped on null rows. Otherwise the standard shape: run `ev.code`, then `setNull` or write
339339 * based on `ev.isNull`.
340340 *
341- * `subExprsCode` is the CSE helper-invocation block; it must run before `ev.code`. Inside the
341+ * `subExprsCode` is the CSE helper-invocation block. It must run before `ev.code`. Inside the
342342 * short-circuit it lives in the else branch so null rows skip CSE too.
343343 */
344344 private def defaultBody (
@@ -418,8 +418,8 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
418418
419419 /**
420420 * Array column: an Arrow `ListVector` wrapping a child spec. `elementSparkType` lets the
421- * nested-class emitter pick the right read template; the child carries the Arrow vector class.
422- * Nested arrays compose recursively.
421+ * nested-class emitter pick the right read template, and the child carries the Arrow vector
422+ * class. Nested arrays compose recursively.
423423 */
424424 final case class ArrayColumnSpec (
425425 nullable : Boolean ,
@@ -449,8 +449,9 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
449449
450450 /**
451451 * Map column: an Arrow `MapVector` (subclass of `ListVector`) whose data vector is a
452- * `StructVector` with key at child 0 and value at child 1. Nullable keys/values are carried in
453- * the child specs. Nested keys and values compose recursively.
452+ * `StructVector` with key at child 0 and value at child 1. Nested keys and values compose
453+ * recursively. The child specs' `nullable` field is unused on the read path. Output-side null
454+ * guards for map values come from `MapType.valueContainsNull` on the Spark `DataType`.
454455 */
455456 final case class MapColumnSpec (
456457 nullable : Boolean ,
@@ -463,9 +464,9 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
463464 }
464465
465466 /**
466- * Compiled kernel handle. `factory` is a Spark-generated stateless class safe to share across
467- * partitions; `freshReferences` regenerates the references array per kernel allocation because
468- * `ScalaUDF` embeds stateful `ExpressionEncoder` serializers that cannot be shared.
467+ * Compiled kernel handle. `freshReferences` regenerates the references array per kernel
468+ * allocation because `ScalaUDF` embeds stateful `ExpressionEncoder` serializers that cannot be
469+ * shared.
469470 */
470471 final case class CompiledKernel (factory : GeneratedClass , freshReferences : () => Array [Any ]) {
471472 def newInstance (): CometBatchKernel =
@@ -474,7 +475,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
474475
475476 /**
476477 * Output of [[generateSource]]. Tests inspect `body` to assert the shape of the generated
477- * source; see `CometCodegenSourceSuite`.
478+ * source. See `CometCodegenSourceSuite`.
478479 */
479480 final case class GeneratedSource (body : String , code : CodeAndComment , references : Array [Any ])
480481
0 commit comments