33 commits
- c2c8ba0 feat: add experimental native columnar to row conversion (andygrove, Jan 19, 2026)
- 49a5b20 cargo fmt (andygrove, Jan 19, 2026)
- e558073 cargo clippy (andygrove, Jan 19, 2026)
- a44066f docs (andygrove, Jan 19, 2026)
- fd58cba update benchmark [skip ci] (andygrove, Jan 19, 2026)
- bac9164 fix: use correct element sizes in native columnar to row for array/map (andygrove, Jan 19, 2026)
- 3ca5553 test: add fuzz test with nested types to native C2R suite (andygrove, Jan 19, 2026)
- 7f2e64d test: add deeply nested type tests to native C2R suite (andygrove, Jan 19, 2026)
- 7afc4ba test: add fuzz test with generateNestedSchema for native C2R (andygrove, Jan 20, 2026)
- adc13a6 format (andygrove, Jan 20, 2026)
- 56df742 fix: handle LargeList and improve error handling in native C2R (andygrove, Jan 20, 2026)
- 461c625 fix (andygrove, Jan 20, 2026)
- 8b8741c fix: add Dictionary-encoded array support to native C2R (andygrove, Jan 20, 2026)
- b8ed2e7 format (andygrove, Jan 20, 2026)
- 330dbb2 clippy [skip ci] (andygrove, Jan 20, 2026)
- 8231a75 test: add benchmark comparing JVM and native columnar to row conversion (andygrove, Jan 20, 2026)
- f2cc61c perf: optimize native C2R by eliminating Vec allocations for strings (andygrove, Jan 20, 2026)
- 3ebcaca perf: add fixed-width fast path for native C2R (andygrove, Jan 20, 2026)
- ed72c29 test: add fixed-width-only benchmark and refactor C2R benchmark (andygrove, Jan 20, 2026)
- 17d83d5 perf: optimize complex types in native C2R by eliminating intermediat… (andygrove, Jan 20, 2026)
- 5f26a81 perf: add bulk copy optimization for primitive arrays in native C2R (andygrove, Jan 20, 2026)
- e5b2c61 perf: add pre-downcast optimization for native C2R general path (andygrove, Jan 20, 2026)
- 7743138 fix: correct array element bulk copy for Date32, Timestamp, Boolean (andygrove, Jan 20, 2026)
- 9c66ef6 perf: Velox-style optimization for array/map C2R (40-52% faster) (andygrove, Jan 20, 2026)
- 64c5212 perf: inline type dispatch for struct fields in native C2R (andygrove, Jan 20, 2026)
- 04c49fb perf: pre-downcast struct fields for native C2R (andygrove, Jan 20, 2026)
- 47d4c50 perf: optimize general path for mixed fixed/variable-length columns (andygrove, Jan 20, 2026)
- 081b3ed revert (andygrove, Jan 20, 2026)
- f696595 upmerge (andygrove, Jan 20, 2026)
- 92e1abb revert doc format change (andygrove, Jan 20, 2026)
- e735434 fix: address clippy warnings and remove dead code in native C2R (andygrove, Jan 20, 2026)
- ab074bd Remove #[inline] hint from bulk_copy_range (andygrove, Jan 20, 2026)
- 377214a fix (andygrove, Jan 20, 2026)
1 change: 1 addition & 0 deletions .github/workflows/pr_build_linux.yml
@@ -116,6 +116,7 @@ jobs:
value: |
org.apache.comet.exec.CometShuffleSuite
org.apache.comet.exec.CometShuffle4_0Suite
org.apache.comet.exec.CometNativeColumnarToRowSuite
org.apache.comet.exec.CometNativeShuffleSuite
org.apache.comet.exec.CometShuffleEncryptionSuite
org.apache.comet.exec.CometShuffleManagerSuite
1 change: 1 addition & 0 deletions .github/workflows/pr_build_macos.yml
@@ -79,6 +79,7 @@ jobs:
value: |
org.apache.comet.exec.CometShuffleSuite
org.apache.comet.exec.CometShuffle4_0Suite
org.apache.comet.exec.CometNativeColumnarToRowSuite
org.apache.comet.exec.CometNativeShuffleSuite
org.apache.comet.exec.CometShuffleEncryptionSuite
org.apache.comet.exec.CometShuffleManagerSuite
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
CLAUDE.md
target
.idea
*.iml
11 changes: 11 additions & 0 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -286,6 +286,17 @@ object CometConf extends ShimCometConf {
val COMET_EXEC_LOCAL_TABLE_SCAN_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("localTableScan", defaultValue = false)

val COMET_NATIVE_COLUMNAR_TO_ROW_ENABLED: ConfigEntry[Boolean] =
conf(s"$COMET_EXEC_CONFIG_PREFIX.columnarToRow.native.enabled")
.category(CATEGORY_EXEC)
.doc(
"Whether to enable native columnar to row conversion. When enabled, Comet will use " +
"native Rust code to convert Arrow columnar data to Spark UnsafeRow format instead " +
"of the JVM implementation. This can improve performance for queries that need to " +
"convert between columnar and row formats. This is an experimental feature.")
.booleanConf
.createWithDefault(false)

val COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.exec.sortMergeJoinWithJoinFilter.enabled")
.category(CATEGORY_ENABLE_EXEC)
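The new flag sits alongside Comet's existing enablement switches. A minimal `spark-defaults.conf` fragment for trying the experimental path might look like the following; note that the first three settings are Comet's usual prerequisites and are stated here as assumptions, not taken from this diff:

```
spark.plugins                                    org.apache.spark.CometPlugin
spark.comet.enabled                              true
spark.comet.exec.enabled                         true
spark.comet.exec.columnarToRow.native.enabled    true
```

Because the feature defaults to `false`, existing deployments keep the JVM columnar-to-row path unless they opt in.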
20 changes: 20 additions & 0 deletions common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
@@ -78,6 +78,26 @@ class NativeUtil {
(arrays, schemas)
}

/**
* Exports a ColumnarBatch to Arrow FFI and returns the memory addresses.
*
* This is a convenience method that allocates Arrow structs, exports the batch, and returns
* just the memory addresses (without exposing the Arrow types).
*
* @param batch
* the columnar batch to export
* @return
* a tuple of (array addresses, schema addresses, number of rows)
*/
def exportBatchToAddresses(batch: ColumnarBatch): (Array[Long], Array[Long], Int) = {
val numCols = batch.numCols()
val (arrays, schemas) = allocateArrowStructs(numCols)
val arrayAddrs = arrays.map(_.memoryAddress())
val schemaAddrs = schemas.map(_.memoryAddress())
val numRows = exportBatch(arrayAddrs, schemaAddrs, batch)
(arrayAddrs, schemaAddrs, numRows)
}

/**
* Exports a Comet `ColumnarBatch` into a list of memory addresses that can be consumed by the
* native execution.
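The addresses exported above let native code read the Arrow arrays and write out Spark's `UnsafeRow` format. An `UnsafeRow`'s fixed-width region is a null-tracking bitset rounded up to whole 8-byte words, followed by one 8-byte slot per field; variable-length values are appended after that region with their offset and length packed into the slot. As background (this is the general `UnsafeRow` sizing rule, not Comet's actual code), the fixed-width region size can be computed like this:

```python
def unsafe_row_fixed_size(num_fields: int) -> int:
    """Bytes in the fixed-width region of a Spark UnsafeRow.

    Layout: a null bitset rounded up to whole 8-byte words,
    then one 8-byte slot per field.
    """
    bitset_bytes = ((num_fields + 63) // 64) * 8
    return bitset_bytes + 8 * num_fields

# 3 fields: one 8-byte bitset word + 3 * 8-byte slots = 32 bytes
print(unsafe_row_fixed_size(3))
```

This fixed, predictable layout is what makes the "fixed-width fast path" commits above possible: when every column is fixed-width, each output row has the same size and field slots can be filled by bulk copies.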
1 change: 1 addition & 0 deletions docs/source/user-guide/latest/configs.md
@@ -66,6 +66,7 @@ Comet provides the following configuration settings.
| `spark.comet.dppFallback.enabled` | Whether to fall back to Spark for queries that use DPP. | true |
| `spark.comet.enabled` | Whether to enable Comet extension for Spark. When this is turned on, Spark will use Comet to read Parquet data source. Note that to enable native vectorized execution, both this config and `spark.comet.exec.enabled` need to be enabled. It can be overridden by the environment variable `ENABLE_COMET`. | true |
| `spark.comet.exceptionOnDatetimeRebase` | Whether to throw exception when seeing dates/timestamps from the legacy hybrid (Julian + Gregorian) calendar. Since Spark 3, dates/timestamps were written according to the Proleptic Gregorian calendar. When this is true, Comet will throw exceptions when seeing these dates/timestamps that were written by Spark version before 3.0. If this is false, these dates/timestamps will be read as if they were written to the Proleptic Gregorian calendar and will not be rebased. | false |
| `spark.comet.exec.columnarToRow.native.enabled` | Whether to enable native columnar to row conversion. When enabled, Comet will use native Rust code to convert Arrow columnar data to Spark UnsafeRow format instead of the JVM implementation. This can improve performance for queries that need to convert between columnar and row formats. This is an experimental feature. | false |
| `spark.comet.exec.enabled` | Whether to enable Comet native vectorized execution for Spark. This controls whether Spark should convert operators into their Comet counterparts and execute them in native space. Note: each operator is associated with a separate config in the format of `spark.comet.exec.<operator_name>.enabled` at the moment, and both the config and this need to be turned on, in order for the operator to be executed in native. | true |
| `spark.comet.exec.replaceSortMergeJoin` | Experimental feature to force Spark to replace SortMergeJoin with ShuffledHashJoin for improved performance. This feature is not stable yet. For more information, refer to the [Comet Tuning Guide](https://datafusion.apache.org/comet/user-guide/tuning.html). | false |
| `spark.comet.exec.strictFloatingPoint` | When enabled, fall back to Spark for floating-point operations that may differ from Spark, such as when comparing or sorting -0.0 and 0.0. For more information, refer to the [Comet Compatibility Guide](https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |