From 20e6c8d391b133ae75bef33d91712a461426051c Mon Sep 17 00:00:00 2001 From: bk-mz Date: Mon, 19 Jan 2026 12:09:32 +0200 Subject: [PATCH 1/4] Add test reproducing rewrite_position_delete_files failure with array columns This test demonstrates a regression in Iceberg 1.10.0 where rewrite_position_delete_files fails with ValidationException when the table contains array columns with primitive fields. The error occurs because ExpressionUtil.identitySpec() creates a temporary partition spec from schema field IDs, and the new validation added in commit 9fb80b716 rejects fields whose parent type is a list. --- ...stRewritePositionDeleteFilesProcedure.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index feafaff27b45..0b2543e968fb 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -245,6 +245,41 @@ public void testRewriteSummary() throws Exception { EnvironmentContext.ENGINE_VERSION, v -> assertThat(v).startsWith("3.5")); } + @TestTemplate + public void testRewritePositionDeletesWithArrayColumns() throws Exception { + // Create table with array column containing primitive fields - this triggers the bug + sql( + "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY<STRUCT<value: BIGINT, count: INT>>) " + + "USING iceberg TBLPROPERTIES" + + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", + tableName); + + // Insert multiple rows to ensure position deletes are created + sql( + "INSERT INTO %s VALUES " + + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 
1))), " + + "(2, 'b', array(named_struct('value', cast(20 as bigint), 'count', 2))), " + + "(3, 'c', array(named_struct('value', cast(30 as bigint), 'count', 3))), " + + "(4, 'd', array(named_struct('value', cast(40 as bigint), 'count', 4))), " + + "(5, 'e', array(named_struct('value', cast(50 as bigint), 'count', 5))), " + + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", + tableName); + + // Create position delete files with multiple deletes + sql("DELETE FROM %s WHERE id = 1", tableName); + sql("DELETE FROM %s WHERE id = 2", tableName); + + Table table = validationCatalog.loadTable(tableIdent); + assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); + + // This should NOT throw ValidationException: Invalid partition field parent: list + sql( + "CALL %s.system.rewrite_position_delete_files(" + + "table => '%s'," + + "options => map('rewrite-all','true'))", + catalogName, tableIdent); + } + private Map snapshotSummary() { return validationCatalog.loadTable(tableIdent).currentSnapshot().summary(); } From aee5b6faab1c3f4aa76de1945fe8b14b8a4d6527 Mon Sep 17 00:00:00 2001 From: bk-mz Date: Mon, 19 Jan 2026 12:39:22 +0200 Subject: [PATCH 2/4] Fix rewrite_position_delete_files failure with array columns ExpressionUtil.identitySpec() now skips fields that cannot be partition sources. Fields nested inside arrays or maps are filtered out by checking that all ancestor types are structs, matching the validation logic in PartitionSpec.checkCompatibility(). 
Fixes #15080 --- .../iceberg/expressions/ExpressionUtil.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 9bb2b713439d..778d4b028b78 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -24,6 +24,7 @@ import java.time.temporal.ChronoUnit; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.regex.Pattern; @@ -37,6 +38,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.variants.PhysicalType; @@ -737,11 +739,36 @@ private static String sanitizeVariantValue( private static PartitionSpec identitySpec(Schema schema, int... ids) { PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema); + Map<Integer, Integer> idToParent = TypeUtil.indexParents(schema.asStruct()); for (int id : ids) { - specBuilder.identity(schema.findColumnName(id)); + String columnName = schema.findColumnName(id); + // Skip fields nested in arrays or maps - they cannot be partition sources + // Only fields whose ancestors are all struct types can be partition sources + if (columnName != null && canBePartitionSource(id, schema, idToParent)) { + specBuilder.identity(columnName); + } } return specBuilder.build(); } + + /** + * Checks if a field can be used as a partition source. + * + *

<p>A field can only be a partition source if all of its ancestors are struct types. Fields + * nested inside arrays or maps cannot be partition sources. + */ + private static boolean canBePartitionSource( + int fieldId, Schema schema, Map<Integer, Integer> idToParent) { + Integer parentId = idToParent.get(fieldId); + while (parentId != null) { + Type parentType = schema.findType(parentId); + if (parentType == null || !parentType.isStructType()) { + return false; + } + parentId = idToParent.get(parentId); + } + return true; + } } From 9c4e1c34926488cce1d0ebc7daff8cf619f8d739 Mon Sep 17 00:00:00 2001 From: bk-mz Date: Mon, 19 Jan 2026 12:46:56 +0200 Subject: [PATCH 3/4] Add test for array columns to Spark 3.4, 4.0, and 4.1 --- ...stRewritePositionDeleteFilesProcedure.java | 35 +++++++++++++++++++ ...stRewritePositionDeleteFilesProcedure.java | 35 +++++++++++++++++++ ...stRewritePositionDeleteFilesProcedure.java | 35 +++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 0ff3a949ae51..039d841fada2 100644 --- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -245,6 +245,41 @@ public void testRewriteSummary() throws Exception { EnvironmentContext.ENGINE_VERSION, v -> assertThat(v).startsWith("3.4")); } + @TestTemplate + public void testRewritePositionDeletesWithArrayColumns() throws Exception { + // Create table with array column containing primitive fields - this triggers the bug + sql( + "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY<STRUCT<value: BIGINT, count: INT>>) " + + "USING iceberg TBLPROPERTIES" + + "('format-version'='2', 
'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", + tableName); + + // Insert multiple rows to ensure position deletes are created + sql( + "INSERT INTO %s VALUES " + + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " + + "(2, 'b', array(named_struct('value', cast(20 as bigint), 'count', 2))), " + + "(3, 'c', array(named_struct('value', cast(30 as bigint), 'count', 3))), " + + "(4, 'd', array(named_struct('value', cast(40 as bigint), 'count', 4))), " + + "(5, 'e', array(named_struct('value', cast(50 as bigint), 'count', 5))), " + + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", + tableName); + + // Create position delete files with multiple deletes + sql("DELETE FROM %s WHERE id = 1", tableName); + sql("DELETE FROM %s WHERE id = 2", tableName); + + Table table = validationCatalog.loadTable(tableIdent); + assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); + + // This should NOT throw ValidationException: Invalid partition field parent: list + sql( + "CALL %s.system.rewrite_position_delete_files(" + + "table => '%s'," + + "options => map('rewrite-all','true'))", + catalogName, tableIdent); + } + private Map snapshotSummary() { return validationCatalog.loadTable(tableIdent).currentSnapshot().summary(); } diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 006379adda56..de1cb75edf26 100644 --- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -245,6 +245,41 @@ public void testRewriteSummary() throws Exception { EnvironmentContext.ENGINE_VERSION, v -> 
assertThat(v).startsWith("4.0")); } + @TestTemplate + public void testRewritePositionDeletesWithArrayColumns() throws Exception { + // Create table with array column containing primitive fields - this triggers the bug + sql( + "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY>) " + + "USING iceberg TBLPROPERTIES" + + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", + tableName); + + // Insert multiple rows to ensure position deletes are created + sql( + "INSERT INTO %s VALUES " + + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " + + "(2, 'b', array(named_struct('value', cast(20 as bigint), 'count', 2))), " + + "(3, 'c', array(named_struct('value', cast(30 as bigint), 'count', 3))), " + + "(4, 'd', array(named_struct('value', cast(40 as bigint), 'count', 4))), " + + "(5, 'e', array(named_struct('value', cast(50 as bigint), 'count', 5))), " + + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", + tableName); + + // Create position delete files with multiple deletes + sql("DELETE FROM %s WHERE id = 1", tableName); + sql("DELETE FROM %s WHERE id = 2", tableName); + + Table table = validationCatalog.loadTable(tableIdent); + assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); + + // This should NOT throw ValidationException: Invalid partition field parent: list + sql( + "CALL %s.system.rewrite_position_delete_files(" + + "table => '%s'," + + "options => map('rewrite-all','true'))", + catalogName, tableIdent); + } + private Map snapshotSummary() { return validationCatalog.loadTable(tableIdent).currentSnapshot().summary(); } diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 311cf763eeef..43cb4500cfa6 100644 --- 
a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -245,6 +245,41 @@ public void testRewriteSummary() throws Exception { EnvironmentContext.ENGINE_VERSION, v -> assertThat(v).startsWith("4.1")); } + @TestTemplate + public void testRewritePositionDeletesWithArrayColumns() throws Exception { + // Create table with array column containing primitive fields - this triggers the bug + sql( + "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY>) " + + "USING iceberg TBLPROPERTIES" + + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", + tableName); + + // Insert multiple rows to ensure position deletes are created + sql( + "INSERT INTO %s VALUES " + + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " + + "(2, 'b', array(named_struct('value', cast(20 as bigint), 'count', 2))), " + + "(3, 'c', array(named_struct('value', cast(30 as bigint), 'count', 3))), " + + "(4, 'd', array(named_struct('value', cast(40 as bigint), 'count', 4))), " + + "(5, 'e', array(named_struct('value', cast(50 as bigint), 'count', 5))), " + + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", + tableName); + + // Create position delete files with multiple deletes + sql("DELETE FROM %s WHERE id = 1", tableName); + sql("DELETE FROM %s WHERE id = 2", tableName); + + Table table = validationCatalog.loadTable(tableIdent); + assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); + + // This should NOT throw ValidationException: Invalid partition field parent: list + sql( + "CALL %s.system.rewrite_position_delete_files(" + + "table => '%s'," + + "options => map('rewrite-all','true'))", + catalogName, tableIdent); + } + private Map snapshotSummary() { return 
validationCatalog.loadTable(tableIdent).currentSnapshot().summary(); } From 1b5cd6caf34aa5c4984bb55daf6f01348505b9af Mon Sep 17 00:00:00 2001 From: bk-mz Date: Mon, 19 Jan 2026 12:54:32 +0200 Subject: [PATCH 4/4] Remove comments from tests --- .../org/apache/iceberg/expressions/ExpressionUtil.java | 8 -------- .../TestRewritePositionDeleteFilesProcedure.java | 4 ---- .../TestRewritePositionDeleteFilesProcedure.java | 4 ---- .../TestRewritePositionDeleteFilesProcedure.java | 4 ---- .../TestRewritePositionDeleteFilesProcedure.java | 4 ---- 5 files changed, 24 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 778d4b028b78..c03c3323fe8b 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -743,8 +743,6 @@ private static PartitionSpec identitySpec(Schema schema, int... ids) { for (int id : ids) { String columnName = schema.findColumnName(id); - // Skip fields nested in arrays or maps - they cannot be partition sources - // Only fields whose ancestors are all struct types can be partition sources if (columnName != null && canBePartitionSource(id, schema, idToParent)) { specBuilder.identity(columnName); } @@ -753,12 +751,6 @@ private static PartitionSpec identitySpec(Schema schema, int... ids) { return specBuilder.build(); } - /** - * Checks if a field can be used as a partition source. - * - *

<p>A field can only be a partition source if all of its ancestors are struct types. Fields - * nested inside arrays or maps cannot be partition sources. - */ private static boolean canBePartitionSource( int fieldId, Schema schema, Map<Integer, Integer> idToParent) { Integer parentId = idToParent.get(fieldId); diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 039d841fada2..006b1edf1ce7 100644 --- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -247,14 +247,12 @@ public void testRewriteSummary() throws Exception { @TestTemplate public void testRewritePositionDeletesWithArrayColumns() throws Exception { - // Create table with array column containing primitive fields - this triggers the bug sql( "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY<STRUCT<value: BIGINT, count: INT>>) " + "USING iceberg TBLPROPERTIES" + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", tableName); - // Insert multiple rows to ensure position deletes are created sql( "INSERT INTO %s VALUES " + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " @@ -265,14 +263,12 @@ public void testRewritePositionDeletesWithArrayColumns() throws Exception { + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", tableName); - // Create position delete files with multiple deletes sql("DELETE FROM %s WHERE id = 1", tableName); sql("DELETE FROM %s WHERE id = 2", tableName); Table table = validationCatalog.loadTable(tableIdent); assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); - // This should NOT throw ValidationException: 
Invalid partition field parent: list sql( "CALL %s.system.rewrite_position_delete_files(" + "table => '%s'," diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 0b2543e968fb..dd2bd051cd8a 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -247,14 +247,12 @@ public void testRewriteSummary() throws Exception { @TestTemplate public void testRewritePositionDeletesWithArrayColumns() throws Exception { - // Create table with array column containing primitive fields - this triggers the bug sql( "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY>) " + "USING iceberg TBLPROPERTIES" + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", tableName); - // Insert multiple rows to ensure position deletes are created sql( "INSERT INTO %s VALUES " + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " @@ -265,14 +263,12 @@ public void testRewritePositionDeletesWithArrayColumns() throws Exception { + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", tableName); - // Create position delete files with multiple deletes sql("DELETE FROM %s WHERE id = 1", tableName); sql("DELETE FROM %s WHERE id = 2", tableName); Table table = validationCatalog.loadTable(tableIdent); assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); - // This should NOT throw ValidationException: Invalid partition field parent: list sql( "CALL %s.system.rewrite_position_delete_files(" + "table => '%s'," diff --git 
a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index de1cb75edf26..e9bddcc09d12 100644 --- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -247,14 +247,12 @@ public void testRewriteSummary() throws Exception { @TestTemplate public void testRewritePositionDeletesWithArrayColumns() throws Exception { - // Create table with array column containing primitive fields - this triggers the bug sql( "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY>) " + "USING iceberg TBLPROPERTIES" + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", tableName); - // Insert multiple rows to ensure position deletes are created sql( "INSERT INTO %s VALUES " + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " @@ -265,14 +263,12 @@ public void testRewritePositionDeletesWithArrayColumns() throws Exception { + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", tableName); - // Create position delete files with multiple deletes sql("DELETE FROM %s WHERE id = 1", tableName); sql("DELETE FROM %s WHERE id = 2", tableName); Table table = validationCatalog.loadTable(tableIdent); assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); - // This should NOT throw ValidationException: Invalid partition field parent: list sql( "CALL %s.system.rewrite_position_delete_files(" + "table => '%s'," diff --git a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java 
b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 43cb4500cfa6..63f3303ea663 100644 --- a/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -247,14 +247,12 @@ public void testRewriteSummary() throws Exception { @TestTemplate public void testRewritePositionDeletesWithArrayColumns() throws Exception { - // Create table with array column containing primitive fields - this triggers the bug sql( "CREATE TABLE %s (id BIGINT, data STRING, items ARRAY>) " + "USING iceberg TBLPROPERTIES" + "('format-version'='2', 'write.delete.mode'='merge-on-read', 'write.update.mode'='merge-on-read')", tableName); - // Insert multiple rows to ensure position deletes are created sql( "INSERT INTO %s VALUES " + "(1, 'a', array(named_struct('value', cast(10 as bigint), 'count', 1))), " @@ -265,14 +263,12 @@ public void testRewritePositionDeletesWithArrayColumns() throws Exception { + "(6, 'f', array(named_struct('value', cast(60 as bigint), 'count', 6)))", tableName); - // Create position delete files with multiple deletes sql("DELETE FROM %s WHERE id = 1", tableName); sql("DELETE FROM %s WHERE id = 2", tableName); Table table = validationCatalog.loadTable(tableIdent); assertThat(TestHelpers.deleteFiles(table)).hasSizeGreaterThanOrEqualTo(1); - // This should NOT throw ValidationException: Invalid partition field parent: list sql( "CALL %s.system.rewrite_position_delete_files(" + "table => '%s',"