From 9032acdaaa0eac0f39c4306ce2618f325307b74f Mon Sep 17 00:00:00 2001 From: yan zhang Date: Tue, 20 Jan 2026 13:48:41 +0800 Subject: [PATCH 1/6] fix variant writer --- .../iceberg/variants/ShreddedObject.java | 8 +-- .../iceberg/parquet/TestVariantWriters.java | 59 +++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java index c1fba073cfdf..31ddc61058a9 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -178,20 +178,20 @@ private SerializationState( } } else if (unshredded != null) { for (String name : unshredded.fieldNames()) { - boolean replaced = shreddedFields.containsKey(name) || removedFields.contains(name); + boolean replaced = this.shreddedFields.containsKey(name) || removedFields.contains(name); if (!replaced) { - shreddedFields.put(name, unshredded.get(name)); + this.shreddedFields.put(name, unshredded.get(name)); } } } this.unshreddedFields = unshreddedBuilder.build(); // duplicates are suppressed when creating unshreddedFields - this.numElements = unshreddedFields.size() + shreddedFields.size(); + this.numElements = unshreddedFields.size() + this.shreddedFields.size(); // object is large if the number of elements can't be stored in 1 byte this.isLarge = numElements > 0xFF; - for (VariantValue value : shreddedFields.values()) { + for (VariantValue value : this.shreddedFields.values()) { totalDataSize += value.sizeInBytes(); } diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java index 2c83406f9fee..c9827075d253 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java @@ -52,6 +52,7 @@ import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.FieldSource; @@ -280,4 +281,62 @@ private static ValueArray array(VariantValue... values) { return arr; } + + @Test + public void testPartialShreddingWithShreddedObject() throws IOException { + // Test for issue #15086: partial shredding with ShreddedObject created using put() + // Create a ShreddedObject with multiple fields, then partially shred it + VariantMetadata metadata = Variants.metadata("id", "name", "city"); + + // Create objects using ShreddedObject.put() instead of serialized buffers + List records = Lists.newArrayList(); + for (int i = 0; i < 3; i++) { + org.apache.iceberg.variants.ShreddedObject obj = Variants.object(metadata); + obj.put("id", Variants.of(1000L + i)); + obj.put("name", Variants.of("user_" + i)); + obj.put("city", Variants.of("city_" + i)); + + Variant variant = Variant.of(metadata, obj); + Record record = RECORD.copy("id", i, "var", variant); + records.add(record); + } + + // Shredding function that only shreds the "id" field + VariantShreddingFunction partialShredding = + (id, name) -> + org.apache.parquet.schema.Types.optionalGroup() + .addField( + org.apache.parquet.schema.Types.optionalGroup() + .addField( + org.apache.parquet.schema.Types.optional( + org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName + .BINARY) + .named("value")) + .addField( + org.apache.parquet.schema.Types.optional( + org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) + .named("typed_value")) + .named("id")) + .named("typed_value"); + + // Write and read back + List actual = writeAndRead(partialShredding, records); + + // Verify all records match + assertThat(actual).hasSameSizeAs(records); + for (int i = 0; i < records.size(); i++) { + Record expected = records.get(i); + Record read = actual.get(i); + + InternalTestHelpers.assertEquals(SCHEMA.asStruct(), expected, read); + + // Also verify the variant object has all fields intact + Variant readVariant = (Variant) read.getField("var"); + org.apache.iceberg.variants.VariantObject readObj = readVariant.value().asObject(); + assertThat(readObj.numFields()).isEqualTo(3); + assertThat(readObj.get("id").asPrimitive().get()).isEqualTo(1000L + i); + assertThat(readObj.get("name").asPrimitive().get()).isEqualTo("user_" + i); + assertThat(readObj.get("city").asPrimitive().get()).isEqualTo("city_" + i); + } + } } From 5218092f12c66964750fcce19dfda7a2004ace80 Mon Sep 17 00:00:00 2001 From: yan zhang Date: Tue, 20 Jan 2026 18:27:33 +0800 Subject: [PATCH 2/6] fix per comment --- .../org/apache/iceberg/parquet/TestVariantWriters.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java index c9827075d253..53b704f8c3af 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java @@ -41,6 +41,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; +import org.apache.iceberg.variants.ShreddedObject; import org.apache.iceberg.variants.ValueArray; import org.apache.iceberg.variants.Variant; import org.apache.iceberg.variants.VariantArray; @@ -52,6 +53,7 @@ import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.FieldSource; @@ -291,7 +293,7 @@ public void testPartialShreddingWithShreddedObject() throws IOException { // Create objects using ShreddedObject.put() instead of serialized buffers List records = Lists.newArrayList(); for (int i = 0; i < 3; i++) { - org.apache.iceberg.variants.ShreddedObject obj = Variants.object(metadata); + ShreddedObject obj = Variants.object(metadata); obj.put("id", Variants.of(1000L + i)); obj.put("name", Variants.of("user_" + i)); obj.put("city", Variants.of("city_" + i)); @@ -309,12 +311,12 @@ public void testPartialShreddingWithShreddedObject() throws IOException { org.apache.parquet.schema.Types.optionalGroup() .addField( org.apache.parquet.schema.Types.optional( - org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName + PrimitiveType.PrimitiveTypeName .BINARY) .named("value")) .addField( org.apache.parquet.schema.Types.optional( - org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) + PrimitiveType.PrimitiveTypeName.INT64) .named("typed_value")) .named("id")) .named("typed_value"); @@ -332,7 +334,7 @@ public void testPartialShreddingWithShreddedObject() throws IOException { // Also verify the variant object has all fields intact Variant readVariant = (Variant) read.getField("var"); - org.apache.iceberg.variants.VariantObject readObj = readVariant.value().asObject(); + VariantObject readObj = readVariant.value().asObject(); assertThat(readObj.numFields()).isEqualTo(3); assertThat(readObj.get("id").asPrimitive().get()).isEqualTo(1000L + i); assertThat(readObj.get("name").asPrimitive().get()).isEqualTo("user_" + i); From 04745aa14bdce0397ad392c04b84f2b297610aeb Mon Sep 17 00:00:00 2001 From: yan zhang Date: Tue, 20 Jan 2026 20:12:07 +0800 Subject: [PATCH 3/6] fix java format --- .../java/org/apache/iceberg/parquet/TestVariantWriters.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java index 53b704f8c3af..eba3699230e7 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java @@ -311,8 +311,7 @@ public void testPartialShreddingWithShreddedObject() throws IOException { org.apache.parquet.schema.Types.optionalGroup() .addField( org.apache.parquet.schema.Types.optional( - PrimitiveType.PrimitiveTypeName - .BINARY) + PrimitiveType.PrimitiveTypeName.BINARY) .named("value")) .addField( org.apache.parquet.schema.Types.optional( From 00b22dd15ca24fe61b1fc8997f16c68a5a747fcd Mon Sep 17 00:00:00 2001 From: yan zhang Date: Wed, 21 Jan 2026 09:36:25 +0800 Subject: [PATCH 4/6] fix per comment --- .../iceberg/variants/ShreddedObject.java | 12 ++++---- .../iceberg/variants/TestShreddedObject.java | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java index 31ddc61058a9..5d2bfc48fae0 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -153,12 +153,12 @@ private static class SerializationState { private SerializationState( VariantMetadata metadata, VariantObject unshredded, - Map shreddedFields, + Map shredded, Set removedFields) { this.metadata = metadata; // field ID size is the size needed to store the largest field ID in the data this.fieldIdSize = VariantUtil.sizeOf(metadata.dictionarySize()); - this.shreddedFields = Maps.newHashMap(shreddedFields); + this.shreddedFields = Maps.newHashMap(shredded); int totalDataSize = 0; // get the unshredded field names and values as byte buffers @@ -178,20 +178,20 @@ private SerializationState( } } else if (unshredded != null) { for (String name : unshredded.fieldNames()) { - boolean replaced = this.shreddedFields.containsKey(name) || removedFields.contains(name); + boolean replaced = shreddedFields.containsKey(name) || removedFields.contains(name); if (!replaced) { - this.shreddedFields.put(name, unshredded.get(name)); + shreddedFields.put(name, unshredded.get(name)); } } } this.unshreddedFields = unshreddedBuilder.build(); // duplicates are suppressed when creating unshreddedFields - this.numElements = unshreddedFields.size() + this.shreddedFields.size(); + this.numElements = unshreddedFields.size() + shreddedFields.size(); // object is large if the number of elements can't be stored in 1 byte this.isLarge = numElements > 0xFF; - for (VariantValue value : this.shreddedFields.values()) { + for (VariantValue value : shreddedFields.values()) { totalDataSize += value.sizeInBytes(); } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index 1ebb433a5423..bfb6143e7aad 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -213,6 +213,28 @@ public void testPartiallyShreddedObjectSerializationMinimalBuffer() { .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); } + @Test + public void testPartiallyShreddedUnserializedObjectSerializationMinimalBuffer() { + ShreddedObject partial = createUnserializedObject(FIELDS); + VariantMetadata metadata = partial.metadata(); + + // replace field c with a new value + partial.put("c", Variants.ofIsoDate("2024-10-12")); + partial.remove("b"); + + VariantValue value = roundTripMinimalBuffer(partial, metadata); + + assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; + + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").type()).isEqualTo(PhysicalType.DATE); + assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); + } + @Test public void testPartiallyShreddedObjectSerializationLargeBuffer() { ShreddedObject partial = createUnshreddedObject(FIELDS); @@ -381,6 +403,12 @@ private static ShreddedObject createShreddedObject( return object; } + private static ShreddedObject createUnserializedObject(Map fields) { + ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); + VariantMetadata metadata = SerializedMetadata.from(metadataBuffer); + return new ShreddedObject(metadata, createShreddedObject(metadata, fields)); + } + /** Creates a ShreddedObject with fields in its shredded map */ private static ShreddedObject createShreddedObject(Map fields) { ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); From 24bb7ea34720712be74f88221c8ab70228546a67 Mon Sep 17 00:00:00 2001 From: yan zhang Date: Wed, 21 Jan 2026 10:01:18 +0800 Subject: [PATCH 5/6] use ParquetVairnatUtil to construct schema --- .../iceberg/parquet/TestVariantWriters.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java index eba3699230e7..13521ec02982 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java @@ -53,7 +53,6 @@ import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.PrimitiveType; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.FieldSource; @@ -305,20 +304,12 @@ public void testPartialShreddingWithShreddedObject() throws IOException { // Shredding function that only shreds the "id" field VariantShreddingFunction partialShredding = - (id, name) -> - org.apache.parquet.schema.Types.optionalGroup() - .addField( - org.apache.parquet.schema.Types.optionalGroup() - .addField( - org.apache.parquet.schema.Types.optional( - PrimitiveType.PrimitiveTypeName.BINARY) - .named("value")) - .addField( - org.apache.parquet.schema.Types.optional( - PrimitiveType.PrimitiveTypeName.INT64) - .named("typed_value")) - .named("id")) - .named("typed_value"); + (id, name) -> { + VariantMetadata shreddedMetadata = Variants.metadata("id"); + ShreddedObject shreddedObject = Variants.object(shreddedMetadata); + shreddedObject.put("id", Variants.of(1234L)); + return ParquetVariantUtil.toParquetSchema(shreddedObject); + }; // Write and read back List actual = writeAndRead(partialShredding, records); From f34042ace3428c9e7ae6ab049dc7cd6e0c647f73 Mon Sep 17 00:00:00 2001 From: yan zhang Date: Wed, 21 Jan 2026 11:39:29 +0800 Subject: [PATCH 6/6] fix java format --- .../java/org/apache/iceberg/variants/TestShreddedObject.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index bfb6143e7aad..97ca6c2fa216 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -232,7 +232,7 @@ public void testPartiallyShreddedUnserializedObjectSerializationMinimalBuffer() assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); assertThat(actual.get("c").type()).isEqualTo(PhysicalType.DATE); assertThat(actual.get("c").asPrimitive().get()) - .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); + .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); } @Test