diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java
index c1fba073cfdf..5d2bfc48fae0 100644
--- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java
+++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java
@@ -153,12 +153,12 @@ private static class SerializationState {
     private SerializationState(
         VariantMetadata metadata,
         VariantObject unshredded,
-        Map<String, VariantValue> shreddedFields,
+        Map<String, VariantValue> shredded,
         Set<String> removedFields) {
       this.metadata = metadata;
       // field ID size is the size needed to store the largest field ID in the data
       this.fieldIdSize = VariantUtil.sizeOf(metadata.dictionarySize());
-      this.shreddedFields = Maps.newHashMap(shreddedFields);
+      this.shreddedFields = Maps.newHashMap(shredded);
 
       int totalDataSize = 0;
       // get the unshredded field names and values as byte buffers
diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
index 1ebb433a5423..97ca6c2fa216 100644
--- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
+++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java
@@ -213,6 +213,28 @@ public void testPartiallyShreddedObjectSerializationMinimalBuffer() {
         .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12"));
   }
 
+  @Test
+  public void testPartiallyShreddedUnserializedObjectSerializationMinimalBuffer() {
+    ShreddedObject partial = createUnserializedObject(FIELDS);
+    VariantMetadata metadata = partial.metadata();
+
+    // replace field c with a new value and remove field b
+    partial.put("c", Variants.ofIsoDate("2024-10-12"));
+    partial.remove("b");
+
+    VariantValue value = roundTripMinimalBuffer(partial, metadata);
+
+    assertThat(value).isInstanceOf(SerializedObject.class);
+    SerializedObject actual = (SerializedObject) value;
+
+    assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class);
+    assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34);
+    assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class);
+    assertThat(actual.get("c").type()).isEqualTo(PhysicalType.DATE);
+    assertThat(actual.get("c").asPrimitive().get())
+        .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12"));
+  }
+
   @Test
   public void testPartiallyShreddedObjectSerializationLargeBuffer() {
     ShreddedObject partial = createUnshreddedObject(FIELDS);
@@ -381,6 +403,12 @@ private static ShreddedObject createShreddedObject(
     return object;
   }
 
+  private static ShreddedObject createUnserializedObject(Map<String, VariantValue> fields) {
+    ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false);
+    VariantMetadata metadata = SerializedMetadata.from(metadataBuffer);
+    return new ShreddedObject(metadata, createShreddedObject(metadata, fields));
+  }
+
   /** Creates a ShreddedObject with fields in its shredded map */
   private static ShreddedObject createShreddedObject(Map<String, VariantValue> fields) {
     ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false);
diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java
index 2c83406f9fee..13521ec02982 100644
--- a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java
+++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java
@@ -41,6 +41,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.variants.ShreddedObject;
 import org.apache.iceberg.variants.ValueArray;
 import org.apache.iceberg.variants.Variant;
 import org.apache.iceberg.variants.VariantArray;
@@ -52,6 +53,7 @@ import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.FieldSource;
@@ -280,4 +282,53 @@ private static ValueArray array(VariantValue... values) {
 
     return arr;
   }
+
+  @Test
+  public void testPartialShreddingWithShreddedObject() throws IOException {
+    // Test for issue #15086: partial shredding with ShreddedObject created using put()
+    // Create a ShreddedObject with multiple fields, then partially shred it
+    VariantMetadata metadata = Variants.metadata("id", "name", "city");
+
+    // Create objects using ShreddedObject.put() instead of serialized buffers
+    List<Record> records = Lists.newArrayList();
+    for (int i = 0; i < 3; i++) {
+      ShreddedObject obj = Variants.object(metadata);
+      obj.put("id", Variants.of(1000L + i));
+      obj.put("name", Variants.of("user_" + i));
+      obj.put("city", Variants.of("city_" + i));
+
+      Variant variant = Variant.of(metadata, obj);
+      Record record = RECORD.copy("id", i, "var", variant);
+      records.add(record);
+    }
+
+    // Shredding function that only shreds the "id" field; the value just derives the type
+    VariantShreddingFunction partialShredding =
+        (id, name) -> {
+          VariantMetadata shreddedMetadata = Variants.metadata("id");
+          ShreddedObject shreddedObject = Variants.object(shreddedMetadata);
+          shreddedObject.put("id", Variants.of(1234L));
+          return ParquetVariantUtil.toParquetSchema(shreddedObject);
+        };
+
+    // Write and read back
+    List<Record> actual = writeAndRead(partialShredding, records);
+
+    // Verify all records match
+    assertThat(actual).hasSameSizeAs(records);
+    for (int i = 0; i < records.size(); i++) {
+      Record expected = records.get(i);
+      Record read = actual.get(i);
+
+      InternalTestHelpers.assertEquals(SCHEMA.asStruct(), expected, read);
+
+      // Also verify the variant object has all fields intact
+      Variant readVariant = (Variant) read.getField("var");
+      VariantObject readObj = readVariant.value().asObject();
+      assertThat(readObj.numFields()).isEqualTo(3);
+      assertThat(readObj.get("id").asPrimitive().get()).isEqualTo(1000L + i);
+      assertThat(readObj.get("name").asPrimitive().get()).isEqualTo("user_" + i);
+      assertThat(readObj.get("city").asPrimitive().get()).isEqualTo("city_" + i);
+    }
+  }
 }
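
Reviewer note: below is a minimal sketch of the round trip the new minimal-buffer test exercises — an object built with put() rather than parsed from a buffer, wrapped in a second ShreddedObject the way createUnserializedObject does, then serialized. It uses only calls that appear in the diff plus VariantValue's sizeInBytes()/writeTo() methods, which the test's roundTripMinimalBuffer helper is assumed to wrap. The class name is made up for illustration, and the sketch must live in the org.apache.iceberg.variants package because the ShreddedObject constructor is package-private. It is not part of the patch.

    // A minimal sketch, assuming the iceberg-core variants API shown in the diff above
    package org.apache.iceberg.variants;

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    public class PartialShreddingSketch { // hypothetical class, for illustration only
      public static void main(String[] args) {
        VariantMetadata metadata = Variants.metadata("a", "b", "c");

        // fields come from put() calls, not from a serialized buffer
        ShreddedObject inner = Variants.object(metadata);
        inner.put("a", Variants.of(34));
        inner.put("b", Variants.of("iceberg"));

        // wrap the shredded object the way createUnserializedObject does
        ShreddedObject partial = new ShreddedObject(metadata, inner);
        partial.put("c", Variants.ofIsoDate("2024-10-12")); // new field on the wrapper
        partial.remove("b"); // drop a field that only exists in the inner object

        // serialize into a minimally sized buffer; before the fix, the constructor
        // parameter shadowed SerializationState.shreddedFields, so unqualified
        // references in the constructor could touch the caller's map instead of
        // the defensive copy made with Maps.newHashMap
        ByteBuffer buffer =
            ByteBuffer.allocate(partial.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN);
        partial.writeTo(buffer, 0);
      }
    }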