From 7dca7489f1f21a7428c8916c05f2a5166bacb0c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Mon, 26 Jan 2026 19:03:12 +0100 Subject: [PATCH 1/4] Implement a more generic from_nested_iter method for list arrays --- arrow-array/src/array/list_array.rs | 100 +++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 9 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index ae1b77895de..4f4b214d45d 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -16,7 +16,7 @@ // under the License. use crate::array::{get_offsets_from_buffer, make_array, print_long_array}; -use crate::builder::{GenericListBuilder, PrimitiveBuilder}; +use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder}; use crate::{ Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, iterator::GenericListArrayIter, new_empty_array, @@ -418,18 +418,39 @@ impl GenericListArray { T: ArrowPrimitiveType, P: IntoIterator::Native>>, I: IntoIterator>, + { + Self::from_nested_iter::, P, I>(iter) + } + + /// Creates a [`GenericListArray`] from an iterator of primitive values + /// # Example + /// ``` + /// # use arrow_array::ListArray; + /// # use arrow_array::builder::Int32Builder; + /// + /// let data = vec![ + /// Some(vec![Some(0), Some(1), Some(2)]), + /// None, + /// Some(vec![Some(3), None, Some(5)]), + /// Some(vec![Some(6), Some(7)]), + /// ]; + /// let list_array = ListArray::from_nested_iter::(data); + /// println!("{:?}", list_array); + /// ``` + pub fn from_nested_iter(iter: I) -> Self + where + B: ArrayBuilder + Default + Extend>, + P: IntoIterator>, + I: IntoIterator>, { let iter = iter.into_iter(); let size_hint = iter.size_hint().0; - let mut builder = - GenericListBuilder::with_capacity(PrimitiveBuilder::::new(), size_hint); + let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint); for i in iter { match i { Some(p) => { - for t in p { - builder.values().append_option(t); - } + builder.values().extend(p); builder.append(true); } None => builder.append(false), @@ -635,10 +656,15 @@ pub type LargeListArray = GenericListArray; #[cfg(test)] mod tests { use super::*; - use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder}; + use crate::builder::{ + BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder, + StringDictionaryBuilder, UnionBuilder, + }; use crate::cast::AsArray; - use crate::types::Int32Type; - use crate::{Int32Array, Int64Array}; + use crate::types::{Int8Type, Int32Type}; + use crate::{ + BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray, + }; use arrow_buffer::{Buffer, ScalarBuffer, bit_util}; use arrow_schema::Field; @@ -1295,4 +1321,60 @@ mod tests { let array = ListArray::new_null(field, 5); assert_eq!(array.len(), 5); } + + #[test] + fn test_list_from_iter_i32() { + let array = ListArray::from_nested_iter::(vec![ + None, + Some(vec![Some(1), None, Some(2)]), + ]); + let expected_offsets = &[0, 0, 3]; + let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)])); + assert_eq!(array.value_offsets(), expected_offsets); + assert_eq!(array.values(), &expected_values); + } + + #[test] + fn test_list_from_iter_bool() { + let array = ListArray::from_nested_iter::(vec![ + Some(vec![None, Some(false), Some(true)]), + None, + ]); + let expected_offsets = &[0, 3, 3]; + let expected_values: ArrayRef = + Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)])); + assert_eq!(array.value_offsets(), expected_offsets); + assert_eq!(array.values(), &expected_values); + } + + #[test] + fn test_list_from_iter_str() { + let array = ListArray::from_nested_iter::<&str, StringBuilder, _, _>(vec![ + Some(vec![Some("foo"), None, Some("bar")]), + None, + ]); + let expected_offsets = &[0, 3, 3]; + let expected_values: ArrayRef = + Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])); + assert_eq!(array.value_offsets(), expected_offsets); + assert_eq!(array.values(), &expected_values); + } + + #[test] + fn test_list_from_iter_dict_str() { + let array = + ListArray::from_nested_iter::<&str, StringDictionaryBuilder, _, _>(vec![ + Some(vec![Some("foo"), None, Some("bar"), Some("foo")]), + None, + ]); + let expected_offsets = &[0, 4, 4]; + let expected_dict_values: ArrayRef = + Arc::new(StringArray::from(vec![Some("foo"), Some("bar")])); + let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]); + let expected_values: ArrayRef = Arc::new( + Int8DictionaryArray::try_new(expected_dict_keys, expected_dict_values).unwrap(), + ); + assert_eq!(array.value_offsets(), expected_offsets); + assert_eq!(array.values(), &expected_values); + } } From 8e7604d528c382881edbcb926f260b4517c69450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Tue, 27 Jan 2026 10:43:38 +0100 Subject: [PATCH 2/4] Reorder generic parameters and extend documentation --- arrow-array/src/array/list_array.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 4f4b214d45d..77d5c5ce780 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -419,25 +419,30 @@ impl GenericListArray { P: IntoIterator::Native>>, I: IntoIterator>, { - Self::from_nested_iter::, P, I>(iter) + Self::from_nested_iter::, T::Native, P, I>(iter) } - /// Creates a [`GenericListArray`] from an iterator of primitive values + /// Creates a [`GenericListArray`] from a nested iterator of primitive values. + /// This method works for any values type that has a corresponding builder that implements the + /// `Extend` trait. That includes all numeric types, booleans, binary and string types and also + /// dictionary encoded binary and strings. + /// /// # Example /// ``` /// # use arrow_array::ListArray; - /// # use arrow_array::builder::Int32Builder; + /// # use arrow_array::types::Int32Type; + /// # use arrow_array::builder::StringDictionaryBuilder; /// /// let data = vec![ - /// Some(vec![Some(0), Some(1), Some(2)]), + /// Some(vec![Some("foo"), Some("bar"), Some("baz")]), /// None, - /// Some(vec![Some(3), None, Some(5)]), - /// Some(vec![Some(6), Some(7)]), + /// Some(vec![Some("bar"), None, Some("foo")]), + /// Some(vec![]), /// ]; - /// let list_array = ListArray::from_nested_iter::(data); + /// let list_array = ListArray::from_nested_iter::, _, _, _>(data); /// println!("{:?}", list_array); /// ``` - pub fn from_nested_iter(iter: I) -> Self + pub fn from_nested_iter(iter: I) -> Self where B: ArrayBuilder + Default + Extend>, P: IntoIterator>, @@ -1324,7 +1329,7 @@ mod tests { #[test] fn test_list_from_iter_i32() { - let array = ListArray::from_nested_iter::(vec![ + let array = ListArray::from_nested_iter::(vec![ None, Some(vec![Some(1), None, Some(2)]), ]); @@ -1336,7 +1341,7 @@ mod tests { #[test] fn test_list_from_iter_bool() { - let array = ListArray::from_nested_iter::(vec![ + let array = ListArray::from_nested_iter::(vec![ Some(vec![None, Some(false), Some(true)]), None, ]); @@ -1349,7 +1354,7 @@ mod tests { #[test] fn test_list_from_iter_str() { - let array = ListArray::from_nested_iter::<&str, StringBuilder, _, _>(vec![ + let array = ListArray::from_nested_iter::(vec![ Some(vec![Some("foo"), None, Some("bar")]), None, ]); @@ -1363,7 +1368,7 @@ mod tests { #[test] fn test_list_from_iter_dict_str() { let array = - ListArray::from_nested_iter::<&str, StringDictionaryBuilder, _, _>(vec![ + ListArray::from_nested_iter::, _, _, _>(vec![ Some(vec![Some("foo"), None, Some("bar"), Some("foo")]), None, ]); From a2bbbbcb11a9912382aa87cfa5647e1b60f1768b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Tue, 27 Jan 2026 14:52:31 +0100 Subject: [PATCH 3/4] Update comment Co-authored-by: Jeffrey Vo --- arrow-array/src/array/list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 77d5c5ce780..a3ce891825d 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -422,7 +422,7 @@ impl GenericListArray { Self::from_nested_iter::, T::Native, P, I>(iter) } - /// Creates a [`GenericListArray`] from a nested iterator of primitive values. + /// Creates a [`GenericListArray`] from a nested iterator of values. /// This method works for any values type that has a corresponding builder that implements the /// `Extend` trait. That includes all numeric types, booleans, binary and string types and also /// dictionary encoded binary and strings. From 8982ee71815ffa2c928dfff1e33db845107ee66d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Tue, 27 Jan 2026 14:52:43 +0100 Subject: [PATCH 4/4] Update comment Co-authored-by: Jeffrey Vo --- arrow-array/src/array/list_array.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index a3ce891825d..5c098bd2579 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -432,7 +432,6 @@ impl GenericListArray { /// # use arrow_array::ListArray; /// # use arrow_array::types::Int32Type; /// # use arrow_array::builder::StringDictionaryBuilder; - /// /// let data = vec![ /// Some(vec![Some("foo"), Some("bar"), Some("baz")]), /// None,