Add `GenericListArray::from_nested_iter`, a constructor that builds a list array from a
nested iterator using any values builder `B: ArrayBuilder + Default + Extend<Option<T>>`.
`from_iter_primitive` now delegates to it with `PrimitiveBuilder<T>`, and tests cover
Int32, Boolean, String and dictionary-encoded String values.

NOTE(review): this patch was recovered from a copy that had been flattened onto three lines
and had lost every `<...>` generic argument list. Line breaks, indentation and generic
arguments were restored from context (hunk line counts match the headers) - confirm against
the upstream commit before applying.

diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index ae1b77895de..5c098bd2579 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
-use crate::builder::{GenericListBuilder, PrimitiveBuilder};
+use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder};
 use crate::{
     Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
     iterator::GenericListArrayIter, new_empty_array,
@@ -418,18 +418,43 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
         T: ArrowPrimitiveType,
         P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
         I: IntoIterator<Item = Option<P>>,
+    {
+        Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter)
+    }
+
+    /// Creates a [`GenericListArray`] from a nested iterator of values.
+    /// This method works for any values type that has a corresponding builder that implements the
+    /// `Extend` trait. That includes all numeric types, booleans, binary and string types and also
+    /// dictionary encoded binary and strings.
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_array::ListArray;
+    /// # use arrow_array::types::Int32Type;
+    /// # use arrow_array::builder::StringDictionaryBuilder;
+    /// let data = vec![
+    ///    Some(vec![Some("foo"), Some("bar"), Some("baz")]),
+    ///    None,
+    ///    Some(vec![Some("bar"), None, Some("foo")]),
+    ///    Some(vec![]),
+    /// ];
+    /// let list_array = ListArray::from_nested_iter::<StringDictionaryBuilder<Int32Type>, _, _, _>(data);
+    /// println!("{:?}", list_array);
+    /// ```
+    pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self
+    where
+        B: ArrayBuilder + Default + Extend<Option<T>>,
+        P: IntoIterator<Item = Option<T>>,
+        I: IntoIterator<Item = Option<P>>,
     {
         let iter = iter.into_iter();
         let size_hint = iter.size_hint().0;
-        let mut builder =
-            GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
+        let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint);
 
         for i in iter {
             match i {
                 Some(p) => {
-                    for t in p {
-                        builder.values().append_option(t);
-                    }
+                    builder.values().extend(p);
                     builder.append(true);
                 }
                 None => builder.append(false),
@@ -635,10 +660,15 @@ pub type LargeListArray = GenericListArray<i64>;
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
+    use crate::builder::{
+        BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder,
+        StringDictionaryBuilder, UnionBuilder,
+    };
     use crate::cast::AsArray;
-    use crate::types::Int32Type;
-    use crate::{Int32Array, Int64Array};
+    use crate::types::{Int8Type, Int32Type};
+    use crate::{
+        BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray,
+    };
     use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
     use arrow_schema::Field;
 
@@ -1295,4 +1325,60 @@ mod tests {
         let array = ListArray::new_null(field, 5);
         assert_eq!(array.len(), 5);
     }
+
+    #[test]
+    fn test_list_from_iter_i32() {
+        let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(vec![
+            None,
+            Some(vec![Some(1), None, Some(2)]),
+        ]);
+        let expected_offsets = &[0, 0, 3];
+        let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)]));
+        assert_eq!(array.value_offsets(), expected_offsets);
+        assert_eq!(array.values(), &expected_values);
+    }
+
+    #[test]
+    fn test_list_from_iter_bool() {
+        let array = ListArray::from_nested_iter::<BooleanBuilder, _, _, _>(vec![
+            Some(vec![None, Some(false), Some(true)]),
+            None,
+        ]);
+        let expected_offsets = &[0, 3, 3];
+        let expected_values: ArrayRef =
+            Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)]));
+        assert_eq!(array.value_offsets(), expected_offsets);
+        assert_eq!(array.values(), &expected_values);
+    }
+
+    #[test]
+    fn test_list_from_iter_str() {
+        let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![
+            Some(vec![Some("foo"), None, Some("bar")]),
+            None,
+        ]);
+        let expected_offsets = &[0, 3, 3];
+        let expected_values: ArrayRef =
+            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")]));
+        assert_eq!(array.value_offsets(), expected_offsets);
+        assert_eq!(array.values(), &expected_values);
+    }
+
+    #[test]
+    fn test_list_from_iter_dict_str() {
+        let array =
+            ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>, _, _, _>(vec![
+                Some(vec![Some("foo"), None, Some("bar"), Some("foo")]),
+                None,
+            ]);
+        let expected_offsets = &[0, 4, 4];
+        let expected_dict_values: ArrayRef =
+            Arc::new(StringArray::from(vec![Some("foo"), Some("bar")]));
+        let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]);
+        let expected_values: ArrayRef = Arc::new(
+            Int8DictionaryArray::try_new(expected_dict_keys, expected_dict_values).unwrap(),
+        );
+        assert_eq!(array.value_offsets(), expected_offsets);
+        assert_eq!(array.values(), &expected_values);
+    }
 }