-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Implement a more generic from_nested_iter method for list arrays #9268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |
| // under the License. | ||
|
|
||
| use crate::array::{get_offsets_from_buffer, make_array, print_long_array}; | ||
| use crate::builder::{GenericListBuilder, PrimitiveBuilder}; | ||
| use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder}; | ||
| use crate::{ | ||
| Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, | ||
| iterator::GenericListArrayIter, new_empty_array, | ||
|
|
@@ -418,18 +418,43 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> { | |
| T: ArrowPrimitiveType, | ||
| P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>, | ||
| I: IntoIterator<Item = Option<P>>, | ||
| { | ||
| Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter) | ||
| } | ||
|
|
||
| /// Creates a [`GenericListArray`] from a nested iterator of values. | ||
| /// This method works for any values type that has a corresponding builder that implements the | ||
| /// `Extend` trait. That includes all numeric types, booleans, binary and string types and also | ||
| /// dictionary encoded binary and strings. | ||
| /// | ||
| /// # Example | ||
| /// ``` | ||
| /// # use arrow_array::ListArray; | ||
| /// # use arrow_array::types::Int32Type; | ||
| /// # use arrow_array::builder::StringDictionaryBuilder; | ||
| /// let data = vec![ | ||
| /// Some(vec![Some("foo"), Some("bar"), Some("baz")]), | ||
| /// None, | ||
| /// Some(vec![Some("bar"), None, Some("foo")]), | ||
| /// Some(vec![]), | ||
| /// ]; | ||
| /// let list_array = ListArray::from_nested_iter::<StringDictionaryBuilder<Int32Type>, _, _, _>(data); | ||
| /// println!("{:?}", list_array); | ||
| /// ``` | ||
| pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe should we call it
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can't have
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implementing the |
||
| where | ||
| B: ArrayBuilder + Default + Extend<Option<T>>, | ||
| P: IntoIterator<Item = Option<T>>, | ||
| I: IntoIterator<Item = Option<P>>, | ||
| { | ||
| let iter = iter.into_iter(); | ||
| let size_hint = iter.size_hint().0; | ||
| let mut builder = | ||
| GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint); | ||
| let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint); | ||
|
|
||
| for i in iter { | ||
| match i { | ||
| Some(p) => { | ||
| for t in p { | ||
| builder.values().append_option(t); | ||
| } | ||
| builder.values().extend(p); | ||
| builder.append(true); | ||
| } | ||
| None => builder.append(false), | ||
|
|
@@ -635,10 +660,15 @@ pub type LargeListArray = GenericListArray<i64>; | |
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
| use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder}; | ||
| use crate::builder::{ | ||
| BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder, | ||
| StringDictionaryBuilder, UnionBuilder, | ||
| }; | ||
| use crate::cast::AsArray; | ||
| use crate::types::Int32Type; | ||
| use crate::{Int32Array, Int64Array}; | ||
| use crate::types::{Int8Type, Int32Type}; | ||
| use crate::{ | ||
| BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray, | ||
| }; | ||
| use arrow_buffer::{Buffer, ScalarBuffer, bit_util}; | ||
| use arrow_schema::Field; | ||
|
|
||
|
|
@@ -1295,4 +1325,60 @@ mod tests { | |
| let array = ListArray::new_null(field, 5); | ||
| assert_eq!(array.len(), 5); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_list_from_iter_i32() { | ||
| let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(vec![ | ||
| None, | ||
| Some(vec![Some(1), None, Some(2)]), | ||
| ]); | ||
| let expected_offsets = &[0, 0, 3]; | ||
| let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)])); | ||
| assert_eq!(array.value_offsets(), expected_offsets); | ||
| assert_eq!(array.values(), &expected_values); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_list_from_iter_bool() { | ||
| let array = ListArray::from_nested_iter::<BooleanBuilder, _, _, _>(vec![ | ||
| Some(vec![None, Some(false), Some(true)]), | ||
| None, | ||
| ]); | ||
| let expected_offsets = &[0, 3, 3]; | ||
| let expected_values: ArrayRef = | ||
| Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)])); | ||
| assert_eq!(array.value_offsets(), expected_offsets); | ||
| assert_eq!(array.values(), &expected_values); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_list_from_iter_str() { | ||
| let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![ | ||
| Some(vec![Some("foo"), None, Some("bar")]), | ||
| None, | ||
| ]); | ||
| let expected_offsets = &[0, 3, 3]; | ||
| let expected_values: ArrayRef = | ||
| Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])); | ||
| assert_eq!(array.value_offsets(), expected_offsets); | ||
| assert_eq!(array.values(), &expected_values); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_list_from_iter_dict_str() { | ||
| let array = | ||
| ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>, _, _, _>(vec![ | ||
| Some(vec![Some("foo"), None, Some("bar"), Some("foo")]), | ||
| None, | ||
| ]); | ||
| let expected_offsets = &[0, 4, 4]; | ||
| let expected_dict_values: ArrayRef = | ||
| Arc::new(StringArray::from(vec![Some("foo"), Some("bar")])); | ||
| let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]); | ||
| let expected_values: ArrayRef = Arc::new( | ||
| Int8DictionaryArray::try_new(expected_dict_keys, expected_dict_values).unwrap(), | ||
| ); | ||
| assert_eq!(array.value_offsets(), expected_offsets); | ||
| assert_eq!(array.values(), &expected_values); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Amazing