Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 95 additions & 9 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder};
use crate::{
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
iterator::GenericListArrayIter, new_empty_array,
Expand Down Expand Up @@ -418,18 +418,43 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
T: ArrowPrimitiveType,
P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
I: IntoIterator<Item = Option<P>>,
{
Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter)
}

/// Creates a [`GenericListArray`] from a nested iterator of values.
/// This method works for any values type that has a corresponding builder that implements the
/// `Extend` trait. That includes all numeric types, booleans, binary and string types and also
/// dictionary encoded binary and strings.
///
/// # Example
/// ```
/// # use arrow_array::ListArray;
/// # use arrow_array::types::Int32Type;
/// # use arrow_array::builder::StringDictionaryBuilder;
/// let data = vec![
/// Some(vec![Some("foo"), Some("bar"), Some("baz")]),
/// None,
/// Some(vec![Some("bar"), None, Some("foo")]),
/// Some(vec![]),
/// ];
/// let list_array = ListArray::from_nested_iter::<StringDictionaryBuilder<Int32Type>, _, _, _>(data);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Amazing

/// println!("{:?}", list_array);
/// ```
pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe should we call it from_iter_primitive_nested to mirror from_iter_primitive ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can't have primitive in the name since this new API accepts strings too 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementing the FromIter trait would be another option, but that would be less explicit and I think makes the generic parameters more difficult.

where
B: ArrayBuilder + Default + Extend<Option<T>>,
P: IntoIterator<Item = Option<T>>,
I: IntoIterator<Item = Option<P>>,
{
let iter = iter.into_iter();
let size_hint = iter.size_hint().0;
let mut builder =
GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint);

for i in iter {
match i {
Some(p) => {
for t in p {
builder.values().append_option(t);
}
builder.values().extend(p);
builder.append(true);
}
None => builder.append(false),
Expand Down Expand Up @@ -635,10 +660,15 @@ pub type LargeListArray = GenericListArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
use crate::builder::{
BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder,
StringDictionaryBuilder, UnionBuilder,
};
use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::{Int32Array, Int64Array};
use crate::types::{Int8Type, Int32Type};
use crate::{
BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray,
};
use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
use arrow_schema::Field;

Expand Down Expand Up @@ -1295,4 +1325,60 @@ mod tests {
let array = ListArray::new_null(field, 5);
assert_eq!(array.len(), 5);
}

#[test]
fn test_list_from_iter_i32() {
let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(vec![
None,
Some(vec![Some(1), None, Some(2)]),
]);
let expected_offsets = &[0, 0, 3];
let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)]));
assert_eq!(array.value_offsets(), expected_offsets);
assert_eq!(array.values(), &expected_values);
}

#[test]
fn test_list_from_iter_bool() {
let array = ListArray::from_nested_iter::<BooleanBuilder, _, _, _>(vec![
Some(vec![None, Some(false), Some(true)]),
None,
]);
let expected_offsets = &[0, 3, 3];
let expected_values: ArrayRef =
Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)]));
assert_eq!(array.value_offsets(), expected_offsets);
assert_eq!(array.values(), &expected_values);
}

#[test]
fn test_list_from_iter_str() {
let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![
Some(vec![Some("foo"), None, Some("bar")]),
None,
]);
let expected_offsets = &[0, 3, 3];
let expected_values: ArrayRef =
Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")]));
assert_eq!(array.value_offsets(), expected_offsets);
assert_eq!(array.values(), &expected_values);
}

#[test]
fn test_list_from_iter_dict_str() {
let array =
ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>, _, _, _>(vec![
Some(vec![Some("foo"), None, Some("bar"), Some("foo")]),
None,
]);
let expected_offsets = &[0, 4, 4];
let expected_dict_values: ArrayRef =
Arc::new(StringArray::from(vec![Some("foo"), Some("bar")]));
let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]);
let expected_values: ArrayRef = Arc::new(
Int8DictionaryArray::try_new(expected_dict_keys, expected_dict_values).unwrap(),
);
assert_eq!(array.value_offsets(), expected_offsets);
assert_eq!(array.values(), &expected_values);
}
}
Loading