diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 064091971cf88..4d8caf837dd46 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -43,7 +43,7 @@ use crate::cast::{ as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array, as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array, as_interval_ym_array, as_large_binary_array, as_large_list_array, - as_large_string_array, as_string_array, as_string_view_array, + as_large_string_array, as_list_view_array, as_string_array, as_string_view_array, as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array, as_timestamp_microsecond_array, as_timestamp_millisecond_array, as_timestamp_nanosecond_array, @@ -62,14 +62,15 @@ use arrow::array::{ DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray, - Int8Array, Int16Array, Int32Array, Int64Array, IntervalDayTimeArray, - IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray, - LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait, - PrimitiveArray, Scalar, StringArray, StringViewArray, StringViewBuilder, StructArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, - UInt64Array, UnionArray, new_empty_array, new_null_array, + GenericListViewArray, Int8Array, Int16Array, Int32Array, Int64Array, + IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, + LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray, + ListViewArray, MapArray, MutableArrayData, OffsetSizeTrait, 
PrimitiveArray, Scalar, + StringArray, StringViewArray, StringViewBuilder, StructArray, Time32MillisecondArray, + Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray, + new_empty_array, new_null_array, }; use arrow::buffer::{BooleanBuffer, ScalarBuffer}; use arrow::compute::kernels::cast::{CastOptions, cast_with_options}; @@ -255,8 +256,8 @@ pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String /// /// # Nested Types /// -/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a -/// single element array of the corresponding type. +/// `List` / `LargeList` / `FixedSizeList` / `ListView` / `LargeListView` / `Struct` / `Map` +/// are represented as a single element array of the corresponding type. /// /// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`] /// ``` @@ -379,6 +380,14 @@ pub enum ScalarValue { List(Arc), /// The array must be a LargeListArray with length 1. LargeList(Arc), + /// Represents a single element of a [`ListViewArray`] as an [`ArrayRef`] + /// + /// The array must be a ListViewArray with length 1. + ListView(Arc), + /// Represents a single element of a [`LargeListViewArray`] as an [`ArrayRef`] + /// + /// The array must be a LargeListViewArray with length 1. + LargeListView(Arc), /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See /// [`ScalarValue`] for examples of how to create instances of this type. 
Struct(Arc), @@ -513,6 +522,10 @@ impl PartialEq for ScalarValue { (List(_), _) => false, (LargeList(v1), LargeList(v2)) => v1.eq(v2), (LargeList(_), _) => false, + (ListView(v1), ListView(v2)) => v1.eq(v2), + (ListView(_), _) => false, + (LargeListView(v1), LargeListView(v2)) => v1.eq(v2), + (LargeListView(_), _) => false, (Struct(v1), Struct(v2)) => v1.eq(v2), (Struct(_), _) => false, (Map(v1), Map(v2)) => v1.eq(v2), @@ -654,7 +667,8 @@ impl PartialOrd for ScalarValue { (FixedSizeBinary(_, _), _) => None, (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2), (LargeBinary(_), _) => None, - // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1 + // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList / ScalarValue::ListView / ScalarValue::LargeListView + // are ensured to have length 1 (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()), (FixedSizeList(arr1), FixedSizeList(arr2)) => { partial_cmp_list(arr1.as_ref(), arr2.as_ref()) @@ -662,7 +676,17 @@ impl PartialOrd for ScalarValue { (LargeList(arr1), LargeList(arr2)) => { partial_cmp_list(arr1.as_ref(), arr2.as_ref()) } - (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None, + (ListView(arr1), ListView(arr2)) => { + partial_cmp_list(arr1.as_ref(), arr2.as_ref()) + } + (LargeListView(arr1), LargeListView(arr2)) => { + partial_cmp_list(arr1.as_ref(), arr2.as_ref()) + } + (List(_), _) + | (LargeList(_), _) + | (FixedSizeList(_), _) + | (ListView(_), _) + | (LargeListView(_), _) => None, (Struct(struct_arr1), Struct(struct_arr2)) => { partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref()) } @@ -728,7 +752,7 @@ impl PartialOrd for ScalarValue { } } -/// List/LargeList/FixedSizeList scalars always have a single element +/// List/LargeList/FixedSizeList/ListView/LargeListView scalars always have a single element /// array. 
This function returns that array fn first_array_for_list(arr: &dyn Array) -> ArrayRef { assert_eq!(arr.len(), 1); @@ -738,14 +762,18 @@ fn first_array_for_list(arr: &dyn Array) -> ArrayRef { arr.value(0) } else if let Some(arr) = arr.as_fixed_size_list_opt() { arr.value(0) + } else if let Some(arr) = arr.as_list_view_opt::() { + arr.value(0) + } else if let Some(arr) = arr.as_list_view_opt::() { + arr.value(0) } else { unreachable!( - "Since only List / LargeList / FixedSizeList are supported, this should never happen" + "Since only List / LargeList / FixedSizeList / ListView / LargeListView are supported, this should never happen" ) } } -/// Compares two List/LargeList/FixedSizeList scalars +/// Compares two List/LargeList/FixedSizeList/ListView/LargeListView scalars fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option { if arr1.data_type() != arr2.data_type() { return None; @@ -933,6 +961,12 @@ impl Hash for ScalarValue { FixedSizeList(arr) => { hash_nested_array(arr.to_owned() as ArrayRef, state); } + ListView(arr) => { + hash_nested_array(arr.to_owned() as ArrayRef, state); + } + LargeListView(arr) => { + hash_nested_array(arr.to_owned() as ArrayRef, state); + } Struct(arr) => { hash_nested_array(arr.to_owned() as ArrayRef, state); } @@ -1259,6 +1293,12 @@ impl ScalarValue { 1, ))) } + DataType::ListView(field_ref) => ScalarValue::ListView(Arc::new( + GenericListViewArray::new_null(Arc::clone(field_ref), 1), + )), + DataType::LargeListView(field_ref) => ScalarValue::LargeListView(Arc::new( + GenericListViewArray::new_null(Arc::clone(field_ref), 1), + )), DataType::Struct(fields) => ScalarValue::Struct( new_null_array(&DataType::Struct(fields.to_owned()), 1) .as_struct() @@ -1618,6 +1658,24 @@ impl ScalarValue { let list = ScalarValue::new_large_list(&[], field.data_type()); Ok(ScalarValue::LargeList(list)) } + DataType::ListView(field) => { + let empty_arr = new_empty_array(field.data_type()); + let values = Arc::new( + 
SingleRowListArrayBuilder::new(empty_arr) + .with_nullable(field.is_nullable()) + .build_list_view_array(), + ); + Ok(ScalarValue::ListView(values)) + } + DataType::LargeListView(field) => { + let empty_arr = new_empty_array(field.data_type()); + let values = Arc::new( + SingleRowListArrayBuilder::new(empty_arr) + .with_nullable(field.is_nullable()) + .build_large_list_view_array(), + ); + Ok(ScalarValue::LargeListView(values)) + } // Struct types DataType::Struct(fields) => { @@ -1923,6 +1981,8 @@ impl ScalarValue { ScalarValue::List(arr) => arr.data_type().to_owned(), ScalarValue::LargeList(arr) => arr.data_type().to_owned(), ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(), + ScalarValue::ListView(arr) => arr.data_type().to_owned(), + ScalarValue::LargeListView(arr) => arr.data_type().to_owned(), ScalarValue::Struct(arr) => arr.data_type().to_owned(), ScalarValue::Map(arr) => arr.data_type().to_owned(), ScalarValue::Date32(_) => DataType::Date32, @@ -2206,6 +2266,8 @@ impl ScalarValue { ScalarValue::List(arr) => arr.len() == arr.null_count(), ScalarValue::LargeList(arr) => arr.len() == arr.null_count(), ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(), + ScalarValue::ListView(arr) => arr.len() == arr.null_count(), + ScalarValue::LargeListView(arr) => arr.len() == arr.null_count(), ScalarValue::Struct(arr) => arr.len() == arr.null_count(), ScalarValue::Map(arr) => arr.len() == arr.null_count(), ScalarValue::Date32(v) => v.is_none(), @@ -2556,6 +2618,8 @@ impl ScalarValue { } DataType::List(_) | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) | DataType::Map(_, _) | DataType::Struct(_) | DataType::Union(_, _) => { @@ -2625,9 +2689,7 @@ impl ScalarValue { | DataType::Time32(TimeUnit::Nanosecond) | DataType::Time64(TimeUnit::Second) | DataType::Time64(TimeUnit::Millisecond) - | DataType::RunEndEncoded(_, _) - | DataType::ListView(_) - | DataType::LargeListView(_) => { + | DataType::RunEndEncoded(_, _) => { 
return _not_impl_err!( "Unsupported creation of {:?} array from ScalarValue {:?}", data_type, @@ -3048,6 +3110,18 @@ impl ScalarValue { } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } + ScalarValue::ListView(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } + Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? + } + ScalarValue::LargeListView(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } + Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? + } ScalarValue::Struct(arr) => { if size == 1 { return Ok(Arc::clone(arr) as Arc); @@ -3487,6 +3561,22 @@ impl ScalarValue { .with_field(field) .build_fixed_size_list_scalar(list_size) } + DataType::ListView(field) => { + let list_array = as_list_view_array(array)?; + let nested_array = list_array.value(index); + // Produces a single element `ListViewArray` with the value at `index`. + SingleRowListArrayBuilder::new(nested_array) + .with_field(field) + .build_list_view_scalar() + } + DataType::LargeListView(field) => { + let list_array = crate::cast::as_large_list_view_array(array)?; + let nested_array = list_array.value(index); + // Produces a single element `LargeListViewArray` with the value at `index`. 
+ SingleRowListArrayBuilder::new(nested_array) + .with_field(field) + .build_large_list_view_scalar() + } DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?, DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?, DataType::Time32(TimeUnit::Second) => { @@ -3923,6 +4013,12 @@ impl ScalarValue { ScalarValue::FixedSizeList(arr) => { Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) } + ScalarValue::ListView(arr) => { + Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) + } + ScalarValue::LargeListView(arr) => { + Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) + } ScalarValue::Struct(arr) => { Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) } @@ -4082,6 +4178,8 @@ impl ScalarValue { ScalarValue::List(arr) => arr.get_array_memory_size(), ScalarValue::LargeList(arr) => arr.get_array_memory_size(), ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(), + ScalarValue::ListView(arr) => arr.get_array_memory_size(), + ScalarValue::LargeListView(arr) => arr.get_array_memory_size(), ScalarValue::Struct(arr) => arr.get_array_memory_size(), ScalarValue::Map(arr) => arr.get_array_memory_size(), ScalarValue::Union(vals, fields, _mode) => { @@ -4196,6 +4294,14 @@ impl ScalarValue { let array = copy_array_data(&arr.to_data()); *Arc::make_mut(arr) = LargeListArray::from(array) } + ScalarValue::ListView(arr) => { + let array = copy_array_data(&arr.to_data()); + *Arc::make_mut(arr) = ListViewArray::from(array); + } + ScalarValue::LargeListView(arr) => { + let array = copy_array_data(&arr.to_data()); + *Arc::make_mut(arr) = LargeListViewArray::from(array) + } ScalarValue::Struct(arr) => { let array = copy_array_data(&arr.to_data()); *Arc::make_mut(arr) = StructArray::from(array); @@ -4728,6 +4834,8 @@ impl fmt::Display for ScalarValue { ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?, ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?, 
ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?, + ScalarValue::ListView(arr) => fmt_list(arr.as_ref(), f)?, + ScalarValue::LargeListView(arr) => fmt_list(arr.as_ref(), f)?, ScalarValue::Date32(e) => format_option!( f, e.map(|v| { @@ -4850,7 +4958,7 @@ impl fmt::Display for ScalarValue { } fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result { - // ScalarValue List, LargeList, FixedSizeList should always have a single element + // ScalarValue List, LargeList, FixedSizeList, ListView, LargeListView should always have a single element assert_eq!(arr.len(), 1); let options = FormatOptions::default().with_display_error(true); let formatter = ArrayFormatter::try_new(arr, &options).unwrap(); @@ -4936,6 +5044,8 @@ impl fmt::Debug for ScalarValue { ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"), ScalarValue::List(_) => write!(f, "List({self})"), ScalarValue::LargeList(_) => write!(f, "LargeList({self})"), + ScalarValue::ListView(_) => write!(f, "ListView({self})"), + ScalarValue::LargeListView(_) => write!(f, "LargeListView({self})"), ScalarValue::Struct(struct_arr) => { // ScalarValue Struct should always have a single element assert_eq!(struct_arr.len(), 1); @@ -5073,12 +5183,15 @@ mod tests { use std::sync::Arc; use super::*; - use crate::cast::{as_list_array, as_map_array, as_struct_array}; + use crate::cast::{ + as_large_list_view_array, as_list_array, as_map_array, as_struct_array, + }; use crate::test_util::batches_to_string; use arrow::array::{ - FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder, - NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch, - StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder, + FixedSizeListBuilder, Int32Builder, LargeListBuilder, LargeListViewBuilder, + ListBuilder, ListViewBuilder, MapBuilder, NullArray, NullBufferBuilder, + OffsetSizeTrait, PrimitiveBuilder, RecordBatch, StringBuilder, + StringDictionaryBuilder, 
StructBuilder, UnionBuilder, }; use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer}; use arrow::compute::{is_null, kernels}; @@ -5265,6 +5378,27 @@ mod tests { ]); assert_eq!(&arr, actual_list_arr); + + // ListView + let arr = + ListViewArray::from_iter_primitive::(vec![Some(vec![ + Some(1), + None, + Some(2), + ])]); + + let sv = ScalarValue::ListView(Arc::new(arr)); + let actual_arr = sv + .to_array_of_size(2) + .expect("Failed to convert to array of size"); + let actual_list_arr = actual_arr.as_list_view::(); + + let arr = ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), None, Some(2)]), + Some(vec![Some(1), None, Some(2)]), + ]); + + assert_eq!(&arr, actual_list_arr); } #[test] @@ -5492,13 +5626,13 @@ mod tests { #[test] fn iter_to_array_primitive_test() { + // List // List[[1,2,3]], List[null], List[[4,5]] let scalars = build_list::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, Some(vec![Some(4), Some(5)]), ]); - let array = ScalarValue::iter_to_array(scalars).unwrap(); let list_array = as_list_array(&array).unwrap(); // List[[1,2,3], null, [4,5]] @@ -5509,20 +5643,57 @@ mod tests { ]); assert_eq!(list_array, &expected); + // LargeList + // List[[1,2,3]], List[null], List[[4,5]] let scalars = build_list::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, Some(vec![Some(4), Some(5)]), ]); - let array = ScalarValue::iter_to_array(scalars).unwrap(); - let list_array = as_large_list_array(&array).unwrap(); + let large_list_array = as_large_list_array(&array).unwrap(); let expected = LargeListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, Some(vec![Some(4), Some(5)]), ]); - assert_eq!(list_array, &expected); + assert_eq!(large_list_array, &expected); + + // ListView + // List[[1,2,3]], List[null], List[[4,5]] + let scalars = build_list::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + + let array = ScalarValue::iter_to_array(scalars).unwrap(); + let 
list_view_array = as_list_view_array(&array).unwrap(); + // List[[1,2,3], null, [4,5]] + let expected = ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + assert_eq!(list_view_array, &expected); + + // LargeListView + // List[[1,2,3]], List[null], List[[4,5]] + let scalars = build_list::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + + let array = ScalarValue::iter_to_array(scalars).unwrap(); + let large_list_view_array = as_large_list_view_array(&array).unwrap(); + // List[[1,2,3], null, [4,5]] + let expected = LargeListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + assert_eq!(large_list_view_array, &expected); } #[test] @@ -5565,16 +5736,36 @@ mod tests { ])); let fsl_array: ArrayRef = - Arc::new(ListArray::from_iter_primitive::(vec![ + Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![ + Some(vec![Some(0), Some(1), Some(2)]), + None, + Some(vec![Some(3), None, Some(5)]), + ], + 3, + )); + let list_view_array: ArrayRef = + Arc::new(ListViewArray::from_iter_primitive::(vec![ Some(vec![Some(0), Some(1), Some(2)]), None, - Some(vec![Some(3), None, Some(5)]), + Some(vec![None, Some(5)]), ])); - for arr in [list_array, fsl_array] { + for arr in [list_array, fsl_array, list_view_array] { for i in 0..arr.len() { - let scalar = - ScalarValue::List(arr.slice(i, 1).as_list::().to_owned().into()); + let slice = arr.slice(i, 1); + let scalar = match arr.data_type() { + DataType::List(_) => { + ScalarValue::List(slice.as_list::().to_owned().into()) + } + DataType::FixedSizeList(_, _) => ScalarValue::FixedSizeList( + slice.as_fixed_size_list().to_owned().into(), + ), + DataType::ListView(_) => ScalarValue::ListView( + slice.as_list_view::().to_owned().into(), + ), + _ => unreachable!(), + }; assert!(scalar.eq_array(&arr, i).unwrap()); } } @@ -6158,6 +6349,40 @@ 
mod tests { ), )); assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater)); + + let a = ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::< + Int64Type, + _, + _, + >(vec![Some(vec![ + None, + Some(2), + Some(3), + ])]))); + let b = ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::< + Int64Type, + _, + _, + >(vec![Some(vec![ + Some(1), + Some(2), + Some(3), + ])]))); + assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater)); + + let a = + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![Some( + vec![None, Some(2), Some(3)], + )]), + )); + let b = + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![Some( + vec![Some(1), Some(2), Some(3)], + )]), + )); + assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater)); } #[test] @@ -6505,6 +6730,30 @@ mod tests { ); assert_eq!(expected, scalar); assert!(expected.is_null()); + + // Test for ListView + let data_type = &DataType::ListView(Arc::clone(&inner_field)); + let scalar: ScalarValue = data_type.try_into().unwrap(); + let expected = ScalarValue::ListView( + new_null_array(data_type, 1) + .as_list_view::() + .to_owned() + .into(), + ); + assert_eq!(expected, scalar); + assert!(expected.is_null()); + + // Test for LargeListView + let data_type = &DataType::LargeListView(Arc::clone(&inner_field)); + let scalar: ScalarValue = data_type.try_into().unwrap(); + let expected = ScalarValue::LargeListView( + new_null_array(data_type, 1) + .as_list_view::() + .to_owned() + .into(), + ); + assert_eq!(expected, scalar); + assert!(expected.is_null()); } #[test] @@ -7217,6 +7466,34 @@ mod tests { builder.append(true); Arc::new(builder.finish()) }, + // list view array + { + let values_builder = StringBuilder::new(); + let mut builder = ListViewBuilder::new(values_builder); + // [A, B] + builder.values().append_value("A"); + builder.values().append_value("B"); + builder.append(true); + // [ ] (empty list) + builder.append(true); + // 
Null + builder.append(false); + Arc::new(builder.finish()) + }, + // large list view array + { + let values_builder = StringBuilder::new(); + let mut builder = LargeListViewBuilder::new(values_builder); + // [A, B] + builder.values().append_value("A"); + builder.values().append_value("B"); + builder.append(true); + // [ ] (empty list) + builder.append(true); + // Null + builder.append(false); + Arc::new(builder.finish()) + }, // map { let string_builder = StringBuilder::new(); @@ -7714,6 +7991,38 @@ mod tests { }, DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))), ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = + ListViewBuilder::new(Int32Builder::new()).with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::ListView(Arc::new(builder.finish())) + }, + DataType::ListView(Arc::new(Field::new("element", DataType::Int64, true))), + ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = LargeListViewBuilder::new(Int32Builder::new()) + .with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::LargeListView(Arc::new(builder.finish())) + }, + DataType::LargeListView(Arc::new(Field::new( + "element", + DataType::Int64, + true, + ))), + ); } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` @@ -8404,6 +8713,21 @@ mod tests { "); } + #[test] + fn test_list_view_display() { + let s = ScalarValue::ListView( + ListViewArray::from_iter_primitive::(vec![Some(vec![ + Some(1), + None, + Some(3), + ])]) + .into(), + ); + + assert_eq!(s.to_string(), "[1, , 3]"); + assert_eq!(format!("{s:?}"), "ListView([1, , 3])"); + } + #[test] fn test_null_bug() { let field_a = Field::new("a", DataType::Int32, true); @@ -8862,6 +9186,10 @@ mod tests { 42, )) .unwrap(), + 
ScalarValue::try_new_null(&DataType::ListView(Arc::clone(&field_ref))) + .unwrap(), + ScalarValue::try_new_null(&DataType::LargeListView(Arc::clone(&field_ref))) + .unwrap(), ScalarValue::try_new_null(&DataType::Struct( vec![Arc::clone(&field_ref)].into(), )) @@ -8954,6 +9282,17 @@ mod tests { _ => panic!("Expected List"), } + let list_result = + ScalarValue::new_default(&DataType::ListView(Arc::new(list_field.clone()))) + .unwrap(); + match list_result { + ScalarValue::ListView(arr) => { + assert_eq!(arr.len(), 1); + assert_eq!(arr.value_size(0), 0); // empty list + } + _ => panic!("Expected ListView"), + } + // Test struct type let struct_fields = Fields::from(vec![ Field::new("a", DataType::Int32, false), @@ -9063,6 +9402,14 @@ mod tests { )))), None ); + assert_eq!( + ScalarValue::min(&DataType::ListView(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); } #[test] @@ -9139,6 +9486,14 @@ mod tests { )]))), None ); + assert_eq!( + ScalarValue::max(&DataType::ListView(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); } #[test] diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 03310a7bde193..ad1ba5830a97e 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -29,7 +29,8 @@ use arrow::array::{ Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, cast::AsArray, }; -use arrow::buffer::OffsetBuffer; +use arrow::array::{LargeListViewArray, ListViewArray}; +use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::compute::{SortColumn, SortOptions, partition}; use arrow::datatypes::{DataType, Field, SchemaRef}; #[cfg(feature = "sql")] @@ -479,6 +480,32 @@ impl SingleRowListArrayBuilder { ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size))) } + /// Build a single element [`ListViewArray`] + pub fn build_list_view_array(self) -> ListViewArray { + let (field, arr) = 
self.into_field_and_arr(); + let offsets = ScalarBuffer::from(vec![0]); + let sizes = ScalarBuffer::from(vec![arr.len() as i32]); + ListViewArray::new(field, offsets, sizes, arr, None) + } + + /// Build a single element [`ListViewArray`] and wrap as [`ScalarValue::ListView`] + pub fn build_list_view_scalar(self) -> ScalarValue { + ScalarValue::ListView(Arc::new(self.build_list_view_array())) + } + + /// Build a single element [`LargeListViewArray`] + pub fn build_large_list_view_array(self) -> LargeListViewArray { + let (field, arr) = self.into_field_and_arr(); + let offsets = ScalarBuffer::from(vec![0]); + let sizes = ScalarBuffer::from(vec![arr.len() as i64]); + LargeListViewArray::new(field, offsets, sizes, arr, None) + } + + /// Build a single element [`LargeListViewArray`] and wrap as [`ScalarValue::LargeListView`] + pub fn build_large_list_view_scalar(self) -> ScalarValue { + ScalarValue::LargeListView(Arc::new(self.build_large_list_view_array())) + } + /// Helper function: convert this builder into a tuple of field and array fn into_field_and_arr(self) -> (Arc, ArrayRef) { let Self { diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 08bb25bd715b9..f3c18bc5364f5 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -194,7 +194,7 @@ message Union{ repeated int32 type_ids = 3; } -// Used for List/FixedSizeList/LargeList/Struct/Map +// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map message ScalarNestedValue { message Dictionary { bytes ipc_message = 1; @@ -295,6 +295,8 @@ message ScalarValue{ ScalarNestedValue large_list_value = 16; ScalarNestedValue list_value = 17; ScalarNestedValue fixed_size_list_value = 18; + ScalarNestedValue list_view_value = 45; + ScalarNestedValue large_list_view_value = 46; ScalarNestedValue struct_value = 32; ScalarNestedValue map_value = 41; @@ -385,6 
+387,8 @@ message ArrowType{ List LIST = 25; List LARGE_LIST = 26; FixedSizeList FIXED_SIZE_LIST = 27; + List LIST_VIEW = 42; + List LARGE_LIST_VIEW = 43; Struct STRUCT = 28; Union UNION = 29; Dictionary DICTIONARY = 30; diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 3c41b8cad9ed1..5f574c3e3baa4 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -292,6 +292,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { let list_size = list.list_size; DataType::FixedSizeList(Arc::new(list_type), list_size) } + arrow_type::ArrowTypeEnum::ListView(list) => { + let list_type = + list.as_ref().field_type.as_deref().required("field_type")?; + DataType::ListView(Arc::new(list_type)) + } + arrow_type::ArrowTypeEnum::LargeListView(list) => { + let list_type = + list.as_ref().field_type.as_deref().required("field_type")?; + DataType::LargeListView(Arc::new(list_type)) + } arrow_type::ArrowTypeEnum::Struct(strct) => DataType::Struct( parse_proto_fields_to_fields(&strct.sub_field_types)?.into(), ), @@ -388,6 +398,8 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Value::ListValue(v) | Value::FixedSizeListValue(v) | Value::LargeListValue(v) + | Value::ListViewValue(v) + | Value::LargeListViewValue(v) | Value::StructValue(v) | Value::MapValue(v) => { let protobuf::ScalarNestedValue { @@ -472,6 +484,12 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Value::FixedSizeListValue(_) => { Self::FixedSizeList(arr.as_fixed_size_list().to_owned().into()) } + Value::ListViewValue(_) => { + Self::ListView(arr.as_list_view::().to_owned().into()) + } + Value::LargeListViewValue(_) => { + Self::LargeListView(arr.as_list_view::().to_owned().into()) + } Value::StructValue(_) => { Self::Struct(arr.as_struct().to_owned().into()) } diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs 
index ef0eae1981d93..3a82444ad9354 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -264,6 +264,12 @@ impl serde::Serialize for ArrowType { arrow_type::ArrowTypeEnum::FixedSizeList(v) => { struct_ser.serialize_field("FIXEDSIZELIST", v)?; } + arrow_type::ArrowTypeEnum::ListView(v) => { + struct_ser.serialize_field("LISTVIEW", v)?; + } + arrow_type::ArrowTypeEnum::LargeListView(v) => { + struct_ser.serialize_field("LARGELISTVIEW", v)?; + } arrow_type::ArrowTypeEnum::Struct(v) => { struct_ser.serialize_field("STRUCT", v)?; } @@ -329,6 +335,10 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "LARGELIST", "FIXED_SIZE_LIST", "FIXEDSIZELIST", + "LIST_VIEW", + "LISTVIEW", + "LARGE_LIST_VIEW", + "LARGELISTVIEW", "STRUCT", "UNION", "DICTIONARY", @@ -371,6 +381,8 @@ impl<'de> serde::Deserialize<'de> for ArrowType { List, LargeList, FixedSizeList, + ListView, + LargeListView, Struct, Union, Dictionary, @@ -430,6 +442,8 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "LIST" => Ok(GeneratedField::List), "LARGELIST" | "LARGE_LIST" => Ok(GeneratedField::LargeList), "FIXEDSIZELIST" | "FIXED_SIZE_LIST" => Ok(GeneratedField::FixedSizeList), + "LISTVIEW" | "LIST_VIEW" => Ok(GeneratedField::ListView), + "LARGELISTVIEW" | "LARGE_LIST_VIEW" => Ok(GeneratedField::LargeListView), "STRUCT" => Ok(GeneratedField::Struct), "UNION" => Ok(GeneratedField::Union), "DICTIONARY" => Ok(GeneratedField::Dictionary), @@ -687,6 +701,20 @@ impl<'de> serde::Deserialize<'de> for ArrowType { return Err(serde::de::Error::duplicate_field("FIXEDSIZELIST")); } arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::FixedSizeList) +; + } + GeneratedField::ListView => { + if arrow_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("LISTVIEW")); + } + arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::ListView) +; + } + 
GeneratedField::LargeListView => { + if arrow_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("LARGELISTVIEW")); + } + arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::LargeListView) ; } GeneratedField::Struct => { @@ -7553,6 +7581,12 @@ impl serde::Serialize for ScalarValue { scalar_value::Value::FixedSizeListValue(v) => { struct_ser.serialize_field("fixedSizeListValue", v)?; } + scalar_value::Value::ListViewValue(v) => { + struct_ser.serialize_field("listViewValue", v)?; + } + scalar_value::Value::LargeListViewValue(v) => { + struct_ser.serialize_field("largeListViewValue", v)?; + } scalar_value::Value::StructValue(v) => { struct_ser.serialize_field("structValue", v)?; } @@ -7687,6 +7721,10 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { "listValue", "fixed_size_list_value", "fixedSizeListValue", + "list_view_value", + "listViewValue", + "large_list_view_value", + "largeListViewValue", "struct_value", "structValue", "map_value", @@ -7755,6 +7793,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { LargeListValue, ListValue, FixedSizeListValue, + ListViewValue, + LargeListViewValue, StructValue, MapValue, Decimal32Value, @@ -7818,6 +7858,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { "largeListValue" | "large_list_value" => Ok(GeneratedField::LargeListValue), "listValue" | "list_value" => Ok(GeneratedField::ListValue), "fixedSizeListValue" | "fixed_size_list_value" => Ok(GeneratedField::FixedSizeListValue), + "listViewValue" | "list_view_value" => Ok(GeneratedField::ListViewValue), + "largeListViewValue" | "large_list_view_value" => Ok(GeneratedField::LargeListViewValue), "structValue" | "struct_value" => Ok(GeneratedField::StructValue), "mapValue" | "map_value" => Ok(GeneratedField::MapValue), "decimal32Value" | "decimal32_value" => Ok(GeneratedField::Decimal32Value), @@ -7985,6 +8027,20 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { return 
Err(serde::de::Error::duplicate_field("fixedSizeListValue")); } value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::FixedSizeListValue) +; + } + GeneratedField::ListViewValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("listViewValue")); + } + value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::ListViewValue) +; + } + GeneratedField::LargeListViewValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("largeListViewValue")); + } + value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::LargeListViewValue) ; } GeneratedField::StructValue => { diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 16601dcf46977..084b4bd5a81f7 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -184,7 +184,7 @@ pub struct Union { #[prost(int32, repeated, tag = "3")] pub type_ids: ::prost::alloc::vec::Vec, } -/// Used for List/FixedSizeList/LargeList/Struct/Map +/// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarNestedValue { #[prost(bytes = "vec", tag = "1")] @@ -311,7 +311,7 @@ pub struct ScalarFixedSizeBinary { pub struct ScalarValue { #[prost( oneof = "scalar_value::Value", - tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42" + tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 45, 46, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42" )] pub value: ::core::option::Option, } @@ -362,6 +362,10 @@ pub mod scalar_value { ListValue(super::ScalarNestedValue), #[prost(message, tag = "18")] FixedSizeListValue(super::ScalarNestedValue), + #[prost(message, tag = 
"45")] + ListViewValue(super::ScalarNestedValue), + #[prost(message, tag = "46")] + LargeListViewValue(super::ScalarNestedValue), #[prost(message, tag = "32")] StructValue(super::ScalarNestedValue), #[prost(message, tag = "41")] @@ -449,7 +453,7 @@ pub struct Decimal256 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 28, 29, 30, 33" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 42, 43, 28, 29, 30, 33" )] pub arrow_type_enum: ::core::option::Option, } @@ -530,6 +534,10 @@ pub mod arrow_type { LargeList(::prost::alloc::boxed::Box), #[prost(message, tag = "27")] FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag = "42")] + ListView(::prost::alloc::boxed::Box), + #[prost(message, tag = "43")] + LargeListView(::prost::alloc::boxed::Box), #[prost(message, tag = "28")] Struct(super::Struct), #[prost(message, tag = "29")] diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index fee3656482005..5288fd9764664 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -171,6 +171,12 @@ impl TryFrom<&DataType> for protobuf::arrow_type::ArrowTypeEnum { DataType::LargeList(item_type) => Self::LargeList(Box::new(protobuf::List { field_type: Some(Box::new(item_type.as_ref().try_into()?)), })), + DataType::ListView(item_type) => Self::ListView(Box::new(protobuf::List { + field_type: Some(Box::new(item_type.as_ref().try_into()?)), + })), + DataType::LargeListView(item_type) => Self::LargeListView(Box::new(protobuf::List { + field_type: Some(Box::new(item_type.as_ref().try_into()?)), + })), DataType::Struct(struct_fields) => Self::Struct(protobuf::Struct { sub_field_types: 
convert_arc_fields_to_proto_fields(struct_fields)?, }), @@ -220,9 +226,6 @@ impl TryFrom<&DataType> for protobuf::arrow_type::ArrowTypeEnum { "Proto serialization error: The RunEndEncoded data type is not yet supported".to_owned() )) } - DataType::ListView(_) | DataType::LargeListView(_) => { - return Err(Error::General(format!("Proto serialization error: {val} not yet supported"))) - } }; Ok(res) @@ -374,6 +377,12 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { ScalarValue::FixedSizeList(arr) => { encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) } + ScalarValue::ListView(arr) => { + encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) + } + ScalarValue::LargeListView(arr) => { + encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) + } ScalarValue::Struct(arr) => { encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) } @@ -1010,7 +1019,7 @@ fn create_proto_scalar protobuf::scalar_value::Value>( Ok(protobuf::ScalarValue { value: Some(value) }) } -// ScalarValue::List / FixedSizeList / LargeList / Struct / Map are serialized using +// ScalarValue::List / FixedSizeList / LargeList / ListView / LargeListView / Struct / Map are serialized using // Arrow IPC messages as a single column RecordBatch fn encode_scalar_nested_value( arr: ArrayRef, @@ -1066,6 +1075,16 @@ fn encode_scalar_nested_value( scalar_list_value, )), }), + ScalarValue::ListView(_) => Ok(protobuf::ScalarValue { + value: Some(protobuf::scalar_value::Value::ListViewValue( + scalar_list_value, + )), + }), + ScalarValue::LargeListView(_) => Ok(protobuf::ScalarValue { + value: Some(protobuf::scalar_value::Value::LargeListViewValue( + scalar_list_value, + )), + }), ScalarValue::Struct(_) => Ok(protobuf::ScalarValue { value: Some(protobuf::scalar_value::Value::StructValue( scalar_list_value, diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 16601dcf46977..084b4bd5a81f7 100644 --- 
a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -184,7 +184,7 @@ pub struct Union { #[prost(int32, repeated, tag = "3")] pub type_ids: ::prost::alloc::vec::Vec, } -/// Used for List/FixedSizeList/LargeList/Struct/Map +/// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarNestedValue { #[prost(bytes = "vec", tag = "1")] @@ -311,7 +311,7 @@ pub struct ScalarFixedSizeBinary { pub struct ScalarValue { #[prost( oneof = "scalar_value::Value", - tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42" + tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 45, 46, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42" )] pub value: ::core::option::Option, } @@ -362,6 +362,10 @@ pub mod scalar_value { ListValue(super::ScalarNestedValue), #[prost(message, tag = "18")] FixedSizeListValue(super::ScalarNestedValue), + #[prost(message, tag = "45")] + ListViewValue(super::ScalarNestedValue), + #[prost(message, tag = "46")] + LargeListViewValue(super::ScalarNestedValue), #[prost(message, tag = "32")] StructValue(super::ScalarNestedValue), #[prost(message, tag = "41")] @@ -449,7 +453,7 @@ pub struct Decimal256 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 28, 29, 30, 33" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 42, 43, 28, 29, 30, 33" )] pub arrow_type_enum: ::core::option::Option, } @@ -530,6 +534,10 @@ pub mod arrow_type { LargeList(::prost::alloc::boxed::Box), #[prost(message, tag = "27")] 
FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag = "42")] + ListView(::prost::alloc::boxed::Box), + #[prost(message, tag = "43")] + LargeListView(::prost::alloc::boxed::Box), #[prost(message, tag = "28")] Struct(super::Struct), #[prost(message, tag = "29")] diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index e5c218e5ebe2e..efd70fdfa875d 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -16,7 +16,8 @@ // under the License. use arrow::array::{ - ArrayRef, FixedSizeListArray, Int32Builder, MapArray, MapBuilder, StringBuilder, + ArrayRef, FixedSizeListArray, Int32Builder, LargeListViewArray, ListViewArray, + MapArray, MapBuilder, StringBuilder, }; use arrow::datatypes::{ DECIMAL256_MAX_PRECISION, DataType, Field, FieldRef, Fields, Int32Type, @@ -1352,253 +1353,267 @@ impl LogicalExtensionCodec for UDFExtensionCodec { #[test] fn round_trip_scalar_values_and_data_types() { - let should_pass: Vec = vec![ - ScalarValue::Boolean(None), - ScalarValue::Float32(None), - ScalarValue::Float64(None), - ScalarValue::Int8(None), - ScalarValue::Int16(None), - ScalarValue::Int32(None), - ScalarValue::Int64(None), - ScalarValue::UInt8(None), - ScalarValue::UInt16(None), - ScalarValue::UInt32(None), - ScalarValue::UInt64(None), - ScalarValue::Utf8(None), - ScalarValue::LargeUtf8(None), - ScalarValue::List(ScalarValue::new_list_nullable(&[], &DataType::Boolean)), - ScalarValue::LargeList(ScalarValue::new_large_list(&[], &DataType::Boolean)), - ScalarValue::Date32(None), - ScalarValue::Boolean(Some(true)), - ScalarValue::Boolean(Some(false)), - ScalarValue::Float32(Some(1.0)), - ScalarValue::Float32(Some(f32::MAX)), - ScalarValue::Float32(Some(f32::MIN)), - ScalarValue::Float32(Some(-2000.0)), - ScalarValue::Float64(Some(1.0)), - ScalarValue::Float64(Some(f64::MAX)), - ScalarValue::Float64(Some(f64::MIN)), - 
ScalarValue::Float64(Some(-2000.0)), - ScalarValue::Int8(Some(i8::MIN)), - ScalarValue::Int8(Some(i8::MAX)), - ScalarValue::Int8(Some(0)), - ScalarValue::Int8(Some(-15)), - ScalarValue::Int16(Some(i16::MIN)), - ScalarValue::Int16(Some(i16::MAX)), - ScalarValue::Int16(Some(0)), - ScalarValue::Int16(Some(-15)), - ScalarValue::Int32(Some(i32::MIN)), - ScalarValue::Int32(Some(i32::MAX)), - ScalarValue::Int32(Some(0)), - ScalarValue::Int32(Some(-15)), - ScalarValue::Int64(Some(i64::MIN)), - ScalarValue::Int64(Some(i64::MAX)), - ScalarValue::Int64(Some(0)), - ScalarValue::Int64(Some(-15)), - ScalarValue::UInt8(Some(u8::MAX)), - ScalarValue::UInt8(Some(0)), - ScalarValue::UInt16(Some(u16::MAX)), - ScalarValue::UInt16(Some(0)), - ScalarValue::UInt32(Some(u32::MAX)), - ScalarValue::UInt32(Some(0)), - ScalarValue::UInt64(Some(u64::MAX)), - ScalarValue::UInt64(Some(0)), - ScalarValue::Utf8(Some(String::from("Test string "))), - ScalarValue::LargeUtf8(Some(String::from("Test Large utf8"))), - ScalarValue::Utf8View(Some(String::from("Test stringview"))), - ScalarValue::BinaryView(Some(b"binaryview".to_vec())), - ScalarValue::Date32(Some(0)), - ScalarValue::Date32(Some(i32::MAX)), - ScalarValue::Date32(None), - ScalarValue::Date64(Some(0)), - ScalarValue::Date64(Some(i64::MAX)), - ScalarValue::Date64(None), - ScalarValue::Time32Second(Some(0)), - ScalarValue::Time32Second(Some(i32::MAX)), - ScalarValue::Time32Second(None), - ScalarValue::Time32Millisecond(Some(0)), - ScalarValue::Time32Millisecond(Some(i32::MAX)), - ScalarValue::Time32Millisecond(None), - ScalarValue::Time64Microsecond(Some(0)), - ScalarValue::Time64Microsecond(Some(i64::MAX)), - ScalarValue::Time64Microsecond(None), - ScalarValue::Time64Nanosecond(Some(0)), - ScalarValue::Time64Nanosecond(Some(i64::MAX)), - ScalarValue::Time64Nanosecond(None), - ScalarValue::TimestampNanosecond(Some(0), None), - ScalarValue::TimestampNanosecond(Some(i64::MAX), None), - ScalarValue::TimestampNanosecond(Some(0), 
Some("UTC".into())), - ScalarValue::TimestampNanosecond(None, None), - ScalarValue::TimestampMicrosecond(Some(0), None), - ScalarValue::TimestampMicrosecond(Some(i64::MAX), None), - ScalarValue::TimestampMicrosecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampMicrosecond(None, None), - ScalarValue::TimestampMillisecond(Some(0), None), - ScalarValue::TimestampMillisecond(Some(i64::MAX), None), - ScalarValue::TimestampMillisecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampMillisecond(None, None), - ScalarValue::TimestampSecond(Some(0), None), - ScalarValue::TimestampSecond(Some(i64::MAX), None), - ScalarValue::TimestampSecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampSecond(None, None), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(1, 2))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - i32::MAX, - i32::MAX, - ))), - ScalarValue::IntervalDayTime(None), - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - 0, 0, 0, - ))), - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - 1, 2, 3, - ))), - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - i32::MAX, - i32::MAX, - i64::MAX, - ))), - ScalarValue::IntervalMonthDayNano(None), - ScalarValue::List(ScalarValue::new_list_nullable( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ScalarValue::LargeList(ScalarValue::new_large_list( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ScalarValue::List(ScalarValue::new_list_nullable( - &[ - 
ScalarValue::List(ScalarValue::new_list_nullable( - &[], - &DataType::Float32, - )), - ScalarValue::List(ScalarValue::new_list_nullable( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ], - &DataType::List(new_arc_field("item", DataType::Float32, true)), - )), - ScalarValue::LargeList(ScalarValue::new_large_list( - &[ - ScalarValue::LargeList(ScalarValue::new_large_list( - &[], - &DataType::Float32, - )), - ScalarValue::LargeList(ScalarValue::new_large_list( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ], - &DataType::LargeList(new_arc_field("item", DataType::Float32, true)), - )), - ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::from_iter_primitive::< - Int32Type, - _, - _, - >( - vec![Some(vec![Some(1), Some(2), Some(3)])], - 3, - ))), - ScalarValue::Dictionary( - Box::new(DataType::Int32), - Box::new(ScalarValue::from("foo")), - ), - ScalarValue::Dictionary( - Box::new(DataType::Int32), - Box::new(ScalarValue::Utf8(None)), - ), - ScalarValue::Binary(Some(b"bar".to_vec())), - ScalarValue::Binary(None), - ScalarValue::LargeBinary(Some(b"bar".to_vec())), - ScalarValue::LargeBinary(None), - ScalarStructBuilder::new() - .with_scalar( + let should_pass: Vec = + vec![ + ScalarValue::Boolean(None), + ScalarValue::Float32(None), + ScalarValue::Float64(None), + ScalarValue::Int8(None), + ScalarValue::Int16(None), + ScalarValue::Int32(None), + ScalarValue::Int64(None), + ScalarValue::UInt8(None), + ScalarValue::UInt16(None), + ScalarValue::UInt32(None), + ScalarValue::UInt64(None), + ScalarValue::Utf8(None), + ScalarValue::LargeUtf8(None), + ScalarValue::List(ScalarValue::new_list_nullable(&[], &DataType::Boolean)), + 
ScalarValue::LargeList(ScalarValue::new_large_list(&[], &DataType::Boolean)), + ScalarValue::Date32(None), + ScalarValue::Boolean(Some(true)), + ScalarValue::Boolean(Some(false)), + ScalarValue::Float32(Some(1.0)), + ScalarValue::Float32(Some(f32::MAX)), + ScalarValue::Float32(Some(f32::MIN)), + ScalarValue::Float32(Some(-2000.0)), + ScalarValue::Float64(Some(1.0)), + ScalarValue::Float64(Some(f64::MAX)), + ScalarValue::Float64(Some(f64::MIN)), + ScalarValue::Float64(Some(-2000.0)), + ScalarValue::Int8(Some(i8::MIN)), + ScalarValue::Int8(Some(i8::MAX)), + ScalarValue::Int8(Some(0)), + ScalarValue::Int8(Some(-15)), + ScalarValue::Int16(Some(i16::MIN)), + ScalarValue::Int16(Some(i16::MAX)), + ScalarValue::Int16(Some(0)), + ScalarValue::Int16(Some(-15)), + ScalarValue::Int32(Some(i32::MIN)), + ScalarValue::Int32(Some(i32::MAX)), + ScalarValue::Int32(Some(0)), + ScalarValue::Int32(Some(-15)), + ScalarValue::Int64(Some(i64::MIN)), + ScalarValue::Int64(Some(i64::MAX)), + ScalarValue::Int64(Some(0)), + ScalarValue::Int64(Some(-15)), + ScalarValue::UInt8(Some(u8::MAX)), + ScalarValue::UInt8(Some(0)), + ScalarValue::UInt16(Some(u16::MAX)), + ScalarValue::UInt16(Some(0)), + ScalarValue::UInt32(Some(u32::MAX)), + ScalarValue::UInt32(Some(0)), + ScalarValue::UInt64(Some(u64::MAX)), + ScalarValue::UInt64(Some(0)), + ScalarValue::Utf8(Some(String::from("Test string "))), + ScalarValue::LargeUtf8(Some(String::from("Test Large utf8"))), + ScalarValue::Utf8View(Some(String::from("Test stringview"))), + ScalarValue::BinaryView(Some(b"binaryview".to_vec())), + ScalarValue::Date32(Some(0)), + ScalarValue::Date32(Some(i32::MAX)), + ScalarValue::Date32(None), + ScalarValue::Date64(Some(0)), + ScalarValue::Date64(Some(i64::MAX)), + ScalarValue::Date64(None), + ScalarValue::Time32Second(Some(0)), + ScalarValue::Time32Second(Some(i32::MAX)), + ScalarValue::Time32Second(None), + ScalarValue::Time32Millisecond(Some(0)), + ScalarValue::Time32Millisecond(Some(i32::MAX)), + 
ScalarValue::Time32Millisecond(None), + ScalarValue::Time64Microsecond(Some(0)), + ScalarValue::Time64Microsecond(Some(i64::MAX)), + ScalarValue::Time64Microsecond(None), + ScalarValue::Time64Nanosecond(Some(0)), + ScalarValue::Time64Nanosecond(Some(i64::MAX)), + ScalarValue::Time64Nanosecond(None), + ScalarValue::TimestampNanosecond(Some(0), None), + ScalarValue::TimestampNanosecond(Some(i64::MAX), None), + ScalarValue::TimestampNanosecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampNanosecond(None, None), + ScalarValue::TimestampMicrosecond(Some(0), None), + ScalarValue::TimestampMicrosecond(Some(i64::MAX), None), + ScalarValue::TimestampMicrosecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampMicrosecond(None, None), + ScalarValue::TimestampMillisecond(Some(0), None), + ScalarValue::TimestampMillisecond(Some(i64::MAX), None), + ScalarValue::TimestampMillisecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampMillisecond(None, None), + ScalarValue::TimestampSecond(Some(0), None), + ScalarValue::TimestampSecond(Some(i64::MAX), None), + ScalarValue::TimestampSecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampSecond(None, None), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(1, 2))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + i32::MAX, + i32::MAX, + ))), + ScalarValue::IntervalDayTime(None), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, 0), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(1, 2, 3), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(i32::MAX, i32::MAX, i64::MAX), + )), + ScalarValue::IntervalMonthDayNano(None), + ScalarValue::List(ScalarValue::new_list_nullable( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + 
ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ScalarValue::LargeList(ScalarValue::new_large_list( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ScalarValue::List(ScalarValue::new_list_nullable( + &[ + ScalarValue::List(ScalarValue::new_list_nullable( + &[], + &DataType::Float32, + )), + ScalarValue::List(ScalarValue::new_list_nullable( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ], + &DataType::List(new_arc_field("item", DataType::Float32, true)), + )), + ScalarValue::LargeList(ScalarValue::new_large_list( + &[ + ScalarValue::LargeList(ScalarValue::new_large_list( + &[], + &DataType::Float32, + )), + ScalarValue::LargeList(ScalarValue::new_large_list( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ], + &DataType::LargeList(new_arc_field("item", DataType::Float32, true)), + )), + ScalarValue::FixedSizeList(Arc::new( + FixedSizeListArray::from_iter_primitive::( + vec![Some(vec![Some(1), Some(2), Some(3)])], + 3, + ), + )), + ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::< + Int32Type, + _, + _, + >(vec![Some(vec![ + Some(1), + None, + Some(3), + ])]))), + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![Some( + vec![Some(1), None, Some(3)], + )]), + )), + ScalarValue::Dictionary( + Box::new(DataType::Int32), + Box::new(ScalarValue::from("foo")), + ), + ScalarValue::Dictionary( + Box::new(DataType::Int32), + Box::new(ScalarValue::Utf8(None)), + ), + 
ScalarValue::Binary(Some(b"bar".to_vec())), + ScalarValue::Binary(None), + ScalarValue::LargeBinary(Some(b"bar".to_vec())), + ScalarValue::LargeBinary(None), + ScalarStructBuilder::new() + .with_scalar( + Field::new("a", DataType::Int32, true), + ScalarValue::from(23i32), + ) + .with_scalar( + Field::new("b", DataType::Boolean, false), + ScalarValue::from(false), + ) + .build() + .unwrap(), + ScalarStructBuilder::new() + .with_scalar( + Field::new("a", DataType::Int32, true), + ScalarValue::from(23i32), + ) + .with_scalar( + Field::new("b", DataType::Boolean, false), + ScalarValue::from(false), + ) + .build() + .unwrap(), + ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ Field::new("a", DataType::Int32, true), - ScalarValue::from(23i32), - ) - .with_scalar( Field::new("b", DataType::Boolean, false), - ScalarValue::from(false), - ) - .build() + ]))) .unwrap(), - ScalarStructBuilder::new() - .with_scalar( + ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ Field::new("a", DataType::Int32, true), - ScalarValue::from(23i32), - ) - .with_scalar( Field::new("b", DataType::Boolean, false), - ScalarValue::from(false), - ) - .build() + ]))) .unwrap(), - ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Boolean, false), - ]))) - .unwrap(), - ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Boolean, false), - ]))) - .unwrap(), - ScalarValue::try_from(&DataType::Map( - Arc::new(Field::new( - "entries", - DataType::Struct(Fields::from(vec![ - Field::new("key", DataType::Int32, true), - Field::new("value", DataType::Utf8, false), - ])), - false, - )), - false, - )) - .unwrap(), - ScalarValue::try_from(&DataType::Map( - Arc::new(Field::new( - "entries", - DataType::Struct(Fields::from(vec![ - Field::new("key", DataType::Int32, true), - Field::new("value", DataType::Utf8, true), - ])), + 
ScalarValue::try_from(&DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, true), + Field::new("value", DataType::Utf8, false), + ])), + false, + )), false, - )), - true, - )) - .unwrap(), - ScalarValue::Map(Arc::new(create_map_array_test_case())), - ScalarValue::FixedSizeBinary(b"bar".to_vec().len() as i32, Some(b"bar".to_vec())), - ScalarValue::FixedSizeBinary(0, None), - ScalarValue::FixedSizeBinary(5, None), - ]; + )) + .unwrap(), + ScalarValue::try_from(&DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, true), + Field::new("value", DataType::Utf8, true), + ])), + false, + )), + true, + )) + .unwrap(), + ScalarValue::Map(Arc::new(create_map_array_test_case())), + ScalarValue::FixedSizeBinary( + b"bar".to_vec().len() as i32, + Some(b"bar".to_vec()), + ), + ScalarValue::FixedSizeBinary(0, None), + ScalarValue::FixedSizeBinary(5, None), + ]; // ScalarValue directly for test_case in should_pass.iter() { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index ac7b467920364..1cc8faf324aab 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1313,6 +1313,8 @@ impl Unparser<'_> { ScalarValue::FixedSizeList(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::List(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::LargeList(a) => self.scalar_value_list_to_sql(a.values()), + ScalarValue::ListView(a) => self.scalar_value_list_to_sql(a.values()), + ScalarValue::LargeListView(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::Date32(Some(_)) => { let date = v .to_array()? 
@@ -1824,7 +1826,7 @@ mod tests { use std::{any::Any, sync::Arc, vec}; use crate::unparser::dialect::SqliteDialect; - use arrow::array::{LargeListArray, ListArray}; + use arrow::array::{LargeListArray, LargeListViewArray, ListArray, ListViewArray}; use arrow::datatypes::{DataType::Int8, Field, Int32Type, Schema, TimeUnit}; use ast::ObjectName; use datafusion_common::datatype::DataTypeExt; @@ -2342,6 +2344,28 @@ mod tests { ), "[1, 2, 3]", ), + ( + Expr::Literal( + ScalarValue::ListView(Arc::new( + ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + ]), + )), + None, + ), + "[1, 2, 3]", + ), + ( + Expr::Literal( + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + ]), + )), + None, + ), + "[1, 2, 3]", + ), ( Expr::BinaryExpr(BinaryExpr { left: Box::new(col("a")),