Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ impl<R: RunEndIndexType> RunArray<R> {
///
/// [`values`]: Self::values
pub fn values_slice(&self) -> ArrayRef {
if self.is_empty() {
return self.values.slice(0, 0);
}
let start = self.get_start_physical_index();
let end = self.get_end_physical_index();
self.values.slice(start, end - start + 1)
Expand Down Expand Up @@ -655,6 +658,7 @@ mod tests {
use super::*;
use crate::builder::PrimitiveRunBuilder;
use crate::cast::AsArray;
use crate::new_empty_array;
use crate::types::{Int8Type, UInt32Type};
use crate::{Int16Array, Int32Array, StringArray};

Expand Down Expand Up @@ -752,6 +756,26 @@ mod tests {
assert_eq!(run_ends.values(), &run_ends_values);
}

#[test]
fn test_run_array_empty() {
    // Shared checks: each accessor below must be callable on a zero-length
    // run array and report "nothing here" instead of panicking.
    fn check_empty(ree: &RunArray<Int16Type>) {
        assert!(ree.is_empty());

        // Physical bounds are both reported as 0 for an empty array.
        assert_eq!(ree.get_start_physical_index(), 0);
        assert_eq!(ree.get_end_physical_index(), 0);

        // Resolving zero logical indices yields zero physical indices.
        let physical = ree.get_physical_indices::<i16>(&[]).unwrap();
        assert!(physical.is_empty());

        // The run-ends buffer is empty, both directly and through the
        // sliced iterator.
        let ends = ree.run_ends();
        assert!(ends.is_empty());
        assert_eq!(ends.sliced_values().count(), 0);
    }

    // Build a run array from empty run-ends and empty values.
    let empty_ends = new_empty_array(&DataType::Int16);
    let empty_ends = empty_ends.as_primitive::<Int16Type>();
    let empty_values = new_empty_array(&DataType::Int64);
    let ree = RunArray::try_new(empty_ends, &empty_values).unwrap();

    // Check both the freshly built array and a zero-length slice of it.
    check_empty(&ree);
    let resliced = ree.slice(0, 0);
    check_empty(&resliced);
}

#[test]
fn test_run_array_fmt_debug() {
let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
Expand Down Expand Up @@ -1186,4 +1210,91 @@ mod tests {
let values_slice2 = values_slice2.as_primitive::<Int32Type>();
assert_eq!(values_slice2.values(), &[1]);
}

#[test]
fn test_run_array_values_slice_empty() {
    // Three runs covering ten logical rows: "a" x2, "b" x3, "c" x5.
    let ends = Int32Array::from(vec![2, 5, 10]);
    let vals = StringArray::from(vec!["a", "b", "c"]);
    let ree = RunArray::<Int32Type>::try_new(&ends, &vals).unwrap();

    // A zero-length logical slice of a non-empty array.
    let empty = ree.slice(0, 0);
    assert_eq!(empty.len(), 0);

    // Its physical values slice must be empty too, while still carrying
    // the value type of the parent array.
    let physical_values = empty.values_slice();
    assert_eq!(physical_values.len(), 0);
    assert_eq!(physical_values.data_type(), &DataType::Utf8);
}

#[test]
fn test_run_array_eq_empty() {
    let ends = Int32Array::from(vec![2, 5, 10]);
    let vals = StringArray::from(vec!["a", "b", "c"]);
    let ree = RunArray::<Int32Type>::try_new(&ends, &vals).unwrap();

    // Zero-length slices taken at the start, inside the first run, and at the
    // very end of the array: all are logically empty and must compare equal.
    let empties = [ree.slice(0, 0), ree.slice(1, 0), ree.slice(10, 0)];
    for pair in empties.windows(2) {
        assert_eq!(pair[0], pair[1]);
    }

    // An empty slice must also equal an empty array of the same data type
    // that was never sliced from anything.
    let fresh = new_empty_array(ree.data_type());
    let fresh = crate::cast::as_run_array::<Int32Type>(fresh.as_ref());

    assert_eq!(&empties[0], fresh);
}

#[test]
fn test_run_array_eq_diff_physical_same_logical() {
    // Compact encoding: one run per distinct value.
    // Logical: a, b, b, c, c, c
    let compact = RunArray::<Int32Type>::try_new(
        &Int32Array::from(vec![1, 3, 6]),
        &StringArray::from(vec!["a", "b", "c"]),
    )
    .unwrap();

    // Degenerate encoding: every logical row is its own run of length one.
    // Logical: a, b, b, c, c, c
    let expanded = RunArray::<Int32Type>::try_new(
        &Int32Array::from(vec![1, 2, 3, 4, 5, 6]),
        &StringArray::from(vec!["a", "b", "b", "c", "c", "c"]),
    )
    .unwrap();

    // Equality is expected to follow the logical contents, not the run layout.
    assert_eq!(compact, expanded);
}

#[test]
fn test_run_array_eq_sliced() {
    // Parent array.
    // Logical: a, a, b, b, b, c, c, c, c, c
    let parent_ends = Int32Array::from(vec![2, 5, 10]);
    let parent_vals = StringArray::from(vec!["a", "b", "c"]);
    let parent = RunArray::<Int32Type>::try_new(&parent_ends, &parent_vals).unwrap();

    // Case 1: a slice that spans all three runs, cutting the first and last.
    // Logical: a, b, b, b, c, c
    let spanning = parent.slice(1, 6);

    // The same logical rows built directly, without any slicing.
    let spanning_ends = Int32Array::from(vec![1, 4, 6]);
    let spanning_vals = StringArray::from(vec!["a", "b", "c"]);
    let spanning_expected =
        RunArray::<Int32Type>::try_new(&spanning_ends, &spanning_vals).unwrap();

    assert_eq!(spanning, spanning_expected);

    // Case 2: a slice that covers exactly the middle run.
    // Logical: b, b, b
    let middle = parent.slice(2, 3);
    let middle_ends = Int32Array::from(vec![3]);
    let middle_vals = StringArray::from(vec!["b"]);
    let middle_expected = RunArray::<Int32Type>::try_new(&middle_ends, &middle_vals).unwrap();
    assert_eq!(middle, middle_expected);
}

#[test]
fn test_run_array_eq_sliced_different_offsets() {
    // Logical: a, a, b, b, b, c, c, c, c, c
    let ends = Int32Array::from(vec![2, 5, 10]);
    let vals = StringArray::from(vec!["a", "b", "c"]);
    let original = RunArray::<Int32Type>::try_new(&ends, &vals).unwrap();

    // A clone compares equal to the array it was cloned from.
    let duplicate = original.clone();
    assert_eq!(original, duplicate);

    // Two slices with the same offset and length compare equal.
    // Logical: a, b, b, b
    let shifted = original.slice(1, 4);
    let shifted_again = original.slice(1, 4);
    assert_eq!(shifted, shifted_again);

    // Same length but a different offset gives different logical rows,
    // so the slices must not compare equal.
    // Logical: a, a, b, b
    let unshifted = original.slice(0, 4);
    assert_ne!(shifted, unshifted);
}
}
13 changes: 10 additions & 3 deletions arrow-buffer/src/buffer/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,16 @@ where
pub fn sliced_values(&self) -> impl Iterator<Item = E> + '_ {
let offset = self.logical_offset;
let len = self.logical_length;
let start = self.get_start_physical_index();
let end = self.get_end_physical_index();
self.run_ends[start..=end].iter().map(move |&val| {
// Doing this roundabout way since the iterator type we return must be
// the same (i.e. cannot use std::iter::empty())
let physical_slice = if self.is_empty() {
&self.run_ends[0..0]
} else {
let start = self.get_start_physical_index();
let end = self.get_end_physical_index();
&self.run_ends[start..=end]
};
physical_slice.iter().map(move |&val| {
let val = val.as_usize().saturating_sub(offset).min(len);
E::from_usize(val).unwrap()
})
Expand Down
28 changes: 28 additions & 0 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12451,4 +12451,32 @@ mod tests {
assert_eq!(casted.as_ref(), &expected);
}
}

#[test]
fn test_cast_between_sliced_run_end_encoded() {
    // Source array with Int16 run ends.
    // Logical: a, a, b, b, b, c, c, c
    let source_ends = Int16Array::from(vec![2, 5, 8]);
    let source_vals = StringArray::from(vec!["a", "b", "c"]);
    let source = RunArray::<Int16Type>::try_new(&source_ends, &source_vals).unwrap();

    // Keep logical rows 1 and 2 only, so the slice cuts through the first
    // run and into the second.
    let sliced: ArrayRef = Arc::new(source.slice(1, 2));

    // Cast to a run-end encoded type with wider (Int64) run ends and the
    // same Utf8 values.
    let wide_ends_field = Arc::new(Field::new("run_ends", DataType::Int64, false));
    let values_field = Arc::new(Field::new("values", DataType::Utf8, true));
    let target_type = DataType::RunEndEncoded(wide_ends_field, values_field);

    let options = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    let casted = cast_with_options(&sliced, &target_type, &options).unwrap();
    let typed = casted
        .as_run::<Int64Type>()
        .downcast::<StringArray>()
        .unwrap();

    // The cast output must hold the sliced logical rows, not those of the
    // unsliced parent.
    let expected = vec!["a", "b"];
    let actual: Vec<_> = typed.into_iter().flatten().collect();

    assert_eq!(expected, actual);
}
}
16 changes: 9 additions & 7 deletions arrow-cast/src/cast/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,18 @@ pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
.downcast_ref::<RunArray<K>>()
.ok_or_else(|| ArrowError::CastError("Expected RunArray".to_string()))?;

let values = run_array.values();

match to_type {
// Stay as RunEndEncoded, cast only the values
DataType::RunEndEncoded(target_index_field, target_value_field) => {
let cast_values =
cast_with_options(values, target_value_field.data_type(), cast_options)?;
let values = run_array.values_slice();
let cast_values = cast_with_options(
values.as_ref(),
target_value_field.data_type(),
cast_options,
)?;

let run_ends_array = PrimitiveArray::<K>::from_iter_values(
run_array.run_ends().values().iter().copied(),
);
let run_ends_array =
PrimitiveArray::<K>::from_iter_values(run_array.run_ends().sliced_values());
let cast_run_ends = cast_with_options(
&run_ends_array,
target_index_field.data_type(),
Expand Down Expand Up @@ -72,6 +73,7 @@ pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(

// Expand to logical form
_ => {
let values = run_array.values();
let len = run_array.len();
let offset = run_array.offset();
let run_ends = run_array.run_ends().values();
Expand Down
Loading
Loading