Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ set(SPARROW_IPC_HEADERS
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_array_impl.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_decimal_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_duration_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixedsizebinary_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixed_size_binary_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_interval_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_null_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_primitive_array.hpp
Expand All @@ -149,14 +149,16 @@ set(SPARROW_IPC_HEADERS

set(SPARROW_IPC_SRC
${SPARROW_IPC_SOURCE_DIR}/any_output_stream.cpp
${SPARROW_IPC_SOURCE_DIR}/array_deserializer.cpp
${SPARROW_IPC_SOURCE_DIR}/array_deserializer_impl.hpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_array.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_array/private_data.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_schema.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_schema/private_data.cpp
${SPARROW_IPC_SOURCE_DIR}/chunk_memory_serializer.cpp
${SPARROW_IPC_SOURCE_DIR}/compression.cpp
${SPARROW_IPC_SOURCE_DIR}/compression_impl.hpp
${SPARROW_IPC_SOURCE_DIR}/deserialize_fixedsizebinary_array.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize_fixed_size_binary_array.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize_null_array.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize_utils.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize.cpp
Expand Down
2 changes: 1 addition & 1 deletion include/sparrow_ipc/deserialize_array_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace sparrow_ipc::detail
* @return The deserialized array of type ArrayType<T>
*/
template <template<typename...> class ArrayType, typename T>
[[nodiscard]] ArrayType<T> deserialize_non_owning_simple_array(
[[nodiscard]] ArrayType<T> deserialize_simple_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand Down
4 changes: 2 additions & 2 deletions include/sparrow_ipc/deserialize_decimal_array.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#pragma once

#include <span>
Expand All @@ -14,7 +14,7 @@
namespace sparrow_ipc
{
template <sparrow::decimal_type T>
[[nodiscard]] sparrow::decimal_array<T> deserialize_non_owning_decimal(
[[nodiscard]] sparrow::decimal_array<T> deserialize_decimal_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand Down Expand Up @@ -103,4 +103,4 @@
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
return sparrow::decimal_array<T>(std::move(ap));
}
}
}
4 changes: 2 additions & 2 deletions include/sparrow_ipc/deserialize_duration_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace sparrow_ipc
{
template <typename T>
[[nodiscard]] sparrow::duration_array<T> deserialize_non_owning_duration_array(
[[nodiscard]] sparrow::duration_array<T> deserialize_duration_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -23,7 +23,7 @@ namespace sparrow_ipc
size_t& buffer_index
)
{
return detail::deserialize_non_owning_simple_array<sparrow::duration_array, T>(
return detail::deserialize_simple_array<sparrow::duration_array, T>(
record_batch,
body,
name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

namespace sparrow_ipc
{
[[nodiscard]] sparrow::fixed_width_binary_array deserialize_non_owning_fixedwidthbinary(
[[nodiscard]] sparrow::fixed_width_binary_array deserialize_fixed_width_binary_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -19,4 +19,4 @@ namespace sparrow_ipc
size_t& buffer_index,
int32_t byte_width
);
}
}
4 changes: 2 additions & 2 deletions include/sparrow_ipc/deserialize_interval_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
namespace sparrow_ipc
{
template <typename T>
[[nodiscard]] sparrow::interval_array<T> deserialize_non_owning_interval_array(
[[nodiscard]] sparrow::interval_array<T> deserialize_interval_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -18,7 +18,7 @@ namespace sparrow_ipc
size_t& buffer_index
)
{
return detail::deserialize_non_owning_simple_array<sparrow::interval_array, T>(
return detail::deserialize_simple_array<sparrow::interval_array, T>(
record_batch,
body,
name,
Expand Down
4 changes: 2 additions & 2 deletions include/sparrow_ipc/deserialize_null_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@

namespace sparrow_ipc
{
[[nodiscard]] sparrow::null_array deserialize_non_owning_null(
[[nodiscard]] sparrow::null_array deserialize_null_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
const std::optional<std::vector<sparrow::metadata_pair>>& metadata,
bool nullable,
size_t& buffer_index
);
}
}
4 changes: 2 additions & 2 deletions include/sparrow_ipc/deserialize_primitive_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
namespace sparrow_ipc
{
template <typename T>
[[nodiscard]] sparrow::primitive_array<T> deserialize_non_owning_primitive_array(
[[nodiscard]] sparrow::primitive_array<T> deserialize_primitive_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -21,7 +21,7 @@ namespace sparrow_ipc
size_t& buffer_index
)
{
return detail::deserialize_non_owning_simple_array<sparrow::primitive_array, T>(
return detail::deserialize_simple_array<sparrow::primitive_array, T>(
record_batch,
body,
name,
Expand Down
16 changes: 8 additions & 8 deletions include/sparrow_ipc/deserialize_time_related_arrays.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
namespace sparrow_ipc
{
template <typename T>
[[nodiscard]] sparrow::date_array<T> deserialize_non_owning_date_array(
[[nodiscard]] sparrow::date_array<T> deserialize_date_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -27,7 +27,7 @@ namespace sparrow_ipc
size_t& buffer_index
)
{
return detail::deserialize_non_owning_simple_array<sparrow::date_array, T>(
return detail::deserialize_simple_array<sparrow::date_array, T>(
record_batch,
body,
name,
Expand All @@ -39,7 +39,7 @@ namespace sparrow_ipc
}

template <typename T>
[[nodiscard]] sparrow::timestamp_array<T> deserialize_non_owning_timestamp_array(
[[nodiscard]] sparrow::timestamp_array<T> deserialize_timestamp_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -53,7 +53,7 @@ namespace sparrow_ipc
sparrow::detail::get_data_type_from_array<sparrow::timestamp_array<T>>::get()
)) + timezone;

return detail::deserialize_non_owning_simple_array<sparrow::timestamp_array, T>(
return detail::deserialize_simple_array<sparrow::timestamp_array, T>(
record_batch,
body,
name,
Expand All @@ -65,7 +65,7 @@ namespace sparrow_ipc
}

template <typename T>
[[nodiscard]] sparrow::timestamp_without_timezone_array<T> deserialize_non_owning_timestamp_without_timezone_array(
[[nodiscard]] sparrow::timestamp_without_timezone_array<T> deserialize_timestamp_without_timezone_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -74,7 +74,7 @@ namespace sparrow_ipc
size_t& buffer_index
)
{
return detail::deserialize_non_owning_simple_array<sparrow::timestamp_without_timezone_array, T>(
return detail::deserialize_simple_array<sparrow::timestamp_without_timezone_array, T>(
record_batch,
body,
name,
Expand All @@ -86,7 +86,7 @@ namespace sparrow_ipc
}

template <typename T>
[[nodiscard]] sparrow::time_array<T> deserialize_non_owning_time_array(
[[nodiscard]] sparrow::time_array<T> deserialize_time_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand All @@ -95,7 +95,7 @@ namespace sparrow_ipc
size_t& buffer_index
)
{
return detail::deserialize_non_owning_simple_array<sparrow::time_array, T>(
return detail::deserialize_simple_array<sparrow::time_array, T>(
record_batch,
body,
name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace sparrow_ipc
{
template <typename T>
[[nodiscard]] T deserialize_non_owning_variable_size_binary(
[[nodiscard]] T deserialize_variable_size_binary_array(
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
std::span<const uint8_t> body,
std::string_view name,
Expand Down
95 changes: 59 additions & 36 deletions include/sparrow_ipc/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,24 @@
namespace sparrow_ipc::utils
{
// Aligns a value to the next multiple of 8, as required by the Arrow IPC format for message bodies
inline size_t align_to_8(const size_t n)
constexpr size_t align_to_8(const size_t n)
{
    // Round n up to a multiple of 8: add 7, then clear the three low bits.
    // A value that is already a multiple of 8 is returned unchanged.
    const size_t low_bits_cleared = ~static_cast<size_t>(7);
    const size_t bumped = n + 7;
    return bumped & low_bits_cleared;
}

/**
* @brief Extracts words after ':' separated by ',' from a string.
* @brief Get substring after separator.
*
* This function finds the position of ':' in the input string and then
* splits the remaining part by ',' to extract individual words.
*
* @param str Input string to parse (e.g., "prefix:word1,word2,word3")
* @return std::vector<std::string_view> Vector of string views containing the extracted words
* Returns an empty vector if ':' is not found or if there are no words after it
* @param str The string view to parse
* @param sep The separator to use
* @return std::optional<std::string_view> The parsed substring, or std::nullopt if parsing fails
*
* @example
* extract_words_after_colon("d:128,10") returns {"128", "10"}
* extract_words_after_colon("w:256") returns {"256"}
* extract_words_after_colon("no_colon") returns {}
* get_substr_after_separator("w:abc", ":") returns std::optional<std::string_view>("abc")
* get_substr_after_separator("abc", ":") returns std::nullopt
* get_substr_after_separator("", ":") returns std::nullopt
*/
SPARROW_IPC_API std::vector<std::string_view> extract_words_after_colon(std::string_view str);
SPARROW_IPC_API std::optional<std::string_view> get_substr_after_separator(std::string_view str, std::string_view sep);

/**
* @brief Parse a string_view to int32_t using std::from_chars.
Expand All @@ -49,6 +46,56 @@ namespace sparrow_ipc::utils
*/
SPARROW_IPC_API std::optional<int32_t> parse_to_int32(std::string_view str);

/**
* @brief Get substring after separator as int32_t.
*
* @param str The string view to parse
* @param sep The separator to use
* @return std::optional<int32_t> The parsed substring as integer, or std::nullopt if parsing fails
*
* @example
* get_substr_as_int32("w:123", ":") returns std::optional<int32_t>(123)
* get_substr_as_int32("abc", ":") returns std::nullopt
* get_substr_as_int32("abc:a", ":") returns std::nullopt
* get_substr_as_int32("", ":") returns std::nullopt
*/
SPARROW_IPC_API std::optional<int32_t> get_substr_as_int32(std::string_view str, std::string_view sep);

/**
* @brief Parse decimal format strings.
*
* This function parses decimal format strings which can be in two formats:
* - "d:precision,scale" (e.g., "d:19,10")
* - "d:precision,scale,bitWidth" (e.g., "d:19,10,128")
*
* @param format_str The format string to parse
* @return std::optional<std::tuple<int32_t, int32_t, std::optional<int32_t>>>
* A tuple containing (precision, scale, optional bitWidth), or std::nullopt if parsing fails
*
* @example
* parse_decimal_format("d:19,10") returns std::optional{std::tuple{19, 10, std::nullopt}}
* parse_decimal_format("d:19,10,128") returns std::optional{std::tuple{19, 10, std::optional{128}}}
* parse_decimal_format("invalid") returns std::nullopt
*/
SPARROW_IPC_API std::optional<std::tuple<int32_t, int32_t, std::optional<int32_t>>> parse_decimal_format(std::string_view format_str);

/**
* @brief Extracts words after ':' separated by ',' from a string.
*
* This function finds the position of ':' in the input string and then
* splits the remaining part by ',' to extract individual words.
*
* @param str Input string to parse (e.g., "prefix:word1,word2,word3")
* @return std::vector<std::string_view> Vector of string views containing the extracted words
* Returns an empty vector if ':' is not found or if there are no words after it
*
* @example
* extract_words_after_colon("d:128,10") returns {"128", "10"}
* extract_words_after_colon("w:256") returns {"256"}
* extract_words_after_colon("no_colon") returns {}
*/
SPARROW_IPC_API std::vector<std::string_view> extract_words_after_colon(std::string_view str);

/**
* @brief Checks if all record batches in a collection have consistent structure.
*
Expand Down Expand Up @@ -95,28 +142,4 @@ namespace sparrow_ipc::utils
}
return true;
}

std::optional<std::string_view> parse_after_separator(std::string_view format_str, std::string_view sep);
// Parse the format string
// The format string is expected to be "w:size", "+w:size", "d:precision,scale", etc
std::optional<int32_t> parse_format(std::string_view format_str, std::string_view sep);
// size_t calculate_output_serialized_size(const sparrow::record_batch& record_batch);

/**
* @brief Parse decimal format strings.
*
* This function parses decimal format strings which can be in two formats:
* - "d:precision,scale" (e.g., "d:19,10")
* - "d:precision,scale,bitWidth" (e.g., "d:19,10,128")
*
* @param format_str The format string to parse
* @return std::optional<std::tuple<int32_t, int32_t, std::optional<int32_t>>>
* A tuple containing (precision, scale, optional bitWidth), or std::nullopt if parsing fails
*
* @example
* parse_decimal_format("d:19,10") returns std::optional{std::tuple{19, 10, std::nullopt}}
* parse_decimal_format("d:19,10,128") returns std::optional{std::tuple{19, 10, std::optional{128}}}
* parse_decimal_format("invalid") returns std::nullopt
*/
SPARROW_IPC_API std::optional<std::tuple<int32_t, int32_t, std::optional<int32_t>>> parse_decimal_format(std::string_view format_str);
}
Loading
Loading