diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc
index b341fdde3a3..6735cc45e61 100644
--- a/cpp/src/gandiva/annotator.cc
+++ b/cpp/src/gandiva/annotator.cc
@@ -93,6 +93,28 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
   }
 }
 
+Status Annotator::CheckEvalBatchFieldType(
+    const arrow::RecordBatch& record_batch) const {
+  for (int i = 0; i < record_batch.num_columns(); ++i) {
+    const std::string& name = record_batch.column_name(i);
+    const auto found = in_name_to_desc_.find(name);
+    if (found == in_name_to_desc_.end()) {
+      // Columns that no registered field descriptor refers to are ignored.
+      continue;
+    }
+    // Compare complete types rather than type ids only, so that parameterized
+    // types (decimal precision/scale, timestamp unit, ...) cannot slip through.
+    const auto expected_type = found->second->Type();
+    const auto column = record_batch.column(i);
+    if (!column->type()->Equals(*expected_type)) {
+      return Status::ExecutionError("Expect field ", name, " type is ",
+                                    expected_type->ToString(), ", input field ", name,
+                                    " type is ", column->type()->ToString());
+    }
+  }
+  return Status::OK();
+}
+
 EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch,
                                          const ArrayDataVector& out_vector) const {
   EvalBatchPtr eval_batch = std::make_shared(
diff --git a/cpp/src/gandiva/annotator.h b/cpp/src/gandiva/annotator.h
index be49509e624..f88efa02946 100644
--- a/cpp/src/gandiva/annotator.h
+++ b/cpp/src/gandiva/annotator.h
@@ -60,6 +60,13 @@ class GANDIVA_EXPORT Annotator {
   EvalBatchPtr PrepareEvalBatch(const arrow::RecordBatch& record_batch,
                                 const ArrayDataVector& out_vector) const;
 
+  /// \brief Check the columns of record_batch against the input field
+  /// descriptors registered with this annotator, matching them by column name.
+  ///
+  /// Columns without a registered descriptor are skipped. Returns an
+  /// ExecutionError for the first column whose type differs, OK otherwise.
+  Status CheckEvalBatchFieldType(const arrow::RecordBatch& record_batch) const;
+
   int buffer_count() const { return buffer_count_; }
 
  private:
diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc
index 27b27fbe25a..dacb745f241 100644
--- a/cpp/src/gandiva/expr_validator.cc
+++ b/cpp/src/gandiva/expr_validator.cc
@@ -72,20 +72,6 @@ Status ExprValidator::Visit(const FieldNode& node) {
                   Status::ExpressionValidationError("Field ", node.field()->name(),
                                                     " has unsupported data type ",
                                                     node.return_type()->name()));
-
-  
// Ensure that field is found in schema
-  auto field_in_schema_entry = field_map_.find(node.field()->name());
-  ARROW_RETURN_IF(field_in_schema_entry == field_map_.end(),
-                  Status::ExpressionValidationError("Field ", node.field()->name(),
-                                                    " not in schema."));
-
-  // Ensure that the found field matches.
-  FieldPtr field_in_schema = field_in_schema_entry->second;
-  ARROW_RETURN_IF(!field_in_schema->Equals(node.field()),
-                  Status::ExpressionValidationError(
-                      "Field definition in schema ", field_in_schema->ToString(),
-                      " different from field in expression ", node.field()->ToString()));
-
   return Status::OK();
 }
 
diff --git a/cpp/src/gandiva/expression.cc b/cpp/src/gandiva/expression.cc
index 06aada27b3c..d62d24e0090 100644
--- a/cpp/src/gandiva/expression.cc
+++ b/cpp/src/gandiva/expression.cc
@@ -22,4 +22,6 @@ namespace gandiva {
 
 std::string Expression::ToString() { return root()->ToString(); }
 
+std::string Expression::ToCacheKeyString() { return root()->ToCacheKeyString(); }
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/expression.h b/cpp/src/gandiva/expression.h
index cdda2512b9e..0efa45d3e68 100644
--- a/cpp/src/gandiva/expression.h
+++ b/cpp/src/gandiva/expression.h
@@ -38,6 +38,8 @@ class GANDIVA_EXPORT Expression {
 
   std::string ToString();
 
+  std::string ToCacheKeyString();
+
  private:
   const NodePtr root_;
   const FieldPtr result_;
diff --git a/cpp/src/gandiva/expression_cache_key.h b/cpp/src/gandiva/expression_cache_key.h
index e7522042a7d..25182b7f1a8 100644
--- a/cpp/src/gandiva/expression_cache_key.h
+++ b/cpp/src/gandiva/expression_cache_key.h
@@ -34,45 +34,40 @@ class ExpressionCacheKey {
  public:
   ExpressionCacheKey(SchemaPtr schema, std::shared_ptr configuration,
                      ExpressionVector expression_vector, SelectionVector::Mode mode)
-      : schema_(schema), mode_(mode), uniquifier_(0), configuration_(configuration) {
+      : mode_(mode), uniqifier_(0), configuration_(configuration) {
     static const int kSeedValue = 4;
     size_t result = kSeedValue;
     for (auto& expr : expression_vector) {
-      std::string expr_as_string = expr->ToString();
-      expressions_as_strings_.push_back(expr_as_string);
-      arrow::internal::hash_combine(result, expr_as_string);
-      UpdateUniquifier(expr_as_string);
+      std::string expr_cache_key_string = expr->ToCacheKeyString();
+      expressions_as_cache_key_strings_.push_back(expr_cache_key_string);
+      arrow::internal::hash_combine(result, expr_cache_key_string);
+      UpdateUniqifier(expr_cache_key_string);
     }
     arrow::internal::hash_combine(result, static_cast(mode));
     arrow::internal::hash_combine(result, configuration->Hash());
-    arrow::internal::hash_combine(result, schema_->ToString());
-    arrow::internal::hash_combine(result, uniquifier_);
+    arrow::internal::hash_combine(result, uniqifier_);
     hash_code_ = result;
   }
 
   ExpressionCacheKey(SchemaPtr schema, std::shared_ptr configuration,
                      Expression& expression)
-      : schema_(schema),
-        mode_(SelectionVector::MODE_NONE),
-        uniquifier_(0),
-        configuration_(configuration) {
+      : mode_(SelectionVector::MODE_NONE), uniqifier_(0), configuration_(configuration) {
     static const int kSeedValue = 4;
     size_t result = kSeedValue;
-    expressions_as_strings_.push_back(expression.ToString());
-    UpdateUniquifier(expression.ToString());
-
+    expressions_as_cache_key_strings_.push_back(expression.ToCacheKeyString());
+    UpdateUniqifier(expressions_as_cache_key_strings_.back());
+    arrow::internal::hash_combine(result, expressions_as_cache_key_strings_.back());
     arrow::internal::hash_combine(result, configuration->Hash());
-    arrow::internal::hash_combine(result, schema_->ToString());
-    arrow::internal::hash_combine(result, uniquifier_);
+    arrow::internal::hash_combine(result, uniqifier_);
     hash_code_ = result;
   }
 
-  void UpdateUniquifier(const std::string& expr) {
-    if (uniquifier_ == 0) {
+  void UpdateUniqifier(const std::string& expr) {
+    if (uniqifier_ == 0) {
       // caching of expressions with re2 patterns causes lock contention. So, use
       // multiple instances to reduce contention.
       if (expr.find(" like(") != std::string::npos) {
-        uniquifier_ = std::hash()(std::this_thread::get_id()) % 16;
+        uniqifier_ = std::hash()(std::this_thread::get_id()) % 16;
       }
     }
   }
@@ -84,10 +79,6 @@ class ExpressionCacheKey {
       return false;
     }
 
-    if (!(schema_->Equals(*other.schema_, true))) {
-      return false;
-    }
-
     if (configuration_ != other.configuration_) {
       return false;
     }
@@ -96,11 +87,11 @@ class ExpressionCacheKey {
       return false;
     }
 
-    if (expressions_as_strings_ != other.expressions_as_strings_) {
+    if (expressions_as_cache_key_strings_ != other.expressions_as_cache_key_strings_) {
       return false;
     }
 
-    if (uniquifier_ != other.uniquifier_) {
+    if (uniqifier_ != other.uniqifier_) {
       return false;
     }
 
@@ -111,10 +102,9 @@ class ExpressionCacheKey {
 
  private:
   size_t hash_code_;
-  SchemaPtr schema_;
-  std::vector expressions_as_strings_;
+  std::vector expressions_as_cache_key_strings_;
   SelectionVector::Mode mode_;
-  uint32_t uniquifier_;
+  uint32_t uniqifier_;
   std::shared_ptr configuration_;
 };
 
diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc
index 8a270cfdc06..a239610ebe7 100644
--- a/cpp/src/gandiva/filter.cc
+++ b/cpp/src/gandiva/filter.cc
@@ -91,8 +91,6 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
 Status Filter::Evaluate(const arrow::RecordBatch& batch,
                         std::shared_ptr out_selection) {
   const auto num_rows = batch.num_rows();
-  ARROW_RETURN_IF(!batch.schema()->Equals(*schema_),
-                  Status::Invalid("RecordBatch schema must expected filter schema"));
   ARROW_RETURN_IF(num_rows == 0, Status::Invalid("RecordBatch must be non-empty."));
   ARROW_RETURN_IF(out_selection == nullptr,
                   Status::Invalid("out_selection must be non-null."));
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 4e6480fa167..8c0b9eb1daa 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -131,6 +131,10 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch,
                               const ArrayDataVector&
output_vector) const {
   DCHECK_GT(record_batch.num_rows(), 0);
 
+  // Callers do not validate the batch schema; reject batches whose referenced
+  // columns have a different type than the one registered with the annotator.
+  ARROW_RETURN_NOT_OK(annotator_.CheckEvalBatchFieldType(record_batch));
+
   auto eval_batch = annotator_.PrepareEvalBatch(record_batch, output_vector);
   DCHECK_GT(eval_batch->GetNumBuffers(), 0);
 
diff --git a/cpp/src/gandiva/node.h b/cpp/src/gandiva/node.h
index 858c6570489..2d74ec764e6 100644
--- a/cpp/src/gandiva/node.h
+++ b/cpp/src/gandiva/node.h
@@ -48,6 +48,10 @@ class GANDIVA_EXPORT Node {
 
   virtual std::string ToString() const = 0;
 
+  /// \brief Textual form used to build the expression cache key; FieldNode
+  /// leaves the field name out of it, unlike ToString().
+  virtual std::string ToCacheKeyString() const = 0;
+
  protected:
   DataTypePtr return_type_;
 };
@@ -99,6 +103,8 @@ class GANDIVA_EXPORT LiteralNode : public Node {
     return ss.str();
   }
 
+  std::string ToCacheKeyString() const override { return ToString(); }
+
  private:
   LiteralHolder holder_;
   bool is_null_;
@@ -117,6 +123,13 @@ class GANDIVA_EXPORT FieldNode : public Node {
     return "(" + field()->type()->ToString() + ") " + field()->name();
   }
 
+  // NOTE(review): only the type is emitted, so add(a, a) and add(a, b) over
+  // same-typed fields get identical keys although they bind a different number
+  // of inputs. Confirm a cached module is never reused across such a pair.
+  std::string ToCacheKeyString() const override {
+    return "(" + field()->type()->ToString() + ") ";
+  }
+
  private:
   FieldPtr field_;
 };
@@ -149,6 +162,24 @@ class GANDIVA_EXPORT FunctionNode : public Node {
     return ss.str();
   }
 
+  std::string ToCacheKeyString() const override {
+    std::stringstream ss;
+    ss << ((return_type() == NULLPTR) ? "untyped"
+                                      : descriptor()->return_type()->ToString())
+       << " " << descriptor()->name() << "(";
+    bool skip_comma = true;
+    for (auto& child : children()) {
+      if (skip_comma) {
+        ss << child->ToCacheKeyString();
+        skip_comma = false;
+      } else {
+        ss << ", " << child->ToCacheKeyString();
+      }
+    }
+    ss << ")";
+    return ss.str();
+  }
+
  private:
   FuncDescriptorPtr descriptor_;
   NodeVector children_;
@@ -188,6 +219,14 @@ class GANDIVA_EXPORT IfNode : public Node {
     return ss.str();
   }
 
+  std::string ToCacheKeyString() const override {
+    std::stringstream ss;
+    ss << "if (" << condition()->ToCacheKeyString() << ") { ";
+    ss << then_node()->ToCacheKeyString() << " } else { ";
+    ss << else_node()->ToCacheKeyString() << " }";
+    return ss.str();
+  }
+
  private:
   NodePtr condition_;
   NodePtr then_node_;
@@ -225,6 +264,25 @@ class GANDIVA_EXPORT BooleanNode : public Node {
     return ss.str();
   }
 
+  std::string ToCacheKeyString() const override {
+    std::stringstream ss;
+    bool first = true;
+    for (auto& child : children_) {
+      if (!first) {
+        if (expr_type() == BooleanNode::AND) {
+          ss << " && ";
+        } else {
+          ss << " || ";
+        }
+      }
+      ss << child->ToCacheKeyString();
+      first = false;
+    }
+    // Parenthesize so that nested AND/OR trees with different grouping, e.g.
+    // a && (b || c) versus (a && b) || c, never render to the same key.
+    return "(" + ss.str() + ")";
+  }
+
  private:
   ExprType expr_type_;
   NodeVector children_;
@@ -265,6 +323,22 @@ class InExpressionNode : public Node {
     return ss.str();
   }
 
+  std::string ToCacheKeyString() const override {
+    std::stringstream ss;
+    ss << eval_expr_->ToCacheKeyString() << " IN (";
+    bool add_comma = false;
+    for (auto& value : values_) {
+      if (add_comma) {
+        ss << ", ";
+      }
+      // values are written as-is; no per-value type prefix is emitted
+      ss << value;
+      add_comma = true;
+    }
+    ss << ")";
+    return ss.str();
+  }
+
  private:
   NodePtr eval_expr_;
   std::unordered_set values_;
@@ -309,6 +383,22 @@ class InExpressionNode : public Node {
     return ss.str();
   }
 
+  std::string ToCacheKeyString() const override {
+    std::stringstream ss;
+    ss << eval_expr_->ToCacheKeyString() << " IN (";
+    bool add_comma = false;
+    for (auto& value : values_) {
+      if (add_comma) {
+        ss << ", ";
+      }
+      // values are written as-is; no per-value type prefix is emitted
+      ss << value;
+      add_comma = true;
+    }
+    ss << ")";
+    return ss.str();
+  }
+
  private:
   NodePtr eval_expr_;
   std::unordered_set values_;
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index ec0302146ff..89e969f47af 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -230,8 +230,6 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
 }
 
 Status Projector::ValidateEvaluateArgsCommon(const arrow::RecordBatch& batch) const {
-  ARROW_RETURN_IF(!batch.schema()->Equals(*schema_),
-                  Status::Invalid("Schema in RecordBatch must match schema in Make()"));
   ARROW_RETURN_IF(batch.num_rows() == 0,
                   Status::Invalid("RecordBatch must be non-empty."));
 
diff --git a/cpp/src/gandiva/tests/filter_test.cc b/cpp/src/gandiva/tests/filter_test.cc
index 749000aa0cf..3b20e78a5cc 100644
--- a/cpp/src/gandiva/tests/filter_test.cc
+++ b/cpp/src/gandiva/tests/filter_test.cc
@@ -42,16 +42,16 @@ class TestFilter : public ::testing::Test {
 
 TEST_F(TestFilter, TestFilterCache) {
   // schema for input fields
-  auto field0 = field("f0_filter_cache", int32());
-  auto field1 = field("f1_filter_cache", int32());
+  auto field0 = field("f0_filter_cache", int64());
+  auto field1 = field("f1_filter_cache", int64());
   auto schema = arrow::schema({field0, field1});
 
   // Build condition f0 + f1 < 10
   auto node_f0 = TreeExprBuilder::MakeField(field0);
   auto node_f1 = TreeExprBuilder::MakeField(field1);
   auto sum_func =
-      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
-  auto literal_10 = TreeExprBuilder::MakeLiteral((int32_t)10);
+      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int64());
+  auto literal_10 = TreeExprBuilder::MakeLiteral((int64_t)10);
   auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
                                                     arrow::boolean());
   auto condition = TreeExprBuilder::MakeCondition(less_than_10);
@@ -69,13
+69,13 @@ TEST_F(TestFilter, TestFilterCache) {
   EXPECT_TRUE(cached_filter->GetBuiltFromCache());
 
-  // schema is different should return a new filter.
+  // schema differs but the condition does not, so the cached filter is reused.
-  auto field2 = field("f2_filter_cache", int32());
+  auto field2 = field("f2_filter_cache", int64());
   auto different_schema = arrow::schema({field0, field1, field2});
   std::shared_ptr should_be_new_filter;
   status =
       Filter::Make(different_schema, condition, configuration, &should_be_new_filter);
   EXPECT_TRUE(status.ok());
-  EXPECT_FALSE(should_be_new_filter->GetBuiltFromCache());
+  EXPECT_TRUE(should_be_new_filter->GetBuiltFromCache());
 
   // condition is different, should return a new filter.
   auto greater_than_10 = TreeExprBuilder::MakeFunction(
@@ -84,7 +84,7 @@ TEST_F(TestFilter, TestFilterCache) {
   std::shared_ptr should_be_new_filter1;
   status = Filter::Make(schema, new_condition, configuration, &should_be_new_filter1);
   EXPECT_TRUE(status.ok());
-  EXPECT_FALSE(should_be_new_filter->GetBuiltFromCache());
+  EXPECT_FALSE(should_be_new_filter1->GetBuiltFromCache());
 }
 
 TEST_F(TestFilter, TestFilterCacheNullTreatment) {
diff --git a/cpp/src/gandiva/tests/projector_build_validation_test.cc b/cpp/src/gandiva/tests/projector_build_validation_test.cc
index 1ed4c77a074..e2b7e37e120 100644
--- a/cpp/src/gandiva/tests/projector_build_validation_test.cc
+++ b/cpp/src/gandiva/tests/projector_build_validation_test.cc
@@ -114,9 +114,20 @@ TEST_F(ValidationTestProjector, TestIncorrectSchemaMissingField) {
   // Build a projector for the expressions.
   std::shared_ptr projector;
   auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector);
-  EXPECT_TRUE(status.IsExpressionValidationError());
-  std::string expected_error = "Field f2 not in schema";
-  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
+  // Fatal assertion: the projector is dereferenced below.
+  ASSERT_TRUE(status.ok());
+
+  auto array0 = MakeArrowArrayFloat32({1.0, 2.0, 3.0, 4.0}, {true, true, true, true});
+  auto array2 = MakeArrowArrayFloat32({2.0, 3.0, 4.0, 5.0}, {true, true, true, true});
+
+  auto in_batch =
+      arrow::RecordBatch::Make(arrow::schema({field0, field1}), 4, {array0, array2});
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_TRUE(status.ok());
+
+  auto expected_array =
+      MakeArrowArrayBool({true, true, true, true}, {true, true, true, true});
+  EXPECT_ARROW_ARRAY_EQUALS(expected_array, outputs[0]);
 }
 
 TEST_F(ValidationTestProjector, TestIncorrectSchemaTypeNotMatching) {
@@ -136,10 +148,17 @@ TEST_F(ValidationTestProjector, TestIncorrectSchemaTypeNotMatching) {
   // Build a projector for the expressions.
   std::shared_ptr projector;
   auto status = Projector::Make(schema, {lt_expr}, TestConfiguration(), &projector);
-  EXPECT_TRUE(status.IsExpressionValidationError());
-  std::string expected_error =
-      "Field definition in schema f2: int32 different from field in expression f2: float";
-  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
+  // Fatal assertion: the projector is dereferenced below.
+  ASSERT_TRUE(status.ok());
+
+  auto array0 = MakeArrowArrayFloat32({1.0, 2.0, 3.0, 4.0}, {true, true, true, true});
+  auto array2 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true});
+
+  auto in_batch =
+      arrow::RecordBatch::Make(arrow::schema({field0, field2}), 4, {array0, array2});
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_FALSE(status.ok());
 }
 
 TEST_F(ValidationTestProjector, TestIfNotSupportedFunction) {
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index dc1ac9dfd26..524f526bb93 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -88,7 +88,7 @@ TEST_F(TestProjector, TestProjectCache) {
   status = Projector::Make(different_schema, {sum_expr, sub_expr}, configuration,
                            &should_be_new_projector);
   ASSERT_OK(status);
-  EXPECT_FALSE(should_be_new_projector->GetBuiltFromCache());
+  EXPECT_TRUE(should_be_new_projector->GetBuiltFromCache());
 
   // expression list is different should return a new projector.
   std::shared_ptr should_be_new_projector1;
@@ -2350,10 +2350,19 @@ TEST_F(TestProjector, TestBigIntCastFunction) {
       MakeArrowArrayFloat32({6.6f, -6.6f, 9.999999f, 0}, {true, true, true, false});
   auto array1 =
       MakeArrowArrayFloat64({6.6, -6.6, 9.99999999999, 0}, {true, true, true, false});
-  auto array2 = MakeArrowArrayInt64({100, 25, -0, 0}, {true, true, true, false});
-  auto array3 = MakeArrowArrayInt32({25, -25, -0, 0}, {true, true, true, false});
-  auto in_batch =
-      arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2, array3});
+
+  std::shared_ptr array_day_interval;
+  arrow::ArrayFromVector(
+      arrow::day_time_interval(), {true, true, true, false},
+      {{100, 0}, {25, 0}, {0, 0}, {0, 0}}, &array_day_interval);
+
+  std::shared_ptr array_month_interval;
+  arrow::ArrayFromVector(
+      arrow::month_interval(), {true, true, true, false}, {25, -25, -0, 0},
+      &array_month_interval);
+  auto in_batch = arrow::RecordBatch::Make(
+      schema, num_records, {array0, array1, array_day_interval, array_month_interval});
 
   auto out_float4 = MakeArrowArrayInt64({7, -7, 10, 0}, {true, true, true, false});
   auto out_float8 = MakeArrowArrayInt64({7, -7, 10, 0}, {true, true, true, false});
@@ -2407,8 +2416,14 @@ TEST_F(TestProjector, TestIntCastFunction) {
       MakeArrowArrayFloat32({6.6f, -6.6f, 9.999999f, 0}, {true, true, true, false});
   auto array1 =
       MakeArrowArrayFloat64({6.6, -6.6, 9.99999999999, 0}, {true, true, true, false});
-  auto array2 = MakeArrowArrayInt32({25, -25, -0, 0}, {true, true, true, false});
-  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  std::shared_ptr array_month_interval;
+  arrow::ArrayFromVector(
+      arrow::month_interval(), {true, true, true, false}, {25, -25, -0, 0},
+      &array_month_interval);
+
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+                                           {array0, array1, array_month_interval});
 
   auto out_float4 = MakeArrowArrayInt32({7, -7, 10, 0}, {true, true, true, false});
   auto out_float8 = MakeArrowArrayInt32({7, -7, 10, 0}, {true, true, true, false});
@@ -2453,7 +2468,10 @@ TEST_F(TestProjector, TestCastNullableIntYearInterval) {
 
   // Last validity is false and the cast functions throw error when input is empty. Should
   // not be evaluated due to addition of NativeFunction::kCanReturnErrors
-  auto array0 = MakeArrowArrayInt32({12, -24, -0, 0}, {true, true, true, false});
+  std::shared_ptr array0;
+  arrow::ArrayFromVector(
+      arrow::month_interval(), {true, true, true, false}, {12, -24, -0, 0}, &array0);
+
   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
 
   auto out_int32 = MakeArrowArrayInt32({1, -2, -0, 0}, {true, true, true, false});
@@ -3116,7 +3134,7 @@ TEST_F(TestProjector, TestCastBinaryBinary) {
   int num_records = 3;
 
   auto array0 =
-      MakeArrowArrayUtf8({"\\x41\\x42\\x43", "\\x41\\x42", ""}, {true, true, true});
+      MakeArrowArrayBinary({"\\x41\\x42\\x43", "\\x41\\x42", ""}, {true, true, true});
 
   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
 
@@ -3577,7 +3595,6 @@ TEST_F(TestProjector, TestSqrtFloat64) {
   EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
 }
 
-
 TEST_F(TestProjector, TestExtendedFunctions) {
   auto in_field = field("in", arrow::int32());
   auto schema = arrow::schema({in_field});
@@ -3677,5 +3694,69 @@ TEST_F(TestProjector, TestExtendedCFunctionThatNeedsContext) {
   ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
   EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
 }
+
+TEST_F(TestProjector, TestCacheHitForDifferentSchema) {
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::utf8());
+  auto field2 = field("f2", arrow::int32());
+  auto field3 = field("f3", arrow::int32());
+  auto field4 = field("f4", arrow::int32());
+  auto field5 = field("f5", arrow::int32());
+
+  auto schema_1 = arrow::schema({field0});
+
+  auto field0_lower = field("f0_lower", arrow::utf8());
+  auto field_add = field("field_add", arrow::int32());
+
+  auto lower_expr = TreeExprBuilder::MakeExpression("lower", {field0}, field0_lower);
+  auto add_expr = TreeExprBuilder::MakeExpression("add", {field2, field3}, field_add);
+  auto configuration = TestConfiguration();
+
+  std::shared_ptr projector;
+  auto status =
+      Projector::Make(schema_1, {lower_expr, add_expr}, configuration, &projector);
+  ASSERT_OK(status);
+  EXPECT_FALSE(projector->GetBuiltFromCache());
+
+  auto lower_expr2 = TreeExprBuilder::MakeExpression("lower", {field1}, field0_lower);
+  auto add_expr2 = TreeExprBuilder::MakeExpression("add", {field4, field5}, field_add);
+
+  std::shared_ptr projector_new;
+  status = Projector::Make(arrow::schema({field1}), {lower_expr2, add_expr2},
+                           configuration, &projector_new);
+  ASSERT_OK(status);
+  EXPECT_TRUE(projector_new->GetBuiltFromCache());
+
+  auto data_lower = MakeArrowArrayUtf8({"A", "B", "C", "D"}, {true, true, true, true});
+  auto array0 = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true});
+  auto array1 = MakeArrowArrayInt32({1, 1, 1, 1}, {true, true, true, true});
+  int num_records = 4;
+  auto in_batch = arrow::RecordBatch::Make(arrow::schema({field1, field4, field5}),
+                                           num_records, {data_lower, array0, array1});
+
+  arrow::ArrayVector outputs_new;
+  status = projector_new->Evaluate(*in_batch, pool_, &outputs_new);
+  ASSERT_OK(status);
+
+  auto data_lower_expect =
+      MakeArrowArrayUtf8({"a", "b", "c", "d"}, {true, true, true, true});
+  auto add_expect = MakeArrowArrayInt32({2, 3, 4, 5}, {true, true, true, true});
+
+  EXPECT_ARROW_ARRAY_EQUALS(data_lower_expect, outputs_new.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(add_expect, outputs_new.at(1));
+
+  // different type like add(int64,int64)
+
+  auto array0_int64 = MakeArrowArrayInt64({1, 2, 3, 4}, {true, true, true, true});
+  auto array1_int64 = MakeArrowArrayInt64({1, 1, 1, 1}, {true, true, true, true});
+  auto field4_int64 = arrow::field("f4", int64());
+  auto field5_int64 = arrow::field("f5", int64());
+
+  in_batch =
+      arrow::RecordBatch::Make(arrow::schema({field1, field4_int64, field5_int64}),
+                               num_records, {data_lower, array0_int64, array1_int64});
+  status = projector_new->Evaluate(*in_batch, pool_, &outputs_new);
+  EXPECT_FALSE(status.ok());
+}
 
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 1e408f0e431..7d3e0013190 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -653,10 +653,10 @@ TEST_F(TestUtf8, TestConvertUtf8) {
 
   // Create a row-batch with some sample data
   int num_records = 3;
-  auto array_a = MakeArrowArrayUtf8({"ok-\xf8\x28"
-                                     "-a",
-                                     "all-valid", "ok-\xa0\xa1-valid"},
-                                    {true, true, true});
+  auto array_a = MakeArrowArrayBinary({"ok-\xf8\x28"
+                                       "-a",
+                                       "all-valid", "ok-\xa0\xa1-valid"},
+                                      {true, true, true});
   auto array_b =
       MakeArrowArrayUtf8({"ok-z(-a", "all-valid", "ok-zz-valid"}, {true, true, true});