12 changes: 12 additions & 0 deletions onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -65,6 +65,13 @@ struct OnnxToOvNetworkBindings {
[&onnx_name](const auto& ov_parameter_info) { return ov_parameter_info.get_names().contains(onnx_name); });
bool matched_names = it != ov_parameters.end();

if (it == ov_parameters.end()) {

Reviewer comment: I'd prefer that we only do this if onnx_input_map.size() > std::distance(ov_parameters.begin(), ov_parameters.end()), and that we check it after we handle session_context.enable_causallm.
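A rough sketch of that suggestion (the placement after the enable_causallm handling and the onnx_input_map name come from the comment above; the rest mirrors the surrounding diff and is illustrative only, not a confirmed implementation):

// Hypothetical guard: warn only when the ONNX model exposes more inputs
// than the compiled OV network does, and only after the
// session_context.enable_causallm handling has already run.
const auto ov_param_count =
    static_cast<size_t>(std::distance(ov_parameters.begin(), ov_parameters.end()));
if (!matched_names && onnx_input_map.size() > ov_param_count) {
  LOGS_DEFAULT(WARNING) << log_tag << "The input '" << onnx_name
                        << "' is not used due to OpenVINO optimization. "
                           "This may cause issues if the input is required.";
  continue;
}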

LOGS_DEFAULT(WARNING) << log_tag << "The input '" << onnx_name
<< "' is not used due to OpenVINO optimization. "
"This may cause issues if the input is required.";
continue;
}

// For Stateful Model Compilation, the ONNX model includes KV cache (past/present) tensors.
// However, these tensors are internally converted to a stateful representation, which removes them.
// It's also possible that the onnx model does not contain tensors such as beam_idx, whereas our converted
@@ -110,6 +117,11 @@ struct OnnxToOvNetworkBindings {

info.SetFullyDynamic(has_fully_dynamic);
info.SetBoundedDynamic(has_bounded_dynamic);
} else {
auto shape_size = ov::shape_size(shape.get_shape());
if (0 == shape_size) {
has_dynamic_io_ = true;
}


Reviewer comment: Can you elaborate a bit more on this case? The shape came from the OV compiled model and is reported as static, but it has a zero dimension? Or maybe no dimensions? Does that actually make it dynamic?

@sgbihu (author) replied on Dec 18, 2025:


The reason is that the result is a 0-size tensor. OV can't create a 0-size buffer, so we need to mark this as dynamic to avoid allocating the buffer.

@sgbihu (author):


Another option is to disable this test case because of the 0-size buffer.


Reviewer comment: I'd prefer that we not add special handling in the I/O binding for a case OV doesn't support, but if we keep it, comment why it's there so it can be more easily re-evaluated if we want to change it in the future.
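One way to address that request, sketched as a comment for the zero-size branch above (the rationale is inferred from this thread, not from OV documentation):

// Workaround: OV reports this shape as static, but its total element count
// is 0, and OV cannot create a zero-size buffer. Treat the binding as
// dynamic so no buffer is pre-allocated; revisit if OV adds support for
// zero-size tensors.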

}

input_output_map.push_back(std::move(info));
10 changes: 9 additions & 1 deletion onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -102,8 +102,16 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
if (unsupported_nodes.empty()) {
std::vector<std::string> inputs;
std::vector<std::string> outputs;
auto input_nodes = graph_viewer_.GetInputs();
// OV only handles tensor inputs; if any graph input lacks a shape (i.e., is not a tensor), mark the whole graph unsupported.
for (auto& node : input_nodes) {
auto shape = node->Shape();
if (!shape) {
return result;
}
}
// Fill inputs with names
Iterable2String(inputs, graph_viewer_.GetInputs());
Iterable2String(inputs, input_nodes);

/* In scenarios, when there are no inputs or all inputs being initializers,
ConstantFolding optimization in onnxruntime pre-computes the value.*/
59 changes: 39 additions & 20 deletions onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
Original file line number Diff line number Diff line change
@@ -35,41 +35,22 @@ namespace openvino_ep {

// Ops which are supported only in models(as intermediate nodes) and not in unit tests
std::set<std::string> ops_supported_only_in_model = {
"Add",
"Cast",
"Celu",
"Concat",
"ConstantOfShape",
"DequantizeLinear",
"Dropout",
"Einsum",
"Exp",
"Expand",
"EyeLike",
"GatherElements",
"GatherND",
"GridSample",
"Identity",
"LayerNormalization",
"Loop",
"LSTM",
"NonMaxSuppression",
"NonZero",
"Not",
"OneHot",
"Pad",
"QuantizeLinear",
"RandomNormalLike",
"Range",
"ReduceMin",
"Resize",
"Round",
"Shape",
"Slice",
"Split",
"Tile",
"TopK",
"Trilu"};
"TopK"};

// Ops which are supported as functions (as composite ops)
std::set<std::string> ops_supported_as_function = {
@@ -269,6 +250,8 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_initializer_.insert(
@@ -317,6 +300,8 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT32));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_cpu_.insert(
@@ -367,6 +352,7 @@ void DataOps::populate_op_mode_supported() {
no_dimension_supported_.push_back({"DynamicQuantizeLinear", V_2025_2, {"All"}});
no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}});
no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
no_dimension_supported_.push_back({"Exp", V_2020_4, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"Expand", V_2023_3, {"CPU"}});
no_dimension_supported_.push_back({"Expand", V_2024_3, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
@@ -382,6 +368,7 @@ void DataOps::populate_op_mode_supported() {
no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Neg", V_2023_0, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"Pow", V_2023_0, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"PRelu", V_2020_4, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}});
no_dimension_supported_.push_back({"Range", V_2021_2, {"All"}});
no_dimension_supported_.push_back({"ReduceMax", V_2021_4, {"All"}});
@@ -489,6 +476,38 @@ void DataOps::populate_op_mode_supported() {
}};
op_list_.insert({"Upsample", obj});
}
{
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4},
[this](const Node* node, const InitializedTensorSet&) {
auto& attributes = node->GetAttributes();
if (attributes.count("coordinate_transformation_mode") > 0) {
auto coordinate_transformation_mode =
attributes.at("coordinate_transformation_mode").s();
if (coordinate_transformation_mode == "tf_crop_and_resize" ||
coordinate_transformation_mode == "half_pixel_symmetric") {
return true;
}
}
if (attributes.count("antialias") > 0) {
auto antialias_mode =
attributes.at("antialias").i();
// "mode" defaults to "nearest" when absent; avoid at() on a missing key.
const std::string resize_mode =
    attributes.count("mode") > 0 ? attributes.at("mode").s() : "nearest";
if (antialias_mode == 1 &&
(resize_mode == "linear" ||
resize_mode == "cubic")) {
return true;
}
}
if (attributes.count("exclude_outside") > 0) {
if (attributes.at("exclude_outside").i() == 1) {
return true;
}
}
return false;
}};
op_list_.insert({"Resize", obj});
}
}

bool DataOps::op_is_supported(std::string name, std::vector<SupportedOp>& op_list) {
74 changes: 71 additions & 3 deletions onnxruntime/test/contrib_ops/quantize_ops_test.cc
@@ -287,9 +287,46 @@ TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_int8) {
127, -127,
127, -128,
127, -128});
std::unordered_set<std::string> excluded_providers;
// Disable Tensorrt EP due to error: node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
excluded_providers.insert(kTensorrtExecutionProvider);
// Disable OpenVINO EP due to its different formulation for QuantizeLinear (sketched after this test)
excluded_providers.insert(kOpenVINOExecutionProvider);
test.ConfigExcludeEps(excluded_providers)
.RunWithConfig();
}
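For context on the formulation note above: the ONNX contrib op rounds x / scale (ties to even) and then adds the zero point, while the expected values in the OVEP variants below are consistent with adding the zero point before rounding. A minimal sketch of both formulations, offered as an observation from the expected outputs in this file rather than a confirmed description of OpenVINO internals:

#include <algorithm>
#include <cmath>
#include <cstdint>

// ONNX reference: saturate(round(x / scale) + zero_point).
int8_t QuantizeOnnxRef(float x, float scale, int8_t zero_point) {
  float q = std::nearbyint(x / scale) + static_cast<float>(zero_point);  // ties to even
  return static_cast<int8_t>(std::clamp(q, -128.0f, 127.0f));
}

// What the OVEP expected values match: saturate(round(x / scale + zero_point)).
int8_t QuantizeShiftFirst(float x, float scale, int8_t zero_point) {
  float q = std::nearbyint(x / scale + static_cast<float>(zero_point));
  return static_cast<int8_t>(std::clamp(q, -128.0f, 127.0f));
}

// The two differ only on .5 ties, e.g. x = 3, scale = 2, zero_point = 1:
// QuantizeOnnxRef -> 3, QuantizeShiftFirst -> 2 (the OVEP test below expects 2).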

#ifdef USE_OPENVINO
TEST(QuantizeLinearContribOpTest, OVEPQuantizeLinear_per_tensor_float_int8) {
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{16};
test.AddInput<float>("x", dims, {
0.f, 2.f, //
3.f, -3.f, // rounding half to even
2.9f, -2.9f, // round < .5
3.1f, -3.1f, // round > .5
254.f, -256.f, // critical point
255.f, -257.f, // critical point
256.f, -258.f, // critical point
1000.f, -1000.f // saturate case
});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddInput<int8_t>("y_zero_point", {}, {1});
test.AddOutput<int8_t>("y", dims,
{1, 2,
2, 0,
2, 0,
3, -1,
127, -127,
127, -128,
127, -128,
127, -128});
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.emplace_back(DefaultOpenVINOExecutionProvider());
test.ConfigEps(std::move(execution_providers))
.RunWithConfig();
}
#endif // USE_OPENVINO

// Test uint16 com.microsoft.QuantizeLinear (per tensor)
TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_uint16) {
@@ -311,10 +348,41 @@ TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_uint16) {
32769, 32765,
65535, 0,
65535, 0});

std::unordered_set<std::string> excluded_providers;
// Disable Tensorrt EP due to error: unsupported data type
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
excluded_providers.insert(kTensorrtExecutionProvider);
// Disable OV EP due to its different formulation for QuantizeLinear (see the sketch above)
excluded_providers.insert(kOpenVINOExecutionProvider);
test.ConfigExcludeEps(excluded_providers)
.RunWithConfig();
}

#ifdef USE_OPENVINO
TEST(QuantizeLinearContribOpTest, OVEPQuantizeLinear_per_tensor_float_uint16) {
OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain);
std::vector<int64_t> dims{12};
test.AddInput<float>("x", dims, {
0.f, -128.f, 3.f, -3.f, // rounding half to even
2.9f, -2.9f, // round < .5
3.1f, -3.1f, // round > .5
65536.f, -65534.f, // critical point
70000.f, -70000.f // saturate case
});
test.AddInput<float>("scale", {}, {2.0f}, true);
test.AddInput<uint16_t>("zero_point", {}, {32767}, true);
test.AddOutput<uint16_t>("y", dims,
{32767, 32703,
32768, 32766,
32768, 32766,
32769, 32765,
65535, 0,
65535, 0});
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.emplace_back(DefaultOpenVINOExecutionProvider());
test.ConfigEps(std::move(execution_providers))
.RunWithConfig();
}
#endif // USE_OPENVINO

// Test int16 com.microsoft.QuantizeLinear (per tensor)
TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_int16) {
3 changes: 2 additions & 1 deletion onnxruntime/test/providers/cpu/controlflow/loop_test.cc
@@ -1037,7 +1037,8 @@ TEST(Loop, IterationCountAsOutput) {
test.AddOutput<int64_t>("loop_var_0_final", {3, 1}, {0, 1, 2});

// Disable TensorRT on unsupported data type BOOL
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
// Disable OpenVINO EP: the ONNX partitioning creates a new domain that the OV frontend can't handle
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
}

#if defined(USE_CUDA)
5 changes: 5 additions & 0 deletions onnxruntime/test/providers/cpu/tensor/cast_op_test.cc
@@ -75,6 +75,11 @@ void TestCastOp(gsl::span<const SrcType> input,
excluded_provider_types.insert(kCudaExecutionProvider);
}

if (input.size() == 0) {
// OpenVINO doesn't support zero-size inputs
excluded_provider_types.insert(kOpenVINOExecutionProvider);
}

if (cuda_only && (excluded_provider_types.count(kCudaExecutionProvider) > 0)) {
return;
}