From f6f7ef57c57b6421425d5099fe9f252dfbe77d5e Mon Sep 17 00:00:00 2001 From: menglcai Date: Fri, 2 Jan 2026 17:17:39 +0800 Subject: [PATCH 1/5] update --- src/onnx/broadcast_qdq.cpp | 36 +++++++++++++++++++ .../include/migraphx/onnx/broadcast_qdq.hpp | 7 ++++ src/onnx/parse_qlinearmatmul.cpp | 14 ++++++-- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/onnx/broadcast_qdq.cpp b/src/onnx/broadcast_qdq.cpp index dbe6878a4cd..e7d950d84f8 100644 --- a/src/onnx/broadcast_qdq.cpp +++ b/src/onnx/broadcast_qdq.cpp @@ -61,6 +61,42 @@ instruction_ref bcast_qdq_instr(const std::string& op_name, return info.add_instruction(migraphx::make_op(op_name), x_in, bcast_scale, bcast_zero_pt); } +instruction_ref bcast_qdq_instr_matmul(const std::string& op_name, + instruction_ref x_in, + instruction_ref arg_fscale, + instruction_ref arg_z_pt, + const onnx_parser::node_info& info) +{ + auto in_lens = x_in->get_shape().lens(); + + // prep 1: broadcast scale. it can come as a scalar or a 1-D tensor. + instruction_ref bcast_scale; + if (arg_fscale->get_shape().elements() > 1) + { + auto axis = x_in->get_shape().lens().size() - arg_fscale->get_shape().lens().size(); + bcast_scale = info.add_instruction( + migraphx::make_op("broadcast", { {"axis", axis}, {"out_lens", in_lens} }), arg_fscale); + } + else + bcast_scale = info.add_instruction( + migraphx::make_op("multibroadcast", { {"out_lens", in_lens} }), arg_fscale); + + // prep 2: broadcast zero point. it can come as a scalar or a 1-D tensor. + instruction_ref bcast_zero_pt; + if (arg_z_pt->get_shape().elements() > 1) + { + auto axis = x_in->get_shape().lens().size() - arg_z_pt->get_shape().lens().size(); + bcast_zero_pt = info.add_instruction( + migraphx::make_op("broadcast", { {"axis", axis}, {"out_lens", in_lens} }), arg_z_pt); + } + else + bcast_zero_pt = info.add_instruction( + migraphx::make_op("multibroadcast", { {"out_lens", in_lens} }), arg_z_pt); + + // op_name is either quantizelinear or dequantizelinear: + return info.add_instruction(migraphx::make_op(op_name), x_in, bcast_scale, bcast_zero_pt); +} + // Multibroadcast a scaler.. instruction_ref bcast_scalar_instr(const migraphx::shape& shape_out, instruction_ref arg_in, diff --git a/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp b/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp index 04432b01d86..19c0ed1537f 100644 --- a/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp +++ b/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp @@ -44,6 +44,13 @@ instruction_ref bcast_qdq_instr(const std::string& op_name, instruction_ref arg_z_pt, const onnx_parser::node_info& info); + +instruction_ref bcast_qdq_instr_matmul(const std::string& op_name, + instruction_ref x_in, + instruction_ref arg_fscale, + instruction_ref arg_z_pt, + const onnx_parser::node_info& info); + // Multibroadcast a scaler.. instruction_ref bcast_scalar_instr(const migraphx::shape& shape_out, instruction_ref arg_in, diff --git a/src/onnx/parse_qlinearmatmul.cpp b/src/onnx/parse_qlinearmatmul.cpp index 1b430ab6fbd..78430afb891 100644 --- a/src/onnx/parse_qlinearmatmul.cpp +++ b/src/onnx/parse_qlinearmatmul.cpp @@ -138,9 +138,17 @@ struct parse_qlinearmatmul : op_parser not std::equal(lens_a.rbegin() + 2, lens_a.rend(), lens_b.rbegin() + 2, lens_b.rend())) MIGRAPHX_THROW("QLINEARMATMUL: mismatched input dimensions"); - if(migraphx::any_of({args[1], args[2], args[4], args[5]}, + if(migraphx::any_of({args[1], args[2]}, [](auto arg) { return not arg->get_shape().scalar(); })) MIGRAPHX_THROW("QLINEARMATMUL: unsupported row/column quantization"); + + const auto& in_scale_b = args[4]; + const auto& in_zero_pt_b = args[5]; + size_t dim_scale_b = in_scale_b->get_shape().lens().size(); + size_t dim_zero_pt_b = in_zero_pt_b->get_shape().lens().size(); + + if ((dim_scale_b > 1) || (dim_zero_pt_b > 1)) + MIGRAPHX_THROW("QLINEARMATMUL: unsupported row/column quantization"); } instruction_ref parse(const op_desc& /* opd */, @@ -154,13 +162,13 @@ struct parse_qlinearmatmul : op_parser const auto& in_a = args[0]; const auto& in_scale_a = args[1]; const auto& in_zero_pt_a = args[2]; - auto dquant_a = bcast_qdq_instr("dequantizelinear", in_a, in_scale_a, in_zero_pt_a, info); + auto dquant_a = bcast_qdq_instr_matmul("dequantizelinear", in_a, in_scale_a, in_zero_pt_a, info); // B const auto& in_b = args[3]; const auto& in_scale_b = args[4]; const auto& in_zero_pt_b = args[5]; - auto dquant_b = bcast_qdq_instr("dequantizelinear", in_b, in_scale_b, in_zero_pt_b, info); + auto dquant_b = bcast_qdq_instr_matmul("dequantizelinear", in_b, in_scale_b, in_zero_pt_b, info); bool is_a_prepended = false; bool is_b_appended = false; From df68934ec9ebd0c11dc55ce04af7d8893b6f0bb7 Mon Sep 17 00:00:00 2001 From: menglcai Date: Mon, 5 Jan 2026 10:21:28 +0800 Subject: [PATCH 2/5] update parse_qlinearconv --- src/onnx/parse_qlinearconv.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/onnx/parse_qlinearconv.cpp b/src/onnx/parse_qlinearconv.cpp index 26f2f7b9125..baf85566e04 100644 --- a/src/onnx/parse_qlinearconv.cpp +++ b/src/onnx/parse_qlinearconv.cpp @@ -229,7 +229,23 @@ struct parse_qlinearconv : op_parser // Biases, if any.. : is an optional argument. if(args.size() > 8) - conv_x_w = add_bias_to_conv(args[8], conv_x_w, info); + { + const auto& in_B = args[8]; + auto b_sh = in_B->get_shape(); + + auto bcast_scale_x = info.add_instruction( + migraphx::make_op("multibroadcast", { {"out_lens", in_scale_w->get_shape().lens()} }), + in_scale_x); + + auto bias_scale = info.add_instruction(migraphx::make_op("mul"), bcast_scale_x, in_scale_w); + auto zero_lit = info.add_literal(migraphx::literal{ migraphx::shape{migraphx::shape::int32_type}, {0} }); + auto bias_zp = info.add_instruction( + migraphx::make_op("multibroadcast", { {"out_lens", b_sh.lens()} }), zero_lit); + auto dquant_bias = info.add_instruction( + migraphx::make_op("dequantizelinear"), args[8], bias_scale, bias_zp); + + conv_x_w = add_bias_to_conv(dquant_bias, conv_x_w, info); + } return bcast_qdq_instr("quantizelinear", conv_x_w, in_scale_y, in_zero_pt_y, info); } From b723b2bf447856a4bd4e9e37c6e556a7f5d2ee9e Mon Sep 17 00:00:00 2001 From: menglcai Date: Mon, 5 Jan 2026 15:17:01 +0800 Subject: [PATCH 3/5] update code style --- src/onnx/parse_qlinearconv.cpp | 4 ++-- src/onnx/parse_qlinearmatmul.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/onnx/parse_qlinearconv.cpp b/src/onnx/parse_qlinearconv.cpp index baf85566e04..eb23537e931 100644 --- a/src/onnx/parse_qlinearconv.cpp +++ b/src/onnx/parse_qlinearconv.cpp @@ -230,8 +230,8 @@ struct parse_qlinearconv : op_parser // Biases, if any.. : is an optional argument. if(args.size() > 8) { - const auto& in_B = args[8]; - auto b_sh = in_B->get_shape(); + const auto& in_b = args[8]; + auto b_sh = in_b->get_shape(); auto bcast_scale_x = info.add_instruction( migraphx::make_op("multibroadcast", { {"out_lens", in_scale_w->get_shape().lens()} }), diff --git a/src/onnx/parse_qlinearmatmul.cpp b/src/onnx/parse_qlinearmatmul.cpp index 78430afb891..aa8735aeb3f 100644 --- a/src/onnx/parse_qlinearmatmul.cpp +++ b/src/onnx/parse_qlinearmatmul.cpp @@ -147,7 +147,7 @@ struct parse_qlinearmatmul : op_parser size_t dim_scale_b = in_scale_b->get_shape().lens().size(); size_t dim_zero_pt_b = in_zero_pt_b->get_shape().lens().size(); - if ((dim_scale_b > 1) || (dim_zero_pt_b > 1)) + if ((dim_scale_b > 1) or (dim_zero_pt_b > 1)) MIGRAPHX_THROW("QLINEARMATMUL: unsupported row/column quantization"); } From 0ed5da65ec4ed89e3e3f0f0d0aa6c1f2e33d944c Mon Sep 17 00:00:00 2001 From: menglcai Date: Mon, 5 Jan 2026 15:33:05 +0800 Subject: [PATCH 4/5] update code style --- src/onnx/broadcast_qdq.cpp | 16 ++++++++-------- src/onnx/include/migraphx/onnx/broadcast_qdq.hpp | 1 - src/onnx/parse_qlinearconv.cpp | 12 +++++++----- src/onnx/parse_qlinearmatmul.cpp | 8 +++++--- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/onnx/broadcast_qdq.cpp b/src/onnx/broadcast_qdq.cpp index e7d950d84f8..09853e78a54 100644 --- a/src/onnx/broadcast_qdq.cpp +++ b/src/onnx/broadcast_qdq.cpp @@ -71,27 +71,27 @@ instruction_ref bcast_qdq_instr_matmul(const std::string& op_name, // prep 1: broadcast scale. it can come as a scalar or a 1-D tensor. instruction_ref bcast_scale; - if (arg_fscale->get_shape().elements() > 1) + if(arg_fscale->get_shape().elements() > 1) { - auto axis = x_in->get_shape().lens().size() - arg_fscale->get_shape().lens().size(); + auto axis = x_in->get_shape().lens().size() - arg_fscale->get_shape().lens().size(); bcast_scale = info.add_instruction( - migraphx::make_op("broadcast", { {"axis", axis}, {"out_lens", in_lens} }), arg_fscale); + migraphx::make_op("broadcast", {{"axis", axis}, {"out_lens", in_lens}}), arg_fscale); } else bcast_scale = info.add_instruction( - migraphx::make_op("multibroadcast", { {"out_lens", in_lens} }), arg_fscale); + migraphx::make_op("multibroadcast", {{"out_lens", in_lens}}), arg_fscale); // prep 2: broadcast zero point. it can come as a scalar or a 1-D tensor. instruction_ref bcast_zero_pt; - if (arg_z_pt->get_shape().elements() > 1) + if(arg_z_pt->get_shape().elements() > 1) { - auto axis = x_in->get_shape().lens().size() - arg_z_pt->get_shape().lens().size(); + auto axis = x_in->get_shape().lens().size() - arg_z_pt->get_shape().lens().size(); bcast_zero_pt = info.add_instruction( - migraphx::make_op("broadcast", { {"axis", axis}, {"out_lens", in_lens} }), arg_z_pt); + migraphx::make_op("broadcast", {{"axis", axis}, {"out_lens", in_lens}}), arg_z_pt); } else bcast_zero_pt = info.add_instruction( - migraphx::make_op("multibroadcast", { {"out_lens", in_lens} }), arg_z_pt); + migraphx::make_op("multibroadcast", {{"out_lens", in_lens}}), arg_z_pt); // op_name is either quantizelinear or dequantizelinear: return info.add_instruction(migraphx::make_op(op_name), x_in, bcast_scale, bcast_zero_pt); diff --git a/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp b/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp index 19c0ed1537f..184aa143c39 100644 --- a/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp +++ b/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp @@ -44,7 +44,6 @@ instruction_ref bcast_qdq_instr(const std::string& op_name, instruction_ref arg_z_pt, const onnx_parser::node_info& info); - instruction_ref bcast_qdq_instr_matmul(const std::string& op_name, instruction_ref x_in, instruction_ref arg_fscale, diff --git a/src/onnx/parse_qlinearconv.cpp b/src/onnx/parse_qlinearconv.cpp index eb23537e931..e4e082bb5b0 100644 --- a/src/onnx/parse_qlinearconv.cpp +++ b/src/onnx/parse_qlinearconv.cpp @@ -231,16 +231,18 @@ struct parse_qlinearconv : op_parser if(args.size() > 8) { const auto& in_b = args[8]; - auto b_sh = in_b->get_shape(); + auto b_sh = in_b->get_shape(); auto bcast_scale_x = info.add_instruction( - migraphx::make_op("multibroadcast", { {"out_lens", in_scale_w->get_shape().lens()} }), + migraphx::make_op("multibroadcast", {{"out_lens", in_scale_w->get_shape().lens()}}), in_scale_x); - auto bias_scale = info.add_instruction(migraphx::make_op("mul"), bcast_scale_x, in_scale_w); - auto zero_lit = info.add_literal(migraphx::literal{ migraphx::shape{migraphx::shape::int32_type}, {0} }); + auto bias_scale = + info.add_instruction(migraphx::make_op("mul"), bcast_scale_x, in_scale_w); + auto zero_lit = info.add_literal( + migraphx::literal{migraphx::shape{migraphx::shape::int32_type}, {0}}); auto bias_zp = info.add_instruction( - migraphx::make_op("multibroadcast", { {"out_lens", b_sh.lens()} }), zero_lit); + migraphx::make_op("multibroadcast", {{"out_lens", b_sh.lens()}}), zero_lit); auto dquant_bias = info.add_instruction( migraphx::make_op("dequantizelinear"), args[8], bias_scale, bias_zp); diff --git a/src/onnx/parse_qlinearmatmul.cpp b/src/onnx/parse_qlinearmatmul.cpp index aa8735aeb3f..37547cafbf5 100644 --- a/src/onnx/parse_qlinearmatmul.cpp +++ b/src/onnx/parse_qlinearmatmul.cpp @@ -147,7 +147,7 @@ struct parse_qlinearmatmul : op_parser size_t dim_scale_b = in_scale_b->get_shape().lens().size(); size_t dim_zero_pt_b = in_zero_pt_b->get_shape().lens().size(); - if ((dim_scale_b > 1) or (dim_zero_pt_b > 1)) + if((dim_scale_b > 1) or (dim_zero_pt_b > 1)) MIGRAPHX_THROW("QLINEARMATMUL: unsupported row/column quantization"); } @@ -162,13 +162,15 @@ struct parse_qlinearmatmul : op_parser const auto& in_a = args[0]; const auto& in_scale_a = args[1]; const auto& in_zero_pt_a = args[2]; - auto dquant_a = bcast_qdq_instr_matmul("dequantizelinear", in_a, in_scale_a, in_zero_pt_a, info); + auto dquant_a = + bcast_qdq_instr_matmul("dequantizelinear", in_a, in_scale_a, in_zero_pt_a, info); // B const auto& in_b = args[3]; const auto& in_scale_b = args[4]; const auto& in_zero_pt_b = args[5]; - auto dquant_b = bcast_qdq_instr_matmul("dequantizelinear", in_b, in_scale_b, in_zero_pt_b, info); + auto dquant_b = + bcast_qdq_instr_matmul("dequantizelinear", in_b, in_scale_b, in_zero_pt_b, info); bool is_a_prepended = false; bool is_b_appended = false; From 31274c92b32dbee88b6c4c308844e264b2c0c0c4 Mon Sep 17 00:00:00 2001 From: menglcai Date: Mon, 5 Jan 2026 15:43:53 +0800 Subject: [PATCH 5/5] update license --- src/onnx/broadcast_qdq.cpp | 2 +- src/onnx/include/migraphx/onnx/broadcast_qdq.hpp | 2 +- src/onnx/parse_qlinearconv.cpp | 2 +- src/onnx/parse_qlinearmatmul.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/onnx/broadcast_qdq.cpp b/src/onnx/broadcast_qdq.cpp index 09853e78a54..9469894d200 100644 --- a/src/onnx/broadcast_qdq.cpp +++ b/src/onnx/broadcast_qdq.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp b/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp index 184aa143c39..b1362b6ad4b 100644 --- a/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp +++ b/src/onnx/include/migraphx/onnx/broadcast_qdq.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/onnx/parse_qlinearconv.cpp b/src/onnx/parse_qlinearconv.cpp index e4e082bb5b0..50a06a55d2c 100644 --- a/src/onnx/parse_qlinearconv.cpp +++ b/src/onnx/parse_qlinearconv.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/onnx/parse_qlinearmatmul.cpp b/src/onnx/parse_qlinearmatmul.cpp index 37547cafbf5..4d3a1670749 100644 --- a/src/onnx/parse_qlinearmatmul.cpp +++ b/src/onnx/parse_qlinearmatmul.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal