From c3abdc0a1bff494fb1ae34570d8a51c9eb1eb87d Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 3 Oct 2025 14:16:05 +0000 Subject: [PATCH 1/3] Add quantization and tuning ops as part of model compile hash --- .../migraphx/migraphx_execution_provider.cc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 7ab9ffdba3950..d2837512f0fea 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -1316,7 +1316,23 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& input_shapes.push_back(tensor_shape->dim(j).dim_value()); } } - model_cache_file = model_cache_path_ / (mxr_filename_prefix + make_hash(input_shapes) + ".mxr"); + // capture flags outside of name/inputs that are used when models are compiled + // Each of these will change the final compiled model and need to be captured to ensure + // hash uses the quantization flags and modes + auto get_quant_and_tune_flags = [=](){ + std::vector data_out{}; + + data_out.push_back(static_cast(fp16_enable_)); + data_out.push_back(static_cast(fp8_enable_)); + data_out.push_back(static_cast(bf16_enable_)); + data_out.push_back(static_cast(int8_enable_)); + data_out.push_back(static_cast(mem_limit_)); + data_out.push_back(static_cast(exhaustive_tune_)); + + return data_out; + }; + + model_cache_file = model_cache_path_ / (mxr_filename_prefix + make_hash(input_shapes) + "-" + make_hash(get_quant_and_tune_flags()) +".mxr"); } // map parameter input name to index From de990f7087579d9db85831b0f0de6702f2a4800d Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 3 Oct 2025 22:06:02 +0000 Subject: [PATCH 2/3] Update hash from flags for when we need to recompile a model turns out we weren't passing exhaustive tune flags for the recompile in 
along with some other flags like mem_limit --- .../migraphx/migraphx_execution_provider.cc | 18 ++++++++++++++++-- .../migraphx/migraphx_execution_provider.h | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index d2837512f0fea..584691f71328e 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -1401,7 +1401,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& map_onnx_string_[context->node_name], options, t_, map_input_index_[context->node_name], &mgx_mu_, map_no_input_shape_[context->node_name], fp16_enable_, bf16_enable_, fp8_enable_, int8_enable_, int8_calibration_cache_available_, dynamic_range_map_, - model_cache_path_.string(), dump_model_ops_}; + model_cache_path_.string(), dump_model_ops_, exhaustive_tune_, mem_limit_}; *state = p.release(); return 0; }; @@ -1427,6 +1427,8 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& bool fp8_enable = mgx_state->fp8_enable; bool int8_enable = mgx_state->int8_enable; bool int8_calibration_cache_available = mgx_state->int8_calibration_cache_available; + bool exhaustive_tune = mgx_state->exhaustive_tune; + size_t mem_limit = mgx_state->mem_limit; // mean no program at all, so need to get the input shape info // from input data @@ -1485,7 +1487,19 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& std::filesystem::path model_cache_file; // empty cache path means the MXR caching is disabled - always compile if (!model_cache_path_.empty()) { - model_cache_file = mgx_state->model_cache_dir / (mxr_filename_prefix + make_hash(input_shapes) + ".mxr"); + auto get_quant_and_tune_flags = [=](){ + std::vector data_out{}; + + data_out.push_back(static_cast(fp16_enable)); + data_out.push_back(static_cast(fp8_enable)); + 
data_out.push_back(static_cast(bf16_enable)); + data_out.push_back(static_cast(int8_enable)); + data_out.push_back(static_cast(mem_limit)); + data_out.push_back(static_cast(exhaustive_tune)); + + return data_out; + }; + model_cache_file = mgx_state->model_cache_dir / (mxr_filename_prefix + make_hash(input_shapes) + "-" + make_hash(get_quant_and_tune_flags()) + ".mxr"); } if (!load_precompiled_model(prog, model_cache_file)) { LOGS_DEFAULT(VERBOSE) << "Input shape mismatch detected. Recompiling"; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h index ea0c2f7d9a060..a027bb6fe42a1 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h @@ -56,6 +56,7 @@ struct MIGraphXFuncState { std::filesystem::path model_cache_dir; bool dump_model_ops = false; bool exhaustive_tune = false; + size_t mem_limit; }; // Logical device representation. 
From c7792d900c87be7435823395aaebf25670bad2ca Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 3 Oct 2025 22:06:43 +0000 Subject: [PATCH 3/3] lintrunner pass --- .../migraphx/migraphx_execution_provider.cc | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 584691f71328e..0070054936c18 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -808,13 +808,12 @@ std::unique_ptr MIGraphXExecutionProvider::GetSubGraph(const st if (output.second->Exists()) { auto name = output.second->Name(); if (std::find(graph_output_names.begin(), graph_output_names.end(), name) == graph_output_names.end()) { - // if graph is split we dont know if output is used so we need this, otherwise if the graph isn't split - // then we can safely assume this output is a dangling output from a node and to discard it as part of the - // final graph output - if(is_graph_split) - { - output_names.push_back(name); - } + // if graph is split we dont know if output is used so we need this, otherwise if the graph isn't split + // then we can safely assume this output is a dangling output from a node and to discard it as part of the + // final graph output + if (is_graph_split) { + output_names.push_back(name); + } } else { graph_out_names.insert(name); } @@ -1319,20 +1318,20 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& // capture flags outside of name/inputs that are used when models are compiled // Each of these will change the final compiled model and need to be captured to ensure // hash uses the quantization flags and modes - auto get_quant_and_tune_flags = [=](){ - std::vector data_out{}; + auto get_quant_and_tune_flags = [=]() { + std::vector data_out{}; - 
data_out.push_back(static_cast(fp16_enable_)); - data_out.push_back(static_cast(fp8_enable_)); - data_out.push_back(static_cast(bf16_enable_)); - data_out.push_back(static_cast(int8_enable_)); - data_out.push_back(static_cast(mem_limit_)); - data_out.push_back(static_cast(exhaustive_tune_)); + data_out.push_back(static_cast(fp16_enable_)); + data_out.push_back(static_cast(fp8_enable_)); + data_out.push_back(static_cast(bf16_enable_)); + data_out.push_back(static_cast(int8_enable_)); + data_out.push_back(static_cast(mem_limit_)); + data_out.push_back(static_cast(exhaustive_tune_)); - return data_out; + return data_out; }; - model_cache_file = model_cache_path_ / (mxr_filename_prefix + make_hash(input_shapes) + "-" + make_hash(get_quant_and_tune_flags()) +".mxr"); + model_cache_file = model_cache_path_ / (mxr_filename_prefix + make_hash(input_shapes) + "-" + make_hash(get_quant_and_tune_flags()) + ".mxr"); } // map parameter input name to index @@ -1487,7 +1486,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& std::filesystem::path model_cache_file; // empty cache path means the MXR caching is disabled - always compile if (!model_cache_path_.empty()) { - auto get_quant_and_tune_flags = [=](){ + auto get_quant_and_tune_flags = [=]() { std::vector data_out{}; data_out.push_back(static_cast(fp16_enable)); @@ -1498,8 +1497,8 @@ Status MIGraphXExecutionProvider::Compile(const std::vector& data_out.push_back(static_cast(exhaustive_tune)); return data_out; - }; - model_cache_file = mgx_state->model_cache_dir / (mxr_filename_prefix + make_hash(input_shapes) + "-" + make_hash(get_quant_and_tune_flags()) + ".mxr"); + }; + model_cache_file = mgx_state->model_cache_dir / (mxr_filename_prefix + make_hash(input_shapes) + "-" + make_hash(get_quant_and_tune_flags()) + ".mxr"); } if (!load_precompiled_model(prog, model_cache_file)) { LOGS_DEFAULT(VERBOSE) << "Input shape mismatch detected. Recompiling";