diff --git a/src/include/migraphx/module.hpp b/src/include/migraphx/module.hpp index d68b2683e65..04b2dea9221 100644 --- a/src/include/migraphx/module.hpp +++ b/src/include/migraphx/module.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -323,6 +323,12 @@ struct MIGRAPHX_EXPORT module void annotate(std::ostream& os, std::function a) const; std::vector get_sub_modules(bool shallow = false) const; + + /* Creates a new module with the same instructions but with different input parameter shapes. + Returns the new module by value without modifying the original. + */ + module with_static_shapes(const std::vector& input_shapes); + /* sorts the module in topological order aka reverse-post order (RPO) DFS order it takes last instruction or @return as the root and walks back the graph and moves inputs of the each instruction such that it appears before the instruction itself. diff --git a/src/include/migraphx/op/pointwise.hpp b/src/include/migraphx/op/pointwise.hpp index 51ca75ee92b..03f16ca7d01 100644 --- a/src/include/migraphx/op/pointwise.hpp +++ b/src/include/migraphx/op/pointwise.hpp @@ -61,7 +61,10 @@ struct pointwise MIGRAPHX_THROW("pointwise should have at least one input"); auto* pm = mods.front(); auto pnames = pm->get_parameter_names(); - check_shapes{inputs, *this}.has(pnames.size()).same_dims(); + check_shapes{inputs, *this, true}.has(pnames.size()).same_dims(); + + std::vector scalar_const_out_lens = + inputs.front().dynamic() ? std::vector{} : inputs.front().lens(); const auto rank = inputs.front().ndim(); const bool has_broadcasts = @@ -69,7 +72,7 @@ struct pointwise auto result = pm->compute_shapes( (rank > 1 and has_broadcasts) ? 
remove_broadcasts(inputs) : inputs, - {.name = name(), .strict_type = true, .scalar_const_out_lens = inputs.front().lens()}); + {.name = name(), .strict_type = true, .scalar_const_out_lens = scalar_const_out_lens}); if(result.size() == 1) return result.front(); return shape{result}; diff --git a/src/module.cpp b/src/module.cpp index 4838d241904..fa98636771c 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -717,7 +718,8 @@ std::vector module::compute_shapes(const std::vector& inputs, ins->get_shape().type_string() + " but passed " + ins_shapes[ins].type_string()); } - if(options.strict_lens and ins->get_shape().lens() != ins_shapes[ins].lens()) + if(not ins->get_shape().dynamic() and options.strict_lens and + ins->get_shape().lens() != ins_shapes[ins].lens()) { MIGRAPHX_THROW(options.name + ": Mismatched lens: expected {" + to_string_range(ins->get_shape().lens()) + "} but passed {" + @@ -1466,6 +1468,78 @@ std::vector module::get_sub_modules(bool shallow) const return vec_modules; } +module module::with_static_shapes(const std::vector& input_shapes) +{ + // This routine creates a new module with the same instructions but with different input shapes. + // The sequence of instructions (operators and interconnectivity) is copied, but all input + // parameter shapes are replaced with new "input_shapes". + + // ensure input_shapes is the same length as the parameters. 
+ auto param_names = this->get_parameter_names(); + assert(param_names.size() == input_shapes.size()); + + // Make a mapping from the parameter names to the new shapes. + std::unordered_map shape_map; + for(std::size_t i = 0; i < param_names.size(); ++i) + shape_map[param_names[i]] = input_shapes[i]; + + module new_mod; + + std::unordered_map ins_map; + + // First, create parameters with new shapes in new_mod and fill ins_map for params + for(auto ins : iterator_for(*this)) + { + if(ins->name() == "@param") + { + auto pname = any_cast(ins->get_operator()).parameter; + assert(shape_map.count(pname) > 0); + ins_map[ins] = new_mod.add_parameter(pname, shape_map.at(pname)); + } + } + + // Copy remaining instructions (except parameters) in order + for(auto ins : iterator_for(*this)) + { + if(ins->name() == "@param") + continue; + + // Gather new input refs for this instruction + std::vector new_args; + std::transform(ins->inputs().begin(), + ins->inputs().end(), + std::back_inserter(new_args), + [&](auto arg) { return ins_map.at(arg); }); + + // Gather new module argument refs if present + std::vector new_mod_args; + std::transform(ins->module_inputs().begin(), + ins->module_inputs().end(), + std::back_inserter(new_mod_args), + [&](auto modarg) { return modarg; }); + + instruction_ref new_ins; + if(ins->name() == "@literal") + { + new_ins = new_mod.add_literal(ins->get_literal()); + } + else if(ins->name() == "@return") + { + new_ins = new_mod.add_return(new_args); + } + else + { + if(new_mod_args.empty()) + new_ins = new_mod.add_instruction(ins->get_operator(), new_args); + else + new_ins = new_mod.add_instruction(ins->get_operator(), new_args, new_mod_args); + } + ins_map[ins] = new_ins; + } + + return new_mod; +} + module& module::sort() { if(this->begin() == this->end()) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 20ca81c3b1a..b984f5d4ee2 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ 
-37,6 +37,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -45,42 +46,8 @@ namespace gpu { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL); MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_BENCHMARKING); -struct precompile_op -{ - operation op = op::identity{}; - std::size_t additional_args = 1; - bool ignore_modules = false; - std::optional output_shape = nullopt; - - template - static auto reflect(Self& self, F f) - { - return pack(f(self.op, "op"), - f(self.additional_args, "additional_args"), - f(self.ignore_modules, "ignore_modules"), - f(self.output_shape, "output_shape")); - } - - std::string name() const { return "gpu::precompile_op"; } - - shape compute_shape(std::vector inputs, const std::vector& mods) const - { - // Pop off additional args - inputs.resize(inputs.size() - additional_args); - if(output_shape.has_value()) - return output_shape.value(); - if(ignore_modules) - return op.compute_shape(inputs); - return op.compute_shape(inputs, mods); - } - - std::ptrdiff_t output_alias(const std::vector& shapes) const - { - return shapes.size() - 1; - } -}; - MIGRAPHX_REGISTER_OP(precompile_op); +MIGRAPHX_REGISTER_OP(dynamic_code_object_op); struct compiled_result { diff --git a/src/targets/gpu/include/migraphx/gpu/precompile_ops.hpp b/src/targets/gpu/include/migraphx/gpu/precompile_ops.hpp new file mode 100644 index 00000000000..7bec916b425 --- /dev/null +++ b/src/targets/gpu/include/migraphx/gpu/precompile_ops.hpp @@ -0,0 +1,193 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef MIGRAPHX_GUARD_GPU_PRECOMPILE_OPS_HPP +#define MIGRAPHX_GUARD_GPU_PRECOMPILE_OPS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct precompile_op +{ + operation op = op::identity{}; + std::size_t additional_args = 1; + bool ignore_modules = false; + std::optional output_shape = nullopt; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op"), + f(self.additional_args, "additional_args"), + f(self.ignore_modules, "ignore_modules"), + f(self.output_shape, "output_shape")); + } + + std::string name() const { return "gpu::precompile_op"; } + + shape compute_shape(std::vector inputs, const std::vector& mods) const + { + // Pop off additional args + inputs.resize(inputs.size() - additional_args); + if(output_shape.has_value()) + return output_shape.value(); + if(ignore_modules) + return op.compute_shape(inputs); + return op.compute_shape(inputs, mods); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +struct dynamic_code_object_op +{ + operation pre_op = precompile_op{}; + std::optional output_shape = nullopt; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.pre_op, "pre_op"), f(self.output_shape, "output_shape")); + } + + std::string name() const { return "gpu::dynamic_code_object_op"; } + + shape compute_shape(std::vector inputs, const std::vector& mods) const + { + return pre_op.compute_shape(inputs, mods); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } + argument compute(context& ctx, + const shape&, + const std::vector& args, + const std::vector& module_args, + std::function( + module_ref&, const std::unordered_map&)> run) const + { + auto static_args = std::vector{args.begin(), args.end()}; + auto output_arg = 
static_args.back(); + module static_mod; + if(not module_args.empty()) + { + // rewrite module without dynamic shapes + auto mod_args = std::vector{args.begin(), args.end() - 1}; + static_mod = module_args.front()->with_static_shapes(to_shapes(mod_args)); + static_mod.set_bypass(true); + + // compute output arg shape + if(output_arg.get_shape().dynamic()) + { + auto out_shapes = static_mod.compute_shapes(to_shapes(mod_args)); + auto rsp_shape = (out_shapes.size() > 1) ? shape{out_shapes} : out_shapes.front(); + static_args[static_args.size() - 1] = output_arg.reshape(rsp_shape); + } + } + else + { + if(output_arg.get_shape().dynamic()) + { + auto out_shape = pre_op.compute_shape(to_shapes(static_args)); + static_args[static_args.size() - 1] = output_arg.reshape(out_shape); + } + } + + auto temp_mod = module("temp_mod"); + std::vector args_ins; + std::vector idx(static_args.size()); + std::iota(std::begin(idx), std::end(idx), 0); + std::transform(static_args.begin(), + static_args.end(), + idx.begin(), + std::back_inserter(args_ins), + [&](const auto& arg, const auto& i) { + return temp_mod.add_parameter("temp_mod:x" + std::to_string(i), + arg.get_shape()); + }); + instruction_ref ins; + if(not module_args.empty()) + { + ins = temp_mod.add_instruction(pre_op, args_ins, {&static_mod}); + } + else + { + ins = temp_mod.add_instruction(pre_op, args_ins); + } + temp_mod.add_return({ins}); + + operation preop = any_cast(ins->get_operator()).op; + auto config = get_tuning_config(ctx, ins, preop, false); + value solution = value{}; + if(config.has_value()) + { + solution = config->solutions.front(); + } + auto compiled_op = compile(ctx, ins, preop, solution); + compiled_op.replace(temp_mod, ins); + run_passes(temp_mod, {dead_code_elimination{}}); + + // Finalize the module before execution + std::vector contexts = {migraphx::context(ctx)}; + temp_mod.finalize(contexts); + + // Build param_map based on ACTUAL parameters that exist + auto param_map = std::unordered_map{}; + 
for(auto i : idx) + { + param_map["temp_mod:x" + std::to_string(i)] = static_args[i]; + } + module_ref temp_mod_ref = &temp_mod; + + auto results = run(temp_mod_ref, param_map); + + if(results.size() > 1) + return results; + return results.front(); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_PRECOMPILE_OPS_HPP diff --git a/src/targets/gpu/lowering.cpp b/src/targets/gpu/lowering.cpp index 0e24c3cc936..9f4d2b43790 100644 --- a/src/targets/gpu/lowering.cpp +++ b/src/targets/gpu/lowering.cpp @@ -182,6 +182,7 @@ struct miopen_apply else if(has_compiler_for(it->name())) { check_shape(s, insert_precompile_op(it)); + check_shape(s, insert_dynamic_code_object_op(it)); } else if(attrs.contains("target")) { @@ -240,6 +241,20 @@ struct miopen_apply ins->module_inputs()); } + instruction_ref insert_dynamic_code_object_op(instruction_ref ins) const + { + assert(ins->get_operator().name() == "gpu::precompile_op"); + + if(not ins->get_shape().dynamic()) + return ins; + + return mod->replace_instruction( + ins, + make_op("gpu::dynamic_code_object_op", {{"pre_op", to_value(ins->get_operator())}}), + ins->inputs(), + ins->module_inputs()); + } + instruction_ref insert_allocation(instruction_ref ins, const shape& s) const { return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}})); @@ -334,7 +349,8 @@ struct miopen_apply static bool use_miopen_pooling(instruction_ref ins) { if(enabled(MIGRAPHX_DISABLE_MIOPEN_POOLING{}) or - not contains({shape::float_type, shape::half_type}, ins->get_shape().type())) + not contains({shape::float_type, shape::half_type}, ins->get_shape().type()) or + ins->get_shape().dynamic()) return false; auto&& op = ins->get_operator(); auto op_val = op.to_value(); @@ -355,7 +371,10 @@ struct miopen_apply { apply_map.emplace("pooling", [=](instruction_ref ins) { if(not use_miopen_pooling(ins)) - return insert_precompile_op(ins); + { + auto preop = 
insert_precompile_op(ins); + return insert_dynamic_code_object_op(preop); + } #if MIGRAPHX_USE_MIOPEN auto output = insert_allocation(ins, ins->get_shape()); std::vector refs = ins->inputs(); @@ -363,7 +382,8 @@ struct miopen_apply refs.push_back(output); return mod->replace_instruction(ins, make_op("gpu::pooling", op.to_value()), refs); #else - return insert_precompile_op(ins); + auto preop = insert_precompile_op(ins); + return insert_dynamic_code_object_op(preop); #endif }); } diff --git a/test/gpu/dynamic_code_object_op.cpp b/test/gpu/dynamic_code_object_op.cpp new file mode 100644 index 00000000000..ec22ded6dae --- /dev/null +++ b/test/gpu/dynamic_code_object_op.cpp @@ -0,0 +1,73 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void run_lowering(migraphx::program& p, bool offload_copy = false) +{ + auto ctx = migraphx::gpu::context{}; + migraphx::run_passes(*p.get_main_module(), {migraphx::gpu::lowering{&ctx, offload_copy}}); +} + +TEST_CASE(dynamic_code_object_op) +{ + migraphx::shape s{migraphx::shape::float_type, {{1, 3}, {2, 4}, {6, 6}}}; + migraphx::program p1; + auto* mm = p1.get_main_module(); + auto a = mm->add_parameter("a", s); + auto b = mm->add_parameter("b", s); + + auto pw = add_pointwise(p1, "main:pointwise0", {a, b}, single_pointwise("add")); + auto pw_name = pw->name(); + auto pw_module_inputs = pw->module_inputs(); + + mm->add_return({pw}); + + run_lowering(p1); + + bool found = false; + for(auto ins : iterator_for(*p1.get_main_module())) + { + if(ins->name() == "gpu::dynamic_code_object_op") + { + found = true; + auto dyn_op = migraphx::any_cast(ins->get_operator()); + auto pre_op = migraphx::any_cast(dyn_op.pre_op); + EXPECT(pre_op.op.name() == pw_name); + EXPECT(ins->module_inputs() == pw_module_inputs); + } + } + EXPECT(found); +} + +int main(int argc, const char* argv[]) { test::run(argc, argv); } diff --git a/test/module_test.cpp b/test/module_test.cpp index 87ab9019e13..65b896db5fa 100644 --- a/test/module_test.cpp +++ b/test/module_test.cpp @@ -803,6 +803,26 @@ TEST_CASE(add_params) EXPECT(m1.get_parameter("x1") == map_ins[add]); } +TEST_CASE(with_static_shapes) +{ + auto create_module = [](const std::vector& input_shapes) { + migraphx::module m; + auto x = m.add_parameter("x", input_shapes[0]); + auto y = m.add_parameter("y", input_shapes[1]); + auto add = m.add_instruction(migraphx::make_op("add"), x, y); + auto reduce_mean = m.add_instruction(migraphx::make_op("reduce_mean", {{"axes", {1}}}), add); + m.add_return({reduce_mean}); + return m; + }; + auto dyn_shape = migraphx::shape{migraphx::shape::float_type, {{1,4}, {4,8}}}; + auto dyn_mod = 
create_module({dyn_shape, dyn_shape}); + + auto static_shape = migraphx::shape{migraphx::shape::float_type, {2, 5}}; + auto static_mod = create_module({static_shape, static_shape}); + + EXPECT(dyn_mod.with_static_shapes({static_shape, static_shape}).sort() == static_mod.sort()); +} + TEST_CASE(linear_graph_sort) { //