Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/include/migraphx/module.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -323,6 +323,12 @@ struct MIGRAPHX_EXPORT module
void annotate(std::ostream& os, std::function<void(instruction_ref)> a) const;

std::vector<module_ref> get_sub_modules(bool shallow = false) const;

/* Creates a new module with the same instructions but with different input parameter shapes.
Returns the new module by value without modifying the original.
*/
module with_static_shapes(const std::vector<shape>& input_shapes);

/* sorts the module in topological order aka reverse-post order (RPO) DFS order
it takes last instruction or @return as the root and walks back the graph and moves inputs
of the each instruction such that it appears before the instruction itself.
Expand Down
7 changes: 5 additions & 2 deletions src/include/migraphx/op/pointwise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,18 @@ struct pointwise
MIGRAPHX_THROW("pointwise should have at least one input");
auto* pm = mods.front();
auto pnames = pm->get_parameter_names();
check_shapes{inputs, *this}.has(pnames.size()).same_dims();
check_shapes{inputs, *this, true}.has(pnames.size()).same_dims();

std::vector<std::size_t> scalar_const_out_lens =
inputs.front().dynamic() ? std::vector<std::size_t>{} : inputs.front().lens();

const auto rank = inputs.front().ndim();
const bool has_broadcasts =
std::any_of(inputs.begin(), inputs.end(), [](auto s) { return s.broadcasted(); });

auto result = pm->compute_shapes(
(rank > 1 and has_broadcasts) ? remove_broadcasts(inputs) : inputs,
{.name = name(), .strict_type = true, .scalar_const_out_lens = inputs.front().lens()});
{.name = name(), .strict_type = true, .scalar_const_out_lens = scalar_const_out_lens});
if(result.size() == 1)
return result.front();
return shape{result};
Expand Down
78 changes: 76 additions & 2 deletions src/module.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -25,6 +25,7 @@
#include <migraphx/algorithm.hpp>
#include <migraphx/module.hpp>
#include <migraphx/bit_signal.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/target.hpp>
Expand Down Expand Up @@ -717,7 +718,8 @@ std::vector<shape> module::compute_shapes(const std::vector<shape>& inputs,
ins->get_shape().type_string() + " but passed " +
ins_shapes[ins].type_string());
}
if(options.strict_lens and ins->get_shape().lens() != ins_shapes[ins].lens())
if(not ins->get_shape().dynamic() and options.strict_lens and
ins->get_shape().lens() != ins_shapes[ins].lens())
{
MIGRAPHX_THROW(options.name + ": Mismatched lens: expected {" +
to_string_range(ins->get_shape().lens()) + "} but passed {" +
Expand Down Expand Up @@ -1466,6 +1468,78 @@ std::vector<module_ref> module::get_sub_modules(bool shallow) const
return vec_modules;
}

module module::with_static_shapes(const std::vector<shape>& input_shapes)
{
    // Build a copy of this module whose operator graph is identical, but whose
    // parameters take on the shapes given in input_shapes. The original module
    // is left untouched and the copy is returned by value.

    // input_shapes must pair up one-to-one (by position) with the parameters.
    auto names = this->get_parameter_names();
    assert(names.size() == input_shapes.size());

    // Pair each parameter name with its replacement shape.
    std::unordered_map<std::string, shape> new_shapes;
    for(std::size_t i = 0; i < names.size(); ++i)
        new_shapes[names[i]] = input_shapes[i];

    module result;

    // Maps each instruction of this module to its copy in result.
    std::unordered_map<instruction_ref, instruction_ref> old_to_new;

    // Pass 1: recreate every parameter with its replacement shape so that
    // later instructions can resolve their remapped inputs.
    for(auto ins : iterator_for(*this))
    {
        if(ins->name() != "@param")
            continue;
        auto pname = any_cast<builtin::param>(ins->get_operator()).parameter;
        assert(new_shapes.count(pname) > 0);
        old_to_new[ins] = result.add_parameter(pname, new_shapes.at(pname));
    }

    // Pass 2: copy every non-parameter instruction in program order,
    // remapping its inputs through old_to_new.
    for(auto ins : iterator_for(*this))
    {
        if(ins->name() == "@param")
            continue;

        std::vector<instruction_ref> mapped_args;
        mapped_args.reserve(ins->inputs().size());
        for(auto arg : ins->inputs())
            mapped_args.push_back(old_to_new.at(arg));

        // Sub-module arguments are shared with the original, not copied.
        auto mod_args = ins->module_inputs();

        instruction_ref copied;
        if(ins->name() == "@literal")
            copied = result.add_literal(ins->get_literal());
        else if(ins->name() == "@return")
            copied = result.add_return(mapped_args);
        else if(mod_args.empty())
            copied = result.add_instruction(ins->get_operator(), mapped_args);
        else
            copied = result.add_instruction(ins->get_operator(), mapped_args, mod_args);
        old_to_new[ins] = copied;
    }

    return result;
}

module& module::sort()
{
if(this->begin() == this->end())
Expand Down
37 changes: 2 additions & 35 deletions src/targets/gpu/compile_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/time_op.hpp>
#include <migraphx/gpu/precompile_ops.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand All @@ -45,42 +46,8 @@ namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_BENCHMARKING);

struct precompile_op
{
operation op = op::identity{};
std::size_t additional_args = 1;
bool ignore_modules = false;
std::optional<shape> output_shape = nullopt;

template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"),
f(self.additional_args, "additional_args"),
f(self.ignore_modules, "ignore_modules"),
f(self.output_shape, "output_shape"));
}

std::string name() const { return "gpu::precompile_op"; }

shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
// Pop off additional args
inputs.resize(inputs.size() - additional_args);
if(output_shape.has_value())
return output_shape.value();
if(ignore_modules)
return op.compute_shape(inputs);
return op.compute_shape(inputs, mods);
}

std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};

MIGRAPHX_REGISTER_OP(precompile_op);
MIGRAPHX_REGISTER_OP(dynamic_code_object_op);

struct compiled_result
{
Expand Down
193 changes: 193 additions & 0 deletions src/targets/gpu/include/migraphx/gpu/precompile_ops.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#ifndef MIGRAPHX_GUARD_GPU_PRECOMPILE_OPS_HPP
#define MIGRAPHX_GUARD_GPU_PRECOMPILE_OPS_HPP

#include <migraphx/gpu/config.hpp>
#include <string>
#include <migraphx/operation.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/optional.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/module.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct precompile_op
{
    // The wrapped operation to be compiled later.
    operation op = op::identity{};
    // Number of trailing arguments (e.g. the output allocation) that are not
    // inputs to the wrapped op.
    std::size_t additional_args = 1;
    // When true, compute the shape without passing the sub-modules to the op.
    bool ignore_modules = false;
    // When set, overrides the computed output shape entirely.
    std::optional<shape> output_shape = nullopt;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.op, "op"),
                    f(self.additional_args, "additional_args"),
                    f(self.ignore_modules, "ignore_modules"),
                    f(self.output_shape, "output_shape"));
    }

    std::string name() const { return "gpu::precompile_op"; }

    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
    {
        // Drop the trailing non-input arguments before delegating to op.
        inputs.erase(inputs.end() - additional_args, inputs.end());
        if(output_shape.has_value())
            return *output_shape;
        return ignore_modules ? op.compute_shape(inputs) : op.compute_shape(inputs, mods);
    }

    // The last argument (the output buffer) aliases the result.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};

struct dynamic_code_object_op
{
    // The precompile op to be compiled once the dynamic shapes are resolved.
    operation pre_op = precompile_op{};
    // When set, overrides the computed output shape.
    std::optional<shape> output_shape = nullopt;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.pre_op, "pre_op"), f(self.output_shape, "output_shape"));
    }

    std::string name() const { return "gpu::dynamic_code_object_op"; }

    // inputs is only forwarded, never modified, so take it by const reference
    // (fixes clang-tidy performance-unnecessary-value-param, warnings-as-errors).
    shape compute_shape(const std::vector<shape>& inputs,
                        const std::vector<module_ref>& mods) const
    {
        return pre_op.compute_shape(inputs, mods);
    }

    // The last argument (the output buffer) aliases the result.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }

    // Compiles pre_op at run time, once the actual (static) argument shapes
    // are known, then executes the freshly compiled code object.
    // - args: runtime arguments; the last one is the output buffer.
    // - module_args: optional sub-modules of pre_op (e.g. a pointwise module).
    // - run: callback that executes a module against a parameter map; taken by
    //   const reference (fixes clang-tidy performance-unnecessary-value-param).
    argument compute(context& ctx,
                     const shape&,
                     const std::vector<argument>& args,
                     const std::vector<module_ref>& module_args,
                     const std::function<std::vector<argument>(
                         module_ref&, const std::unordered_map<std::string, argument>&)>& run) const
    {
        auto static_args = args;
        auto output_arg  = static_args.back();
        module static_mod;
        if(not module_args.empty())
        {
            // Rewrite the sub-module with the now-known static input shapes.
            auto mod_args = std::vector<argument>{args.begin(), args.end() - 1};
            static_mod    = module_args.front()->with_static_shapes(to_shapes(mod_args));
            static_mod.set_bypass(true);

            // If the output buffer still carries a dynamic shape, compute the
            // static output shape and reshape the buffer to it.
            if(output_arg.get_shape().dynamic())
            {
                auto out_shapes = static_mod.compute_shapes(to_shapes(mod_args));
                auto rsp_shape = (out_shapes.size() > 1) ? shape{out_shapes} : out_shapes.front();
                static_args[static_args.size() - 1] = output_arg.reshape(rsp_shape);
            }
        }
        else
        {
            if(output_arg.get_shape().dynamic())
            {
                auto out_shape = pre_op.compute_shape(to_shapes(static_args));
                static_args[static_args.size() - 1] = output_arg.reshape(out_shape);
            }
        }

        // Build a throwaway module holding just pre_op so the existing
        // compile/tuning machinery can be reused unchanged.
        auto temp_mod = module("temp_mod");
        std::vector<instruction_ref> args_ins;
        std::vector<size_t> idx(static_args.size());
        std::iota(std::begin(idx), std::end(idx), 0);
        std::transform(static_args.begin(),
                       static_args.end(),
                       idx.begin(),
                       std::back_inserter(args_ins),
                       [&](const auto& arg, const auto& i) {
                           return temp_mod.add_parameter("temp_mod:x" + std::to_string(i),
                                                         arg.get_shape());
                       });
        instruction_ref ins;
        if(not module_args.empty())
        {
            ins = temp_mod.add_instruction(pre_op, args_ins, {&static_mod});
        }
        else
        {
            ins = temp_mod.add_instruction(pre_op, args_ins);
        }
        temp_mod.add_return({ins});

        // Tune (when a config is available) and compile the wrapped op, then
        // splice the compiled code object in place of the precompile op.
        operation preop = any_cast<precompile_op>(ins->get_operator()).op;
        auto config     = get_tuning_config(ctx, ins, preop, false);
        value solution  = value{};
        if(config.has_value())
        {
            solution = config->solutions.front();
        }
        auto compiled_op = compile(ctx, ins, preop, solution);
        compiled_op.replace(temp_mod, ins);
        run_passes(temp_mod, {dead_code_elimination{}});

        // Finalize the module before execution
        std::vector<migraphx::context> contexts = {migraphx::context(ctx)};
        temp_mod.finalize(contexts);

        // Bind each temporary parameter to its runtime argument.
        auto param_map = std::unordered_map<std::string, argument>{};
        for(auto i : idx)
        {
            param_map["temp_mod:x" + std::to_string(i)] = static_args[i];
        }
        module_ref temp_mod_ref = &temp_mod;

        auto results = run(temp_mod_ref, param_map);

        // Multiple outputs are packed into a single tuple argument.
        if(results.size() > 1)
            return results;
        return results.front();
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PRECOMPILE_OPS_HPP
Loading
Loading