diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt index 639812d88d0..abea1bee8bd 100644 --- a/src/targets/gpu/CMakeLists.txt +++ b/src/targets/gpu/CMakeLists.txt @@ -23,6 +23,11 @@ # #################################################################################### find_package(hip REQUIRED) + +if(NOT WIN32) + find_package(hsa-runtime64 REQUIRED CONFIG) + message(STATUS "MIGraphX is using HSA runtime for chiplet query") +endif() if(NOT GPU_TARGETS) set(fatal_msg "HIP package is broken and has no GPU_TARGETS. Please pass GPU_TARGETS to cmake.") if(NOT WIN32) @@ -164,6 +169,7 @@ add_library(migraphx_gpu hip.cpp hipblaslt.cpp hip_gemm_impl.cpp + hsa_chiplet.cpp kernel.cpp lowering.cpp logsoftmax.cpp @@ -397,6 +403,11 @@ else() target_link_libraries(migraphx_gpu PRIVATE migraphx_device) endif() target_link_libraries(migraphx_gpu PRIVATE migraphx_kernels) + +# Link HSA runtime for chiplet query (Linux only) +if(NOT WIN32) + target_link_libraries(migraphx_gpu PRIVATE hsa-runtime64::hsa-runtime64) +endif() if(MIGRAPHX_USE_COMPOSABLEKERNEL) target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library) target_compile_definitions(migraphx_gpu PRIVATE MIGRAPHX_USE_COMPOSABLEKERNEL=1) diff --git a/src/targets/gpu/hsa_chiplet.cpp b/src/targets/gpu/hsa_chiplet.cpp new file mode 100644 index 00000000000..13b4836bad2 --- /dev/null +++ b/src/targets/gpu/hsa_chiplet.cpp @@ -0,0 +1,171 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#include +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +#ifndef _WIN32 + +namespace { + +/// Convert HSA status code to a human-readable string +std::string hsa_error_string(hsa_status_t status) +{ + const char* msg = nullptr; + if(hsa_status_string(status, &msg) == HSA_STATUS_SUCCESS and msg != nullptr) + return msg; + return "Unknown HSA error (code " + std::to_string(static_cast(status)) + ")"; +} + +/// RAII wrapper for HSA runtime initialization. +/// Calls hsa_init() in constructor and hsa_shut_down() in destructor. +struct hsa_guard +{ + hsa_status_t init_status; + bool initialized; + + hsa_guard() : init_status(hsa_init()), initialized(init_status == HSA_STATUS_SUCCESS) {} + + ~hsa_guard() + { + if(initialized) + hsa_shut_down(); + } + + hsa_guard(const hsa_guard&) = delete; + hsa_guard& operator=(const hsa_guard&) = delete; + + explicit operator bool() const { return initialized; } + + hsa_status_t status() const { return init_status; } +}; + +/// Query chiplet counts for all GPU devices and cache the results. +/// This is called once and the results are stored in a static vector. +std::vector query_all_chiplet_counts() +{ + std::vector chiplet_counts; + + hsa_guard guard; + if(not guard) + { + MIGRAPHX_THROW("HSA runtime initialization failed: " + hsa_error_string(guard.status()) + + ". GPU is not accessible."); + } + + // Structure to collect chiplet counts for all GPUs + struct agent_data + { + std::vector* counts; + }; + + agent_data data{&chiplet_counts}; + + // Callback function for hsa_iterate_agents. + // HSA agents are enumerated in the same order as HIP device IDs for GPU agents. + // Reference: ROCm documentation on device enumeration consistency between HIP and HSA. + auto agent_callback = [](hsa_agent_t agent, void* user_data) -> hsa_status_t { + auto* agent_data_ptr = static_cast(user_data); + + hsa_device_type_t device_type; + hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); + if(err != HSA_STATUS_SUCCESS) + return err; + + if(device_type == HSA_DEVICE_TYPE_GPU) + { + uint32_t num_chiplets = 1; + err = hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_XCC), &num_chiplets); + // If the query fails (e.g., older ROCm or unsupported GPU), use default of 1. + // This is expected on older ROCm versions, so no warning needed. + if(err != HSA_STATUS_SUCCESS) + num_chiplets = 1; + + agent_data_ptr->counts->push_back(static_cast(num_chiplets)); + } + + return HSA_STATUS_SUCCESS; + }; + + hsa_status_t status = hsa_iterate_agents(agent_callback, &data); + if(status != HSA_STATUS_SUCCESS and status != HSA_STATUS_INFO_BREAK) + { + MIGRAPHX_THROW("HSA agent enumeration failed: " + hsa_error_string(status) + + ". Unable to query GPU devices."); + } + + return chiplet_counts; +} + +/// Get cached chiplet counts. Thread-safe, queries HSA only once. +const std::vector& get_cached_chiplet_counts() +{ + static std::once_flag flag; + static std::vector counts; + + std::call_once(flag, []() { counts = query_all_chiplet_counts(); }); + + return counts; +} + +} // namespace + +std::size_t get_hsa_chiplet_count(std::size_t device_id) +{ + const auto& counts = get_cached_chiplet_counts(); + + if(device_id < counts.size()) + return counts[device_id]; + + // Device not found - HSA enumerated fewer GPUs than expected. + // This shouldn't happen in normal operation, but return default 1. + return 1; +} + +#else // _WIN32 + +std::size_t get_hsa_chiplet_count(std::size_t /*device_id*/) +{ + // HSA not available on Windows, assume single chiplet. + // TODO: For future architectures with multiple chiplets, + // need a way to query on Windows or hardcode based on gfx number. + return 1; +} + +#endif // _WIN32 + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index db5bb7373f0..21bfdf01d4f 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,9 @@ struct hip_device if(status != hipSuccess) MIGRAPHX_THROW("Failed to get device properties: " + hip_error(status)); + // Query chiplet count once at construction (immutable device property) + chiplet_count = get_hsa_chiplet_count(device_id); + // Set the device prior to Events that get created within a Context. set_device(device_id); @@ -210,6 +214,8 @@ struct hip_device std::size_t get_cu_count() const { return device_props.multiProcessorCount; } + std::size_t get_chiplet_count() const { return chiplet_count; } + std::size_t get_max_workitems_per_cu() const { return device_props.maxThreadsPerMultiProcessor; @@ -222,6 +228,7 @@ struct hip_device private: std::size_t device_id = 0; std::size_t current_stream = 0; + std::size_t chiplet_count = 1; std::vector streams; hipDeviceProp_t device_props; diff --git a/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp b/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp new file mode 100644 index 00000000000..0b5719f4632 --- /dev/null +++ b/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_HSA_CHIPLET_HPP +#define MIGRAPHX_GUARD_RTGLIB_HSA_CHIPLET_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +/// Query the number of chiplets (XCCs) for a given HIP device ID. +/// Returns 1 if HSA is not available or if the query fails. +MIGRAPHX_GPU_EXPORT std::size_t get_hsa_chiplet_count(std::size_t device_id); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp index eb3daa141c7..cd96546e5d8 100644 --- a/src/targets/gpu/mlir.cpp +++ b/src/targets/gpu/mlir.cpp @@ -639,7 +639,8 @@ struct mlir_program {"sym_name", sym_name}, {"kernel", std::string("mixr")}, {"arch", target_arch}, - {"num_cu", num_cu}}); + {"num_cu", num_cu}, + {"num_chiplets", num_chiplets}}); if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{})) { ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}}); @@ -900,6 +901,7 @@ struct mlir_program const auto& device = migraphx_ctx.get_current_device(); target_arch = device.get_device_name(); num_cu = device.get_cu_count(); + num_chiplets = device.get_chiplet_count(); } std::pair get_launch_params() const @@ -1068,6 +1070,7 @@ struct mlir_program std::deque strings{}; std::string target_arch = ""; std::size_t num_cu = 0; + std::size_t num_chiplets = 0; std::string sym_name; }; diff --git a/test/gpu/mlir.cpp b/test/gpu/mlir.cpp index f61a3e17943..56c9d301712 100644 --- a/test/gpu/mlir.cpp +++ b/test/gpu/mlir.cpp @@ -161,9 +161,9 @@ static std::string get_attrs() { if(migraphx::enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{})) { - return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_cu = 0 : i64})"; + return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})"; } - return R"({arch = "", kernel = "mixr", num_cu = 0 : i64})"; + return R"({arch = "", kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})"; } TEST_CASE(conv)