ROCm · kahmed10 · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025
@@ -23,6 +23,11 @@
 # ####################################################################################
 
 find_package(hip REQUIRED)
+
+if(NOT WIN32)
+    find_package(hsa-runtime64 REQUIRED CONFIG)
+    message(STATUS "MIGraphX is using HSA runtime for chiplet query")
+endif()
 if(NOT GPU_TARGETS)
     set(fatal_msg "HIP package is broken and has no GPU_TARGETS. Please pass GPU_TARGETS to cmake.")
     if(NOT WIN32)
@@ -164,6 +169,7 @@ add_library(migraphx_gpu
     hip.cpp
     hipblaslt.cpp
     hip_gemm_impl.cpp
+    hsa_chiplet.cpp
     kernel.cpp
     lowering.cpp
     logsoftmax.cpp
@@ -397,6 +403,11 @@ else()
     target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
 endif()
 target_link_libraries(migraphx_gpu PRIVATE migraphx_kernels)
+
+# Link HSA runtime for chiplet query (Linux only)
+if(NOT WIN32)
+    target_link_libraries(migraphx_gpu PRIVATE hsa-runtime64::hsa-runtime64)
+endif()
 if(MIGRAPHX_USE_COMPOSABLEKERNEL)
     target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library)
     target_compile_definitions(migraphx_gpu PRIVATE MIGRAPHX_USE_COMPOSABLEKERNEL=1)

@@ -0,0 +1,171 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/gpu/hsa_chiplet.hpp>
+#include <migraphx/errors.hpp>
+#include <vector>
+#include <mutex>
+
+#ifndef _WIN32
+#include <hsa/hsa.h>
+#include <hsa/hsa_ext_amd.h>
+#endif
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+#ifndef _WIN32
+
+namespace {
+
+/// Convert HSA status code to a human-readable string
+std::string hsa_error_string(hsa_status_t status)
+{
+    const char* msg = nullptr;
+    if(hsa_status_string(status, &msg) == HSA_STATUS_SUCCESS and msg != nullptr)
+        return msg;
+    return "Unknown HSA error (code " + std::to_string(static_cast<int>(status)) + ")";
+}
+
+/// RAII wrapper for HSA runtime initialization.
+/// Calls hsa_init() in constructor and hsa_shut_down() in destructor.
+struct hsa_guard
+{
+    hsa_status_t init_status;
+    bool initialized;
+
+    hsa_guard() : init_status(hsa_init()), initialized(init_status == HSA_STATUS_SUCCESS) {}
+
+    ~hsa_guard()
+    {
+        if(initialized)
+            hsa_shut_down();
+    }
+
+    hsa_guard(const hsa_guard&)            = delete;
+    hsa_guard& operator=(const hsa_guard&) = delete;
+
+    explicit operator bool() const { return initialized; }
+
+    hsa_status_t status() const { return init_status; }
+};
+
+/// Query chiplet counts for all GPU devices and cache the results.
+/// This is called once and the results are stored in a static vector.
+std::vector<std::size_t> query_all_chiplet_counts()
+{
+    std::vector<std::size_t> chiplet_counts;
+
+    hsa_guard guard;
+    if(not guard)
+    {
+        MIGRAPHX_THROW("HSA runtime initialization failed: " + hsa_error_string(guard.status()) +
+                       ". GPU is not accessible.");
+    }
+
+    // Structure to collect chiplet counts for all GPUs
+    struct agent_data
+    {
+        std::vector<std::size_t>* counts;
+    };
+
+    agent_data data{&chiplet_counts};
+
+    // Callback function for hsa_iterate_agents.
+    // HSA agents are enumerated in the same order as HIP device IDs for GPU agents.
+    // Reference: ROCm documentation on device enumeration consistency between HIP and HSA.
+    auto agent_callback = [](hsa_agent_t agent, void* user_data) -> hsa_status_t {
+        auto* agent_data_ptr = static_cast<agent_data*>(user_data);
+
+        hsa_device_type_t device_type;
+        hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+        if(err != HSA_STATUS_SUCCESS)
+            return err;
+
+        if(device_type == HSA_DEVICE_TYPE_GPU)
+        {
+            uint32_t num_chiplets = 1;
+            err                   = hsa_agent_get_info(
+                agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC), &num_chiplets);
+            // If the query fails (e.g., older ROCm or unsupported GPU), use default of 1.
+            // This is expected on older ROCm versions, so no warning needed.
+            if(err != HSA_STATUS_SUCCESS)
+                num_chiplets = 1;
+
+            agent_data_ptr->counts->push_back(static_cast<std::size_t>(num_chiplets));
+        }
+
+        return HSA_STATUS_SUCCESS;
+    };
+
+    hsa_status_t status = hsa_iterate_agents(agent_callback, &data);
+    if(status != HSA_STATUS_SUCCESS and status != HSA_STATUS_INFO_BREAK)
+    {
+        MIGRAPHX_THROW("HSA agent enumeration failed: " + hsa_error_string(status) +
+                       ". Unable to query GPU devices.");
+    }
+
+    return chiplet_counts;
+}
+
+/// Get cached chiplet counts. Thread-safe, queries HSA only once.
+const std::vector<std::size_t>& get_cached_chiplet_counts()
+{
+    static std::once_flag flag;
+    static std::vector<std::size_t> counts;
+
+    std::call_once(flag, []() { counts = query_all_chiplet_counts(); });
+
+    return counts;
+}
+
+} // namespace
+
+std::size_t get_hsa_chiplet_count(std::size_t device_id)
+{
+    const auto& counts = get_cached_chiplet_counts();
+
+    if(device_id < counts.size())
+        return counts[device_id];
+
+    // Device not found - HSA enumerated fewer GPUs than expected.
+    // This shouldn't happen in normal operation, but return default 1.
+    return 1;
+}
+
+#else // _WIN32
+
+std::size_t get_hsa_chiplet_count(std::size_t /*device_id*/)
+{
+    // HSA not available on Windows, assume single chiplet.
+    // TODO: For future architectures with multiple chiplets,
+    // need a way to query on Windows or hardcode based on gfx number.
+    return 1;
+}
+
+#endif // _WIN32
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -39,6 +39,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/problem_cache.hpp>
+#include <migraphx/gpu/hsa_chiplet.hpp>
 #include <unordered_map>
 #include <memory>
 
@@ -61,6 +62,9 @@ struct hip_device
         if(status != hipSuccess)
             MIGRAPHX_THROW("Failed to get device properties: " + hip_error(status));
 
+        // Query chiplet count once at construction (immutable device property)
+        chiplet_count = get_hsa_chiplet_count(device_id);
+
         // Set the device prior to Events that get created within a Context.
         set_device(device_id);
 
@@ -210,6 +214,8 @@ struct hip_device
 
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
+    std::size_t get_chiplet_count() const { return chiplet_count; }
+
     std::size_t get_max_workitems_per_cu() const
     {
         return device_props.maxThreadsPerMultiProcessor;
@@ -222,6 +228,7 @@ struct hip_device
     private:
     std::size_t device_id      = 0;
     std::size_t current_stream = 0;
+    std::size_t chiplet_count  = 1;
     std::vector<stream> streams;
     hipDeviceProp_t device_props;
 

@@ -0,0 +1,43 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_HSA_CHIPLET_HPP
+#define MIGRAPHX_GUARD_RTGLIB_HSA_CHIPLET_HPP
+
+#include <migraphx/gpu/export.h>
+#include <migraphx/config.hpp>
+#include <cstddef>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+/// Query the number of chiplets (XCCs) for a given HIP device ID.
+/// Returns 1 if HSA is not available or if the query fails.
+MIGRAPHX_GPU_EXPORT std::size_t get_hsa_chiplet_count(std::size_t device_id);
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
@@ -639,7 +639,8 @@ struct mlir_program
                             {"sym_name", sym_name},
                             {"kernel", std::string("mixr")},
                             {"arch", target_arch},
-                            {"num_cu", num_cu}});
+                            {"num_cu", num_cu},
+                            {"num_chiplets", num_chiplets}});
         if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
         {
             ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}});
@@ -900,6 +901,7 @@ struct mlir_program
         const auto& device = migraphx_ctx.get_current_device();
         target_arch        = device.get_device_name();
         num_cu             = device.get_cu_count();
+        num_chiplets       = device.get_chiplet_count();
     }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const
@@ -1068,6 +1070,7 @@ struct mlir_program
     std::deque<std::string> strings{};
     std::string target_arch = "";
     std::size_t num_cu      = 0;
+    std::size_t num_chiplets = 0;
-    std::size_t num_chiplets = 0;
+    std::size_t num_chiplets = 1;
-    std::size_t num_chiplets = 0;
+    std::size_t num_chiplets = 1;
     std::string sym_name;
 };
 

@@ -161,9 +161,9 @@ static std::string get_attrs()
 {
     if(migraphx::enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
     {
-        return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_cu = 0 : i64})";
+        return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})";
     }
-    return R"({arch = "", kernel = "mixr", num_cu = 0 : i64})";
+    return R"({arch = "", kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})";
 }
 
 TEST_CASE(conv)