From 08a37f2378ffbb73f92ddc9c89f3d98f255b4442 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:00:48 -0600
Subject: [PATCH 01/11] query hsa for number of chiplets

---
 .../gpu/include/migraphx/gpu/context.hpp      | 75 +++++++++++++++++++
 src/targets/gpu/mlir.cpp                      |  5 +-
 2 files changed, 79 insertions(+), 1 deletion(-)
diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index db5bb7373f0..dbc2e58d7d4 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -42,6 +42,12 @@
 #include <unordered_map>
 #include <memory>
 
+// HSA is only available on non-Windows platforms
+#ifndef _WIN32
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#endif
+
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -210,6 +216,75 @@ struct hip_device
 
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
+    std::size_t get_chiplet_count() const
+    {
+#ifndef _WIN32
+        // Structure to pass data through HSA agent iteration
+        struct agent_info
+        {
+            std::size_t target_device_id;
+            std::size_t gpu_count;
+            uint32_t num_chiplets;
+            bool found;
+        };
+
+        hsa_status_t status = hsa_init();
+        if(status != HSA_STATUS_SUCCESS)
+        {
+            // If HSA init fails, return 1 as default (single chiplet)
+            return 1;
+        }
+
+        agent_info info{};
+        info.target_device_id = device_id;
+        info.gpu_count        = 0;
+        info.num_chiplets     = 0;
+        info.found            = false;
+
+        // Callback function for hsa_iterate_agents
+        // GPUs are enumerated in the same order as HIP device IDs
+        auto agent_callback = [](hsa_agent_t agent, void* data) -> hsa_status_t {
+            auto* info = static_cast<agent_info*>(data);
+
+            hsa_device_type_t device_type;
+            hsa_status_t err =
+                hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+            if(err != HSA_STATUS_SUCCESS)
+                return err;
+
+            if(device_type == HSA_DEVICE_TYPE_GPU)
+            {
+                // Check if this is the GPU we're looking for (by enumeration order)
+                if(info->gpu_count == info->target_device_id)
+                {
+                    err = hsa_agent_get_info(
+                        agent,
+                        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
+                        &info->num_chiplets);
+                    if(err != HSA_STATUS_SUCCESS)
+                        return err;
+
+                    info->found = true;
+                    return HSA_STATUS_INFO_BREAK; // Stop iteration
+                }
+                info->gpu_count++;
+            }
+
+            return HSA_STATUS_SUCCESS;
+        };
+
+        // Iterate through all HSA agents to find matching GPU
+        status = hsa_iterate_agents(agent_callback, &info);
+
+        hsa_shut_down();
+        return info.num_chiplets;
+#else
+        // HSA not available on Windows, assume single chiplet
+        return 1;
+#endif
+    }
+
+
     std::size_t get_max_workitems_per_cu() const
     {
         return device_props.maxThreadsPerMultiProcessor;
diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp
index 9ca5c35330c..fc1438457c1 100644
--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -638,7 +638,8 @@ struct mlir_program
                             {"sym_name", sym_name},
                             {"kernel", std::string("mixr")},
                             {"arch", target_arch},
-                            {"num_cu", num_cu}});
+                            {"num_cu", num_cu},
+                            {"num_chiplets", num_chiplets}});
         if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
         {
             ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}});
@@ -899,6 +900,7 @@ struct mlir_program
         const auto& device = migraphx_ctx.get_current_device();
         target_arch        = device.get_device_name();
         num_cu             = device.get_cu_count();
+        num_chiplets        = device.get_chiplet_count();
     }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const
@@ -1067,6 +1069,7 @@ struct mlir_program
     std::deque<std::string> strings{};
     std::string target_arch = "";
     std::size_t num_cu      = 0;
+    std::size_t num_chiplets = 0;
     std::string sym_name;
 };
 

From f0a44cbe93f8b6fb1f3392923d0421338310cdaa Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:01:11 -0600
Subject: [PATCH 02/11] formatting

---
 src/targets/gpu/include/migraphx/gpu/context.hpp | 4 +---
 src/targets/gpu/mlir.cpp                         | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index dbc2e58d7d4..712bd4c7814 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -247,8 +247,7 @@ struct hip_device
             auto* info = static_cast<agent_info*>(data);
 
             hsa_device_type_t device_type;
-            hsa_status_t err =
-                hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+            hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
             if(err != HSA_STATUS_SUCCESS)
                 return err;
 
@@ -284,7 +283,6 @@ struct hip_device
 #endif
     }
 
-
     std::size_t get_max_workitems_per_cu() const
     {
         return device_props.maxThreadsPerMultiProcessor;
diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp
index fc1438457c1..3d041ef1ba8 100644
--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -900,7 +900,7 @@ struct mlir_program
         const auto& device = migraphx_ctx.get_current_device();
         target_arch        = device.get_device_name();
         num_cu             = device.get_cu_count();
-        num_chiplets        = device.get_chiplet_count();
+        num_chiplets       = device.get_chiplet_count();
     }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const

From f9a07c3b719ed2d5af4e2115519f572f0988441a Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:32:14 -0600
Subject: [PATCH 03/11] add TODO for future archs

---
 src/targets/gpu/include/migraphx/gpu/context.hpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index 712bd4c7814..52989a270de 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -279,6 +279,9 @@ struct hip_device
         return info.num_chiplets;
 #else
         // HSA not available on Windows, assume single chiplet
+        // TODO: for future archs that have multiple chiplets,
+        // need a way to query on Windows or just hardcode
+        // based on gfx number
         return 1;
 #endif
     }

From 37fbdc04f59d4589ac277508804d0c21327e6b17 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Tue, 13 Jan 2026 12:53:43 -0600
Subject: [PATCH 04/11] refactor and move to different file

---
 src/targets/gpu/CMakeLists.txt                |  12 ++
 src/targets/gpu/hsa_chiplet.cpp               | 161 ++++++++++++++++++
 .../gpu/include/migraphx/gpu/context.hpp      |  77 +--------
 .../gpu/include/migraphx/gpu/hsa_chiplet.hpp  |  44 +++++
 4 files changed, 219 insertions(+), 75 deletions(-)
 create mode 100644 src/targets/gpu/hsa_chiplet.cpp
 create mode 100644 src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp

diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt
index ee725b1d638..b29494281a6 100644
--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -23,6 +23,12 @@
 # ####################################################################################
 
 find_package(hip REQUIRED)
+
+# Find HSA runtime for chiplet count query (Linux only)
+if(NOT WIN32)
+    find_package(hsa-runtime64 REQUIRED CONFIG)
+    message(STATUS "MIGraphX is using HSA runtime for chiplet query")
+endif()
 if(NOT GPU_TARGETS)
     set(fatal_msg "HIP package is broken and has no GPU_TARGETS. Please pass GPU_TARGETS to cmake.")
     if(NOT WIN32)
@@ -164,6 +170,7 @@ add_library(migraphx_gpu
     hip.cpp
     hipblaslt.cpp
     hip_gemm_impl.cpp
+    hsa_chiplet.cpp
     kernel.cpp
     lowering.cpp
     logsoftmax.cpp
@@ -396,6 +403,11 @@ else()
     target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
 endif()
 target_link_libraries(migraphx_gpu PRIVATE migraphx_kernels)
+
+# Link HSA runtime for chiplet query (Linux only)
+if(NOT WIN32)
+    target_link_libraries(migraphx_gpu PRIVATE hsa-runtime64::hsa-runtime64)
+endif()
 if(MIGRAPHX_USE_COMPOSABLEKERNEL)
     target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library)
     target_compile_definitions(migraphx_gpu PRIVATE MIGRAPHX_USE_COMPOSABLEKERNEL=1)
diff --git a/src/targets/gpu/hsa_chiplet.cpp b/src/targets/gpu/hsa_chiplet.cpp
new file mode 100644
index 00000000000..d14b6870d00
--- /dev/null
+++ b/src/targets/gpu/hsa_chiplet.cpp
@@ -0,0 +1,161 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/gpu/hsa_chiplet.hpp>
+#include <vector>
+#include <mutex>
+
+#ifndef _WIN32
+#include <hsa/hsa.h>
+#include <hsa/hsa_ext_amd.h>
+#endif
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+#ifndef _WIN32
+
+namespace {
+
+/// RAII wrapper for HSA runtime initialization.
+/// Calls hsa_init() in constructor and hsa_shut_down() in destructor.
+struct hsa_guard
+{
+    bool initialized = false;
+
+    hsa_guard()
+    {
+        hsa_status_t status = hsa_init();
+        initialized         = (status == HSA_STATUS_SUCCESS);
+    }
+
+    ~hsa_guard()
+    {
+        if(initialized)
+            hsa_shut_down();
+    }
+
+    hsa_guard(const hsa_guard&)            = delete;
+    hsa_guard& operator=(const hsa_guard&) = delete;
+
+    explicit operator bool() const { return initialized; }
+};
+
+/// Query chiplet counts for all GPU devices and cache the results.
+/// This is called once and the results are stored in a static vector.
+std::vector<std::size_t> query_all_chiplet_counts()
+{
+    std::vector<std::size_t> chiplet_counts;
+
+    hsa_guard guard;
+    if(not guard)
+    {
+        // HSA init failed, return empty vector
+        return chiplet_counts;
+    }
+
+    // Structure to collect chiplet counts for all GPUs
+    struct agent_data
+    {
+        std::vector<std::size_t>* counts;
+    };
+
+    agent_data data{&chiplet_counts};
+
+    // Callback function for hsa_iterate_agents.
+    // HSA agents are enumerated in the same order as HIP device IDs for GPU agents.
+    // Reference: ROCm documentation on device enumeration consistency between HIP and HSA.
+    auto agent_callback = [](hsa_agent_t agent, void* user_data) -> hsa_status_t {
+        auto* agent_data_ptr = static_cast<agent_data*>(user_data);
+
+        hsa_device_type_t device_type;
+        hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+        if(err != HSA_STATUS_SUCCESS)
+            return err;
+
+        if(device_type == HSA_DEVICE_TYPE_GPU)
+        {
+            uint32_t num_chiplets = 1;
+            err                   = hsa_agent_get_info(
+                agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC), &num_chiplets);
+            // If the query fails (e.g., older ROCm or unsupported GPU), use default of 1
+            if(err != HSA_STATUS_SUCCESS)
+                num_chiplets = 1;
+
+            agent_data_ptr->counts->push_back(static_cast<std::size_t>(num_chiplets));
+        }
+
+        return HSA_STATUS_SUCCESS;
+    };
+
+    hsa_status_t status = hsa_iterate_agents(agent_callback, &data);
+    if(status != HSA_STATUS_SUCCESS and status != HSA_STATUS_INFO_BREAK)
+    {
+        // Iteration failed, return whatever we collected (may be empty)
+        return chiplet_counts;
+    }
+
+    return chiplet_counts;
+}
+
+/// Get cached chiplet counts. Thread-safe, queries HSA only once.
+const std::vector<std::size_t>& get_cached_chiplet_counts()
+{
+    static std::once_flag flag;
+    static std::vector<std::size_t> counts;
+
+    std::call_once(flag, []() { counts = query_all_chiplet_counts(); });
+
+    return counts;
+}
+
+} // namespace
+
+std::size_t get_hsa_chiplet_count(std::size_t device_id)
+{
+    const auto& counts = get_cached_chiplet_counts();
+
+    if(device_id < counts.size())
+        return counts[device_id];
+
+    // Device not found in HSA enumeration, return default of 1
+    return 1;
+}
+
+#else // _WIN32
+
+std::size_t get_hsa_chiplet_count(std::size_t /*device_id*/)
+{
+    // HSA not available on Windows, assume single chiplet.
+    // TODO: For future architectures with multiple chiplets,
+    // need a way to query on Windows or hardcode based on gfx number.
+    return 1;
+}
+
+#endif // _WIN32
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index 52989a270de..dfc64505e47 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -39,15 +39,10 @@
 #include <migraphx/config.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/problem_cache.hpp>
+#include <migraphx/gpu/hsa_chiplet.hpp>
 #include <unordered_map>
 #include <memory>
 
-// HSA is only available on non-Windows platforms
-#ifndef _WIN32
-#include "hsa/hsa.h"
-#include "hsa/hsa_ext_amd.h"
-#endif
-
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -216,75 +211,7 @@ struct hip_device
 
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
-    std::size_t get_chiplet_count() const
-    {
-#ifndef _WIN32
-        // Structure to pass data through HSA agent iteration
-        struct agent_info
-        {
-            std::size_t target_device_id;
-            std::size_t gpu_count;
-            uint32_t num_chiplets;
-            bool found;
-        };
-
-        hsa_status_t status = hsa_init();
-        if(status != HSA_STATUS_SUCCESS)
-        {
-            // If HSA init fails, return 1 as default (single chiplet)
-            return 1;
-        }
-
-        agent_info info{};
-        info.target_device_id = device_id;
-        info.gpu_count        = 0;
-        info.num_chiplets     = 0;
-        info.found            = false;
-
-        // Callback function for hsa_iterate_agents
-        // GPUs are enumerated in the same order as HIP device IDs
-        auto agent_callback = [](hsa_agent_t agent, void* data) -> hsa_status_t {
-            auto* info = static_cast<agent_info*>(data);
-
-            hsa_device_type_t device_type;
-            hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
-            if(err != HSA_STATUS_SUCCESS)
-                return err;
-
-            if(device_type == HSA_DEVICE_TYPE_GPU)
-            {
-                // Check if this is the GPU we're looking for (by enumeration order)
-                if(info->gpu_count == info->target_device_id)
-                {
-                    err = hsa_agent_get_info(
-                        agent,
-                        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
-                        &info->num_chiplets);
-                    if(err != HSA_STATUS_SUCCESS)
-                        return err;
-
-                    info->found = true;
-                    return HSA_STATUS_INFO_BREAK; // Stop iteration
-                }
-                info->gpu_count++;
-            }
-
-            return HSA_STATUS_SUCCESS;
-        };
-
-        // Iterate through all HSA agents to find matching GPU
-        status = hsa_iterate_agents(agent_callback, &info);
-
-        hsa_shut_down();
-        return info.num_chiplets;
-#else
-        // HSA not available on Windows, assume single chiplet
-        // TODO: for future archs that have multiple chiplets,
-        // need a way to query on Windows or just hardcode
-        // based on gfx number
-        return 1;
-#endif
-    }
+    std::size_t get_chiplet_count() const { return get_hsa_chiplet_count(device_id); }
 
     std::size_t get_max_workitems_per_cu() const
     {
diff --git a/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp b/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp
new file mode 100644
index 00000000000..0b77be8fb72
--- /dev/null
+++ b/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp
@@ -0,0 +1,44 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_HSA_CHIPLET_HPP
+#define MIGRAPHX_GUARD_RTGLIB_HSA_CHIPLET_HPP
+
+#include <migraphx/gpu/export.h>
+#include <migraphx/config.hpp>
+#include <cstddef>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+/// Query the number of chiplets (XCCs) for a given HIP device ID.
+/// Returns 1 if HSA is not available or if the query fails.
+MIGRAPHX_GPU_EXPORT std::size_t get_hsa_chiplet_count(std::size_t device_id);
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
+

From 260d14e61b47b288f9e122b8cea9148c66712f2f Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Tue, 13 Jan 2026 12:56:48 -0600
Subject: [PATCH 05/11] formatting and update copyright

---
 src/targets/gpu/CMakeLists.txt                       | 2 +-
 src/targets/gpu/hsa_chiplet.cpp                      | 1 -
 src/targets/gpu/include/migraphx/gpu/context.hpp     | 2 +-
 src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp | 3 +--
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt
index b29494281a6..71de1f8e58d 100644
--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -1,7 +1,7 @@
 # ####################################################################################
 # The MIT License (MIT)
 #
-# Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/src/targets/gpu/hsa_chiplet.cpp b/src/targets/gpu/hsa_chiplet.cpp
index d14b6870d00..bd3971d2166 100644
--- a/src/targets/gpu/hsa_chiplet.cpp
+++ b/src/targets/gpu/hsa_chiplet.cpp
@@ -158,4 +158,3 @@ std::size_t get_hsa_chiplet_count(std::size_t /*device_id*/)
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index dfc64505e47..e29414d41f3 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
diff --git a/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp b/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp
index 0b77be8fb72..0b5719f4632 100644
--- a/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/hsa_chiplet.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -41,4 +41,3 @@ MIGRAPHX_GPU_EXPORT std::size_t get_hsa_chiplet_count(std::size_t device_id);
 } // namespace migraphx
 
 #endif
-

From 0135cb3a9322c346bcf48ba2f72b446a6ee56d00 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Tue, 13 Jan 2026 13:01:56 -0600
Subject: [PATCH 06/11] remove comment

---
 src/targets/gpu/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt
index a2a476d5c45..abea1bee8bd 100644
--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -24,7 +24,6 @@
 
 find_package(hip REQUIRED)
 
-# Find HSA runtime for chiplet count query (Linux only)
 if(NOT WIN32)
     find_package(hsa-runtime64 REQUIRED CONFIG)
     message(STATUS "MIGraphX is using HSA runtime for chiplet query")

From 94f23a713b0bed5df2adb94d709f6b5e5082b232 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 14 Jan 2026 08:07:18 -0600
Subject: [PATCH 07/11] update test with num_chiplets field

---
 test/gpu/mlir.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/gpu/mlir.cpp b/test/gpu/mlir.cpp
index f61a3e17943..56c9d301712 100644
--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -161,9 +161,9 @@ static std::string get_attrs()
 {
     if(migraphx::enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
     {
-        return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_cu = 0 : i64})";
+        return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})";
     }
-    return R"({arch = "", kernel = "mixr", num_cu = 0 : i64})";
+    return R"({arch = "", kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})";
 }
 
 TEST_CASE(conv)

From 70a4f6e3df30bc4a5a736ae2ed8b1781baec8707 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 14 Jan 2026 14:26:21 -0600
Subject: [PATCH 08/11] set chiplet count as member variable to context

---
 src/targets/gpu/include/migraphx/gpu/context.hpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index e29414d41f3..21bfdf01d4f 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -62,6 +62,9 @@ struct hip_device
         if(status != hipSuccess)
             MIGRAPHX_THROW("Failed to get device properties: " + hip_error(status));
 
+        // Query chiplet count once at construction (immutable device property)
+        chiplet_count = get_hsa_chiplet_count(device_id);
+
         // Set the device prior to Events that get created within a Context.
         set_device(device_id);
 
@@ -211,7 +214,7 @@ struct hip_device
 
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
-    std::size_t get_chiplet_count() const { return get_hsa_chiplet_count(device_id); }
+    std::size_t get_chiplet_count() const { return chiplet_count; }
 
     std::size_t get_max_workitems_per_cu() const
     {
@@ -225,6 +228,7 @@ struct hip_device
     private:
     std::size_t device_id      = 0;
     std::size_t current_stream = 0;
+    std::size_t chiplet_count  = 1;
     std::vector<stream> streams;
     hipDeviceProp_t device_props;
 

From b1d3a1b2b2a8230432c7e9b4134c881084585194 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:31:52 -0600
Subject: [PATCH 09/11] throw error if HSA fails

---
 src/targets/gpu/hsa_chiplet.cpp | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/targets/gpu/hsa_chiplet.cpp b/src/targets/gpu/hsa_chiplet.cpp
index bd3971d2166..fdb9cb172bb 100644
--- a/src/targets/gpu/hsa_chiplet.cpp
+++ b/src/targets/gpu/hsa_chiplet.cpp
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include <migraphx/gpu/hsa_chiplet.hpp>
+#include <migraphx/errors.hpp>
 #include <vector>
 #include <mutex>
 
@@ -38,16 +39,26 @@ namespace gpu {
 
 namespace {
 
+/// Convert HSA status code to a human-readable string
+std::string hsa_error_string(hsa_status_t status)
+{
+    const char* msg = nullptr;
+    if(hsa_status_string(status, &msg) == HSA_STATUS_SUCCESS and msg != nullptr)
+        return msg;
+    return "Unknown HSA error (code " + std::to_string(static_cast<int>(status)) + ")";
+}
+
 /// RAII wrapper for HSA runtime initialization.
 /// Calls hsa_init() in constructor and hsa_shut_down() in destructor.
 struct hsa_guard
 {
-    bool initialized = false;
+    bool initialized       = false;
+    hsa_status_t init_status = HSA_STATUS_SUCCESS;
 
     hsa_guard()
     {
-        hsa_status_t status = hsa_init();
-        initialized         = (status == HSA_STATUS_SUCCESS);
+        init_status = hsa_init();
+        initialized = (init_status == HSA_STATUS_SUCCESS);
     }
 
     ~hsa_guard()
@@ -60,6 +71,8 @@ struct hsa_guard
     hsa_guard& operator=(const hsa_guard&) = delete;
 
     explicit operator bool() const { return initialized; }
+
+    hsa_status_t status() const { return init_status; }
 };
 
 /// Query chiplet counts for all GPU devices and cache the results.
@@ -71,8 +84,8 @@ std::vector<std::size_t> query_all_chiplet_counts()
     hsa_guard guard;
     if(not guard)
     {
-        // HSA init failed, return empty vector
-        return chiplet_counts;
+        MIGRAPHX_THROW("HSA runtime initialization failed: " + hsa_error_string(guard.status()) +
+                       ". GPU is not accessible.");
     }
 
     // Structure to collect chiplet counts for all GPUs
@@ -99,7 +112,8 @@ std::vector<std::size_t> query_all_chiplet_counts()
             uint32_t num_chiplets = 1;
             err                   = hsa_agent_get_info(
                 agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC), &num_chiplets);
-            // If the query fails (e.g., older ROCm or unsupported GPU), use default of 1
+            // If the query fails (e.g., older ROCm or unsupported GPU), use default of 1.
+            // This is expected on older ROCm versions, so no warning needed.
             if(err != HSA_STATUS_SUCCESS)
                 num_chiplets = 1;
 
@@ -112,8 +126,8 @@ std::vector<std::size_t> query_all_chiplet_counts()
     hsa_status_t status = hsa_iterate_agents(agent_callback, &data);
     if(status != HSA_STATUS_SUCCESS and status != HSA_STATUS_INFO_BREAK)
     {
-        // Iteration failed, return whatever we collected (may be empty)
-        return chiplet_counts;
+        MIGRAPHX_THROW("HSA agent enumeration failed: " + hsa_error_string(status) +
+                       ". Unable to query GPU devices.");
     }
 
     return chiplet_counts;
@@ -139,7 +153,8 @@ std::size_t get_hsa_chiplet_count(std::size_t device_id)
     if(device_id < counts.size())
         return counts[device_id];
 
-    // Device not found in HSA enumeration, return default of 1
+    // Device not found - HSA enumerated fewer GPUs than expected.
+    // This shouldn't happen in normal operation, but return default 1.
     return 1;
 }
 

From 323fda4c71a76f1d1b9d0d1e379c4a6a3f834c64 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:32:29 -0600
Subject: [PATCH 10/11] formatting

---
 src/targets/gpu/hsa_chiplet.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/targets/gpu/hsa_chiplet.cpp b/src/targets/gpu/hsa_chiplet.cpp
index fdb9cb172bb..567ce9b55ff 100644
--- a/src/targets/gpu/hsa_chiplet.cpp
+++ b/src/targets/gpu/hsa_chiplet.cpp
@@ -52,7 +52,7 @@ std::string hsa_error_string(hsa_status_t status)
 /// Calls hsa_init() in constructor and hsa_shut_down() in destructor.
 struct hsa_guard
 {
-    bool initialized       = false;
+    bool initialized         = false;
     hsa_status_t init_status = HSA_STATUS_SUCCESS;
 
     hsa_guard()

From 9669c8bd585ef702f4da9f4141c5e2a7680839f4 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Thu, 15 Jan 2026 17:04:48 -0600
Subject: [PATCH 11/11] fix tidy

---
 src/targets/gpu/hsa_chiplet.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/targets/gpu/hsa_chiplet.cpp b/src/targets/gpu/hsa_chiplet.cpp
index 567ce9b55ff..13b4836bad2 100644
--- a/src/targets/gpu/hsa_chiplet.cpp
+++ b/src/targets/gpu/hsa_chiplet.cpp
@@ -52,14 +52,10 @@ std::string hsa_error_string(hsa_status_t status)
 /// Calls hsa_init() in constructor and hsa_shut_down() in destructor.
 struct hsa_guard
 {
-    bool initialized         = false;
-    hsa_status_t init_status = HSA_STATUS_SUCCESS;
+    hsa_status_t init_status;
+    bool initialized;
 
-    hsa_guard()
-    {
-        init_status = hsa_init();
-        initialized = (init_status == HSA_STATUS_SUCCESS);
-    }
+    hsa_guard() : init_status(hsa_init()), initialized(init_status == HSA_STATUS_SUCCESS) {}
 
     ~hsa_guard()
     {