From 824e0b609770ae4c6c41fc9468a67880d18242cf Mon Sep 17 00:00:00 2001
From: Felix Schlepper
Date: Sun, 14 Sep 2025 13:13:29 +0200
Subject: [PATCH 1/3] ITS: mm add tests

Signed-off-by: Felix Schlepper
---
 Detectors/ITSMFT/ITS/tracking/CMakeLists.txt  |   2 +
 .../include/ITStracking/BoundedAllocator.h    |  63 +++---
 .../tracking/include/ITStracking/TimeFrame.h  |   2 +-
 .../tracking/include/ITStracking/Tracker.h    |   2 +-
 .../include/ITStracking/TrackerTraits.h       |   2 +-
 .../tracking/include/ITStracking/Vertexer.h   |   2 +-
 .../include/ITStracking/VertexerTraits.h      |   2 +-
 .../ITSMFT/ITS/tracking/src/TimeFrame.cxx     |  30 ++-
 .../ITSMFT/ITS/tracking/test/CMakeLists.txt   |  16 ++
 .../test/testBoundedMemoryResource.cxx        | 209 ++++++++++++++++++
 10 files changed, 283 insertions(+), 47 deletions(-)
 create mode 100644 Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt
 create mode 100644 Detectors/ITSMFT/ITS/tracking/test/testBoundedMemoryResource.cxx

diff --git a/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt
index 291ddffbf9475..001ee537f50d2 100644
--- a/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt
+++ b/Detectors/ITSMFT/ITS/tracking/CMakeLists.txt
@@ -56,3 +56,5 @@ o2_target_root_dictionary(ITStracking
 if(CUDA_ENABLED OR HIP_ENABLED)
   add_subdirectory(GPU)
 endif()
+
+add_subdirectory(test)
diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h
index ac9f72089602d..c19c1e56b273d 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h
@@ -69,7 +69,14 @@ class BoundedMemoryResource final : public std::pmr::memory_resource
     } while (!mUsedMemory.compare_exchange_weak(current_used, new_used,
                                                 std::memory_order_acq_rel,
                                                 std::memory_order_relaxed));
-    return mUpstream->allocate(bytes, alignment);
+    void* p{nullptr};
+    try {
+      p = mUpstream->allocate(bytes, alignment);
+    } catch (...) {
+      mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
+      throw;
+    }
+    return p;
   }

   void do_deallocate(void* p, size_t bytes, size_t alignment) final
@@ -87,11 +94,12 @@
   size_t getMaxMemory() const noexcept { return mMaxMemory; }
   void setMaxMemory(size_t max)
   {
-    if (mUsedMemory > max) {
+    size_t used = mUsedMemory.load(std::memory_order_acquire);
+    if (used > max) {
       ++mCountThrow;
-      throw MemoryLimitExceeded(0, mUsedMemory, max);
+      throw MemoryLimitExceeded(0, used, max);
     }
-    mMaxMemory = max;
+    mMaxMemory.store(max, std::memory_order_release);
   }

   void print() const
@@ -106,7 +114,7 @@
   }

  private:
-  size_t mMaxMemory{std::numeric_limits<size_t>::max()};
+  std::atomic<size_t> mMaxMemory{std::numeric_limits<size_t>::max()};
   std::atomic<size_t> mCountThrow{0};
   std::atomic<size_t> mUsedMemory{0};
   std::pmr::memory_resource* mUpstream;
@@ -116,66 +124,71 @@
 template <typename T>
 using bounded_vector = std::pmr::vector<T>;

 template <typename T>
-void deepVectorClear(std::vector<T>& vec)
+inline void deepVectorClear(std::vector<T>& vec)
 {
   std::vector<T>().swap(vec);
 }

 template <typename T>
-inline void deepVectorClear(bounded_vector<T>& vec, BoundedMemoryResource* bmr = nullptr)
+inline void deepVectorClear(bounded_vector<T>& vec, std::pmr::memory_resource* mr = nullptr)
 {
-  vec.~bounded_vector();
-  if (bmr == nullptr) {
-    auto alloc = vec.get_allocator().resource();
-    new (&vec) bounded_vector<T>(alloc);
+  auto* res = mr ? 
mr : vec.get_allocator().resource();
+  if (res == vec.get_allocator().resource()) {
+    bounded_vector<T> empty{std::pmr::polymorphic_allocator<T>{res}};
+    vec.swap(empty);
   } else {
-    new (&vec) bounded_vector<T>(bmr);
+    vec = bounded_vector<T>(std::pmr::polymorphic_allocator<T>{res});
   }
 }

 template <typename T>
-void deepVectorClear(std::vector<bounded_vector<T>>& vec, BoundedMemoryResource* bmr = nullptr)
+inline void deepVectorClear(std::vector<bounded_vector<T>>& vec, std::pmr::memory_resource* mr = nullptr)
 {
   for (auto& v : vec) {
-    deepVectorClear(v, bmr);
+    deepVectorClear(v, mr);
   }
 }

 template <typename T, size_t S>
-void deepVectorClear(std::array<bounded_vector<T>, S>& arr, BoundedMemoryResource* bmr = nullptr)
+inline void deepVectorClear(std::array<bounded_vector<T>, S>& arr, std::pmr::memory_resource* mr = nullptr)
 {
   for (size_t i{0}; i < S; ++i) {
-    deepVectorClear(arr[i], bmr);
+    deepVectorClear(arr[i], mr);
   }
 }

 template <typename T>
-void clearResizeBoundedVector(bounded_vector<T>& vec, size_t size, BoundedMemoryResource* bmr, T def = T())
+inline void clearResizeBoundedVector(bounded_vector<T>& vec, size_t sz, std::pmr::memory_resource* mr = nullptr, T def = T())
 {
-  vec.~bounded_vector();
-  new (&vec) bounded_vector<T>(size, def, bmr);
+  auto* res = mr ? mr : vec.get_allocator().resource();
+  if (res == vec.get_allocator().resource()) {
+    bounded_vector<T> tmp(sz, def, std::pmr::polymorphic_allocator<T>{res});
+    vec.swap(tmp);
+  } else {
+    vec = bounded_vector<T>(sz, def, std::pmr::polymorphic_allocator<T>{res});
+  }
 }

 template <typename T>
-void clearResizeBoundedVector(std::vector<bounded_vector<T>>& vec, size_t size, BoundedMemoryResource* bmr)
+void clearResizeBoundedVector(std::vector<bounded_vector<T>>& vec, size_t size, std::pmr::memory_resource* mr)
 {
   vec.clear();
   vec.reserve(size);
-  for (size_t i{0}; i < size; ++i) {
-    vec.emplace_back(bmr);
+  for (size_t i = 0; i < size; ++i) {
+    vec.emplace_back(std::pmr::polymorphic_allocator<bounded_vector<T>>{mr});
   }
 }

 template <typename T, size_t S>
-void clearResizeBoundedArray(std::array<bounded_vector<T>, S>& arr, size_t size, BoundedMemoryResource* bmr, T def = T())
+inline void clearResizeBoundedArray(std::array<bounded_vector<T>, S>& arr, size_t size, std::pmr::memory_resource* mr = nullptr, T def = T())
 {
   for (size_t i{0}; i < S; ++i) {
-    clearResizeBoundedVector(arr[i], size, bmr, def);
+    clearResizeBoundedVector(arr[i], size, mr, def);
   }
 }

 template <typename T>
-std::vector<T> toSTDVector(const bounded_vector<T>& b)
+inline std::vector<T> toSTDVector(const bounded_vector<T>& b)
 {
   std::vector<T> t(b.size());
   std::copy(b.cbegin(), b.cend(), t.begin());
diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
index c34701ce222e2..7b5b84b1acde2 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
@@ -188,7 +188,7 @@ struct TimeFrame {
   auto getNumberOfUsedExtendedClusters() const { return mNExtendedUsedClusters; }

   /// memory management
-  void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool);
+  void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool);
   auto& getMemoryPool() const noexcept { return mMemoryPool; }
   bool checkMemory(unsigned long max) { return getArtefactsMemory() < max; }
   unsigned long getArtefactsMemory() const;
diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h
index 642717bd09596..4c903ed1f3ca1 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h
@@ -66,7 +66,7 @@ class Tracker
     const LogFunc& = [](const std::string& s) { std::cerr << s << '\n'; });

   void setParameters(const std::vector<TrackingParameters>& p) { mTrkParams = 
p; }
-  void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool) { mMemoryPool = pool; }
+  void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool) { mMemoryPool = pool; }
   std::vector<TrackingParameters>& getParameters() { return mTrkParams; }
   void setBz(float bz) { mTraits->setBz(bz); }
   bool isMatLUT() const { return mTraits->isMatLUT(); }
diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h
index 9d14bb91635a0..ee64cacb8fa2a 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h
@@ -70,7 +70,7 @@ class TrackerTraits
   bool isMatLUT() const;
   virtual const char* getName() const noexcept { return "CPU"; }
   virtual bool isGPU() const noexcept { return false; }
-  void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool) noexcept { mMemoryPool = pool; }
+  void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool) noexcept { mMemoryPool = pool; }
   auto getMemoryPool() const noexcept { return mMemoryPool; }

   // Others
diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h
index ab92e7c1a1523..d66bcd6ee2358 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h
@@ -52,7 +52,7 @@ class Vertexer
   auto& getVertParameters() const { return mTraits->getVertexingParameters(); }
   void setParameters(const std::vector<VertexingParameters>& vertParams) { mVertParams = vertParams; }
   const auto& getParameters() const noexcept { return mVertParams; }
-  void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool) { mMemoryPool = pool; }
+  void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool) { mMemoryPool = pool; }
   std::vector<Vertex> exportVertices();
   VertexerTraitsN* getTraits() const { return mTraits; };
diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h
index dda32ddfd5aec..b1422d66e12df 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/VertexerTraits.h
@@ -82,7 +82,7 @@ class VertexerTraits
   virtual bool isGPU() const noexcept { return false; }
   virtual const char* getName() const noexcept { return "CPU"; }
   virtual bool usesMemoryPool() const noexcept { return true; }
-  void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool) { mMemoryPool = pool; }
+  void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool) { mMemoryPool = pool; }

   static std::pair<float, float> computeMain(const bounded_vector<float>& elements)
   {
diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
index 510c66e2420f1..741acd227cb32 100644
--- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
+++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
@@ -575,36 +575,32 @@ void TimeFrame<nLayers>::printSliceInfo(const int startROF, const int sliceSize)
     LOG(info) << "Number of seeding vertices: " << getPrimaryVertices(iROF).size();
     int iVertex{0};
     for (auto& v : getPrimaryVertices(iROF)) {
-      LOG(info) << "\t vertex " << iVertex++ << ": x=" << v.getX() << " " << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors.";
+      LOG(info) << "\t vertex " << iVertex++ << ": x=" << v.getX() << " "
+                << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors.";
     }
   }
 }

 template <int nLayers>
-void TimeFrame<nLayers>::setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool)
+void TimeFrame<nLayers>::setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool)
 {
   mMemoryPool = pool;
-  auto 
initVector = [&]<typename T>(bounded_vector<T>& vec) {
-    auto alloc = vec.get_allocator().resource();
-    if (alloc != mMemoryPool.get()) {
-      vec = bounded_vector<T>(mMemoryPool.get());
-    }
+  auto initVector = [&]<typename T>(bounded_vector<T>& vec)
+  {
+    bounded_vector<T> tmp(std::pmr::polymorphic_allocator<T>{mMemoryPool.get()});
+    vec.swap(tmp);
   };
-  auto initArrays = [&]<typename T, size_t S>(std::array<bounded_vector<T>, S>& arr) {
+  auto initArrays = [&]<typename T, size_t S>(std::array<bounded_vector<T>, S>& arr)
+  {
     for (size_t i{0}; i < S; ++i) {
-      auto alloc = arr[i].get_allocator().resource();
-      if (alloc != mMemoryPool.get()) {
-        arr[i] = bounded_vector<T>(mMemoryPool.get());
-      }
+      initVector(arr[i]);
     }
   };
-  auto initVectors = [&]<typename T>(std::vector<bounded_vector<T>>& vec) {
+  auto initVectors = [&]<typename T>(std::vector<bounded_vector<T>>& vec)
+  {
     for (size_t i{0}; i < vec.size(); ++i) {
-      auto alloc = vec[i].get_allocator().resource();
-      if (alloc != mMemoryPool.get()) {
-        vec[i] = bounded_vector<T>(mMemoryPool.get());
-      }
+      initVector(vec[i]);
     }
   };
diff --git a/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt
new file mode 100644
index 0000000000000..818ad1d667371
--- /dev/null
+++ b/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+# All rights not expressly granted are reserved.
+#
+# This software is distributed under the terms of the GNU General Public
+# License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+#
+# In applying this license CERN does not waive the privileges and immunities
+# granted to it by virtue of its status as an Intergovernmental Organization
+# or submit itself to any jurisdiction.
+
+o2_add_test(boundedmemoryresource
+            SOURCES testBoundedMemoryResource.cxx
+            COMPONENT_NAME its-tracking
+            LABELS "its;tracking"
+            PUBLIC_LINK_LIBRARIES O2::ITStracking)
diff --git a/Detectors/ITSMFT/ITS/tracking/test/testBoundedMemoryResource.cxx b/Detectors/ITSMFT/ITS/tracking/test/testBoundedMemoryResource.cxx
new file mode 100644
index 0000000000000..e7d8bd9de25de
--- /dev/null
+++ b/Detectors/ITSMFT/ITS/tracking/test/testBoundedMemoryResource.cxx
@@ -0,0 +1,209 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
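+
+// A minimal usage sketch of the interface exercised below, relying only on what
+// BoundedAllocator.h (above) declares: allocations routed through a
+// std::pmr::polymorphic_allocator are counted against the configured byte cap,
+// and BoundedMemoryResource::MemoryLimitExceeded is thrown (with the
+// reservation rolled back) once the cap would be exceeded:
+//
+//   BoundedMemoryResource bmr(1024);                                   // 1 KiB cap
+//   bounded_vector<int> v(std::pmr::polymorphic_allocator<int>{&bmr});
+//   v.reserve(64);   // 256 bytes, visible via bmr.getUsedMemory()
+//   v.reserve(512);  // 2 KiB > 1 KiB cap: throws MemoryLimitExceeded, v unchanged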
+
+#define BOOST_TEST_MODULE Test Flags
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/unit_test.hpp>
+#include <random>
+#include <algorithm>
+#include "ITStracking/BoundedAllocator.h"
+
+using namespace o2::its;
+using Vec = bounded_vector<int>;
+
+auto getRandomInt(int min = -100, int max = 100)
+{
+  static std::mt19937 gen(std::random_device{}()); // static generator, seeded once
+  std::uniform_int_distribution<> dist(min, max);
+  return [&, dist]() mutable {
+    return dist(gen);
+  };
+}
+
+// -------- Throwing upstream resource for testing rollback --------
+class ThrowingResource final : public std::pmr::memory_resource
+{
+ protected:
+  void* do_allocate(size_t, size_t) final
+  {
+    throw std::bad_alloc(); // always fail
+  }
+  void do_deallocate(void*, size_t, size_t) noexcept final
+  {
+    // nothing
+  }
+  bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final
+  {
+    return this == &other;
+  }
+};
+
+// -------- Upstream resource with empty deallocate --------
+class NoDeallocateResource final : public std::pmr::memory_resource
+{
+ public:
+  NoDeallocateResource(std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
+    : mUpstream(upstream) {}
+
+ protected:
+  void* do_allocate(size_t bytes, size_t alignment) final
+  {
+    return mUpstream->allocate(bytes, alignment);
+  }
+  void do_deallocate(void*, size_t, size_t) noexcept final
+  {
+    // nothing
+  }
+  bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final
+  {
+    return this == &other;
+  }
+
+ private:
+  std::pmr::memory_resource* mUpstream;
+};
+
+// -------- Tests --------
+BOOST_AUTO_TEST_CASE(allocation_and_clear_updates_used_memory)
+{
+  BoundedMemoryResource bmr(10 * 1024 * 1024); // 10 MB cap
+
+  Vec v(std::pmr::polymorphic_allocator<int>{&bmr});
+  BOOST_CHECK_EQUAL(bmr.getUsedMemory(), 0u);
+
+  const size_t count = 128;
+  v.reserve(count);
+  const size_t expected = count * sizeof(int);
+  BOOST_CHECK_GE(bmr.getUsedMemory(), expected);
+  BOOST_CHECK_LE(bmr.getUsedMemory(), expected + 64);
+
+  deepVectorClear(v, &bmr);
+  BOOST_CHECK_EQUAL(bmr.getUsedMemory(), 0u);
+}
+
+BOOST_AUTO_TEST_CASE(clearResizeBoundedVector_resizes_and_tracks_memory)
+{
+  BoundedMemoryResource bmr(1024 * 1024); // 1 MB cap
+
+  Vec v(std::pmr::polymorphic_allocator<int>{&bmr});
+  v.reserve(200);
+  const size_t used_before = bmr.getUsedMemory();
+  BOOST_CHECK_GT(used_before, 0u);
+
+  clearResizeBoundedVector(v, 50, &bmr, 7);
+  const size_t used_after = bmr.getUsedMemory();
+  BOOST_CHECK_GE(used_after, 50 * sizeof(int));
+  BOOST_CHECK_LT(used_after, used_before);
+
+  clearResizeBoundedVector(v, 300, &bmr, 3);
+  BOOST_CHECK_GE(bmr.getUsedMemory(), 300 * sizeof(int));
+}
+
+BOOST_AUTO_TEST_CASE(clearResizeBoundedVector_throws_on_over_limit_and_preserves_previous_usage)
+{
+  const size_t cap = 256 * sizeof(int); // a cap that will be exceeded
+  BoundedMemoryResource bmr(cap);
+  Vec v(std::pmr::polymorphic_allocator<int>{&bmr});
+  clearResizeBoundedVector(v, 10, &bmr, 1);
+  const size_t before = bmr.getUsedMemory();
+  BOOST_CHECK_GT(before, 0u);
+  BOOST_CHECK_LE(before, cap);
+  bool threw = false;
+  try {
+    clearResizeBoundedVector(v, cap / sizeof(int) + 10, &bmr, 2);
+  } catch (const BoundedMemoryResource::MemoryLimitExceeded&) {
+    threw = true;
+  }
+  BOOST_CHECK(threw);
+  BOOST_CHECK_EQUAL(bmr.getUsedMemory(), before);
+}
+
+BOOST_AUTO_TEST_CASE(upstream_throw_rolls_back_reservation)
+{
+  ThrowingResource upstream;
+  BoundedMemoryResource bmr(std::numeric_limits<size_t>::max(), &upstream);
+  const size_t bytes = 1024;
+  bool threw = false;
+  void* p{nullptr};
+
+  try {
+    p = bmr.allocate(bytes, alignof(std::max_align_t));
+  } catch (const std::bad_alloc&) {
+    threw = true;
+  }
+  BOOST_CHECK(threw);
+  BOOST_CHECK_EQUAL(p, nullptr);
+  BOOST_CHECK_EQUAL(bmr.getUsedMemory(), 0u);
+}
+
+BOOST_AUTO_TEST_CASE(vector_of_bounded_vectors_deep_clear_releases_all)
+{
+  BoundedMemoryResource bmr(10 * 1024 * 1024); // 10 MB
+  std::vector<Vec> outer;
+  outer.reserve(5);
+  for (int i = 0; i < 5; ++i) {
+    outer.emplace_back(std::pmr::polymorphic_allocator<int>{&bmr});
+    outer.back().reserve(100);
+  }
+  BOOST_CHECK_GT(bmr.getUsedMemory(), 0u);
+  deepVectorClear(outer, &bmr); // deep clear outer
+  BOOST_CHECK_EQUAL(bmr.getUsedMemory(), 0u);
+}
+
+BOOST_AUTO_TEST_CASE(array_of_bounded_vectors_clear_resize_works)
+{
+  BoundedMemoryResource bmr(10 * 1024 * 1024);
+  std::array<Vec, 3> arr{{Vec(std::pmr::polymorphic_allocator<int>{&bmr}),
+                          Vec(std::pmr::polymorphic_allocator<int>{&bmr}),
+                          Vec(std::pmr::polymorphic_allocator<int>{&bmr})}};
+  clearResizeBoundedVector(arr[0], 10, &bmr, 1);
+  clearResizeBoundedVector(arr[1], 20, &bmr, 2);
+  clearResizeBoundedVector(arr[2], 30, &bmr, 3);
+  BOOST_CHECK_GT(bmr.getUsedMemory(), 0u);
+  deepVectorClear(arr, &bmr); // now clear all recursively
+  BOOST_CHECK_EQUAL(bmr.getUsedMemory(), 0u);
+}
+
+BOOST_AUTO_TEST_CASE(deepVectorClear_releases_and_reuses_resource)
+{
+  // Use a small bounded memory resource
+  BoundedMemoryResource bmr(1024);
+  bounded_vector<int> vec{std::pmr::polymorphic_allocator<int>{&bmr}};
+  vec.resize(100, 42);
+  BOOST_TEST(bmr.getUsedMemory() > 0);
+  deepVectorClear(vec, &bmr);
+  BOOST_TEST(vec.empty());
+  BOOST_TEST(vec.get_allocator().resource() == &bmr);
+  const auto usedAfter = bmr.getUsedMemory();
+  BOOST_CHECK_EQUAL(usedAfter, 0u);
+  vec.push_back(7);
+  BOOST_TEST(vec.size() == 1);
+  BOOST_TEST(vec[0] == 7);
+  BOOST_TEST(vec.get_allocator().resource() == &bmr);
+}
+
+BOOST_AUTO_TEST_CASE(clear_with_memory_resource_without_deallocator)
+{
+  NoDeallocateResource dmr;
+  Vec v(std::pmr::polymorphic_allocator<int>{&dmr});
+
+  for (int shift{0}; shift < 12; ++shift) {
+    const int c{1 << shift};
+    v.resize(100);
+    std::generate(v.begin(), v.end(), getRandomInt());
+    // resize to varying sizes; deallocation is a no-op for this resource
+    clearResizeBoundedVector(v, c / 2, &dmr, 999);
+    for (int i{0}; i < c / 2; ++i) { // now only the first c/2 elements should be set
+      BOOST_CHECK_EQUAL(v[i], 999);
+    }
+    // try to deepclear
+    deepVectorClear(v);
+  }
+}

From 70eb6b49ce87d3092a00c6ea1cd34edc9e91f46e Mon Sep 17 00:00:00 2001
From: Felix Schlepper
Date: Mon, 15 Sep 2025 13:05:45 +0200
Subject: [PATCH 2/3] ITS: GPU: use pinned framework memory

Signed-off-by: Felix Schlepper
---
 .../ITS/tracking/GPU/cuda/TimeFrameGPU.cu     | 142 ++++++++++--------
 .../tracking/GPU/cuda/TrackerTraitsGPU.cxx    |  14 +-
 .../include/ITStracking/BoundedAllocator.h    |   8 +-
 .../include/ITStracking/ExternalAllocator.h   |  32 ++++
 .../tracking/include/ITStracking/TimeFrame.h  |  37 +++--
 .../ITSMFT/ITS/tracking/src/TimeFrame.cxx     |  77 +++++-----
 .../ITS3/reconstruction/src/IOUtils.cxx       |   2 +-
 GPU/GPUTracking/Global/GPUChainITS.cxx        |  19 ++-
 GPU/GPUTracking/Global/GPUChainITS.h          |   9 +-
 9 files changed, 193 insertions(+), 147 deletions(-)

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
index 4da91522371f8..27bcf04746da5 100644
--- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
+++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
@@ -32,7 +32,7 @@ template <int nLayers>
 void TimeFrameGPU<nLayers>::allocMemAsync(void** ptr, size_t size, 
Stream& stream, bool extAllocator) { if (extAllocator) { - *ptr = this->mAllocator->allocate(size); + *ptr = this->mExtDeviceAllocator->allocate(size); } else { GPULog("Calling default CUDA allocator"); GPUChkErrS(cudaMallocAsync(reinterpret_cast(ptr), size, stream.get())); @@ -43,7 +43,7 @@ template void TimeFrameGPU::allocMem(void** ptr, size_t size, bool extAllocator) { if (extAllocator) { - *ptr = this->mAllocator->allocate(size); + *ptr = this->mExtDeviceAllocator->allocate(size); } else { GPULog("Calling default CUDA allocator"); GPUChkErrS(cudaMalloc(reinterpret_cast(ptr), size)); @@ -56,7 +56,7 @@ void TimeFrameGPU::loadIndexTableUtils(const int iteration) GPUTimer timer("loading indextable utils"); if (!iteration) { GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB); - allocMem(reinterpret_cast(&mIndexTableUtilsDevice), sizeof(IndexTableUtilsN), this->getExtAllocator()); + allocMem(reinterpret_cast(&mIndexTableUtilsDevice), sizeof(IndexTableUtilsN), this->hasExternalDeviceAllocator()); } GPULog("gpu-transfer: loading IndexTableUtils object, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB); GPUChkErrS(cudaMemcpy(mIndexTableUtilsDevice, &(this->mIndexTableUtils), sizeof(IndexTableUtilsN), cudaMemcpyHostToDevice)); @@ -67,12 +67,14 @@ void TimeFrameGPU::createUnsortedClustersDeviceArray(const int iteratio { if (!iteration) { GPUTimer timer("creating unsorted clusters array"); - allocMem(reinterpret_cast(&mUnsortedClustersDeviceArray), nLayers * sizeof(Cluster*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mUnsortedClustersDeviceArray), nLayers * sizeof(Cluster*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mUnsortedClustersDevice.data(), nLayers * sizeof(Cluster*), cudaHostRegisterPortable)); mPinnedUnsortedClusters.set(nLayers); - for (auto iLayer{0}; iLayer < o2::gpu::CAMath::Min(maxLayers, nLayers); ++iLayer) { - GPUChkErrS(cudaHostRegister(this->mUnsortedClusters[iLayer].data(), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), cudaHostRegisterPortable)); - mPinnedUnsortedClusters.set(iLayer); + if (!this->hasExternalDeviceAllocator()) { + for (auto iLayer{0}; iLayer < o2::gpu::CAMath::Min(maxLayers, nLayers); ++iLayer) { + GPUChkErrS(cudaHostRegister(this->mUnsortedClusters[iLayer].data(), this->mUnsortedClusters[iLayer].size() * sizeof(Cluster), cudaHostRegisterPortable)); + mPinnedUnsortedClusters.set(iLayer); + } } } } @@ -83,7 +85,7 @@ void TimeFrameGPU::loadUnsortedClustersDevice(const int iteration, cons if (!iteration) { GPUTimer timer(mGpuStreams[layer], "loading unsorted clusters", layer); GPULog("gpu-transfer: loading {} unsorted clusters on layer {}, for {:.2f} MB.", this->mUnsortedClusters[layer].size(), layer, this->mUnsortedClusters[layer].size() * sizeof(Cluster) / constants::MB); - allocMemAsync(reinterpret_cast(&mUnsortedClustersDevice[layer]), this->mUnsortedClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mUnsortedClustersDevice[layer]), this->mUnsortedClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpyAsync(mUnsortedClustersDevice[layer], this->mUnsortedClusters[layer].data(), this->mUnsortedClusters[layer].size() * sizeof(Cluster), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mUnsortedClustersDeviceArray[layer], &mUnsortedClustersDevice[layer], sizeof(Cluster*), 
cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -94,12 +96,14 @@ void TimeFrameGPU::createClustersDeviceArray(const int iteration, const { if (!iteration) { GPUTimer timer("creating sorted clusters array"); - allocMem(reinterpret_cast(&mClustersDeviceArray), nLayers * sizeof(Cluster*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mClustersDeviceArray), nLayers * sizeof(Cluster*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mClustersDevice.data(), nLayers * sizeof(Cluster*), cudaHostRegisterPortable)); mPinnedClusters.set(nLayers); - for (auto iLayer{0}; iLayer < o2::gpu::CAMath::Min(maxLayers, nLayers); ++iLayer) { - GPUChkErrS(cudaHostRegister(this->mClusters[iLayer].data(), this->mClusters[iLayer].size() * sizeof(Cluster), cudaHostRegisterPortable)); - mPinnedClusters.set(iLayer); + if (!this->hasExternalDeviceAllocator()) { + for (auto iLayer{0}; iLayer < o2::gpu::CAMath::Min(maxLayers, nLayers); ++iLayer) { + GPUChkErrS(cudaHostRegister(this->mClusters[iLayer].data(), this->mClusters[iLayer].size() * sizeof(Cluster), cudaHostRegisterPortable)); + mPinnedClusters.set(iLayer); + } } } } @@ -110,7 +114,7 @@ void TimeFrameGPU::loadClustersDevice(const int iteration, const int la if (!iteration) { GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer); GPULog("gpu-transfer: loading {} clusters on layer {}, for {:.2f} MB.", this->mClusters[layer].size(), layer, this->mClusters[layer].size() * sizeof(Cluster) / constants::MB); - allocMemAsync(reinterpret_cast(&mClustersDevice[layer]), this->mClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mClustersDevice[layer]), this->mClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpyAsync(mClustersDevice[layer], this->mClusters[layer].data(), this->mClusters[layer].size() * sizeof(Cluster), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mClustersDeviceArray[layer], &mClustersDevice[layer], sizeof(Cluster*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -121,12 +125,14 @@ void TimeFrameGPU::createClustersIndexTablesArray(const int iteration) { if (!iteration) { GPUTimer timer("creating clustersindextable array"); - allocMem(reinterpret_cast(&mClustersIndexTablesDeviceArray), nLayers * sizeof(int*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mClustersIndexTablesDeviceArray), nLayers * sizeof(int*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mClustersIndexTablesDevice.data(), nLayers * sizeof(int*), cudaHostRegisterPortable)); mPinnedClustersIndexTables.set(nLayers); - for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - GPUChkErrS(cudaHostRegister(this->mIndexTables[iLayer].data(), this->mIndexTables[iLayer].size() * sizeof(int), cudaHostRegisterPortable)); - mPinnedClustersIndexTables.set(iLayer); + if (!this->hasExternalDeviceAllocator()) { + for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { + GPUChkErrS(cudaHostRegister(this->mIndexTables[iLayer].data(), this->mIndexTables[iLayer].size() * sizeof(int), cudaHostRegisterPortable)); + mPinnedClustersIndexTables.set(iLayer); + } } } } @@ -137,7 +143,7 @@ void TimeFrameGPU::loadClustersIndexTables(const int iteration, const i if (!iteration) { GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer); GPULog("gpu-transfer: loading clusters indextable for layer {} with {} elements, for {:.2f} MB.", layer, 
this->mIndexTables[layer].size(), this->mIndexTables[layer].size() * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mClustersIndexTablesDevice[layer]), this->mIndexTables[layer].size() * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mClustersIndexTablesDevice[layer]), this->mIndexTables[layer].size() * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpyAsync(mClustersIndexTablesDevice[layer], this->mIndexTables[layer].data(), this->mIndexTables[layer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mClustersIndexTablesDeviceArray[layer], &mClustersIndexTablesDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -148,12 +154,14 @@ void TimeFrameGPU::createUsedClustersDeviceArray(const int iteration, c { if (!iteration) { GPUTimer timer("creating used clusters flags"); - allocMem(reinterpret_cast(&mUsedClustersDeviceArray), nLayers * sizeof(uint8_t*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mUsedClustersDeviceArray), nLayers * sizeof(uint8_t*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mUsedClustersDevice.data(), nLayers * sizeof(uint8_t*), cudaHostRegisterPortable)); mPinnedUsedClusters.set(nLayers); - for (auto iLayer{0}; iLayer < o2::gpu::CAMath::Min(maxLayers, nLayers); ++iLayer) { - GPUChkErrS(cudaHostRegister(this->mUsedClusters[iLayer].data(), this->mUsedClusters[iLayer].size() * sizeof(uint8_t), cudaHostRegisterPortable)); - mPinnedUsedClusters.set(iLayer); + if (!this->hasExternalDeviceAllocator()) { + for (auto iLayer{0}; iLayer < o2::gpu::CAMath::Min(maxLayers, nLayers); ++iLayer) { + GPUChkErrS(cudaHostRegister(this->mUsedClusters[iLayer].data(), this->mUsedClusters[iLayer].size() * sizeof(uint8_t), cudaHostRegisterPortable)); + mPinnedUsedClusters.set(iLayer); + } } } } @@ -164,7 +172,7 @@ void TimeFrameGPU::createUsedClustersDevice(const int iteration, const if (!iteration) { GPUTimer timer(mGpuStreams[layer], "creating used clusters flags", layer); GPULog("gpu-transfer: creating {} used clusters flags on layer {}, for {:.2f} MB.", this->mUsedClusters[layer].size(), layer, this->mUsedClusters[layer].size() * sizeof(unsigned char) / constants::MB); - allocMemAsync(reinterpret_cast(&mUsedClustersDevice[layer]), this->mUsedClusters[layer].size() * sizeof(unsigned char), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mUsedClustersDevice[layer]), this->mUsedClusters[layer].size() * sizeof(unsigned char), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemsetAsync(mUsedClustersDevice[layer], 0, this->mUsedClusters[layer].size() * sizeof(unsigned char), mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mUsedClustersDeviceArray[layer], &mUsedClustersDevice[layer], sizeof(unsigned char*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -185,12 +193,14 @@ void TimeFrameGPU::createROFrameClustersDeviceArray(const int iteration { if (!iteration) { GPUTimer timer("creating ROFrame clusters array"); - allocMem(reinterpret_cast(&mROFramesClustersDeviceArray), nLayers * sizeof(int*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mROFramesClustersDeviceArray), nLayers * sizeof(int*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mROFramesClustersDevice.data(), nLayers * sizeof(int*), cudaHostRegisterPortable)); mPinnedROFramesClusters.set(nLayers); - for (auto iLayer{0}; 
iLayer < nLayers; ++iLayer) { - GPUChkErrS(cudaHostRegister(this->mROFramesClusters[iLayer].data(), this->mROFramesClusters[iLayer].size() * sizeof(int), cudaHostRegisterPortable)); - mPinnedROFramesClusters.set(iLayer); + if (!this->hasExternalDeviceAllocator()) { + for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { + GPUChkErrS(cudaHostRegister(this->mROFramesClusters[iLayer].data(), this->mROFramesClusters[iLayer].size() * sizeof(int), cudaHostRegisterPortable)); + mPinnedROFramesClusters.set(iLayer); + } } } } @@ -201,7 +211,7 @@ void TimeFrameGPU::loadROFrameClustersDevice(const int iteration, const if (!iteration) { GPUTimer timer(mGpuStreams[layer], "loading ROframe clusters", layer); GPULog("gpu-transfer: loading {} ROframe clusters info on layer {}, for {:.2f} MB.", this->mROFramesClusters[layer].size(), layer, this->mROFramesClusters[layer].size() * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mROFramesClustersDevice[layer]), this->mROFramesClusters[layer].size() * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mROFramesClustersDevice[layer]), this->mROFramesClusters[layer].size() * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpyAsync(mROFramesClustersDevice[layer], this->mROFramesClusters[layer].data(), this->mROFramesClusters[layer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mROFramesClustersDeviceArray[layer], &mROFramesClustersDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -212,12 +222,14 @@ void TimeFrameGPU::createTrackingFrameInfoDeviceArray(const int iterati { if (!iteration) { GPUTimer timer("creating trackingframeinfo array"); - allocMem(reinterpret_cast(&mTrackingFrameInfoDeviceArray), nLayers * sizeof(TrackingFrameInfo*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mTrackingFrameInfoDeviceArray), nLayers * sizeof(TrackingFrameInfo*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaHostRegisterPortable)); mPinnedTrackingFrameInfo.set(nLayers); - for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - GPUChkErrS(cudaHostRegister(this->mTrackingFrameInfo[iLayer].data(), this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaHostRegisterPortable)); - mPinnedTrackingFrameInfo.set(iLayer); + if (!this->hasExternalDeviceAllocator()) { + for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { + GPUChkErrS(cudaHostRegister(this->mTrackingFrameInfo[iLayer].data(), this->mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaHostRegisterPortable)); + mPinnedTrackingFrameInfo.set(iLayer); + } } } } @@ -228,7 +240,7 @@ void TimeFrameGPU::loadTrackingFrameInfoDevice(const int iteration, con if (!iteration) { GPUTimer timer(mGpuStreams[layer], "loading trackingframeinfo", layer); GPULog("gpu-transfer: loading {} tfinfo on layer {}, for {:.2f} MB.", this->mTrackingFrameInfo[layer].size(), layer, this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo) / constants::MB); - allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDevice[layer]), this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDevice[layer]), this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo), mGpuStreams[layer], this->hasExternalDeviceAllocator()); 
GPUChkErrS(cudaMemcpyAsync(mTrackingFrameInfoDevice[layer], this->mTrackingFrameInfo[layer].data(), this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mTrackingFrameInfoDeviceArray[layer], &mTrackingFrameInfoDevice[layer], sizeof(TrackingFrameInfo*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -241,7 +253,7 @@ void TimeFrameGPU::loadMultiplicityCutMask(const int iteration) GPUTimer timer("loading multiplicity cut mask"); GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.", iteration, this->mMultiplicityCutMask.size(), this->mMultiplicityCutMask.size() * sizeof(uint8_t) / constants::MB); if (!iteration) { // only allocate on first call - allocMem(reinterpret_cast(&mMultMaskDevice), this->mMultiplicityCutMask.size() * sizeof(uint8_t), this->getExtAllocator()); + allocMem(reinterpret_cast(&mMultMaskDevice), this->mMultiplicityCutMask.size() * sizeof(uint8_t), this->hasExternalDeviceAllocator()); } GPUChkErrS(cudaMemcpy(mMultMaskDevice, this->mMultiplicityCutMask.data(), this->mMultiplicityCutMask.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); } @@ -253,10 +265,10 @@ void TimeFrameGPU::loadVertices(const int iteration) if (!iteration) { GPUTimer timer("loading seeding vertices"); GPULog("gpu-transfer: loading {} ROframes vertices, for {:.2f} MB.", this->mROFramesPV.size(), this->mROFramesPV.size() * sizeof(int) / constants::MB); - allocMem(reinterpret_cast(&mROFramesPVDevice), this->mROFramesPV.size() * sizeof(int), this->getExtAllocator()); + allocMem(reinterpret_cast(&mROFramesPVDevice), this->mROFramesPV.size() * sizeof(int), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpy(mROFramesPVDevice, this->mROFramesPV.data(), this->mROFramesPV.size() * sizeof(int), cudaMemcpyHostToDevice)); GPULog("gpu-transfer: loading {} seeding vertices, for {:.2f} MB.", this->mPrimaryVertices.size(), this->mPrimaryVertices.size() * sizeof(Vertex) / constants::MB); - allocMem(reinterpret_cast(&mPrimaryVerticesDevice), this->mPrimaryVertices.size() * sizeof(Vertex), this->getExtAllocator()); + allocMem(reinterpret_cast(&mPrimaryVerticesDevice), this->mPrimaryVertices.size() * sizeof(Vertex), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpy(mPrimaryVerticesDevice, this->mPrimaryVertices.data(), this->mPrimaryVertices.size() * sizeof(Vertex), cudaMemcpyHostToDevice)); } } @@ -265,7 +277,7 @@ template void TimeFrameGPU::createTrackletsLUTDeviceArray(const int iteration) { if (!iteration) { - allocMem(reinterpret_cast(&mTrackletsLUTDeviceArray), (nLayers - 1) * sizeof(int*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mTrackletsLUTDeviceArray), (nLayers - 1) * sizeof(int*), this->hasExternalDeviceAllocator()); } } @@ -276,7 +288,7 @@ void TimeFrameGPU::createTrackletsLUTDevice(const int iteration, const const int ncls = this->mClusters[layer].size() + 1; if (!iteration) { GPULog("gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, layer, ncls * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mTrackletsLUTDevice[layer]), ncls * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mTrackletsLUTDevice[layer]), ncls * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpyAsync(&mTrackletsLUTDeviceArray[layer], &mTrackletsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, 
mGpuStreams[layer].get())); } GPUChkErrS(cudaMemsetAsync(mTrackletsLUTDevice[layer], 0, ncls * sizeof(int), mGpuStreams[layer].get())); @@ -287,7 +299,7 @@ void TimeFrameGPU::createTrackletsBuffersArray(const int iteration) { if (!iteration) { GPUTimer timer("creating tracklet buffers array"); - allocMem(reinterpret_cast(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), this->hasExternalDeviceAllocator()); } } @@ -299,7 +311,7 @@ void TimeFrameGPU::createTrackletsBuffers(const int layer) GPUChkErrS(cudaMemcpyAsync(&mNTracklets[layer], mTrackletsLUTDevice[layer] + this->mClusters[layer].size(), sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of tracklets is correct GPULog("gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {:.2f} MB.", mNTracklets[layer], layer, mNTracklets[layer] * sizeof(Tracklet) / constants::MB); - allocMemAsync(reinterpret_cast(&mTrackletsDevice[layer]), mNTracklets[layer] * sizeof(Tracklet), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mTrackletsDevice[layer]), mNTracklets[layer] * sizeof(Tracklet), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpyAsync(&mTrackletsDeviceArray[layer], &mTrackletsDevice[layer], sizeof(Tracklet*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -331,7 +343,7 @@ void TimeFrameGPU::createNeighboursIndexTablesDevice(const int layer) { GPUTimer timer(mGpuStreams[layer], "creating cells neighbours", layer); GPULog("gpu-transfer: reserving neighbours LUT for {} elements on layer {}, for {:.2f} MB.", mNCells[layer] + 1, layer, (mNCells[layer] + 1) * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[layer]), (mNCells[layer] + 1) * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[layer]), (mNCells[layer] + 1) * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[layer], 0, (mNCells[layer] + 1) * sizeof(int), mGpuStreams[layer].get())); } @@ -340,7 +352,7 @@ void TimeFrameGPU::createNeighboursLUTDevice(const int layer, const uns { GPUTimer timer(mGpuStreams[layer], "reserving neighboursLUT"); GPULog("gpu-allocation: reserving neighbours LUT for {} elements on layer {} , for {:.2f} MB.", nCells + 1, layer, (nCells + 1) * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mNeighboursLUTDevice[layer]), (nCells + 1) * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); // We need one element more to move exc -> inc + allocMemAsync(reinterpret_cast(&mNeighboursLUTDevice[layer]), (nCells + 1) * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); // We need one element more to move exc -> inc GPUChkErrS(cudaMemsetAsync(mNeighboursLUTDevice[layer], 0, (nCells + 1) * sizeof(int), mGpuStreams[layer].get())); } @@ -350,8 +362,8 @@ void TimeFrameGPU::loadCellsDevice() GPUTimer timer(mGpuStreams, "loading cell seeds", nLayers - 2); for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) { GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * 
sizeof(CellSeedN), mGpuStreams[iLayer], this->getExtAllocator()); - allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); // accessory for the neigh. finding. + allocMemAsync(reinterpret_cast(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->hasExternalDeviceAllocator()); + allocMemAsync(reinterpret_cast(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->hasExternalDeviceAllocator()); // accessory for the neigh. finding. GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get())); GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get())); } @@ -362,7 +374,7 @@ void TimeFrameGPU::createCellsLUTDeviceArray(const int iteration) { if (!iteration) { GPUTimer timer("creating cells LUTs array"); - allocMem(reinterpret_cast(&mCellsLUTDeviceArray), (nLayers - 2) * sizeof(int*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mCellsLUTDeviceArray), (nLayers - 2) * sizeof(int*), this->hasExternalDeviceAllocator()); } } @@ -371,7 +383,7 @@ void TimeFrameGPU::createCellsLUTDevice(const int layer) { GPUTimer timer(mGpuStreams[layer], "creating cells LUTs", layer); GPULog("gpu-transfer: creating cell LUT for {} elements on layer {}, for {:.2f} MB.", mNTracklets[layer] + 1, layer, (mNTracklets[layer] + 1) * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsLUTDevice[layer]), (mNTracklets[layer] + 1) * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mCellsLUTDevice[layer]), (mNTracklets[layer] + 1) * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemsetAsync(mCellsLUTDevice[layer], 0, (mNTracklets[layer] + 1) * sizeof(int), mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mCellsLUTDeviceArray[layer], &mCellsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -381,7 +393,7 @@ void TimeFrameGPU::createCellsBuffersArray(const int iteration) { if (!iteration) { GPUTimer timer("creating cells buffers array"); - allocMem(reinterpret_cast(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeedN*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeedN*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice)); } } @@ -394,7 +406,7 @@ void TimeFrameGPU::createCellsBuffers(const int layer) GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of cells is correct GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->hasExternalDeviceAllocator()); 
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -414,7 +426,7 @@ void TimeFrameGPU::loadRoadsDevice() { GPUTimer timer("loading roads device"); GPULog("gpu-transfer: loading {} roads, for {:.2f} MB.", this->mRoads.size(), this->mRoads.size() * sizeof(Road) / constants::MB); - allocMem(reinterpret_cast(&mRoadsDevice), this->mRoads.size() * sizeof(Road), this->getExtAllocator()); + allocMem(reinterpret_cast(&mRoadsDevice), this->mRoads.size() * sizeof(Road), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(this->mRoads.data(), this->mRoads.size() * sizeof(Road), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpy(mRoadsDevice, this->mRoads.data(), this->mRoads.size() * sizeof(Road), cudaMemcpyHostToDevice)); } @@ -424,7 +436,7 @@ void TimeFrameGPU::loadTrackSeedsDevice(bounded_vector& seed { GPUTimer timer("loading track seeds"); GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB); - allocMem(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->getExtAllocator()); + allocMem(reinterpret_cast(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeedN), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice)); } @@ -437,10 +449,10 @@ void TimeFrameGPU::createNeighboursDevice(const unsigned int layer) GPUChkErrS(cudaMemcpyAsync(&(this->mNNeighbours[layer]), &(mNeighboursLUTDevice[layer][this->mNCells[layer + 1] - 1]), sizeof(unsigned int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of neighbours is correct GPULog("gpu-allocation: reserving {} neighbours (pairs), for {:.2f} MB.", this->mNNeighbours[layer], (this->mNNeighbours[layer]) * sizeof(gpuPair) / constants::MB); - allocMemAsync(reinterpret_cast(&mNeighbourPairsDevice[layer]), (this->mNNeighbours[layer]) * sizeof(gpuPair), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mNeighbourPairsDevice[layer]), (this->mNNeighbours[layer]) * sizeof(gpuPair), mGpuStreams[layer], this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemsetAsync(mNeighbourPairsDevice[layer], -1, (this->mNNeighbours[layer]) * sizeof(gpuPair), mGpuStreams[layer].get())); GPULog("gpu-allocation: reserving {} neighbours, for {:.2f} MB.", this->mNNeighbours[layer], (this->mNNeighbours[layer]) * sizeof(gpuPair) / constants::MB); - allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), (this->mNNeighbours[layer]) * sizeof(int), mGpuStreams[layer], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), (this->mNNeighbours[layer]) * sizeof(int), mGpuStreams[layer], this->hasExternalDeviceAllocator()); } template @@ -449,7 +461,7 @@ void TimeFrameGPU::createTrackITSExtDevice(bounded_vector& s GPUTimer timer("reserving tracks"); mTrackITSExt = bounded_vector(seeds.size(), {}, this->getMemoryPool().get()); GPULog("gpu-allocation: reserving {} tracks, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(o2::its::TrackITSExt) / constants::MB); - allocMem(reinterpret_cast(&mTrackITSExtDevice), seeds.size() * sizeof(o2::its::TrackITSExt), this->getExtAllocator()); + allocMem(reinterpret_cast(&mTrackITSExtDevice), seeds.size() * sizeof(o2::its::TrackITSExt), 
this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, seeds.size() * sizeof(o2::its::TrackITSExt))); GPUChkErrS(cudaHostRegister(mTrackITSExt.data(), seeds.size() * sizeof(o2::its::TrackITSExt), cudaHostRegisterPortable)); } @@ -462,13 +474,13 @@ void TimeFrameGPU::createVtxTrackletsLUTDevice(const int32_t iteration) for (int32_t iMode{0}; iMode < 2; ++iMode) { if (!iteration) { GPULog("gpu-transfer: creating vertexer tracklets per cluster for {} elements for mode {}, for {:.2f} MB.", ncls, iMode, ncls * sizeof(int32_t) / constants::MB); - allocMemAsync(reinterpret_cast(&mNTrackletsPerClusterDevice[iMode]), ncls * sizeof(int32_t), mGpuStreams[iMode], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mNTrackletsPerClusterDevice[iMode]), ncls * sizeof(int32_t), mGpuStreams[iMode], this->hasExternalDeviceAllocator()); GPULog("gpu-transfer: creating vertexer tracklets per cluster sum for {} elements for mode {}, for {:.2f} MB.", ncls + 1, iMode, (ncls + 1) * sizeof(int32_t) / constants::MB); - allocMemAsync(reinterpret_cast(&mNTrackletsPerClusterSumDevice[iMode]), (ncls + 1) * sizeof(int32_t), mGpuStreams[iMode], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mNTrackletsPerClusterSumDevice[iMode]), (ncls + 1) * sizeof(int32_t), mGpuStreams[iMode], this->hasExternalDeviceAllocator()); GPULog("gpu-transfer: creating vertexer tracklets per ROF for {} elements for mode {}, for {:.2f} MB.", this->mNrof + 1, iMode, (this->mNrof + 1) * sizeof(int32_t) / constants::MB); - allocMemAsync(reinterpret_cast(&mNTrackletsPerROFDevice[iMode]), (this->mNrof + 1) * sizeof(int32_t), mGpuStreams[iMode], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mNTrackletsPerROFDevice[iMode]), (this->mNrof + 1) * sizeof(int32_t), mGpuStreams[iMode], this->hasExternalDeviceAllocator()); } GPUChkErrS(cudaMemsetAsync(mNTrackletsPerClusterDevice[iMode], 0, ncls * sizeof(int32_t), mGpuStreams[iMode].get())); GPUChkErrS(cudaMemsetAsync(mNTrackletsPerClusterSumDevice[iMode], 0, (ncls + 1) * sizeof(int32_t), mGpuStreams[iMode].get())); @@ -477,13 +489,13 @@ void TimeFrameGPU::createVtxTrackletsLUTDevice(const int32_t iteration) mGpuStreams[0].sync(); mGpuStreams[1].sync(); if (!iteration) { - allocMem(reinterpret_cast(&mNTrackletsPerClusterDeviceArray), mNTrackletsPerClusterDevice.size() * sizeof(int32_t*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mNTrackletsPerClusterDeviceArray), mNTrackletsPerClusterDevice.size() * sizeof(int32_t*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpy(mNTrackletsPerClusterDeviceArray, mNTrackletsPerClusterDevice.data(), mNTrackletsPerClusterDevice.size() * sizeof(int32_t*), cudaMemcpyHostToDevice)); - allocMem(reinterpret_cast(&mNTrackletsPerClusterSumDeviceArray), mNTrackletsPerClusterSumDevice.size() * sizeof(int32_t*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mNTrackletsPerClusterSumDeviceArray), mNTrackletsPerClusterSumDevice.size() * sizeof(int32_t*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaMemcpy(mNTrackletsPerClusterSumDeviceArray, mNTrackletsPerClusterSumDevice.data(), mNTrackletsPerClusterSumDevice.size() * sizeof(int32_t*), cudaMemcpyHostToDevice)); - allocMem(reinterpret_cast(&mNTrackletsPerROFDeviceArray), mNTrackletsPerROFDevice.size() * sizeof(int32_t*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mNTrackletsPerROFDeviceArray), mNTrackletsPerROFDevice.size() * sizeof(int32_t*), this->hasExternalDeviceAllocator()); 
GPUChkErrS(cudaMemcpy(mNTrackletsPerROFDeviceArray, mNTrackletsPerROFDevice.data(), mNTrackletsPerROFDevice.size() * sizeof(int32_t*), cudaMemcpyHostToDevice)); } } @@ -496,11 +508,11 @@ void TimeFrameGPU::createVtxTrackletsBuffers(const int32_t iteration) this->mTotalTracklets[iMode] = 0; GPUChkErrS(cudaMemcpyAsync(&(this->mTotalTracklets[iMode]), mNTrackletsPerClusterSumDevice[iMode] + this->mClusters[1].size(), sizeof(int32_t), cudaMemcpyDeviceToHost, mGpuStreams[iMode].get())); GPULog("gpu-transfer: creating vertexer tracklets buffer for {} elements on layer {}, for {:.2f} MB.", this->mTotalTracklets[iMode], iMode, this->mTotalTracklets[iMode] * sizeof(Tracklet) / constants::MB); - allocMemAsync(reinterpret_cast(&mTrackletsDevice[iMode]), this->mTotalTracklets[iMode] * sizeof(Tracklet), mGpuStreams[iMode], this->getExtAllocator()); + allocMemAsync(reinterpret_cast(&mTrackletsDevice[iMode]), this->mTotalTracklets[iMode] * sizeof(Tracklet), mGpuStreams[iMode], this->hasExternalDeviceAllocator()); } mGpuStreams[0].sync(); mGpuStreams[1].sync(); - allocMem(reinterpret_cast(&mTrackletsDeviceArray), 2 * sizeof(Tracklet*), this->getExtAllocator()); + allocMem(reinterpret_cast(&mTrackletsDeviceArray), 2 * sizeof(Tracklet*), this->hasExternalDeviceAllocator()); GPUChkErrS(cudaHostRegister(mTrackletsDevice.data(), 2 * sizeof(Tracklet*), cudaHostRegisterPortable)); GPUChkErrS(cudaMemcpy(mTrackletsDeviceArray, mTrackletsDevice.data(), 2 * sizeof(Tracklet*), cudaMemcpyHostToDevice)); } @@ -512,14 +524,14 @@ void TimeFrameGPU::createVtxLinesLUTDevice(const int32_t iteration) const int32_t ncls = this->mClusters[1].size(); GPULog("gpu-transfer: creating vertexer lines per cluster for {} elements , for {:.2f} MB.", ncls, ncls * sizeof(int32_t) / constants::MB); - allocMem(reinterpret_cast(&mNLinesPerClusterDevice), ncls * sizeof(int32_t), this->getExtAllocator()); + allocMem(reinterpret_cast(&mNLinesPerClusterDevice), ncls * sizeof(int32_t), this->hasExternalDeviceAllocator()); GPULog("gpu-transfer: creating vertexer lines per cluster sum for {} elements , for {:.2f} MB.", ncls + 1, (ncls + 1) * sizeof(int32_t) / constants::MB); - allocMem(reinterpret_cast(&mNLinesPerClusterSumDevice), (ncls + 1) * sizeof(int32_t), this->getExtAllocator()); + allocMem(reinterpret_cast(&mNLinesPerClusterSumDevice), (ncls + 1) * sizeof(int32_t), this->hasExternalDeviceAllocator()); const int32_t ntrkls = this->mTotalTracklets[0]; GPULog("gpu-transfer: creating vertexer used tracklets for {} elements , for {:.2f} MB.", ntrkls, ntrkls * sizeof(uint8_t) / constants::MB); - allocMem(reinterpret_cast(&mUsedTrackletsDevice), ntrkls * sizeof(uint8_t), this->getExtAllocator()); + allocMem(reinterpret_cast(&mUsedTrackletsDevice), ntrkls * sizeof(uint8_t), this->hasExternalDeviceAllocator()); } template @@ -530,7 +542,7 @@ void TimeFrameGPU::createVtxLinesBuffer(const int32_t iteration) GPUChkErrS(cudaMemcpy(&nlines, mNLinesPerClusterDevice + this->mClusters[1].size(), sizeof(int32_t), cudaMemcpyDeviceToHost)); this->mTotalLines = nlines; GPULog("gpu-transfer: creating vertexer lines for {} elements , for {:.2f} MB.", nlines, nlines * sizeof(Line) / constants::MB); - allocMem(reinterpret_cast(&mLinesDevice), nlines * sizeof(Line), this->getExtAllocator()); + allocMem(reinterpret_cast(&mLinesDevice), nlines * sizeof(Line), this->hasExternalDeviceAllocator()); // reset used tracklets GPUChkErrS(cudaMemset(mUsedTrackletsDevice, 0, this->mTotalTracklets[0] * sizeof(uint8_t))); } diff --git 
a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 047d42d815e99..cca4283c9b77f 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -33,12 +33,14 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) mTimeFrameGPU->loadVertices(iteration); mTimeFrameGPU->loadIndexTableUtils(iteration); mTimeFrameGPU->loadMultiplicityCutMask(iteration); + // pinned on host mTimeFrameGPU->createUsedClustersDeviceArray(iteration); mTimeFrameGPU->createClustersDeviceArray(iteration); mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration); mTimeFrameGPU->createClustersIndexTablesArray(iteration); mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration); mTimeFrameGPU->createROFrameClustersDeviceArray(iteration); + // device array mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration); mTimeFrameGPU->createTrackletsBuffersArray(iteration); mTimeFrameGPU->createCellsBuffersArray(iteration); @@ -106,7 +108,7 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->getPositionResolutions(), this->mTrkParams[iteration].LayerRadii, mTimeFrameGPU->getMSangles(), - mTimeFrameGPU->getExternalAllocator(), + mTimeFrameGPU->getExternalDeviceAllocator(), conf.nBlocksLayerTracklets[iteration], conf.nThreadsLayerTracklets[iteration], mTimeFrameGPU->getStreams()); @@ -144,7 +146,7 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->getPositionResolutions(), this->mTrkParams[iteration].LayerRadii, mTimeFrameGPU->getMSangles(), - mTimeFrameGPU->getExternalAllocator(), + mTimeFrameGPU->getExternalDeviceAllocator(), conf.nBlocksLayerTracklets[iteration], conf.nThreadsLayerTracklets[iteration], mTimeFrameGPU->getStreams()); @@ -195,7 +197,7 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, this->mTrkParams[iteration].NSigmaCut, - mTimeFrameGPU->getExternalAllocator(), + mTimeFrameGPU->getExternalDeviceAllocator(), conf.nBlocksLayerCells[iteration], conf.nThreadsLayerCells[iteration], mTimeFrameGPU->getStreams()); @@ -251,7 +253,7 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) currentLayerCellsNum, nextLayerCellsNum, 1e2, - mTimeFrameGPU->getExternalAllocator(), + mTimeFrameGPU->getExternalDeviceAllocator(), conf.nBlocksFindNeighbours[iteration], conf.nThreadsFindNeighbours[iteration], mTimeFrameGPU->getStream(iLayer)); @@ -279,7 +281,7 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) mTimeFrameGPU->getDeviceNeighbours(iLayer), mTimeFrameGPU->getArrayNNeighbours()[iLayer], mTimeFrameGPU->getStream(iLayer), - mTimeFrameGPU->getExternalAllocator()); + mTimeFrameGPU->getExternalDeviceAllocator()); } mTimeFrameGPU->syncStreams(false); } @@ -310,7 +312,7 @@ void TrackerTraitsGPU::findRoads(const int iteration) this->mTrkParams[0].MaxChi2NDF, mTimeFrameGPU->getDevicePropagator(), this->mTrkParams[0].CorrType, - mTimeFrameGPU->getExternalAllocator(), + mTimeFrameGPU->getExternalDeviceAllocator(), conf.nBlocksProcessNeighbours[iteration], conf.nThreadsProcessNeighbours[iteration]); } diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h index c19c1e56b273d..6a1a7e18e16f4 100644 --- 
a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h @@ -22,6 +22,8 @@ #include #include +#include "ITStracking/ExternalAllocator.h" + #include "GPUCommonLogger.h" namespace o2::its @@ -56,6 +58,7 @@ class BoundedMemoryResource final : public std::pmr::memory_resource BoundedMemoryResource(size_t maxBytes = std::numeric_limits::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource()) : mMaxMemory(maxBytes), mUpstream(upstream) {} + BoundedMemoryResource(ExternalAllocator* alloc) : mAdaptor(std::make_unique(alloc)), mUpstream(mAdaptor.get()) {} void* do_allocate(size_t bytes, size_t alignment) final { @@ -117,7 +120,8 @@ class BoundedMemoryResource final : public std::pmr::memory_resource std::atomic mMaxMemory{std::numeric_limits::max()}; std::atomic mCountThrow{0}; std::atomic mUsedMemory{0}; - std::pmr::memory_resource* mUpstream; + std::unique_ptr mAdaptor{nullptr}; + std::pmr::memory_resource* mUpstream{nullptr}; }; template @@ -170,7 +174,7 @@ inline void clearResizeBoundedVector(bounded_vector& vec, size_t sz, std::pmr } template -void clearResizeBoundedVector(std::vector>& vec, size_t size, std::pmr::memory_resource* mr) +inline void clearResizeBoundedVector(std::vector>& vec, size_t size, std::pmr::memory_resource* mr) { vec.clear(); vec.reserve(size); diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ExternalAllocator.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ExternalAllocator.h index 1628bbc52776b..36e78ef24020c 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ExternalAllocator.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/ExternalAllocator.h @@ -16,6 +16,8 @@ #ifndef TRACKINGITSU_INCLUDE_EXTERNALALLOCATOR_H_ #define TRACKINGITSU_INCLUDE_EXTERNALALLOCATOR_H_ +#include + namespace o2::its { @@ -25,6 +27,36 @@ class ExternalAllocator virtual void* allocate(size_t) = 0; virtual void deallocate(char*, size_t) = 0; }; + +class ExternalAllocatorAdaptor final : public std::pmr::memory_resource +{ + public: + explicit ExternalAllocatorAdaptor(ExternalAllocator* alloc) : mAlloc(alloc) {} + + protected: + void* do_allocate(size_t bytes, size_t alignment) override + { + void* p = mAlloc->allocate(bytes); + if (!p) { + throw std::bad_alloc(); + } + return p; + } + + void do_deallocate(void* p, size_t bytes, size_t) override + { + mAlloc->deallocate(static_cast(p), bytes); + } + + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override + { + return this == &other; + } + + private: + ExternalAllocator* mAlloc; +}; + } // namespace o2::its #endif diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index 7b5b84b1acde2..f1f16ce30a67d 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -95,7 +95,7 @@ struct TimeFrame { gsl::span::iterator& pattIt, const itsmft::TopologyDictionary* dict, const dataformats::MCTruthContainer* mcLabels = nullptr); - void resetROFrameData(); + void resetROFrameData(size_t nROFs); int getTotalClusters() const; auto& getTotVertIteration() { return mTotVertPerIteration; } @@ -233,23 +233,26 @@ struct TimeFrame { void setBz(float bz) { mBz = bz; } float getBz() const { return mBz; } - void setExternalAllocator(ExternalAllocator* allocator) + /// State if memory will be externally managed. 
+ // device + ExternalAllocator* mExtDeviceAllocator{nullptr}; + void setExternalDeviceAllocator(ExternalAllocator* allocator) { mExtDeviceAllocator = allocator; } + ExternalAllocator* getExternalDeviceAllocator() { return mExtDeviceAllocator; } + bool hasExternalDeviceAllocator() const noexcept { return mExtDeviceAllocator != nullptr; } + // host + ExternalAllocator* mExtHostAllocator{nullptr}; + void setExternalHostAllocator(ExternalAllocator* allocator) { - if (isGPU()) { - LOGP(debug, "Setting timeFrame allocator to external"); - mAllocator = allocator; - } else { - LOGP(fatal, "External allocator is currently only supported for GPU"); - } + mExtHostAllocator = allocator; + mExtMemoryPool = std::make_shared<BoundedMemoryResource>(mExtHostAllocator); } - - ExternalAllocator* getExternalAllocator() { return mAllocator; } - - virtual void setDevicePropagator(const o2::base::PropagatorImpl*) - { - return; - }; + ExternalAllocator* getExternalHostAllocator() { return mExtHostAllocator; } + bool hasExternalHostAllocator() const noexcept { return mExtHostAllocator != nullptr; } + std::shared_ptr<BoundedMemoryResource> mExtMemoryPool; + BoundedMemoryResource* getMaybeExternalHostResource(bool force = false) { return (hasExternalHostAllocator() && !force) ? mExtMemoryPool.get() : mMemoryPool.get(); } + // Propagator const o2::base::PropagatorImpl* getDevicePropagator() const { return mPropagatorDevice; } + virtual void setDevicePropagator(const o2::base::PropagatorImpl*){}; template void addClusterToLayer(int layer, T&&... args); @@ -290,10 +293,6 @@ struct TimeFrame { bounded_vector mROFramesPV; bounded_vector mPrimaryVertices; - // State if memory will be externally managed. - ExternalAllocator* mAllocator = nullptr; - bool getExtAllocator() const noexcept { return mAllocator != nullptr; } - std::array, nLayers> mUnsortedClusters; std::vector> mTracklets; std::vector> mCells; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index 741acd227cb32..ab68a63fc394f 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -121,14 +121,15 @@ int TimeFrame::loadROFrameData(gsl::span rofs, const itsmft::TopologyDictionary* dict, const dataformats::MCTruthContainer* mcLabels) { - resetROFrameData(); + resetROFrameData(rofs.size()); GeometryTGeo* geom = GeometryTGeo::Instance(); geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::L2G)); - mNrof = 0; + mNrof = rofs.size(); clearResizeBoundedVector(mClusterSize, clusters.size(), mMemoryPool.get()); - for (auto& rof : rofs) { + for (size_t iRof{0}; iRof < rofs.size(); ++iRof) { + const auto& rof = rofs[iRof]; for (int clusterId{rof.getFirstEntry()}; clusterId < rof.getFirstEntry() + rof.getNEntries(); ++clusterId) { const auto& c = clusters[clusterId]; @@ -164,15 +165,13 @@ int TimeFrame::loadROFrameData(gsl::span rofs, addTrackingFrameInfoToLayer(layer, gloXYZ.x(), gloXYZ.y(), gloXYZ.z(), trkXYZ.x(), geom->getSensorRefAlpha(sensorID), std::array{trkXYZ.y(), trkXYZ.z()}, std::array{sigmaY2, sigmaYZ, sigmaZ2}); - /// Rotate to the global frame addClusterToLayer(layer, gloXYZ.x(), gloXYZ.y(), gloXYZ.z(), mUnsortedClusters[layer].size()); addClusterExternalIndexToLayer(layer, clusterId); } for (unsigned int iL{0}; iL < mUnsortedClusters.size(); ++iL) { - mROFramesClusters[iL].push_back(mUnsortedClusters[iL].size()); + mROFramesClusters[iL][iRof + 1] = mUnsortedClusters[iL].size(); // effectively calculating an exclusive prefix sum
} - mNrof++; } for (auto i = 0; i < mNTrackletsPerCluster.size(); ++i) { @@ -188,13 +187,13 @@ int TimeFrame::loadROFrameData(gsl::span rofs, } template -void TimeFrame::resetROFrameData() +void TimeFrame::resetROFrameData(size_t nRofs) { for (int iLayer{0}; iLayer < nLayers; ++iLayer) { - deepVectorClear(mUnsortedClusters[iLayer], mMemoryPool.get()); - deepVectorClear(mTrackingFrameInfo[iLayer], mMemoryPool.get()); + deepVectorClear(mUnsortedClusters[iLayer], getMaybeExternalHostResource()); + deepVectorClear(mTrackingFrameInfo[iLayer], getMaybeExternalHostResource()); + clearResizeBoundedVector(mROFramesClusters[iLayer], nRofs + 1, getMaybeExternalHostResource()); deepVectorClear(mClusterExternalIndices[iLayer], mMemoryPool.get()); - clearResizeBoundedVector(mROFramesClusters[iLayer], 1, mMemoryPool.get(), 0); if (iLayer < 2) { deepVectorClear(mTrackletsIndexROF[iLayer], mMemoryPool.get()); @@ -298,11 +297,11 @@ void TimeFrame::initialise(const int iteration, const TrackingParameter clearResizeBoundedVector(mBogusClusters, trkParam.NLayers, mMemoryPool.get()); deepVectorClear(mTrackletClusters); for (unsigned int iLayer{0}; iLayer < std::min((int)mClusters.size(), maxLayers); ++iLayer) { - clearResizeBoundedVector(mClusters[iLayer], mUnsortedClusters[iLayer].size(), mMemoryPool.get()); - clearResizeBoundedVector(mUsedClusters[iLayer], mUnsortedClusters[iLayer].size(), mMemoryPool.get()); + clearResizeBoundedVector(mClusters[iLayer], mUnsortedClusters[iLayer].size(), getMaybeExternalHostResource(maxLayers != nLayers)); + clearResizeBoundedVector(mUsedClusters[iLayer], mUnsortedClusters[iLayer].size(), getMaybeExternalHostResource(maxLayers != nLayers)); mPositionResolution[iLayer] = o2::gpu::CAMath::Sqrt(0.5f * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer]) + trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer]); } - clearResizeBoundedArray(mIndexTables, mNrof * (trkParam.ZBins * trkParam.PhiBins + 1), mMemoryPool.get()); + clearResizeBoundedArray(mIndexTables, mNrof * (trkParam.ZBins * trkParam.PhiBins + 1), getMaybeExternalHostResource(maxLayers != nLayers)); clearResizeBoundedVector(mLines, mNrof, mMemoryPool.get()); clearResizeBoundedVector(mTrackletClusters, mNrof, mMemoryPool.get()); @@ -586,37 +585,24 @@ void TimeFrame::setMemoryPool(std::shared_ptr po { mMemoryPool = pool; - auto initVector = [&](bounded_vector & vec) - { - bounded_vector tmp(std::pmr::polymorphic_allocator{mMemoryPool.get()}); - vec.swap(tmp); - }; - auto initArrays = [&](std::array, S> & arr) + auto initVector = [&](bounded_vector & vec, bool useExternal = false) { - for (size_t i{0}; i < S; ++i) { - initVector(arr[i]); - } + vec = bounded_vector(std::pmr::polymorphic_allocator{(useExternal) ? 
mExtMemoryPool.get() : mMemoryPool.get()}); }; - auto initVectors = [&](std::vector> & vec) + auto initContainers = [&](Container & container, bool useExternal = false) { - for (size_t i{0}; i < vec.size(); ++i) { - initVector(vec[i]); + for (auto& v : container) { + initVector(v, useExternal); } }; - + // these will only reside on the host for the CPU part initVector(mTotVertPerIteration); initVector(mPrimaryVertices); initVector(mROFramesPV); - initArrays(mClusters); - initArrays(mTrackingFrameInfo); - initArrays(mClusterExternalIndices); - initArrays(mROFramesClusters); - initArrays(mNTrackletsPerCluster); - initArrays(mNTrackletsPerClusterSum); - initArrays(mNClustersPerROF); - initArrays(mIndexTables); - initArrays(mUsedClusters); - initArrays(mUnsortedClusters); + initContainers(mClusterExternalIndices); + initContainers(mNTrackletsPerCluster); + initContainers(mNTrackletsPerClusterSum); + initContainers(mNClustersPerROF); initVector(mROFramesPV); initVector(mPrimaryVertices); initVector(mRoads); @@ -628,12 +614,19 @@ initVector(mPValphaX); initVector(mBogusClusters); initVector(mVerticesContributorLabels); - initArrays(mTrackletsIndexROF); - initVectors(mTracks); - initVectors(mTracklets); - initVectors(mCells); - initVectors(mCellsNeighbours); - initVectors(mCellsLookupTable); + initContainers(mTrackletsIndexROF); + initContainers(mTracks); + initContainers(mTracklets); + initContainers(mCells); + initContainers(mCellsNeighbours); + initContainers(mCellsLookupTable); + // these will possibly use an externally provided allocator + initContainers(mClusters, hasExternalHostAllocator()); + initContainers(mUsedClusters, hasExternalHostAllocator()); + initContainers(mUnsortedClusters, hasExternalHostAllocator()); + initContainers(mIndexTables, hasExternalHostAllocator()); + initContainers(mTrackingFrameInfo, hasExternalHostAllocator()); + initContainers(mROFramesClusters, hasExternalHostAllocator()); } template diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx index acba8022e376f..2fced813efc93 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx @@ -64,7 +64,7 @@ int loadROFrameDataITS3(its::TimeFrame<7>* tf, const its3::TopologyDictionary* dict, const dataformats::MCTruthContainer* mcLabels) { - tf->resetROFrameData(); + tf->resetROFrameData(rofs.size()); auto geom = its::GeometryTGeo::Instance(); geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::L2G)); diff --git a/GPU/GPUTracking/Global/GPUChainITS.cxx b/GPU/GPUTracking/Global/GPUChainITS.cxx index 26dff3710cd4a..a85cdb48c4d1c 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.cxx +++ b/GPU/GPUTracking/Global/GPUChainITS.cxx @@ -17,7 +17,6 @@ #include "DataFormatsITS/TrackITS.h" #include "ITStracking/ExternalAllocator.h" #include "GPUReconstructionIncludesITS.h" -#include using namespace o2::gpu; @@ -26,15 +25,18 @@ namespace o2::its class GPUFrameworkExternalAllocator final : public o2::its::ExternalAllocator { public: + GPUFrameworkExternalAllocator(GPUMemoryResource::MemoryType type) : mType(type) {} + void* allocate(size_t size) override { - return mFWReco->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); + return mFWReco->AllocateDirectMemory(size, mType); } - void deallocate(char* ptr, size_t) override {} + void deallocate(char* ptr, size_t size) override {}
void setReconstructionFramework(o2::gpu::GPUReconstruction* fwr) { mFWReco = fwr; } private: o2::gpu::GPUReconstruction* mFWReco; + GPUMemoryResource::MemoryType mType; }; } // namespace o2::its @@ -71,11 +73,12 @@ o2::its::TimeFrame<7>* GPUChainITS::GetITSTimeframe() } #if !defined(GPUCA_STANDALONE) if (mITSTimeFrame->isGPU()) { - auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); }; - - mFrameworkAllocator.reset(new o2::its::GPUFrameworkExternalAllocator); - mFrameworkAllocator->setReconstructionFramework(rec()); - mITSTimeFrame->setExternalAllocator(mFrameworkAllocator.get()); + mFrameworkDeviceAllocator.reset(new o2::its::GPUFrameworkExternalAllocator(GPUMemoryResource::MEMORY_GPU)); + mFrameworkDeviceAllocator->setReconstructionFramework(rec()); + mITSTimeFrame->setExternalDeviceAllocator(mFrameworkDeviceAllocator.get()); + mFrameworkHostAllocator.reset(new o2::its::GPUFrameworkExternalAllocator(GPUMemoryResource::MEMORY_HOST)); + mFrameworkHostAllocator->setReconstructionFramework(rec()); + mITSTimeFrame->setExternalHostAllocator(mFrameworkHostAllocator.get()); } #endif return mITSTimeFrame.get(); diff --git a/GPU/GPUTracking/Global/GPUChainITS.h b/GPU/GPUTracking/Global/GPUChainITS.h index a607f66322bab..e95c2de2f2023 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.h +++ b/GPU/GPUTracking/Global/GPUChainITS.h @@ -40,9 +40,9 @@ class GPUChainITS final : public GPUChain int32_t Finalize() override; int32_t RunChain() override; - void RegisterPermanentMemoryAndProcessors() final {}; - void RegisterGPUProcessors() final {}; - void MemorySize(size_t&, size_t&) final {}; + void RegisterPermanentMemoryAndProcessors() final{}; + void RegisterGPUProcessors() final{}; + void MemorySize(size_t&, size_t&) final{}; o2::its::TrackerTraits<7>* GetITSTrackerTraits(); o2::its::VertexerTraits<7>* GetITSVertexerTraits(); @@ -53,7 +53,8 @@ class GPUChainITS final : public GPUChain std::unique_ptr> mITSTrackerTraits; std::unique_ptr> mITSVertexerTraits; std::unique_ptr> mITSTimeFrame; - std::unique_ptr mFrameworkAllocator; + std::unique_ptr mFrameworkDeviceAllocator; + std::unique_ptr mFrameworkHostAllocator; }; } // namespace o2::gpu From dd7a382d02667e6d4052158574d32daebc00ab24 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Fri, 19 Sep 2025 09:29:21 +0000 Subject: [PATCH 3/3] Please consider the following formatting changes --- .../ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h | 2 +- Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx | 6 ++---- GPU/GPUTracking/Global/GPUChainITS.h | 6 +++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index f1f16ce30a67d..949eda042e0f9 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -252,7 +252,7 @@ struct TimeFrame { BoundedMemoryResource* getMaybeExternalHostResource(bool force = false) { return (hasExternalHostAllocator() && !force) ? mExtMemoryPool.get() : mMemoryPool.get(); } // Propagator const o2::base::PropagatorImpl* getDevicePropagator() const { return mPropagatorDevice; } - virtual void setDevicePropagator(const o2::base::PropagatorImpl*){}; + virtual void setDevicePropagator(const o2::base::PropagatorImpl*) {}; template void addClusterToLayer(int layer, T&&... 
args); diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index ab68a63fc394f..f3fa5a5fec522 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -585,12 +585,10 @@ void TimeFrame::setMemoryPool(std::shared_ptr po { mMemoryPool = pool; - auto initVector = [&](bounded_vector & vec, bool useExternal = false) - { + auto initVector = [&](bounded_vector& vec, bool useExternal = false) { vec = bounded_vector(std::pmr::polymorphic_allocator{(useExternal) ? mExtMemoryPool.get() : mMemoryPool.get()}); }; - auto initContainers = [&](Container & container, bool useExternal = false) - { + auto initContainers = [&](Container& container, bool useExternal = false) { for (auto& v : container) { initVector(v, useExternal); } diff --git a/GPU/GPUTracking/Global/GPUChainITS.h b/GPU/GPUTracking/Global/GPUChainITS.h index e95c2de2f2023..ab693bcef3f8b 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.h +++ b/GPU/GPUTracking/Global/GPUChainITS.h @@ -40,9 +40,9 @@ class GPUChainITS final : public GPUChain int32_t Finalize() override; int32_t RunChain() override; - void RegisterPermanentMemoryAndProcessors() final{}; - void RegisterGPUProcessors() final{}; - void MemorySize(size_t&, size_t&) final{}; + void RegisterPermanentMemoryAndProcessors() final {}; + void RegisterGPUProcessors() final {}; + void MemorySize(size_t&, size_t&) final {}; o2::its::TrackerTraits<7>* GetITSTrackerTraits(); o2::its::VertexerTraits<7>* GetITSVertexerTraits();
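
For reference, the ExternalAllocatorAdaptor introduced in ExternalAllocator.h can be exercised in isolation: it wraps the two-method allocate/deallocate interface of ExternalAllocator into a std::pmr::memory_resource, so any pmr container can draw its storage from an externally managed pool. A minimal sketch; the malloc-backed allocator below is a stand-in assumption, not part of this series:

#include "ITStracking/ExternalAllocator.h"

#include <cstddef>
#include <cstdlib>
#include <memory_resource>
#include <vector>

// Toy external allocator; in production the framework-provided one is used.
class MallocExternalAllocator final : public o2::its::ExternalAllocator
{
 public:
  void* allocate(std::size_t size) override { return std::malloc(size); }
  void deallocate(char* ptr, std::size_t) override { std::free(ptr); }
};

int main()
{
  MallocExternalAllocator ext;
  // The adaptor forwards do_allocate/do_deallocate to the raw interface;
  // note that the requested alignment is not forwarded.
  o2::its::ExternalAllocatorAdaptor adaptor{&ext};
  std::pmr::vector<float> buf{&adaptor}; // storage now comes from ext
  buf.resize(128);
  return 0; // buf returns its block through ext.deallocate() on destruction
}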
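The bounded resource follows the same pattern with a byte cap on top: within the cap it forwards to its upstream resource while accounting usage atomically, beyond it the allocation throws. A minimal sketch; the 1 KiB limit and the sizes are illustrative:

#include "ITStracking/BoundedAllocator.h"

#include <iostream>
#include <memory_resource>

int main()
{
  // Cap the resource at 1 KiB; allocations are forwarded to the default
  // pmr resource and accounted against the cap.
  o2::its::BoundedMemoryResource res{1024};

  o2::its::bounded_vector<int> ok{std::pmr::polymorphic_allocator<int>{&res}};
  ok.resize(64); // 256 B, fits

  try {
    o2::its::bounded_vector<int> tooBig{std::pmr::polymorphic_allocator<int>{&res}};
    tooBig.resize(4096); // ~16 KiB, beyond the cap: the resource throws
  } catch (...) {
    std::cout << "allocation beyond the cap was rejected\n";
  }
  return 0;
}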
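Finally, a note on the reworked mROFramesClusters bookkeeping in loadROFrameData: each layer's vector is sized nROFs + 1 and slot iRof + 1 receives the running cluster count, i.e. an exclusive prefix sum shifted by one, so per-ROF cluster ranges fall out as half-open intervals. A standalone sketch of the same indexing scheme with made-up counts:

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
  // Illustrative per-ROF cluster counts on a single layer.
  std::vector<int> nClustersPerROF{3, 0, 5, 2};

  // Offsets as in the patched loadROFrameData: size nROFs + 1, slot r + 1
  // holds the cumulative count up to and including ROF r.
  std::vector<int> offsets(nClustersPerROF.size() + 1, 0);
  for (std::size_t r = 0; r < nClustersPerROF.size(); ++r) {
    offsets[r + 1] = offsets[r] + nClustersPerROF[r];
  }

  // Clusters of ROF r are the half-open range [offsets[r], offsets[r + 1]).
  for (std::size_t r = 0; r < nClustersPerROF.size(); ++r) {
    std::printf("ROF %zu: clusters [%d, %d)\n", r, offsets[r], offsets[r + 1]);
  }
  return 0;
}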