From 59a4cf5212d9bfd2b322ac073d853ccefedf1b2c Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 18 Apr 2025 16:31:29 +0200 Subject: [PATCH 1/6] ITS: fix GPU tracking - compute-sanitizer reveal malicious write past allocated table - ZBins for map lookup was not taken from params --- .../ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 805e66675e1b9..a1c65f67738dd 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -84,7 +84,7 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex + 1, zRangeMin)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::CAMath::Min(ZBins - 1, utils.getZBinIndex(layerIndex + 1, zRangeMax)), + o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex + 1, zRangeMax)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; } @@ -1248,7 +1248,7 @@ void processNeighboursHandler(const int startLayer, maxChi2ClusterAttachment, propagator, matCorrType); - auto t1 = updatedCellSeed.size(); + GPUChkErrS(cudaFree(d_temp_storage)); int level = startLevel; for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { @@ -1258,7 +1258,7 @@ void processNeighboursHandler(const int startLayer, thrust::device_vector().swap(updatedCellSeed); thrust::device_vector().swap(updatedCellId); auto lastCellSeedSize{lastCellSeed.size()}; - foundSeedsTable.resize(nCells[iLayer] + 1); + foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); --level; gpu::processNeighboursKernel<<>>(iLayer, @@ -1282,14 +1282,14 @@ void processNeighboursHandler(const int startLayer, temp_storage_bytes, // temp_storage_bytes thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[iLayer] + 1, // num_items + foundSeedsTable.size(), // num_i_items 0)); // NOLINT: this is the offset of the sum, not a pointer GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage temp_storage_bytes, // temp_storage_bytes thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[iLayer] + 1, // num_items + foundSeedsTable.size(), // num_i_items 0)); // NOLINT: this is the offset of the sum, not a pointer auto foundSeeds{foundSeedsTable.back()}; updatedCellId.resize(foundSeeds); From ba14ded6f1994f65f655f5cfe75cbff9c7a6491d Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 18 Apr 2025 16:42:11 +0200 Subject: [PATCH 2/6] ITS: GPU refactoring - Removes unused headers - adds two new functions for ex/in-clusive scan via cub - inlines square - applies deterministc mode blocks=1,threads=1 to all kernels --- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 408 ++++++++---------- 1 file changed, 178 insertions(+), 230 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index a1c65f67738dd..b1cd6725f3003 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -12,11 +12,9 @@ #include #include -#include -#include #include -#include +#include #include #include #include @@ -28,7 +26,6 @@ #include #include "ITStracking/Constants.h" -#include "ITStracking/Configuration.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/MathUtils.h" #include "DataFormatsITS/TrackITS.h" @@ -59,7 +56,7 @@ namespace o2::its using namespace constants::its2; using Vertex = o2::dataformats::Vertex>; -GPUd() float Sq(float v) +GPUdii() float Sq(float v) { return v * v; } @@ -76,15 +73,15 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz; const float phiRangeMax = (maxdeltaphi > constants::math::Pi) ? constants::math::TwoPi : currentCluster.phi + maxdeltaphi; - if (zRangeMax < -LayersZCoordinate()[layerIndex + 1] || - zRangeMin > LayersZCoordinate()[layerIndex + 1] || zRangeMin > zRangeMax) { + if (zRangeMax < -utils.getLayerZ(layerIndex) || + zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) { return getEmptyBinsRect(); } - return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex + 1, zRangeMin)), + return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex + 1, zRangeMax)), + o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex, zRangeMax)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; } @@ -522,7 +519,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel( const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * MSAngle))}; - const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; + const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { continue; } @@ -800,6 +797,44 @@ GPUg() void printCellSeeds(CellSeed* seed, int nCells, const unsigned int tId = } } } + +template +GPUhi() void cubExclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubExclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubExclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} + +template +GPUhi() void cubInclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubInclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubInclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} } // namespace gpu template @@ -833,7 +868,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>( + gpu::computeLayerTrackletsMultiROFKernel<<>>( utils, multMask, iLayer, @@ -860,22 +896,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, resolutions[iLayer], radii[iLayer + 1] - radii[iLayer], mulScatAng[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } @@ -913,55 +934,42 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, - multMask, - iLayer, - startROF, - endROF, - maxROF, - deltaROF, - vertices, - rofPV, - nVertices, - vertexId, - clusters, - ROFClusters, - usedClusters, - clustersIndexTables, - tracklets, - trackletsLUTs, - iteration, - NSigmaCut, - phiCuts[iLayer], - resolutionPV, - minRs[iLayer + 1], - maxRs[iLayer + 1], - resolutions[iLayer], - radii[iLayer + 1] - radii[iLayer], - mulScatAng[iLayer]); + gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, + multMask, + iLayer, + startROF, + endROF, + maxROF, + deltaROF, + vertices, + rofPV, + nVertices, + vertexId, + clusters, + ROFClusters, + usedClusters, + clustersIndexTables, + tracklets, + trackletsLUTs, + iteration, + NSigmaCut, + phiCuts[iLayer], + resolutionPV, + minRs[iLayer + 1], + maxRs[iLayer + 1], + resolutions[iLayer], + radii[iLayer + 1] - radii[iLayer], + mulScatAng[iLayer]); thrust::device_ptr tracklets_ptr(spanTracklets[iLayer]); thrust::sort(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::sort_tracklets()); auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets()); nTracklets[iLayer] = unique_end - tracklets_ptr; if (iLayer > 0) { GPUChkErrS(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int))); - gpu::compileTrackletsLookupTableKernel<<>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::compileTrackletsLookupTableKernel<<>>( + spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } } @@ -984,7 +992,8 @@ void countCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -998,22 +1007,7 @@ void countCellsHandler( maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float nSigmaCut); // const float - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(cellsLUTsHost, nTracklets + 1); } void computeCellsHandler( @@ -1034,7 +1028,8 @@ void computeCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -1064,7 +1059,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads) { - gpu::computeLayerCellNeighboursKernel<<>>( + gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1076,39 +1072,10 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, nCells, maxCellNeighbours); - void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr; - size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0; - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - GPUChkErrS(cudaMalloc(&d_temp_storage_2, temp_storage_bytes_2)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + gpu::cubInclusiveScanInPlace(neighboursLUT, nCellsNext); + gpu::cubExclusiveScanInPlace(neighboursIndexTable, nCells + 1); unsigned int nNeighbours; GPUChkErrS(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost)); - GPUChkErrS(cudaFree(d_temp_storage)); - GPUChkErrS(cudaFree(d_temp_storage_2)); return nNeighbours; } @@ -1190,69 +1157,56 @@ void processNeighboursHandler(const int startLayer, const int nBlocks, const int nThreads) { - thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this. - // thrust::device_vector lastCellIds(lastCellIdHost); - // thrust::device_vector lastCellSeed(lastCellSeedHost); - thrust::device_vector lastCellId, updatedCellId; - thrust::device_vector lastCellSeed, updatedCellSeed; - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - updatedCellId.resize(foundSeedsTable.back()); - updatedCellSeed.resize(foundSeedsTable.back()); - - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); + thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. + // TODO: fix this. + + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, nCells[startLayer] + 1); + + thrust::device_vector updatedCellId(foundSeedsTable.back()); + thrust::device_vector updatedCellSeed(foundSeedsTable.back()); + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); - GPUChkErrS(cudaFree(d_temp_storage)); int level = startLevel; + thrust::device_vector lastCellId; + thrust::device_vector lastCellSeed; for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { - temp_storage_bytes = 0; lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); thrust::device_vector().swap(updatedCellSeed); @@ -1260,62 +1214,55 @@ void processNeighboursHandler(const int startLayer, auto lastCellSeedSize{lastCellSeed.size()}; foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); - --level; - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - lastCellSeedSize, - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - foundSeedsTable.size(), // num_i_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - foundSeedsTable.size(), // num_i_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + + gpu::processNeighboursKernel<<>>( + iLayer, + --level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, foundSeedsTable.size()); + auto foundSeeds{foundSeedsTable.back()}; updatedCellId.resize(foundSeeds); thrust::fill(updatedCellId.begin(), updatedCellId.end(), 0); updatedCellSeed.resize(foundSeeds); thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - lastCellSeedSize, - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::processNeighboursKernel<<>>( + iLayer, + level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); } + thrust::device_vector outSeeds(updatedCellSeed.size()); auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; @@ -1339,7 +1286,8 @@ void trackSeedHandler(CellSeed* trackSeeds, const int nThreads) { thrust::device_vector minPts(minPtsHost); - gpu::fitTrackSeedsKernel<<>>( + gpu::fitTrackSeedsKernel<<>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** tracks, // TrackITSExt* From 564e27dfa7c8715b4a6c9c55721e3cb62f92d72d Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 18 Apr 2025 16:46:04 +0200 Subject: [PATCH 3/6] ITS: unify hybrid/cpu clusterToTracks interface --- .../GPU/ITStrackingGPU/TrackerTraitsGPU.h | 31 +-- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 25 +- .../tracking/include/ITStracking/Tracker.h | 35 +-- .../include/ITStracking/TrackerTraits.h | 15 +- .../include/ITStracking/TrackingInterface.h | 5 +- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 214 +++++++----------- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 5 +- .../ITS/tracking/src/TrackingInterface.cxx | 5 + .../ITSMFT/ITS/workflow/src/TrackerSpec.cxx | 2 + 9 files changed, 133 insertions(+), 204 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index f9583d97ca030..21eea4505cdea 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -17,6 +17,7 @@ #include "ITStracking/Definitions.h" #include "ITStracking/TrackerTraits.h" #include "ITStrackingGPU/TimeFrameGPU.h" +#include "Framework/Logger.h" namespace o2 { @@ -24,28 +25,28 @@ namespace its { template -class TrackerTraitsGPU : public TrackerTraits +class TrackerTraitsGPU final : public TrackerTraits { public: TrackerTraitsGPU() = default; ~TrackerTraitsGPU() override = default; - // void computeLayerCells() final; - void adoptTimeFrame(TimeFrame* tf) override; - void initialiseTimeFrame(const int iteration) override; - void computeLayerTracklets(const int iteration, int, int) final; - void computeLayerCells(const int iteration) override; - void setBz(float) override; - void findCellsNeighbours(const int iteration) override; - void findRoads(const int iteration) override; + void adoptTimeFrame(TimeFrame* tf) final; + void initialiseTimeFrame(const int iteration) final; + void setBz(float) final; - // Methods to get CPU execution from traits - void initialiseTimeFrameHybrid(const int iteration) override { initialiseTimeFrame(iteration); }; - void computeTrackletsHybrid(const int iteration, int, int) override; - void computeCellsHybrid(const int iteration) override; - void findCellsNeighboursHybrid(const int iteration) override; + void computeLayerTracklets(const int iteration, int, int) final { LOGP(fatal, "computeLayerTracklers must never be called from Hybrid traits!"); }; + void computeLayerCells(const int iteration) final { LOGP(fatal, "computeLayerCells must never be called from Hybrid traits!"); }; + void findCellsNeighbours(const int iteration) final { LOGP(fatal, "findCellsNeighbours must never be called from Hybrid traits!"); }; + void findRoads(const int iteration) final { LOGP(fatal, "findRoads must never be called from Hybrid traits!"); }; + void extendTracks(const int iteration) final { LOGP(fatal, "extendTracks must never be called from Hybrid traits!"); }; + void findShortPrimaries() final { LOGP(fatal, "findShortPrimaries must never be called from Hybrid traits!"); }; - void extendTracks(const int iteration) override; + void initialiseTimeFrameHybrid(const int iteration) final { initialiseTimeFrame(iteration); }; + void computeTrackletsHybrid(const int iteration, int, int) final; + void computeCellsHybrid(const int iteration) final; + void findCellsNeighboursHybrid(const int iteration) final; + void findRoadsHybrid(const int iteration) final; // TimeFrameGPU information forwarding int getTFNumberOfClusters() const override; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 3c65faddcff71..7a9b34b2e7de1 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -11,10 +11,7 @@ /// #include -#include -#include #include -#include #include "DataFormatsITS/TrackITS.h" @@ -40,26 +37,6 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) mTimeFrameGPU->loadIndexTableUtils(iteration); } -template -void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int, int) -{ -} - -template -void TrackerTraitsGPU::computeLayerCells(const int iteration) -{ -} - -template -void TrackerTraitsGPU::findCellsNeighbours(const int iteration) -{ -} - -template -void TrackerTraitsGPU::extendTracks(const int iteration) -{ -} - template void TrackerTraitsGPU::setBz(float bz) { @@ -260,7 +237,7 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) }; template -void TrackerTraitsGPU::findRoads(const int iteration) +void TrackerTraitsGPU::findRoadsHybrid(const int iteration) { auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 58483e4aa9f6f..b63e61f0b76f4 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -50,20 +50,17 @@ class TrackerTraits; class Tracker { + using LogFunc = std::function; public: Tracker(TrackerTraits* traits); - Tracker(const Tracker&) = delete; - Tracker& operator=(const Tracker&) = delete; - ~Tracker(); - void adoptTimeFrame(TimeFrame& tf); void clustersToTracks( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); + LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); void clustersToTracksHybrid( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); + LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); std::vector& getTracks(); void setParameters(const std::vector&); @@ -74,15 +71,25 @@ class Tracker bool isMatLUT() const; void setNThreads(int n); int getNThreads() const; - std::uint32_t mTimeFrameCounter = 0; + void printSummary() const; private: + enum TrackerType : uint8_t { CPU = 0, + Hybrid, + NSize }; + template + void clusterToTracksImpl(LogFunc, LogFunc); + static constexpr const char* sTrackerNames[TrackerType::NSize] = {"CPU", "Hybrid"}; + + // CPU void initialiseTimeFrame(int& iteration); void computeTracklets(int& iteration, int& iROFslice, int& iVertex); void computeCells(int& iteration); void findCellsNeighbours(int& iteration); void findRoads(int& iteration); - + void findShortPrimaries(); + void extendTracks(int& iteration); + // Hyrbid void initialiseTimeFrameHybrid(int& iteration); void computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex); void computeCellsHybrid(int& iteration); @@ -90,17 +97,13 @@ class Tracker void findRoadsHybrid(int& iteration); void findTracksHybrid(int& iteration); - void findShortPrimaries(); - void findTracks(); - void extendTracks(int& iteration); - // MC interaction void computeRoadsMClabels(); void computeTracksMClabels(); void rectifyClusterIndices(); template - float evaluateTask(void (Tracker::*)(T...), const char*, std::function logger, T&&... args); + float evaluateTask(void (Tracker::*)(T...), const char*, LogFunc logger, T&&... args); TrackerTraits* mTraits = nullptr; /// Observer pointer, not owned by this class TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class @@ -108,7 +111,8 @@ class Tracker std::vector mTrkParams; o2::gpu::GPUChainITS* mRecoChain = nullptr; - unsigned int mNumberOfRuns{0}; + unsigned int mNumberOfDroppedTFs{0}; + unsigned int mTimeFrameCounter{0}; }; inline void Tracker::setParameters(const std::vector& trkPars) @@ -117,8 +121,7 @@ inline void Tracker::setParameters(const std::vector& trkPar } template -float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, std::function logger, - T&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, LogFunc logger, T&&... args) { float diff{0.f}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 46499db92d4d5..f8d593fbf2480 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -51,20 +51,21 @@ class TrackerTraits public: virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf); + virtual void initialiseTimeFrame(const int iteration); virtual void computeLayerTracklets(const int iteration, int iROFslice, int iVertex); virtual void computeLayerCells(const int iteration); virtual void findCellsNeighbours(const int iteration); virtual void findRoads(const int iteration); - virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(error, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } - virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(error, "computeTrackletsHybrid: this method should never be called with CPU traits"); } - virtual void computeCellsHybrid(const int iteration) { LOGP(error, "computeCellsHybrid: this method should never be called with CPU traits"); } - virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(error, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } - virtual void findRoadsHybrid(const int iteration) { LOGP(error, "findRoadsHybrid: this method should never be called with CPU traits"); } - virtual void findTracksHybrid(const int iteration) { LOGP(error, "findTracksHybrid: this method should never be called with CPU traits"); } - virtual void findTracks() { LOGP(error, "findTracks: this method is deprecated."); } virtual void extendTracks(const int iteration); virtual void findShortPrimaries(); + + virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(fatal, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } + virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(fatal, "computeTrackletsHybrid: this method should never be called with CPU traits"); } + virtual void computeCellsHybrid(const int iteration) { LOGP(fatal, "computeCellsHybrid: this method should never be called with CPU traits"); } + virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(fatal, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } + virtual void findRoadsHybrid(const int iteration) { LOGP(fatal, "findRoadsHybrid: this method should never be called with CPU traits"); } + virtual void setBz(float bz); virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); virtual void processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeed, std::vector& updatedCellId); diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index b584bf6b8008b..6eacb94ebb1ea 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -37,9 +37,7 @@ class ITSTrackingInterface const bool overrBeamEst) : mIsMC{isMC}, mUseTriggers{trgType}, - mOverrideBeamEstimation{overrBeamEst} - { - } + mOverrideBeamEstimation{overrBeamEst} {} void setClusterDictionary(const o2::itsmft::TopologyDictionary* d) { mDict = d; } void setMeanVertex(const o2::dataformats::MeanVertexObject* v) @@ -56,6 +54,7 @@ class ITSTrackingInterface void initialise(); template void run(framework::ProcessingContext& pc); + void printSummary() const; virtual void updateTimeDependentParams(framework::ProcessingContext& pc); virtual void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj); diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 50dc1f5dfd039..12357de1af5fc 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -25,8 +25,7 @@ #include "ReconstructionDataFormats/Track.h" #include -#include -#include +#include #include #include #include @@ -37,17 +36,33 @@ namespace its { using o2::its::constants::GB; -Tracker::Tracker(o2::its::TrackerTraits* traits) +Tracker::Tracker(o2::its::TrackerTraits* traits) : mTraits(traits) { /// Initialise standard configuration with 1 iteration mTrkParams.resize(1); - mTraits = traits; } -Tracker::~Tracker() = default; - -void Tracker::clustersToTracks(std::function logger, std::function error) +template +void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) { + constexpr auto pickFunc = [](F1&& cpu, F2&& hybrid) { + if constexpr (T == TrackerType::CPU) { + return std::forward(cpu); + } else if constexpr (T == TrackerType::Hybrid) { + return std::forward(hybrid); + } else { + static_assert(false, "Wrong TrackerType!"); + } + }; + constexpr auto initialiseTimeFrame = pickFunc(&Tracker::initialiseTimeFrame, &Tracker::initialiseTimeFrameHybrid); + constexpr auto computeTracklets = pickFunc(&Tracker::computeTracklets, &Tracker::computeTrackletsHybrid); + constexpr auto computeCells = pickFunc(&Tracker::computeCells, &Tracker::computeCellsHybrid); + constexpr auto findCellsNeighbours = pickFunc(&Tracker::findCellsNeighbours, &Tracker::findCellsNeighboursHybrid); + constexpr auto findRoads = pickFunc(&Tracker::findRoads, &Tracker::findRoadsHybrid); + constexpr auto extendTracks = pickFunc(&Tracker::extendTracks, nullptr); + constexpr auto findShortPrimaries = pickFunc(&Tracker::findShortPrimaries, nullptr); + LogFunc evalLog = [](const std::string&) {}; + double total{0}; mTraits->UpdateTrackingParameters(mTrkParams); int maxNvertices{-1}; @@ -62,22 +77,20 @@ void Tracker::clustersToTracks(std::function logger, std::f if (iteration == 3 && mTrkParams[0].DoUPCIteration) { mTimeFrame->swapMasks(); } - logger(fmt::format("ITS Tracking iteration {} summary:", iteration)); double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; int iVertex{std::min(maxNvertices, 0)}; + logger(std::format("==== ITS {} Tracking iteration {} summary ====", sTrackerNames[T], iteration)); + total += evaluateTask(initialiseTimeFrame, "Timeframe initialisation", logger, iteration); do { for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); + timeTracklets += evaluateTask(computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -86,17 +99,16 @@ void Tracker::clustersToTracks(std::function logger, std::f } float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", + error(std::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); break; } - timeCells += evaluateTask( - &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration); + timeCells += evaluateTask(computeCells, "Cell finding", evalLog, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -105,131 +117,67 @@ void Tracker::clustersToTracks(std::function logger, std::f } float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", + error(std::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); break; } - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighbours, "Neighbour finding", [](std::string) {}, iteration); + timeNeighbours += evaluateTask(findCellsNeighbours, "Neighbour finding", evalLog, iteration); nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); + timeRoads += evaluateTask(findRoads, "Road finding", evalLog, iteration); } iVertex++; } while (iVertex < maxNvertices && !dropTF); - logger(fmt::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); + logger(std::format(" - {} tracklet finding: {} tracklets found in {:.2f} ms", sTrackerNames[T], nTracklets, timeTracklets)); + logger(std::format(" - {} cell finding: {} cells found in {:.2f} ms", sTrackerNames[T], nCells, timeCells)); + logger(std::format(" - {} neighbours finding: {} neighbours found in {:.2f} ms", sTrackerNames[T], nNeighbours, timeNeighbours)); + logger(std::format(" - {} track finding: {} tracks found in {:.2f} ms", sTrackerNames[T], nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if (mTrkParams[iteration].UseTrackFollower) { - int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; - auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", [](const std::string&) {}, iteration); - total += timeExtending; - logger(fmt::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); + if constexpr (extendTracks != nullptr) { + if (mTrkParams[iteration].UseTrackFollower && !dropTF) { + int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; + auto timeExtending = evaluateTask( + extendTracks, "Extending tracks", [](const std::string&) {}, iteration); + total += timeExtending; + logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); + } } if (dropTF) { - error(fmt::format("...Dropping Timeframe...")); + error("...Dropping Timeframe..."); mTimeFrame->dropTracks(); - break; // breaking out the iterations loop + ++mNumberOfDroppedTFs; + return; } } - total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); + if constexpr (findShortPrimaries != nullptr) { + if (mTrkParams[0].FindShortTracks) { + auto nTracksB = mTimeFrame->getNumberOfTracks(); + total += evaluateTask(findShortPrimaries, "Short primaries finding", logger); + auto nTracksA = mTimeFrame->getNumberOfTracks(); + logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); + } + } - std::stringstream sstream; if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; + logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} threads ===", mTimeFrameCounter, total, mTraits->getNThreads())); } - logger(sstream.str()); if (mTimeFrame->hasMCinformation()) { computeTracksMClabels(); } rectifyClusterIndices(); - mNumberOfRuns++; + ++mTimeFrameCounter; } -void Tracker::clustersToTracksHybrid(std::function logger, std::function error) +void Tracker::clustersToTracks(LogFunc logger, LogFunc error) { - double total{0.}; - mTraits->UpdateTrackingParameters(mTrkParams); - int maxNvertices{-1}; - if (mTrkParams[0].PerPrimaryVertexProcessing) { - for (int iROF{0}; iROF < mTimeFrame->getNrof(); ++iROF) { - maxNvertices = std::max(maxNvertices, (int)mTimeFrame->getPrimaryVertices(iROF).size()); - } - } - - for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { - int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; - logger(fmt::format("=========== ITS Hybrid Tracking iteration {} summary ===========", iteration, nROFsIterations, maxNvertices)); - double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; - int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrameHybrid, "Hybrid Timeframe initialisation", logger, iteration); - int iVertex{std::min(maxNvertices, 0)}; - - do { - for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTrackletsHybrid, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); - nTracklets += mTraits->getTFNumberOfTracklets(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; - if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit)); - break; - } - - timeCells += evaluateTask( - &Tracker::computeCellsHybrid, "Cell finding", [](std::string) {}, iteration); - nCells += mTraits->getTFNumberOfCells(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; - if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit)); - break; - } - - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighboursHybrid, "Neighbour finding", [](std::string) {}, iteration); - nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); - } - iVertex++; - } while (iVertex < maxNvertices); - logger(fmt::format(" - Hybrid tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Hybrid cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Hybrid neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Hybrid track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); - total += timeTracklets + timeCells + timeNeighbours + timeRoads; - // total += evaluateTask(&Tracker::extendTracks, "Hybrid extending tracks", logger, iteration); - } - - // total += evaluateTask(&Tracker::findShortPrimaries, "Hybrid short primaries finding", logger); - - std::stringstream sstream; - if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; - } - logger(sstream.str()); + clusterToTracksImpl(logger, error); +} - if (mTimeFrame->hasMCinformation()) { - computeTracksMClabels(); - } - rectifyClusterIndices(); - mNumberOfRuns++; +void Tracker::clustersToTracksHybrid(LogFunc logger, LogFunc error) +{ + clusterToTracksImpl(logger, error); } void Tracker::initialiseTimeFrame(int& iteration) @@ -257,6 +205,16 @@ void Tracker::findRoads(int& iteration) mTraits->findRoads(iteration); } +void Tracker::extendTracks(int& iteration) +{ + mTraits->extendTracks(iteration); +} + +void Tracker::findShortPrimaries() +{ + mTraits->findShortPrimaries(); +} + void Tracker::initialiseTimeFrameHybrid(int& iteration) { mTraits->initialiseTimeFrameHybrid(iteration); @@ -282,26 +240,6 @@ void Tracker::findRoadsHybrid(int& iteration) mTraits->findRoadsHybrid(iteration); } -void Tracker::findTracksHybrid(int& iteration) -{ - mTraits->findTracksHybrid(iteration); -} - -void Tracker::findTracks() -{ - mTraits->findTracks(); -} - -void Tracker::extendTracks(int& iteration) -{ - mTraits->extendTracks(iteration); -} - -void Tracker::findShortPrimaries() -{ - mTraits->findShortPrimaries(); -} - void Tracker::computeRoadsMClabels() { /// Moore's Voting Algorithm @@ -575,5 +513,11 @@ int Tracker::getNThreads() const { return mTraits->getNThreads(); } + +void Tracker::printSummary() const +{ + LOGP(info, "Tracker summary: Processed {} TFs (dropped {})", mTimeFrameCounter, mNumberOfDroppedTFs); +} + } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 8dcb7bfd315c1..c2ee3d3ed6111 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -723,10 +723,7 @@ void TrackerTraits::extendTracks(const int iteration) void TrackerTraits::findShortPrimaries() { - if (!mTrkParams[0].FindShortTracks) { - return; - } - auto propagator = o2::base::Propagator::Instance(); + const auto propagator = o2::base::Propagator::Instance(); mTimeFrame->fillPrimaryVerticesXandAlpha(); for (auto& cell : mTimeFrame->getCells()[0]) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 613402ce56e97..8570d134fe30d 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -438,6 +438,11 @@ void ITSTrackingInterface::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) } } +void ITSTrackingInterface::printSummary() const +{ + mTracker->printSummary(); +} + void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, TrackerTraits* trackerTraits, TimeFrame* frame) diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index 9e4c98ad6e9a1..abbb88aea42fa 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -48,6 +48,7 @@ void TrackerDPL::init(InitContext& ic) void TrackerDPL::stop() { + mITSTrackingInterface.printSummary(); LOGF(info, "CPU Reconstruction total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } @@ -69,6 +70,7 @@ void TrackerDPL::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) void TrackerDPL::endOfStream(EndOfStreamContext& ec) { + mITSTrackingInterface.printSummary(); LOGF(info, "ITS CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } From d07bf78ec525d35cfbeaeb8338d9432fbad66ef4 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 22 Apr 2025 14:37:28 +0200 Subject: [PATCH 4/6] ITS: drop hybrid functions use dynamic dispatch --- .../GPU/ITStrackingGPU/TrackerTraitsGPU.h | 26 +++-- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 60 ++++++----- .../tracking/include/ITStracking/Tracker.h | 21 +--- .../include/ITStracking/TrackerTraits.h | 29 ++---- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 99 ++++--------------- .../ITS/tracking/src/TrackingInterface.cxx | 14 +-- dependencies/FindO2GPU.cmake | 2 +- 7 files changed, 73 insertions(+), 178 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index 21eea4505cdea..c765307473749 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -13,11 +13,8 @@ #ifndef ITSTRACKINGGPU_TRACKERTRAITSGPU_H_ #define ITSTRACKINGGPU_TRACKERTRAITSGPU_H_ -#include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" #include "ITStracking/TrackerTraits.h" #include "ITStrackingGPU/TimeFrameGPU.h" -#include "Framework/Logger.h" namespace o2 { @@ -33,20 +30,19 @@ class TrackerTraitsGPU final : public TrackerTraits void adoptTimeFrame(TimeFrame* tf) final; void initialiseTimeFrame(const int iteration) final; - void setBz(float) final; - void computeLayerTracklets(const int iteration, int, int) final { LOGP(fatal, "computeLayerTracklers must never be called from Hybrid traits!"); }; - void computeLayerCells(const int iteration) final { LOGP(fatal, "computeLayerCells must never be called from Hybrid traits!"); }; - void findCellsNeighbours(const int iteration) final { LOGP(fatal, "findCellsNeighbours must never be called from Hybrid traits!"); }; - void findRoads(const int iteration) final { LOGP(fatal, "findRoads must never be called from Hybrid traits!"); }; - void extendTracks(const int iteration) final { LOGP(fatal, "extendTracks must never be called from Hybrid traits!"); }; - void findShortPrimaries() final { LOGP(fatal, "findShortPrimaries must never be called from Hybrid traits!"); }; + void computeLayerTracklets(const int iteration, int, int) final; + void computeLayerCells(const int iteration) final; + void findCellsNeighbours(const int iteration) final; + void findRoads(const int iteration) final; + + bool supportsExtendTracks() const noexcept final { return false; } + bool supportsFindShortPrimaries() const noexcept final { return false; } + + void setBz(float) final; - void initialiseTimeFrameHybrid(const int iteration) final { initialiseTimeFrame(iteration); }; - void computeTrackletsHybrid(const int iteration, int, int) final; - void computeCellsHybrid(const int iteration) final; - void findCellsNeighboursHybrid(const int iteration) final; - void findRoadsHybrid(const int iteration) final; + const char* getName() const noexcept final { return "GPU"; } + bool isGPU() const noexcept final { return true; } // TimeFrameGPU information forwarding int getTFNumberOfClusters() const override; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 7a9b34b2e7de1..b087ab4084daf 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -38,34 +38,7 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) } template -void TrackerTraitsGPU::setBz(float bz) -{ - mBz = bz; - mTimeFrameGPU->setBz(bz); -} - -template -int TrackerTraitsGPU::getTFNumberOfClusters() const -{ - return mTimeFrameGPU->getNumberOfClusters(); -} - -template -int TrackerTraitsGPU::getTFNumberOfTracklets() const -{ - return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0); -} - -template -int TrackerTraitsGPU::getTFNumberOfCells() const -{ - return mTimeFrameGPU->getNumberOfCells(); -} - -//////////////////////////////////////////////////////////////////////////////// -// Hybrid tracking -template -void TrackerTraitsGPU::computeTrackletsHybrid(const int iteration, int iROFslice, int iVertex) +void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int iROFslice, int iVertex) { auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); mTimeFrameGPU->createTrackletsLUTDevice(iteration); @@ -138,7 +111,7 @@ void TrackerTraitsGPU::computeTrackletsHybrid(const int iteration, int } template -void TrackerTraitsGPU::computeCellsHybrid(const int iteration) +void TrackerTraitsGPU::computeLayerCells(const int iteration) { mTimeFrameGPU->createCellsLUTDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); @@ -185,7 +158,7 @@ void TrackerTraitsGPU::computeCellsHybrid(const int iteration) } template -void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) +void TrackerTraitsGPU::findCellsNeighbours(const int iteration) { mTimeFrameGPU->createNeighboursIndexTablesDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); @@ -237,7 +210,7 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) }; template -void TrackerTraitsGPU::findRoadsHybrid(const int iteration) +void TrackerTraitsGPU::findRoads(const int iteration) { auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { @@ -343,5 +316,30 @@ void TrackerTraitsGPU::findRoadsHybrid(const int iteration) } }; +template +int TrackerTraitsGPU::getTFNumberOfClusters() const +{ + return mTimeFrameGPU->getNumberOfClusters(); +} + +template +int TrackerTraitsGPU::getTFNumberOfTracklets() const +{ + return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0); +} + +template +int TrackerTraitsGPU::getTFNumberOfCells() const +{ + return mTimeFrameGPU->getNumberOfCells(); +} + +template +void TrackerTraitsGPU::setBz(float bz) +{ + mBz = bz; + mTimeFrameGPU->setBz(bz); +} + template class TrackerTraitsGPU<7>; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index b63e61f0b76f4..8584e233b5a97 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -57,11 +57,7 @@ class Tracker void adoptTimeFrame(TimeFrame& tf); - void clustersToTracks( - LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); - void clustersToTracksHybrid( - LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); - std::vector& getTracks(); + void clustersToTracks(LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); void setParameters(const std::vector&); std::vector& getParameters() { return mTrkParams; } @@ -74,14 +70,6 @@ class Tracker void printSummary() const; private: - enum TrackerType : uint8_t { CPU = 0, - Hybrid, - NSize }; - template - void clusterToTracksImpl(LogFunc, LogFunc); - static constexpr const char* sTrackerNames[TrackerType::NSize] = {"CPU", "Hybrid"}; - - // CPU void initialiseTimeFrame(int& iteration); void computeTracklets(int& iteration, int& iROFslice, int& iVertex); void computeCells(int& iteration); @@ -89,13 +77,6 @@ class Tracker void findRoads(int& iteration); void findShortPrimaries(); void extendTracks(int& iteration); - // Hyrbid - void initialiseTimeFrameHybrid(int& iteration); - void computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex); - void computeCellsHybrid(int& iteration); - void findCellsNeighboursHybrid(int& iteration); - void findRoadsHybrid(int& iteration); - void findTracksHybrid(int& iteration); // MC interaction void computeRoadsMClabels(); diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index f8d593fbf2480..6b514c6e8d000 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -16,23 +16,12 @@ #ifndef TRACKINGITSU_INCLUDE_TRACKERTRAITS_H_ #define TRACKINGITSU_INCLUDE_TRACKERTRAITS_H_ -#include -#include #include -#include -#include -#include -#include -#include -#include #include "DetectorsBase/Propagator.h" -#include "DetectorsBase/MatLayerCylSet.h" #include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" #include "ITStracking/MathUtils.h" #include "ITStracking/TimeFrame.h" -#include "ITStracking/Road.h" // #define OPTIMISATION_OUTPUT @@ -51,32 +40,30 @@ class TrackerTraits public: virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf); - virtual void initialiseTimeFrame(const int iteration); + virtual void computeLayerTracklets(const int iteration, int iROFslice, int iVertex); virtual void computeLayerCells(const int iteration); virtual void findCellsNeighbours(const int iteration); virtual void findRoads(const int iteration); + + virtual bool supportsExtendTracks() const noexcept { return true; } virtual void extendTracks(const int iteration); + virtual bool supportsFindShortPrimaries() const noexcept { return true; } virtual void findShortPrimaries(); - virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(fatal, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } - virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(fatal, "computeTrackletsHybrid: this method should never be called with CPU traits"); } - virtual void computeCellsHybrid(const int iteration) { LOGP(fatal, "computeCellsHybrid: this method should never be called with CPU traits"); } - virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(fatal, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } - virtual void findRoadsHybrid(const int iteration) { LOGP(fatal, "findRoadsHybrid: this method should never be called with CPU traits"); } - - virtual void setBz(float bz); virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); virtual void processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeed, std::vector& updatedCellId); void UpdateTrackingParameters(const std::vector& trkPars); TimeFrame* getTimeFrame() { return mTimeFrame; } - void setIsGPU(const unsigned char isgpu) { mIsGPU = isgpu; }; + virtual void setBz(float bz); float getBz() const; void setCorrType(const o2::base::PropagatorImpl::MatCorrType type) { mCorrType = type; } bool isMatLUT() const; + virtual const char* getName() const noexcept { return "CPU"; } + virtual bool isGPU() const noexcept { return false; } // Others GPUhd() static consteval int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; } @@ -110,13 +97,11 @@ class TrackerTraits o2::gpu::GPUChainITS* mChain = nullptr; TimeFrame* mTimeFrame; std::vector mTrkParams; - bool mIsGPU = false; }; inline void TrackerTraits::initialiseTimeFrame(const int iteration) { mTimeFrame->initialise(iteration, mTrkParams[iteration], mTrkParams[iteration].NLayers); - setIsGPU(false); } inline float TrackerTraits::getBz() const diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 12357de1af5fc..5fa249c2d2dca 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -42,25 +42,8 @@ Tracker::Tracker(o2::its::TrackerTraits* traits) : mTraits(traits) mTrkParams.resize(1); } -template -void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) +void Tracker::clustersToTracks(LogFunc logger, LogFunc error) { - constexpr auto pickFunc = [](F1&& cpu, F2&& hybrid) { - if constexpr (T == TrackerType::CPU) { - return std::forward(cpu); - } else if constexpr (T == TrackerType::Hybrid) { - return std::forward(hybrid); - } else { - static_assert(false, "Wrong TrackerType!"); - } - }; - constexpr auto initialiseTimeFrame = pickFunc(&Tracker::initialiseTimeFrame, &Tracker::initialiseTimeFrameHybrid); - constexpr auto computeTracklets = pickFunc(&Tracker::computeTracklets, &Tracker::computeTrackletsHybrid); - constexpr auto computeCells = pickFunc(&Tracker::computeCells, &Tracker::computeCellsHybrid); - constexpr auto findCellsNeighbours = pickFunc(&Tracker::findCellsNeighbours, &Tracker::findCellsNeighboursHybrid); - constexpr auto findRoads = pickFunc(&Tracker::findRoads, &Tracker::findRoadsHybrid); - constexpr auto extendTracks = pickFunc(&Tracker::extendTracks, nullptr); - constexpr auto findShortPrimaries = pickFunc(&Tracker::findShortPrimaries, nullptr); LogFunc evalLog = [](const std::string&) {}; double total{0}; @@ -81,12 +64,12 @@ void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; int iVertex{std::min(maxNvertices, 0)}; - logger(std::format("==== ITS {} Tracking iteration {} summary ====", sTrackerNames[T], iteration)); + logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration)); - total += evaluateTask(initialiseTimeFrame, "Timeframe initialisation", logger, iteration); + total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); do { for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask(computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); + timeTracklets += evaluateTask(&Tracker::computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); @@ -104,7 +87,7 @@ void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) break; } - timeCells += evaluateTask(computeCells, "Cell finding", evalLog, iteration); + timeCells += evaluateTask(&Tracker::computeCells, "Cell finding", evalLog, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); @@ -122,25 +105,22 @@ void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) break; } - timeNeighbours += evaluateTask(findCellsNeighbours, "Neighbour finding", evalLog, iteration); + timeNeighbours += evaluateTask(&Tracker::findCellsNeighbours, "Neighbour finding", evalLog, iteration); nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask(findRoads, "Road finding", evalLog, iteration); + timeRoads += evaluateTask(&Tracker::findRoads, "Road finding", evalLog, iteration); } iVertex++; } while (iVertex < maxNvertices && !dropTF); - logger(std::format(" - {} tracklet finding: {} tracklets found in {:.2f} ms", sTrackerNames[T], nTracklets, timeTracklets)); - logger(std::format(" - {} cell finding: {} cells found in {:.2f} ms", sTrackerNames[T], nCells, timeCells)); - logger(std::format(" - {} neighbours finding: {} neighbours found in {:.2f} ms", sTrackerNames[T], nNeighbours, timeNeighbours)); - logger(std::format(" - {} track finding: {} tracks found in {:.2f} ms", sTrackerNames[T], nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); + logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); + logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); + logger(std::format(" - Meighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); + logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if constexpr (extendTracks != nullptr) { - if (mTrkParams[iteration].UseTrackFollower && !dropTF) { - int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; - auto timeExtending = evaluateTask( - extendTracks, "Extending tracks", [](const std::string&) {}, iteration); - total += timeExtending; - logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); - } + if (mTraits->supportsExtendTracks() && mTrkParams[iteration].UseTrackFollower && !dropTF) { + int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; + auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", [](const std::string&) {}, iteration); + total += timeExtending; + logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); } if (dropTF) { error("...Dropping Timeframe..."); @@ -150,13 +130,11 @@ void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) } } - if constexpr (findShortPrimaries != nullptr) { - if (mTrkParams[0].FindShortTracks) { - auto nTracksB = mTimeFrame->getNumberOfTracks(); - total += evaluateTask(findShortPrimaries, "Short primaries finding", logger); - auto nTracksA = mTimeFrame->getNumberOfTracks(); - logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); - } + if (mTraits->supportsFindShortPrimaries() && mTrkParams[0].FindShortTracks) { + auto nTracksB = mTimeFrame->getNumberOfTracks(); + total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); + auto nTracksA = mTimeFrame->getNumberOfTracks(); + logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); } if constexpr (constants::DoTimeBenchmarks) { @@ -170,16 +148,6 @@ void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) ++mTimeFrameCounter; } -void Tracker::clustersToTracks(LogFunc logger, LogFunc error) -{ - clusterToTracksImpl(logger, error); -} - -void Tracker::clustersToTracksHybrid(LogFunc logger, LogFunc error) -{ - clusterToTracksImpl(logger, error); -} - void Tracker::initialiseTimeFrame(int& iteration) { mTraits->initialiseTimeFrame(iteration); @@ -215,31 +183,6 @@ void Tracker::findShortPrimaries() mTraits->findShortPrimaries(); } -void Tracker::initialiseTimeFrameHybrid(int& iteration) -{ - mTraits->initialiseTimeFrameHybrid(iteration); -} - -void Tracker::computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex) -{ - mTraits->computeTrackletsHybrid(iteration, iROFslice, iVertex); // placeholder for the proper ROF/vertex slicing -} - -void Tracker::computeCellsHybrid(int& iteration) -{ - mTraits->computeCellsHybrid(iteration); -} - -void Tracker::findCellsNeighboursHybrid(int& iteration) -{ - mTraits->findCellsNeighboursHybrid(iteration); -} - -void Tracker::findRoadsHybrid(int& iteration) -{ - mTraits->findRoadsHybrid(iteration); -} - void Tracker::computeRoadsMClabels() { /// Moore's Voting Algorithm diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 8570d134fe30d..f0dad2722a301 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -310,18 +310,10 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) mTimeFrame->setMultiplicityCutMask(processingMask); mTimeFrame->setROFMask(processUPCMask); // Run CA tracker - if constexpr (isGPU) { - if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracksHybrid(logger, fatalLogger); - } else { - mTracker->clustersToTracksHybrid(logger, errorLogger); - } + if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { + mTracker->clustersToTracks(logger, fatalLogger); } else { - if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracks(logger, fatalLogger); - } else { - mTracker->clustersToTracks(logger, errorLogger); - } + mTracker->clustersToTracks(logger, errorLogger); } size_t totTracks{mTimeFrame->getNumberOfTracks()}, totClusIDs{mTimeFrame->getNumberOfUsedClusters()}; allTracks.reserve(totTracks); diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 95db55041184f..fc4d838abcea3 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -68,7 +68,7 @@ endfunction() STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # ---------------------------------- Fast Math / Deterministic Mode ---------------------------------- -# set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override +set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override set(GPUCA_DETERMINISTIC_MODE_MAP_OFF 0) set(GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH 1) # No -ffast-math and similar compile flags for GPU folder set(GPUCA_DETERMINISTIC_MODE_MAP_OPTO2 2) # In addition, -O2 optimization on host for GPU folder From 702278a12c20a3ab7616bd94d6551f3f83f10adc Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 23 Apr 2025 18:48:31 +0200 Subject: [PATCH 5/6] ITS: add total/avg summary + remove some headers --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 14 ++++---------- .../tracking/include/ITStracking/Tracker.h | 1 + Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 7 ++++--- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 19 +++++++++++-------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 29d2404e98681..88666cdfdb7fb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -15,18 +15,13 @@ #include "ITStracking/TimeFrame.h" #include "ITStracking/Configuration.h" - -#include "ITStrackingGPU/ClusterLinesGPU.h" #include "ITStrackingGPU/Utils.h" #include -namespace o2 -{ -namespace its -{ -namespace gpu +namespace o2::its::gpu { + class Stream; class DefaultGPUAllocator : public ExternalAllocator @@ -228,7 +223,6 @@ inline int TimeFrameGPU::getNumberOfCells() const return std::accumulate(mNCells.begin(), mNCells.end(), 0); } -} // namespace gpu -} // namespace its -} // namespace o2 +} // namespace o2::its::gpu + #endif diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 8584e233b5a97..8f0a471b40c59 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -94,6 +94,7 @@ class Tracker unsigned int mNumberOfDroppedTFs{0}; unsigned int mTimeFrameCounter{0}; + double mTotalTime{0}; }; inline void Tracker::setParameters(const std::vector& trkPars) diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 5fa249c2d2dca..c23ba0576c625 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -113,7 +113,7 @@ void Tracker::clustersToTracks(LogFunc logger, LogFunc error) } while (iVertex < maxNvertices && !dropTF); logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(std::format(" - Meighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); + logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; if (mTraits->supportsExtendTracks() && mTrkParams[iteration].UseTrackFollower && !dropTF) { @@ -138,7 +138,7 @@ void Tracker::clustersToTracks(LogFunc logger, LogFunc error) } if constexpr (constants::DoTimeBenchmarks) { - logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} threads ===", mTimeFrameCounter, total, mTraits->getNThreads())); + logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeFrameCounter, total, mTraits->getNThreads())); } if (mTimeFrame->hasMCinformation()) { @@ -146,6 +146,7 @@ void Tracker::clustersToTracks(LogFunc logger, LogFunc error) } rectifyClusterIndices(); ++mTimeFrameCounter; + mTotalTime += total; } void Tracker::initialiseTimeFrame(int& iteration) @@ -459,7 +460,7 @@ int Tracker::getNThreads() const void Tracker::printSummary() const { - LOGP(info, "Tracker summary: Processed {} TFs (dropped {})", mTimeFrameCounter, mNumberOfDroppedTFs); + LOGP(info, "Tracker summary: Processed {} TFs (dropped {}) in TOT={:.2f} s, AVG/TF={:.2f} s", mTimeFrameCounter, mNumberOfDroppedTFs, mTotalTime * 1.e-3, mTotalTime * 1.e-3 / ((mTimeFrameCounter > 0) ? (double)mTimeFrameCounter : -1.0)); } } // namespace its diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index c2ee3d3ed6111..987e8e3128fb4 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -19,7 +19,9 @@ #include #include -#include +#ifdef OPTIMISATION_OUTPUT +#include +#endif #include "CommonConstants/MathConstants.h" #include "DetectorsBase/Propagator.h" @@ -38,7 +40,7 @@ using o2::base::PropagatorF; namespace { -float Sq(float q) +inline float Sq(float q) { return q * q; } @@ -57,7 +59,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in #ifdef OPTIMISATION_OUTPUT static int iter{0}; - std::ofstream off(fmt::format("tracklets{}.txt", iter++)); + std::ofstream off(std::format("tracklets{}.txt", iter++)); #endif for (int iLayer = 0; iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { @@ -173,7 +175,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in break; } } - off << fmt::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; #endif if (deltaZ / sigmaZ < mTrkParams[iteration].NSigmaCut && @@ -270,7 +272,7 @@ void TrackerTraits::computeLayerCells(const int iteration) { #ifdef OPTIMISATION_OUTPUT static int iter{0}; - std::ofstream off(fmt::format("cells{}.txt", iter++)); + std::ofstream off(std::format("cells{}.txt", iter++)); #endif for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { @@ -318,7 +320,7 @@ void TrackerTraits::computeLayerCells(const int iteration) #ifdef OPTIMISATION_OUTPUT bool good{tf->getTrackletsLabel(iLayer)[iTracklet] == tf->getTrackletsLabel(iLayer + 1)[iNextTracklet]}; float signedDelta{currentTracklet.tanLambda - nextTracklet.tanLambda}; - off << fmt::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; #endif if (deltaTanLambda / mTrkParams[iteration].CellDeltaTanLambdaSigma < mTrkParams[iteration].NSigmaCut) { @@ -402,7 +404,7 @@ void TrackerTraits::computeLayerCells(const int iteration) void TrackerTraits::findCellsNeighbours(const int iteration) { #ifdef OPTIMISATION_OUTPUT - std::ofstream off(fmt::format("cellneighs{}.txt", iteration)); + std::ofstream off(std::format("cellneighs{}.txt", iteration)); #endif for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) { const int nextLayerCellsNum{static_cast(mTimeFrame->getCells()[iLayer + 1].size())}; @@ -439,7 +441,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) #ifdef OPTIMISATION_OUTPUT bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]}; - off << fmt::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl; + off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl; #endif if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) { @@ -469,6 +471,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) void TrackerTraits::processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeeds, std::vector& updatedCellsIds) { + bool print = iLayer == 3 && iLevel == 2; if (iLevel < 2 || iLayer < 1) { std::cout << "Error: layer " << iLayer << " or level " << iLevel << " cannot be processed by processNeighbours" << std::endl; exit(1); From bafda50d64201388d1e23052ba77df6cdc50b376 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 23 Apr 2025 19:06:28 +0200 Subject: [PATCH 6/6] Make ITS GPU tracking deterministic again --- .../GPU/ITStrackingGPU/TrackingKernels.h | 3 +- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 6 +--- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 33 +++++++------------ 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 720867ddaba29..21b14fd9292d2 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -176,8 +176,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads); -int filterCellNeighboursHandler(std::vector&, - gpuPair*, +int filterCellNeighboursHandler(gpuPair*, int*, unsigned int); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index b087ab4084daf..f3b62ec8a6108 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -200,8 +200,7 @@ void TrackerTraitsGPU::findCellsNeighbours(const int iteration) conf.nBlocks, conf.nThreads); - filterCellNeighboursHandler(mTimeFrameGPU->getCellsNeighbours()[iLayer], - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), + filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), mTimeFrameGPU->getDeviceNeighbours(iLayer), nNeigh); } @@ -220,9 +219,6 @@ void TrackerTraitsGPU::findRoads(const int iteration) if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; } - std::vector lastCellId, updatedCellId; - std::vector lastCellSeed, updatedCellSeed; - processNeighboursHandler(startLayer, startLevel, mTimeFrameGPU->getDeviceArrayCells(), diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index b1cd6725f3003..bb39e9e70341b 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -181,6 +181,11 @@ struct equal_tracklets { GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex == b.secondClusterIndex; } }; +template +struct sort_by_second { + GPUhd() bool operator()(const gpuPair& a, const gpuPair& b) const { return a.second < b.second; } +}; + template struct pair_to_first { GPUhd() int operator()(const gpuPair& a) const @@ -1110,32 +1115,18 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, GPUChkErrS(cudaDeviceSynchronize()); } -int filterCellNeighboursHandler(std::vector& neighHost, // TODO: eventually remove this! - gpuPair* cellNeighbourPairs, +int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, int* cellNeighbours, unsigned int nNeigh) { thrust::device_ptr> neighVectorPairs(cellNeighbourPairs); thrust::device_ptr validNeighs(cellNeighbours); - thrust::device_vector keys(nNeigh); // TODO: externally allocate. - thrust::device_vector vals(nNeigh); // TODO: externally allocate. - thrust::copy(thrust::make_transform_iterator(neighVectorPairs, gpu::pair_to_second()), - thrust::make_transform_iterator(neighVectorPairs + nNeigh, gpu::pair_to_second()), - keys.begin()); - thrust::sequence(vals.begin(), vals.end()); - thrust::sort_by_key(keys.begin(), keys.end(), vals.begin()); - thrust::device_vector> sortedNeigh(nNeigh); - thrust::copy(thrust::make_permutation_iterator(neighVectorPairs, vals.begin()), - thrust::make_permutation_iterator(neighVectorPairs, vals.end()), - sortedNeigh.begin()); - GPUChkErrS(cudaDeviceSynchronize()); - auto trimmedBegin = thrust::find_if(sortedNeigh.begin(), sortedNeigh.end(), gpu::is_valid_pair()); // trim leading -1s - auto trimmedSize = sortedNeigh.end() - trimmedBegin; - neighHost.resize(trimmedSize); - thrust::transform(trimmedBegin, sortedNeigh.end(), validNeighs, gpu::pair_to_first()); - GPUChkErrS(cudaMemcpy(neighHost.data(), cellNeighbours, trimmedSize * sizeof(int), cudaMemcpyDeviceToHost)); + auto updatedEnd = thrust::remove_if(neighVectorPairs, neighVectorPairs + nNeigh, gpu::is_invalid_pair()); + size_t newSize = updatedEnd - neighVectorPairs; + thrust::stable_sort(neighVectorPairs, neighVectorPairs + newSize, gpu::sort_by_second()); + thrust::transform(neighVectorPairs, neighVectorPairs + newSize, validNeighs, gpu::pair_to_first()); - return trimmedSize; + return newSize; } template @@ -1267,7 +1258,7 @@ void processNeighboursHandler(const int startLayer, auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; std::vector outSeedsHost(s); - thrust::copy(updatedCellSeed.begin(), updatedCellSeed.begin() + s, outSeedsHost.begin()); + thrust::copy(outSeeds.begin(), outSeeds.begin() + s, outSeedsHost.begin()); seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end()); }