From 2e8c70a4499a2e87c8e0f65d25a355baa82f3bb9 Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Wed, 3 Dec 2025 06:53:54 -0500 Subject: [PATCH] PERF: Use tuned ImageRegionRange copy in CastImageFilter Based on performance testing, across converting between Image of Vector and VectorImage, this loop was the best performing. Key features of the improved performance: - Uses NumericTraits::GetLength over Image::GetNumberOfComponents, the former may be constant, while the latter is virtual - Uses const InputPixelType & inputPixel - OutputPixelType value{ outputIt.Get() } initialized to a reference in the output image buffer and does not perform memory allocation for variable length vectors. --- .../include/itkCastImageFilter.hxx | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/Modules/Filtering/ImageFilterBase/include/itkCastImageFilter.hxx b/Modules/Filtering/ImageFilterBase/include/itkCastImageFilter.hxx index 16753ef1a3d..30106a29a57 100644 --- a/Modules/Filtering/ImageFilterBase/include/itkCastImageFilter.hxx +++ b/Modules/Filtering/ImageFilterBase/include/itkCastImageFilter.hxx @@ -20,6 +20,7 @@ #include "itkProgressReporter.h" #include "itkImageAlgorithm.h" +#include "itkImageRegionRange.h" namespace itk { @@ -139,29 +140,30 @@ CastImageFilter::DynamicThreadedGenerateDataDispatche this->CallCopyOutputRegionToInputRegion(inputRegionForThread, outputRegionForThread); - const unsigned int componentsPerPixel = outputPtr->GetNumberOfComponentsPerPixel(); + ImageRegionRange inputRange(*inputPtr, inputRegionForThread); + ImageRegionRange outputRange(*outputPtr, outputRegionForThread); - // Define the iterators - ImageScanlineConstIterator inputIt(inputPtr, inputRegionForThread); - ImageScanlineIterator outputIt(outputPtr, outputRegionForThread); + auto inputIt = inputRange.begin(); + auto outputIt = outputRange.begin(); + const auto inputEnd = inputRange.end(); - while (!inputIt.IsAtEnd()) + // Note: This loop has been timed for 
performance with conversions between image of vectors and VectorImages and other + // combinations. The following was evaluated to be the best performance usage of iterators. Important considerations: + // - Usage of NumericTraits::GetLength() is sometimes constant vs virtual method GetNumberOfComponentsPerPixel() + // - The construction of inputPixel and outputPixel for VectorImages both reference the internal buffer and don't + // require memory allocations. + const unsigned int componentsPerPixel = itk::NumericTraits::GetLength(*outputIt); + while (inputIt != inputEnd) { - while (!inputIt.IsAtEndOfLine()) + const InputPixelType & inputPixel = *inputIt; + OutputPixelType outputPixel{ *outputIt }; + for (unsigned int k = 0; k < componentsPerPixel; ++k) { - const InputPixelType & inputPixel = inputIt.Get(); - OutputPixelType value{ outputIt.Get() }; - for (unsigned int k = 0; k < componentsPerPixel; ++k) - { - value[k] = static_cast(inputPixel[k]); - } - outputIt.Set(value); - - ++inputIt; - ++outputIt; + outputPixel[k] = static_cast(inputPixel[k]); } - inputIt.NextLine(); - outputIt.NextLine(); + *outputIt = outputPixel; + ++inputIt; + ++outputIt; } }