From 291f85744ae07c5cf1c258a95e6dc01705b10f47 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 04:17:13 +0500 Subject: [PATCH 01/13] Simplify backslash handling --- Ramstack.Globbing/Internal/PathHelper.cs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index 744faec..f411115 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -198,10 +198,10 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length) /// /// A 256-bit bitmask for escaping characters. /// - private static Vector256 CreateAllowEscaping256Bitmask(MatchFlags flags) + private static Vector256 CreateBackslash256Bitmask(MatchFlags flags) { var mask = Vector256.Zero; - if (flags != MatchFlags.Windows) + if (flags == MatchFlags.Windows) mask = Vector256.AllBitsSet; return mask; @@ -214,10 +214,10 @@ private static Vector256 CreateAllowEscaping256Bitmask(MatchFlags flags) /// /// A 128-bit bitmask for escaping characters. /// - private static Vector128 CreateAllowEscaping128Bitmask(MatchFlags flags) + private static Vector128 CreateBackslash128Bitmask(MatchFlags flags) { var mask = Vector128.Zero; - if (flags != MatchFlags.Windows) + if (flags == MatchFlags.Windows) mask = Vector128.AllBitsSet; return mask; @@ -340,14 +340,14 @@ public PathSegmentIterator() => if (Avx2.IsSupported && (int)_position + Vector256.Count <= length) { var chunk = LoadVector256(ref source, _position); - var allowEscapingMask = CreateAllowEscaping256Bitmask(flags); + var backslashMask = CreateBackslash256Bitmask(flags); var slash = Vector256.Create((ushort)'/'); var backslash = Vector256.Create((ushort)'\\'); var comparison = Avx2.Or( Avx2.CompareEqual(chunk, slash), - Avx2.AndNot( - allowEscapingMask, + Avx2.And( + backslashMask, Avx2.CompareEqual(chunk, backslash))); // @@ -367,14 +367,14 @@ public PathSegmentIterator() => else if (Sse2.IsSupported && !Avx2.IsSupported && (int)_position + Vector128.Count <= length) { var chunk = LoadVector128(ref source, _position); - var allowEscapingMask = CreateAllowEscaping128Bitmask(flags); + var backslashMask = CreateBackslash128Bitmask(flags); var slash = Vector128.Create((ushort)'/'); var backslash = Vector128.Create((ushort)'\\'); var comparison = Sse2.Or( Sse2.CompareEqual(chunk, slash), - Sse2.AndNot( - allowEscapingMask, + Sse2.And( + backslashMask, Sse2.CompareEqual(chunk, backslash))); // From 3f663fa3550809b28147fb982a7d83d1c22242f0 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 04:20:45 +0500 Subject: [PATCH 02/13] Add AdvSimd support for ARM platforms --- .github/workflows/test.yml | 18 ++++-- .../Ramstack.Globbing.Tests.csproj | 2 +- .../SimdConfigurationTests.cs | 23 +++++-- Ramstack.Globbing/Internal/PathHelper.cs | 64 ++++++++++++++++++- Ramstack.Globbing/Ramstack.Globbing.csproj | 4 +- 5 files changed, 96 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b8dcdc..d7e426a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest] steps: - name: Setup .NET @@ -38,24 +38,28 @@ jobs: - name: Test (Release) run: dotnet test -c Release --no-build - - name: Test (Debug, Avx2=Disabled) + - name: Test (Debug, Avx2=0) env: COMPlus_EnableAVX2: "0" run: dotnet test -c Debug --no-build - - name: Test (Release, Avx2=Disabled) + - name: Test (Release, Avx2=0) env: COMPlus_EnableAVX2: "0" run: dotnet test -c Release --no-build - - name: Test (Debug, Avx2=Disabled, Sse2=Disabled) + - name: Test (Debug, Sse2=0, Sse41=0, Avx2=0, AdvSimd=0) env: - COMPlus_EnableAVX2: "0" COMPlus_EnableSSE2: "0" + COMPlus_EnableSSE41: "0" + COMPlus_EnableAVX2: "0" + COMPlus_EnableAdvSimd: "0" run: dotnet test -c Debug --no-build - - name: Test (Release, Avx2=Disabled, Sse2=Disabled) + - name: Test (Release, Sse2=0, Sse41=0, Avx2=0, AdvSimd=0) env: - COMPlus_EnableAVX2: "0" COMPlus_EnableSSE2: "0" + COMPlus_EnableSSE41: "0" + COMPlus_EnableAVX2: "0" + COMPlus_EnableAdvSimd: "0" run: dotnet test -c Release --no-build diff --git a/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj b/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj index 2a75b0b..2f124f9 100644 --- a/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj +++ b/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj @@ -1,6 +1,6 @@ - net6.0 + net6.0;net7.0 enable enable preview diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index ba541a2..a60a2a4 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -1,3 +1,5 @@ +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing; @@ -8,10 +10,23 @@ public class SimdConfigurationTests [Test] public void VerifySimdConfiguration() { - var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0"; - var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0"; + switch (RuntimeInformation.ProcessArchitecture) + { + case Architecture.X64: + var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0"; + var isSse41Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE41") == "0"; + var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0"; - Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); - Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported)); + Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported)); + Assert.That(isSse41Disabled, Is.EqualTo(!Sse41.IsSupported)); + Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); + break; + + case Architecture.Arm64: + var isAdvSimdDisabled = Environment.GetEnvironmentVariable("COMPlus_EnableAdvSimd") == "0"; + + Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); + break; + } } } diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index f411115..6b63104 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -4,6 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing.Internal; @@ -172,14 +173,46 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length) } while (i < tail); + // // Process remaining chars // NOTE: An extra one write for the 'length == Vector128.Count' + // value = LoadVector128(ref p, tail); mask = Sse2.CompareEqual(value, backslash); result = Sse41.BlendVariable(value, slash, mask); WriteVector128(ref p, tail, result); } + else if (AdvSimd.IsSupported && length >= Vector128.Count) + { + Vector128 value; + Vector128 mask; + Vector128 result; + + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + var tail = length - Vector128.Count; + + do + { + value = LoadVector128(ref p, i); + mask = AdvSimd.CompareEqual(value, backslash); + result = AdvSimd.BitwiseSelect(mask, slash, value); + WriteVector128(ref p, i, result); + + i += Vector128.Count; + } + while (i < tail); + + // + // Process remaining chars + // NOTE: An extra one write for the 'length == Vector128.Count' + // + value = LoadVector128(ref p, tail); + mask = AdvSimd.CompareEqual(value, backslash); + result = AdvSimd.BitwiseSelect(mask, slash, value); + WriteVector128(ref p, tail, result); + } else { for (; i < length; i++) @@ -301,7 +334,7 @@ public PathSegmentIterator() => while ((int)_position < length) { - if ((Avx2.IsSupported || Sse2.IsSupported) && _mask != 0) + if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0) { var offset = BitOperations.TrailingZeroCount(_mask); _last = (int)(_position + (nint)((uint)offset >> 1)); @@ -391,6 +424,35 @@ public PathSegmentIterator() => if (_mask == 0) _position += Vector128.Count; } + #if NET7_0_OR_GREATER + else if (AdvSimd.IsSupported && (int)_position + Vector128.Count <= length) + { + var chunk = LoadVector128(ref source, _position); + var backslashMask = CreateBackslash128Bitmask(flags); + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + + var comparison = AdvSimd.Or( + AdvSimd.CompareEqual(chunk, slash), + AdvSimd.And( + backslashMask, + AdvSimd.CompareEqual(chunk, backslash))); + + // + // Store the comparison bitmask and reuse it across iterations + // as long as it contains non-zero bits. + // This avoids reloading SIMD registers and repeating comparisons + // on the same chunk of data. + // + _mask = comparison.ExtractMostSignificantBits(); + + // + // Advance position to the next chunk when no separators found + // + if (_mask == 0) + _position += Vector128.Count; + } + #endif else { for (; (int)_position < length; _position++) diff --git a/Ramstack.Globbing/Ramstack.Globbing.csproj b/Ramstack.Globbing/Ramstack.Globbing.csproj index 4cbbbbb..78e0e99 100644 --- a/Ramstack.Globbing/Ramstack.Globbing.csproj +++ b/Ramstack.Globbing/Ramstack.Globbing.csproj @@ -1,6 +1,6 @@ - + - net6.0 + net6.0;net7.0 Fast and zero-allocation .NET globbing library for matching file paths using glob patterns. enable enable From 2368427e052d32c6aca5d32b0fb839d728c9bcec Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 04:26:58 +0500 Subject: [PATCH 03/13] Temporarily disable AdvSimd support --- .../SimdConfigurationTests.cs | 12 +- Ramstack.Globbing/Internal/PathHelper.cs | 122 +++++++++--------- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index a60a2a4..ed3e5cb 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -1,5 +1,5 @@ using System.Runtime.InteropServices; -using System.Runtime.Intrinsics.Arm; +// using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing; @@ -22,11 +22,11 @@ public void VerifySimdConfiguration() Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); break; - case Architecture.Arm64: - var isAdvSimdDisabled = Environment.GetEnvironmentVariable("COMPlus_EnableAdvSimd") == "0"; - - Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); - break; + // case Architecture.Arm64: + // var isAdvSimdDisabled = Environment.GetEnvironmentVariable("COMPlus_EnableAdvSimd") == "0"; + // + // Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); + // break; } } } diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index 6b63104..b007034 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -4,7 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; +// using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing.Internal; @@ -183,36 +183,36 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length) result = Sse41.BlendVariable(value, slash, mask); WriteVector128(ref p, tail, result); } - else if (AdvSimd.IsSupported && length >= Vector128.Count) - { - Vector128 value; - Vector128 mask; - Vector128 result; - - var slash = Vector128.Create((ushort)'/'); - var backslash = Vector128.Create((ushort)'\\'); - var tail = length - Vector128.Count; - - do - { - value = LoadVector128(ref p, i); - mask = AdvSimd.CompareEqual(value, backslash); - result = AdvSimd.BitwiseSelect(mask, slash, value); - WriteVector128(ref p, i, result); - - i += Vector128.Count; - } - while (i < tail); - - // - // Process remaining chars - // NOTE: An extra one write for the 'length == Vector128.Count' - // - value = LoadVector128(ref p, tail); - mask = AdvSimd.CompareEqual(value, backslash); - result = AdvSimd.BitwiseSelect(mask, slash, value); - WriteVector128(ref p, tail, result); - } + // else if (AdvSimd.IsSupported && length >= Vector128.Count) + // { + // Vector128 value; + // Vector128 mask; + // Vector128 result; + // + // var slash = Vector128.Create((ushort)'/'); + // var backslash = Vector128.Create((ushort)'\\'); + // var tail = length - Vector128.Count; + // + // do + // { + // value = LoadVector128(ref p, i); + // mask = AdvSimd.CompareEqual(value, backslash); + // result = AdvSimd.BitwiseSelect(mask, slash, value); + // WriteVector128(ref p, i, result); + // + // i += Vector128.Count; + // } + // while (i < tail); + // + // // + // // Process remaining chars + // // NOTE: An extra one write for the 'length == Vector128.Count' + // // + // value = LoadVector128(ref p, tail); + // mask = AdvSimd.CompareEqual(value, backslash); + // result = AdvSimd.BitwiseSelect(mask, slash, value); + // WriteVector128(ref p, tail, result); + // } else { for (; i < length; i++) @@ -334,7 +334,7 @@ public PathSegmentIterator() => while ((int)_position < length) { - if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0) + if ((Avx2.IsSupported || Sse2.IsSupported/* || AdvSimd.IsSupported*/) && _mask != 0) { var offset = BitOperations.TrailingZeroCount(_mask); _last = (int)(_position + (nint)((uint)offset >> 1)); @@ -424,35 +424,35 @@ public PathSegmentIterator() => if (_mask == 0) _position += Vector128.Count; } - #if NET7_0_OR_GREATER - else if (AdvSimd.IsSupported && (int)_position + Vector128.Count <= length) - { - var chunk = LoadVector128(ref source, _position); - var backslashMask = CreateBackslash128Bitmask(flags); - var slash = Vector128.Create((ushort)'/'); - var backslash = Vector128.Create((ushort)'\\'); - - var comparison = AdvSimd.Or( - AdvSimd.CompareEqual(chunk, slash), - AdvSimd.And( - backslashMask, - AdvSimd.CompareEqual(chunk, backslash))); - - // - // Store the comparison bitmask and reuse it across iterations - // as long as it contains non-zero bits. - // This avoids reloading SIMD registers and repeating comparisons - // on the same chunk of data. - // - _mask = comparison.ExtractMostSignificantBits(); - - // - // Advance position to the next chunk when no separators found - // - if (_mask == 0) - _position += Vector128.Count; - } - #endif + // #if NET7_0_OR_GREATER + // else if (AdvSimd.IsSupported && (int)_position + Vector128.Count <= length) + // { + // var chunk = LoadVector128(ref source, _position); + // var backslashMask = CreateBackslash128Bitmask(flags); + // var slash = Vector128.Create((ushort)'/'); + // var backslash = Vector128.Create((ushort)'\\'); + // + // var comparison = AdvSimd.Or( + // AdvSimd.CompareEqual(chunk, slash), + // AdvSimd.And( + // backslashMask, + // AdvSimd.CompareEqual(chunk, backslash))); + // + // // + // // Store the comparison bitmask and reuse it across iterations + // // as long as it contains non-zero bits. + // // This avoids reloading SIMD registers and repeating comparisons + // // on the same chunk of data. + // // + // _mask = comparison.ExtractMostSignificantBits(); + // + // // + // // Advance position to the next chunk when no separators found + // // + // if (_mask == 0) + // _position += Vector128.Count; + // } + // #endif else { for (; (int)_position < length; _position++) From cdeafc2b26636e7021eb4786a67d4019b0bec4a9 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 13:36:35 +0500 Subject: [PATCH 04/13] Enable single AdvSimd block for debugging Note: This is a debugging commit and the code may be reverted or modified based on CI test results. --- .../SimdConfigurationTests.cs | 12 ++-- Ramstack.Globbing/Internal/PathHelper.cs | 62 +++++++++---------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index ed3e5cb..a60a2a4 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -1,5 +1,5 @@ using System.Runtime.InteropServices; -// using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing; @@ -22,11 +22,11 @@ public void VerifySimdConfiguration() Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); break; - // case Architecture.Arm64: - // var isAdvSimdDisabled = Environment.GetEnvironmentVariable("COMPlus_EnableAdvSimd") == "0"; - // - // Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); - // break; + case Architecture.Arm64: + var isAdvSimdDisabled = Environment.GetEnvironmentVariable("COMPlus_EnableAdvSimd") == "0"; + + Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); + break; } } } diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index b007034..477d22d 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -4,7 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -// using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing.Internal; @@ -183,36 +183,36 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length) result = Sse41.BlendVariable(value, slash, mask); WriteVector128(ref p, tail, result); } - // else if (AdvSimd.IsSupported && length >= Vector128.Count) - // { - // Vector128 value; - // Vector128 mask; - // Vector128 result; - // - // var slash = Vector128.Create((ushort)'/'); - // var backslash = Vector128.Create((ushort)'\\'); - // var tail = length - Vector128.Count; - // - // do - // { - // value = LoadVector128(ref p, i); - // mask = AdvSimd.CompareEqual(value, backslash); - // result = AdvSimd.BitwiseSelect(mask, slash, value); - // WriteVector128(ref p, i, result); - // - // i += Vector128.Count; - // } - // while (i < tail); - // - // // - // // Process remaining chars - // // NOTE: An extra one write for the 'length == Vector128.Count' - // // - // value = LoadVector128(ref p, tail); - // mask = AdvSimd.CompareEqual(value, backslash); - // result = AdvSimd.BitwiseSelect(mask, slash, value); - // WriteVector128(ref p, tail, result); - // } + else if (AdvSimd.IsSupported && length >= Vector128.Count) + { + Vector128 value; + Vector128 mask; + Vector128 result; + + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + var tail = length - Vector128.Count; + + do + { + value = LoadVector128(ref p, i); + mask = AdvSimd.CompareEqual(value, backslash); + result = AdvSimd.BitwiseSelect(mask, slash, value); + WriteVector128(ref p, i, result); + + i += Vector128.Count; + } + while (i < tail); + + // + // Process remaining chars + // NOTE: An extra one write for the 'length == Vector128.Count' + // + value = LoadVector128(ref p, tail); + mask = AdvSimd.CompareEqual(value, backslash); + result = AdvSimd.BitwiseSelect(mask, slash, value); + WriteVector128(ref p, tail, result); + } else { for (; i < length; i++) From 0146cee6b974eda99e5fe33c1d20f9d9c06f0fba Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 14:10:58 +0500 Subject: [PATCH 05/13] Update environment variables from COMPlus_ to DOTNET_ prefix --- .github/workflows/test.yml | 20 +++++++++---------- .../SimdConfigurationTests.cs | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d7e426a..b55263e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,26 +40,26 @@ jobs: - name: Test (Debug, Avx2=0) env: - COMPlus_EnableAVX2: "0" + DOTNET_EnableAVX2: "0" run: dotnet test -c Debug --no-build - name: Test (Release, Avx2=0) env: - COMPlus_EnableAVX2: "0" + DOTNET_EnableAVX2: "0" run: dotnet test -c Release --no-build - name: Test (Debug, Sse2=0, Sse41=0, Avx2=0, AdvSimd=0) env: - COMPlus_EnableSSE2: "0" - COMPlus_EnableSSE41: "0" - COMPlus_EnableAVX2: "0" - COMPlus_EnableAdvSimd: "0" + DOTNET_EnableSSE2: "0" + DOTNET_EnableSSE41: "0" + DOTNET_EnableAVX2: "0" + DOTNET_EnableAdvSimd: "0" run: dotnet test -c Debug --no-build - name: Test (Release, Sse2=0, Sse41=0, Avx2=0, AdvSimd=0) env: - COMPlus_EnableSSE2: "0" - COMPlus_EnableSSE41: "0" - COMPlus_EnableAVX2: "0" - COMPlus_EnableAdvSimd: "0" + DOTNET_EnableSSE2: "0" + DOTNET_EnableSSE41: "0" + DOTNET_EnableAVX2: "0" + DOTNET_EnableAdvSimd: "0" run: dotnet test -c Release --no-build diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index a60a2a4..161b79d 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -13,9 +13,9 @@ public void VerifySimdConfiguration() switch (RuntimeInformation.ProcessArchitecture) { case Architecture.X64: - var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0"; - var isSse41Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE41") == "0"; - var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0"; + var isSse2Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableSSE2") == "0"; + var isSse41Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableSSE41") == "0"; + var isAvx2Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableAVX2") == "0"; Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported)); Assert.That(isSse41Disabled, Is.EqualTo(!Sse41.IsSupported)); @@ -23,7 +23,7 @@ public void VerifySimdConfiguration() break; case Architecture.Arm64: - var isAdvSimdDisabled = Environment.GetEnvironmentVariable("COMPlus_EnableAdvSimd") == "0"; + var isAdvSimdDisabled = Environment.GetEnvironmentVariable("DOTNET_EnableAdvSimd") == "0"; Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); break; From f2006f1d6fe5c3c420e69bc5a13ad08953853af7 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 14:14:58 +0500 Subject: [PATCH 06/13] Add debug output for AdvSimd support detection in tests --- Ramstack.Globbing.Tests/SimdConfigurationTests.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index 161b79d..aa50a4b 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -25,6 +25,9 @@ public void VerifySimdConfiguration() case Architecture.Arm64: var isAdvSimdDisabled = Environment.GetEnvironmentVariable("DOTNET_EnableAdvSimd") == "0"; + Console.WriteLine($"DOTNET_EnableAdvSimd: {isAdvSimdDisabled}"); + Console.WriteLine($"AdvSimd.IsSupported : {AdvSimd.IsSupported}"); + Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); break; } From a15d92926020106635ac95aefc9bb3f48275f874 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 15:10:48 +0500 Subject: [PATCH 07/13] Use DOTNET_EnableHWIntrinsic=0 to disable all intrinsics at once --- .github/workflows/test.yml | 18 ++++-------- .../SimdConfigurationTests.cs | 29 +++++++------------ 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b55263e..a3e9f9d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -38,28 +38,22 @@ jobs: - name: Test (Release) run: dotnet test -c Release --no-build - - name: Test (Debug, Avx2=0) + - name: Test (Debug, AVX2=0) env: DOTNET_EnableAVX2: "0" run: dotnet test -c Debug --no-build - - name: Test (Release, Avx2=0) + - name: Test (Release, AVX2=0) env: DOTNET_EnableAVX2: "0" run: dotnet test -c Release --no-build - - name: Test (Debug, Sse2=0, Sse41=0, Avx2=0, AdvSimd=0) + - name: Test (Debug, HWIntrinsic=0) env: - DOTNET_EnableSSE2: "0" - DOTNET_EnableSSE41: "0" - DOTNET_EnableAVX2: "0" - DOTNET_EnableAdvSimd: "0" + DOTNET_EnableHWIntrinsic: "0" run: dotnet test -c Debug --no-build - - name: Test (Release, Sse2=0, Sse41=0, Avx2=0, AdvSimd=0) + - name: Test (Release, HWIntrinsic=0) env: - DOTNET_EnableSSE2: "0" - DOTNET_EnableSSE41: "0" - DOTNET_EnableAVX2: "0" - DOTNET_EnableAdvSimd: "0" + DOTNET_EnableHWIntrinsic: "0" run: dotnet test -c Release --no-build diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index aa50a4b..7371115 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -10,26 +10,19 @@ public class SimdConfigurationTests [Test] public void VerifySimdConfiguration() { - switch (RuntimeInformation.ProcessArchitecture) + if (Environment.GetEnvironmentVariable("DOTNET_EnableHWIntrinsic") == "0") { - case Architecture.X64: - var isSse2Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableSSE2") == "0"; - var isSse41Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableSSE41") == "0"; - var isAvx2Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableAVX2") == "0"; - - Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported)); - Assert.That(isSse41Disabled, Is.EqualTo(!Sse41.IsSupported)); - Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); - break; - - case Architecture.Arm64: - var isAdvSimdDisabled = Environment.GetEnvironmentVariable("DOTNET_EnableAdvSimd") == "0"; - - Console.WriteLine($"DOTNET_EnableAdvSimd: {isAdvSimdDisabled}"); - Console.WriteLine($"AdvSimd.IsSupported : {AdvSimd.IsSupported}"); + Assert.That(Sse2.IsSupported, Is.False); + Assert.That(Sse41.IsSupported, Is.False); + Assert.That(Avx2.IsSupported, Is.False); + Assert.That(AdvSimd.IsSupported, Is.False); + } - Assert.That(isAdvSimdDisabled, Is.EqualTo(!AdvSimd.IsSupported)); - break; + if (RuntimeInformation.ProcessArchitecture == Architecture.X64 && Environment.GetEnvironmentVariable("DOTNET_EnableAVX2") == "0") + { + Assert.That(Sse2.IsSupported, Is.True); + Assert.That(Sse41.IsSupported, Is.True); + Assert.That(Avx2.IsSupported, Is.False); } } } From 4817e037dc8e5b05feee2f2908f108e55c00379b Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 15:53:42 +0500 Subject: [PATCH 08/13] Enable another AdvSimd block for debugging Note: This is a debugging commit and the code may be reverted or modified based on CI test results. --- Ramstack.Globbing/Internal/PathHelper.cs | 59 ++++++++++++------------ 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index 477d22d..ea617ca 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -1,3 +1,4 @@ +using System.Buffers.Binary; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Numerics; @@ -424,35 +425,35 @@ public PathSegmentIterator() => if (_mask == 0) _position += Vector128.Count; } - // #if NET7_0_OR_GREATER - // else if (AdvSimd.IsSupported && (int)_position + Vector128.Count <= length) - // { - // var chunk = LoadVector128(ref source, _position); - // var backslashMask = CreateBackslash128Bitmask(flags); - // var slash = Vector128.Create((ushort)'/'); - // var backslash = Vector128.Create((ushort)'\\'); - // - // var comparison = AdvSimd.Or( - // AdvSimd.CompareEqual(chunk, slash), - // AdvSimd.And( - // backslashMask, - // AdvSimd.CompareEqual(chunk, backslash))); - // - // // - // // Store the comparison bitmask and reuse it across iterations - // // as long as it contains non-zero bits. - // // This avoids reloading SIMD registers and repeating comparisons - // // on the same chunk of data. - // // - // _mask = comparison.ExtractMostSignificantBits(); - // - // // - // // Advance position to the next chunk when no separators found - // // - // if (_mask == 0) - // _position += Vector128.Count; - // } - // #endif + #if NET7_0_OR_GREATER + else if (AdvSimd.IsSupported && (int)_position + Vector128.Count <= length) + { + var chunk = LoadVector128(ref source, _position); + var backslashMask = CreateBackslash128Bitmask(flags); + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + + var comparison = AdvSimd.Or( + AdvSimd.CompareEqual(chunk, slash), + AdvSimd.And( + backslashMask, + AdvSimd.CompareEqual(chunk, backslash))); + + // + // Store the comparison bitmask and reuse it across iterations + // as long as it contains non-zero bits. + // This avoids reloading SIMD registers and repeating comparisons + // on the same chunk of data. + // + _mask = BinaryPrimitives.ReverseEndianness(comparison.ExtractMostSignificantBits()); + + // + // Advance position to the next chunk when no separators found + // + if (_mask == 0) + _position += Vector128.Count; + } + #endif else { for (; (int)_position < length; _position++) From 2ba8c0b8e356a9c2f06fc9943c7bd6ab4b467a74 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 16:03:48 +0500 Subject: [PATCH 09/13] Add debug echo to verify CI workflow progression --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a3e9f9d..121b2a6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,9 @@ jobs: run: dotnet build -c Release - name: Test (Debug) - run: dotnet test -c Debug --no-build + run: | + dotnet test -c Debug --no-build + echo "Tests completed" - name: Test (Release) run: dotnet test -c Release --no-build From 5bb57b616f139a9fd87b6bdc26d3aff281037e11 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 16:11:32 +0500 Subject: [PATCH 10/13] Fix ARM _mask processing --- .github/workflows/test.yml | 4 +--- Ramstack.Globbing/Internal/PathHelper.cs | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 121b2a6..a3e9f9d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,9 +33,7 @@ jobs: run: dotnet build -c Release - name: Test (Debug) - run: | - dotnet test -c Debug --no-build - echo "Tests completed" + run: dotnet test -c Debug --no-build - name: Test (Release) run: dotnet test -c Release --no-build diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index ea617ca..d51ad3a 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -335,7 +335,7 @@ public PathSegmentIterator() => while ((int)_position < length) { - if ((Avx2.IsSupported || Sse2.IsSupported/* || AdvSimd.IsSupported*/) && _mask != 0) + if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0) { var offset = BitOperations.TrailingZeroCount(_mask); _last = (int)(_position + (nint)((uint)offset >> 1)); From 72f308c6a4377195abf45102882e46710a25a59c Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 16:14:54 +0500 Subject: [PATCH 11/13] Test mask extraction without endian swap --- Ramstack.Globbing/Internal/PathHelper.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index d51ad3a..6b63104 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -1,4 +1,3 @@ -using System.Buffers.Binary; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Numerics; @@ -445,7 +444,7 @@ public PathSegmentIterator() => // This avoids reloading SIMD registers and repeating comparisons // on the same chunk of data. // - _mask = BinaryPrimitives.ReverseEndianness(comparison.ExtractMostSignificantBits()); + _mask = comparison.ExtractMostSignificantBits(); // // Advance position to the next chunk when no separators found From 9ec33c776d1f6542a6167ee3c4c9e058e4723cf8 Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 17:07:57 +0500 Subject: [PATCH 12/13] Attempt to fix ARM64 mask processing --- Ramstack.Globbing/Internal/PathHelper.cs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index 6b63104..63c9c40 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -337,12 +337,22 @@ public PathSegmentIterator() => if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0) { var offset = BitOperations.TrailingZeroCount(_mask); - _last = (int)(_position + (nint)((uint)offset >> 1)); - - // - // Clear the bits for the current separator to process the next position in the mask - // - _mask &= ~(0b_11u << offset); + if (AdvSimd.IsSupported) + { + _last = (int)(_position + (nint)(uint)offset); + // + // Clear the bits for the current separator to process the next position in the mask + // + _mask &= ~(1u << offset); + } + else + { + _last = (int)(_position + (nint)((uint)offset >> 1)); + // + // Clear the bits for the current separator to process the next position in the mask + // + _mask &= ~(0b_11u << offset); + } // // Advance position to the next chunk when no separators remain in the mask From a1affb9b76adcb8c18181ea60359c5218396653d Mon Sep 17 00:00:00 2001 From: rameel Date: Mon, 14 Jul 2025 17:21:27 +0500 Subject: [PATCH 13/13] Add comments explaining ARM64 vs x86 SIMD mask differences --- Ramstack.Globbing/Internal/PathHelper.cs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index 63c9c40..2cff6c5 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -339,17 +339,29 @@ public PathSegmentIterator() => var offset = BitOperations.TrailingZeroCount(_mask); if (AdvSimd.IsSupported) { + // + // On ARM, ExtractMostSignificantBits returns a mask where each bit + // represents one vector element (1 bit per ushort), so offset + // directly corresponds to the element index + // _last = (int)(_position + (nint)(uint)offset); + // - // Clear the bits for the current separator to process the next position in the mask + // Clear the bits for the current separator // _mask &= ~(1u << offset); } else { + // + // On x86, MoveMask (and ExtractMostSignificantBits on byte-based vectors) + // returns a mask where each bit represents one byte (2 bits per ushort), + // so we need to divide offset by 2 to get the actual element index + // _last = (int)(_position + (nint)((uint)offset >> 1)); + // - // Clear the bits for the current separator to process the next position in the mask + // Clear the bits for the current separator // _mask &= ~(0b_11u << offset); }