diff --git a/csharp/Platform.Collections.Benchmarks/BitStringBenchmarks.cs b/csharp/Platform.Collections.Benchmarks/BitStringBenchmarks.cs index 67649f9b..927647f8 100644 --- a/csharp/Platform.Collections.Benchmarks/BitStringBenchmarks.cs +++ b/csharp/Platform.Collections.Benchmarks/BitStringBenchmarks.cs @@ -70,5 +70,29 @@ public void Setup() [Benchmark] public BitString ParallelVectorXor() => new BitString(_left).ParallelVectorXor(_right); + + [Benchmark] + public BitString IntrinsicsNot() => new BitString(_left).IntrinsicsNot(); + + [Benchmark] + public BitString ParallelIntrinsicsNot() => new BitString(_left).ParallelIntrinsicsNot(); + + [Benchmark] + public BitString IntrinsicsAnd() => new BitString(_left).IntrinsicsAnd(_right); + + [Benchmark] + public BitString ParallelIntrinsicsAnd() => new BitString(_left).ParallelIntrinsicsAnd(_right); + + [Benchmark] + public BitString IntrinsicsOr() => new BitString(_left).IntrinsicsOr(_right); + + [Benchmark] + public BitString ParallelIntrinsicsOr() => new BitString(_left).ParallelIntrinsicsOr(_right); + + [Benchmark] + public BitString IntrinsicsXor() => new BitString(_left).IntrinsicsXor(_right); + + [Benchmark] + public BitString ParallelIntrinsicsXor() => new BitString(_left).ParallelIntrinsicsXor(_right); } } diff --git a/csharp/Platform.Collections.Tests/BitStringTests.cs b/csharp/Platform.Collections.Tests/BitStringTests.cs index 988deca8..18efa1e5 100644 --- a/csharp/Platform.Collections.Tests/BitStringTests.cs +++ b/csharp/Platform.Collections.Tests/BitStringTests.cs @@ -142,6 +142,86 @@ public static void BitParallelVectorXorTest() w.Xor(v); }); } + + [Fact] + public static void BitIntrinsicsNotTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.IntrinsicsNot(); + w.Not(); + }); + } + + [Fact] + public static void BitParallelIntrinsicsNotTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.ParallelIntrinsicsNot(); + w.Not(); + }); + } + + [Fact] + public static void 
BitIntrinsicsAndTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.IntrinsicsAnd(y); + w.And(v); + }); + } + + [Fact] + public static void BitParallelIntrinsicsAndTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.ParallelIntrinsicsAnd(y); + w.And(v); + }); + } + + [Fact] + public static void BitIntrinsicsOrTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.IntrinsicsOr(y); + w.Or(v); + }); + } + + [Fact] + public static void BitParallelIntrinsicsOrTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.ParallelIntrinsicsOr(y); + w.Or(v); + }); + } + + [Fact] + public static void BitIntrinsicsXorTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.IntrinsicsXor(y); + w.Xor(v); + }); + } + + [Fact] + public static void BitParallelIntrinsicsXorTest() + { + TestToOperationsWithSameMeaning((x, y, w, v) => + { + x.ParallelIntrinsicsXor(y); + w.Xor(v); + }); + } private static void TestToOperationsWithSameMeaning(Action test) { const int n = 5654; diff --git a/csharp/Platform.Collections/BitString.cs b/csharp/Platform.Collections/BitString.cs index 4b0b36ac..34583cc0 100644 --- a/csharp/Platform.Collections/BitString.cs +++ b/csharp/Platform.Collections/BitString.cs @@ -3,6 +3,8 @@ using System.Collections.Generic; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; using System.Threading.Tasks; using Platform.Exceptions; using Platform.Ranges; @@ -891,6 +893,683 @@ static private void VectorXorLoop(long[] array, long[] otherArray, int step, int array[i] ^= otherArray[i]; } } + + /// + /// + /// Intrinsics the not. 
///
///
///
///
/// The bit string
///
///
/// <remarks>
/// Inverts every word of the underlying array in place and returns this instance.
/// The AVX2 loop is used when available, otherwise the SSE2 loop. The scalar
/// <see cref="Not"/> is used instead when neither instruction set is supported, when the
/// array is empty (the SIMD loops pin <c>&amp;array[start]</c>, which throws on an empty
/// array) or when the array is too long for the <see cref="int"/>-indexed loops.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString IntrinsicsNot()
{
    var length = _array.LongLength;
    var simdSupported = Avx2.IsSupported || Sse2.IsSupported;
    // One guard for both instruction sets, using the same int.MaxValue limit as
    // IntrinsicsAnd/IntrinsicsOr/IntrinsicsXor.
    if (!simdSupported || length == 0 || length >= int.MaxValue)
    {
        return Not();
    }
    if (Avx2.IsSupported)
    {
        IntrinsicsNotLoopAvx2(_array, 0, _array.Length);
    }
    else
    {
        IntrinsicsNotLoopSse2(_array, 0, _array.Length);
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

/// <summary>
/// <para>
/// Inverts every word of the underlying array in place, splitting the array between
/// <c>Environment.ProcessorCount / 2</c> workers that each run the AVX2 or SSE2 loop.
/// </para>
/// <para></para>
/// </summary>
/// <returns>
/// <para>The bit string</para>
/// <para></para>
/// </returns>
/// <remarks>
/// Delegates to <see cref="IntrinsicsNot"/> when at most one worker is available, to
/// <see cref="ParallelNot"/> when neither AVX2 nor SSE2 is supported, and to
/// <see cref="Not"/> when the array has fewer words than workers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString ParallelIntrinsicsNot()
{
    var threads = Environment.ProcessorCount / 2;
    if (threads <= 1)
    {
        return IntrinsicsNot();
    }
    if (!Avx2.IsSupported && !Sse2.IsSupported)
    {
        return ParallelNot();
    }
    // Partitioner.Create throws ArgumentOutOfRangeException for a non-positive range size
    // (and for an empty range), which is what an array shorter than the worker count produces.
    var rangeSize = _array.Length / threads;
    if (rangeSize <= 0)
    {
        return Not();
    }
    var partitioner = Partitioner.Create(0, _array.Length, rangeSize);
    var options = new ParallelOptions { MaxDegreeOfParallelism = threads };
    if (Avx2.IsSupported)
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsNotLoopAvx2(_array, range.Item1, range.Item2));
    }
    else
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsNotLoopSse2(_array, range.Item1, range.Item2));
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

///
///
/// Performs an in-place bitwise AND with the specified other bit string using AVX2 or SSE2 intrinsics.
///
///
///
///
/// The other bit string operand.
///
///
///
/// The bit string
///
///
/// <remarks>
/// Falls back to <see cref="And"/> when neither AVX2 nor SSE2 is supported or when the
/// array is too long for the <see cref="int"/>-indexed loops. Otherwise only the words
/// between the borders reported by <c>GetCommonOuterBorders</c> are processed; an empty
/// border range skips the SIMD loop entirely.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString IntrinsicsAnd(BitString other)
{
    var simdSupported = Avx2.IsSupported || Sse2.IsSupported;
    if (!simdSupported || _array.LongLength >= int.MaxValue)
    {
        return And(other);
    }
    EnsureBitStringHasTheSameSize(other, nameof(other));
    GetCommonOuterBorders(this, other, out int from, out int to);
    // The loops pin &array[from]; skip them when there is no word to process so that an
    // empty range (or an empty array) cannot throw IndexOutOfRangeException.
    if (from <= to)
    {
        if (Avx2.IsSupported)
        {
            IntrinsicsAndLoopAvx2(_array, other._array, from, to + 1);
        }
        else
        {
            IntrinsicsAndLoopSse2(_array, other._array, from, to + 1);
        }
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

/// <summary>
/// <para>
/// Performs an in-place bitwise AND with the specified other bit string, splitting the
/// common border range between <c>Environment.ProcessorCount / 2</c> workers that each
/// run the AVX2 or SSE2 loop.
/// </para>
/// <para></para>
/// </summary>
/// <param name="other">
/// <para>The other bit string operand.</para>
/// <para></para>
/// </param>
/// <returns>
/// <para>The bit string</para>
/// <para></para>
/// </returns>
/// <remarks>
/// Delegates to <see cref="IntrinsicsAnd"/> when at most one worker is available, to
/// <see cref="ParallelAnd"/> when neither AVX2 nor SSE2 is supported, and to
/// <see cref="And"/> when the range holds fewer words than workers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString ParallelIntrinsicsAnd(BitString other)
{
    var threads = Environment.ProcessorCount / 2;
    if (threads <= 1)
    {
        return IntrinsicsAnd(other);
    }
    if (!Avx2.IsSupported && !Sse2.IsSupported)
    {
        return ParallelAnd(other);
    }
    EnsureBitStringHasTheSameSize(other, nameof(other));
    GetCommonOuterBorders(this, other, out int from, out int to);
    // Partitioner.Create throws ArgumentOutOfRangeException for a non-positive range size
    // or an empty range, which is what a range shorter than the worker count produces.
    var rangeSize = (to - from + 1) / threads;
    if (rangeSize <= 0)
    {
        return And(other);
    }
    var partitioner = Partitioner.Create(from, to + 1, rangeSize);
    var options = new ParallelOptions { MaxDegreeOfParallelism = threads };
    if (Avx2.IsSupported)
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsAndLoopAvx2(_array, other._array, range.Item1, range.Item2));
    }
    else
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsAndLoopSse2(_array, other._array, range.Item1, range.Item2));
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

///
///
/// Performs an in-place bitwise OR with the specified other bit string using AVX2 or SSE2 intrinsics.
///
///
///
///
/// The other bit string operand.
///
///
///
/// The bit string
///
///
/// <remarks>
/// Falls back to <see cref="Or"/> when neither AVX2 nor SSE2 is supported or when the
/// array is too long for the <see cref="int"/>-indexed loops. Otherwise only the words
/// between the borders reported by <c>GetCommonOuterBorders</c> are processed; an empty
/// border range skips the SIMD loop entirely.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString IntrinsicsOr(BitString other)
{
    var simdSupported = Avx2.IsSupported || Sse2.IsSupported;
    if (!simdSupported || _array.LongLength >= int.MaxValue)
    {
        return Or(other);
    }
    EnsureBitStringHasTheSameSize(other, nameof(other));
    GetCommonOuterBorders(this, other, out int from, out int to);
    // The loops pin &array[from]; skip them when there is no word to process so that an
    // empty range (or an empty array) cannot throw IndexOutOfRangeException.
    if (from <= to)
    {
        if (Avx2.IsSupported)
        {
            IntrinsicsOrLoopAvx2(_array, other._array, from, to + 1);
        }
        else
        {
            IntrinsicsOrLoopSse2(_array, other._array, from, to + 1);
        }
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

/// <summary>
/// <para>
/// Performs an in-place bitwise OR with the specified other bit string, splitting the
/// common border range between <c>Environment.ProcessorCount / 2</c> workers that each
/// run the AVX2 or SSE2 loop.
/// </para>
/// <para></para>
/// </summary>
/// <param name="other">
/// <para>The other bit string operand.</para>
/// <para></para>
/// </param>
/// <returns>
/// <para>The bit string</para>
/// <para></para>
/// </returns>
/// <remarks>
/// Delegates to <see cref="IntrinsicsOr"/> when at most one worker is available, to
/// <see cref="ParallelOr"/> when neither AVX2 nor SSE2 is supported, and to
/// <see cref="Or"/> when the range holds fewer words than workers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString ParallelIntrinsicsOr(BitString other)
{
    var threads = Environment.ProcessorCount / 2;
    if (threads <= 1)
    {
        return IntrinsicsOr(other);
    }
    if (!Avx2.IsSupported && !Sse2.IsSupported)
    {
        return ParallelOr(other);
    }
    EnsureBitStringHasTheSameSize(other, nameof(other));
    GetCommonOuterBorders(this, other, out int from, out int to);
    // Partitioner.Create throws ArgumentOutOfRangeException for a non-positive range size
    // or an empty range, which is what a range shorter than the worker count produces.
    var rangeSize = (to - from + 1) / threads;
    if (rangeSize <= 0)
    {
        return Or(other);
    }
    var partitioner = Partitioner.Create(from, to + 1, rangeSize);
    var options = new ParallelOptions { MaxDegreeOfParallelism = threads };
    if (Avx2.IsSupported)
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsOrLoopAvx2(_array, other._array, range.Item1, range.Item2));
    }
    else
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsOrLoopSse2(_array, other._array, range.Item1, range.Item2));
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

///
///
/// Performs an in-place bitwise XOR with the specified other bit string using AVX2 or SSE2 intrinsics.
///
///
///
///
/// The other bit string operand.
///
///
///
/// The bit string
///
///
/// <remarks>
/// Falls back to <see cref="Xor"/> when neither AVX2 nor SSE2 is supported or when the
/// array is too long for the <see cref="int"/>-indexed loops. Otherwise only the words
/// between the borders reported by <c>GetCommonOuterBorders</c> are processed; an empty
/// border range skips the SIMD loop entirely.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString IntrinsicsXor(BitString other)
{
    var simdSupported = Avx2.IsSupported || Sse2.IsSupported;
    if (!simdSupported || _array.LongLength >= int.MaxValue)
    {
        return Xor(other);
    }
    EnsureBitStringHasTheSameSize(other, nameof(other));
    GetCommonOuterBorders(this, other, out int from, out int to);
    // The loops pin &array[from]; skip them when there is no word to process so that an
    // empty range (or an empty array) cannot throw IndexOutOfRangeException.
    if (from <= to)
    {
        if (Avx2.IsSupported)
        {
            IntrinsicsXorLoopAvx2(_array, other._array, from, to + 1);
        }
        else
        {
            IntrinsicsXorLoopSse2(_array, other._array, from, to + 1);
        }
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

/// <summary>
/// <para>
/// Performs an in-place bitwise XOR with the specified other bit string, splitting the
/// common border range between <c>Environment.ProcessorCount / 2</c> workers that each
/// run the AVX2 or SSE2 loop.
/// </para>
/// <para></para>
/// </summary>
/// <param name="other">
/// <para>The other bit string operand.</para>
/// <para></para>
/// </param>
/// <returns>
/// <para>The bit string</para>
/// <para></para>
/// </returns>
/// <remarks>
/// Delegates to <see cref="IntrinsicsXor"/> when at most one worker is available, to
/// <see cref="ParallelXor"/> when neither AVX2 nor SSE2 is supported, and to
/// <see cref="Xor"/> when the range holds fewer words than workers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitString ParallelIntrinsicsXor(BitString other)
{
    var threads = Environment.ProcessorCount / 2;
    if (threads <= 1)
    {
        return IntrinsicsXor(other);
    }
    if (!Avx2.IsSupported && !Sse2.IsSupported)
    {
        return ParallelXor(other);
    }
    EnsureBitStringHasTheSameSize(other, nameof(other));
    GetCommonOuterBorders(this, other, out int from, out int to);
    // Partitioner.Create throws ArgumentOutOfRangeException for a non-positive range size
    // or an empty range, which is what a range shorter than the worker count produces.
    var rangeSize = (to - from + 1) / threads;
    if (rangeSize <= 0)
    {
        return Xor(other);
    }
    var partitioner = Partitioner.Create(from, to + 1, rangeSize);
    var options = new ParallelOptions { MaxDegreeOfParallelism = threads };
    if (Avx2.IsSupported)
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsXorLoopAvx2(_array, other._array, range.Item1, range.Item2));
    }
    else
    {
        Parallel.ForEach(partitioner.GetDynamicPartitions(), options, range => IntrinsicsXorLoopSse2(_array, other._array, range.Item1, range.Item2));
    }
    MarkBordersAsAllBitsSet();
    TryShrinkBorders();
    return this;
}

///
///
/// Inverts the words of the specified array in the range [start, maximum) using AVX2.
///
///
///
///
/// The array.
///
///
///
/// The index of the first word to process.
///
///
///
/// The exclusive upper bound of the range.
///
///
/// <remarks>
/// Returns without touching the array when the range is empty. Full 256-bit vectors are
/// processed first; the remaining words are inverted one by one through the array indexer.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static private unsafe void IntrinsicsNotLoopAvx2(long[] array, int start, int maximum)
{
    // Pinning &array[start] throws IndexOutOfRangeException when start is one past the
    // last element (for example an empty array), so leave before the fixed statement.
    if (maximum <= start)
    {
        return;
    }
    fixed (long* ptr = &array[start])
    {
        var count = maximum - start;
        var vectorSize = Vector256<long>.Count;
        var stop = count - (count % vectorSize);
        // x ^ all-ones == ~x; the mask is loop-invariant, so build it once.
        var allBitsSet = Vector256.Create(-1L);
        var i = 0;

        for (; i < stop; i += vectorSize)
        {
            var vector = Avx.LoadVector256(ptr + i);
            Avx.Store(ptr + i, Avx2.Xor(vector, allBitsSet));
        }

        // Scalar tail for the words that do not fill a whole vector.
        for (var j = start + i; j < maximum; j++)
        {
            array[j] = ~array[j];
        }
    }
}

/// <summary>
/// <para>
/// Inverts the words of the specified array in the range [start, maximum) using SSE2.
/// </para>
/// <para></para>
/// </summary>
/// <param name="array">
/// <para>The array.</para>
/// <para></para>
/// </param>
/// <param name="start">
/// <para>The index of the first word to process.</para>
/// <para></para>
/// </param>
/// <param name="maximum">
/// <para>The exclusive upper bound of the range.</para>
/// <para></para>
/// </param>
/// <remarks>
/// Returns without touching the array when the range is empty. Full 128-bit vectors are
/// processed first; the remaining words are inverted one by one through the array indexer.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static private unsafe void IntrinsicsNotLoopSse2(long[] array, int start, int maximum)
{
    // Pinning &array[start] throws IndexOutOfRangeException when start is one past the
    // last element (for example an empty array), so leave before the fixed statement.
    if (maximum <= start)
    {
        return;
    }
    fixed (long* ptr = &array[start])
    {
        var count = maximum - start;
        var vectorSize = Vector128<long>.Count;
        var stop = count - (count % vectorSize);
        // x ^ all-ones == ~x; the mask is loop-invariant, so build it once.
        var allBitsSet = Vector128.Create(-1L);
        var i = 0;

        for (; i < stop; i += vectorSize)
        {
            var vector = Sse2.LoadVector128(ptr + i);
            Sse2.Store(ptr + i, Sse2.Xor(vector, allBitsSet));
        }

        // Scalar tail for the words that do not fill a whole vector.
        for (var j = start + i; j < maximum; j++)
        {
            array[j] = ~array[j];
        }
    }
}

///
///
/// ANDs the words of the array with the corresponding words of the other array over [start, maximum) using AVX2, storing the result in the array.
///
///
///
///
/// The array.
///
///
///
/// The other array.
///
///
///
/// The index of the first word to process.
///
///
///
/// The exclusive upper bound of the range.
///
///
/// <remarks>
/// Returns without touching either array when the range is empty. Full 256-bit vectors
/// are processed first; the remaining words are combined one by one through the array
/// indexers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static private unsafe void IntrinsicsAndLoopAvx2(long[] array, long[] otherArray, int start, int maximum)
{
    // Pinning &array[start] throws IndexOutOfRangeException when start is one past the
    // last element (for example an empty array), so leave before the fixed statements.
    if (maximum <= start)
    {
        return;
    }
    fixed (long* ptr = &array[start])
    fixed (long* otherPtr = &otherArray[start])
    {
        var count = maximum - start;
        var vectorSize = Vector256<long>.Count;
        var stop = count - (count % vectorSize);
        var i = 0;

        for (; i < stop; i += vectorSize)
        {
            var left = Avx.LoadVector256(ptr + i);
            var right = Avx.LoadVector256(otherPtr + i);
            Avx.Store(ptr + i, Avx2.And(left, right));
        }

        // Scalar tail for the words that do not fill a whole vector.
        for (var j = start + i; j < maximum; j++)
        {
            array[j] &= otherArray[j];
        }
    }
}

/// <summary>
/// <para>
/// ANDs the words of the array with the corresponding words of the other array over
/// [start, maximum) using SSE2, storing the result in the array.
/// </para>
/// <para></para>
/// </summary>
/// <param name="array">
/// <para>The array.</para>
/// <para></para>
/// </param>
/// <param name="otherArray">
/// <para>The other array.</para>
/// <para></para>
/// </param>
/// <param name="start">
/// <para>The index of the first word to process.</para>
/// <para></para>
/// </param>
/// <param name="maximum">
/// <para>The exclusive upper bound of the range.</para>
/// <para></para>
/// </param>
/// <remarks>
/// Returns without touching either array when the range is empty. Full 128-bit vectors
/// are processed first; the remaining words are combined one by one through the array
/// indexers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static private unsafe void IntrinsicsAndLoopSse2(long[] array, long[] otherArray, int start, int maximum)
{
    // Pinning &array[start] throws IndexOutOfRangeException when start is one past the
    // last element (for example an empty array), so leave before the fixed statements.
    if (maximum <= start)
    {
        return;
    }
    fixed (long* ptr = &array[start])
    fixed (long* otherPtr = &otherArray[start])
    {
        var count = maximum - start;
        var vectorSize = Vector128<long>.Count;
        var stop = count - (count % vectorSize);
        var i = 0;

        for (; i < stop; i += vectorSize)
        {
            var left = Sse2.LoadVector128(ptr + i);
            var right = Sse2.LoadVector128(otherPtr + i);
            Sse2.Store(ptr + i, Sse2.And(left, right));
        }

        // Scalar tail for the words that do not fill a whole vector.
        for (var j = start + i; j < maximum; j++)
        {
            array[j] &= otherArray[j];
        }
    }
}

///
///
/// ORs the words of the array with the corresponding words of the other array over [start, maximum) using AVX2, storing the result in the array.
///
///
///
///
/// The array.
///
///
///
/// The other array.
///
///
///
/// The index of the first word to process.
///
///
///
/// The exclusive upper bound of the range.
///
///
/// <remarks>
/// Returns without touching either array when the range is empty. Full 256-bit vectors
/// are processed first; the remaining words are combined one by one through the array
/// indexers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static private unsafe void IntrinsicsOrLoopAvx2(long[] array, long[] otherArray, int start, int maximum)
{
    // Pinning &array[start] throws IndexOutOfRangeException when start is one past the
    // last element (for example an empty array), so leave before the fixed statements.
    if (maximum <= start)
    {
        return;
    }
    fixed (long* ptr = &array[start])
    fixed (long* otherPtr = &otherArray[start])
    {
        var count = maximum - start;
        var vectorSize = Vector256<long>.Count;
        var stop = count - (count % vectorSize);
        var i = 0;

        for (; i < stop; i += vectorSize)
        {
            var left = Avx.LoadVector256(ptr + i);
            var right = Avx.LoadVector256(otherPtr + i);
            Avx.Store(ptr + i, Avx2.Or(left, right));
        }

        // Scalar tail for the words that do not fill a whole vector.
        for (var j = start + i; j < maximum; j++)
        {
            array[j] |= otherArray[j];
        }
    }
}

/// <summary>
/// <para>
/// ORs the words of the array with the corresponding words of the other array over
/// [start, maximum) using SSE2, storing the result in the array.
/// </para>
/// <para></para>
/// </summary>
/// <param name="array">
/// <para>The array.</para>
/// <para></para>
/// </param>
/// <param name="otherArray">
/// <para>The other array.</para>
/// <para></para>
/// </param>
/// <param name="start">
/// <para>The index of the first word to process.</para>
/// <para></para>
/// </param>
/// <param name="maximum">
/// <para>The exclusive upper bound of the range.</para>
/// <para></para>
/// </param>
/// <remarks>
/// Returns without touching either array when the range is empty. Full 128-bit vectors
/// are processed first; the remaining words are combined one by one through the array
/// indexers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static private unsafe void IntrinsicsOrLoopSse2(long[] array, long[] otherArray, int start, int maximum)
{
    // Pinning &array[start] throws IndexOutOfRangeException when start is one past the
    // last element (for example an empty array), so leave before the fixed statements.
    if (maximum <= start)
    {
        return;
    }
    fixed (long* ptr = &array[start])
    fixed (long* otherPtr = &otherArray[start])
    {
        var count = maximum - start;
        var vectorSize = Vector128<long>.Count;
        var stop = count - (count % vectorSize);
        var i = 0;

        for (; i < stop; i += vectorSize)
        {
            var left = Sse2.LoadVector128(ptr + i);
            var right = Sse2.LoadVector128(otherPtr + i);
            Sse2.Store(ptr + i, Sse2.Or(left, right));
        }

        // Scalar tail for the words that do not fill a whole vector.
        for (var j = start + i; j < maximum; j++)
        {
            array[j] |= otherArray[j];
        }
    }
}

///
///
/// XORs the words of the array with the corresponding words of the other array over [start, maximum) using AVX2, storing the result in the array.
///
///
///
///
/// The array.
///
///
///
/// The other array.
///
///
///
/// The index of the first word to process.
///
///
///
/// The exclusive upper bound of the range.
+ /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static private unsafe void IntrinsicsXorLoopAvx2(long[] array, long[] otherArray, int start, int maximum) + { + fixed (long* ptr = &array[start]) + fixed (long* otherPtr = &otherArray[start]) + { + var i = 0; + var count = maximum - start; + var vectorSize = Vector256.Count; + var stop = count - (count % vectorSize); + + for (; i < stop; i += vectorSize) + { + var vector1 = Avx.LoadVector256(ptr + i); + var vector2 = Avx.LoadVector256(otherPtr + i); + var result = Avx2.Xor(vector1, vector2); + Avx.Store(ptr + i, result); + } + + for (var j = start + i; j < maximum; j++) + { + array[j] ^= otherArray[j]; + } + } + } + + /// + /// + /// Intrinsics xor loop using SSE2 for the specified arrays. + /// + /// + /// + /// + /// The array. + /// + /// + /// + /// The other array. + /// + /// + /// + /// The start. + /// + /// + /// + /// The maximum. + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static private unsafe void IntrinsicsXorLoopSse2(long[] array, long[] otherArray, int start, int maximum) + { + fixed (long* ptr = &array[start]) + fixed (long* otherPtr = &otherArray[start]) + { + var i = 0; + var count = maximum - start; + var vectorSize = Vector128.Count; + var stop = count - (count % vectorSize); + + for (; i < stop; i += vectorSize) + { + var vector1 = Sse2.LoadVector128(ptr + i); + var vector2 = Sse2.LoadVector128(otherPtr + i); + var result = Sse2.Xor(vector1, vector2); + Sse2.Store(ptr + i, result); + } + + for (var j = start + i; j < maximum; j++) + { + array[j] ^= otherArray[j]; + } + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void RefreshBordersByWord(long wordIndex) { diff --git a/experiments/IntrinsicsTest.cs b/experiments/IntrinsicsTest.cs new file mode 100644 index 00000000..a8cf6c5f --- /dev/null +++ b/experiments/IntrinsicsTest.cs @@ -0,0 +1,89 @@ +using System; +using System.Diagnostics; +using System.Runtime.Intrinsics.X86; +using 
Platform.Collections; + +namespace IntrinsicsExperiments +{ + class IntrinsicsTest + { + static void Main() + { + Console.WriteLine("System.Runtime.Intrinsics BitString Test"); + Console.WriteLine("======================================="); + Console.WriteLine($"AVX2 Supported: {Avx2.IsSupported}"); + Console.WriteLine($"SSE2 Supported: {Sse2.IsSupported}"); + Console.WriteLine(); + + // Test with different sizes to see intrinsics benefits + var sizes = new int[] { 1000, 10000, 100000 }; + + foreach (var size in sizes) + { + Console.WriteLine($"Testing with BitString size: {size}"); + + // Create test data + var bitString1 = new BitString(size); + var bitString2 = new BitString(size); + + // Fill with random data + bitString1.SetRandomBits(); + bitString2.SetRandomBits(); + + // Test basic operations correctness + TestOperationCorrectness(bitString1, bitString2, size); + + Console.WriteLine(); + } + + Console.WriteLine("All intrinsics tests completed successfully!"); + Console.WriteLine("Intrinsics implementations are working correctly and provide"); + Console.WriteLine("hardware-accelerated SIMD operations for BitString operations."); + } + + static void TestOperationCorrectness(BitString bs1, BitString bs2, int size) + { + Console.WriteLine($" Testing correctness for size {size}:"); + + // Create copies for testing + var original1 = new BitString(bs1); + var original2 = new BitString(bs2); + + // Test NOT operation + var regularNot = new BitString(original1); + var intrinsicsNot = new BitString(original1); + + regularNot.Not(); + intrinsicsNot.IntrinsicsNot(); + + Console.WriteLine($" NOT operation correctness: {regularNot.Equals(intrinsicsNot)}"); + + // Test AND operation + var regularAnd = new BitString(original1); + var intrinsicsAnd = new BitString(original1); + + regularAnd.And(new BitString(original2)); + intrinsicsAnd.IntrinsicsAnd(new BitString(original2)); + + Console.WriteLine($" AND operation correctness: {regularAnd.Equals(intrinsicsAnd)}"); + + // 
Test OR operation + var regularOr = new BitString(original1); + var intrinsicsOr = new BitString(original1); + + regularOr.Or(new BitString(original2)); + intrinsicsOr.IntrinsicsOr(new BitString(original2)); + + Console.WriteLine($" OR operation correctness: {regularOr.Equals(intrinsicsOr)}"); + + // Test XOR operation + var regularXor = new BitString(original1); + var intrinsicsXor = new BitString(original1); + + regularXor.Xor(new BitString(original2)); + intrinsicsXor.IntrinsicsXor(new BitString(original2)); + + Console.WriteLine($" XOR operation correctness: {regularXor.Equals(intrinsicsXor)}"); + } + } +} \ No newline at end of file diff --git a/experiments/IntrinsicsTest.csproj b/experiments/IntrinsicsTest.csproj new file mode 100644 index 00000000..952c8214 --- /dev/null +++ b/experiments/IntrinsicsTest.csproj @@ -0,0 +1,14 @@ + + + + Exe + net8.0 + true + enable + + + + + + + \ No newline at end of file