In-vector sorting (pcmpstr)
11. 11. 2020 #kód
import gcc.builtins;
import core.simd;
import core.bitop;

/**
 * Micro-benchmark driver.
 *
 * Reads exactly eight unsigned 16-bit integers from the command line, then
 * times `pcmpstrSort` and `vecPrefixSort` on a fresh copy of that input and
 * prints the average cost per call in nanoseconds.
 *
 * Params:
 *   args = program name followed by the eight values to sort
 *
 * Throws: Exception if the argument count is not eight; `to!ushort` throws
 *         on values that are not valid unsigned 16-bit integers.
 */
void main(string[] args)
{
    import std.algorithm;
    import std.array;
    import std.conv;
    import std.datetime.stopwatch;
    import std.exception : enforce;
    import std.stdio;

    // Validate up front: copying a slice of any other length into src[0 .. 8]
    // is an array-length mismatch.
    enforce(args.length == 9,
            "expected exactly 8 unsigned 16-bit integers as arguments");

    auto iters = 100_000_000;
    ushort[8] src;
    src[0 .. 8] = args[1 .. $].map!(to!ushort).array;

    // Run exactly `iters` iterations so the division below is accurate.
    auto timer = StopWatch(AutoStart.yes);
    foreach (i; 0 .. iters)
    {
        auto arr = src;
        pcmpstrSort(arr);
    }
    writeln("pcmpstrSort ", timer.peek.total!"nsecs" / double(iters), " ns/vector");

    timer = StopWatch(AutoStart.yes);
    foreach (i; 0 .. iters)
    {
        auto arr = src;
        vecPrefixSort(arr);
    }
    writeln("vecPrefixSort ", timer.peek.total!"nsecs" / double(iters), " ns/vector");
}

/**
 * Sorts eight unsigned 16-bit values in ascending order by computing the
 * final rank of every element with SIMD compares.
 *
 * For each element x the rank is (number of lanes strictly smaller than x)
 * plus (number of lanes equal to x at a lower index). The second term makes
 * ranks unique, so duplicated values each get their own output slot.
 *
 * Params:
 *   arr = the eight values; overwritten with the sorted result
 *
 * NOTE(review): the vector load dereferences arr.ptr as a ushort8*, which
 * presumably requires 16-byte alignment - confirm for callers other than main.
 */
void vecPrefixSort(ref ushort[8] arr)
{
    pragma(inline, false);

    ushort[8] res;
    ushort8 vec = *(cast(ushort8*) arr.ptr);

    // pcmpgtw is a signed compare. Flipping the top bit of every lane maps
    // unsigned order onto signed order, so values >= 0x8000 sort correctly.
    ushort8 bias = 0x8000;
    short8 keys = cast(short8)(vec ^ bias);

    static foreach (i; 0 .. 8)
    {{
        short8 x = keys.array[i]; // broadcast lane i to all lanes
        auto gt = __builtin_ia32_pcmpgtw128(x, keys);
        auto eq = __builtin_ia32_pcmpeqw128(x, keys);

        // pmovmskb yields two mask bits per 16-bit lane.
        uint lessBits = __builtin_ia32_pmovmskb128(cast(ubyte16) gt);
        uint eqBits = __builtin_ia32_pmovmskb128(cast(ubyte16) eq);

        // Tie-break: only equal lanes with an index below i count.
        uint tieBits = eqBits & ((1u << (2 * i)) - 1);

        // The two masks are disjoint; two bits per lane means the popcount
        // is already the byte offset of the destination slot.
        size_t pos = popcnt(lessBits | tieBits);
        *(cast(ushort*) ((cast(ubyte*) res.ptr) + pos)) = arr.ptr[i];
    }}

    arr = res;
}

/**
 * Sorts eight unsigned 16-bit values in ascending order, using the SSE4.2
 * string-compare instruction in "ranges" mode to count, for each element x,
 * how many lanes fall inside [0, x].
 *
 * The explicit-length form (pcmpestrm) is used so that a zero word is an
 * ordinary value instead of a string terminator. Equal elements are placed
 * in index order: the slot is (count of lanes <= x) - 1 - (count of equal
 * lanes at a higher index), which is always within 0 .. 7.
 *
 * Params:
 *   arr = the eight values; overwritten with the sorted result
 *
 * NOTE(review): the vector load dereferences arr.ptr as a ushort8*, which
 * presumably requires 16-byte alignment - confirm for callers other than main.
 */
void pcmpstrSort(ref ushort[8] arr)
{
    pragma(inline, false);

    ushort[8] res;
    ushort8 vec = *(cast(ushort8*) arr.ptr);

    // Needle holds one (low, high) pair: low stays 0, high is set per element.
    ushort8 range = 0;

    static foreach (i; 0 .. 8)
    {{
        range.array[1] = arr[i];

        // imm8 0b0_00_01_01: unsigned words, ranges aggregation, positive
        // polarity, bit-mask result. Lengths: 2 needle words, 8 haystack words.
        auto mask = __builtin_ia32_pcmpestrm128(cast(ubyte16) range, 2,
                                                cast(ubyte16) vec, 8,
                                                0b0_00_01_01);
        ulong leBits = (cast(ulong2) mask).array[0];

        // Equal lanes at a higher index keep the upper slots of the run of
        // duplicates, so subtract them. pmovmskb gives two bits per lane.
        ushort8 x = arr[i];
        auto eq = __builtin_ia32_pcmpeqw128(cast(short8) x, cast(short8) vec);
        uint eqBits = __builtin_ia32_pmovmskb128(cast(ubyte16) eq);
        uint laterTies = eqBits >> (2 * (i + 1));

        // leBits always includes lane i itself, so the result is >= 0.
        size_t slot = popcnt(leBits) - 1 - popcnt(laterTies) / 2;
        res.ptr[slot] = arr[i];
    }}

    arr = res;
}