0xDEADBEEF

RSS odkazy

In-vector sorting (pcmpstr)

11. 11. 2020 #kód
import gcc.builtins;
import core.simd;
import core.bitop;

/// Benchmark driver.
///
/// Expects exactly eight unsigned 16-bit integers on the command line, then
/// times `pcmpstrSort` and `vecPrefixSort` on a fresh copy of those values and
/// prints the average cost per call in nanoseconds.
///
/// Throws: Exception when the number of arguments is not eight;
///         ConvException (from `to!ushort`) when an argument is not a valid ushort.
void main(string[] args) {
  import std.algorithm;
  import std.array;
  import std.conv;
  import std.datetime.stopwatch;
  import std.stdio;

  auto iters = 100_000_000;

  // The slice assignment below copies into a fixed 8-element array, so any
  // other argument count is a usage error; report it explicitly.
  if (args.length != 9)
    throw new Exception("expected exactly 8 unsigned 16-bit integers as arguments");

  ushort[8] src;
  src[0 .. 8] = args[1 .. $].map!(to!ushort).array;

  auto timer = StopWatch(AutoStart.yes);
  // Run exactly `iters` iterations so the division below yields a true average.
  foreach (i; 0 .. iters) {
    auto arr = src; // sort a fresh copy each time; the routines work in place
    pcmpstrSort(arr);
  }
  writeln("pcmpstrSort ", timer.peek.total!"nsecs" / double(iters), " ns/vector");

  timer = StopWatch(AutoStart.yes);
  foreach (i; 0 .. iters) {
    auto arr = src;
    vecPrefixSort(arr);
  }
  writeln("vecPrefixSort ", timer.peek.total!"nsecs" / double(iters), " ns/vector");
}


/// Sorts the eight values of `arr` in ascending (unsigned) order, in place.
///
/// For every element the routine counts how many lanes hold a strictly
/// smaller value; that count is the element's index in the sorted output.
///
/// Params:
///   arr = the eight values to sort; overwritten with the result.
///
/// Note: the target slot depends only on an element's value, so equal
/// elements land in the same slot and the slots they would otherwise occupy
/// keep the zero initial value of `res`. Inputs should hold distinct values.
void vecPrefixSort(ref ushort[8] arr) {
  pragma(inline, false);

  ushort[8] res;

  // Copy through `.array` instead of dereferencing a `ushort8*`: a
  // `ushort[8]` is not guaranteed to have vector alignment.
  ushort8 vec;
  vec.array = arr;

  // pcmpgtw compares *signed* 16-bit lanes. Flipping the top bit of every
  // lane maps unsigned order onto signed order, so values >= 0x8000 are
  // ranked correctly.
  ushort8 signBias = 0x8000;
  short8 biased = cast(short8)(vec ^ signBias);

  static foreach (i; 0 .. 8) {{
    short8 x = biased.array[i]; // broadcast element i to all lanes
    auto byteMask = __builtin_ia32_pcmpgtw128(x, biased);
    // One mask bit per byte, i.e. two bits per 16-bit lane: the popcount is
    // therefore already the *byte* offset of the destination slot.
    auto mask = __builtin_ia32_pmovmskb128(cast(ubyte16)byteMask);
    ulong pos = ulong(popcnt(mask));
    *(cast(ushort*) ((cast(ubyte*) res.ptr) + pos)) = arr.ptr[i];
  }}

  arr = res;
}


/// Sorts the eight values of `arr` in ascending (unsigned) order, in place,
/// using the SSE4.2 string-compare instruction in "ranges" mode.
///
/// For every element `e` the instruction marks the lanes whose value lies in
/// the inclusive range [0, e]; the number of marked lanes is the 1-based
/// position of `e` in the sorted output.
///
/// Params:
///   arr = the eight values to sort; overwritten with the result.
///
/// Note: the target slot depends only on an element's value, so equal
/// elements land in the same slot and the slots they would otherwise occupy
/// keep the zero initial value of `res`. Inputs should hold distinct values.
void pcmpstrSort(ref ushort[8] arr) {
  pragma(inline, false);

  ushort[8] res;

  // Copy through `.array` instead of dereferencing a `ushort8*`: a
  // `ushort[8]` is not guaranteed to have vector alignment.
  ushort8 vec;
  vec.array = arr;

  // Lane 0 is the lower bound (0), lane 1 the upper bound of the range pair.
  ushort8 range = 0;

  static foreach (i; 0 .. 8) {{
    range.array[1] = arr[i];
    // Explicit-length variant (2 range words, 8 data words). The
    // implicit-length form treats a zero word as a terminator, which
    // truncates the data at the first 0 and leaves an empty range when
    // arr[i] == 0 -- the popcount is then 0 and `pos-1` would index out
    // of bounds.
    // imm8 0b0_00_01_01: unsigned words, "ranges" aggregation, positive
    // polarity, bit-mask result.
    auto mask = __builtin_ia32_pcmpestrm128(cast(ubyte16) range, 2, cast(ubyte16) vec, 8, 0b0_00_01_01);
    // The element itself always matches its own range, so pos >= 1.
    ulong pos = popcnt((cast(ulong2)mask).array[0]);
    res.ptr[pos-1] = arr[i];
  }}

  arr = res;
}
píše k47 (@kaja47, k47)