Files
linguist/samples/D/arrayops.d
Martin Nowak fa6ae1116f better heuristic distinction of .d files (#3145)
* fix benchmark

- require json for Hash.to_json

* better heuristic distinction of .d files

- properly recongnize dtrace probes
- recongnize \ in Makefile paths
- recongnize single line `file.ext : dep.ext` make targets
- recognize D module, import, function, and unittest declarations
- add more representative D samples

D changed from 31.2% to 28.1%
DTrace changed from 33.5% to 32.5%
Makefile changed from 35.3% to 39.4%

See
https://gist.github.com/MartinNowak/fda24fdef64f2dbb05c5a5ceabf22bd3
for the scraper used to get a test corpus.
2017-03-30 18:25:53 +01:00

188 lines
4.5 KiB
D

/**
* Benchmark for array ops.
*
* Copyright: Copyright Martin Nowak 2016 -.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors: Martin Nowak
*/
import core.cpuid, std.algorithm, std.datetime, std.meta, std.stdio, std.string,
std.range;
float[6] getLatencies(T, string op)()
{
enum N = (64 * (1 << 6) + 64) * T.sizeof;
auto a = Array!T(N), b = Array!T(N), c = Array!T(N);
float[6] latencies = float.max;
foreach (i, ref latency; latencies)
{
auto len = 1 << i;
foreach (_; 1 .. 32)
{
a[] = 24;
b[] = 4;
c[] = 2;
auto sw = StopWatch(AutoStart.yes);
foreach (off; size_t(0) .. size_t(64))
{
off = off * len + off;
enum op = op.replace("const", "2").replace("a",
"a[off .. off + len]").replace("b",
"b[off .. off + len]").replace("c", "c[off .. off + len]");
mixin(op ~ ";");
}
latency = min(latency, sw.peek.nsecs);
}
}
float[6] res = latencies[] / 1024;
return res;
}
float[4] getThroughput(T, string op)()
{
enum N = (40 * 1024 * 1024 + 64 * T.sizeof) / T.sizeof;
auto a = Array!T(N), b = Array!T(N), c = Array!T(N);
float[4] latencies = float.max;
size_t[4] lengths = [
8 * 1024 / T.sizeof, 32 * 1024 / T.sizeof, 512 * 1024 / T.sizeof, 32 * 1024 * 1024 / T
.sizeof
];
foreach (i, ref latency; latencies)
{
auto len = lengths[i] / 64;
foreach (_; 1 .. 4)
{
a[] = 24;
b[] = 4;
c[] = 2;
auto sw = StopWatch(AutoStart.yes);
foreach (off; size_t(0) .. size_t(64))
{
off = off * len + off;
enum op = op.replace("const", "2").replace("a",
"a[off .. off + len]").replace("b",
"b[off .. off + len]").replace("c", "c[off .. off + len]");
mixin(op ~ ";");
}
immutable nsecs = sw.peek.nsecs;
runMasked({latency = min(latency, nsecs);});
}
}
float[4] throughputs = void;
runMasked({throughputs = T.sizeof * lengths[] / latencies[];});
return throughputs;
}
string[] genOps()
{
string[] ops;
foreach (op1; ["+", "-", "*", "/"])
{
ops ~= "a " ~ op1 ~ "= b";
ops ~= "a " ~ op1 ~ "= const";
foreach (op2; ["+", "-", "*", "/"])
{
ops ~= "a " ~ op1 ~ "= b " ~ op2 ~ " c";
ops ~= "a " ~ op1 ~ "= b " ~ op2 ~ " const";
}
}
return ops;
}
void runOp(string op)()
{
foreach (T; AliasSeq!(ubyte, ushort, uint, ulong, byte, short, int, long, float,
double))
writefln("%s, %s, %(%.2f, %), %(%s, %)", T.stringof, op,
getLatencies!(T, op), getThroughput!(T, op));
}
struct Array(T)
{
import core.stdc.stdlib : free, malloc;
this(size_t n)
{
ary = (cast(T*) malloc(T.sizeof * n))[0 .. n];
}
~this()
{
free(ary.ptr);
}
T[] ary;
alias ary this;
}
version (X86)
version = SSE;
else version (X86_64)
version = SSE;
else
static assert(0, "unimplemented");
version (SSE)
{
uint mxcsr()
{
uint ret = void;
asm
{
stmxcsr ret;
}
return ret;
}
void mxcsr(uint val)
{
asm
{
ldmxcsr val;
}
}
// http://softpixel.com/~cwright/programming/simd/sse.php
enum FPU_EXCEPTION_MASKS = 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9 | 1 << 8 | 1 << 7;
enum FPU_EXCEPTION_FLAGS = 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0;
void maskFPUExceptions()
{
mxcsr = mxcsr | FPU_EXCEPTION_MASKS;
}
void unmaskFPUExceptions()
{
mxcsr = mxcsr & ~FPU_EXCEPTION_MASKS;
}
uint FPUExceptionFlags()
{
return mxcsr & FPU_EXCEPTION_FLAGS;
}
void clearFPUExceptionFlags()
{
mxcsr = mxcsr & ~FPU_EXCEPTION_FLAGS;
}
}
void runMasked(scope void delegate() dg)
{
assert(FPUExceptionFlags == 0);
maskFPUExceptions;
dg();
clearFPUExceptionFlags;
unmaskFPUExceptions;
}
void main()
{
unmaskFPUExceptions;
writefln("type, op, %(latency%s, %), %-(throughput%s, %)", iota(6)
.map!(i => 1 << i), ["8KB", "32KB", "512KB", "32MB"]);
foreach (op; mixin("AliasSeq!(%(%s, %))".format(genOps)))
runOp!op;
maskFPUExceptions;
}