better heuristic distinction of .d files (#3145)

* fix benchmark

- require json for Hash.to_json

* better heuristic distinction of .d files

- properly recognize dtrace probes
- recognize \ in Makefile paths
- recognize single line `file.ext : dep.ext` make targets
- recognize D module, import, function, and unittest declarations
- add more representative D samples

D changed from 31.2% to 28.1%
DTrace changed from 33.5% to 32.5%
Makefile changed from 35.3% to 39.4%

See
https://gist.github.com/MartinNowak/fda24fdef64f2dbb05c5a5ceabf22bd3
for the scraper used to get a test corpus.
This commit is contained in:
Martin Nowak
2017-03-30 19:25:53 +02:00
committed by Colin Seymour
parent b7e27a9f58
commit fa6ae1116f
10 changed files with 663 additions and 3 deletions

View File

@@ -4,6 +4,7 @@ require 'rake/testtask'
require 'yaml'
require 'yajl'
require 'open-uri'
require 'json'
task :default => :test

View File

@@ -125,11 +125,18 @@ module Linguist
end
disambiguate ".d" do |data|
if /^module /.match(data)
# see http://dlang.org/spec/grammar
# ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
if /^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}/.match(data)
Language["D"]
elsif /^((dtrace:::)?BEGIN|provider |#pragma (D (option|attributes)|ident)\s)/.match(data)
# see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
elsif /^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)/.match(data)
Language["DTrace"]
elsif /(\/.*:( .* \\)$| : \\$|^ : |: \\$)/.match(data)
# path/target : dependency \
# target : \
# : dependency
# path/file.ext1 : some/path/../file.ext2
elsif /([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)/.match(data)
Language["Makefile"]
end
end

440
samples/D/aa.d Normal file
View File

@@ -0,0 +1,440 @@
/**
* Implementation of associative arrays.
*
* Copyright: Martin Nowak 2015 -.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors: Martin Nowak
*/
module core.aa;
import core.memory : GC;
// Compile-time tuning constants for the hash table implemented below.
// Thresholds are expressed as NUM/DEN fractions of the bucket count.
private
{
// grow threshold
enum GROW_NUM = 4;
enum GROW_DEN = 5;
// shrink threshold
enum SHRINK_NUM = 1;
enum SHRINK_DEN = 8;
// grow factor
enum GROW_FAC = 4;
// growing the AA multiplies its size by GROW_FAC, so the shrink threshold
// must be smaller than the grow threshold divided by GROW_FAC to have a
// hysteresis
static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
// initial load factor (for literals), mean of both thresholds
enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
enum INIT_DEN = SHRINK_DEN * GROW_DEN;
// magic hash constants to distinguish empty, deleted, and filled buckets
enum HASH_EMPTY = 0;
enum HASH_DELETED = 0x1;
// most significant bit of size_t; calcHash ORs it into every stored hash and
// Bucket.filled tests for it via the sign of the hash
enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
}
// bucket count used when the implementation is allocated lazily on first insert
enum INIT_NUM_BUCKETS = 8;
/**
* Associative array from `Key` to `Val`, implemented as an open-addressing
* hash table.
*
* The struct only stores a pointer to an `Impl`, which is allocated lazily by
* the inserting methods. Because of `alias impl this`, members of `Impl`
* (`buckets`, `used`, `deleted`, `dim`, `grow`, `findSlotInsert`, ...) are
* used below as if they were members of `AA` itself.
*/
struct AA(Key, Val)
{
// Pre-allocates the table; the bucket count is sz rounded up to a power of two.
this(size_t sz)
{
impl = new Impl(nextpow2(sz));
}
// True when the table holds no live entries (also when impl is still null).
@property bool empty() const pure nothrow @safe @nogc
{
return !length;
}
// Number of live entries; 0 while no implementation has been allocated.
@property size_t length() const pure nothrow @safe @nogc
{
return impl is null ? 0 : impl.length;
}
// aa[key] = val: overwrites the value of an existing key or inserts a new entry.
void opIndexAssign(Val val, in Key key)
{
// lazily alloc implementation
if (impl is null)
impl = new Impl(INIT_NUM_BUCKETS);
// get hash and bucket for key
immutable hash = calcHash(key);
// found a value => assignment
if (auto p = impl.findSlotLookup(hash, key))
{
p.entry.val = val;
return;
}
auto p = findSlotInsert(hash);
// reusing a tombstone does not change `used`, only the tombstone count
if (p.deleted)
--deleted;
// check load factor and possibly grow
else if (++used * GROW_DEN > dim * GROW_NUM)
{
grow();
// the bucket array was replaced, so the slot has to be looked up again
p = findSlotInsert(hash);
assert(p.empty);
}
// update search cache and allocate entry
firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
p.hash = hash;
p.entry = new Impl.Entry(key, val); // TODO: move
return;
}
// aa[key]: returns a reference to the stored value; asserts that the key exists.
ref inout(Val) opIndex(in Key key) inout @trusted
{
auto p = opIn_r(key);
assert(p !is null);
return *p;
}
// key in aa: pointer to the stored value, or null when the key is absent.
inout(Val)* opIn_r(in Key key) inout @trusted
{
if (empty)
return null;
immutable hash = calcHash(key);
if (auto p = findSlotLookup(hash, key))
return &p.entry.val;
return null;
}
// Removes key; returns true if an entry was removed, false if it was absent.
bool remove(in Key key)
{
if (empty)
return false;
immutable hash = calcHash(key);
if (auto p = findSlotLookup(hash, key))
{
// clear entry
// the bucket becomes a tombstone (HASH_DELETED) rather than empty, so
// lookups probing past it keep going
p.hash = HASH_DELETED;
p.entry = null;
++deleted;
// shrink once the live-entry ratio falls below SHRINK_NUM / SHRINK_DEN
if (length * SHRINK_DEN < dim * SHRINK_NUM)
shrink();
return true;
}
return false;
}
// Returns the value stored for key, or val (evaluated only then) when absent.
Val get(in Key key, lazy Val val)
{
auto p = opIn_r(key);
return p is null ? val : *p;
}
// Returns a reference to the value stored for key, inserting val (evaluated
// only on insertion) when the key is absent.
ref Val getOrSet(in Key key, lazy Val val)
{
// lazily alloc implementation
if (impl is null)
impl = new Impl(INIT_NUM_BUCKETS);
// get hash and bucket for key
immutable hash = calcHash(key);
// found a value => return it, nothing is inserted
if (auto p = impl.findSlotLookup(hash, key))
return p.entry.val;
auto p = findSlotInsert(hash);
if (p.deleted)
--deleted;
// check load factor and possibly grow
else if (++used * GROW_DEN > dim * GROW_NUM)
{
grow();
p = findSlotInsert(hash);
assert(p.empty);
}
// update search cache and allocate entry
firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
p.hash = hash;
p.entry = new Impl.Entry(key, val);
return p.entry.val;
}
/**
Convert the AA to the type of the builtin language AA.
*/
Val[Key] toBuiltinAA() pure nothrow
{
return cast(Val[Key]) _aaFromCoreAA(impl, rtInterface);
}
private:
// Wraps an existing implementation pointer; used by the rtInterface callbacks.
private this(inout(Impl)* impl) inout
{
this.impl = impl;
}
// Same insertion path as getOrSet, but a missing entry is created from the key
// alone (no value is supplied to the Entry constructor).
ref Val getLValue(in Key key)
{
// lazily alloc implementation
if (impl is null)
impl = new Impl(INIT_NUM_BUCKETS);
// get hash and bucket for key
immutable hash = calcHash(key);
// found a value => return it, nothing is inserted
if (auto p = impl.findSlotLookup(hash, key))
return p.entry.val;
auto p = findSlotInsert(hash);
if (p.deleted)
--deleted;
// check load factor and possibly grow
else if (++used * GROW_DEN > dim * GROW_NUM)
{
grow();
p = findSlotInsert(hash);
assert(p.empty);
}
// update search cache and allocate entry
firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
p.hash = hash;
p.entry = new Impl.Entry(key); // TODO: move
return p.entry.val;
}
// Heap-allocated state of the table: the bucket array plus its counters.
static struct Impl
{
this(size_t sz)
{
buckets = allocBuckets(sz);
}
// Live entries: buckets ever filled minus those turned into tombstones.
@property size_t length() const pure nothrow @nogc
{
assert(used >= deleted);
return used - deleted;
}
// Number of buckets.
@property size_t dim() const pure nothrow @nogc
{
return buckets.length;
}
// Bit mask used to wrap probe indices; assumes dim is a power of two
// (the visible callers pass nextpow2(sz), INIT_NUM_BUCKETS or dim scaled by
// GROW_FAC).
@property size_t mask() const pure nothrow @nogc
{
return dim - 1;
}
// find the first slot to insert a value with hash
// (the first bucket on the probe sequence that is not filled, i.e. empty or
// a tombstone; the probe step grows by one on every iteration)
inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc
{
for (size_t i = hash & mask, j = 1;; ++j)
{
if (!buckets[i].filled)
return &buckets[i];
i = (i + j) & mask;
}
}
// lookup a key
// (walks the same probe sequence as findSlotInsert; tombstones are skipped,
// the search only stops at a match or at an empty bucket)
inout(Bucket)* findSlotLookup(size_t hash, in Key key) inout
{
for (size_t i = hash & mask, j = 1;; ++j)
{
if (buckets[i].hash == hash && key == buckets[i].entry.key)
return &buckets[i];
else if (buckets[i].empty)
return null;
i = (i + j) & mask;
}
}
void grow()
{
// If there are so many deleted entries, that growing would push us
// below the shrink threshold, we just purge deleted entries instead.
if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
resize(dim);
else
resize(GROW_FAC * dim);
}
// Divides the bucket count by GROW_FAC, unless the table is already at or
// below the initial size.
void shrink()
{
if (dim > INIT_NUM_BUCKETS)
resize(dim / GROW_FAC);
}
// Rehashes all filled buckets into a fresh array of ndim buckets, dropping
// tombstones, and frees the old array.
void resize(size_t ndim) pure nothrow
{
auto obuckets = buckets;
buckets = allocBuckets(ndim);
foreach (ref b; obuckets)
if (b.filled)
*findSlotInsert(b.hash) = b;
firstUsed = 0;
// tombstones were not copied, so they no longer count as used
used -= deleted;
deleted = 0;
GC.free(obuckets.ptr); // safe to free b/c impossible to reference
}
// Key/value pair, allocated separately and referenced from a Bucket.
static struct Entry
{
Key key;
Val val;
}
// One slot of the table; the state is encoded in `hash` (HASH_EMPTY,
// HASH_DELETED, or a real hash with HASH_FILLED_MARK set).
static struct Bucket
{
size_t hash;
Entry* entry;
@property bool empty() const
{
return hash == HASH_EMPTY;
}
@property bool deleted() const
{
return hash == HASH_DELETED;
}
@property bool filled() const
{
// HASH_FILLED_MARK is the top bit, so a filled hash is negative when
// reinterpreted as a signed integer
return cast(ptrdiff_t) hash < 0;
}
}
// Allocates dim buckets with GC.calloc, so every bucket starts out with
// hash == HASH_EMPTY and a null entry.
Bucket[] allocBuckets(size_t dim) @trusted pure nothrow
{
enum attr = GC.BlkAttr.NO_INTERIOR;
immutable sz = dim * Bucket.sizeof;
return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim];
}
Bucket[] buckets;
uint used;
uint deleted;
// NOTE(review): in this file firstUsed is only ever assigned
// min(firstUsed, ...) or 0, so it appears to stay 0 - confirm the intent.
uint firstUsed;
}
// Builds (once per instantiation) the table of type-erased callbacks handed to
// _aaFromCoreAA; each callback re-wraps the opaque pointer in an AA and
// forwards to the typed method.
RTInterface* rtInterface()() pure nothrow @nogc
{
static size_t aaLen(in void* pimpl) pure nothrow @nogc
{
auto aa = const(AA)(cast(const(Impl)*) pimpl);
return aa.length;
}
static void* aaGetY(void** pimpl, in void* pkey)
{
auto aa = AA(cast(Impl*)*pimpl);
auto res = &aa.getLValue(*cast(const(Key*)) pkey);
*pimpl = aa.impl; // might have changed
return res;
}
static inout(void)* aaInX(inout void* pimpl, in void* pkey)
{
auto aa = inout(AA)(cast(inout(Impl)*) pimpl);
return aa.opIn_r(*cast(const(Key*)) pkey);
}
static bool aaDelX(void* pimpl, in void* pkey)
{
auto aa = AA(cast(Impl*) pimpl);
return aa.remove(*cast(const(Key*)) pkey);
}
static immutable vtbl = RTInterface(&aaLen, &aaGetY, &aaInX, &aaDelX);
// the immutable qualifier is cast away to match the return type
return cast(RTInterface*)&vtbl;
}
// Hash of key with HASH_FILLED_MARK forced on, so it can never collide with
// the HASH_EMPTY / HASH_DELETED marker values.
static size_t calcHash(in ref Key key)
{
return hashOf(key) | HASH_FILLED_MARK;
}
Impl* impl;
alias impl this;
}
// Declared here only; the definition is not part of this module. It receives
// the opaque implementation pointer plus the callback table built by
// AA.rtInterface, and its result is cast to Val[Key] in AA.toBuiltinAA.
package extern (C) void* _aaFromCoreAA(void* impl, RTInterface* rtIntf) pure nothrow;
private:
// Type-erased callback table; the field order matches the positional
// construction RTInterface(&aaLen, &aaGetY, &aaInX, &aaDelX) in AA.rtInterface.
struct RTInterface
{
// opaque handle to an AA implementation
alias AA = void*;
// number of entries
size_t function(in AA aa) pure nothrow @nogc len;
// get-or-insert; takes the handle by pointer because insertion may replace it
void* function(AA* aa, in void* pkey) getY;
// lookup; returns null when the key is absent
inout(void)* function(inout AA aa, in void* pkey) inX;
// removal; returns whether an entry was removed
bool function(AA aa, in void* pkey) delX;
}
// Basic insertion through opIndexAssign, then conversion to the builtin AA
// type. The last two statements expect a write through the builtin view
// (rtaa) to be observable through the original aa.
unittest
{
AA!(int, int) aa;
assert(aa.length == 0);
aa[0] = 1;
assert(aa.length == 1 && aa[0] == 1);
aa[1] = 2;
assert(aa.length == 2 && aa[1] == 2);
import core.stdc.stdio;
int[int] rtaa = aa.toBuiltinAA();
assert(rtaa.length == 2);
puts("length");
assert(rtaa[0] == 1);
assert(rtaa[1] == 2);
rtaa[2] = 3;
assert(aa[2] == 3);
}
// Construction with an explicit size hint (3 is rounded up by nextpow2),
// followed by as many insertions as the hint.
unittest
{
auto aa = AA!(int, int)(3);
aa[0] = 0;
aa[1] = 1;
aa[2] = 2;
assert(aa.length == 3);
}
//==============================================================================
// Helper functions
//------------------------------------------------------------------------------
/// Returns the smallest power of two that is not less than `n`; 0 and 1 both
/// yield 1.
size_t nextpow2(in size_t n) pure nothrow @nogc
{
    import core.bitop : bsr;

    // bsr(n - 1) is the index of the highest set bit of n - 1; one position
    // above it is the first power of two that reaches n.
    return n < 2 ? size_t(1) : size_t(1) << (bsr(n - 1) + 1);
}
// Table-driven check of nextpow2: the index into the literal is the input n,
// the element is the expected result.
pure nothrow @nogc unittest
{
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
foreach (const n, const pow2; [1, 1, 2, 4, 4, 8, 8, 8, 8, 16])
assert(nextpow2(n) == pow2);
}
/// Returns `a` when it compares less than `b`, otherwise `b`.
T min(T)(T a, T b) pure nothrow @nogc
{
    if (a < b)
        return a;
    return b;
}
/// Returns `a` when `b` compares less than it, otherwise `b`.
T max(T)(T a, T b) pure nothrow @nogc
{
    if (b < a)
        return a;
    return b;
}

187
samples/D/arrayops.d Normal file
View File

@@ -0,0 +1,187 @@
/**
* Benchmark for array ops.
*
* Copyright: Copyright Martin Nowak 2016 -.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors: Martin Nowak
*/
import core.cpuid, std.algorithm, std.datetime, std.meta, std.stdio, std.string,
std.range;
/**
* Times the array operation `op` on short slices of element type `T`.
*
* `op` is a statement template such as "a += b * const" (see genOps). For each
* slice length 1, 2, 4, ..., 32 the operation is applied to 64 slices and the
* minimum elapsed time over the repetitions is kept.
*
* Returns: the six minima divided by 1024.
* NOTE(review): the meaning of the 1024 divisor is not documented here - confirm.
*/
float[6] getLatencies(T, string op)()
{
enum N = (64 * (1 << 6) + 64) * T.sizeof;
auto a = Array!T(N), b = Array!T(N), c = Array!T(N);
// float.max so that the first measurement always wins the min() below
float[6] latencies = float.max;
foreach (i, ref latency; latencies)
{
auto len = 1 << i;
foreach (_; 1 .. 32)
{
// reset the operands so that repeated in-place updates start from the same values
a[] = 24;
b[] = 4;
c[] = 2;
auto sw = StopWatch(AutoStart.yes);
foreach (off; size_t(0) .. size_t(64))
{
// turn the loop counter into the start offset of the off-th slice;
// consecutive slices are len + 1 elements apart
off = off * len + off;
// rewrite the template into a statement on slices; "const" is replaced
// first because it contains the letter "c"
enum op = op.replace("const", "2").replace("a",
"a[off .. off + len]").replace("b",
"b[off .. off + len]").replace("c", "c[off .. off + len]");
mixin(op ~ ";");
}
latency = min(latency, sw.peek.nsecs);
}
}
float[6] res = latencies[] / 1024;
return res;
}
/**
* Measures the array operation `op` on large working sets of element type `T`.
*
* Four working-set sizes are used (8 KiB, 32 KiB, 512 KiB and 32 MiB worth of
* `T`); each is processed as 64 slices and the minimum elapsed time over the
* repetitions is kept.
*
* Returns: for each working set, its size in bytes divided by the minimum
* elapsed nanoseconds.
*/
float[4] getThroughput(T, string op)()
{
enum N = (40 * 1024 * 1024 + 64 * T.sizeof) / T.sizeof;
auto a = Array!T(N), b = Array!T(N), c = Array!T(N);
// float.max so that the first measurement always wins the min() below
float[4] latencies = float.max;
// working-set sizes in elements
size_t[4] lengths = [
8 * 1024 / T.sizeof, 32 * 1024 / T.sizeof, 512 * 1024 / T.sizeof, 32 * 1024 * 1024 / T
.sizeof
];
foreach (i, ref latency; latencies)
{
auto len = lengths[i] / 64;
foreach (_; 1 .. 4)
{
// reset the operands so that repeated in-place updates start from the same values
a[] = 24;
b[] = 4;
c[] = 2;
auto sw = StopWatch(AutoStart.yes);
foreach (off; size_t(0) .. size_t(64))
{
// turn the loop counter into the start offset of the off-th slice
off = off * len + off;
// rewrite the template into a statement on slices; "const" is replaced
// first because it contains the letter "c"
enum op = op.replace("const", "2").replace("a",
"a[off .. off + len]").replace("b",
"b[off .. off + len]").replace("c", "c[off .. off + len]");
mixin(op ~ ";");
}
immutable nsecs = sw.peek.nsecs;
// presumably masked because the integer-to-float conversion may raise an
// FP exception while exceptions are unmasked in main - TODO confirm
runMasked({latency = min(latency, nsecs);});
}
}
float[4] throughputs = void;
runMasked({throughputs = T.sizeof * lengths[] / latencies[];});
return throughputs;
}
/**
 * Generates the operation templates to benchmark.
 *
 * For every assignment operator (`+=`, `-=`, `*=`, `/=`) the list contains the
 * plain forms `a op= b` and `a op= const`, followed by `a op= b op2 c` and
 * `a op= b op2 const` for every binary operator `op2`.
 */
string[] genOps()
{
    const string[] operators = ["+", "-", "*", "/"];
    string[] result;
    foreach (assign; operators)
    {
        result ~= ["a " ~ assign ~ "= b", "a " ~ assign ~ "= const"];
        foreach (binary; operators)
            result ~= [
                "a " ~ assign ~ "= b " ~ binary ~ " c",
                "a " ~ assign ~ "= b " ~ binary ~ " const"
            ];
    }
    return result;
}
/// Benchmarks `op` for every supported element type and prints one line per
/// type: type name, operation, the latency figures and the throughput figures.
void runOp(string op)()
{
    alias ElementTypes = AliasSeq!(ubyte, ushort, uint, ulong, byte, short, int, long,
            float, double);
    foreach (T; ElementTypes)
    {
        // latencies are measured before throughputs, as in a left-to-right
        // evaluation of the two as call arguments
        auto latencies = getLatencies!(T, op);
        auto throughputs = getThroughput!(T, op);
        writefln("%s, %s, %(%.2f, %), %(%s, %)", T.stringof, op, latencies, throughputs);
    }
}
/**
 * Minimal array of `T` backed by C `malloc`/`free`.
 *
 * The storage is left uninitialized by the constructor and released by the
 * destructor. Through `alias ary this` an `Array` can be used wherever a
 * `T[]` slice is expected.
 *
 * NOTE(review): copying is not disabled, so a copy would free the same block
 * twice; the visible callers only construct instances in place.
 */
struct Array(T)
{
    import core.exception : onOutOfMemoryError;
    import core.stdc.stdlib : free, malloc;

    /// Allocates uninitialized storage for `n` elements.
    this(size_t n)
    {
        auto p = cast(T*) malloc(T.sizeof * n);
        // malloc may return null for a zero-sized request; any other null is
        // an allocation failure and must not be turned into a slice that
        // claims n valid elements.
        if (p is null && n != 0)
            onOutOfMemoryError();
        ary = p[0 .. n];
    }

    /// Releases the storage (free accepts a null pointer).
    ~this()
    {
        free(ary.ptr);
    }

    T[] ary;
    alias ary this;
}
// Define the SSE version identifier on both x86 flavours; compilation for any
// other target fails with the static assert.
version (X86)
version = SSE;
else version (X86_64)
version = SSE;
else
static assert(0, "unimplemented");
// Helpers to read and modify the SSE control/status register (MXCSR).
version (SSE)
{
// Reads the current MXCSR value.
uint mxcsr()
{
// left uninitialized on purpose; written by the stmxcsr instruction below
uint ret = void;
asm
{
stmxcsr ret;
}
return ret;
}
// Writes val to MXCSR. Together with the getter above this allows the
// property-style `mxcsr = mxcsr | ...` used below.
void mxcsr(uint val)
{
asm
{
ldmxcsr val;
}
}
// http://softpixel.com/~cwright/programming/simd/sse.php
// bits 7..12: exception mask bits
enum FPU_EXCEPTION_MASKS = 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9 | 1 << 8 | 1 << 7;
// bits 0..5: exception flag bits
enum FPU_EXCEPTION_FLAGS = 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0;
// Sets all mask bits.
void maskFPUExceptions()
{
mxcsr = mxcsr | FPU_EXCEPTION_MASKS;
}
// Clears all mask bits.
void unmaskFPUExceptions()
{
mxcsr = mxcsr & ~FPU_EXCEPTION_MASKS;
}
// Returns the currently raised exception flag bits.
uint FPUExceptionFlags()
{
return mxcsr & FPU_EXCEPTION_FLAGS;
}
// Clears all exception flag bits.
void clearFPUExceptionFlags()
{
mxcsr = mxcsr & ~FPU_EXCEPTION_FLAGS;
}
}
/**
 * Runs `dg` with the floating-point exception mask bits set, then clears any
 * exception flags `dg` raised and clears the mask bits again.
 *
 * Expects no exception flag to be pending on entry. The previous state is
 * restored even when `dg` throws, so a failing delegate cannot leave the
 * exceptions masked for the rest of the program.
 */
void runMasked(scope void delegate() dg)
{
    assert(FPUExceptionFlags == 0);
    maskFPUExceptions;
    // Registered after masking so the cleanup only runs once masking has
    // taken place; the flags are cleared before unmasking, as before.
    scope (exit)
    {
        clearFPUExceptionFlags;
        unmaskFPUExceptions;
    }
    dg();
}
// Benchmark driver: prints a header line, then one result line per element
// type for every operation produced by genOps.
void main()
{
// run the benchmark with FP exceptions unmasked; runMasked masks them
// temporarily where needed
unmaskFPUExceptions;
// header columns: latency1 .. latency32 (slice lengths) and one throughput
// column per working-set size
writefln("type, op, %(latency%s, %), %-(throughput%s, %)", iota(6)
.map!(i => 1 << i), ["8KB", "32KB", "512KB", "32MB"]);
// genOps is evaluated at compile time and its strings are turned into an
// AliasSeq, so runOp is instantiated once per operation
foreach (op; mixin("AliasSeq!(%(%s, %))".format(genOps)))
runOp!op;
maskFPUExceptions;
}

3
samples/D/function.d Normal file
View File

@@ -0,0 +1,3 @@
// Minimal sample: a function declaration with an empty body.
void foo()
{
}

6
samples/D/hello_world.d Normal file
View File

@@ -0,0 +1,6 @@
import std.stdio;
// Program entry point: prints a greeting followed by a newline.
void main()
{
writeln("Hello World");
}

7
samples/D/template.d Normal file
View File

@@ -0,0 +1,7 @@
// Compile-time recursive template: Fib!0 and Fib!1 are 1, every other Fib!N is
// Fib!(N - 2) + Fib!(N - 1). The static if keeps the recursive instantiations
// out of the base case.
template Fib(size_t N)
{
static if (N < 2)
enum Fib = size_t(1);
else
enum Fib = Fib!(N - 2) + Fib!(N - 1);
}

View File

@@ -0,0 +1,3 @@
// Minimal sample: a function template with one type parameter, one runtime
// parameter and an empty body.
void bar(T)(T t)
{
}

3
samples/D/unittest1.d Normal file
View File

@@ -0,0 +1,3 @@
// Minimal sample: an empty unittest block.
unittest
{
}

3
samples/D/unittest2.d Normal file
View File

@@ -0,0 +1,3 @@
// Minimal sample: a unittest block followed by a parenthesized name, the
// optional `(...)` form that the `.d` heuristic pattern allows after
// `unittest`.
// NOTE(review): confirm whether the D compiler itself accepts this form.
unittest("optional name")
{
}