mirror of
https://github.com/KevinMidboe/linguist.git
synced 2026-02-12 11:19:31 +00:00
better heuristic distinction of .d files (#3145)
* fix benchmark - require json for Hash.to_json * better heuristic distinction of .d files - properly recognize dtrace probes - recognize \ in Makefile paths - recognize single line `file.ext : dep.ext` make targets - recognize D module, import, function, and unittest declarations - add more representative D samples D changed from 31.2% to 28.1% DTrace changed from 33.5% to 32.5% Makefile changed from 35.3% to 39.4% See https://gist.github.com/MartinNowak/fda24fdef64f2dbb05c5a5ceabf22bd3 for the scraper used to get a test corpus.
This commit is contained in:
committed by
Colin Seymour
parent
b7e27a9f58
commit
fa6ae1116f
440
samples/D/aa.d
Normal file
440
samples/D/aa.d
Normal file
@@ -0,0 +1,440 @@
|
||||
/**
|
||||
* Implementation of associative arrays.
|
||||
*
|
||||
* Copyright: Martin Nowak 2015 -.
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
|
||||
* Authors: Martin Nowak
|
||||
*/
|
||||
module core.aa;
|
||||
|
||||
import core.memory : GC;
|
||||
|
||||
private
{
    // Grow threshold: an insertion triggers grow() once
    // used / dim exceeds GROW_NUM / GROW_DEN.
    enum GROW_NUM = 4;
    enum GROW_DEN = 5;

    // Shrink threshold: a removal triggers shrink() once
    // length / dim drops below SHRINK_NUM / SHRINK_DEN.
    enum SHRINK_NUM = 1;
    enum SHRINK_DEN = 8;

    // Factor by which the bucket count is multiplied when growing
    // and divided when shrinking.
    enum GROW_FAC = 4;

    // Growing multiplies the AA's size by GROW_FAC, so the shrink threshold
    // must be smaller than 1/GROW_FAC of the grow threshold to have a
    // hysteresis.
    static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);

    // Initial load factor (for literals), mean of both thresholds.
    enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
    enum INIT_DEN = SHRINK_DEN * GROW_DEN;

    // Magic hash constants to distinguish empty, deleted, and filled buckets.
    // HASH_FILLED_MARK is the most significant bit of size_t; calcHash sets
    // it on every stored hash.
    enum HASH_EMPTY = 0;
    enum HASH_DELETED = 0x1;
    enum HASH_FILLED_MARK = size_t(1) << (8 * size_t.sizeof - 1);
}
|
||||
|
||||
// Bucket count used when an AA allocates its implementation lazily on the
// first insertion; Impl.shrink only acts on tables larger than this.
enum INIT_NUM_BUCKETS = 8;
|
||||
|
||||
/**
 * Hash-table based associative array mapping `Key` to `Val`.
 *
 * The struct is a thin handle around a lazily allocated `Impl`; the members
 * of `Impl` are reachable directly through `alias impl this`. Removed entries
 * leave a bucket marked `HASH_DELETED`, which is either reused by a later
 * insertion or dropped when the table is resized.
 */
struct AA(Key, Val)
{
    /// Allocates the implementation up front with `nextpow2(sz)` buckets.
    this(size_t sz)
    {
        impl = new Impl(nextpow2(sz));
    }

    /// Returns: `true` if the AA holds no entries.
    @property bool empty() const pure nothrow @safe @nogc
    {
        return !length;
    }

    /// Returns: the number of live entries, 0 if nothing was allocated yet.
    @property size_t length() const pure nothrow @safe @nogc
    {
        return impl is null ? 0 : impl.length;
    }

    /**
     * Implements `aa[key] = val`.
     *
     * Overwrites the value of an existing entry, otherwise inserts a new
     * entry for `key`.
     */
    void opIndexAssign(Val val, in Key key)
    {
        // lazily alloc implementation
        if (impl is null)
            impl = new Impl(INIT_NUM_BUCKETS);

        // get hash and bucket for key
        immutable hash = calcHash(key);

        // found a value => assignment
        if (auto p = impl.findSlotLookup(hash, key))
        {
            p.entry.val = val;
            return;
        }

        // not found => claim a bucket and allocate the entry
        auto p = claimSlot(hash);
        p.entry = new Impl.Entry(key, val); // TODO: move
    }

    /**
     * Implements `aa[key]`.
     *
     * Returns: a reference to the value stored for `key`. The key must be
     * present; this is only checked with an `assert`.
     */
    ref inout(Val) opIndex(in Key key) inout @trusted
    {
        auto p = opIn_r(key);
        assert(p !is null);
        return *p;
    }

    /**
     * Implements `key in aa`.
     *
     * Returns: a pointer to the value stored for `key`, or `null` if the AA
     * is empty or does not contain `key`.
     */
    inout(Val)* opIn_r(in Key key) inout @trusted
    {
        if (empty)
            return null;

        immutable hash = calcHash(key);
        if (auto p = findSlotLookup(hash, key))
            return &p.entry.val;
        return null;
    }

    /**
     * Removes the entry for `key`, shrinking the table if the number of
     * live entries falls below the shrink threshold.
     *
     * Returns: `true` if an entry was removed, `false` if `key` was absent.
     */
    bool remove(in Key key)
    {
        if (empty)
            return false;

        immutable hash = calcHash(key);
        if (auto p = findSlotLookup(hash, key))
        {
            // clear entry, leaving a tombstone so that probe sequences
            // running through this bucket are not cut short
            p.hash = HASH_DELETED;
            p.entry = null;

            ++deleted;
            if (length * SHRINK_DEN < dim * SHRINK_NUM)
                shrink();

            return true;
        }
        return false;
    }

    /**
     * Returns: the value stored for `key`, or `val` if `key` is absent.
     * `val` is lazy and only evaluated in the latter case; nothing is
     * inserted.
     */
    Val get(in Key key, lazy Val val)
    {
        auto p = opIn_r(key);
        return p is null ? val : *p;
    }

    /**
     * Returns: a reference to the value stored for `key`. If `key` is absent
     * a new entry is inserted first, initialized with `val`; `val` is lazy
     * and only evaluated in that case.
     */
    ref Val getOrSet(in Key key, lazy Val val)
    {
        // lazily alloc implementation
        if (impl is null)
            impl = new Impl(INIT_NUM_BUCKETS);

        // get hash and bucket for key
        immutable hash = calcHash(key);

        // found a value => return it
        if (auto p = impl.findSlotLookup(hash, key))
            return p.entry.val;

        // not found => claim a bucket, then evaluate `val` for the new entry
        auto p = claimSlot(hash);
        p.entry = new Impl.Entry(key, val);
        return p.entry.val;
    }

    /**
       Convert the AA to the type of the builtin language AA.
     */
    Val[Key] toBuiltinAA() pure nothrow
    {
        return cast(Val[Key]) _aaFromCoreAA(impl, rtInterface);
    }

private:

    /// Wraps an existing implementation pointer (used by the rtInterface hooks).
    private this(inout(Impl)* impl) inout
    {
        this.impl = impl;
    }

    /**
     * Returns: a reference to the value stored for `key`. If `key` is absent
     * a new entry holding a default-initialized value is inserted first.
     */
    ref Val getLValue(in Key key)
    {
        // lazily alloc implementation
        if (impl is null)
            impl = new Impl(INIT_NUM_BUCKETS);

        // get hash and bucket for key
        immutable hash = calcHash(key);

        // found a value => return it
        if (auto p = impl.findSlotLookup(hash, key))
            return p.entry.val;

        // not found => claim a bucket and allocate the entry
        auto p = claimSlot(hash);
        p.entry = new Impl.Entry(key); // TODO: move
        return p.entry.val;
    }

    /**
     * Reserves a bucket for a new entry with the given hash.
     *
     * Shared by opIndexAssign, getOrSet and getLValue. Must only be called
     * when `impl` is allocated and the key is known to be absent.
     *
     * A deleted bucket is reused if one comes first in the probe sequence.
     * Otherwise `used` is incremented and, if that exceeds the grow
     * threshold, the table is resized and the bucket looked up again.
     *
     * Returns: the claimed bucket with `hash` already stored; the caller
     * still has to assign `entry`.
     */
    Impl.Bucket* claimSlot(in size_t hash)
    {
        auto p = findSlotInsert(hash);
        if (p.deleted)
            --deleted;
        // check load factor and possibly grow
        else if (++used * GROW_DEN > dim * GROW_NUM)
        {
            grow();
            p = findSlotInsert(hash);
            assert(p.empty);
        }

        // update search cache
        firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
        p.hash = hash;
        return p;
    }

    /// Bucket array plus bookkeeping counters; the actual hash table.
    static struct Impl
    {
        /// Allocates `sz` empty buckets.
        this(size_t sz)
        {
            buckets = allocBuckets(sz);
        }

        /// Returns: the number of live entries (`used - deleted`).
        @property size_t length() const pure nothrow @nogc
        {
            assert(used >= deleted);
            return used - deleted;
        }

        /// Returns: the number of buckets.
        @property size_t dim() const pure nothrow @nogc
        {
            return buckets.length;
        }

        /// Returns: `dim - 1`, used to map a hash or probe index to a bucket index.
        @property size_t mask() const pure nothrow @nogc
        {
            return dim - 1;
        }

        /**
         * Find the first slot to insert a value with hash.
         *
         * Probing starts at `hash & mask`; the step grows by one with every
         * iteration. Returns the first bucket that is not filled, i.e. one
         * that is empty or deleted.
         */
        inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc
        {
            for (size_t i = hash & mask, j = 1;; ++j)
            {
                if (!buckets[i].filled)
                    return &buckets[i];
                i = (i + j) & mask;
            }
        }

        /**
         * Lookup a key.
         *
         * Follows the same probe sequence as findSlotInsert.
         *
         * Returns: the bucket whose stored hash equals `hash` and whose entry
         * key equals `key`, or `null` once an empty bucket is reached. A
         * deleted bucket does not end the search.
         */
        inout(Bucket)* findSlotLookup(size_t hash, in Key key) inout
        {
            for (size_t i = hash & mask, j = 1;; ++j)
            {
                if (buckets[i].hash == hash && key == buckets[i].entry.key)
                    return &buckets[i];
                else if (buckets[i].empty)
                    return null;
                i = (i + j) & mask;
            }
        }

        /// Enlarges the table by GROW_FAC, or merely purges deleted buckets.
        void grow()
        {
            // If there are so many deleted entries, that growing would push us
            // below the shrink threshold, we just purge deleted entries instead.
            if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
                resize(dim);
            else
                resize(GROW_FAC * dim);
        }

        /// Divides the bucket count by GROW_FAC if the table is larger than INIT_NUM_BUCKETS.
        void shrink()
        {
            if (dim > INIT_NUM_BUCKETS)
                resize(dim / GROW_FAC);
        }

        /**
         * Rehashes into a freshly allocated array of `ndim` buckets.
         *
         * Only filled buckets are carried over, so deleted markers disappear;
         * `used`, `deleted` and `firstUsed` are reset accordingly and the old
         * bucket array is freed.
         */
        void resize(size_t ndim) pure nothrow
        {
            auto obuckets = buckets;
            buckets = allocBuckets(ndim);

            foreach (ref b; obuckets)
                if (b.filled)
                    *findSlotInsert(b.hash) = b;

            firstUsed = 0;
            used -= deleted;
            deleted = 0;
            GC.free(obuckets.ptr); // safe to free b/c impossible to reference
        }

        /// Heap-allocated key/value pair referenced from a bucket.
        static struct Entry
        {
            Key key;
            Val val;
        }

        /// One table slot; its state is encoded in `hash`.
        static struct Bucket
        {
            size_t hash;
            Entry* entry;

            /// Returns: `true` if the bucket was never used since the last (re)allocation.
            @property bool empty() const
            {
                return hash == HASH_EMPTY;
            }

            /// Returns: `true` if the bucket held an entry that was removed.
            @property bool deleted() const
            {
                return hash == HASH_DELETED;
            }

            /// Returns: `true` if the most significant bit of `hash` is set (see calcHash).
            @property bool filled() const
            {
                return cast(ptrdiff_t) hash < 0;
            }
        }

        /**
         * Allocates `dim` buckets with `GC.calloc`, flagged `NO_INTERIOR`.
         * The zeroed memory makes every bucket start out as HASH_EMPTY.
         */
        Bucket[] allocBuckets(size_t dim) @trusted pure nothrow
        {
            enum attr = GC.BlkAttr.NO_INTERIOR;
            immutable sz = dim * Bucket.sizeof;
            return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim];
        }

        Bucket[] buckets;
        uint used;      // buckets that are filled or deleted
        uint deleted;   // buckets that are marked deleted
        uint firstUsed; // search cache, lowered on insert and reset on resize
    }

    /**
     * Returns: a pointer to a static table of type-erased hooks (length,
     * get-lvalue, lookup, delete) for this instantiation, as passed to
     * `_aaFromCoreAA`. Each hook wraps the raw implementation pointer in an
     * `AA` and forwards to the corresponding member.
     */
    RTInterface* rtInterface()() pure nothrow @nogc
    {
        static size_t aaLen(in void* pimpl) pure nothrow @nogc
        {
            auto aa = const(AA)(cast(const(Impl)*) pimpl);
            return aa.length;
        }

        static void* aaGetY(void** pimpl, in void* pkey)
        {
            auto aa = AA(cast(Impl*)*pimpl);
            auto res = &aa.getLValue(*cast(const(Key*)) pkey);
            *pimpl = aa.impl; // might have changed
            return res;
        }

        static inout(void)* aaInX(inout void* pimpl, in void* pkey)
        {
            auto aa = inout(AA)(cast(inout(Impl)*) pimpl);
            return aa.opIn_r(*cast(const(Key*)) pkey);
        }

        static bool aaDelX(void* pimpl, in void* pkey)
        {
            auto aa = AA(cast(Impl*) pimpl);
            return aa.remove(*cast(const(Key*)) pkey);
        }

        static immutable vtbl = RTInterface(&aaLen, &aaGetY, &aaInX, &aaDelX);
        return cast(RTInterface*)&vtbl;
    }

    /**
     * Returns: `hashOf(key)` with HASH_FILLED_MARK set, so that a stored hash
     * can never equal HASH_EMPTY or HASH_DELETED.
     */
    static size_t calcHash(in ref Key key)
    {
        return hashOf(key) | HASH_FILLED_MARK;
    }

    Impl* impl;
    alias impl this;
}
|
||||
|
||||
// Declaration only (C linkage, no body in this module). AA.toBuiltinAA passes
// its implementation pointer and hook table here and casts the result to
// Val[Key].
package extern (C) void* _aaFromCoreAA(void* impl, RTInterface* rtIntf) pure nothrow;
|
||||
|
||||
private:
|
||||
|
||||
/**
 * Table of type-erased function pointers describing one AA instantiation.
 *
 * AA.rtInterface fills it positionally with its length, get-lvalue, lookup
 * and delete hooks, so the field order below must not change.
 */
struct RTInterface
{
    /// Opaque implementation pointer as seen through this interface.
    alias AA = void*;

    /// Number of entries.
    size_t function(in AA aa) pure nothrow @nogc len;

    /// Pointer to the value for a key; takes the AA by pointer so the hook can replace it.
    void* function(AA* aa, in void* pkey) getY;

    /// Pointer to the value for a key, or null.
    inout(void)* function(inout AA aa, in void* pkey) inX;

    /// Removes a key; true if it was present.
    bool function(AA aa, in void* pkey) delX;
}
|
||||
|
||||
// Basic insertion and lookup, followed by a round trip through the builtin
// AA type obtained from toBuiltinAA.
unittest
{
    AA!(int, int) aa;
    assert(aa.length == 0);
    aa[0] = 1;
    assert(aa.length == 1 && aa[0] == 1);
    aa[1] = 2;
    assert(aa.length == 2 && aa[1] == 2);

    int[int] rtaa = aa.toBuiltinAA();
    assert(rtaa.length == 2);
    assert(rtaa[0] == 1);
    assert(rtaa[1] == 2);
    rtaa[2] = 3;

    // an insertion made through the builtin AA must be visible through aa
    assert(aa[2] == 3);
}
|
||||
|
||||
// Construction with an explicit size followed by three insertions.
unittest
{
    auto aa = AA!(int, int)(3);
    foreach (i; 0 .. 3)
        aa[i] = i;
    assert(aa.length == 3);
}
|
||||
|
||||
//==============================================================================
|
||||
// Helper functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/**
 * Rounds `n` up to a power of two.
 *
 * Returns: 1 for `n < 2`, otherwise the smallest power of two that is
 * greater than or equal to `n`.
 */
size_t nextpow2(in size_t n) pure nothrow @nogc
{
    import core.bitop : bsr;

    // bsr(n - 1) is the index of the highest set bit of n - 1; the result is
    // the power of two one bit above it.
    return n < 2 ? 1 : size_t(1) << (bsr(n - 1) + 1);
}
|
||||
|
||||
// Checks nextpow2 for every n in 0 .. 10.
pure nothrow @nogc unittest
{
    // expected[n] is the result required for nextpow2(n)
    //                                  n = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
    static immutable size_t[10] expected = [1, 1, 2, 4, 4, 8, 8, 8, 8, 16];

    foreach (n, pow2; expected)
        assert(nextpow2(n) == pow2);
}
|
||||
|
||||
/// Returns: `a` if `a < b`, otherwise `b`.
T min(T)(T a, T b) pure nothrow @nogc
{
    if (a < b)
        return a;
    return b;
}
|
||||
|
||||
/// Returns: `a` if `b < a`, otherwise `b`.
T max(T)(T a, T b) pure nothrow @nogc
{
    if (b < a)
        return a;
    return b;
}
|
||||
Reference in New Issue
Block a user