mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	better heuristic distinction of .d files (#3145)
* fix benchmark - require json for Hash.to_json * better heuristic distinction of .d files - properly recongnize dtrace probes - recongnize \ in Makefile paths - recongnize single line `file.ext : dep.ext` make targets - recognize D module, import, function, and unittest declarations - add more representative D samples D changed from 31.2% to 28.1% DTrace changed from 33.5% to 32.5% Makefile changed from 35.3% to 39.4% See https://gist.github.com/MartinNowak/fda24fdef64f2dbb05c5a5ceabf22bd3 for the scraper used to get a test corpus.
This commit is contained in:
		
				
					committed by
					
						 Colin Seymour
						Colin Seymour
					
				
			
			
				
	
			
			
			
						parent
						
							b7e27a9f58
						
					
				
				
					commit
					fa6ae1116f
				
			
							
								
								
									
										440
									
								
								samples/D/aa.d
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										440
									
								
								samples/D/aa.d
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,440 @@ | ||||
| /** | ||||
|  * Implementation of associative arrays. | ||||
|  * | ||||
|  * Copyright: Martin Nowak 2015 -. | ||||
|  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) | ||||
|  * Authors:   Martin Nowak | ||||
|  */ | ||||
| module core.aa; | ||||
|  | ||||
| import core.memory : GC; | ||||
|  | ||||
| private | ||||
| { | ||||
|     // grow threshold | ||||
|     enum GROW_NUM = 4; | ||||
|     enum GROW_DEN = 5; | ||||
|     // shrink threshold | ||||
|     enum SHRINK_NUM = 1; | ||||
|     enum SHRINK_DEN = 8; | ||||
|     // grow factor | ||||
|     enum GROW_FAC = 4; | ||||
|     // growing the AA doubles it's size, so the shrink threshold must be | ||||
|     // smaller than half the grow threshold to have a hysteresis | ||||
|     static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN); | ||||
|     // initial load factor (for literals), mean of both thresholds | ||||
|     enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2; | ||||
|     enum INIT_DEN = SHRINK_DEN * GROW_DEN; | ||||
|  | ||||
|     // magic hash constants to distinguish empty, deleted, and filled buckets | ||||
|     enum HASH_EMPTY = 0; | ||||
|     enum HASH_DELETED = 0x1; | ||||
|     enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1; | ||||
| } | ||||
|  | ||||
| enum INIT_NUM_BUCKETS = 8; | ||||
|  | ||||
| struct AA(Key, Val) | ||||
| { | ||||
|     this(size_t sz) | ||||
|     { | ||||
|         impl = new Impl(nextpow2(sz)); | ||||
|     } | ||||
|  | ||||
|     @property bool empty() const pure nothrow @safe @nogc | ||||
|     { | ||||
|         return !length; | ||||
|     } | ||||
|  | ||||
|     @property size_t length() const pure nothrow @safe @nogc | ||||
|     { | ||||
|         return impl is null ? 0 : impl.length; | ||||
|     } | ||||
|  | ||||
|     void opIndexAssign(Val val, in Key key) | ||||
|     { | ||||
|         // lazily alloc implementation | ||||
|         if (impl is null) | ||||
|             impl = new Impl(INIT_NUM_BUCKETS); | ||||
|  | ||||
|         // get hash and bucket for key | ||||
|         immutable hash = calcHash(key); | ||||
|  | ||||
|         // found a value => assignment | ||||
|         if (auto p = impl.findSlotLookup(hash, key)) | ||||
|         { | ||||
|             p.entry.val = val; | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         auto p = findSlotInsert(hash); | ||||
|         if (p.deleted) | ||||
|             --deleted; | ||||
|         // check load factor and possibly grow | ||||
|         else if (++used * GROW_DEN > dim * GROW_NUM) | ||||
|         { | ||||
|             grow(); | ||||
|             p = findSlotInsert(hash); | ||||
|             assert(p.empty); | ||||
|         } | ||||
|  | ||||
|         // update search cache and allocate entry | ||||
|         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr)); | ||||
|         p.hash = hash; | ||||
|         p.entry = new Impl.Entry(key, val); // TODO: move | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     ref inout(Val) opIndex(in Key key) inout @trusted | ||||
|     { | ||||
|         auto p = opIn_r(key); | ||||
|         assert(p !is null); | ||||
|         return *p; | ||||
|     } | ||||
|  | ||||
|     inout(Val)* opIn_r(in Key key) inout @trusted | ||||
|     { | ||||
|         if (empty) | ||||
|             return null; | ||||
|  | ||||
|         immutable hash = calcHash(key); | ||||
|         if (auto p = findSlotLookup(hash, key)) | ||||
|             return &p.entry.val; | ||||
|         return null; | ||||
|     } | ||||
|  | ||||
|     bool remove(in Key key) | ||||
|     { | ||||
|         if (empty) | ||||
|             return false; | ||||
|  | ||||
|         immutable hash = calcHash(key); | ||||
|         if (auto p = findSlotLookup(hash, key)) | ||||
|         { | ||||
|             // clear entry | ||||
|             p.hash = HASH_DELETED; | ||||
|             p.entry = null; | ||||
|  | ||||
|             ++deleted; | ||||
|             if (length * SHRINK_DEN < dim * SHRINK_NUM) | ||||
|                 shrink(); | ||||
|  | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     Val get(in Key key, lazy Val val) | ||||
|     { | ||||
|         auto p = opIn_r(key); | ||||
|         return p is null ? val : *p; | ||||
|     } | ||||
|  | ||||
|     ref Val getOrSet(in Key key, lazy Val val) | ||||
|     { | ||||
|         // lazily alloc implementation | ||||
|         if (impl is null) | ||||
|             impl = new Impl(INIT_NUM_BUCKETS); | ||||
|  | ||||
|         // get hash and bucket for key | ||||
|         immutable hash = calcHash(key); | ||||
|  | ||||
|         // found a value => assignment | ||||
|         if (auto p = impl.findSlotLookup(hash, key)) | ||||
|             return p.entry.val; | ||||
|  | ||||
|         auto p = findSlotInsert(hash); | ||||
|         if (p.deleted) | ||||
|             --deleted; | ||||
|         // check load factor and possibly grow | ||||
|         else if (++used * GROW_DEN > dim * GROW_NUM) | ||||
|         { | ||||
|             grow(); | ||||
|             p = findSlotInsert(hash); | ||||
|             assert(p.empty); | ||||
|         } | ||||
|  | ||||
|         // update search cache and allocate entry | ||||
|         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr)); | ||||
|         p.hash = hash; | ||||
|         p.entry = new Impl.Entry(key, val); | ||||
|         return p.entry.val; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|        Convert the AA to the type of the builtin language AA. | ||||
|      */ | ||||
|     Val[Key] toBuiltinAA() pure nothrow | ||||
|     { | ||||
|         return cast(Val[Key]) _aaFromCoreAA(impl, rtInterface); | ||||
|     } | ||||
|  | ||||
| private: | ||||
|  | ||||
|     private this(inout(Impl)* impl) inout | ||||
|     { | ||||
|         this.impl = impl; | ||||
|     } | ||||
|  | ||||
|     ref Val getLValue(in Key key) | ||||
|     { | ||||
|         // lazily alloc implementation | ||||
|         if (impl is null) | ||||
|             impl = new Impl(INIT_NUM_BUCKETS); | ||||
|  | ||||
|         // get hash and bucket for key | ||||
|         immutable hash = calcHash(key); | ||||
|  | ||||
|         // found a value => assignment | ||||
|         if (auto p = impl.findSlotLookup(hash, key)) | ||||
|             return p.entry.val; | ||||
|  | ||||
|         auto p = findSlotInsert(hash); | ||||
|         if (p.deleted) | ||||
|             --deleted; | ||||
|         // check load factor and possibly grow | ||||
|         else if (++used * GROW_DEN > dim * GROW_NUM) | ||||
|         { | ||||
|             grow(); | ||||
|             p = findSlotInsert(hash); | ||||
|             assert(p.empty); | ||||
|         } | ||||
|  | ||||
|         // update search cache and allocate entry | ||||
|         firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr)); | ||||
|         p.hash = hash; | ||||
|         p.entry = new Impl.Entry(key); // TODO: move | ||||
|         return p.entry.val; | ||||
|     } | ||||
|  | ||||
|     static struct Impl | ||||
|     { | ||||
|         this(size_t sz) | ||||
|         { | ||||
|             buckets = allocBuckets(sz); | ||||
|         } | ||||
|  | ||||
|         @property size_t length() const pure nothrow @nogc | ||||
|         { | ||||
|             assert(used >= deleted); | ||||
|             return used - deleted; | ||||
|         } | ||||
|  | ||||
|         @property size_t dim() const pure nothrow @nogc | ||||
|         { | ||||
|             return buckets.length; | ||||
|         } | ||||
|  | ||||
|         @property size_t mask() const pure nothrow @nogc | ||||
|         { | ||||
|             return dim - 1; | ||||
|         } | ||||
|  | ||||
|         // find the first slot to insert a value with hash | ||||
|         inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc | ||||
|         { | ||||
|             for (size_t i = hash & mask, j = 1;; ++j) | ||||
|             { | ||||
|                 if (!buckets[i].filled) | ||||
|                     return &buckets[i]; | ||||
|                 i = (i + j) & mask; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // lookup a key | ||||
|         inout(Bucket)* findSlotLookup(size_t hash, in Key key) inout | ||||
|         { | ||||
|             for (size_t i = hash & mask, j = 1;; ++j) | ||||
|             { | ||||
|                 if (buckets[i].hash == hash && key == buckets[i].entry.key) | ||||
|                     return &buckets[i]; | ||||
|                 else if (buckets[i].empty) | ||||
|                     return null; | ||||
|                 i = (i + j) & mask; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         void grow() | ||||
|         { | ||||
|             // If there are so many deleted entries, that growing would push us | ||||
|             // below the shrink threshold, we just purge deleted entries instead. | ||||
|             if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM) | ||||
|                 resize(dim); | ||||
|             else | ||||
|                 resize(GROW_FAC * dim); | ||||
|         } | ||||
|  | ||||
|         void shrink() | ||||
|         { | ||||
|             if (dim > INIT_NUM_BUCKETS) | ||||
|                 resize(dim / GROW_FAC); | ||||
|         } | ||||
|  | ||||
|         void resize(size_t ndim) pure nothrow | ||||
|         { | ||||
|             auto obuckets = buckets; | ||||
|             buckets = allocBuckets(ndim); | ||||
|  | ||||
|             foreach (ref b; obuckets) | ||||
|                 if (b.filled) | ||||
|                     *findSlotInsert(b.hash) = b; | ||||
|  | ||||
|             firstUsed = 0; | ||||
|             used -= deleted; | ||||
|             deleted = 0; | ||||
|             GC.free(obuckets.ptr); // safe to free b/c impossible to reference | ||||
|         } | ||||
|  | ||||
|         static struct Entry | ||||
|         { | ||||
|             Key key; | ||||
|             Val val; | ||||
|         } | ||||
|  | ||||
|         static struct Bucket | ||||
|         { | ||||
|             size_t hash; | ||||
|             Entry* entry; | ||||
|  | ||||
|             @property bool empty() const | ||||
|             { | ||||
|                 return hash == HASH_EMPTY; | ||||
|             } | ||||
|  | ||||
|             @property bool deleted() const | ||||
|             { | ||||
|                 return hash == HASH_DELETED; | ||||
|             } | ||||
|  | ||||
|             @property bool filled() const | ||||
|             { | ||||
|                 return cast(ptrdiff_t) hash < 0; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Bucket[] allocBuckets(size_t dim) @trusted pure nothrow | ||||
|         { | ||||
|             enum attr = GC.BlkAttr.NO_INTERIOR; | ||||
|             immutable sz = dim * Bucket.sizeof; | ||||
|             return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim]; | ||||
|         } | ||||
|  | ||||
|         Bucket[] buckets; | ||||
|         uint used; | ||||
|         uint deleted; | ||||
|         uint firstUsed; | ||||
|     } | ||||
|  | ||||
|     RTInterface* rtInterface()() pure nothrow @nogc | ||||
|     { | ||||
|         static size_t aaLen(in void* pimpl) pure nothrow @nogc | ||||
|         { | ||||
|             auto aa = const(AA)(cast(const(Impl)*) pimpl); | ||||
|             return aa.length; | ||||
|         } | ||||
|  | ||||
|         static void* aaGetY(void** pimpl, in void* pkey) | ||||
|         { | ||||
|             auto aa = AA(cast(Impl*)*pimpl); | ||||
|             auto res = &aa.getLValue(*cast(const(Key*)) pkey); | ||||
|             *pimpl = aa.impl; // might have changed | ||||
|             return res; | ||||
|         } | ||||
|  | ||||
|         static inout(void)* aaInX(inout void* pimpl, in void* pkey) | ||||
|         { | ||||
|             auto aa = inout(AA)(cast(inout(Impl)*) pimpl); | ||||
|             return aa.opIn_r(*cast(const(Key*)) pkey); | ||||
|         } | ||||
|  | ||||
|         static bool aaDelX(void* pimpl, in void* pkey) | ||||
|         { | ||||
|             auto aa = AA(cast(Impl*) pimpl); | ||||
|             return aa.remove(*cast(const(Key*)) pkey); | ||||
|         } | ||||
|  | ||||
|         static immutable vtbl = RTInterface(&aaLen, &aaGetY, &aaInX, &aaDelX); | ||||
|         return cast(RTInterface*)&vtbl; | ||||
|     } | ||||
|  | ||||
|     static size_t calcHash(in ref Key key) | ||||
|     { | ||||
|         return hashOf(key) | HASH_FILLED_MARK; | ||||
|     } | ||||
|  | ||||
|     Impl* impl; | ||||
|     alias impl this; | ||||
| } | ||||
|  | ||||
| package extern (C) void* _aaFromCoreAA(void* impl, RTInterface* rtIntf) pure nothrow; | ||||
|  | ||||
| private: | ||||
|  | ||||
| struct RTInterface | ||||
| { | ||||
|     alias AA = void*; | ||||
|  | ||||
|     size_t function(in AA aa) pure nothrow @nogc len; | ||||
|     void* function(AA* aa, in void* pkey) getY; | ||||
|     inout(void)* function(inout AA aa, in void* pkey) inX; | ||||
|     bool function(AA aa, in void* pkey) delX; | ||||
| } | ||||
|  | ||||
| unittest | ||||
| { | ||||
|     AA!(int, int) aa; | ||||
|     assert(aa.length == 0); | ||||
|     aa[0] = 1; | ||||
|     assert(aa.length == 1 && aa[0] == 1); | ||||
|     aa[1] = 2; | ||||
|     assert(aa.length == 2 && aa[1] == 2); | ||||
|     import core.stdc.stdio; | ||||
|  | ||||
|     int[int] rtaa = aa.toBuiltinAA(); | ||||
|     assert(rtaa.length == 2); | ||||
|     puts("length"); | ||||
|     assert(rtaa[0] == 1); | ||||
|     assert(rtaa[1] == 2); | ||||
|     rtaa[2] = 3; | ||||
|  | ||||
|     assert(aa[2] == 3); | ||||
| } | ||||
|  | ||||
| unittest | ||||
| { | ||||
|     auto aa = AA!(int, int)(3); | ||||
|     aa[0] = 0; | ||||
|     aa[1] = 1; | ||||
|     aa[2] = 2; | ||||
|     assert(aa.length == 3); | ||||
| } | ||||
|  | ||||
| //============================================================================== | ||||
| // Helper functions | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| size_t nextpow2(in size_t n) pure nothrow @nogc | ||||
| { | ||||
|     import core.bitop : bsr; | ||||
|  | ||||
|     if (n < 2) | ||||
|         return 1; | ||||
|     return size_t(1) << bsr(n - 1) + 1; | ||||
| } | ||||
|  | ||||
| pure nothrow @nogc unittest | ||||
| { | ||||
|     //                            0, 1, 2, 3, 4, 5, 6, 7, 8,  9 | ||||
|     foreach (const n, const pow2; [1, 1, 2, 4, 4, 8, 8, 8, 8, 16]) | ||||
|         assert(nextpow2(n) == pow2); | ||||
| } | ||||
|  | ||||
| T min(T)(T a, T b) pure nothrow @nogc | ||||
| { | ||||
|     return a < b ? a : b; | ||||
| } | ||||
|  | ||||
| T max(T)(T a, T b) pure nothrow @nogc | ||||
| { | ||||
|     return b < a ? a : b; | ||||
| } | ||||
		Reference in New Issue
	
	Block a user