linguist/samples/D/aa.d

/**
 * Implementation of associative arrays.
 *
 * Copyright: Martin Nowak 2015 -.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors:   Martin Nowak
 */
module core.aa;

import core.memory : GC;

private
{
    // grow threshold
    enum GROW_NUM = 4;
    enum GROW_DEN = 5;
    // shrink threshold
    enum SHRINK_NUM = 1;
    enum SHRINK_DEN = 8;
    // grow factor
    enum GROW_FAC = 4;
    // growing the AA doubles it's size, so the shrink threshold must be
    // smaller than half the grow threshold to have a hysteresis
    static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
    // initial load factor (for literals), mean of both thresholds
    enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
    enum INIT_DEN = SHRINK_DEN * GROW_DEN;

    // magic hash constants to distinguish empty, deleted, and filled buckets
    enum HASH_EMPTY = 0;
    enum HASH_DELETED = 0x1;
    enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
}

enum INIT_NUM_BUCKETS = 8;

struct AA(Key, Val)
{
    this(size_t sz)
    {
        impl = new Impl(nextpow2(sz));
    }

    @property bool empty() const pure nothrow @safe @nogc
    {
        return !length;
    }

    @property size_t length() const pure nothrow @safe @nogc
    {
        return impl is null ? 0 : impl.length;
    }

    void opIndexAssign(Val val, in Key key)
    {
        // lazily alloc implementation
        if (impl is null)
            impl = new Impl(INIT_NUM_BUCKETS);

        // get hash and bucket for key
        immutable hash = calcHash(key);

        // found a value => assignment
        if (auto p = impl.findSlotLookup(hash, key))
        {
            p.entry.val = val;
            return;
        }

        auto p = findSlotInsert(hash);
        if (p.deleted)
            --deleted;
        // check load factor and possibly grow
        else if (++used * GROW_DEN > dim * GROW_NUM)
        {
            grow();
            p = findSlotInsert(hash);
            assert(p.empty);
        }

        // update search cache and allocate entry
        firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
        p.hash = hash;
        p.entry = new Impl.Entry(key, val); // TODO: move
        return;
    }

    ref inout(Val) opIndex(in Key key) inout @trusted
    {
        auto p = opIn_r(key);
        assert(p !is null);
        return *p;
    }

    inout(Val)* opIn_r(in Key key) inout @trusted
    {
        if (empty)
            return null;

        immutable hash = calcHash(key);
        if (auto p = findSlotLookup(hash, key))
            return &p.entry.val;
        return null;
    }

    bool remove(in Key key)
    {
        if (empty)
            return false;

        immutable hash = calcHash(key);
        if (auto p = findSlotLookup(hash, key))
        {
            // clear entry
            p.hash = HASH_DELETED;
            p.entry = null;

            ++deleted;
            if (length * SHRINK_DEN < dim * SHRINK_NUM)
                shrink();

            return true;
        }
        return false;
    }

    Val get(in Key key, lazy Val val)
    {
        auto p = opIn_r(key);
        return p is null ? val : *p;
    }

    ref Val getOrSet(in Key key, lazy Val val)
    {
        // lazily alloc implementation
        if (impl is null)
            impl = new Impl(INIT_NUM_BUCKETS);

        // get hash and bucket for key
        immutable hash = calcHash(key);

        // found a value => assignment
        if (auto p = impl.findSlotLookup(hash, key))
            return p.entry.val;

        auto p = findSlotInsert(hash);
        if (p.deleted)
            --deleted;
        // check load factor and possibly grow
        else if (++used * GROW_DEN > dim * GROW_NUM)
        {
            grow();
            p = findSlotInsert(hash);
            assert(p.empty);
        }

        // update search cache and allocate entry
        firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
        p.hash = hash;
        p.entry = new Impl.Entry(key, val);
        return p.entry.val;
    }

    /**
       Convert the AA to the type of the builtin language AA.
     */
    Val[Key] toBuiltinAA() pure nothrow
    {
        return cast(Val[Key]) _aaFromCoreAA(impl, rtInterface);
    }

private:

    private this(inout(Impl)* impl) inout
    {
        this.impl = impl;
    }

    ref Val getLValue(in Key key)
    {
        // lazily alloc implementation
        if (impl is null)
            impl = new Impl(INIT_NUM_BUCKETS);

        // get hash and bucket for key
        immutable hash = calcHash(key);

        // found a value => assignment
        if (auto p = impl.findSlotLookup(hash, key))
            return p.entry.val;

        auto p = findSlotInsert(hash);
        if (p.deleted)
            --deleted;
        // check load factor and possibly grow
        else if (++used * GROW_DEN > dim * GROW_NUM)
        {
            grow();
            p = findSlotInsert(hash);
            assert(p.empty);
        }

        // update search cache and allocate entry
        firstUsed = min(firstUsed, cast(uint)(p - buckets.ptr));
        p.hash = hash;
        p.entry = new Impl.Entry(key); // TODO: move
        return p.entry.val;
    }

    static struct Impl
    {
        this(size_t sz)
        {
            buckets = allocBuckets(sz);
        }

        @property size_t length() const pure nothrow @nogc
        {
            assert(used >= deleted);
            return used - deleted;
        }

        @property size_t dim() const pure nothrow @nogc
        {
            return buckets.length;
        }

        @property size_t mask() const pure nothrow @nogc
        {
            return dim - 1;
        }

        // find the first slot to insert a value with hash
        inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc
        {
            for (size_t i = hash & mask, j = 1;; ++j)
            {
                if (!buckets[i].filled)
                    return &buckets[i];
                i = (i + j) & mask;
            }
        }

        // lookup a key
        inout(Bucket)* findSlotLookup(size_t hash, in Key key) inout
        {
            for (size_t i = hash & mask, j = 1;; ++j)
            {
                if (buckets[i].hash == hash && key == buckets[i].entry.key)
                    return &buckets[i];
                else if (buckets[i].empty)
                    return null;
                i = (i + j) & mask;
            }
        }

        void grow()
        {
            // If there are so many deleted entries, that growing would push us
            // below the shrink threshold, we just purge deleted entries instead.
            if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
                resize(dim);
            else
                resize(GROW_FAC * dim);
        }

        void shrink()
        {
            if (dim > INIT_NUM_BUCKETS)
                resize(dim / GROW_FAC);
        }

        void resize(size_t ndim) pure nothrow
        {
            auto obuckets = buckets;
            buckets = allocBuckets(ndim);

            foreach (ref b; obuckets)
                if (b.filled)
                    *findSlotInsert(b.hash) = b;

            firstUsed = 0;
            used -= deleted;
            deleted = 0;
            GC.free(obuckets.ptr); // safe to free b/c impossible to reference
        }

        static struct Entry
        {
            Key key;
            Val val;
        }

        static struct Bucket
        {
            size_t hash;
            Entry* entry;

            @property bool empty() const
            {
                return hash == HASH_EMPTY;
            }

            @property bool deleted() const
            {
                return hash == HASH_DELETED;
            }

            @property bool filled() const
            {
                return cast(ptrdiff_t) hash < 0;
            }
        }

        Bucket[] allocBuckets(size_t dim) @trusted pure nothrow
        {
            enum attr = GC.BlkAttr.NO_INTERIOR;
            immutable sz = dim * Bucket.sizeof;
            return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim];
        }

        Bucket[] buckets;
        uint used;
        uint deleted;
        uint firstUsed;
    }

    RTInterface* rtInterface()() pure nothrow @nogc
    {
        static size_t aaLen(in void* pimpl) pure nothrow @nogc
        {
            auto aa = const(AA)(cast(const(Impl)*) pimpl);
            return aa.length;
        }

        static void* aaGetY(void** pimpl, in void* pkey)
        {
            auto aa = AA(cast(Impl*)*pimpl);
            auto res = &aa.getLValue(*cast(const(Key*)) pkey);
            *pimpl = aa.impl; // might have changed
            return res;
        }

        static inout(void)* aaInX(inout void* pimpl, in void* pkey)
        {
            auto aa = inout(AA)(cast(inout(Impl)*) pimpl);
            return aa.opIn_r(*cast(const(Key*)) pkey);
        }

        static bool aaDelX(void* pimpl, in void* pkey)
        {
            auto aa = AA(cast(Impl*) pimpl);
            return aa.remove(*cast(const(Key*)) pkey);
        }

        static immutable vtbl = RTInterface(&aaLen, &aaGetY, &aaInX, &aaDelX);
        return cast(RTInterface*)&vtbl;
    }

    static size_t calcHash(in ref Key key)
    {
        return hashOf(key) | HASH_FILLED_MARK;
    }

    Impl* impl;
    alias impl this;
}

package extern (C) void* _aaFromCoreAA(void* impl, RTInterface* rtIntf) pure nothrow;

private:

struct RTInterface
{
    alias AA = void*;

    size_t function(in AA aa) pure nothrow @nogc len;
    void* function(AA* aa, in void* pkey) getY;
    inout(void)* function(inout AA aa, in void* pkey) inX;
    bool function(AA aa, in void* pkey) delX;
}

unittest
{
    AA!(int, int) aa;
    assert(aa.length == 0);
    aa[0] = 1;
    assert(aa.length == 1 && aa[0] == 1);
    aa[1] = 2;
    assert(aa.length == 2 && aa[1] == 2);
    import core.stdc.stdio;

    int[int] rtaa = aa.toBuiltinAA();
    assert(rtaa.length == 2);
    puts("length");
    assert(rtaa[0] == 1);
    assert(rtaa[1] == 2);
    rtaa[2] = 3;

    assert(aa[2] == 3);
}

unittest
{
    auto aa = AA!(int, int)(3);
    aa[0] = 0;
    aa[1] = 1;
    aa[2] = 2;
    assert(aa.length == 3);
}

//==============================================================================
// Helper functions
//------------------------------------------------------------------------------

size_t nextpow2(in size_t n) pure nothrow @nogc
{
    import core.bitop : bsr;

    if (n < 2)
        return 1;
    return size_t(1) << bsr(n - 1) + 1;
}

pure nothrow @nogc unittest
{
    //                            0, 1, 2, 3, 4, 5, 6, 7, 8,  9
    foreach (const n, const pow2; [1, 1, 2, 4, 4, 8, 8, 8, 8, 16])
        assert(nextpow2(n) == pow2);
}

T min(T)(T a, T b) pure nothrow @nogc
{
    return a < b ? a : b;
}

T max(T)(T a, T b) pure nothrow @nogc
{
    return b < a ? a : b;
}