src/hash_map.c

Fri, 27 May 2022 12:59:32 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 27 May 2022 12:59:32 +0200
changeset 556
3d19cae7e924
parent 554
fd3d843b839d
child 557
2aae1246b578
permissions
-rw-r--r--

#199 tests for hash map

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <errno.h>
#include <string.h>
#include "cx/hash_map.h"
#include "cx/utils.h"

/**
 * Computes a murmur hash-2.
 *
 * The data is consumed in groups of four bytes, which are assembled with
 * the first byte in the least significant position. Up to three remaining
 * bytes are mixed in afterwards, followed by the final mixing steps.
 *
 * @param data the data to hash
 * @param len the length of the data
 * @return the murmur hash-2 of the data
 */
static unsigned cx_hash_map_murmur(
        unsigned char const *data,
        size_t len
) {
    unsigned const m = 0x5bd1e995;
    unsigned const r = 24;
    // the length is mixed into the initial value; the conversion to unsigned
    // is made explicit here (it happened implicitly before)
    unsigned h = 25 ^ (unsigned) len;
    // index with size_t so that very long inputs cannot wrap the index
    size_t i = 0;
    while (len >= 4) {
        // convert every byte to unsigned BEFORE shifting: a promoted int
        // with bit 7 set must not be shifted left by 24 (signed overflow)
        unsigned k = (unsigned) data[i + 0] & 0xFF;
        k |= ((unsigned) data[i + 1] & 0xFF) << 8;
        k |= ((unsigned) data[i + 2] & 0xFF) << 16;
        k |= ((unsigned) data[i + 3] & 0xFF) << 24;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        i += 4;
        len -= 4;
    }

    // mix in the 0-3 bytes that did not fill a complete group
    switch (len) {
        case 3:
            h ^= ((unsigned) data[i + 2] & 0xFF) << 16;
            __attribute__((__fallthrough__));
        case 2:
            h ^= ((unsigned) data[i + 1] & 0xFF) << 8;
            __attribute__((__fallthrough__));
        case 1:
            h ^= ((unsigned) data[i + 0] & 0xFF);
            h *= m;
            __attribute__((__fallthrough__));
        default:
            /* do nothing */;
    }

    // final mixing of the accumulated state
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

/**
 * Removes all elements from the map.
 *
 * For every element the copied key data and the element node are released
 * with the allocator of the map. The stored values are not touched.
 *
 * @param map the map to clear
 */
static void cx_hash_map_clear(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    cx_for_n(i, hash_map->bucket_count) {
        struct cx_hash_map_element_s *node = hash_map->buckets[i];
        if (node != NULL) {
            // release the whole chain of this bucket
            while (node != NULL) {
                // remember the successor before the node is gone
                struct cx_hash_map_element_s *following = node->next;
                cxFree(map->allocator, node->key.data);
                cxFree(map->allocator, node);
                node = following;
            }

            // the bucket must not keep pointing to freed memory
            hash_map->buckets[i] = NULL;
        }
    }
    map->size = 0;
}

/**
 * Destroys the map.
 *
 * Releases all elements, then the bucket array and finally the map
 * structure itself, all with the allocator of the map.
 *
 * @param map the map to destroy
 */
static void cx_hash_map_destructor(struct cx_map_s *map) {
    // drop every element first, the bucket array is still needed for that
    cx_hash_map_clear(map);

    // now the bucket array can go
    cxFree(map->allocator, ((struct cx_hash_map_s *) map)->buckets);

    // the map structure is released last, because the allocator lives in it
    cxFree(map->allocator, map);
}

/**
 * Stores a value under the specified key.
 *
 * The key data is copied with the allocator of the map, the value pointer is
 * stored as-is. When an element with an identical key (same length and same
 * bytes) already exists, only its value is replaced.
 *
 * @param map the map
 * @param key the key
 * @param value the value to store
 * @return zero on success, -1 when an allocation fails
 */
static int cx_hash_map_put(
        CxMap *map,
        CxDataPtr key,
        void *value
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    CxAllocator *allocator = map->allocator;

    unsigned hash = cx_hash_map_murmur(key.data, key.len);

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;

    // the chain of a bucket is ordered by hash: skip all smaller hashes
    while (elm != NULL && elm->key.hash < hash) {
        prev = elm;
        elm = elm->next;
    }

    // an equal hash does not imply an equal key, so every element
    // with this hash must be compared before we decide to insert
    while (elm != NULL && elm->key.hash == hash) {
        if (elm->key.len == key.len
            && memcmp(elm->key.data, key.data, key.len) == 0) {
            // same key - overwrite value of existing element
            elm->data = value;
            return 0;
        }
        prev = elm;
        elm = elm->next;
    }

    // no element with this key exists, create a new one
    struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
    if (e == NULL) {
        return -1;
    }

    // copy the key
    void *kd = cxMalloc(allocator, key.len);
    if (kd == NULL) {
        // do not leak the node when the key cannot be copied
        cxFree(allocator, e);
        return -1;
    }
    memcpy(kd, key.data, key.len);
    e->key.data = kd;
    e->key.len = key.len;
    e->key.hash = hash;

    // write the value
    // TODO: depending on future map features, we may want to copy here
    e->data = value;

    // insert the element between prev and elm, which keeps the chain ordered
    if (prev == NULL) {
        hash_map->buckets[slot] = e;
    } else {
        prev->next = e;
    }
    e->next = elm;

    // increase the size
    map->size++;

    return 0;
}

/**
 * Removes an element from the chain of a bucket and releases its memory.
 *
 * @param hash_map the hash map
 * @param slot the index of the bucket that contains the element
 * @param prev the predecessor of the element within the chain,
 * or \c NULL when the element is the head of the bucket
 * @param elm the element to remove
 */
static void cx_hash_map_unlink(
        struct cx_hash_map_s *hash_map,
        size_t slot,
        struct cx_hash_map_element_s *prev,
        struct cx_hash_map_element_s *elm
) {
    struct cx_hash_map_element_s *successor = elm->next;

    // bypass the element in the chain
    if (prev != NULL) {
        prev->next = successor;
    } else {
        // the element was the head of the bucket
        hash_map->buckets[slot] = successor;
    }

    // release the key copy first, then the node that referenced it
    cxFree(hash_map->base.allocator, elm->key.data);
    cxFree(hash_map->base.allocator, elm);

    // one element less
    hash_map->base.size--;
}

/**
 * Shared implementation of the lookup and the removal of an entry.
 *
 * @param map the map
 * @param key the key to look up
 * @param remove \c true when the entry shall be removed after it was found
 * @return the value corresponding to the key or \c NULL when no
 * element with this key exists
 */
static void *cx_hash_map_get_remove(
        CxMap *map,
        CxDataPtr key,
        bool remove
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    unsigned const hash = cx_hash_map_murmur(key.data, key.len);
    size_t const slot = hash % hash_map->bucket_count;

    // walk the chain, but stop as soon as the hashes exceed the wanted hash
    struct cx_hash_map_element_s *before = NULL;
    for (struct cx_hash_map_element_s *cur = hash_map->buckets[slot];
         cur != NULL && cur->key.hash <= hash;
         before = cur, cur = cur->next) {
        // the bytes are only compared when hash and length already agree
        bool const same_key = cur->key.hash == hash
                              && cur->key.len == key.len
                              && memcmp(cur->key.data, key.data, key.len) == 0;
        if (!same_key) {
            continue;
        }

        // fetch the value before the element is possibly released
        void *found = cur->data;
        if (remove) {
            cx_hash_map_unlink(hash_map, slot, before, cur);
        }
        return found;
    }

    return NULL;
}

/**
 * Looks up the value for a key.
 *
 * @param map the map
 * @param key the key
 * @return the value corresponding to the key or \c NULL
 */
static void *cx_hash_map_get(
        CxMap const *map,
        CxDataPtr key
) {
    // dropping the const is fine: without the remove flag
    // the shared helper never modifies the map
    CxMap *unqualified = (CxMap *) map;
    return cx_hash_map_get_remove(unqualified, key, false);
}

/**
 * Removes the entry for a key.
 *
 * @param map the map
 * @param key the key
 * @return the value that was stored under the key or \c NULL
 */
static void *cx_hash_map_remove(
        CxMap *map,
        CxDataPtr key
) {
    void *removed = cx_hash_map_get_remove(map, key, true);
    return removed;
}

/**
 * Returns the current key/value pair of an iterator.
 *
 * @param iter the iterator
 * @return a pointer to the key/value data stored inside the iterator
 */
static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
    // the key/value data of the iterator is handed out as a map entry,
    // which requires both structs to have a compatible layout
    return (struct cx_map_entry_s *) &(iter->kv_data);
}

/**
 * Returns the key of the element an iterator currently points to.
 *
 * @param iter the iterator
 * @return a pointer to the key stored in the current element
 */
static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
    struct cx_hash_map_element_s *current = iter->elem_handle;
    void *key = &current->key;
    return key;
}

/**
 * Returns the value of the element an iterator currently points to.
 *
 * @param iter the iterator
 * @return the value pointer stored in the current element
 */
static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
    struct cx_hash_map_element_s *current = iter->elem_handle;
    // TODO: return a pointer to data if this map is storing copies
    void *value = current->data;
    return value;
}

/**
 * Checks whether an iterator points to an element.
 *
 * @param iter the iterator
 * @return \c true when the iterator has a current element
 */
static bool cx_hash_map_iter_valid(CxIterator const *iter) {
    // an iterator without an element handle has run past the last element
    return !(iter->elem_handle == NULL);
}

/**
 * Advances an iterator to the next element.
 *
 * When the remove flag of the iterator is set, the current element is
 * removed from the map before the iterator advances and the flag is cleared.
 *
 * The iterator must point to an element when this function is called,
 * because the current element is dereferenced unconditionally.
 *
 * @param iter the iterator
 */
static void cx_hash_map_iter_next(CxIterator *iter) {
    struct cx_hash_map_s *map = iter->src_handle;
    struct cx_hash_map_element_s *elm = iter->elem_handle;

    // remove current element, if asked
    if (iter->remove) {
        // clear the flag
        iter->remove = false;

        // determine the next element
        struct cx_hash_map_element_s *next = elm->next;

        // iter->slot is always one past the bucket of the current element:
        // the bucket search (below and in the creating function) increments
        // the slot once more after it loaded a non-empty bucket
        size_t bucket = iter->slot - 1;

        // search the previous element
        struct cx_hash_map_element_s *prev = NULL;
        if (map->buckets[bucket] != elm) {
            prev = map->buckets[bucket];
            while (prev->next != elm) {
                prev = prev->next;
            }
        }

        // unlink
        cx_hash_map_unlink(map, bucket, prev, elm);

        // advance
        elm = next;
    } else {
        // just advance
        elm = elm->next;
    }

    // do we leave the bucket?
    if (elm == NULL) {
        // search the next bucket, iter->slot already refers to the
        // first bucket that has not been visited yet
        for (; elm == NULL && iter->slot < map->bucket_count; iter->slot++) {
            elm = map->buckets[iter->slot];
        }
    }

    // advance the index in any case
    iter->index++;

    // fill the struct with the current element
    iter->elem_handle = elm;
    if (elm == NULL) {
        // no more elements, the iterator becomes invalid
        iter->kv_data.key = NULL;
        iter->kv_data.value = NULL;
    } else {
        iter->kv_data.key = &elm->key;
        // TODO: pointer to data if this map is storing copies
        iter->kv_data.value = elm->data;
    }
}

/**
 * Creates an iterator over the key/value pairs of a map.
 *
 * For a non-empty map the iterator points to the head of the first
 * non-empty bucket, and its slot is the index of that bucket plus one.
 * For an empty map the iterator has no current element.
 *
 * @param map the map
 * @return the iterator
 */
static CxIterator cx_hash_map_iterator(CxMap *map) {
    CxIterator iter;

    // state that does not depend on the content of the map
    iter.slot = 0;
    iter.index = 0;
    iter.remove = false;
    iter.src_handle = map;

    // behavior of the iterator
    iter.current = cx_hash_map_iter_current_entry;
    iter.next = cx_hash_map_iter_next;
    iter.valid = cx_hash_map_iter_valid;

    if (map->size == 0) {
        // nothing to iterate over
        iter.elem_handle = NULL;
        iter.kv_data.key = NULL;
        iter.kv_data.value = NULL;
        return iter;
    }

    // at least one bucket is non-empty, so this search terminates;
    // the slot is incremented even for the bucket that ends the search
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    struct cx_hash_map_element_s *first;
    do {
        first = hash_map->buckets[iter.slot];
        iter.slot++;
    } while (first == NULL);

    iter.elem_handle = first;
    iter.kv_data.key = &first->key;
    // TODO: pointer to data if this map is storing copies
    iter.kv_data.value = first->data;

    return iter;
}

/**
 * Creates an iterator over the keys of a map.
 *
 * @param map the map
 * @return an iterator whose current item is the key of the element
 */
static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
    // same traversal as the entry iterator, only the accessor differs
    CxIterator key_iter = cx_hash_map_iterator(map);
    key_iter.current = cx_hash_map_iter_current_key;
    return key_iter;
}

/**
 * Creates an iterator over the values of a map.
 *
 * @param map the map
 * @return an iterator whose current item is the value of the element
 */
static CxIterator cx_hash_map_iterator_values(CxMap *map) {
    // same traversal as the entry iterator, only the accessor differs
    CxIterator value_iter = cx_hash_map_iterator(map);
    value_iter.current = cx_hash_map_iter_current_value;
    return value_iter;
}

/**
 * The function table of the hash map implementation.
 *
 * Every map created by cxHashMapCreate() refers to this table.
 * The initializer is positional, so the order of the functions
 * must match the order of the members of cx_map_class.
 */
static cx_map_class cx_hash_map_class = {
        cx_hash_map_destructor,
        cx_hash_map_clear,
        cx_hash_map_put,
        cx_hash_map_get,
        cx_hash_map_remove,
        cx_hash_map_iterator,
        cx_hash_map_iterator_keys,
        cx_hash_map_iterator_values,
};

/**
 * Creates a new, empty hash map.
 *
 * The map structure and the bucket array are allocated with the specified
 * allocator, which is also stored in the map for later allocations.
 *
 * @param allocator the allocator to use
 * @param buckets the number of buckets, zero selects a default of 16
 * @return the new map or \c NULL when an allocation fails
 */
CxMap *cxHashMapCreate(
        CxAllocator *allocator,
        size_t buckets
) {
    // zero requests the implementation defined default
    size_t const count = buckets == 0 ? 16 : buckets;

    struct cx_hash_map_s *hash_map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
    if (hash_map == NULL) {
        return NULL;
    }

    // all buckets start out empty
    hash_map->buckets = cxCalloc(allocator, count, sizeof(struct cx_hash_map_element_s *));
    if (hash_map->buckets == NULL) {
        // do not leak the map structure
        cxFree(allocator, hash_map);
        return NULL;
    }
    hash_map->bucket_count = count;

    // set up the members shared by all map implementations
    hash_map->base.size = 0;
    hash_map->base.allocator = allocator;
    hash_map->base.cl = &cx_hash_map_class;

    return (CxMap *) hash_map;
}

mercurial