src/hash_map.c

Fri, 23 May 2025 12:44:24 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 23 May 2025 12:44:24 +0200
changeset 1327
ed75dc1db503
parent 1115
6db21dee4929
permissions
-rw-r--r--

make test-compile depend on both static and shared

the shared lib is not needed for the tests,
but when run with coverage, gcov will be confused
when outdated line information is available from
a previous shared build

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/hash_map.h"

#include <string.h>
#include <assert.h>
#include <errno.h>

struct cx_hash_map_element_s {
    /** A pointer to the next element in the current bucket. */
    struct cx_hash_map_element_s *next;

    /** The corresponding key. */
    CxHashKey key;

    /** The value data. */
    char data[];
};

static void cx_hash_map_clear(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    for (size_t i = 0; i < hash_map->bucket_count; i++) {
        struct cx_hash_map_element_s *elem = hash_map->buckets[i];
        if (elem != NULL) {
            do {
                struct cx_hash_map_element_s *next = elem->next;
                // invoke the destructor
                cx_invoke_destructor(map, elem->data);
                // free the key data
                cxFree(map->collection.allocator, (void *) elem->key.data);
                // free the node
                cxFree(map->collection.allocator, elem);
                // proceed
                elem = next;
            } while (elem != NULL);

            // do not leave a dangling pointer
            hash_map->buckets[i] = NULL;
        }
    }
    map->collection.size = 0;
}

static void cx_hash_map_destructor(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    // free the buckets
    cx_hash_map_clear(map);
    cxFree(map->collection.allocator, hash_map->buckets);

    // free the map structure
    cxFree(map->collection.allocator, map);
}

static int cx_hash_map_put(
        CxMap *map,
        CxHashKey key,
        void *value
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    const CxAllocator *allocator = map->collection.allocator;

    unsigned hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;

    while (elm != NULL && elm->key.hash < hash) {
        prev = elm;
        elm = elm->next;
    }

    if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
        memcmp(elm->key.data, key.data, key.len) == 0) {
        // overwrite existing element, but call destructors first
        cx_invoke_destructor(map, elm->data);
        if (map->collection.store_pointer) {
            memcpy(elm->data, &value, sizeof(void *));
        } else {
            memcpy(elm->data, value, map->collection.elem_size);
        }
    } else {
        // allocate new element
        struct cx_hash_map_element_s *e = cxMalloc(
                allocator,
                sizeof(struct cx_hash_map_element_s) + map->collection.elem_size
        );
        if (e == NULL) return -1;

        // write the value
        if (map->collection.store_pointer) {
            memcpy(e->data, &value, sizeof(void *));
        } else {
            memcpy(e->data, value, map->collection.elem_size);
        }

        // copy the key
        void *kd = cxMalloc(allocator, key.len);
        if (kd == NULL) return -1;
        memcpy(kd, key.data, key.len);
        e->key.data = kd;
        e->key.len = key.len;
        e->key.hash = hash;

        // insert the element into the linked list
        if (prev == NULL) {
            hash_map->buckets[slot] = e;
        } else {
            prev->next = e;
        }
        e->next = elm;

        // increase the size
        map->collection.size++;
    }

    return 0;
}

static void cx_hash_map_unlink(
        struct cx_hash_map_s *hash_map,
        size_t slot,
        struct cx_hash_map_element_s *prev,
        struct cx_hash_map_element_s *elm
) {
    // unlink
    if (prev == NULL) {
        hash_map->buckets[slot] = elm->next;
    } else {
        prev->next = elm->next;
    }
    // free element
    cxFree(hash_map->base.collection.allocator, (void *) elm->key.data);
    cxFree(hash_map->base.collection.allocator, elm);
    // decrease size
    hash_map->base.collection.size--;
}

/**
 * Helper function to avoid code duplication.
 *
 * If @p remove is true, and @p targetbuf is @c NULL, the element
 * will be destroyed when found.
 *
 * If @p remove is true, and @p targetbuf is set, the element will
 * be copied to that buffer and no destructor function is called.
 *
 * If @p remove is false, @p targetbuf must not be non-null and
 * either the pointer, when the map is storing pointers, is copied
 * to the target buffer, or a pointer to the stored object will
 * be copied to the target buffer.
 *
 * @param map the map
 * @param key the key to look up
 * @param targetbuf see description
 * @param remove flag indicating whether the looked up entry shall be removed
 * @return zero, if the key was found, non-zero otherwise
 */
static int cx_hash_map_get_remove(
        CxMap *map,
        CxHashKey key,
        void *targetbuf,
        bool remove
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    unsigned hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;
    while (elm && elm->key.hash <= hash) {
        if (elm->key.hash == hash && elm->key.len == key.len) {
            if (memcmp(elm->key.data, key.data, key.len) == 0) {
                if (remove) {
                    if (targetbuf == NULL) {
                        cx_invoke_destructor(map, elm->data);
                    } else {
                        memcpy(targetbuf, elm->data, map->collection.elem_size);
                    }
                    cx_hash_map_unlink(hash_map, slot, prev, elm);
                } else {
                    assert(targetbuf != NULL);
                    void *data = NULL;
                    if (map->collection.store_pointer) {
                        data = *(void **) elm->data;
                    } else {
                        data = elm->data;
                    }
                    memcpy(targetbuf, &data, sizeof(void *));
                }
                return 0;
            }
        }
        prev = elm;
        elm = prev->next;
    }

    return 1;
}

static void *cx_hash_map_get(
        const CxMap *map,
        CxHashKey key
) {
    // we can safely cast, because we know the map stays untouched
    void *ptr = NULL;
    int found = cx_hash_map_get_remove((CxMap *) map, key, &ptr, false);
    return found == 0 ? ptr : NULL;
}

static int cx_hash_map_remove(
        CxMap *map,
        CxHashKey key,
        void *targetbuf
) {
    return cx_hash_map_get_remove(map, key, targetbuf, true);
}

static void *cx_hash_map_iter_current_entry(const void *it) {
    const CxMapIterator *iter = it;
    // we have to cast away const, because of the signature
    return (void*) &iter->entry;
}

static void *cx_hash_map_iter_current_key(const void *it) {
    const CxMapIterator *iter = it;
    struct cx_hash_map_element_s *elm = iter->elem;
    return &elm->key;
}

static void *cx_hash_map_iter_current_value(const void *it) {
    const CxMapIterator *iter = it;
    const CxMap *map = iter->map.c;
    struct cx_hash_map_element_s *elm = iter->elem;
    if (map->collection.store_pointer) {
        return *(void **) elm->data;
    } else {
        return elm->data;
    }
}

static bool cx_hash_map_iter_valid(const void *it) {
    const CxMapIterator *iter = it;
    return iter->elem != NULL;
}

static void cx_hash_map_iter_next(void *it) {
    CxMapIterator *iter = it;
    CxMap *map = iter->map.m;
    struct cx_hash_map_s *hmap = (struct cx_hash_map_s *) map;
    struct cx_hash_map_element_s *elm = iter->elem;

    // remove current element, if asked
    if (iter->base.remove) {

        // clear the flag
        iter->base.remove = false;

        // determine the next element
        struct cx_hash_map_element_s *next = elm->next;

        // search the previous element
        struct cx_hash_map_element_s *prev = NULL;
        if (hmap->buckets[iter->slot] != elm) {
            prev = hmap->buckets[iter->slot];
            while (prev->next != elm) {
                prev = prev->next;
            }
        }

        // destroy
        cx_invoke_destructor(map, elm->data);

        // unlink
        cx_hash_map_unlink(hmap, iter->slot, prev, elm);

        // advance
        elm = next;
    } else {
        // just advance
        elm = elm->next;
        iter->index++;
    }

    // search the next bucket, if required
    while (elm == NULL && ++iter->slot < hmap->bucket_count) {
        elm = hmap->buckets[iter->slot];
    }
    iter->elem = elm;

    // copy data to a location where the iterator can point to
    // we need to do it here, because the iterator function call
    // must not modify the iterator (the parameter is const)
    if (elm != NULL) {
        iter->entry.key = &elm->key;
        if (iter->map.c->collection.store_pointer) {
            iter->entry.value = *(void **) elm->data;
        } else {
            iter->entry.value = elm->data;
        }
    }
}

static CxMapIterator cx_hash_map_iterator(
        const CxMap *map,
        enum cx_map_iterator_type type
) {
    CxMapIterator iter;

    iter.map.c = map;
    iter.elem_count = map->collection.size;

    switch (type) {
        case CX_MAP_ITERATOR_PAIRS:
            iter.elem_size = sizeof(CxMapEntry);
            iter.base.current = cx_hash_map_iter_current_entry;
            break;
        case CX_MAP_ITERATOR_KEYS:
            iter.elem_size = sizeof(CxHashKey);
            iter.base.current = cx_hash_map_iter_current_key;
            break;
        case CX_MAP_ITERATOR_VALUES:
            iter.elem_size = map->collection.elem_size;
            iter.base.current = cx_hash_map_iter_current_value;
            break;
        default:
            assert(false); // LCOV_EXCL_LINE
    }

    iter.base.valid = cx_hash_map_iter_valid;
    iter.base.next = cx_hash_map_iter_next;
    iter.base.remove = false;
    iter.base.mutating = false;

    iter.slot = 0;
    iter.index = 0;

    if (map->collection.size > 0) {
        struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
        struct cx_hash_map_element_s *elm = hash_map->buckets[0];
        while (elm == NULL) {
            elm = hash_map->buckets[++iter.slot];
        }
        iter.elem = elm;
        iter.entry.key = &elm->key;
        if (map->collection.store_pointer) {
            iter.entry.value = *(void **) elm->data;
        } else {
            iter.entry.value = elm->data;
        }
    } else {
        iter.elem = NULL;
    }

    return iter;
}

static cx_map_class cx_hash_map_class = {
        cx_hash_map_destructor,
        cx_hash_map_clear,
        cx_hash_map_put,
        cx_hash_map_get,
        cx_hash_map_remove,
        cx_hash_map_iterator,
};

CxMap *cxHashMapCreate(
        const CxAllocator *allocator,
        size_t itemsize,
        size_t buckets
) {
    if (allocator == NULL) {
        allocator = cxDefaultAllocator;
    }

    if (buckets == 0) {
        // implementation defined default
        buckets = 16;
    }

    struct cx_hash_map_s *map = cxCalloc(allocator, 1,
                                         sizeof(struct cx_hash_map_s));
    if (map == NULL) return NULL;

    // initialize hash map members
    map->bucket_count = buckets;
    map->buckets = cxCalloc(allocator, buckets,
                            sizeof(struct cx_hash_map_element_s *));
    if (map->buckets == NULL) { // LCOV_EXCL_START
        cxFree(allocator, map);
        return NULL;
    } // LCOV_EXCL_STOP

    // initialize base members
    map->base.cl = &cx_hash_map_class;
    map->base.collection.allocator = allocator;

    if (itemsize > 0) {
        map->base.collection.elem_size = itemsize;
    } else {
        map->base.collection.elem_size = sizeof(void *);
        map->base.collection.store_pointer = true;
    }

    return (CxMap *) map;
}

int cxMapRehash(CxMap *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    if (map->collection.size > ((hash_map->bucket_count * 3) >> 2)) {

        size_t new_bucket_count = (map->collection.size * 5) >> 1;
        if (new_bucket_count < hash_map->bucket_count) {
            errno = EOVERFLOW;
            return 1;
        }
        struct cx_hash_map_element_s **new_buckets = cxCalloc(
                map->collection.allocator,
                new_bucket_count, sizeof(struct cx_hash_map_element_s *)
        );

        if (new_buckets == NULL) return 1;

        // iterate through the elements and assign them to their new slots
        for (size_t slot = 0; slot < hash_map->bucket_count; slot++) {
            struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
            while (elm != NULL) {
                struct cx_hash_map_element_s *next = elm->next;
                size_t new_slot = elm->key.hash % new_bucket_count;

                // find position where to insert
                struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
                struct cx_hash_map_element_s *bucket_prev = NULL;
                while (bucket_next != NULL &&
                       bucket_next->key.hash < elm->key.hash) {
                    bucket_prev = bucket_next;
                    bucket_next = bucket_next->next;
                }

                // insert
                if (bucket_prev == NULL) {
                    elm->next = new_buckets[new_slot];
                    new_buckets[new_slot] = elm;
                } else {
                    bucket_prev->next = elm;
                    elm->next = bucket_next;
                }

                // advance
                elm = next;
            }
        }

        // assign result to the map
        hash_map->bucket_count = new_bucket_count;
        cxFree(map->collection.allocator, hash_map->buckets);
        hash_map->buckets = new_buckets;
    }
    return 0;
}

mercurial