src/hash_map.c

Sun, 26 Oct 2025 13:08:42 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 26 Oct 2025 13:08:42 +0100
changeset 1450
09a73312d5ec
parent 1442
244fb8bc3584
permissions
-rw-r--r--

add more test coverage for new map functions and fixes some issues

the "what if dst already contains a key" part did not really work
plus the cxMapListDifference() did not fallback to a default allocator

really resolves #746

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/hash_map.h"

#include <string.h>
#include <assert.h>
#include <errno.h>

struct cx_hash_map_element_s {
    /** A pointer to the next element in the current bucket. */
    struct cx_hash_map_element_s *next;

    /** The corresponding key. */
    CxHashKey key;

    /** The value data. */
    char data[];
};

static void cx_hash_map_clear(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    for (size_t i = 0; i < hash_map->bucket_count; i++) {
        struct cx_hash_map_element_s *elem = hash_map->buckets[i];
        if (elem != NULL) {
            do {
                struct cx_hash_map_element_s *next = elem->next;
                // invoke the destructor
                cx_invoke_destructor(map, elem->data);
                // free the key data
                cxFree(map->collection.allocator, (void *) elem->key.data);
                // free the node
                cxFree(map->collection.allocator, elem);
                // proceed
                elem = next;
            } while (elem != NULL);

            // do not leave a dangling pointer
            hash_map->buckets[i] = NULL;
        }
    }
    map->collection.size = 0;
}

static void cx_hash_map_destructor(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    // free the buckets
    cx_hash_map_clear(map);
    cxFree(map->collection.allocator, hash_map->buckets);

    // free the map structure
    cxFree(map->collection.allocator, map);
}

static void *cx_hash_map_put(
        CxMap *map,
        CxHashKey key,
        void *value
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    const CxAllocator *allocator = map->collection.allocator;

    uint64_t hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;

    while (elm != NULL && elm->key.hash < hash) {
        prev = elm;
        elm = elm->next;
    }

    if (elm != NULL && cx_hash_key_cmp(&elm->key, &key) == 0) {
        // overwrite existing element, but call destructors first
        cx_invoke_destructor(map, elm->data);
        if (value == NULL) {
            memset(elm->data, 0, map->collection.elem_size);
        } else if (map->collection.store_pointer) {
            memcpy(elm->data, &value, sizeof(void *));
        } else {
            memcpy(elm->data, value, map->collection.elem_size);
        }
    } else {
        // allocate new element
        struct cx_hash_map_element_s *e = cxMalloc(
                allocator,
                sizeof(struct cx_hash_map_element_s) + map->collection.elem_size
        );
        if (e == NULL) return NULL;

        // write the value
        if (value == NULL) {
            memset(e->data, 0, map->collection.elem_size);
        } else if (map->collection.store_pointer) {
            memcpy(e->data, &value, sizeof(void *));
        } else {
            memcpy(e->data, value, map->collection.elem_size);
        }

        // copy the key
        void *kd = cxMalloc(allocator, key.len);
        if (kd == NULL) {
            cxFree(allocator, e);
            return NULL;
        }
        memcpy(kd, key.data, key.len);
        e->key.data = kd;
        e->key.len = key.len;
        e->key.hash = hash;

        // insert the element into the linked list
        if (prev == NULL) {
            hash_map->buckets[slot] = e;
        } else {
            prev->next = e;
        }
        e->next = elm;
        elm = e;

        // increase the size
        map->collection.size++;
    }

    // return pointer to the element
    return elm->data;
}

static void cx_hash_map_unlink(
        struct cx_hash_map_s *hash_map,
        size_t slot,
        struct cx_hash_map_element_s *prev,
        struct cx_hash_map_element_s *elm
) {
    // unlink
    if (prev == NULL) {
        hash_map->buckets[slot] = elm->next;
    } else {
        prev->next = elm->next;
    }
    // free element
    cxFree(hash_map->base.collection.allocator, (void *) elm->key.data);
    cxFree(hash_map->base.collection.allocator, elm);
    // decrease size
    hash_map->base.collection.size--;
}

/**
 * Helper function to avoid code duplication.
 *
 * If @p remove is true, and @p targetbuf is @c NULL, the element
 * will be destroyed when found.
 *
 * If @p remove is true, and @p targetbuf is set, the element will
 * be copied to that buffer and no destructor function is called.
 *
 * If @p remove is false, @p targetbuf must not be non-null and
 * either the pointer, when the map is storing pointers, is copied
 * to the target buffer, or a pointer to the stored object will
 * be copied to the target buffer.
 *
 * @param map the map
 * @param key the key to look up
 * @param targetbuf see description
 * @param remove flag indicating whether the looked up entry shall be removed
 * @return zero, if the key was found, non-zero otherwise
 */
static int cx_hash_map_get_remove(
        CxMap *map,
        CxHashKey key,
        void *targetbuf,
        bool remove
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    uint64_t hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;
    while (elm && elm->key.hash <= hash) {
        if (cx_hash_key_cmp(&elm->key, &key) == 0) {
            if (remove) {
                if (targetbuf == NULL) {
                    cx_invoke_destructor(map, elm->data);
                } else {
                    memcpy(targetbuf, elm->data, map->collection.elem_size);
                }
                cx_hash_map_unlink(hash_map, slot, prev, elm);
            } else {
                assert(targetbuf != NULL);
                void *data = NULL;
                if (map->collection.store_pointer) {
                    data = *(void **) elm->data;
                } else {
                    data = elm->data;
                }
                memcpy(targetbuf, &data, sizeof(void *));
            }
            return 0;
        }
        prev = elm;
        elm = prev->next;
    }

    return 1;
}

static void *cx_hash_map_get(
        const CxMap *map,
        CxHashKey key
) {
    // we can safely cast, because we know the map stays untouched
    void *ptr = NULL;
    int found = cx_hash_map_get_remove((CxMap *) map, key, &ptr, false);
    return found == 0 ? ptr : NULL;
}

static int cx_hash_map_remove(
        CxMap *map,
        CxHashKey key,
        void *targetbuf
) {
    return cx_hash_map_get_remove(map, key, targetbuf, true);
}

static void *cx_hash_map_iter_current_entry(const void *it) {
    const CxMapIterator *iter = it;
    // we have to cast away const, because of the signature
    return (void*) &iter->entry;
}

static void *cx_hash_map_iter_current_key(const void *it) {
    const CxMapIterator *iter = it;
    return (void*) iter->entry.key;
}

static void *cx_hash_map_iter_current_value(const void *it) {
    const CxMapIterator *iter = it;
    return iter->entry.value;
}

static bool cx_hash_map_iter_valid(const void *it) {
    const CxMapIterator *iter = it;
    return iter->elem != NULL;
}

static void cx_hash_map_iter_next(void *it) {
    CxMapIterator *iter = it;
    CxMap *map = iter->map;
    struct cx_hash_map_s *hmap = (struct cx_hash_map_s *) map;
    struct cx_hash_map_element_s *elm = iter->elem;

    // remove current element, if asked
    if (iter->base.remove) {

        // clear the flag
        iter->base.remove = false;

        // determine the next element
        struct cx_hash_map_element_s *next = elm->next;

        // search the previous element
        struct cx_hash_map_element_s *prev = NULL;
        if (hmap->buckets[iter->slot] != elm) {
            prev = hmap->buckets[iter->slot];
            while (prev->next != elm) {
                prev = prev->next;
            }
        }

        // destroy
        cx_invoke_destructor(map, elm->data);

        // unlink
        cx_hash_map_unlink(hmap, iter->slot, prev, elm);
        iter->elem_count--;

        // advance
        elm = next;
    } else {
        // just advance
        elm = elm->next;
        iter->index++;
    }

    // search the next bucket, if required
    while (elm == NULL && ++iter->slot < hmap->bucket_count) {
        elm = hmap->buckets[iter->slot];
    }
    iter->elem = elm;

    // copy data to a location where the iterator can point to
    // we need to do it here, because the iterator function call
    // must not modify the iterator (the parameter is const)
    if (elm != NULL) {
        iter->entry.key = &elm->key;
        if (map->collection.store_pointer) {
            iter->entry.value = *(void **) elm->data;
        } else {
            iter->entry.value = elm->data;
        }
    }
}

static CxMapIterator cx_hash_map_iterator(
        const CxMap *map,
        enum cx_map_iterator_type type
) {
    CxMapIterator iter;

    iter.map = (CxMap*) map;
    iter.elem_count = map->collection.size;

    switch (type) {
        case CX_MAP_ITERATOR_PAIRS:
            iter.elem_size = sizeof(CxMapEntry);
            iter.base.current = cx_hash_map_iter_current_entry;
            break;
        case CX_MAP_ITERATOR_KEYS:
            iter.elem_size = sizeof(CxHashKey);
            iter.base.current = cx_hash_map_iter_current_key;
            break;
        case CX_MAP_ITERATOR_VALUES:
            iter.elem_size = map->collection.elem_size;
            iter.base.current = cx_hash_map_iter_current_value;
            break;
        default:
            assert(false); // LCOV_EXCL_LINE
    }

    iter.base.valid = cx_hash_map_iter_valid;
    iter.base.next = cx_hash_map_iter_next;
    iter.base.remove = false;
    iter.base.allow_remove = true;

    iter.slot = 0;
    iter.index = 0;

    if (map->collection.size > 0) {
        struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
        struct cx_hash_map_element_s *elm = hash_map->buckets[0];
        while (elm == NULL) {
            elm = hash_map->buckets[++iter.slot];
        }
        iter.elem = elm;
        iter.entry.key = &elm->key;
        if (map->collection.store_pointer) {
            iter.entry.value = *(void **) elm->data;
        } else {
            iter.entry.value = elm->data;
        }
    } else {
        iter.elem = NULL;
    }

    return iter;
}

static cx_map_class cx_hash_map_class = {
        cx_hash_map_destructor,
        cx_hash_map_clear,
        cx_hash_map_put,
        cx_hash_map_get,
        cx_hash_map_remove,
        cx_hash_map_iterator,
};

CxMap *cxHashMapCreate(
        const CxAllocator *allocator,
        size_t itemsize,
        size_t buckets
) {
    if (allocator == NULL) {
        allocator = cxDefaultAllocator;
    }

    if (buckets == 0) {
        // implementation defined default
        buckets = 16;
    }

    struct cx_hash_map_s *map = cxCalloc(allocator, 1,
                                         sizeof(struct cx_hash_map_s));
    if (map == NULL) return NULL;

    // initialize hash map members
    map->bucket_count = buckets;
    map->buckets = cxCalloc(allocator, buckets,
                            sizeof(struct cx_hash_map_element_s *));
    if (map->buckets == NULL) { // LCOV_EXCL_START
        cxFree(allocator, map);
        return NULL;
    } // LCOV_EXCL_STOP

    // initialize base members
    map->base.cl = &cx_hash_map_class;
    map->base.collection.allocator = allocator;

    if (itemsize > 0) {
        map->base.collection.elem_size = itemsize;
    } else {
        map->base.collection.elem_size = sizeof(void *);
        map->base.collection.store_pointer = true;
    }

    return (CxMap *) map;
}

int cxMapRehash(CxMap *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    if (map->collection.size > ((hash_map->bucket_count * 3) >> 2)) {

        size_t new_bucket_count = (map->collection.size * 5) >> 1;
        if (new_bucket_count < hash_map->bucket_count) {
            errno = EOVERFLOW;
            return 1;
        }
        struct cx_hash_map_element_s **new_buckets = cxCalloc(
                map->collection.allocator,
                new_bucket_count, sizeof(struct cx_hash_map_element_s *)
        );

        if (new_buckets == NULL) return 1;

        // iterate through the elements and assign them to their new slots
        for (size_t slot = 0; slot < hash_map->bucket_count; slot++) {
            struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
            while (elm != NULL) {
                struct cx_hash_map_element_s *next = elm->next;
                size_t new_slot = elm->key.hash % new_bucket_count;

                // find position where to insert
                struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
                struct cx_hash_map_element_s *bucket_prev = NULL;
                while (bucket_next != NULL &&
                       bucket_next->key.hash < elm->key.hash) {
                    bucket_prev = bucket_next;
                    bucket_next = bucket_next->next;
                }

                // insert
                if (bucket_prev == NULL) {
                    elm->next = new_buckets[new_slot];
                    new_buckets[new_slot] = elm;
                } else {
                    bucket_prev->next = elm;
                    elm->next = bucket_next;
                }

                // advance
                elm = next;
            }
        }

        // assign result to the map
        hash_map->bucket_count = new_bucket_count;
        cxFree(map->collection.allocator, hash_map->buckets);
        hash_map->buckets = new_buckets;
    }
    return 0;
}

mercurial