Thu, 19 May 2022 14:30:20 +0200
#189 basic map implementation
src/cx/hash_map.h | file | annotate | diff | comparison | revisions | |
src/cx/map.h | file | annotate | diff | comparison | revisions | |
src/hash_map.c | file | annotate | diff | comparison | revisions |
--- a/src/cx/hash_map.h Wed May 18 16:26:32 2022 +0200 +++ b/src/cx/hash_map.h Thu May 19 14:30:20 2022 +0200 @@ -46,7 +46,11 @@ /** Internal structure for a key within a hash map. */ struct cx_hash_map_key_s { /** The key data. */ - CxDataPtr data; + unsigned char *data; + /** + * The key data length. + */ + size_t len; /** The hash value of the key data. */ unsigned hash; }; @@ -63,14 +67,39 @@ struct cx_hash_map_key_s key; }; +/** + * Internal structure for a hash map. + */ +struct cx_hash_map_s { + /** + * Base structure for maps. + */ + struct cx_map_s base; + /** + * The buckets of this map, each containing a linked list of elements. + */ + struct cx_hash_map_element_s **buckets; + /** + * The number of buckets. + */ + size_t bucket_count; +}; + /** - * Creates a new hash map with the specified size using a UcxAllocator. + * Creates a new hash map with the specified number of buckets. + * + * If \p buckets is zero, an implementation defined default will be used. + * + * @note Iterators provided by this hash map implementation do provide the remove operation, because + * a remove never causes an immediate rehashing. The iterators are also position-aware in the sense + * that the index is initialized with zero and incremented when the iterator advances. * * @param allocator the allocator to use * @param buckets the initial number of buckets in this hash map * @return a pointer to the new hash map */ +__attribute__((__nonnull__, __warn_unused_result__)) CxMap *cxHashMapCreate( CxAllocator *allocator, size_t buckets
--- a/src/cx/map.h Wed May 18 16:26:32 2022 +0200 +++ b/src/cx/map.h Thu May 19 14:30:20 2022 +0200 @@ -88,7 +88,7 @@ int (*put)( CxMap *map, CxDataPtr key, - void const *value + void *value ); /** @@ -105,7 +105,7 @@ */ __attribute__((__nonnull__, __warn_unused_result__)) void *(*remove)( - CxMap const *map, + CxMap *map, CxDataPtr key ); @@ -177,7 +177,7 @@ static inline int cxMapPut( CxMap *map, CxDataPtr key, - void const *value + void *value ) { return map->cl->put(map, key, value); }
--- a/src/hash_map.c Wed May 18 16:26:32 2022 +0200 +++ b/src/hash_map.c Thu May 19 14:30:20 2022 +0200 @@ -26,8 +26,10 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <errno.h> #include <string.h> #include "cx/hash_map.h" +#include "cx/utils.h" /** * Computes a murmur hash-2. @@ -83,23 +85,211 @@ return h; } +static void cx_hash_map_clear(struct cx_map_s *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + cx_for_n(i, hash_map->bucket_count) { + struct cx_hash_map_element_s *elem = hash_map->buckets[i]; + if (elem != NULL) { + do { + struct cx_hash_map_element_s *next = elem->next; + // free the key data + cxFree(map->allocator, elem->key.data); + // free the node + cxFree(map->allocator, elem); + // proceed + elem = next; + } while (elem != NULL); + + // do not leave a dangling pointer + hash_map->buckets[i] = NULL; + } + } + map->size = 0; +} + +static void cx_hash_map_destructor(struct cx_map_s *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + + // free the buckets + cx_hash_map_clear(map); + cxFree(map->allocator, hash_map->buckets); + + // free the map structure + cxFree(map->allocator, map); +} + +static int cx_hash_map_put( + CxMap *map, + CxDataPtr key, + void *value +) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + CxAllocator *allocator = map->allocator; + + unsigned hash = cx_hash_map_murmur(key.data, key.len); + + size_t slot = hash % hash_map->bucket_count; + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + struct cx_hash_map_element_s *prev = NULL; + + while (elm != NULL && elm->key.hash < hash) { + prev = elm; + elm = elm->next; + } + + if (elm == NULL || elm->key.hash != hash) { + struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s)); + if (e == NULL) { + return -1; + } + + // write the value + // TODO: depending on future map features, we may want to copy here + e->data = value; + + // copy the key + void *kd = cxMalloc(allocator, key.len); + if (kd == NULL) { + return -1; + } + memcpy(kd, key.data, key.len); + e->key.data = kd; + e->key.len = key.len; + e->key.hash = hash; + + // insert the element into the linked list + if (prev == NULL) { + hash_map->buckets[slot] = e; + } else { + prev->next = e; + } + e->next = elm; + + // increase the size + map->size++; + } else { + // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element + elm->data = value; + } + + return 0; +} /** - * Creates a hash map key based on the given data. - * - * This function implicitly computes the hash. + * Helper function to avoid code duplication. * - * @attention The data will be copied to the key structure. - * - * @param data the data for the key - * @return the computed key + * @param map the map + * @param key the key to look up + * @param remove flag indicating whether the looked up entry shall be removed + * @return the value corresponding to the key or \c NULL */ -static struct cx_hash_map_key_s cx_hash_map_key(CxDataPtr data) { - unsigned char *target = malloc(data.len); - memcpy(target, data.data, data.len); - struct cx_hash_map_key_s key; - key.data.data = target; - key.data.len = data.len; - key.hash = cx_hash_map_murmur(target, data.len); - return key; +static void *cx_hash_map_get_remove( + CxMap *map, + CxDataPtr key, + bool remove +) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + + unsigned hash = cx_hash_map_murmur(key.data, key.len); + + size_t slot = hash % hash_map->bucket_count; + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + struct cx_hash_map_element_s *prev = NULL; + while (elm && elm->key.hash <= hash) { + if (elm->key.hash == hash && elm->key.len == key.len) { + if (memcmp(elm->key.data, key.data, key.len) == 0) { + void *data = elm->data; + if (remove) { + // unlink + if (prev) { + prev->next = elm->next; + } else { + hash_map->buckets[slot] = elm->next; + } + // free element + cxFree(map->allocator, elm->key.data); + cxFree(map->allocator, elm); + // decrease size + map->size--; + } + return data; + } + } + prev = elm; + elm = prev->next; + } + + return NULL; +} + +static void *cx_hash_map_get( + CxMap const *map, + CxDataPtr key +) { + // we can safely cast, because we know when remove=false, the map stays untouched + return cx_hash_map_get_remove((CxMap *) map, key, false); +} + +static void *cx_hash_map_remove( + CxMap *map, + CxDataPtr key +) { + return cx_hash_map_get_remove(map, key, true); } + +static CxIterator cx_hash_map_iterator(CxMap const *map) { + CxIterator iter; + // TODO: initialize iter + return iter; +} + +static CxIterator cx_hash_map_iterator_keys(CxMap const *map) { + CxIterator iter; + // TODO: initialize iter + return iter; +} + +static CxIterator cx_hash_map_iterator_values(CxMap const *map) { + CxIterator iter; + // TODO: initialize iter + return iter; +} + +static cx_map_class cx_hash_map_class = { + cx_hash_map_destructor, + cx_hash_map_clear, + cx_hash_map_put, + cx_hash_map_get, + cx_hash_map_remove, + cx_hash_map_iterator, + cx_hash_map_iterator_keys, + cx_hash_map_iterator_values, +}; + +CxMap *cxHashMapCreate( + CxAllocator *allocator, + size_t buckets +) { + if (buckets == 0) { + // implementation defined default + buckets = 16; + } + + struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s)); + if (map == NULL) return NULL; + + // initialize hash map members + map->bucket_count = buckets; + map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *)); + if (map->buckets == NULL) { + cxFree(allocator, map); + return NULL; + } + + // initialize base members + map->base.cl = &cx_hash_map_class; + map->base.allocator = allocator; + map->base.size = 0; + + return (CxMap *) map; +}