#189 basic map implementation

2022-05-19

author
Mike Becker <universe@uap-core.de>
date
Thu, 19 May 2022 14:30:20 +0200 (2022-05-19)
changeset 550
89b2a83728b1
parent 549
d7f0b5a9a985
child 551
2946e13c89a4

#189 basic map implementation

src/cx/hash_map.h file | annotate | diff | comparison | revisions
src/cx/map.h file | annotate | diff | comparison | revisions
src/hash_map.c file | annotate | diff | comparison | revisions
--- a/src/cx/hash_map.h	Wed May 18 16:26:32 2022 +0200
+++ b/src/cx/hash_map.h	Thu May 19 14:30:20 2022 +0200
@@ -46,7 +46,11 @@
 /** Internal structure for a key within a hash map. */
 struct cx_hash_map_key_s {
     /** The key data. */
-    CxDataPtr data;
+    unsigned char *data;
+    /**
+     * The key data length.
+     */
+    size_t len;
     /** The hash value of the key data. */
     unsigned hash;
 };
@@ -63,14 +67,39 @@
     struct cx_hash_map_key_s key;
 };
 
+/**
+ * Internal structure for a hash map.
+ */
+struct cx_hash_map_s {
+    /**
+     * Base structure for maps.
+     */
+    struct cx_map_s base;
+    /**
+     * The buckets of this map, each containing a linked list of elements.
+     */
+    struct cx_hash_map_element_s **buckets;
+    /**
+     * The number of buckets.
+     */
+    size_t bucket_count;
+};
+
 
 /**
- * Creates a new hash map with the specified size using a UcxAllocator.
+ * Creates a new hash map with the specified number of buckets.
+ *
+ * If \p buckets is zero, an implementation defined default will be used.
+ *
+ * @note Iterators provided by this hash map implementation do provide the remove operation, because
+ * a remove never causes an immediate rehashing. The iterators are also position-aware in the sense
+ * that the index is initialized with zero and incremented when the iterator advances.
  *
  * @param allocator the allocator to use
  * @param buckets the initial number of buckets in this hash map
  * @return a pointer to the new hash map
  */
+__attribute__((__nonnull__, __warn_unused_result__))
 CxMap *cxHashMapCreate(
         CxAllocator *allocator,
         size_t buckets
--- a/src/cx/map.h	Wed May 18 16:26:32 2022 +0200
+++ b/src/cx/map.h	Thu May 19 14:30:20 2022 +0200
@@ -88,7 +88,7 @@
     int (*put)(
             CxMap *map,
             CxDataPtr key,
-            void const *value
+            void *value
     );
 
     /**
@@ -105,7 +105,7 @@
      */
     __attribute__((__nonnull__, __warn_unused_result__))
     void *(*remove)(
-            CxMap const *map,
+            CxMap *map,
             CxDataPtr key
     );
 
@@ -177,7 +177,7 @@
 static inline int cxMapPut(
         CxMap *map,
         CxDataPtr key,
-        void const *value
+        void *value
 ) {
     return map->cl->put(map, key, value);
 }
--- a/src/hash_map.c	Wed May 18 16:26:32 2022 +0200
+++ b/src/hash_map.c	Thu May 19 14:30:20 2022 +0200
@@ -26,8 +26,10 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <errno.h>
 #include <string.h>
 #include "cx/hash_map.h"
+#include "cx/utils.h"
 
 /**
  * Computes a murmur hash-2.
@@ -83,23 +85,211 @@
     return h;
 }
 
+static void cx_hash_map_clear(struct cx_map_s *map) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+    cx_for_n(i, hash_map->bucket_count) {
+        struct cx_hash_map_element_s *elem = hash_map->buckets[i];
+        if (elem != NULL) {
+            do {
+                struct cx_hash_map_element_s *next = elem->next;
+                // free the key data
+                cxFree(map->allocator, elem->key.data);
+                // free the node
+                cxFree(map->allocator, elem);
+                // proceed
+                elem = next;
+            } while (elem != NULL);
+
+            // do not leave a dangling pointer
+            hash_map->buckets[i] = NULL;
+        }
+    }
+    map->size = 0;
+}
+
+static void cx_hash_map_destructor(struct cx_map_s *map) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+
+    // free the buckets
+    cx_hash_map_clear(map);
+    cxFree(map->allocator, hash_map->buckets);
+
+    // free the map structure
+    cxFree(map->allocator, map);
+}
+
+static int cx_hash_map_put(
+        CxMap *map,
+        CxDataPtr key,
+        void *value
+) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+    CxAllocator *allocator = map->allocator;
+
+    unsigned hash = cx_hash_map_murmur(key.data, key.len);
+
+    size_t slot = hash % hash_map->bucket_count;
+    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
+    struct cx_hash_map_element_s *prev = NULL;
+
+    while (elm != NULL && elm->key.hash < hash) {
+        prev = elm;
+        elm = elm->next;
+    }
+
+    if (elm == NULL || elm->key.hash != hash) {
+        struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
+        if (e == NULL) {
+            return -1;
+        }
+
+        // write the value
+        // TODO: depending on future map features, we may want to copy here
+        e->data = value;
+
+        // copy the key
+        void *kd = cxMalloc(allocator, key.len);
+        if (kd == NULL) {
+            return -1;
+        }
+        memcpy(kd, key.data, key.len);
+        e->key.data = kd;
+        e->key.len = key.len;
+        e->key.hash = hash;
+
+        // insert the element into the linked list
+        if (prev == NULL) {
+            hash_map->buckets[slot] = e;
+        } else {
+            prev->next = e;
+        }
+        e->next = elm;
+
+        // increase the size
+        map->size++;
+    } else {
+        // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element
+        elm->data = value;
+    }
+
+    return 0;
+}
 
 /**
- * Creates a hash map key based on the given data.
- *
- * This function implicitly computes the hash.
+ * Helper function to avoid code duplication.
  *
- * @attention The data will be copied to the key structure.
- *
- * @param data the data for the key
- * @return the computed key
+ * @param map the map
+ * @param key the key to look up
+ * @param remove flag indicating whether the looked up entry shall be removed
+ * @return the value corresponding to the key or \c NULL
  */
-static struct cx_hash_map_key_s cx_hash_map_key(CxDataPtr data) {
-    unsigned char *target = malloc(data.len);
-    memcpy(target, data.data, data.len);
-    struct cx_hash_map_key_s key;
-    key.data.data = target;
-    key.data.len = data.len;
-    key.hash = cx_hash_map_murmur(target, data.len);
-    return key;
+static void *cx_hash_map_get_remove(
+        CxMap *map,
+        CxDataPtr key,
+        bool remove
+) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+
+    unsigned hash = cx_hash_map_murmur(key.data, key.len);
+
+    size_t slot = hash % hash_map->bucket_count;
+    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
+    struct cx_hash_map_element_s *prev = NULL;
+    while (elm && elm->key.hash <= hash) {
+        if (elm->key.hash == hash && elm->key.len == key.len) {
+            if (memcmp(elm->key.data, key.data, key.len) == 0) {
+                void *data = elm->data;
+                if (remove) {
+                    // unlink
+                    if (prev) {
+                        prev->next = elm->next;
+                    } else {
+                        hash_map->buckets[slot] = elm->next;
+                    }
+                    // free element
+                    cxFree(map->allocator, elm->key.data);
+                    cxFree(map->allocator, elm);
+                    // decrease size
+                    map->size--;
+                }
+                return data;
+            }
+        }
+        prev = elm;
+        elm = prev->next;
+    }
+
+    return NULL;
+}
+
+static void *cx_hash_map_get(
+        CxMap const *map,
+        CxDataPtr key
+) {
+    // we can safely cast, because we know when remove=false, the map stays untouched
+    return cx_hash_map_get_remove((CxMap *) map, key, false);
+}
+
+static void *cx_hash_map_remove(
+        CxMap *map,
+        CxDataPtr key
+) {
+    return cx_hash_map_get_remove(map, key, true);
 }
+
+static CxIterator cx_hash_map_iterator(CxMap const *map) {
+    CxIterator iter;
+    // TODO: initialize iter
+    return iter;
+}
+
+static CxIterator cx_hash_map_iterator_keys(CxMap const *map) {
+    CxIterator iter;
+    // TODO: initialize iter
+    return iter;
+}
+
+static CxIterator cx_hash_map_iterator_values(CxMap const *map) {
+    CxIterator iter;
+    // TODO: initialize iter
+    return iter;
+}
+
+static cx_map_class cx_hash_map_class = {
+        cx_hash_map_destructor,
+        cx_hash_map_clear,
+        cx_hash_map_put,
+        cx_hash_map_get,
+        cx_hash_map_remove,
+        cx_hash_map_iterator,
+        cx_hash_map_iterator_keys,
+        cx_hash_map_iterator_values,
+};
+
+CxMap *cxHashMapCreate(
+        CxAllocator *allocator,
+        size_t buckets
+) {
+    if (buckets == 0) {
+        // implementation defined default
+        buckets = 16;
+    }
+
+    struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
+    if (map == NULL) return NULL;
+
+    // initialize hash map members
+    map->bucket_count = buckets;
+    map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
+    if (map->buckets == NULL) {
+        cxFree(allocator, map);
+        return NULL;
+    }
+
+    // initialize base members
+    map->base.cl = &cx_hash_map_class;
+    map->base.allocator = allocator;
+    map->base.size = 0;
+
+    return (CxMap *) map;
+}

mercurial