add support for integer keys - resolves #720

Sat, 27 Sep 2025 17:49:13 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 27 Sep 2025 17:49:13 +0200
changeset 1400
7bc88ae62755
parent 1399
40c3b850f859
child 1401
a76249f50237

add support for integer keys - resolves #720

docs/Writerside/topics/hash_key.h.md file | annotate | diff | comparison | revisions
src/cx/hash_key.h file | annotate | diff | comparison | revisions
src/hash_key.c file | annotate | diff | comparison | revisions
src/hash_map.c file | annotate | diff | comparison | revisions
--- a/docs/Writerside/topics/hash_key.h.md	Sat Sep 27 17:47:10 2025 +0200
+++ b/docs/Writerside/topics/hash_key.h.md	Sat Sep 27 17:49:13 2025 +0200
@@ -7,17 +7,25 @@
 ```C
 #include <cx/hash_key.h>
 
-void 	  cx_hash_murmur(CxHashKey *key);
+void cx_hash_murmur(CxHashKey *key);
+
+uint32_t cx_hash_u32(uint32_t x);
+uint64_t cx_hash_u64(uint64_t x);
+
 CxHashKey cx_hash_key(const void *obj, size_t len);
 CxHashKey cx_hash_key_str(const char *str);
 CxHashKey cx_hash_key_bytes(const unsigned char *bytes, size_t len);
 CxHashKey cx_hash_key_cxstr(cxstring str);
+CxHashKey cx_hash_key_u32(uint32_t x);
+CxHashKey cx_hash_key_u64(uint64_t x);
+
+int cx_hash_key_cmp(const CxHashKey *left, const CxHashKey *right);
 ``` 
 
 ## Description
 
-The primary function for creating a `CxHashKey` structure is `cx_hash_key()`.
-The other functions do effectively the same, but
+The primary function for creating a `CxHashKey` structure from non-integers is `cx_hash_key()`.
+The other functions effectively do the same, but
 
 * `cx_hash_key_bytes()` is strongly typed if you want to avoid passing `void*` 
 * `cx_hash_key_str()` conveniently takes a C string and computes the length
@@ -25,7 +33,6 @@
 
 In all cases, the hash will be available in the `hash` field of the returned structure.
 
-
 > UCX assigns the hash value `1574210520` to the `NULL` pointer.
 > This is a careful choice which is not standard MurmurHash2 and an extension to support `NULL` pointers.
 
@@ -42,6 +49,14 @@
 cx_hash_murmur(&key);
 ```
 
+Hashes from integers are created more efficiently by mixing up the bits to produce a good statistical distribution.
+The function `cx_hash_u32()` and `cx_hash_u64()` are provided for this purpose and provide collision-free hashes.
+The corresponding functions `cx_hash_key_u32()` and `cx_hash_key_u64()` can be used to create `CxHashKey` structures with those hashes.
+
+> Since integer hashes are collision-free, there is no need to store any `data` in the `CxHashKey` structure. 
+
+Hash keys are compared with `cx_hash_key_cmp()`.
+
 <seealso>
 <category ref="apidoc">
 <a href="https://ucx.sourceforge.io/api/hash__key_8h.html">hash_key.h</a>
--- a/src/cx/hash_key.h	Sat Sep 27 17:47:10 2025 +0200
+++ b/src/cx/hash_key.h	Sat Sep 27 17:49:13 2025 +0200
@@ -46,14 +46,17 @@
 
 /** Internal structure for a key within a hash map. */
 struct cx_hash_key_s {
-    /** The key data. */
+    /**
+     * The key data.
+     * May be NULL when the hash is collision-free.
+     */
     const void *data;
     /**
      * The key data length.
      */
     size_t len;
     /** The hash value of the key data. */
-    unsigned hash;
+    uint64_t hash;
 };
 
 /**
@@ -80,6 +83,48 @@
 void cx_hash_murmur(CxHashKey *key);
 
 /**
+ * Mixes up a 32-bit integer to be used as a hash.
+ *
+ * This function produces no collisions and has a good statistical distribution.
+ *
+ * @param x the integer
+ * @return the hash
+ */
+cx_attr_export
+uint32_t cx_hash_u32(uint32_t x);
+
+/**
+ * Mixes up a 64-bit integer to be used as a hash.
+ *
+ * This function produces no collisions and has a good statistical distribution.
+ *
+ * @param x the integer
+ * @return the hash
+ */
+cx_attr_export
+uint64_t cx_hash_u64(uint64_t x);
+
+/**
+ * Computes a hash key from a 32-bit integer.
+ *
+ * @param x the integer
+ * @return the hash key
+ */
+cx_attr_nodiscard
+cx_attr_export
+CxHashKey cx_hash_key_u32(uint32_t x);
+
+/**
+ * Computes a hash key from a 64-bit integer.
+ *
+ * @param x the integer
+ * @return the hash key
+ */
+cx_attr_nodiscard
+cx_attr_export
+CxHashKey cx_hash_key_u64(uint64_t x);
+
+/**
  * Computes a hash key from a string.
  *
  * The string needs to be zero-terminated.
@@ -145,6 +190,18 @@
  */
 #define cx_hash_key_cxstr(str) cx_hash_key_cxstr(cx_strcast(str))
 
+/**
+ * Compare function for hash keys.
+ *
+ * @param left the first key
+ * @param right the second key
+ * @return zero when the keys equal, non-zero when they differ
+ */
+cx_attr_nodiscard
+cx_attr_nonnull
+cx_attr_export
+int cx_hash_key_cmp(const CxHashKey *left, const CxHashKey *right);
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
--- a/src/hash_key.c	Sat Sep 27 17:47:10 2025 +0200
+++ b/src/hash_key.c	Sat Sep 27 17:49:13 2025 +0200
@@ -27,6 +27,7 @@
  */
 
 #include "cx/hash_key.h"
+#include "cx/compare.h"
 #include <string.h>
 
 void cx_hash_murmur(CxHashKey *key) {
@@ -81,6 +82,21 @@
     key->hash = h;
 }
 
+
+uint32_t cx_hash_u32(uint32_t x) {
+    x = ((x >> 16) ^ x) * 0x45d9f3bu;
+    x = ((x >> 16) ^ x) * 0x45d9f3bu;
+    x = (x >> 16) ^ x;
+    return x;
+}
+
+uint64_t cx_hash_u64(uint64_t x) {
+    x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
+    x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
+    x = x ^ (x >> 31);
+    return x;
+}
+
 CxHashKey cx_hash_key_str(const char *str) {
     CxHashKey key;
     key.data = str;
@@ -110,3 +126,29 @@
     cx_hash_murmur(&key);
     return key;
 }
+
+CxHashKey cx_hash_key_u32(uint32_t x) {
+    CxHashKey key;
+    key.data = NULL;
+    key.len = 0;
+    key.hash = cx_hash_u32(x);
+    return key;
+}
+
+CxHashKey cx_hash_key_u64(uint64_t x) {
+    CxHashKey key;
+    key.data = NULL;
+    key.len = 0;
+    key.hash = cx_hash_u64(x);
+    return key;
+}
+
+int cx_hash_key_cmp(const CxHashKey *left, const CxHashKey *right) {
+    int d;
+    d = cx_vcmp_uint64(left->hash, right->hash);
+    if (d != 0) return d;
+    d = cx_vcmp_size(left->len, right->len);
+    if (d != 0) return d;
+    if (left->len == 0) return 0;
+    return memcmp(left->data, right->data, left->len);
+}
--- a/src/hash_map.c	Sat Sep 27 17:47:10 2025 +0200
+++ b/src/hash_map.c	Sat Sep 27 17:49:13 2025 +0200
@@ -101,8 +101,7 @@
         elm = elm->next;
     }
 
-    if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
-        memcmp(elm->key.data, key.data, key.len) == 0) {
+    if (elm != NULL && cx_hash_key_cmp(&elm->key, &key) == 0) {
         // overwrite existing element, but call destructors first
         cx_invoke_destructor(map, elm->data);
         if (value == NULL) {
@@ -214,27 +213,25 @@
     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
     struct cx_hash_map_element_s *prev = NULL;
     while (elm && elm->key.hash <= hash) {
-        if (elm->key.hash == hash && elm->key.len == key.len) {
-            if (memcmp(elm->key.data, key.data, key.len) == 0) {
-                if (remove) {
-                    if (targetbuf == NULL) {
-                        cx_invoke_destructor(map, elm->data);
-                    } else {
-                        memcpy(targetbuf, elm->data, map->collection.elem_size);
-                    }
-                    cx_hash_map_unlink(hash_map, slot, prev, elm);
+        if (cx_hash_key_cmp(&elm->key, &key) == 0) {
+            if (remove) {
+                if (targetbuf == NULL) {
+                    cx_invoke_destructor(map, elm->data);
                 } else {
-                    assert(targetbuf != NULL);
-                    void *data = NULL;
-                    if (map->collection.store_pointer) {
-                        data = *(void **) elm->data;
-                    } else {
-                        data = elm->data;
-                    }
-                    memcpy(targetbuf, &data, sizeof(void *));
+                    memcpy(targetbuf, elm->data, map->collection.elem_size);
                 }
-                return 0;
+                cx_hash_map_unlink(hash_map, slot, prev, elm);
+            } else {
+                assert(targetbuf != NULL);
+                void *data = NULL;
+                if (map->collection.store_pointer) {
+                    data = *(void **) elm->data;
+                } else {
+                    data = elm->data;
+                }
+                memcpy(targetbuf, &data, sizeof(void *));
             }
+            return 0;
         }
         prev = elm;
         elm = prev->next;

mercurial