Sat, 27 Sep 2025 17:49:13 +0200
add support for integer keys - resolves #720
--- a/docs/Writerside/topics/hash_key.h.md Sat Sep 27 17:47:10 2025 +0200 +++ b/docs/Writerside/topics/hash_key.h.md Sat Sep 27 17:49:13 2025 +0200 @@ -7,17 +7,25 @@ ```C #include <cx/hash_key.h> -void cx_hash_murmur(CxHashKey *key); +void cx_hash_murmur(CxHashKey *key); + +uint32_t cx_hash_u32(uint32_t x); +uint64_t cx_hash_u64(uint64_t x); + CxHashKey cx_hash_key(const void *obj, size_t len); CxHashKey cx_hash_key_str(const char *str); CxHashKey cx_hash_key_bytes(const unsigned char *bytes, size_t len); CxHashKey cx_hash_key_cxstr(cxstring str); +CxHashKey cx_hash_key_u32(uint32_t x); +CxHashKey cx_hash_key_u64(uint64_t x); + +int cx_hash_key_cmp(const CxHashKey *left, const CxHashKey *right); ``` ## Description -The primary function for creating a `CxHashKey` structure is `cx_hash_key()`. -The other functions do effectively the same, but +The primary function for creating a `CxHashKey` structure from non-integers is `cx_hash_key()`. +The other functions effectively do the same, but * `cx_hash_key_bytes()` is strongly typed if you want to avoid passing `void*` * `cx_hash_key_str()` conveniently takes a C string and computes the length @@ -25,7 +33,6 @@ In all cases, the hash will be available in the `hash` field of the returned structure. - > UCX assigns the hash value `1574210520` to the `NULL` pointer. > This is a careful choice which is not standard MurmurHash2 and an extension to support `NULL` pointers. @@ -42,6 +49,14 @@ cx_hash_murmur(&key); ``` +Hashes from integers are created more efficiently by mixing up the bits to produce a good statistical distribution. +The function `cx_hash_u32()` and `cx_hash_u64()` are provided for this purpose and provide collision-free hashes. +The corresponding functions `cx_hash_key_u32()` and `cx_hash_key_u64()` can be used to create `CxHashKey` structures with those hashes. + +> Since integer hashes are collision-free, there is no need to store any `data` in the `CxHashKey` structure. + +Hash keys are compared with `cx_hash_key_cmp()`. + <seealso> <category ref="apidoc"> <a href="https://ucx.sourceforge.io/api/hash__key_8h.html">hash_key.h</a>
--- a/src/cx/hash_key.h Sat Sep 27 17:47:10 2025 +0200 +++ b/src/cx/hash_key.h Sat Sep 27 17:49:13 2025 +0200 @@ -46,14 +46,17 @@ /** Internal structure for a key within a hash map. */ struct cx_hash_key_s { - /** The key data. */ + /** + * The key data. + * May be NULL when the hash is collision-free. + */ const void *data; /** * The key data length. */ size_t len; /** The hash value of the key data. */ - unsigned hash; + uint64_t hash; }; /** @@ -80,6 +83,48 @@ void cx_hash_murmur(CxHashKey *key); /** + * Mixes up a 32-bit integer to be used as a hash. + * + * This function produces no collisions and has a good statistical distribution. + * + * @param x the integer + * @return the hash + */ +cx_attr_export +uint32_t cx_hash_u32(uint32_t x); + +/** + * Mixes up a 64-bit integer to be used as a hash. + * + * This function produces no collisions and has a good statistical distribution. + * + * @param x the integer + * @return the hash + */ +cx_attr_export +uint64_t cx_hash_u64(uint64_t x); + +/** + * Computes a hash key from a 32-bit integer. + * + * @param x the integer + * @return the hash key + */ +cx_attr_nodiscard +cx_attr_export +CxHashKey cx_hash_key_u32(uint32_t x); + +/** + * Computes a hash key from a 64-bit integer. + * + * @param x the integer + * @return the hash key + */ +cx_attr_nodiscard +cx_attr_export +CxHashKey cx_hash_key_u64(uint64_t x); + +/** * Computes a hash key from a string. * * The string needs to be zero-terminated. @@ -145,6 +190,18 @@ */ #define cx_hash_key_cxstr(str) cx_hash_key_cxstr(cx_strcast(str)) +/** + * Compare function for hash keys. + * + * @param left the first key + * @param right the second key + * @return zero when the keys equal, non-zero when they differ + */ +cx_attr_nodiscard +cx_attr_nonnull +cx_attr_export +int cx_hash_key_cmp(const CxHashKey *left, const CxHashKey *right); + #ifdef __cplusplus } // extern "C" #endif
--- a/src/hash_key.c Sat Sep 27 17:47:10 2025 +0200 +++ b/src/hash_key.c Sat Sep 27 17:49:13 2025 +0200 @@ -27,6 +27,7 @@ */ #include "cx/hash_key.h" +#include "cx/compare.h" #include <string.h> void cx_hash_murmur(CxHashKey *key) { @@ -81,6 +82,21 @@ key->hash = h; } + +uint32_t cx_hash_u32(uint32_t x) { + x = ((x >> 16) ^ x) * 0x45d9f3bu; + x = ((x >> 16) ^ x) * 0x45d9f3bu; + x = (x >> 16) ^ x; + return x; +} + +uint64_t cx_hash_u64(uint64_t x) { + x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); + x = x ^ (x >> 31); + return x; +} + CxHashKey cx_hash_key_str(const char *str) { CxHashKey key; key.data = str; @@ -110,3 +126,29 @@ cx_hash_murmur(&key); return key; } + +CxHashKey cx_hash_key_u32(uint32_t x) { + CxHashKey key; + key.data = NULL; + key.len = 0; + key.hash = cx_hash_u32(x); + return key; +} + +CxHashKey cx_hash_key_u64(uint64_t x) { + CxHashKey key; + key.data = NULL; + key.len = 0; + key.hash = cx_hash_u64(x); + return key; +} + +int cx_hash_key_cmp(const CxHashKey *left, const CxHashKey *right) { + int d; + d = cx_vcmp_uint64(left->hash, right->hash); + if (d != 0) return d; + d = cx_vcmp_size(left->len, right->len); + if (d != 0) return d; + if (left->len == 0) return 0; + return memcmp(left->data, right->data, left->len); +}
--- a/src/hash_map.c Sat Sep 27 17:47:10 2025 +0200 +++ b/src/hash_map.c Sat Sep 27 17:49:13 2025 +0200 @@ -101,8 +101,7 @@ elm = elm->next; } - if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len && - memcmp(elm->key.data, key.data, key.len) == 0) { + if (elm != NULL && cx_hash_key_cmp(&elm->key, &key) == 0) { // overwrite existing element, but call destructors first cx_invoke_destructor(map, elm->data); if (value == NULL) { @@ -214,27 +213,25 @@ struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; struct cx_hash_map_element_s *prev = NULL; while (elm && elm->key.hash <= hash) { - if (elm->key.hash == hash && elm->key.len == key.len) { - if (memcmp(elm->key.data, key.data, key.len) == 0) { - if (remove) { - if (targetbuf == NULL) { - cx_invoke_destructor(map, elm->data); - } else { - memcpy(targetbuf, elm->data, map->collection.elem_size); - } - cx_hash_map_unlink(hash_map, slot, prev, elm); + if (cx_hash_key_cmp(&elm->key, &key) == 0) { + if (remove) { + if (targetbuf == NULL) { + cx_invoke_destructor(map, elm->data); } else { - assert(targetbuf != NULL); - void *data = NULL; - if (map->collection.store_pointer) { - data = *(void **) elm->data; - } else { - data = elm->data; - } - memcpy(targetbuf, &data, sizeof(void *)); + memcpy(targetbuf, elm->data, map->collection.elem_size); } - return 0; + cx_hash_map_unlink(hash_map, slot, prev, elm); + } else { + assert(targetbuf != NULL); + void *data = NULL; + if (map->collection.store_pointer) { + data = *(void **) elm->data; + } else { + data = elm->data; + } + memcpy(targetbuf, &data, sizeof(void *)); } + return 0; } prev = elm; elm = prev->next;