change hash functions

2022-11-06

author
Mike Becker <universe@uap-core.de>
date
Sun, 06 Nov 2022 16:07:32 +0100 (2022-11-06)
changeset 604
056e5f592d84
parent 603
c49104015a6b
child 605
be5a4902d405

change hash functions

1) for zero-terminated strings, the terminator is no longer included in the hash
2) for NULL there is now a special hash value different from the hash for empty data

src/cx/hash_key.h file | annotate | diff | comparison | revisions
src/hash_key.c file | annotate | diff | comparison | revisions
test/test_hash_key.cpp file | annotate | diff | comparison | revisions
test/test_map.cpp file | annotate | diff | comparison | revisions
--- a/src/cx/hash_key.h	Sun Nov 06 14:46:59 2022 +0100
+++ b/src/cx/hash_key.h	Sun Nov 06 16:07:32 2022 +0100
@@ -69,11 +69,13 @@
 typedef struct cx_hash_key_s CxHashKey;
 
 /**
- * Computes a murmur hash-2.
+ * Computes a murmur2 32 bit hash.
  *
- * You need to initialize data and len in the key struct.
+ * You need to initialize \c data and \c len in the key struct.
  * The hash is then directly written to that struct.
  *
+ * \note If \c data is \c NULL, the hash is defined as 1574210520.
+ *
  * @param key the key, the hash shall be computed for
  */
 void cx_hash_murmur(CxHashKey *key);
--- a/src/hash_key.c	Sun Nov 06 14:46:59 2022 +0100
+++ b/src/hash_key.c	Sun Nov 06 16:07:32 2022 +0100
@@ -30,8 +30,13 @@
 #include <string.h>
 
 void cx_hash_murmur(CxHashKey *key) {
+    unsigned char const *data = key->data.cbytes;
+    if (data == NULL) {
+        /* extension: special value for NULL */
+        key->hash = 1574210520u;
+        return;
+    }
     size_t len = key->len;
-    unsigned char const *data = key->data.cbytes;
 
     unsigned m = 0x5bd1e995;
     unsigned r = 24;
@@ -79,7 +84,7 @@
 CxHashKey cx_hash_key_str(char const *str) {
     CxHashKey key;
     key.data.cstr = str;
-    key.len = str == NULL ? 0 : (1 + strlen(str));
+    key.len = str == NULL ? 0 : strlen(str);
     cx_hash_murmur(&key);
     return key;
 }
--- a/test/test_hash_key.cpp	Sun Nov 06 14:46:59 2022 +0100
+++ b/test/test_hash_key.cpp	Sun Nov 06 16:07:32 2022 +0100
@@ -32,7 +32,7 @@
 
 TEST(cx_hash_key, functions) {
     auto str = "my key";
-    auto len = 1 + strlen(str);
+    auto len = strlen(str);
 
     auto str_key = cx_hash_key_str(str);
     auto bytes_key = cx_hash_key_bytes(
@@ -47,5 +47,41 @@
     EXPECT_EQ(bytes_key.len, len);
     EXPECT_EQ(str_key.data.cstr, str);
     EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str));
-    EXPECT_EQ(bytes_key.data.obj, (void *) str);
+    EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str));
 }
+
+TEST(cx_hash_key, empty_string) {
+    auto str = "";
+
+    auto str_key = cx_hash_key_str(str);
+    auto bytes_key = cx_hash_key_bytes(
+            reinterpret_cast<unsigned char const *>(str), 0);
+    auto obj_key = cx_hash_key(
+            reinterpret_cast<void const *>(str), 0);
+
+    EXPECT_EQ(bytes_key.hash, 4152238450u);
+    EXPECT_EQ(str_key.hash, 4152238450u);
+    EXPECT_EQ(obj_key.hash, 4152238450u);
+    EXPECT_EQ(str_key.len, 0);
+    EXPECT_EQ(bytes_key.len, 0);
+    EXPECT_EQ(bytes_key.len, 0);
+    EXPECT_EQ(str_key.data.cstr, str);
+    EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str));
+    EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str));
+}
+
+TEST(cx_hash_key, null_ptr) {
+    auto str_key = cx_hash_key_str(nullptr);
+    auto bytes_key = cx_hash_key_bytes(nullptr, 0);
+    auto obj_key = cx_hash_key(nullptr, 0);
+
+    EXPECT_EQ(bytes_key.hash, 1574210520u);
+    EXPECT_EQ(str_key.hash, 1574210520u);
+    EXPECT_EQ(obj_key.hash, 1574210520u);
+    EXPECT_EQ(str_key.len, 0);
+    EXPECT_EQ(bytes_key.len, 0);
+    EXPECT_EQ(bytes_key.len, 0);
+    EXPECT_EQ(str_key.data.cstr, nullptr);
+    EXPECT_EQ(bytes_key.data.cbytes, nullptr);
+    EXPECT_EQ(bytes_key.data.cobj, nullptr);
+}
--- a/test/test_map.cpp	Sun Nov 06 14:46:59 2022 +0100
+++ b/test/test_map.cpp	Sun Nov 06 16:07:32 2022 +0100
@@ -73,8 +73,7 @@
         auto keyiter = cxMapIteratorKeys(map);
         std::unordered_set<std::string> keys;
         cx_foreach(CxHashKey*, elem, keyiter) {
-            // we use that our test keys contain NULL-terminated strings
-            keys.insert(std::string(elem->data.cstr));
+            keys.insert(std::string(elem->data.cstr, elem->len));
         }
         EXPECT_EQ(keyiter.index, map->size);
         ASSERT_EQ(keys.size(), map->size);
@@ -103,7 +102,7 @@
         auto pairiter = cxMapIterator(map);
         std::unordered_map<std::string, std::string> pairs;
         cx_foreach(CxMapEntry*, entry, pairiter) {
-            pairs[std::string(entry->key->data.cstr)] = std::string((char *) entry->value);
+            pairs[std::string(entry->key->data.cstr, entry->key->len)] = std::string((char *) entry->value);
         }
         EXPECT_EQ(pairiter.index, map->size);
         ASSERT_EQ(pairs.size(), refmap.size());

mercurial