2022-11-06
change hash functions
1) for zero-terminated strings, the terminator is no longer included in the hash
2) for NULL there is now a special hash value different from the hash for empty data
src/cx/hash_key.h | file | annotate | diff | comparison | revisions | |
src/hash_key.c | file | annotate | diff | comparison | revisions | |
test/test_hash_key.cpp | file | annotate | diff | comparison | revisions | |
test/test_map.cpp | file | annotate | diff | comparison | revisions |
--- a/src/cx/hash_key.h Sun Nov 06 14:46:59 2022 +0100 +++ b/src/cx/hash_key.h Sun Nov 06 16:07:32 2022 +0100 @@ -69,11 +69,13 @@ typedef struct cx_hash_key_s CxHashKey; /** - * Computes a murmur hash-2. + * Computes a murmur2 32 bit hash. * - * You need to initialize data and len in the key struct. + * You need to initialize \c data and \c len in the key struct. * The hash is then directly written to that struct. * + * \note If \c data is \c NULL, the hash is defined as 1574210520. + * * @param key the key, the hash shall be computed for */ void cx_hash_murmur(CxHashKey *key);
--- a/src/hash_key.c Sun Nov 06 14:46:59 2022 +0100 +++ b/src/hash_key.c Sun Nov 06 16:07:32 2022 +0100 @@ -30,8 +30,13 @@ #include <string.h> void cx_hash_murmur(CxHashKey *key) { + unsigned char const *data = key->data.cbytes; + if (data == NULL) { + /* extension: special value for NULL */ + key->hash = 1574210520u; + return; + } size_t len = key->len; - unsigned char const *data = key->data.cbytes; unsigned m = 0x5bd1e995; unsigned r = 24; @@ -79,7 +84,7 @@ CxHashKey cx_hash_key_str(char const *str) { CxHashKey key; key.data.cstr = str; - key.len = str == NULL ? 0 : (1 + strlen(str)); + key.len = str == NULL ? 0 : strlen(str); cx_hash_murmur(&key); return key; }
--- a/test/test_hash_key.cpp Sun Nov 06 14:46:59 2022 +0100 +++ b/test/test_hash_key.cpp Sun Nov 06 16:07:32 2022 +0100 @@ -32,7 +32,7 @@ TEST(cx_hash_key, functions) { auto str = "my key"; - auto len = 1 + strlen(str); + auto len = strlen(str); auto str_key = cx_hash_key_str(str); auto bytes_key = cx_hash_key_bytes( @@ -47,5 +47,41 @@ EXPECT_EQ(bytes_key.len, len); EXPECT_EQ(str_key.data.cstr, str); EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str)); - EXPECT_EQ(bytes_key.data.obj, (void *) str); + EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str)); } + +TEST(cx_hash_key, empty_string) { + auto str = ""; + + auto str_key = cx_hash_key_str(str); + auto bytes_key = cx_hash_key_bytes( + reinterpret_cast<unsigned char const *>(str), 0); + auto obj_key = cx_hash_key( + reinterpret_cast<void const *>(str), 0); + + EXPECT_EQ(bytes_key.hash, 4152238450u); + EXPECT_EQ(str_key.hash, 4152238450u); + EXPECT_EQ(obj_key.hash, 4152238450u); + EXPECT_EQ(str_key.len, 0); + EXPECT_EQ(bytes_key.len, 0); + EXPECT_EQ(bytes_key.len, 0); + EXPECT_EQ(str_key.data.cstr, str); + EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str)); + EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str)); +} + +TEST(cx_hash_key, null_ptr) { + auto str_key = cx_hash_key_str(nullptr); + auto bytes_key = cx_hash_key_bytes(nullptr, 0); + auto obj_key = cx_hash_key(nullptr, 0); + + EXPECT_EQ(bytes_key.hash, 1574210520u); + EXPECT_EQ(str_key.hash, 1574210520u); + EXPECT_EQ(obj_key.hash, 1574210520u); + EXPECT_EQ(str_key.len, 0); + EXPECT_EQ(bytes_key.len, 0); + EXPECT_EQ(bytes_key.len, 0); + EXPECT_EQ(str_key.data.cstr, nullptr); + EXPECT_EQ(bytes_key.data.cbytes, nullptr); + EXPECT_EQ(bytes_key.data.cobj, nullptr); +}
--- a/test/test_map.cpp Sun Nov 06 14:46:59 2022 +0100 +++ b/test/test_map.cpp Sun Nov 06 16:07:32 2022 +0100 @@ -73,8 +73,7 @@ auto keyiter = cxMapIteratorKeys(map); std::unordered_set<std::string> keys; cx_foreach(CxHashKey*, elem, keyiter) { - // we use that our test keys contain NULL-terminated strings - keys.insert(std::string(elem->data.cstr)); + keys.insert(std::string(elem->data.cstr, elem->len)); } EXPECT_EQ(keyiter.index, map->size); ASSERT_EQ(keys.size(), map->size); @@ -103,7 +102,7 @@ auto pairiter = cxMapIterator(map); std::unordered_map<std::string, std::string> pairs; cx_foreach(CxMapEntry*, entry, pairiter) { - pairs[std::string(entry->key->data.cstr)] = std::string((char *) entry->value); + pairs[std::string(entry->key->data.cstr, entry->key->len)] = std::string((char *) entry->value); } EXPECT_EQ(pairiter.index, map->size); ASSERT_EQ(pairs.size(), refmap.size());