2022-05-18
#189 declare basic map functions
src/CMakeLists.txt | file | annotate | diff | comparison | revisions | |
src/cx/common.h | file | annotate | diff | comparison | revisions | |
src/cx/hash_map.h | file | annotate | diff | comparison | revisions | |
src/cx/map.h | file | annotate | diff | comparison | revisions | |
src/hash_map.c | file | annotate | diff | comparison | revisions |
--- a/src/CMakeLists.txt Mon May 16 19:25:19 2022 +0200 +++ b/src/CMakeLists.txt Wed May 18 16:26:32 2022 +0200 @@ -5,8 +5,10 @@ linked_list.c tree.c buffer.c + hash_map.c ) set(headers + cx/common.h cx/utils.h cx/allocator.h cx/iterator.h @@ -14,7 +16,9 @@ cx/linked_list.h cx/tree.h cx/buffer.h -) + cx/map.h + cx/hash_map.h + ) add_library(ucx SHARED ${sources}) add_library(ucx_static STATIC ${sources})
--- a/src/cx/common.h Mon May 16 19:25:19 2022 +0200 +++ b/src/cx/common.h Wed May 18 16:26:32 2022 +0200 @@ -94,6 +94,25 @@ #include <stdbool.h> /** + * Structure that contains a pointer to arbitrary data. + */ +struct cx_data_ptr_s { + /** + * A pointer to the data. + */ + unsigned const char *data; + /** + * The length of the data. + */ + size_t len; +}; + +/** + * Type for a data pointer with length information. + */ +typedef struct cx_data_ptr_s CxDataPtr; + +/** * Function pointer compatible with fwrite-like functions. */ typedef size_t (*cx_write_func)(
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cx/hash_map.h Wed May 18 16:26:32 2022 +0200 @@ -0,0 +1,123 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/** + * \file map.h + * \brief Interface for map implementations. + * \author Mike Becker + * \author Olaf Wintermann + * \version 3.0 + * \copyright 2-Clause BSD License + */ + +#ifndef UCX_HASH_MAP_H +#define UCX_HASH_MAP_H + +#include "map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Internal structure for a key within a hash map. */ +struct cx_hash_map_key_s { + /** The key data. */ + CxDataPtr data; + /** The hash value of the key data. */ + unsigned hash; +}; + +/** Internal structure for an element of a hash map. */ +struct cx_hash_map_element_s { + /** The value data. */ + void *data; + + /** A pointer to the next element in the current bucket. */ + struct cx_hash_map_element_s *next; + + /** The corresponding key. */ + struct cx_hash_map_key_s key; +}; + + +/** + * Creates a new hash map with the specified size using a UcxAllocator. + * + * @param allocator the allocator to use + * @param buckets the initial number of buckets in this hash map + * @return a pointer to the new hash map + */ +CxMap *cxHashMapCreate( + CxAllocator *allocator, + size_t buckets +); + +/** + * Increases the number of buckets, if necessary. + * + * The load value is \c loadFactor*buckets. If the element count exceeds the load + * value, the map needs to be rehashed. Otherwise, no action is performed and + * this function simply returns 0. + * + * The rehashing process ensures, that the number of buckets is at least + * \p bucketFactor times the number of stored items. So there is enough room for additional + * elements without the need of another soon rehashing. + * + * You can use this function after filling a map to dramatically increase access performance. + * + * @note If the specified map is not a hash map, the behavior is undefined. + * + * @param map the map to rehash + * @param loadFactor a percentage threshold for the load of the map + * @param bucketFactor a factor for the number of buckets that shall be available after rehashing + * @return zero on success, non-zero if a memory allocation error occurred + */ +__attribute__((__nonnull__)) +int cxMapRehash2( + CxMap *map, + float loadFactor, + float bucketFactor +); + +/** + * Rehashes the map with load factor 0.75 and bucket factor 2.5. + * + * @param map the map to rehash + * @return zero on success, non-zero if a memory allocation error occurred + * @see cxMapRehash2() + */ +__attribute__((__nonnull__)) +static inline int cxMapRehash(CxMap *map) { + return cxMapRehash2(map, 0.75f, 2.5f); +} + + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // UCX_HASH_MAP_H
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cx/map.h Wed May 18 16:26:32 2022 +0200 @@ -0,0 +1,268 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/** + * \file map.h + * \brief Interface for map implementations. + * \author Mike Becker + * \author Olaf Wintermann + * \version 3.0 + * \copyright 2-Clause BSD License + */ + +#ifndef UCX_MAP_H +#define UCX_MAP_H + +#include "common.h" +#include "allocator.h" +#include "iterator.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Type for the UCX map. */ +typedef struct cx_map_s CxMap; + +/** Type for a map entry. */ +typedef struct cx_map_entry_s CxMapEntry; + +/** Type for map class definitions. */ +typedef struct cx_map_class_s cx_map_class; + +/** Structure for the UCX map. */ +struct cx_map_s { + /** The map class definition. */ + cx_map_class *cl; + /** An allocator that is used for the map elements. */ + CxAllocator *allocator; + /** The number of elements currently stored. */ + size_t size; + // TODO: elemsize + a flag if values shall be copied to the map +}; + +/** + * The class definition for arbitrary maps. + */ +struct cx_map_class_s { + /** + * Deallocates the entire memory. + */ + __attribute__((__nonnull__)) + void (*destructor)(struct cx_map_s *map); + + /** + * Removes all elements. + */ + __attribute__((__nonnull__)) + void (*clear)(struct cx_map_s *map); + + /** + * Add or overwrite an element. + */ + __attribute__((__nonnull__)) + int (*put)( + CxMap *map, + CxDataPtr key, + void const *value + ); + + /** + * Returns an element. + */ + __attribute__((__nonnull__, __warn_unused_result__)) + void *(*get)( + CxMap const *map, + CxDataPtr key + ); + + /** + * Removes an element. + */ + __attribute__((__nonnull__, __warn_unused_result__)) + void *(*remove)( + CxMap const *map, + CxDataPtr key + ); + + /** + * Iterator over the key/value pairs. + */ + __attribute__((__nonnull__, __warn_unused_result__)) + CxIterator (*iterator)(CxMap const *map); + + /** + * Iterator over the keys. + */ + __attribute__((__nonnull__, __warn_unused_result__)) + CxIterator (*iterator_keys)(CxMap const *map); + + /** + * Iterator over the values. + */ + __attribute__((__nonnull__, __warn_unused_result__)) + CxIterator (*iterator_values)(CxMap const *map); +}; + +/** + * A map entry. + */ +struct cx_map_entry_s { + /** + * The key. + */ + CxDataPtr key; + /** + * The value. + */ + void const *value; +}; + + +/** + * Deallocates the memory of the specified map. + * + * @param map the map to be destroyed + */ +__attribute__((__nonnull__)) +static inline void cxMapDestroy(CxMap *map) { + // TODO: likely to add auto-free feature for contents in the future + map->cl->destructor(map); +} + + +/** + * Clears a map by removing all elements. + * + * @param map the map to be cleared + */ +__attribute__((__nonnull__)) +static inline void cxMapClear(CxMap *map) { + map->cl->clear(map); +} + +/** + * Puts a key/value-pair into the map. + * + * @param map the map + * @param key the key + * @param value the value + * @return 0 on success, non-zero value on failure + */ +__attribute__((__nonnull__)) +static inline int cxMapPut( + CxMap *map, + CxDataPtr key, + void const *value +) { + return map->cl->put(map, key, value); +} + +/** + * Retrieves a value by using a key. + * + * @param map the map + * @param key the key + * @return the value + */ +__attribute__((__nonnull__, __warn_unused_result__)) +void *cxMapGet( + CxMap const *map, + CxDataPtr key +) { + return map->cl->get(map, key); +} + +/** + * Removes a key/value-pair from the map by using the key. + * + * @param map the map + * @param key the key + * @return the removed value + */ +__attribute__((__nonnull__, __warn_unused_result__)) +void *cxMapRemove( + CxMap *map, + CxDataPtr key +) { + return map->cl->remove(map, key); +} + +// TODO: set-like map operations (union, intersect, difference) + +/** + * Creates a value iterator for a map. + * + * \note An iterator iterates over all elements successively. Therefore the order + * highly depends on the map implementation and may change arbitrarily when the contents change. + * + * @param map the map to create the iterator for + * @return an iterator for the currently stored values + */ +__attribute__((__nonnull__, __warn_unused_result__)) +static inline CxIterator cxMapIteratorValues(CxMap const *map) { + return map->cl->iterator_values(map); +} + +/** + * Creates a key iterator for a map. + * + * \note An iterator iterates over all elements successively. Therefore the order + * highly depends on the map implementation and may change arbitrarily when the contents change. + * + * @param map the map to create the iterator for + * @return an iterator for the currently stored keys + */ +__attribute__((__nonnull__, __warn_unused_result__)) +static inline CxIterator cxMapIteratorKeys(CxMap const *map) { + return map->cl->iterator_keys(map); +} + +/** + * Creates an iterator for a map. + * + * The values of the iterator are dynamically created key/value pairs of type CxMapEntry. + * + * \note An iterator iterates over all elements successively. Therefore the order + * highly depends on the map implementation and may change arbitrarily when the contents change. + * + * @param map the map to create the iterator for + * @return an iterator for the currently stored keys + * @see cxMapIteratorKeys() + * @see cxMapIteratorValues() + */ +__attribute__((__nonnull__, __warn_unused_result__)) +static inline CxIterator cxMapIterator(CxMap const *map) { + return map->cl->iterator(map); +} + + +#ifdef __cplusplus +} +#endif + +#endif // UCX_MAP_H \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hash_map.c Wed May 18 16:26:32 2022 +0200 @@ -0,0 +1,105 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include "cx/hash_map.h" + +/** + * Computes a murmur hash-2. + * + * @param data the data to hash + * @param len the length of the data + * @return the murmur hash-2 of the data + */ +static unsigned cx_hash_map_murmur( + unsigned char const *data, + size_t len +) { + unsigned m = 0x5bd1e995; + unsigned r = 24; + unsigned h = 25 ^ len; + unsigned i = 0; + while (len >= 4) { + unsigned k = data[i + 0] & 0xFF; + k |= (data[i + 1] & 0xFF) << 8; + k |= (data[i + 2] & 0xFF) << 16; + k |= (data[i + 3] & 0xFF) << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + i += 4; + len -= 4; + } + + switch (len) { + case 3: + h ^= (data[i + 2] & 0xFF) << 16; + __attribute__((__fallthrough__)); + case 2: + h ^= (data[i + 1] & 0xFF) << 8; + __attribute__((__fallthrough__)); + case 1: + h ^= (data[i + 0] & 0xFF); + h *= m; + __attribute__((__fallthrough__)); + default: + /* do nothing */; + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + + +/** + * Creates a hash map key based on the given data. + * + * This function implicitly computes the hash. + * + * @attention The data will be copied to the key structure. + * + * @param data the data for the key + * @return the computed key + */ +static struct cx_hash_map_key_s cx_hash_map_key(CxDataPtr data) { + unsigned char *target = malloc(data.len); + memcpy(target, data.data, data.len); + struct cx_hash_map_key_s key; + key.data.data = target; + key.data.len = data.len; + key.hash = cx_hash_map_murmur(target, data.len); + return key; +}