Mon, 09 Sep 2024 21:34:39 +0200
implement optimized sorted insert for linked lists - resolves #415
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * \file tree.h * \brief Interface for tree implementations. * \author Mike Becker * \author Olaf Wintermann * \copyright 2-Clause BSD License */ #ifndef UCX_TREE_H #define UCX_TREE_H #include "common.h" #include "iterator.h" #ifdef __cplusplus extern "C" { #endif /** * A depth-first tree iterator. * * This iterator is not position-aware in a strict sense, as it does not assume * a particular order of elements in the tree. However, the iterator keeps track * of the number of nodes it has passed in a counter variable. * Each node, regardless of the number of passes, is counted only once. * * @note Objects that are pointed to by an iterator are mutable through that * iterator. However, if the * underlying data structure is mutated by other means than this iterator (e.g. * elements added or removed), the iterator becomes invalid (regardless of what * cxIteratorValid() returns). * * @see CxIterator */ typedef struct cx_tree_iterator_s { /** * Base members. */ CX_ITERATOR_BASE; /** * Indicates whether the subtree below the current node shall be skipped. */ bool skip; /** * Set to true, when the iterator shall visit a node again * when all it's children have been processed. */ bool visit_on_exit; /** * True, if this iterator is currently leaving the node. */ bool exiting; /** * Offset in the node struct for the children linked list. */ ptrdiff_t loc_children; /** * Offset in the node struct for the next pointer. */ ptrdiff_t loc_next; /** * The total number of distinct nodes that have been passed so far. */ size_t counter; /** * The currently observed node. * * This is the same what cxIteratorCurrent() would return. */ void *node; /** * Stores a copy of the next pointer of the visited node. * Allows freeing a node on exit without corrupting the iteration. */ void *node_next; /** * Internal stack. * Will be automatically freed once the iterator becomes invalid. * * If you want to discard the iterator before, you need to manually * call cxTreeIteratorDispose(). */ void **stack; /** * Internal capacity of the stack. */ size_t stack_capacity; union { /** * Internal stack size. */ size_t stack_size; /** * The current depth in the tree. */ size_t depth; }; } CxTreeIterator; /** * An element in a visitor queue. */ struct cx_tree_visitor_queue_s { /** * The tree node to visit. */ void *node; /** * The depth of the node. */ size_t depth; /** * The next element in the queue or \c NULL. */ struct cx_tree_visitor_queue_s *next; }; /** * A breadth-first tree iterator. * * This iterator needs to maintain a visitor queue that will be automatically * freed once the iterator becomes invalid. * If you want to discard the iterator before, you MUST manually call * cxTreeVisitorDispose(). * * This iterator is not position-aware in a strict sense, as it does not assume * a particular order of elements in the tree. However, the iterator keeps track * of the number of nodes it has passed in a counter variable. * Each node, regardless of the number of passes, is counted only once. * * @note Objects that are pointed to by an iterator are mutable through that * iterator. However, if the * underlying data structure is mutated by other means than this iterator (e.g. * elements added or removed), the iterator becomes invalid (regardless of what * cxIteratorValid() returns). * * @see CxIterator */ typedef struct cx_tree_visitor_s { /** * Base members. */ CX_ITERATOR_BASE; /** * Indicates whether the subtree below the current node shall be skipped. */ bool skip; /** * Offset in the node struct for the children linked list. */ ptrdiff_t loc_children; /** * Offset in the node struct for the next pointer. */ ptrdiff_t loc_next; /** * The total number of distinct nodes that have been passed so far. */ size_t counter; /** * The currently observed node. * * This is the same what cxIteratorCurrent() would return. */ void *node; /** * The current depth in the tree. */ size_t depth; /** * The next element in the visitor queue. */ struct cx_tree_visitor_queue_s *queue_next; /** * The last element in the visitor queue. */ struct cx_tree_visitor_queue_s *queue_last; } CxTreeVisitor; /** * Releases internal memory of the given tree iterator. * @param iter the iterator */ __attribute__((__nonnull__)) static inline void cxTreeIteratorDispose(CxTreeIterator *iter) { free(iter->stack); iter->stack = NULL; } /** * Releases internal memory of the given tree visitor. * @param visitor the visitor */ __attribute__((__nonnull__)) static inline void cxTreeVisitorDispose(CxTreeVisitor *visitor) { struct cx_tree_visitor_queue_s *q = visitor->queue_next; while (q != NULL) { struct cx_tree_visitor_queue_s *next = q->next; free(q); q = next; } } /** * Advises the iterator to skip the subtree below the current node and * also continues the current loop. * * @param iterator the iterator */ #define cxTreeIteratorContinue(iterator) (iterator).skip = true; continue /** * Advises the visitor to skip the subtree below the current node and * also continues the current loop. * * @param visitor the visitor */ #define cxTreeVisitorContinue(visitor) cxTreeIteratorContinue(visitor) /** * Links a node to a (new) parent. * * If the node has already a parent, it is unlinked, first. * If the parent has children already, the node is \em appended to the list * of all currently existing children. * * @param parent the parent node * @param node the node that shall be linked * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @see cx_tree_unlink() */ __attribute__((__nonnull__)) void cx_tree_link( void *restrict parent, void *restrict node, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); /** * Unlinks a node from its parent. * * If the node has no parent, this function does nothing. * * @param node the node that shall be unlinked from its parent * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @see cx_tree_link() */ __attribute__((__nonnull__)) void cx_tree_unlink( void *node, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); /** * Function pointer for a search function. * * A function of this kind shall check if the specified \p node * contains the given \p data or if one of the children might contain * the data. * * The function should use the returned integer to indicate how close the * match is, where a negative number means that it does not match at all. * * For example if a tree stores file path information, a node that is * describing a parent directory of a filename that is searched, shall * return a positive number to indicate that a child node might contain the * searched item. On the other hand, if the node denotes a path that is not a * prefix of the searched filename, the function would return -1 to indicate * that the search does not need to be continued in that branch. * * @param node the node that is currently investigated * @param data the data that is searched for * * @return 0 if the node contains the data, * positive if one of the children might contain the data, * negative if neither the node, nor the children contains the data */ typedef int (*cx_tree_search_data_func)(void const *node, void const *data); /** * Function pointer for a search function. * * A function of this kind shall check if the specified \p node * contains the same \p data as \p new_node or if one of the children might * contain the data. * * The function should use the returned integer to indicate how close the * match is, where a negative number means that it does not match at all. * * For example if a tree stores file path information, a node that is * describing a parent directory of a filename that is searched, shall * return a positive number to indicate that a child node might contain the * searched item. On the other hand, if the node denotes a path that is not a * prefix of the searched filename, the function would return -1 to indicate * that the search does not need to be continued in that branch. * * @param node the node that is currently investigated * @param new_node a new node with the information which is searched * * @return 0 if \p node contains the same data as \p new_node, * positive if one of the children might contain the data, * negative if neither the node, nor the children contains the data */ typedef int (*cx_tree_search_func)(void const *node, void const *new_node); /** * Searches for data in a tree. * * When the data cannot be found exactly, the search function might return a * closest result which might be a good starting point for adding a new node * to the tree (see also #cx_tree_add()). * * Depending on the tree structure it is not necessarily guaranteed that the * "closest" match is uniquely defined. This function will search for a node * with the best match according to the \p sfunc (meaning: the return value of * \p sfunc which is closest to zero). If that is also ambiguous, an arbitrary * node matching the criteria is returned. * * @param root the root node * @param data the data to search for * @param sfunc the search function * @param result where the result shall be stored * @param loc_children offset in the node struct for the children linked list * @param loc_next offset in the node struct for the next pointer * @return zero if the node was found exactly, positive if a node was found that * could contain the node (but doesn't right now), negative if the tree does not * contain any node that might be related to the searched data */ __attribute__((__nonnull__)) int cx_tree_search_data( void const *root, void const *data, cx_tree_search_data_func sfunc, void **result, ptrdiff_t loc_children, ptrdiff_t loc_next ); /** * Searches for a node in a tree. * * When no node with the same data can be found, the search function might * return a closest result which might be a good starting point for adding the * new node to the tree (see also #cx_tree_add()). * * Depending on the tree structure it is not necessarily guaranteed that the * "closest" match is uniquely defined. This function will search for a node * with the best match according to the \p sfunc (meaning: the return value of * \p sfunc which is closest to zero). If that is also ambiguous, an arbitrary * node matching the criteria is returned. * * @param root the root node * @param node the node to search for * @param sfunc the search function * @param result where the result shall be stored * @param loc_children offset in the node struct for the children linked list * @param loc_next offset in the node struct for the next pointer * @return zero if the node was found exactly, positive if a node was found that * could contain the node (but doesn't right now), negative if the tree does not * contain any node that might be related to the searched data */ __attribute__((__nonnull__)) int cx_tree_search( void const *root, void const *node, cx_tree_search_func sfunc, void **result, ptrdiff_t loc_children, ptrdiff_t loc_next ); /** * Creates a depth-first iterator for a tree with the specified root node. * * @note A tree iterator needs to maintain a stack of visited nodes, which is * allocated using stdlib malloc(). * When the iterator becomes invalid, this memory is automatically released. * However, if you wish to cancel the iteration before the iterator becomes * invalid by itself, you MUST call cxTreeIteratorDispose() manually to release * the memory. * * @remark The returned iterator does not support cxIteratorFlagRemoval(). * * @param root the root node * @param visit_on_exit set to true, when the iterator shall visit a node again * after processing all children * @param loc_children offset in the node struct for the children linked list * @param loc_next offset in the node struct for the next pointer * @return the new tree iterator * @see cxTreeIteratorDispose() */ __attribute__((__nonnull__)) CxTreeIterator cx_tree_iterator( void *root, bool visit_on_exit, ptrdiff_t loc_children, ptrdiff_t loc_next ); /** * Creates a breadth-first iterator for a tree with the specified root node. * * @note A tree visitor needs to maintain a queue of to be visited nodes, which * is allocated using stdlib malloc(). * When the visitor becomes invalid, this memory is automatically released. * However, if you wish to cancel the iteration before the visitor becomes * invalid by itself, you MUST call cxTreeVisitorDispose() manually to release * the memory. * * @remark The returned iterator does not support cxIteratorFlagRemoval(). * * @param root the root node * @param loc_children offset in the node struct for the children linked list * @param loc_next offset in the node struct for the next pointer * @return the new tree visitor * @see cxTreeVisitorDispose() */ __attribute__((__nonnull__)) CxTreeVisitor cx_tree_visitor( void *root, ptrdiff_t loc_children, ptrdiff_t loc_next ); /** * Describes a function that creates a tree node from the specified data. * The first argument points to the data the node shall contain and * the second argument may be used for additional data (e.g. an allocator). * Functions of this type shall either return a new pointer to a newly * created node or \c NULL when allocation fails. * * \note the function may leave the node pointers in the struct uninitialized. * The caller is responsible to set them according to the intended use case. */ typedef void *(*cx_tree_node_create_func)(void const *, void *); /** * The local search depth for a new subtree when adding multiple elements. * The default value is 3. * This variable is used by #cx_tree_add_array() and #cx_tree_add_iter() to * implement optimized insertion of multiple elements into a tree. */ extern unsigned int cx_tree_add_look_around_depth; /** * Adds multiple elements efficiently to a tree. * * Once an element cannot be added to the tree, this function returns, leaving * the iterator in a valid state pointing to the element that could not be * added. * Also, the pointer of the created node will be stored to \p failed. * The integer returned by this function denotes the number of elements obtained * from the \p iter that have been successfully processed. * When all elements could be processed, a \c NULL pointer will be written to * \p failed. * * The advantage of this function compared to multiple invocations of * #cx_tree_add() is that the search for the insert locations is not always * started from the root node. * Instead, the function checks #cx_tree_add_look_around_depth many parent nodes * of the current insert location before starting from the root node again. * When the variable is set to zero, only the last found location is checked * again. * * Refer to the documentation of #cx_tree_add() for more details. * * @param iter a pointer to an arbitrary iterator * @param sfunc a search function * @param cfunc a node creation function * @param cdata optional additional data * @param root the root node of the tree * @param failed location where the pointer to a failed node shall be stored * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @return the number of nodes created and added * @see cx_tree_add() */ __attribute__((__nonnull__(1, 2, 3, 5, 6))) size_t cx_tree_add_iter( struct cx_iterator_base_s *iter, cx_tree_search_func sfunc, cx_tree_node_create_func cfunc, void *cdata, void **failed, void *root, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); /** * Adds multiple elements efficiently to a tree. * * Once an element cannot be added to the tree, this function returns, storing * the pointer of the created node to \p failed. * The integer returned by this function denotes the number of elements from * the \p src array that have been successfully processed. * When all elements could be processed, a \c NULL pointer will be written to * \p failed. * * The advantage of this function compared to multiple invocations of * #cx_tree_add() is that the search for the insert locations is not always * started from the root node. * Instead, the function checks #cx_tree_add_look_around_depth many parent nodes * of the current insert location before starting from the root node again. * When the variable is set to zero, only the last found location is checked * again. * * Refer to the documentation of #cx_tree_add() for more details. * * @param src a pointer to the source data array * @param num the number of elements in the \p src array * @param elem_size the size of each element in the \p src array * @param sfunc a search function * @param cfunc a node creation function * @param cdata optional additional data * @param failed location where the pointer to a failed node shall be stored * @param root the root node of the tree * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @return the number of array elements successfully processed * @see cx_tree_add() */ __attribute__((__nonnull__(1, 4, 5, 7, 8))) size_t cx_tree_add_array( void const *src, size_t num, size_t elem_size, cx_tree_search_func sfunc, cx_tree_node_create_func cfunc, void *cdata, void **failed, void *root, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); /** * Adds data to a tree. * * An adequate location where to add the new tree node is searched with the * specified \p sfunc. * * When a location is found, the \p cfunc will be invoked with \p cdata. * * The node returned by \p cfunc will be linked into the tree. * When \p sfunc returned a positive integer, the new node will be linked as a * child. The other children (now siblings of the new node) are then checked * with \p sfunc, whether they could be children of the new node and re-linked * accordingly. * * When \p sfunc returned zero and the found node has a parent, the new * node will be added as sibling - otherwise, the new node will be added * as a child. * * When \p sfunc returned a negative value, the new node will not be added to * the tree and this function returns a non-zero value. * The caller should check if \p cnode contains a node pointer and deal with the * node that could not be added. * * This function also returns a non-zero value when \p cfunc tries to allocate * a new node but fails to do so. In that case, the pointer stored to \p cnode * will be \c NULL. * * Multiple elements can be added more efficiently with * #cx_tree_add_array() or #cx_tree_add_iter(). * * @param src a pointer to the data * @param sfunc a search function * @param cfunc a node creation function * @param cdata optional additional data * @param cnode the location where a pointer to the new node is stored * @param root the root node of the tree * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @return zero when a new node was created and added to the tree, * non-zero otherwise */ __attribute__((__nonnull__(1, 2, 3, 5, 6))) int cx_tree_add( void const *src, cx_tree_search_func sfunc, cx_tree_node_create_func cfunc, void *cdata, void **cnode, void *root, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); #ifdef __cplusplus } // extern "C" #endif #endif //UCX_TREE_H