src/cx/tree.h

Mon, 09 Sep 2024 21:34:39 +0200

author
Mike Becker <universe@uap-core.de>
date
Mon, 09 Sep 2024 21:34:39 +0200
changeset 879
9c24a4eb5ac9
parent 871
e29c1f96646d
child 890
54565fd74e74
permissions
-rw-r--r--

implement optimized sorted insert for linked lists - resolves #415

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * \file tree.h
 * \brief Interface for tree implementations.
 * \author Mike Becker
 * \author Olaf Wintermann
 * \copyright 2-Clause BSD License
 */

#ifndef UCX_TREE_H
#define UCX_TREE_H

#include "common.h"

#include "iterator.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * A depth-first tree iterator.
 *
 * This iterator is not position-aware in a strict sense, as it does not assume
 * a particular order of elements in the tree. However, the iterator keeps track
 * of the number of nodes it has passed in a counter variable.
 * Each node, regardless of the number of passes, is counted only once.
 *
 * @note Objects that are pointed to by an iterator are mutable through that
 * iterator. However, if the underlying data structure is mutated by other
 * means than this iterator (e.g. elements added or removed), the iterator
 * becomes invalid (regardless of what cxIteratorValid() returns).
 *
 * @see CxIterator
 */
typedef struct cx_tree_iterator_s {
    /**
     * Base members.
     */
    CX_ITERATOR_BASE;
    /**
     * Indicates whether the subtree below the current node shall be skipped.
     *
     * Can be set to true with cxTreeIteratorContinue().
     */
    bool skip;
    /**
     * Set to true, when the iterator shall visit a node again
     * when all its children have been processed.
     */
    bool visit_on_exit;
    /**
     * True, if this iterator is currently leaving the node.
     */
    bool exiting;
    /**
     * Offset in the node struct for the children linked list.
     */
    ptrdiff_t loc_children;
    /**
     * Offset in the node struct for the next pointer.
     */
    ptrdiff_t loc_next;
    /**
     * The total number of distinct nodes that have been passed so far.
     */
    size_t counter;
    /**
     * The currently observed node.
     *
     * This is the same as what cxIteratorCurrent() would return.
     */
    void *node;
    /**
     * Stores a copy of the next pointer of the visited node.
     * Allows freeing a node on exit without corrupting the iteration.
     */
    void *node_next;
    /**
     * Internal stack.
     * Will be automatically freed once the iterator becomes invalid.
     *
     * If you want to discard the iterator before that happens, you need to
     * manually call cxTreeIteratorDispose().
     */
    void **stack;
    /**
     * Internal capacity of the stack.
     */
    size_t stack_capacity;
    /*
     * Anonymous union: both members are names for the same storage, i.e. the
     * internal stack size and the current depth are always the same value.
     */
    union {
        /**
         * Internal stack size.
         */
        size_t stack_size;
        /**
         * The current depth in the tree.
         */
        size_t depth;
    };
} CxTreeIterator;

/**
 * An element in a visitor queue.
 *
 * The queue is a singly linked list; it is referenced by the
 * \c queue_next and \c queue_last members of a CxTreeVisitor.
 */
struct cx_tree_visitor_queue_s {
    /**
     * The tree node to visit.
     */
    void *node;
    /**
     * The depth of the node.
     */
    size_t depth;
    /**
     * The next element in the queue or \c NULL if this is the last element.
     */
    struct cx_tree_visitor_queue_s *next;
};

/**
 * A breadth-first tree iterator.
 *
 * This iterator needs to maintain a visitor queue that will be automatically
 * freed once the iterator becomes invalid.
 * If you want to discard the iterator before that happens, you MUST manually
 * call cxTreeVisitorDispose().
 *
 * This iterator is not position-aware in a strict sense, as it does not assume
 * a particular order of elements in the tree. However, the iterator keeps track
 * of the number of nodes it has passed in a counter variable.
 * Each node, regardless of the number of passes, is counted only once.
 *
 * @note Objects that are pointed to by an iterator are mutable through that
 * iterator. However, if the underlying data structure is mutated by other
 * means than this iterator (e.g. elements added or removed), the iterator
 * becomes invalid (regardless of what cxIteratorValid() returns).
 *
 * @see CxIterator
 */
typedef struct cx_tree_visitor_s {
    /**
     * Base members.
     */
    CX_ITERATOR_BASE;
    /**
     * Indicates whether the subtree below the current node shall be skipped.
     *
     * Can be set to true with cxTreeVisitorContinue().
     */
    bool skip;
    /**
     * Offset in the node struct for the children linked list.
     */
    ptrdiff_t loc_children;
    /**
     * Offset in the node struct for the next pointer.
     */
    ptrdiff_t loc_next;
    /**
     * The total number of distinct nodes that have been passed so far.
     */
    size_t counter;
    /**
     * The currently observed node.
     *
     * This is the same as what cxIteratorCurrent() would return.
     */
    void *node;
    /**
     * The current depth in the tree.
     */
    size_t depth;
    /**
     * The next element in the visitor queue.
     *
     * cxTreeVisitorDispose() starts freeing the queue at this element.
     */
    struct cx_tree_visitor_queue_s *queue_next;
    /**
     * The last element in the visitor queue.
     */
    struct cx_tree_visitor_queue_s *queue_last;
} CxTreeVisitor;

/**
 * Frees the internal stack of the given tree iterator.
 *
 * The stack pointer of the iterator is reset to \c NULL, therefore
 * disposing the same iterator more than once is harmless.
 *
 * @param iter the iterator
 */
__attribute__((__nonnull__))
static inline void cxTreeIteratorDispose(CxTreeIterator *iter) {
    // detach the stack from the iterator before releasing it
    void **old_stack = iter->stack;
    iter->stack = NULL;
    free(old_stack);
}

/**
 * Releases internal memory of the given tree visitor.
 *
 * Frees every element that is still pending in the visitor queue and resets
 * both queue pointers of the visitor to \c NULL. Because no dangling pointers
 * are left behind, disposing the same visitor more than once is harmless.
 *
 * @param visitor the visitor
 */
__attribute__((__nonnull__))
static inline void cxTreeVisitorDispose(CxTreeVisitor *visitor) {
    struct cx_tree_visitor_queue_s *q = visitor->queue_next;
    // detach the queue first, so the visitor never refers to freed memory
    visitor->queue_next = NULL;
    visitor->queue_last = NULL;
    while (q != NULL) {
        // remember the successor before the element is released
        struct cx_tree_visitor_queue_s *next = q->next;
        free(q);
        q = next;
    }
}

/**
 * Advises the iterator to skip the subtree below the current node and
 * also continues the current loop.
 *
 * The macro expands to exactly one statement, so it may be used as the
 * unbraced body of an \c if or \c else without the \c continue escaping the
 * condition. It must be used directly inside the loop that drives the
 * iteration and must be terminated with a semicolon.
 *
 * @param iterator the iterator
 */
#define cxTreeIteratorContinue(iterator) \
    if (1) { (iterator).skip = true; continue; } else ((void)0)

/**
 * Advises the visitor to skip the subtree below the current node and
 * also continues the current loop.
 *
 * This is the same statement as cxTreeIteratorContinue(), applied to a
 * visitor; the same usage rules apply.
 *
 * @param visitor the visitor
 */
#define cxTreeVisitorContinue(visitor) cxTreeIteratorContinue(visitor)

/**
 * Links a node to a (new) parent.
 *
 * If the node already has a parent, it is unlinked first.
 * If the parent already has children, the node is \em appended to the list
 * of all currently existing children.
 *
 * Both pointer arguments must not be \c NULL. They are declared \c restrict
 * and therefore must not alias each other.
 *
 * @param parent the parent node
 * @param node the node that shall be linked
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @see cx_tree_unlink()
 */
__attribute__((__nonnull__))
void cx_tree_link(
        void *restrict parent,
        void *restrict node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Unlinks a node from its parent.
 *
 * If the node has no parent, this function does nothing.
 *
 * @param node the node that shall be unlinked from its parent
 * (must not be \c NULL)
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @see cx_tree_link()
 */
__attribute__((__nonnull__))
void cx_tree_unlink(
        void *node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Function pointer for a search function that compares a node with raw data.
 *
 * A function of this kind shall check if the specified \p node
 * contains the given \p data or if one of the children might contain
 * the data.
 *
 * The function should use the returned integer to indicate how close the
 * match is, where a negative number means that it does not match at all.
 *
 * For example, if a tree stores file path information, a node that
 * describes a parent directory of the searched filename shall
 * return a positive number to indicate that a child node might contain the
 * searched item. On the other hand, if the node denotes a path that is not a
 * prefix of the searched filename, the function would return -1 to indicate
 * that the search does not need to be continued in that branch.
 *
 * @param node the node that is currently investigated
 * @param data the data that is searched for
 *
 * @return 0 if the node contains the data,
 * positive if one of the children might contain the data,
 * negative if neither the node, nor the children contains the data
 * @see cx_tree_search_data()
 */
typedef int (*cx_tree_search_data_func)(void const *node, void const *data);


/**
 * Function pointer for a search function that compares two nodes.
 *
 * A function of this kind shall check if the specified \p node
 * contains the same \p data as \p new_node or if one of the children might
 * contain the data.
 *
 * The function should use the returned integer to indicate how close the
 * match is, where a negative number means that it does not match at all.
 *
 * For example, if a tree stores file path information, a node that
 * describes a parent directory of the searched filename shall
 * return a positive number to indicate that a child node might contain the
 * searched item. On the other hand, if the node denotes a path that is not a
 * prefix of the searched filename, the function would return -1 to indicate
 * that the search does not need to be continued in that branch.
 *
 * @param node the node that is currently investigated
 * @param new_node a new node with the information which is searched
 *
 * @return 0 if \p node contains the same data as \p new_node,
 * positive if one of the children might contain the data,
 * negative if neither the node, nor the children contains the data
 * @see cx_tree_search()
 */
typedef int (*cx_tree_search_func)(void const *node, void const *new_node);

/**
 * Searches for data in a tree.
 *
 * When the data cannot be found exactly, the search function might return a
 * closest result which might be a good starting point for adding a new node
 * to the tree (see also #cx_tree_add()).
 *
 * Depending on the tree structure it is not necessarily guaranteed that the
 * "closest" match is uniquely defined. This function will search for a node
 * with the best match according to the \p sfunc (meaning: the return value of
 * \p sfunc which is closest to zero). If that is also ambiguous, an arbitrary
 * node matching the criteria is returned.
 *
 * All pointer arguments must not be \c NULL.
 *
 * @param root the root node
 * @param data the data to search for
 * @param sfunc the search function
 * @param result where the result shall be stored
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return zero if the data was found exactly, positive if a node was found
 * that could contain the data (but doesn't right now), negative if the tree
 * does not contain any node that might be related to the searched data
 */
__attribute__((__nonnull__))
int cx_tree_search_data(
        void const *root,
        void const *data,
        cx_tree_search_data_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Searches for a node in a tree.
 *
 * When no node with the same data can be found, the search function might
 * return a closest result which might be a good starting point for adding the
 * new node to the tree (see also #cx_tree_add()).
 *
 * Depending on the tree structure it is not necessarily guaranteed that the
 * "closest" match is uniquely defined. This function will search for a node
 * with the best match according to the \p sfunc (meaning: the return value of
 * \p sfunc which is closest to zero). If that is also ambiguous, an arbitrary
 * node matching the criteria is returned.
 *
 * All pointer arguments must not be \c NULL.
 *
 * @param root the root node
 * @param node the node to search for
 * @param sfunc the search function
 * @param result where the result shall be stored
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return zero if the node was found exactly, positive if a node was found that
 * could contain the node (but doesn't right now), negative if the tree does not
 * contain any node that might be related to the searched data
 */
__attribute__((__nonnull__))
int cx_tree_search(
        void const *root,
        void const *node,
        cx_tree_search_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Creates a depth-first iterator for a tree with the specified root node.
 *
 * @note A tree iterator needs to maintain a stack of visited nodes, which is
 * allocated using stdlib malloc().
 * When the iterator becomes invalid, this memory is automatically released.
 * However, if you wish to cancel the iteration before the iterator becomes
 * invalid by itself, you MUST call cxTreeIteratorDispose() manually to release
 * the memory.
 *
 * @remark The returned iterator does not support cxIteratorFlagRemoval().
 *
 * @param root the root node (must not be \c NULL)
 * @param visit_on_exit set to true, when the iterator shall visit a node again
 * after processing all children
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return the new tree iterator
 * @see cxTreeIteratorDispose()
 */
__attribute__((__nonnull__))
CxTreeIterator cx_tree_iterator(
        void *root,
        bool visit_on_exit,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Creates a breadth-first iterator for a tree with the specified root node.
 *
 * @note A tree visitor needs to maintain a queue of nodes that are yet to be
 * visited, which is allocated using stdlib malloc().
 * When the visitor becomes invalid, this memory is automatically released.
 * However, if you wish to cancel the iteration before the visitor becomes
 * invalid by itself, you MUST call cxTreeVisitorDispose() manually to release
 * the memory.
 *
 * @remark The returned visitor does not support cxIteratorFlagRemoval().
 *
 * @param root the root node (must not be \c NULL)
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return the new tree visitor
 * @see cxTreeVisitorDispose()
 */
__attribute__((__nonnull__))
CxTreeVisitor cx_tree_visitor(
        void *root,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Describes a function that creates a tree node from the specified data.
 * The first argument points to the data the node shall contain and
 * the second argument may be used for additional data (e.g. an allocator).
 * Functions of this type shall return either a pointer to a newly
 * created node or \c NULL when allocation fails.
 *
 * \note The function may leave the node pointers in the struct uninitialized.
 * The caller is responsible for setting them according to the intended use
 * case.
 */
typedef void *(*cx_tree_node_create_func)(void const *, void *);

/**
 * The local search depth for a new subtree when adding multiple elements.
 * The default value is 3.
 * This variable is used by #cx_tree_add_array() and #cx_tree_add_iter() to
 * implement optimized insertion of multiple elements into a tree.
 * When it is set to zero, only the last found insert location is checked
 * again before the search restarts from the root node.
 */
extern unsigned int cx_tree_add_look_around_depth;

/**
 * Adds multiple elements efficiently to a tree.
 *
 * Once an element cannot be added to the tree, this function returns, leaving
 * the iterator in a valid state pointing to the element that could not be
 * added.
 * Also, the pointer of the created node will be stored to \p failed.
 * The integer returned by this function denotes the number of elements obtained
 * from the \p iter that have been successfully processed.
 * When all elements could be processed, a \c NULL pointer will be written to
 * \p failed.
 *
 * The advantage of this function compared to multiple invocations of
 * #cx_tree_add() is that the search for the insert locations is not always
 * started from the root node.
 * Instead, the function checks #cx_tree_add_look_around_depth many parent nodes
 * of the current insert location before starting from the root node again.
 * When the variable is set to zero, only the last found location is checked
 * again.
 *
 * Refer to the documentation of #cx_tree_add() for more details.
 *
 * All pointer arguments except \p cdata must not be \c NULL.
 *
 * @param iter a pointer to an arbitrary iterator
 * @param sfunc a search function
 * @param cfunc a node creation function
 * @param cdata optional additional data (may be \c NULL)
 * @param failed location where the pointer to a failed node shall be stored
 * @param root the root node of the tree
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @return the number of nodes created and added
 * @see cx_tree_add()
 */
__attribute__((__nonnull__(1, 2, 3, 5, 6)))
size_t cx_tree_add_iter(
        struct cx_iterator_base_s *iter,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **failed,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Adds multiple elements efficiently to a tree.
 *
 * Once an element cannot be added to the tree, this function returns, storing
 * the pointer of the created node to \p failed.
 * The integer returned by this function denotes the number of elements from
 * the \p src array that have been successfully processed.
 * When all elements could be processed, a \c NULL pointer will be written to
 * \p failed.
 *
 * The advantage of this function compared to multiple invocations of
 * #cx_tree_add() is that the search for the insert locations is not always
 * started from the root node.
 * Instead, the function checks #cx_tree_add_look_around_depth many parent nodes
 * of the current insert location before starting from the root node again.
 * When the variable is set to zero, only the last found location is checked
 * again.
 *
 * Refer to the documentation of #cx_tree_add() for more details.
 *
 * All pointer arguments except \p cdata must not be \c NULL.
 *
 * @param src a pointer to the source data array
 * @param num the number of elements in the \p src array
 * @param elem_size the size of each element in the \p src array
 * @param sfunc a search function
 * @param cfunc a node creation function
 * @param cdata optional additional data (may be \c NULL)
 * @param failed location where the pointer to a failed node shall be stored
 * @param root the root node of the tree
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @return the number of array elements successfully processed
 * @see cx_tree_add()
 */
__attribute__((__nonnull__(1, 4, 5, 7, 8)))
size_t cx_tree_add_array(
        void const *src,
        size_t num,
        size_t elem_size,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **failed,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Adds data to a tree.
 *
 * An adequate location where to add the new tree node is searched with the
 * specified \p sfunc.
 *
 * When a location is found, the \p cfunc will be invoked with \p cdata.
 *
 * The node returned by \p cfunc will be linked into the tree.
 * When \p sfunc returned a positive integer, the new node will be linked as a
 * child. The other children (now siblings of the new node) are then checked
 * with \p sfunc, whether they could be children of the new node and re-linked
 * accordingly.
 *
 * When \p sfunc returned zero and the found node has a parent, the new
 * node will be added as a sibling - otherwise, the new node will be added
 * as a child.
 *
 * When \p sfunc returned a negative value, the new node will not be added to
 * the tree and this function returns a non-zero value.
 * The caller should check if \p cnode contains a node pointer and deal with the
 * node that could not be added.
 *
 * This function also returns a non-zero value when \p cfunc tries to allocate
 * a new node but fails to do so. In that case, the pointer stored to \p cnode
 * will be \c NULL.
 *
 * Multiple elements can be added more efficiently with
 * #cx_tree_add_array() or #cx_tree_add_iter().
 *
 * All pointer arguments except \p cdata must not be \c NULL.
 *
 * @param src a pointer to the data
 * @param sfunc a search function
 * @param cfunc a node creation function
 * @param cdata optional additional data (may be \c NULL)
 * @param cnode the location where a pointer to the new node is stored
 * @param root the root node of the tree
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @return zero when a new node was created and added to the tree,
 * non-zero otherwise
 */
__attribute__((__nonnull__(1, 2, 3, 5, 6)))
int cx_tree_add(
        void const *src,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **cnode,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

#ifdef __cplusplus
} // extern "C"
#endif

#endif //UCX_TREE_H

mercurial