src/tree.c

Mon, 09 Sep 2024 21:34:39 +0200

author
Mike Becker <universe@uap-core.de>
date
Mon, 09 Sep 2024 21:34:39 +0200
changeset 879
9c24a4eb5ac9
parent 871
e29c1f96646d
child 890
54565fd74e74
permissions
-rw-r--r--

implement optimized sorted insert for linked lists - resolves #415

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/tree.h"

#include "cx/array_list.h"

#include <assert.h>

// Accesses the pointer stored `off` bytes after the address `cur`.
// The expansion is an lvalue of type void*, so it can be read and assigned.
#define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off)))
// Accessors for the individual link pointers of a node.
// Each of them expects a variable with the respective loc_* name
// (the byte offset of that pointer) to exist in the scope where it is used.
#define tree_parent(node) CX_TREE_PTR(node, loc_parent)
#define tree_children(node) CX_TREE_PTR(node, loc_children)
#define tree_last_child(node) CX_TREE_PTR(node, loc_last_child)
#define tree_prev(node) CX_TREE_PTR(node, loc_prev)
#define tree_next(node) CX_TREE_PTR(node, loc_next)

// The five offset variables in parameter order, used to forward them
// unchanged from one tree function to another.
#define cx_tree_ptr_locations \
    loc_parent, loc_children, loc_last_child, loc_prev, loc_next

/**
 * Resets all link pointers of a node to NULL.
 *
 * The pointer to the last child is optional: it is only cleared when
 * its offset is not negative.
 *
 * @param node the node whose link pointers shall be cleared
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the node has no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 */
static void cx_tree_zero_pointers(
        void *node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    // optional pointer first, a negative offset means it does not exist
    if (loc_last_child >= 0) {
        tree_last_child(node) = NULL;
    }

    // the remaining pointers are mandatory
    tree_children(node) = NULL;
    tree_next(node) = NULL;
    tree_prev(node) = NULL;
    tree_parent(node) = NULL;
}

void cx_tree_link(
        void *restrict parent,
        void *restrict node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    void *current_parent = tree_parent(node);
    if (current_parent == parent) return;
    if (current_parent != NULL) {
        cx_tree_unlink(node, cx_tree_ptr_locations);
    }

    if (tree_children(parent) == NULL) {
        tree_children(parent) = node;
        if (loc_last_child >= 0) {
            tree_last_child(parent) = node;
        }
    } else {
        if (loc_last_child >= 0) {
            void *child = tree_last_child(parent);
            tree_prev(node) = child;
            tree_next(child) = node;
            tree_last_child(parent) = node;
        } else {
            void *child = tree_children(parent);
            void *next;
            while ((next = tree_next(child)) != NULL) {
                child = next;
            }
            tree_prev(node) = child;
            tree_next(child) = node;
        }
    }
    tree_parent(node) = parent;
}

/**
 * Detaches a node from its parent and its siblings.
 *
 * The children pointer of the node itself is not modified, so everything
 * below the node stays attached to it. Nodes without a parent are left
 * untouched.
 *
 * @param node the node that shall be detached
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the nodes have no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 */
void cx_tree_unlink(
        void *node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    void *parent = tree_parent(node);
    if (parent == NULL) {
        // not linked to anything
        return;
    }

    void *left = tree_prev(node);
    void *right = tree_next(node);

    // sanity: a node with a neighbor cannot be the first / last child
    assert(left == NULL || tree_children(parent) != node);
    assert(right == NULL || loc_last_child < 0 ||
           tree_last_child(parent) != node);

    // close the gap on the left side
    if (left != NULL) {
        tree_next(left) = right;
    } else {
        tree_children(parent) = right;
    }

    // close the gap on the right side
    if (right != NULL) {
        tree_prev(right) = left;
    } else if (loc_last_child >= 0) {
        tree_last_child(parent) = left;
    }

    // finally, forget the former relatives
    tree_next(node) = NULL;
    tree_prev(node) = NULL;
    tree_parent(node) = NULL;
}

/**
 * Searches the tree below (and including) root for a node.
 *
 * The search function decides for every examined node: zero means the node
 * is a match, a positive value means that the children of the node are
 * worth a look, and a negative value means that the node and everything
 * below it are ignored.
 *
 * @param root the node where the search starts
 * @param node the second argument passed to every call of sfunc
 * @param sfunc the search function
 * @param result receives the matching node, or, when there is no exact
 * match, the examined node for which sfunc reported the smallest positive
 * value; NULL when sfunc rejects the root
 * @param loc_children offset of the pointer to the first child
 * @param loc_next offset of the pointer to the next sibling
 * @return zero on an exact match, the (positive) value sfunc reported for
 * the returned candidate when there is no exact match, or the negative
 * value sfunc reported for the root
 */
int cx_tree_search(
        void const *root,
        void const *node,
        cx_tree_search_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    *result = NULL;

    // examine the root before setting up any working memory
    int const root_ret = sfunc(root, node);
    if (root_ret < 0) {
        return root_ret;
    }
    if (root_ret == 0 || tree_children(root) == NULL) {
        *result = (void*)root;
        return root_ret;
    }

    // stack of nodes which still need to be examined
    CX_ARRAY_DECLARE(void const*, work);
    cx_array_initialize(work, 32);

    // start with the children of the root
    for (void *c = tree_children(root); c != NULL; c = tree_next(c)) {
        cx_array_simple_add(work, c);
    }

    // best node so far, together with the value sfunc reported for it
    void *candidate = (void *) root;
    int ret_candidate = root_ret;
    int ret = root_ret;

    while (work_size > 0) {
        // take the top-most node from the stack
        void const *elem = work[--work_size];

        ret = sfunc(elem, node);

        if (ret == 0) {
            // exact match, no need to look any further
            *result = (void *) elem;
            work_size = 0;
            break;
        }
        if (ret < 0) {
            // neither this node nor its children are of interest
            continue;
        }

        // the children might be a better fit, examine them later
        for (void *c = tree_children(elem); c != NULL; c = tree_next(c)) {
            cx_array_simple_add(work, c);
        }

        // keep the node with the smallest positive value as fallback
        if (ret < ret_candidate) {
            candidate = (void *) elem;
            ret_candidate = ret;
        }
    }

    // without an exact match, fall back to the best candidate
    if (ret != 0 && candidate != NULL) {
        ret = ret_candidate;
        *result = candidate;
    }

    free(work);
    return ret;
}

/**
 * Searches the tree below (and including) root for data.
 *
 * This is a thin wrapper: the search function is cast to the node-based
 * function type and everything else is delegated to cx_tree_search().
 *
 * @param root the node where the search starts
 * @param data the second argument passed to every call of sfunc
 * @param sfunc the search function
 * @param result receives the node found by cx_tree_search()
 * @param loc_children offset of the pointer to the first child
 * @param loc_next offset of the pointer to the next sibling
 * @return the return value of cx_tree_search()
 */
int cx_tree_search_data(
        void const *root,
        void const *data,
        cx_tree_search_data_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    // the node-based search does all the work
    cx_tree_search_func const node_sfunc = (cx_tree_search_func) sfunc;
    return cx_tree_search(root, data, node_sfunc, result,
                          loc_children, loc_next);
}

/**
 * Validity check for the tree iterator.
 *
 * @param it pointer to a struct cx_tree_iterator_s
 * @return true as long as the iterator points to a node
 */
static bool cx_tree_iter_valid(void const *it) {
    return ((struct cx_tree_iterator_s const *) it)->node != NULL;
}

/**
 * Returns the node the tree iterator currently points to.
 *
 * @param it pointer to a struct cx_tree_iterator_s
 * @return the current node
 */
static void *cx_tree_iter_current(void const *it) {
    return ((struct cx_tree_iterator_s const *) it)->node;
}

/**
 * Advances the tree iterator by one step.
 *
 * The children of a node are entered before its siblings. When the
 * visit_on_exit flag is set, a node is presented a second time (with the
 * exiting flag set) after its children have been processed and before the
 * iterator moves on. Setting the skip flag makes the iterator ignore the
 * children of the current node once.
 *
 * When there is nothing left to visit, the node stack is freed and the
 * node pointer becomes NULL.
 *
 * @param it pointer to a struct cx_tree_iterator_s
 */
static void cx_tree_iter_next(void *it) {
    struct cx_tree_iterator_s *iter = it;
    // the tree_* accessor macros rely on these local names
    ptrdiff_t const loc_next = iter->loc_next;
    ptrdiff_t const loc_children = iter->loc_children;

    void *children;

    // check if we are currently exiting or entering nodes
    if (iter->exiting) {
        children = NULL;
        // skipping on exit is pointless, just clear the flag
        iter->skip = false;
    } else {
        if (iter->skip) {
            // skip flag is set, pretend that there are no children
            iter->skip = false;
            children = NULL;
        } else {
            // try to enter the children (if any)
            children = tree_children(iter->node);
        }
    }

    if (children == NULL) {
        // search for the next node
        void *next;
        cx_tree_iter_search_next:
        // check if there is a sibling
        if (iter->exiting) {
            // the sibling was remembered before the node was presented
            // on exit, so the node itself is not read again
            // NOTE(review): presumably so that the node may be destroyed
            // during the exit visit - confirm with the API documentation
            next = iter->node_next;
        } else {
            next = tree_next(iter->node);
            iter->node_next = next;
        }
        if (next == NULL) {
            // no sibling, we are done with this node and exit
            if (iter->visit_on_exit && !iter->exiting) {
                // iter is supposed to visit the node again
                iter->exiting = true;
            } else {
                iter->exiting = false;
                if (iter->depth == 1) {
                    // there is no parent - we have iterated the entire tree
                    // invalidate the iterator and free the node stack
                    iter->node = iter->node_next = NULL;
                    iter->stack_capacity = iter->depth = 0;
                    free(iter->stack);
                    iter->stack = NULL;
                } else {
                    // the parent node can be obtained from the top of stack
                    // this way we can avoid the loc_parent in the iterator
                    iter->depth--;
                    iter->node = iter->stack[iter->depth - 1];
                    // retry with the parent node to find a sibling
                    goto cx_tree_iter_search_next;
                }
            }
        } else {
            if (iter->visit_on_exit && !iter->exiting) {
                // iter is supposed to visit the node again
                iter->exiting = true;
            } else {
                iter->exiting = false;
                // move to the sibling
                iter->counter++;
                iter->node = next;
                // new top of stack is the sibling
                iter->stack[iter->depth - 1] = next;
            }
        }
    } else {
        // node has children, push the first child onto the stack and enter it
        // NOTE(review): depth is not incremented explicitly here, presumably
        // the array macro maintains it as the size of the stack - confirm
        cx_array_simple_add(iter->stack, children);
        iter->node = children;
        iter->counter++;
    }
}

/**
 * Creates a depth-first iterator that starts at the specified root.
 *
 * The iterator allocates a node stack which is freed automatically when
 * the iteration runs to completion.
 *
 * When root is NULL there is nothing to iterate. In that case no stack is
 * allocated and the returned iterator is invalid from the start, in the
 * same state a completed iteration leaves behind. The same state is
 * returned when the memory for the stack cannot be allocated.
 *
 * @param root the node where the iteration starts (may be NULL)
 * @param visit_on_exit true when nodes shall be visited a second time
 * after their children have been processed
 * @param loc_children offset of the pointer to the first child
 * @param loc_next offset of the pointer to the next sibling
 * @return the new iterator
 */
CxTreeIterator cx_tree_iterator(
        void *root,
        bool visit_on_exit,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    enum { initial_stack_capacity = 16 };

    CxTreeIterator iter;
    iter.loc_children = loc_children;
    iter.loc_next = loc_next;
    iter.visit_on_exit = visit_on_exit;

    // initial traversal state
    iter.node_next = NULL;
    iter.exiting = false;
    iter.skip = false;

    // assign base iterator functions
    iter.base.mutating = false;
    iter.base.remove = false;
    iter.base.current_impl = NULL;
    iter.base.valid = cx_tree_iter_valid;
    iter.base.next = cx_tree_iter_next;
    iter.base.current = cx_tree_iter_current;

    // allocate the stack only when there is something to iterate,
    // otherwise the memory would never be released by the iteration
    iter.stack = root == NULL
                 ? NULL : malloc(sizeof(void *) * initial_stack_capacity);
    if (iter.stack == NULL) {
        // nothing to iterate (or out of memory):
        // return an iterator that is already finished and owns no memory
        iter.node = NULL;
        iter.stack_capacity = 0;
        iter.depth = 0;
        iter.counter = 0;
        return iter;
    }
    iter.stack_capacity = initial_stack_capacity;

    // visit the root node
    iter.node = root;
    iter.counter = 1;
    iter.depth = 1;
    iter.stack[0] = root;

    return iter;
}

/**
 * Validity check for the tree visitor.
 *
 * @param it pointer to a struct cx_tree_visitor_s
 * @return true as long as the visitor points to a node
 */
static bool cx_tree_visitor_valid(void const *it) {
    return ((struct cx_tree_visitor_s const *) it)->node != NULL;
}

/**
 * Returns the node the tree visitor currently points to.
 *
 * @param it pointer to a struct cx_tree_visitor_s
 * @return the current node
 */
static void *cx_tree_visitor_current(void const *it) {
    return ((struct cx_tree_visitor_s const *) it)->node;
}

__attribute__((__nonnull__))
static void cx_tree_visitor_enqueue_siblings(
        struct cx_tree_visitor_s *iter, void *node, ptrdiff_t loc_next) {
    node = tree_next(node);
    while (node != NULL) {
        struct cx_tree_visitor_queue_s *q;
        q = malloc(sizeof(struct cx_tree_visitor_queue_s));
        q->depth = iter->queue_last->depth;
        q->node = node;
        iter->queue_last->next = q;
        iter->queue_last = q;
        node = tree_next(node);
    }
    iter->queue_last->next = NULL;
}

/**
 * Advances the tree visitor by one step.
 *
 * The children of the current node are appended to the queue (unless the
 * skip flag is set, which is cleared in that case), then the node at the
 * front of the queue becomes the current node. When the queue is empty,
 * the node pointer becomes NULL.
 *
 * @param it pointer to a struct cx_tree_visitor_s
 */
static void cx_tree_visitor_next(void *it) {
    struct cx_tree_visitor_s *iter = it;
    ptrdiff_t const loc_next = iter->loc_next;
    ptrdiff_t const loc_children = iter->loc_children;

    // determine the first child, pretend there is none when skipping
    void *first_child = NULL;
    if (iter->skip) {
        iter->skip = false;
    } else {
        first_child = tree_children(iter->node);
    }

    if (first_child != NULL) {
        // enqueue the first child, one level below the current node ...
        struct cx_tree_visitor_queue_s *entry = malloc(sizeof *entry);
        entry->depth = iter->depth + 1;
        entry->node = first_child;
        if (iter->queue_last != NULL) {
            iter->queue_last->next = entry;
        } else {
            assert(iter->queue_next == NULL);
            iter->queue_next = entry;
        }
        iter->queue_last = entry;
        // ... followed by all the other children
        cx_tree_visitor_enqueue_siblings(iter, first_child, loc_next);
    }

    // take the entry at the front of the queue
    struct cx_tree_visitor_queue_s *q = iter->queue_next;
    if (q == NULL) {
        // nothing left, the visitor becomes invalid
        iter->node = NULL;
        return;
    }

    iter->node = q->node;
    iter->depth = q->depth;
    iter->queue_next = q->next;
    if (iter->queue_next == NULL) {
        // that was the last entry, the queue is empty now
        assert(iter->queue_last == q);
        iter->queue_last = NULL;
    }
    free(q);

    // one more node has been visited
    iter->counter++;
}

/**
 * Creates a visitor that starts at the specified root.
 *
 * The visitor starts with an empty queue; queue entries are allocated
 * while the visitor advances.
 *
 * @param root the node where the visitor starts
 * @param loc_children offset of the pointer to the first child
 * @param loc_next offset of the pointer to the next sibling
 * @return the new visitor
 */
CxTreeVisitor cx_tree_visitor(
        void *root,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    CxTreeVisitor visitor;

    // wire up the base iterator
    visitor.base.valid = cx_tree_visitor_valid;
    visitor.base.current = cx_tree_visitor_current;
    visitor.base.next = cx_tree_visitor_next;
    visitor.base.current_impl = NULL;
    visitor.base.mutating = false;
    visitor.base.remove = false;

    // remember where the link pointers are located
    visitor.loc_next = loc_next;
    visitor.loc_children = loc_children;

    // no nodes are pending yet
    visitor.queue_last = NULL;
    visitor.queue_next = NULL;
    visitor.skip = false;

    // the root is the first visited node, located on the first level
    visitor.depth = 1;
    visitor.counter = 1;
    visitor.node = root;

    return visitor;
}

/**
 * Links a node that duplicates an already existing node.
 *
 * The duplicate becomes a sibling of the original. When the original has
 * no parent, the duplicate becomes a child of the original instead.
 *
 * @param original the node that is already part of the tree
 * @param duplicate the node that shall be linked
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the nodes have no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 */
static void cx_tree_add_link_duplicate(
        void *original, void *duplicate,
        ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev, ptrdiff_t loc_next
) {
    void *target = tree_parent(original);
    if (target == NULL) {
        // no common parent available, attach below the original
        target = original;
    }
    cx_tree_link(target, duplicate, cx_tree_ptr_locations);
}

/**
 * Links a new node below a parent.
 *
 * Before the node is added, every existing child of the parent for which
 * sfunc(node, child) reports a positive value is moved below the new node.
 *
 * @param parent the designated parent
 * @param node the new node
 * @param sfunc the search function
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the nodes have no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 */
static void cx_tree_add_link_new(
        void *parent, void *node, cx_tree_search_func sfunc,
        ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev, ptrdiff_t loc_next
) {
    void *following;
    for (void *sibling = tree_children(parent);
         sibling != NULL;
         sibling = following) {
        // re-linking changes the next pointer, so fetch it in advance
        following = tree_next(sibling);

        // adopt the existing node when it could be a child of the new node
        if (sfunc(node, sibling) > 0) {
            cx_tree_link(node, sibling, cx_tree_ptr_locations);
        }
    }

    // now the new node can take its place below the parent
    cx_tree_link(parent, node, cx_tree_ptr_locations);
}

/**
 * Creates a node for the source data and adds it to the tree.
 *
 * The position is determined with cx_tree_search(). When an equal node
 * already exists, the new node is linked as a duplicate of it. Otherwise,
 * the new node is linked below the closest match.
 *
 * @param src the data passed to the create function
 * @param sfunc the search function
 * @param cfunc the function that creates the node
 * @param cdata additional data passed to the create function
 * @param cnode receives the created node (NULL when creation failed);
 * the node is also stored here when it could not be added to the tree
 * @param root the node where the search starts
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the nodes have no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 * @return zero when the node was added, one when the node could not be
 * created or does not fit into the tree
 */
int cx_tree_add(
        void const *src,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **cnode,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    // create the node and make sure it starts without any links
    *cnode = cfunc(src, cdata);
    if (*cnode == NULL) {
        return 1;
    }
    cx_tree_zero_pointers(*cnode, cx_tree_ptr_locations);

    // find out where the node belongs
    void *match = NULL;
    int const result = cx_tree_search(root, *cnode, sfunc, &match,
                                      loc_children, loc_next);

    if (result < 0) {
        // the node does not fit into the tree
        return 1;
    }

    if (result > 0) {
        // no equal node exists, add below the closest match
        cx_tree_add_link_new(match, *cnode, sfunc, cx_tree_ptr_locations);
    } else {
        // an equal node exists, add as duplicate
        cx_tree_add_link_duplicate(match, *cnode, cx_tree_ptr_locations);
    }
    return 0;
}

// Number of times cx_tree_add_iter() moves up to the parent of its current
// position after a failed search, before it restarts the search at the root.
unsigned int cx_tree_add_look_around_depth = 3;

/**
 * Creates nodes for the elements of an iterator and adds them to the tree.
 *
 * The iterator is expected to yield pointers to the element pointers.
 * The search for the position of each new node starts at the position
 * where the previous node was added. When the new node does not fit
 * there, the search is repeated from the parent, up to
 * cx_tree_add_look_around_depth times, and finally from the root.
 *
 * The search never leaves the subtree below root: the root is treated as
 * the top of the tree even when its parent pointer is not NULL.
 *
 * Processing stops at the first element for which no node could be
 * created, or whose node does not fit into the tree.
 *
 * @param iter the iterator providing the elements
 * @param sfunc the search function
 * @param cfunc the function that creates the nodes
 * @param cdata additional data passed to the create function
 * @param failed receives the node that was created but could not be
 * added, NULL otherwise
 * @param root the node where the tree (or subtree) starts
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the nodes have no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 * @return the number of nodes that have been added
 */
size_t cx_tree_add_iter(
        struct cx_iterator_base_s *iter,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **failed,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    // erase the failed pointer
    *failed = NULL;

    // iter not valid? cancel...
    if (!iter->valid(iter)) return 0;

    size_t processed = 0;
    void *current_node = root;
    void const *elem;

    for (void **eptr;
         iter->valid(iter) && (eptr = iter->current(iter)) != NULL;
         iter->next(iter)) {
        elem = *eptr;

        // create the new node
        void *new_node = cfunc(elem, cdata);
        if (new_node == NULL) return processed;
        cx_tree_zero_pointers(new_node, cx_tree_ptr_locations);

        // start searching from current node
        void *match;
        int result;
        // the retry budget is renewed for every element
        unsigned int look_around_retries = cx_tree_add_look_around_depth;
        cx_tree_add_look_around_retry:
        result = cx_tree_search(
                current_node,
                new_node,
                sfunc,
                &match,
                loc_children,
                loc_next
        );

        if (result < 0) {
            // traverse upwards and try to find better parents
            // the root is the upper boundary, even when it has a parent:
            // climbing beyond it would leave the tree we were asked to
            // modify, and resetting to it after the retries are exhausted
            // would repeat the same failing search forever
            void *parent = current_node == root
                           ? NULL : tree_parent(current_node);
            if (parent != NULL) {
                if (look_around_retries > 0) {
                    look_around_retries--;
                    current_node = parent;
                } else {
                    // look around retries exhausted, start from the root
                    current_node = root;
                }
                goto cx_tree_add_look_around_retry;
            } else {
                // no parents. so we failed
                *failed = new_node;
                return processed;
            }
        } else if (result == 0) {
            // data already found in the tree, link duplicate
            cx_tree_add_link_duplicate(match, new_node, cx_tree_ptr_locations);
            // but stick with the original match, in case we needed a new root
            current_node = match;
        } else {
            // closest match found, add new node as child
            cx_tree_add_link_new(match, new_node, sfunc,
                                 cx_tree_ptr_locations);
            // the next element is searched starting from this position
            current_node = match;
        }

        processed++;
    }
    return processed;
}

/**
 * Creates nodes for the elements of an array and adds them to the tree.
 *
 * A single element is added directly with cx_tree_add(). For more
 * elements, an iterator over the array is created and the work is handed
 * over to cx_tree_add_iter().
 *
 * @param src the array
 * @param num the number of elements in the array
 * @param elem_size the size of each array element
 * @param sfunc the search function
 * @param cfunc the function that creates the nodes
 * @param cdata additional data passed to the create function
 * @param failed receives the node that could not be added, NULL otherwise
 * @param root the node where the search starts
 * @param loc_parent offset of the parent pointer
 * @param loc_children offset of the pointer to the first child
 * @param loc_last_child offset of the pointer to the last child
 * (negative when the nodes have no such pointer)
 * @param loc_prev offset of the pointer to the previous sibling
 * @param loc_next offset of the pointer to the next sibling
 * @return the number of nodes that have been added
 */
size_t cx_tree_add_array(
        void const *src,
        size_t num,
        size_t elem_size,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **failed,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    // nothing has failed, yet
    *failed = NULL;

    // an empty array means no work at all
    if (num == 0) {
        return 0;
    }

    // the general case is handled by the iterator based function
    if (num > 1) {
        CxIterator iter = cxIterator(src, elem_size, num);
        return cx_tree_add_iter(cxIteratorRef(iter), sfunc,
                                cfunc, cdata, failed, root,
                                loc_parent, loc_children, loc_last_child,
                                loc_prev, loc_next);
    }

    // exactly one element: no iterator required
    void *node;
    int const status = cx_tree_add(
            src, sfunc, cfunc, cdata, &node, root,
            loc_parent, loc_children, loc_last_child,
            loc_prev, loc_next);
    if (status != 0) {
        *failed = node;
        return 0;
    }
    return 1;
}

mercurial