Mon, 16 Sep 2024 19:52:17 +0200
add optimized implementation of insert_sorted for array lists
relates to #416
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/tree.h"

#include "cx/array_list.h"

#include <assert.h>

// Accesses the pointer that is stored `off` bytes behind `cur` as an lvalue.
#define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off)))

// Shortcuts for the individual link pointers of a node.
// They expect a variable named like the respective loc_* offset in scope.
#define tree_parent(node) CX_TREE_PTR(node, loc_parent)
#define tree_children(node) CX_TREE_PTR(node, loc_children)
#define tree_last_child(node) CX_TREE_PTR(node, loc_last_child)
#define tree_prev(node) CX_TREE_PTR(node, loc_prev)
#define tree_next(node) CX_TREE_PTR(node, loc_next)

// Forwards all five offset parameters of the current function at once.
#define cx_tree_ptr_locations \
    loc_parent, loc_children, loc_last_child, loc_prev, loc_next

/**
 * Sets all link pointers of a node to NULL.
 *
 * The last-child pointer is only touched when loc_last_child is
 * non-negative, i.e. when the node structure actually has such a member.
 */
static void cx_tree_zero_pointers(
        void *node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    tree_parent(node) = NULL;
    tree_prev(node) = NULL;
    tree_next(node) = NULL;
    tree_children(node) = NULL;
    if (loc_last_child >= 0) {
        tree_last_child(node) = NULL;
    }
}

/**
 * Appends a node to the end of the child list of a parent.
 *
 * Nothing happens when the node is already a child of that parent.
 * When the node currently belongs to a different parent, it is unlinked
 * from there first.
 *
 * @param parent the new parent
 * @param node the node that shall become the last child of the parent
 * @param loc_parent offset of the parent pointer within a node
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_last_child offset of the last-child pointer, negative if the
 * node structure does not have one
 * @param loc_prev offset of the previous-sibling pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 */
void cx_tree_link(
        void *restrict parent,
        void *restrict node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    void *current_parent = tree_parent(node);
    if (current_parent == parent) return;
    if (current_parent != NULL) {
        cx_tree_unlink(node, cx_tree_ptr_locations);
    }

    if (tree_children(parent) == NULL) {
        // first child of this parent
        tree_children(parent) = node;
        if (loc_last_child >= 0) {
            tree_last_child(parent) = node;
        }
    } else {
        if (loc_last_child >= 0) {
            // the tail of the child list is known
            void *child = tree_last_child(parent);
            tree_prev(node) = child;
            tree_next(child) = node;
            tree_last_child(parent) = node;
        } else {
            // no last-child pointer available, walk to the tail
            void *child = tree_children(parent);
            void *next;
            while ((next = tree_next(child)) != NULL) {
                child = next;
            }
            tree_prev(node) = child;
            tree_next(child) = node;
        }
    }
    tree_parent(node) = parent;
}

/**
 * Detaches a node from its parent and its siblings.
 *
 * Nothing happens when the node has no parent.
 * The children pointer of the node is not modified.
 *
 * @param node the node to unlink
 * @param loc_parent offset of the parent pointer within a node
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_last_child offset of the last-child pointer, negative if the
 * node structure does not have one
 * @param loc_prev offset of the previous-sibling pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 */
void cx_tree_unlink(
        void *node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    if (tree_parent(node) == NULL) return;

    void *left = tree_prev(node);
    void *right = tree_next(node);
    void *parent = tree_parent(node);

    // sanity checks: a node with a left (right) sibling
    // cannot be the first (last) child of its parent
    assert(left == NULL || tree_children(parent) != node);
    assert(right == NULL || loc_last_child < 0 ||
           tree_last_child(parent) != node);

    if (left == NULL) {
        tree_children(parent) = right;
    } else {
        tree_next(left) = right;
    }
    if (right == NULL) {
        if (loc_last_child >= 0) {
            tree_last_child(parent) = left;
        }
    } else {
        tree_prev(right) = left;
    }

    tree_parent(node) = NULL;
    tree_prev(node) = NULL;
    tree_next(node) = NULL;
}

/**
 * Searches the tree below root for the node that is closest to the
 * searched node according to the search function.
 *
 * The search function is applied as sfunc(tree_node, node). This function
 * treats a negative result as "the subtree of tree_node cannot contain the
 * node", zero as an exact match and a positive result as "the subtree of
 * tree_node might contain the node". Among the positive results, the node
 * with the smallest value is remembered as the closest candidate.
 *
 * @param root the node where the search starts
 * @param node the node (or data, see cx_tree_search_data()) to look for
 * @param sfunc the search function
 * @param result receives the exact match, otherwise the closest candidate,
 * or NULL when not even root is suitable
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 * @return zero for an exact match, the positive search function result of
 * the closest candidate, or the negative result obtained for root
 */
int cx_tree_search(
        void const *root,
        void const *node,
        cx_tree_search_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    int ret;
    *result = NULL;

    // shortcut: compare root before doing anything else
    ret = sfunc(root, node);
    if (ret < 0) {
        return ret;
    } else if (ret == 0 || tree_children(root) == NULL) {
        *result = (void*)root;
        return ret;
    }

    // create a working stack
    // NOTE(review): allocation failures of the working stack are not
    // handled here - confirm how the cx_array_* macros report them
    CX_ARRAY_DECLARE(void const*, work);
    cx_array_initialize(work, 32);

    // add the children of root to the working stack
    {
        void *c = tree_children(root);
        while (c != NULL) {
            cx_array_simple_add(work, c);
            c = tree_next(c);
        }
    }

    // remember a candidate for adding the data
    // also remember the exact return code from sfunc
    void *candidate = (void *) root;
    int ret_candidate = ret;

    // process the working stack
    while (work_size > 0) {
        // pop element
        void const *elem = work[--work_size];
        // apply the search function
        ret = sfunc(elem, node);
        if (ret == 0) {
            // if found, exit the search
            *result = (void *) elem;
            work_size = 0;
            break;
        } else if (ret > 0) {
            // if children might contain the data, add them to the stack
            void *c = tree_children(elem);
            while (c != NULL) {
                cx_array_simple_add(work, c);
                c = tree_next(c);
            }

            // remember this node in case no child is suitable
            if (ret < ret_candidate) {
                candidate = (void *) elem;
                ret_candidate = ret;
            }
        }
    }

    // not found, but was there a candidate?
    if (ret != 0 && candidate != NULL) {
        ret = ret_candidate;
        *result = candidate;
    }

    // free the working stack and return
    free(work);
    return ret;
}

/**
 * Searches the tree for the node that is closest to the specified data.
 *
 * Forwards to cx_tree_search() with the search function cast to
 * cx_tree_search_func; see there for the parameters and the return value.
 */
int cx_tree_search_data(
        void const *root,
        void const *data,
        cx_tree_search_data_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    // it is basically the same implementation
    return cx_tree_search(
            root, data,
            (cx_tree_search_func) sfunc,
            result,
            loc_children, loc_next);
}

/**
 * Iterator callback: the iterator is valid as long as it points to a node.
 */
static bool cx_tree_iter_valid(void const *it) {
    struct cx_tree_iterator_s const *iter = it;
    return iter->node != NULL;
}

/**
 * Iterator callback: returns the node the iterator currently points to.
 */
static void *cx_tree_iter_current(void const *it) {
    struct cx_tree_iterator_s const *iter = it;
    return iter->node;
}

/**
 * Iterator callback: advances the depth-first iterator.
 *
 * Children are entered first unless the skip flag is set, then siblings
 * are visited, and finally the iterator climbs back up via the node stack.
 * With visit_on_exit set, each node is yielded a second time (with the
 * exiting flag set) before the iterator moves on.
 * When the traversal is complete, the node stack is freed and the
 * iterator becomes invalid.
 */
static void cx_tree_iter_next(void *it) {
    struct cx_tree_iterator_s *iter = it;
    ptrdiff_t const loc_next = iter->loc_next;
    ptrdiff_t const loc_children = iter->loc_children;

    void *children;

    // check if we are currently exiting or entering nodes
    if (iter->exiting) {
        children = NULL;
        // skipping on exit is pointless, just clear the flag
        iter->skip = false;
    } else {
        if (iter->skip) {
            // skip flag is set, pretend that there are no children
            iter->skip = false;
            children = NULL;
        } else {
            // try to enter the children (if any)
            children = tree_children(iter->node);
        }
    }

    if (children == NULL) {
        // search for the next node
        void *next;
        cx_tree_iter_search_next:
        // check if there is a sibling
        if (iter->exiting) {
            next = iter->node_next;
        } else {
            next = tree_next(iter->node);
            iter->node_next = next;
        }
        if (next == NULL) {
            // no sibling, we are done with this node and exit
            if (iter->visit_on_exit && !iter->exiting) {
                // iter is supposed to visit the node again
                iter->exiting = true;
            } else {
                iter->exiting = false;
                if (iter->depth == 1) {
                    // there is no parent - we have iterated the entire tree
                    // invalidate the iterator and free the node stack
                    iter->node = iter->node_next = NULL;
                    iter->stack_capacity = iter->depth = 0;
                    free(iter->stack);
                    iter->stack = NULL;
                } else {
                    // the parent node can be obtained from the top of stack
                    // this way we can avoid the loc_parent in the iterator
                    iter->depth--;
                    iter->node = iter->stack[iter->depth - 1];
                    // retry with the parent node to find a sibling
                    goto cx_tree_iter_search_next;
                }
            }
        } else {
            if (iter->visit_on_exit && !iter->exiting) {
                // iter is supposed to visit the node again
                iter->exiting = true;
            } else {
                iter->exiting = false;
                // move to the sibling
                iter->counter++;
                iter->node = next;
                // new top of stack is the sibling
                iter->stack[iter->depth - 1] = next;
            }
        }
    } else {
        // node has children, push the first child onto the stack and enter it
        // NOTE(review): depth is not incremented explicitly here, presumably
        // it aliases the stack size maintained by cx_array_simple_add -
        // confirm in cx/tree.h; the result of the add is also not checked
        cx_array_simple_add(iter->stack, children);
        iter->node = children;
        iter->counter++;
    }
}

/**
 * Creates a depth-first iterator that starts at the specified root node.
 *
 * The iterator allocates a node stack that is freed automatically when the
 * traversal is completed by cx_tree_iter_next().
 * If the node stack cannot be allocated, the returned iterator is invalid
 * right away (it points to no node and owns no stack).
 *
 * @param root the node where the traversal starts
 * @param visit_on_exit true, if nodes shall be yielded a second time when
 * the iterator leaves them
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 * @return the new iterator
 */
CxTreeIterator cx_tree_iterator(
        void *root,
        bool visit_on_exit,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    CxTreeIterator iter;
    iter.loc_children = loc_children;
    iter.loc_next = loc_next;
    iter.visit_on_exit = visit_on_exit;

    // initial traversal state
    iter.node_next = NULL;
    iter.exiting = false;
    iter.skip = false;

    // assign base iterator functions
    iter.base.mutating = false;
    iter.base.remove = false;
    iter.base.current_impl = NULL;
    iter.base.valid = cx_tree_iter_valid;
    iter.base.next = cx_tree_iter_next;
    iter.base.current = cx_tree_iter_current;

    // allocate stack
    iter.stack = malloc(sizeof(void *) * 16);
    if (iter.stack == NULL) {
        // out of memory: hand out an iterator that is invalid from the
        // start instead of writing through a NULL pointer
        iter.stack_capacity = 0;
        iter.depth = 0;
        iter.counter = 0;
        iter.node = NULL;
        return iter;
    }
    iter.stack_capacity = 16;

    // visit the root node
    iter.node = root;
    iter.counter = 1;
    iter.depth = 1;
    iter.stack[0] = root;

    return iter;
}

/**
 * Visitor callback: the visitor is valid as long as it points to a node.
 */
static bool cx_tree_visitor_valid(void const *it) {
    struct cx_tree_visitor_s const *iter = it;
    return iter->node != NULL;
}

/**
 * Visitor callback: returns the node the visitor currently points to.
 */
static void *cx_tree_visitor_current(void const *it) {
    struct cx_tree_visitor_s const *iter = it;
    return iter->node;
}

/**
 * Appends all siblings that follow the specified node to the visitor queue.
 *
 * The new entries get the same depth as the current last entry of the
 * queue; therefore the queue must not be empty when this is called.
 */
__attribute__((__nonnull__))
static void cx_tree_visitor_enqueue_siblings(
        struct cx_tree_visitor_s *iter, void *node, ptrdiff_t loc_next) {
    node = tree_next(node);
    while (node != NULL) {
        struct cx_tree_visitor_queue_s *q;
        // NOTE(review): the allocation result is not checked
        q = malloc(sizeof(struct cx_tree_visitor_queue_s));
        q->depth = iter->queue_last->depth;
        q->node = node;
        iter->queue_last->next = q;
        iter->queue_last = q;
        node = tree_next(node);
    }
    iter->queue_last->next = NULL;
}

/**
 * Visitor callback: advances the breadth-first visitor.
 *
 * Unless the skip flag is set, the children of the current node are
 * appended to the queue. Then the next queue entry is removed and becomes
 * the current node. The visitor becomes invalid when the queue is empty.
 */
static void cx_tree_visitor_next(void *it) {
    struct cx_tree_visitor_s *iter = it;
    ptrdiff_t const loc_next = iter->loc_next;
    ptrdiff_t const loc_children = iter->loc_children;

    // add the children of the current node to the queue
    // unless the skip flag is set
    void *children;
    if (iter->skip) {
        iter->skip = false;
        children = NULL;
    } else {
        children = tree_children(iter->node);
    }
    if (children != NULL) {
        struct cx_tree_visitor_queue_s *q;
        // NOTE(review): the allocation result is not checked
        q = malloc(sizeof(struct cx_tree_visitor_queue_s));
        q->depth = iter->depth + 1;
        q->node = children;
        if (iter->queue_last == NULL) {
            assert(iter->queue_next == NULL);
            iter->queue_next = q;
        } else {
            iter->queue_last->next = q;
        }
        iter->queue_last = q;
        cx_tree_visitor_enqueue_siblings(iter, children, loc_next);
    }

    // check if there is a next node
    if (iter->queue_next == NULL) {
        iter->node = NULL;
        return;
    }

    // dequeue the next node
    iter->node = iter->queue_next->node;
    iter->depth = iter->queue_next->depth;
    {
        struct cx_tree_visitor_queue_s *q = iter->queue_next;
        iter->queue_next = q->next;
        if (iter->queue_next == NULL) {
            assert(iter->queue_last == q);
            iter->queue_last = NULL;
        }
        free(q);
    }

    // increment the node counter
    iter->counter++;
}

/**
 * Creates a breadth-first visitor that starts at the specified root node.
 *
 * The queue is initially empty; entries are allocated while advancing and
 * each entry is freed when it is dequeued by cx_tree_visitor_next().
 *
 * @param root the node where the traversal starts
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 * @return the new visitor
 */
CxTreeVisitor cx_tree_visitor(
        void *root,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
) {
    CxTreeVisitor iter;
    iter.loc_children = loc_children;
    iter.loc_next = loc_next;

    // visit the root node
    iter.node = root;
    iter.counter = 1;
    iter.depth = 1;
    iter.skip = false;
    iter.queue_next = NULL;
    iter.queue_last = NULL;

    // assign base iterator functions
    iter.base.mutating = false;
    iter.base.remove = false;
    iter.base.current_impl = NULL;
    iter.base.valid = cx_tree_visitor_valid;
    iter.base.next = cx_tree_visitor_next;
    iter.base.current = cx_tree_visitor_current;

    return iter;
}

/**
 * Links a node that compares equal to an existing node.
 *
 * The duplicate becomes a sibling of the original. When the original has
 * no parent, the duplicate becomes a child of the original instead.
 */
static void cx_tree_add_link_duplicate(
        void *original, void *duplicate,
        ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev, ptrdiff_t loc_next
) {
    void *shared_parent = tree_parent(original);
    if (shared_parent == NULL) {
        cx_tree_link(original, duplicate, cx_tree_ptr_locations);
    } else {
        cx_tree_link(shared_parent, duplicate, cx_tree_ptr_locations);
    }
}

/**
 * Links a new node below the closest matching parent.
 *
 * Existing children of the parent for which sfunc(node, child) is positive
 * are moved below the new node before the new node is appended to the
 * children of the parent.
 */
static void cx_tree_add_link_new(
        void *parent, void *node, cx_tree_search_func sfunc,
        ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev, ptrdiff_t loc_next
) {
    // check the current children one by one,
    // if they could be children of the new node
    void *child = tree_children(parent);
    while (child != NULL) {
        // fetch the successor first, re-linking changes the sibling chain
        void *next = tree_next(child);

        if (sfunc(node, child) > 0) {
            // the sibling could be a child -> re-link
            cx_tree_link(node, child, cx_tree_ptr_locations);
        }

        child = next;
    }

    // add new node as new child
    cx_tree_link(parent, node, cx_tree_ptr_locations);
}

/**
 * Creates a node for the specified data and adds it to the tree.
 *
 * The created node is written to cnode in every case. When the node does
 * not fit into the tree, it is neither linked nor freed by this function.
 *
 * @param src the data that is passed to the node creation function
 * @param sfunc the search function used to find the position in the tree
 * @param cfunc the node creation function
 * @param cdata additional data that is passed to the creation function
 * @param cnode receives the created node (NULL if the creation failed)
 * @param root the root of the tree
 * @param loc_parent offset of the parent pointer within a node
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_last_child offset of the last-child pointer, negative if the
 * node structure does not have one
 * @param loc_prev offset of the previous-sibling pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 * @return zero when the node was linked, one when the node creation failed
 * or the node does not fit into the tree
 */
int cx_tree_add(
        void const *src,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **cnode,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    *cnode = cfunc(src, cdata);
    if (*cnode == NULL) return 1;
    cx_tree_zero_pointers(*cnode, cx_tree_ptr_locations);

    void *match = NULL;
    int result = cx_tree_search(
            root,
            *cnode,
            sfunc,
            &match,
            loc_children,
            loc_next
    );

    if (result < 0) {
        // node does not fit into the tree - return non-zero value
        return 1;
    } else if (result == 0) {
        // data already found in the tree, link duplicate
        cx_tree_add_link_duplicate(match, *cnode, cx_tree_ptr_locations);
    } else {
        // closest match found, add new node
        cx_tree_add_link_new(match, *cnode, sfunc, cx_tree_ptr_locations);
    }

    return 0;
}

// Number of ancestors cx_tree_add_iter() tries, one level at a time,
// before it restarts the search from the root.
unsigned int cx_tree_add_look_around_depth = 3;

/**
 * Creates and adds a node for each element the iterator yields.
 *
 * The iterator is expected to yield pointers to the data pointers, the
 * dereferenced value is what gets passed to the creation function.
 *
 * The search for each new node starts at the position where the previous
 * node was added. When the new node does not fit there, the search is
 * retried from up to cx_tree_add_look_around_depth ancestors and finally
 * from root. The look-around never leaves the subtree of root.
 *
 * Processing stops at the first element for which the node creation fails
 * or for which the created node does not fit into the tree. In the latter
 * case, the unlinked node is written to failed; it is not freed here.
 *
 * @param iter the iterator that yields the elements
 * @param sfunc the search function used to find the position in the tree
 * @param cfunc the node creation function
 * @param cdata additional data that is passed to the creation function
 * @param failed receives the node that could not be added, NULL otherwise
 * @param root the root of the tree
 * @param loc_parent offset of the parent pointer within a node
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_last_child offset of the last-child pointer, negative if the
 * node structure does not have one
 * @param loc_prev offset of the previous-sibling pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 * @return the number of nodes that have been added to the tree
 */
size_t cx_tree_add_iter(
        struct cx_iterator_base_s *iter,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **failed,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    // erase the failed pointer
    *failed = NULL;

    // iter not valid? cancel...
    if (!iter->valid(iter)) return 0;

    size_t processed = 0;
    void *current_node = root;
    void const *elem;

    for (void **eptr;
         iter->valid(iter) && (eptr = iter->current(iter)) != NULL;
         iter->next(iter)) {
        elem = *eptr;

        // create the new node
        void *new_node = cfunc(elem, cdata);
        if (new_node == NULL) return processed;
        cx_tree_zero_pointers(new_node, cx_tree_ptr_locations);

        // start searching from current node
        void *match = NULL;
        int result;
        unsigned int look_around_retries = cx_tree_add_look_around_depth;
        cx_tree_add_look_around_retry:
        result = cx_tree_search(
                current_node,
                new_node,
                sfunc,
                &match,
                loc_children,
                loc_next
        );

        if (result < 0) {
            // traverse upwards and try to find better parents, but never
            // climb above root: root may itself be a node within a larger
            // tree, and retrying from root after root has already rejected
            // the node would never terminate
            void *parent = current_node == root
                           ? NULL : tree_parent(current_node);
            if (parent != NULL) {
                if (look_around_retries > 0) {
                    look_around_retries--;
                    current_node = parent;
                } else {
                    // look around retries exhausted, start from the root
                    current_node = root;
                }
                goto cx_tree_add_look_around_retry;
            } else {
                // no parents left to try, so we failed
                *failed = new_node;
                return processed;
            }
        } else if (result == 0) {
            // data already found in the tree, link duplicate
            cx_tree_add_link_duplicate(match, new_node, cx_tree_ptr_locations);
            // but stick with the original match, in case we needed a new root
            current_node = match;
        } else {
            // closest match found, add new node as child
            cx_tree_add_link_new(match, new_node, sfunc,
                                 cx_tree_ptr_locations);
            current_node = match;
        }

        processed++;
    }
    return processed;
}

/**
 * Creates and adds a node for each element of an array.
 *
 * For more than one element this creates an iterator over the array and
 * forwards to cx_tree_add_iter(); see there for the details.
 *
 * @param src the array
 * @param num the number of elements in the array
 * @param elem_size the size of each array element
 * @param sfunc the search function used to find the position in the tree
 * @param cfunc the node creation function
 * @param cdata additional data that is passed to the creation function
 * @param failed receives the node that could not be added, NULL otherwise
 * @param root the root of the tree
 * @param loc_parent offset of the parent pointer within a node
 * @param loc_children offset of the first-child pointer within a node
 * @param loc_last_child offset of the last-child pointer, negative if the
 * node structure does not have one
 * @param loc_prev offset of the previous-sibling pointer within a node
 * @param loc_next offset of the next-sibling pointer within a node
 * @return the number of nodes that have been added to the tree
 */
size_t cx_tree_add_array(
        void const *src,
        size_t num,
        size_t elem_size,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **failed,
        void *root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
) {
    // erase failed pointer
    *failed = NULL;

    // super special case: zero elements
    if (num == 0) {
        return 0;
    }

    // special case: one element does not need an iterator
    // NOTE(review): this path hands src itself to the creation function,
    // whereas cx_tree_add_iter() passes the dereferenced element (*eptr) -
    // confirm that both paths give cfunc the same kind of pointer
    if (num == 1) {
        void *node;
        if (0 == cx_tree_add(
                src, sfunc, cfunc, cdata, &node, root,
                loc_parent, loc_children, loc_last_child,
                loc_prev, loc_next)) {
            return 1;
        } else {
            *failed = node;
            return 0;
        }
    }

    // otherwise, create iterator and hand over to other function
    CxIterator iter = cxIterator(src, elem_size, num);
    return cx_tree_add_iter(cxIteratorRef(iter), sfunc,
                            cfunc, cdata, failed, root,
                            loc_parent, loc_children, loc_last_child,
                            loc_prev, loc_next);
}