add cx_tree_search() - relates to #165

11 months ago

author
Mike Becker <universe@uap-core.de>
date
Thu, 15 Feb 2024 21:54:43 +0100 (11 months ago)
changeset 826
21840975d541
parent 825
3f324ea53152
child 827
13b40a598d16

add cx_tree_search() - relates to #165

src/cx/tree.h file | annotate | diff | comparison | revisions
src/tree.c file | annotate | diff | comparison | revisions
tests/test_tree.c file | annotate | diff | comparison | revisions
--- a/src/cx/tree.h	Wed Feb 14 22:32:13 2024 +0100
+++ b/src/cx/tree.h	Thu Feb 15 21:54:43 2024 +0100
@@ -47,6 +47,8 @@
  * Links a node to a (new) parent.
  *
  * If the node has already a parent, it is unlinked, first.
+ * If the parent has children already, the node is prepended to the list
+ * of all currently existing children.
  *
  * @param parent the parent node
  * @param node the node that shall be linked
@@ -94,22 +96,59 @@
  * contains the given \p data or if one of the children might contain
  * the data.
  *
+ * The function should use the returned integer to indicate how close the
+ * match is, where a negative number means that it does not match at all.
+ *
  * For example if a tree stores file path information, a node that is
  * describing a parent directory of a filename that is searched, shall
- * return 1 to indicate that a child node might contain the searched item.
- * On the other hand, if the node denotes a path that is not a prefix of
- * the searched filename, the function would return -1 to indicate that
- * the search does not need to be continued in that branch.
+ * return a positive number to indicate that a child node might contain the
+ * searched item. On the other hand, if the node denotes a path that is not a
+ * prefix of the searched filename, the function would return -1 to indicate
+ * that * the search does not need to be continued in that branch.
  *
  * @param node the node that is currently investigated
  * @param data the data that is searched for
  *
  * @return 0 if the node contains the data,
- * 1 if one of the children might contain the data,
- * -1 if neither the node, nor the children contains the data
+ * positive if one of the children might contain the data,
+ * negative if neither the node, nor the children contains the data
  */
 typedef int (*cx_tree_search_func)(void const *node, void const* data);
 
+
+/**
+ * Searches for data in a tree.
+ *
+ * When the data cannot be found exactly, the search function might return a
+ * closest result which might be a good starting point for adding a new node
+ * to the tree.
+ *
+ * Depending on the tree structure it is not necessarily guaranteed that the
+ * "closest" match is uniquely defined. This function will search for a node
+ * with the best match according to the \p sfunc (meaning: the return value of
+ * \p sfunc which is closest to zero). If that is also ambiguous, an arbitrary
+ * node matching the criteria is returned.
+ *
+ * @param root the root node
+ * @param data the data to search for
+ * @param sfunc the search function
+ * @param result where the result shall be stored
+ * @param loc_children offset in the node struct for the children linked list
+ * @param loc_next offset in the node struct for the next pointer
+ * @return zero if the node was found exactly, positive if a node was found that
+ * could contain the node (but doesn't right now), negative if the tree does not
+ * contain any node that might be related to the searched data
+ */
+__attribute__((__nonnull__))
+int cx_tree_search(
+        void const *root,
+        void const *data,
+        cx_tree_search_func sfunc,
+        void **result,
+        ptrdiff_t loc_children,
+        ptrdiff_t loc_next
+);
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
--- a/src/tree.c	Wed Feb 14 22:32:13 2024 +0100
+++ b/src/tree.c	Thu Feb 15 21:54:43 2024 +0100
@@ -28,6 +28,8 @@
 
 #include "cx/tree.h"
 
+#include "cx/array_list.h"
+
 #include <assert.h>
 
 #define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off)))
@@ -85,3 +87,85 @@
     tree_prev(node) = NULL;
     tree_next(node) = NULL;
 }
+
+int cx_tree_search(
+        void const *root,
+        void const *data,
+        cx_tree_search_func sfunc,
+        void **result,
+        ptrdiff_t loc_children,
+        ptrdiff_t loc_next
+) {
+    int ret;
+    *result = NULL;
+
+    // shortcut: compare root before doing anything else
+    ret = sfunc(root, data);
+    if (ret < 0) {
+        return ret;
+    } else if (ret == 0 || tree_children(root) == NULL) {
+        *result = (void*)root;
+        return ret;
+    }
+
+    // create a working stack
+    size_t work_cap = 32;
+    size_t work_size = 0;
+    void const **work = malloc(sizeof(void*) * work_cap);
+    #define work_add(node) cx_array_add(&work, &work_size, &work_cap, \
+        sizeof(void*), &(node), cx_array_default_reallocator)
+
+    // add the children of root to the working stack
+    {
+        void *c = tree_children(root);
+        while (c != NULL) {
+            work_add(c);
+            c = tree_next(c);
+        }
+    }
+
+    // remember a candidate for adding the data
+    // also remember the exact return code from sfunc
+    void *candidate = NULL;
+    int ret_candidate = -1;
+
+    // process the working stack
+    while (work_size > 0) {
+        // pop element
+        void const *node = work[--work_size];
+
+        // apply the search function
+        ret = sfunc(node, data);
+
+        if (ret == 0) {
+            // if found, exit the search
+            *result = (void*) node;
+            work_size = 0;
+            break;
+        } else if (ret > 0) {
+            // if children might contain the data, add them to the stack
+            void *c = tree_children(node);
+            while (c != NULL) {
+                work_add(c);
+                c = tree_next(c);
+            }
+
+            // remember this node in case no child is suitable
+            if (ret_candidate < 0 || ret < ret_candidate) {
+                candidate = (void *) node;
+                ret_candidate = ret;
+            }
+        }
+    }
+
+    // not found, but was there a candidate?
+    if (ret != 0 && candidate != NULL) {
+        ret = ret_candidate;
+        *result = candidate;
+    }
+
+    // free the working queue and return
+    #undef workq_add
+    free(work);
+    return ret;
+}
--- a/tests/test_tree.c	Wed Feb 14 22:32:13 2024 +0100
+++ b/tests/test_tree.c	Thu Feb 15 21:54:43 2024 +0100
@@ -42,6 +42,8 @@
     offsetof(tree_node, parent), offsetof(tree_node, children), \
     offsetof(tree_node, prev), offsetof(tree_node, next)
 
+#define tree_child_list offsetof(tree_node, children),offsetof(tree_node, next)
+
 CX_TEST(test_tree_link_new_child) {
     tree_node parent = {0};
     tree_node child = {0};
@@ -160,6 +162,94 @@
     }
 }
 
+static int test_tree_search_function(void const *n, void const *d) {
+    tree_node const *node = n;
+    int data = *((int const*)d);
+
+    if (data < node->data) return -1;
+    else if (data == node->data) return 0;
+    else return data - node->data;
+}
+
+CX_TEST(test_tree_search) {
+    tree_node root = {0};
+    tree_node a = {0};
+    tree_node b = {0};
+    tree_node c = {0};
+    tree_node aa = {0};
+    tree_node ab = {0};
+    tree_node ba = {0};
+    tree_node ca = {0};
+    tree_node cb = {0};
+    tree_node cc = {0};
+    tree_node cba = {0};
+
+    int testdata[] = {0, 10, 14, 18, 20, 25, 30, 32, 34, 36, 40};
+    tree_node* testnodes[] = {&root, &a, &aa, &ab, &b, &ba, &c, &ca, &cb, &cba, &cc};
+
+    for (unsigned i = 0 ; i <= 10 ; i++) {
+        testnodes[i]->data = testdata[i];
+    }
+
+    cx_tree_link(&root, &a, tree_node_layout);
+    cx_tree_link(&root, &b, tree_node_layout);
+    cx_tree_link(&root, &c, tree_node_layout);
+
+    cx_tree_link(&a, &aa, tree_node_layout);
+    cx_tree_link(&a, &ab, tree_node_layout);
+
+    cx_tree_link(&b, &ba, tree_node_layout);
+
+    cx_tree_link(&c, &ca, tree_node_layout);
+    cx_tree_link(&c, &cb, tree_node_layout);
+    cx_tree_link(&c, &cc, tree_node_layout);
+
+    cx_tree_link(&cb, &cba, tree_node_layout);
+
+    int s;
+    int r;
+    tree_node *n;
+    CX_TEST_DO {
+        for (unsigned i = 0 ; i <= 10 ; i++) {
+            s = testdata[i];
+            r = cx_tree_search(&root, &s, test_tree_search_function,
+                               (void **) &n, tree_child_list);
+            CX_TEST_ASSERT(r == 0);
+            CX_TEST_ASSERT(n == testnodes[i]);
+        }
+
+        s = -5;
+        r = cx_tree_search(&root, &s, test_tree_search_function,
+                           (void **) &n, tree_child_list);
+        CX_TEST_ASSERT(r < 0);
+        CX_TEST_ASSERT(n == NULL);
+
+        s = 26;
+        r = cx_tree_search(&root, &s, test_tree_search_function,
+                           (void **) &n, tree_child_list);
+        CX_TEST_ASSERT(r > 0);
+        CX_TEST_ASSERT(n == &ba);
+
+        s = 35;
+        r = cx_tree_search(&root, &s, test_tree_search_function,
+                           (void **) &n, tree_child_list);
+        CX_TEST_ASSERT(r > 0);
+        CX_TEST_ASSERT(n == &cb);
+
+        s = 38;
+        r = cx_tree_search(&root, &s, test_tree_search_function,
+                           (void **) &n, tree_child_list);
+        CX_TEST_ASSERT(r > 0);
+        CX_TEST_ASSERT(n == &cba);
+
+        s = 42;
+        r = cx_tree_search(&root, &s, test_tree_search_function,
+                           (void **) &n, tree_child_list);
+        CX_TEST_ASSERT(r > 0);
+        CX_TEST_ASSERT(n == &cc);
+    }
+}
+
 CxTestSuite *cx_test_suite_tree_low_level(void) {
     CxTestSuite *suite = cx_test_suite_new("tree (low level)");
 
@@ -167,6 +257,7 @@
     cx_test_register(suite, test_tree_link_add_child);
     cx_test_register(suite, test_tree_link_move_to_other_parent);
     cx_test_register(suite, test_tree_unlink);
+    cx_test_register(suite, test_tree_search);
 
     return suite;
 }

mercurial