add max depth for tree search - closes #459

Sat, 19 Oct 2024 13:08:06 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 19 Oct 2024 13:08:06 +0200
changeset 930
6540096c17b7
parent 929
192a440b99df
child 931
be71809e69d1

add max depth for tree search - closes #459

src/cx/tree.h file | annotate | diff | comparison | revisions
src/tree.c file | annotate | diff | comparison | revisions
tests/test_tree.c file | annotate | diff | comparison | revisions
--- a/src/cx/tree.h	Sun Oct 13 16:47:14 2024 +0200
+++ b/src/cx/tree.h	Sat Oct 19 13:08:06 2024 +0200
@@ -298,6 +298,11 @@
 );
 
 /**
+ * Macro that can be used instead of the magic value for infinite search depth.
+ */
+#define CX_TREE_SEARCH_INFINITE_DEPTH 0
+
+/**
  * Function pointer for a search function.
  *
  * A function of this kind shall check if the specified \p node
@@ -306,6 +311,8 @@
  *
  * The function should use the returned integer to indicate how close the
  * match is, where a negative number means that it does not match at all.
+ * Zero means exact match and a positive number is an implementation defined
+ * measure for the distance to an exact match.
  *
  * For example if a tree stores file path information, a node that is
  * describing a parent directory of a filename that is searched, shall
@@ -333,6 +340,8 @@
  *
  * The function should use the returned integer to indicate how close the
  * match is, where a negative number means that it does not match at all.
+ * Zero means exact match and a positive number is an implementation defined
+ * measure for the distance to an exact match.
  *
  * For example if a tree stores file path information, a node that is
  * describing a parent directory of a filename that is searched, shall
@@ -364,6 +373,7 @@
  * node matching the criteria is returned.
  *
  * @param root the root node
+ * @param depth the maximum depth (zero=indefinite, one=just root)
  * @param data the data to search for
  * @param sfunc the search function
  * @param result where the result shall be stored
@@ -376,6 +386,7 @@
 __attribute__((__nonnull__))
 int cx_tree_search_data(
         const void *root,
+        size_t depth,
         const void *data,
         cx_tree_search_data_func sfunc,
         void **result,
@@ -397,6 +408,7 @@
  * node matching the criteria is returned.
  *
  * @param root the root node
+* @param depth the maximum depth (zero=indefinite, one=just root)
  * @param node the node to search for
  * @param sfunc the search function
  * @param result where the result shall be stored
@@ -409,6 +421,7 @@
 __attribute__((__nonnull__))
 int cx_tree_search(
         const void *root,
+        size_t depth,
         const void *node,
         cx_tree_search_func sfunc,
         void **result,
@@ -839,7 +852,8 @@
     void *(*find)(
             struct cx_tree_s *tree,
             const void *subtree,
-            const void *data
+            const void *data,
+            size_t depth
     );
 };
 
@@ -1031,12 +1045,15 @@
         CxTree *tree,
         const void *data
 ) {
-    return tree->cl->find(tree, tree->root, data);
+    return tree->cl->find(tree, tree->root, data, 0);
 }
 
 /**
  * Searches the data in the specified subtree.
  *
+ * When \p max_depth is zero, the depth is not limited.
+ * The \p subtree_root itself is on depth 1 and its children have depth 2.
+ *
  * \note When \p subtree_root is not part of the \p tree, the behavior is
  * undefined.
  *
@@ -1047,15 +1064,17 @@
  * @param tree the tree
  * @param data the data to search for
  * @param subtree_root the node where to start
+ * @param max_depth the maximum search depth
  * @return the first matching node, or \c NULL when the data cannot be found
  */
 __attribute__((__nonnull__))
 static inline void *cxTreeFindInSubtree(
         CxTree *tree,
         const void *data,
-        void *subtree_root
+        void *subtree_root,
+        size_t max_depth
 ) {
-    return tree->cl->find(tree, subtree_root, data);
+    return tree->cl->find(tree, subtree_root, data, max_depth);
 }
 
 /**
@@ -1095,7 +1114,7 @@
  * @param tree the tree to iterate
  * @param node the node where to start
  * @param visit_on_exit true, if the iterator shall visit a node again when
- * leaving the sub-tree
+ * leaving the subtree
  * @return a tree iterator (depth-first)
  * @see cxTreeVisit()
  */
@@ -1133,7 +1152,7 @@
  *
  * @param tree the tree to iterate
  * @param visit_on_exit true, if the iterator shall visit a node again when
- * leaving the sub-tree
+ * leaving the subtree
  * @return a tree iterator (depth-first)
  * @see cxTreeVisit()
  */
--- a/src/tree.c	Sun Oct 13 16:47:14 2024 +0200
+++ b/src/tree.c	Sat Oct 19 13:08:06 2024 +0200
@@ -172,84 +172,81 @@
 
 int cx_tree_search(
         const void *root,
+        size_t depth,
         const void *node,
         cx_tree_search_func sfunc,
         void **result,
         ptrdiff_t loc_children,
         ptrdiff_t loc_next
 ) {
-    int ret;
+    // help avoiding bugs due to uninitialized memory
+    assert(result != NULL);
     *result = NULL;
 
-    // shortcut: compare root before doing anything else
-    ret = sfunc(root, node);
+    // remember return value for best match
+    int ret = sfunc(root, node);
     if (ret < 0) {
-        return ret;
-    } else if (ret == 0 || tree_children(root) == NULL) {
-        *result = (void*)root;
+        // not contained, exit
+        return -1;
+    }
+    *result = (void*) root;
+    // if root is already exact match, exit
+    if (ret == 0) {
+        return 0;
+    }
+
+    // when depth is one, we are already done
+    if (depth == 1) {
         return ret;
     }
 
-    // create a working stack
-    CX_ARRAY_DECLARE(const void *, work);
-    cx_array_initialize(work, 32);
+    // special case: indefinite depth
+    if (depth == 0) {
+        depth = SIZE_MAX;
+    }
+
+    // create an iterator
+    CxTreeIterator iter = cx_tree_iterator(
+            (void*) root, false, loc_children, loc_next
+    );
+
+    // skip root, we already handled it
+    cxIteratorNext(iter);
 
-    // add the children of root to the working stack
-    {
-        void *c = tree_children(root);
-        while (c != NULL) {
-            cx_array_simple_add(work, c);
-            c = tree_next(c);
+    // loop through the remaining tree
+    cx_foreach(void *, elem, iter) {
+        // investigate the current node
+        int ret_elem = sfunc(elem, node);
+        if (ret_elem == 0) {
+            // if found, exit the search
+            *result = (void *) elem;
+            ret = 0;
+            break;
+        } else if (ret_elem > 0 && ret_elem < ret) {
+            // new distance is better
+            *result = elem;
+            ret = ret_elem;
+        } else {
+            // not contained or distance is worse, skip entire subtree
+            cxTreeIteratorContinue(iter);
+        }
+
+        // when we reached the max depth, skip the subtree
+        if (iter.depth == depth) {
+            cxTreeIteratorContinue(iter);
         }
     }
 
-    // remember a candidate for adding the data
-    // also remember the exact return code from sfunc
-    void *candidate = (void *) root;
-    int ret_candidate = ret;
-
-    // process the working stack
-    while (work_size > 0) {
-        // pop element
-        const void *elem = work[--work_size];
-
-        // apply the search function
-        ret = sfunc(elem, node);
+    // dispose the iterator as we might have exited the loop early
+    cxTreeIteratorDispose(&iter);
 
-        if (ret == 0) {
-            // if found, exit the search
-            *result = (void *) elem;
-            work_size = 0;
-            break;
-        } else if (ret > 0) {
-            // if children might contain the data, add them to the stack
-            void *c = tree_children(elem);
-            while (c != NULL) {
-                cx_array_simple_add(work, c);
-                c = tree_next(c);
-            }
-
-            // remember this node in case no child is suitable
-            if (ret < ret_candidate) {
-                candidate = (void *) elem;
-                ret_candidate = ret;
-            }
-        }
-    }
-
-    // not found, but was there a candidate?
-    if (ret != 0 && candidate != NULL) {
-        ret = ret_candidate;
-        *result = candidate;
-    }
-
-    // free the working queue and return
-    free(work);
+    assert(ret < 0 || *result != NULL);
     return ret;
 }
 
 int cx_tree_search_data(
         const void *root,
+        size_t depth,
         const void *data,
         cx_tree_search_data_func sfunc,
         void **result,
@@ -258,7 +255,7 @@
 ) {
     // it is basically the same implementation
     return cx_tree_search(
-            root, data,
+            root, depth, data,
             (cx_tree_search_func) sfunc,
             result,
             loc_children, loc_next);
@@ -568,6 +565,7 @@
     void *match = NULL;
     int result = cx_tree_search(
             root,
+            0,
             *cnode,
             sfunc,
             &match,
@@ -632,6 +630,7 @@
         cx_tree_add_look_around_retry:
         result = cx_tree_search(
                 current_node,
+                0,
                 new_node,
                 sfunc,
                 &match,
@@ -771,13 +770,15 @@
 static void *cx_tree_default_find(
         CxTree *tree,
         const void *subtree,
-        const void *data
+        const void *data,
+        size_t depth
 ) {
     if (tree->root == NULL) return NULL;
 
     void *found;
     if (0 == cx_tree_search_data(
             subtree,
+            depth,
             data,
             tree->search_data,
             &found,
--- a/tests/test_tree.c	Sun Oct 13 16:47:14 2024 +0200
+++ b/tests/test_tree.c	Sat Oct 19 13:08:06 2024 +0200
@@ -429,44 +429,165 @@
     CX_TEST_DO {
         for (unsigned i = 0 ; i <= 10 ; i++) {
             s = testdata[i];
-            r = cx_tree_search_data(&root, &s, test_tree_search_function,
+            r = cx_tree_search_data(&root, 0, &s, test_tree_search_function,
                                (void **) &n, tree_children(tree_node));
             CX_TEST_ASSERT(r == 0);
             CX_TEST_ASSERT(n == testnodes[i]);
         }
 
         s = -5;
-        r = cx_tree_search_data(&root, &s, test_tree_search_function,
+        r = cx_tree_search_data(&root, 0, &s, test_tree_search_function,
                            (void **) &n, tree_children(tree_node));
         CX_TEST_ASSERT(r < 0);
         CX_TEST_ASSERT(n == NULL);
 
         s = 26;
-        r = cx_tree_search_data(&root, &s, test_tree_search_function,
+        r = cx_tree_search_data(&root, 0, &s, test_tree_search_function,
                            (void **) &n, tree_children(tree_node));
         CX_TEST_ASSERT(r > 0);
         CX_TEST_ASSERT(n == &ba);
 
         s = 35;
-        r = cx_tree_search_data(&root, &s, test_tree_search_function,
+        r = cx_tree_search_data(&root, 0, &s, test_tree_search_function,
                            (void **) &n, tree_children(tree_node));
         CX_TEST_ASSERT(r > 0);
         CX_TEST_ASSERT(n == &cb);
 
         s = 38;
-        r = cx_tree_search_data(&root, &s, test_tree_search_function,
+        r = cx_tree_search_data(&root, 0, &s, test_tree_search_function,
                            (void **) &n, tree_children(tree_node));
         CX_TEST_ASSERT(r > 0);
         CX_TEST_ASSERT(n == &cba);
 
         s = 42;
-        r = cx_tree_search_data(&root, &s, test_tree_search_function,
+        r = cx_tree_search_data(&root, 0, &s, test_tree_search_function,
                            (void **) &n, tree_children(tree_node));
         CX_TEST_ASSERT(r > 0);
         CX_TEST_ASSERT(n == &cc);
     }
 }
 
+CX_TEST(test_tree_search_with_max_depth) {
+    tree_node_file root = {0};
+    root.path = "/";
+    tree_node_file usr = {0};
+    usr.path = "/usr/";
+    cx_tree_link(&root, &usr, tree_node_file_layout);
+    tree_node_file home = {0};
+    home.path = "/home/";
+    cx_tree_link(&root, &home, tree_node_file_layout);
+    tree_node_file doe = {0};
+    doe.path = "/home/doe/";
+    cx_tree_link(&home, &doe, tree_node_file_layout);
+    tree_node_file lib = {0};
+    lib.path = "/usr/lib/";
+    cx_tree_link(&usr, &lib, tree_node_file_layout);
+    tree_node_file modules = {0};
+    modules.path = "/usr/lib/modules/";
+    cx_tree_link(&lib, &modules, tree_node_file_layout);
+
+    CX_TEST_DO {
+        int result;
+        void *found;
+
+        result = cx_tree_search_data(
+                &root, 1, "/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERTM(result == 0, "root not found");
+        CX_TEST_ASSERT(found == &root);
+
+        result = cx_tree_search_data(
+                &root, 1, "/usr/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERT(result > 0);
+        CX_TEST_ASSERT(found == &root);
+
+        result = cx_tree_search_data(
+                &root, 2, "/usr/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERT(result == 0);
+        CX_TEST_ASSERT(found == &usr);
+
+        result = cx_tree_search_data(
+                &root, 2, "/usr/lib/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERT(result > 0);
+        CX_TEST_ASSERT(found == &usr);
+
+        result = cx_tree_search_data(
+                &root, 3, "/usr/lib/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERTM(result == 0, "lib not found");
+        CX_TEST_ASSERT(found == &lib);
+
+        result = cx_tree_search_data(
+                &root, 1, "/home/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERT(result > 0);
+        CX_TEST_ASSERT(found == &root);
+
+        result = cx_tree_search_data(
+                &root, 2, "/home/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERTM(result == 0, "home not found");
+        CX_TEST_ASSERT(found == &home);
+
+        result = cx_tree_search_data(
+                &root, 2, "/home/doe/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERT(result > 0);
+        CX_TEST_ASSERT(found == &home);
+
+        result = cx_tree_search_data(
+                &root, 3, "/home/doe/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERTM(result == 0, "doe not found");
+        CX_TEST_ASSERT(found == &doe);
+
+        result = cx_tree_search_data(
+                &root, 3, "/usr/lib/modules/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERT(result > 0);
+        CX_TEST_ASSERT(found == &lib);
+
+        result = cx_tree_search_data(
+                &root, 4, "/usr/lib/modules/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERTM(result == 0, "modules not found (start=root)");
+        CX_TEST_ASSERT(found == &modules);
+
+        result = cx_tree_search_data(
+                &usr, 3, "/usr/lib/modules/",
+                tree_node_file_search_data, &found,
+                tree_children(tree_node_file)
+        );
+        CX_TEST_ASSERTM(result == 0, "modules not found (start=usr)");
+        CX_TEST_ASSERT(found == &modules);
+    }
+}
+
 CX_TEST(test_tree_iterator_create_and_dispose) {
     tree_node root;
     CX_TEST_DO {
@@ -1825,8 +1946,8 @@
         CX_TEST_ASSERT(tree->size == 7);
 
         tree_node_file *home = cxTreeFind(tree, "/home/");
-        CX_TEST_ASSERT(NULL == cxTreeFindInSubtree(tree, "/usr/", foo));
-        tree_node_file *bar = cxTreeFindInSubtree(tree, "/home/foo/bar/", home);
+        CX_TEST_ASSERT(NULL == cxTreeFindInSubtree(tree, "/usr/", foo, 0));
+        tree_node_file *bar = cxTreeFindInSubtree(tree, "/home/foo/bar/", home, 0);
         CX_TEST_ASSERT(NULL != bar);
         CX_TEST_ASSERT(0 == strcmp("/home/foo/bar/", bar->path));
         CX_TEST_ASSERT(bar->parent == foo);
@@ -1916,8 +2037,8 @@
         CX_TEST_ASSERT(tree->size == 7);
 
         tree_node_file *home = cxTreeFind(tree, "/home/");
-        CX_TEST_ASSERT(NULL == cxTreeFindInSubtree(tree, "/usr/", foo));
-        tree_node_file *bar = cxTreeFindInSubtree(tree, "/home/foo/bar/", home);
+        CX_TEST_ASSERT(NULL == cxTreeFindInSubtree(tree, "/usr/", foo, 0));
+        tree_node_file *bar = cxTreeFindInSubtree(tree, "/home/foo/bar/", home, 0);
         CX_TEST_ASSERT(NULL != bar);
         CX_TEST_ASSERT(0 == strcmp("/home/foo/bar/", bar->path));
         CX_TEST_ASSERT(bar->parent == foo);
@@ -2032,6 +2153,7 @@
     cx_test_register(suite, test_tree2_link_move_to_other_parent);
     cx_test_register(suite, test_tree2_unlink);
     cx_test_register(suite, test_tree_search);
+    cx_test_register(suite, test_tree_search_with_max_depth);
     cx_test_register(suite, test_tree_iterator_create_and_dispose);
     cx_test_register(suite, test_tree_iterator_create_for_empty_tree);
     cx_test_register(suite, test_tree_iterator_basic_only_enter);

mercurial