optimize cx_arl_find_remove for sorted arrays - fixes #547 default

Tue, 28 Jan 2025 18:27:46 +0100

author
Mike Becker <universe@uap-core.de>
date
Tue, 28 Jan 2025 18:27:46 +0100
changeset 1163
68ff0839bc6a
parent 1162
e3bb67b72d33
child 1164
148b7c7ccaf9

optimize cx_arl_find_remove for sorted arrays - fixes #547

src/array_list.c file | annotate | diff | comparison | revisions
src/cx/collection.h file | annotate | diff | comparison | revisions
src/cx/list.h file | annotate | diff | comparison | revisions
src/list.c file | annotate | diff | comparison | revisions
src/map.c file | annotate | diff | comparison | revisions
tests/test_list.c file | annotate | diff | comparison | revisions
--- a/src/array_list.c	Mon Jan 27 20:27:39 2025 +0100
+++ b/src/array_list.c	Tue Jan 28 18:27:46 2025 +0100
@@ -861,26 +861,36 @@
         const void *elem,
         bool remove
 ) {
+    assert(list != NULL);
     assert(list->collection.cmpfunc != NULL);
-    assert(list->collection.size < SIZE_MAX / 2);
+    if (list->collection.size == 0) return 0;
     char *cur = ((const cx_array_list *) list)->data;
 
+    // optimize with binary search, when sorted
+    if (list->collection.sorted) {
+        size_t i = cx_array_binary_search(
+            cur,
+            list->collection.size,
+            list->collection.elem_size,
+            elem,
+            list->collection.cmpfunc
+        );
+        if (remove && i < list->collection.size) {
+            cx_arl_remove(list, i, 1, NULL);
+        }
+        return i;
+    }
+
+    // fallback: linear search
     for (size_t i = 0; i < list->collection.size; i++) {
         if (0 == list->collection.cmpfunc(elem, cur)) {
             if (remove) {
-                if (1 == cx_arl_remove(list, i, 1, NULL)) {
-                    return i;
-                } else {
-                    // should be unreachable
-                    return list->collection.size;  // LCOV_EXCL_LINE
-                }
-            } else {
-                return i;
+                cx_arl_remove(list, i, 1, NULL);
             }
+            return i;
         }
         cur += list->collection.elem_size;
     }
-
     return list->collection.size;
 }
 
--- a/src/cx/collection.h	Mon Jan 27 20:27:39 2025 +0100
+++ b/src/cx/collection.h	Tue Jan 28 18:27:46 2025 +0100
@@ -92,6 +92,11 @@
      * instead of copies of the actual objects.
      */
     bool store_pointer;
+    /**
+     * Indicates if this collection is guaranteed to be sorted.
+     * Note that the elements can still be sorted, even when the collection is not aware of that.
+     */
+    bool sorted;
 };
 
 /**
--- a/src/cx/list.h	Mon Jan 27 20:27:39 2025 +0100
+++ b/src/cx/list.h	Tue Jan 28 18:27:46 2025 +0100
@@ -362,6 +362,7 @@
         CxList *list,
         const void *elem
 ) {
+    list->collection.sorted = false;
     return list->cl->insert_element(list, list->collection.size, elem);
 }
 
@@ -387,6 +388,7 @@
         const void *array,
         size_t n
 ) {
+    list->collection.sorted = false;
     return list->cl->insert_array(list, list->collection.size, array, n);
 }
 
@@ -409,12 +411,15 @@
         size_t index,
         const void *elem
 ) {
+    list->collection.sorted = false;
     return list->cl->insert_element(list, index, elem);
 }
 
 /**
  * Inserts an item into a sorted list.
  *
+ * If the list is not sorted already, the behavior is undefined.
+ *
  * @param list the list
  * @param elem a pointer to the element to add
  * @retval zero success
@@ -425,6 +430,7 @@
         CxList *list,
         const void *elem
 ) {
+    list->collection.sorted = true; // guaranteed by definition
     const void *data = list->collection.store_pointer ? &elem : elem;
     return list->cl->insert_sorted(list, data, 1) == 0;
 }
@@ -455,6 +461,7 @@
         const void *array,
         size_t n
 ) {
+    list->collection.sorted = false;
     return list->cl->insert_array(list, index, array, n);
 }
 
@@ -470,6 +477,8 @@
  * If this list is storing pointers instead of objects @p array is expected to
  * be an array of pointers.
  *
+ * If the list is not sorted already, the behavior is undefined.
+ *
  * @param list the list
  * @param array a pointer to the elements to add
  * @param n the number of elements to add
@@ -481,6 +490,7 @@
         const void *array,
         size_t n
 ) {
+    list->collection.sorted = true; // guaranteed by definition
     return list->cl->insert_sorted(list, array, n);
 }
 
@@ -505,7 +515,9 @@
         CxIterator *iter,
         const void *elem
 ) {
-    return ((struct cx_list_s *) iter->src_handle.m)->cl->insert_iter(iter, elem, 0);
+    CxList* list = iter->src_handle.m;
+    list->collection.sorted = false;
+    return list->cl->insert_iter(iter, elem, 0);
 }
 
 /**
@@ -529,7 +541,9 @@
         CxIterator *iter,
         const void *elem
 ) {
-    return ((struct cx_list_s *) iter->src_handle.m)->cl->insert_iter(iter, elem, 1);
+    CxList* list = iter->src_handle.m;
+    list->collection.sorted = false;
+    return list->cl->insert_iter(iter, elem, 1);
 }
 
 /**
@@ -630,6 +644,7 @@
  */
 cx_attr_nonnull
 static inline void cxListClear(CxList *list) {
+    list->collection.sorted = true; // empty lists are always sorted
     list->cl->clear(list);
 }
 
@@ -652,6 +667,7 @@
         size_t i,
         size_t j
 ) {
+    list->collection.sorted = false;
     return list->cl->swap(list, i, j);
 }
 
@@ -874,6 +890,7 @@
 cx_attr_nonnull
 static inline void cxListSort(CxList *list) {
     list->cl->sort(list);
+    list->collection.sorted = true;
 }
 
 /**
@@ -883,6 +900,8 @@
  */
 cx_attr_nonnull
 static inline void cxListReverse(CxList *list) {
+    // still sorted, but not according to the cmp_func
+    list->collection.sorted = false;
     list->cl->reverse(list);
 }
 
--- a/src/list.c	Mon Jan 27 20:27:39 2025 +0100
+++ b/src/list.c	Tue Jan 28 18:27:46 2025 +0100
@@ -249,18 +249,19 @@
 };
 
 CxList cx_empty_list = {
-        {
-                NULL,
-                NULL,
-                0,
-                0,
-                NULL,
-                NULL,
-                NULL,
-                false
-        },
-        &cx_empty_list_class,
-        NULL
+    {
+        NULL,
+        NULL,
+        0,
+        0,
+        NULL,
+        NULL,
+        NULL,
+        false,
+        true,
+    },
+    &cx_empty_list_class,
+    NULL
 };
 
 CxList *const cxEmptyList = &cx_empty_list;
--- a/src/map.c	Mon Jan 27 20:27:39 2025 +0100
+++ b/src/map.c	Tue Jan 28 18:27:46 2025 +0100
@@ -66,17 +66,18 @@
 };
 
 CxMap cx_empty_map = {
-        {
-                NULL,
-                NULL,
-                0,
-                0,
-                NULL,
-                NULL,
-                NULL,
-                false
-        },
-        &cx_empty_map_class
+    {
+        NULL,
+        NULL,
+        0,
+        0,
+        NULL,
+        NULL,
+        NULL,
+        false,
+        true
+    },
+    &cx_empty_map_class
 };
 
 CxMap *const cxEmptyMap = &cx_empty_map;
--- a/tests/test_list.c	Mon Jan 27 20:27:39 2025 +0100
+++ b/tests/test_list.c	Tue Jan 28 18:27:46 2025 +0100
@@ -1567,6 +1567,34 @@
     free(testdata);
 })
 
+roll_out_test_combos(find_remove_sorted, {
+    const size_t testdata_len = 250;
+    int *testdata = int_test_data_added_to_list(list, isptrlist, testdata_len);
+    qsort(testdata, testdata_len, sizeof(int), cx_cmp_int);
+    cxListSort(list);
+
+    unsigned exp = rand() % testdata_len; // NOLINT(cert-msc50-cpp)
+    int val = testdata[exp];
+    // randomly picked number could occur earlier in list - find first position
+    for (unsigned i = 0 ; i < exp ; i++) {
+        if (testdata[i] == val) {
+            exp = i;
+            break;
+        }
+    }
+    CX_TEST_ASSERT(cxListSize(list) == testdata_len);
+    CX_TEST_ASSERT(cxListFind(list, &val) == exp);
+    CX_TEST_ASSERT(cxListFindRemove(list, &val) == exp);
+    CX_TEST_ASSERT(cxListSize(list) == testdata_len - 1);
+    CX_TEST_ASSERT(cxListFind(list, &val) != exp);
+
+    int notinlist = -1;
+    CX_TEST_ASSERT(cxListFindRemove(list, &notinlist) == cxListSize(list));
+    CX_TEST_ASSERT(cxListSize(list) == testdata_len - 1);
+
+    free(testdata);
+})
+
 roll_out_test_combos(clear, {
     int *testdata = int_test_data_added_to_list(list, isptrlist, 8);
     CX_TEST_ASSERT(cxListSize(list) > 0);
@@ -1935,6 +1963,8 @@
     cx_test_register(suite, test_list_parl_remove_array);
     cx_test_register(suite, test_list_arl_find_remove);
     cx_test_register(suite, test_list_parl_find_remove);
+    cx_test_register(suite, test_list_arl_find_remove_sorted);
+    cx_test_register(suite, test_list_parl_find_remove_sorted);
     cx_test_register(suite, test_list_arl_clear);
     cx_test_register(suite, test_list_parl_clear);
     cx_test_register(suite, test_list_arl_at);
@@ -2032,6 +2062,8 @@
     cx_test_register(suite, test_list_pll_remove_array);
     cx_test_register(suite, test_list_ll_find_remove);
     cx_test_register(suite, test_list_pll_find_remove);
+    cx_test_register(suite, test_list_ll_find_remove_sorted);
+    cx_test_register(suite, test_list_pll_find_remove_sorted);
     cx_test_register(suite, test_list_ll_clear);
     cx_test_register(suite, test_list_pll_clear);
     cx_test_register(suite, test_list_ll_at);

mercurial