stable return value for binary search when there are duplicates in the array

Sun, 23 Nov 2025 12:19:24 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 23 Nov 2025 12:19:24 +0100
changeset 1507
f4010cda9a2a
parent 1506
2a4339475bcf
child 1508
dfc0ddd9571e

stable return value for binary search when there are duplicates in the array

CHANGELOG file | annotate | diff | comparison | revisions
docs/Writerside/topics/about.md file | annotate | diff | comparison | revisions
docs/Writerside/topics/array_list.h.md file | annotate | diff | comparison | revisions
src/array_list.c file | annotate | diff | comparison | revisions
src/cx/array_list.h file | annotate | diff | comparison | revisions
tests/test_list.c file | annotate | diff | comparison | revisions
--- a/CHANGELOG	Sat Nov 22 19:16:27 2025 +0100
+++ b/CHANGELOG	Sun Nov 23 12:19:24 2025 +0100
@@ -38,6 +38,8 @@
  * changes struct cx_array_reallocator_s by replacing the four generic data members with two specifically named members
  * changes cx_array_reserve() so that it reserves exactly the requested capacity (i.e., without overallocation)
  * changes all other array functions to perform smart overallocation to avoid too many subsequent allocations
+ * changes that binary search and infimum always report the largest index, and supremum always reports the smallest index
+   when the found element appears more than once in the array
  * fixes critical memory overflow in the stack-based array reallocator (this unfortunately breaks the function signature)
  * fixes critical bug in cx_array_insert_sorted() that caused an infinite loop when inserting duplicates
  * fixes mempool implementation not supporting NULL as argument for realloc
--- a/docs/Writerside/topics/about.md	Sat Nov 22 19:16:27 2025 +0100
+++ b/docs/Writerside/topics/about.md	Sun Nov 23 12:19:24 2025 +0100
@@ -65,6 +65,8 @@
 * changes struct cx_array_reallocator_s by replacing the four generic data members with two specifically named members
 * changes cx_array_reserve() so that it reserves exactly the requested capacity (i.e., without overallocation)
 * changes all other array functions to perform smart overallocation to avoid too many subsequent allocations
+* changes that binary search and infimum always report the largest index, and supremum always reports the smallest index
+  when the found element appears more than once in the array
 * fixes critical memory overflow in the stack-based array reallocator (this unfortunately breaks the function signature)
 * fixes critical bug in cx_array_insert_sorted() that caused an infinite loop when inserting duplicates
 * fixes mempool implementation not supporting NULL as argument for realloc
--- a/docs/Writerside/topics/array_list.h.md	Sat Nov 22 19:16:27 2025 +0100
+++ b/docs/Writerside/topics/array_list.h.md	Sun Nov 23 12:19:24 2025 +0100
@@ -311,6 +311,10 @@
 and the latter function returns the closest element that is larger or equal (the least upper bound / supremum)
 than the searched element.
 
+When the found element appears more than once in the array,
+the binary search and the infimum report the largest index
+and the supremum reports the smallest index of the identical items, respectively.
+
 > Note that all the above functions are only well-defined on arrays which are sorted with respect to the given compare function.
 > 
 > This can, for example, easily be achieved by calling the standard library's `qsort()` function.
--- a/src/array_list.c	Sat Nov 22 19:16:27 2025 +0100
+++ b/src/array_list.c	Sun Nov 23 12:19:24 2025 +0100
@@ -595,7 +595,8 @@
         cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
 }
 
-size_t cx_array_binary_search_inf(
+// implementation that finds ANY index
+static size_t cx_array_binary_search_inf_impl(
         const void *arr,
         size_t size,
         size_t elem_size,
@@ -640,13 +641,6 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
-            // check previous elements;
-            // when they are equal, report the smallest index
-            arr_elem -= elem_size;
-            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
-                pivot_index--;
-                arr_elem -= elem_size;
-            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -661,6 +655,24 @@
     return result < 0 ? (pivot_index - 1) : pivot_index;
 }
 
+size_t cx_array_binary_search_inf(
+        const void *arr,
+        size_t size,
+        size_t elem_size,
+        const void *elem,
+        cx_compare_func cmp_func
+) {
+    size_t index = cx_array_binary_search_inf_impl(
+        arr, size, elem_size, elem, cmp_func);
+    // in case of equality, report the largest index
+    const char *e = ((const char *) arr) + (index + 1) * elem_size;
+    while (index + 1 < size && cmp_func(e, elem) == 0) {
+        e += elem_size;
+        index++;
+    }
+    return index;
+}
+
 size_t cx_array_binary_search(
         const void *arr,
         size_t size,
@@ -686,16 +698,25 @@
         const void *elem,
         cx_compare_func cmp_func
 ) {
-    size_t inf = cx_array_binary_search_inf(
+    size_t index = cx_array_binary_search_inf_impl(
             arr, size, elem_size, elem, cmp_func
     );
-    if (inf == size) {
-        // no infimum means, first element is supremum
+    const char *e = ((const char *) arr) + index * elem_size;
+    if (index == size) {
+        // no infimum means the first element is supremum
         return 0;
-    } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) {
-        return inf;
+    } else if (cmp_func(e, elem) == 0) {
+        // found an equal element, search the smallest index
+        e -= elem_size; // e now contains the element at index-1
+        while (index > 0 && cmp_func(e, elem) == 0) {
+            e -= elem_size;
+            index--;
+        }
+        return index;
     } else {
-        return inf + 1;
+        // we already have the largest index of the infimum (by design)
+        // the next element is the supremum (or there is no supremum)
+        return index + 1;
     }
 }
 
--- a/src/cx/array_list.h	Sat Nov 22 19:16:27 2025 +0100
+++ b/src/cx/array_list.h	Sun Nov 23 12:19:24 2025 +0100
@@ -676,6 +676,9 @@
  * in @p arr that is less or equal to @p elem with respect to @p cmp_func.
  * When no such element exists, @p size is returned.
  *
+ * When such an element exists more than once, the largest index of all those
+ * elements is returned.
+ *
  * If @p elem is contained in the array, this is identical to
  * #cx_array_binary_search().
  *
@@ -698,6 +701,9 @@
 /**
  * Searches an item in a sorted array.
  *
+ * When such an element exists more than once, the largest index of all those
+ * elements is returned.
+ *
  * If the array is not sorted with respect to the @p cmp_func, the behavior
  * is undefined.
  *
@@ -722,6 +728,9 @@
  * in @p arr that is greater or equal to @p elem with respect to @p cmp_func.
  * When no such element exists, @p size is returned.
  *
+ * When such an element exists more than once, the smallest index of all those
+ * elements is returned.
+ *
  * If @p elem is contained in the array, this is identical to
  * #cx_array_binary_search().
  *
--- a/tests/test_list.c	Sat Nov 22 19:16:27 2025 +0100
+++ b/tests/test_list.c	Sun Nov 23 12:19:24 2025 +0100
@@ -484,6 +484,61 @@
     }
 }
 
+CX_TEST(test_array_binary_search_with_duplicates) {
+    int array[18] = {
+        40, 50, 55, 55, 55, 57, 57, 58, 60,
+        62, 64, 65, 70, 70, 70, 78, 80, 90
+    };
+    int s;
+    CX_TEST_DO {
+        // exact matches (largest index on duplicate)
+        s = 40;
+        CX_TEST_ASSERT(0 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 50;
+        CX_TEST_ASSERT(1 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 55;
+        CX_TEST_ASSERT(4 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 57;
+        CX_TEST_ASSERT(6 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 58;
+        CX_TEST_ASSERT(7 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 65;
+        CX_TEST_ASSERT(11 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 70;
+        CX_TEST_ASSERT(14 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 78;
+        CX_TEST_ASSERT(15 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+
+        // not found
+        s = 30;
+        CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 75;
+        CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 52;
+        CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 100;
+        CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int));
+
+        // infimum
+        s = 55; // also yields an exact match
+        CX_TEST_ASSERT(4 == cx_array_binary_search_inf(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 56;
+        CX_TEST_ASSERT(4 == cx_array_binary_search_inf(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 75;
+        CX_TEST_ASSERT(14 == cx_array_binary_search_inf(array, 18, sizeof(int), &s, cx_cmp_int));
+
+        // supremum (smallest index)
+        s = 52;
+        CX_TEST_ASSERT(2 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 66;
+        CX_TEST_ASSERT(12 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 75;
+        CX_TEST_ASSERT(15 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int));
+        s = 70; // exact match, we want the smallest index
+        CX_TEST_ASSERT(12 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int));
+    }
+}
+
 typedef struct node {
     struct node *next;
     struct node *prev;
@@ -3344,6 +3399,7 @@
     cx_test_register(suite, test_array_insert_sorted);
     cx_test_register(suite, test_array_insert_unique);
     cx_test_register(suite, test_array_binary_search);
+    cx_test_register(suite, test_array_binary_search_with_duplicates);
 
     cx_test_register(suite, test_list_arl_create);
     cx_test_register(suite, test_list_arl_create_simple);

mercurial