# HG changeset patch # User Mike Becker # Date 1763896764 -3600 # Node ID f4010cda9a2a19f2947a198855967ef7d7594001 # Parent 2a4339475bcfc44af09d25f0c7505dd233200fe0 stable return value for binary search when there are duplicates in the array diff -r 2a4339475bcf -r f4010cda9a2a CHANGELOG --- a/CHANGELOG Sat Nov 22 19:16:27 2025 +0100 +++ b/CHANGELOG Sun Nov 23 12:19:24 2025 +0100 @@ -38,6 +38,8 @@ * changes struct cx_array_reallocator_s by replacing the four generic data members with two specifically named members * changes cx_array_reserve() so that it reserves exactly the requested capacity (i.e., without overallocation) * changes all other array functions to perform smart overallocation to avoid too many subsequent allocations + * changes that binary search and infimum always report the largest index, and supremum always reports the smallest index + when the found element appears more than once in the array * fixes critical memory overflow in the stack-based array reallocator (this unfortunately breaks the function signature) * fixes critical bug in cx_array_insert_sorted() that caused an infinite loop when inserting duplicates * fixes mempool implementation not supporting NULL as argument for realloc diff -r 2a4339475bcf -r f4010cda9a2a docs/Writerside/topics/about.md --- a/docs/Writerside/topics/about.md Sat Nov 22 19:16:27 2025 +0100 +++ b/docs/Writerside/topics/about.md Sun Nov 23 12:19:24 2025 +0100 @@ -65,6 +65,8 @@ * changes struct cx_array_reallocator_s by replacing the four generic data members with two specifically named members * changes cx_array_reserve() so that it reserves exactly the requested capacity (i.e., without overallocation) * changes all other array functions to perform smart overallocation to avoid too many subsequent allocations +* changes that binary search and infimum always report the largest index, and supremum always reports the smallest index + when the found element appears more than once in the array * fixes critical memory overflow in the stack-based array reallocator (this unfortunately breaks the function signature) * fixes critical bug in cx_array_insert_sorted() that caused an infinite loop when inserting duplicates * fixes mempool implementation not supporting NULL as argument for realloc diff -r 2a4339475bcf -r f4010cda9a2a docs/Writerside/topics/array_list.h.md --- a/docs/Writerside/topics/array_list.h.md Sat Nov 22 19:16:27 2025 +0100 +++ b/docs/Writerside/topics/array_list.h.md Sun Nov 23 12:19:24 2025 +0100 @@ -311,6 +311,10 @@ and the latter function returns the closest element that is larger or equal (the least upper bound / supremum) than the searched element. +When the found element appears more than once in the array, +the binary search and the infimum report the largest index +and the supremum reports the smallest index of the identical items, respectively. + > Note that all the above functions are only well-defined on arrays which are sorted with respect to the given compare function. > > This can, for example, easily be achieved by calling the standard library's `qsort()` function. diff -r 2a4339475bcf -r f4010cda9a2a src/array_list.c --- a/src/array_list.c Sat Nov 22 19:16:27 2025 +0100 +++ b/src/array_list.c Sun Nov 23 12:19:24 2025 +0100 @@ -595,7 +595,8 @@ cmp_func, sorted_data, elem_size, elem_count, reallocator, false); } -size_t cx_array_binary_search_inf( +// implementation that finds ANY index +static size_t cx_array_binary_search_inf_impl( const void *arr, size_t size, size_t elem_size, @@ -640,13 +641,6 @@ result = cmp_func(elem, arr_elem); if (result == 0) { // found it! - // check previous elements; - // when they are equal, report the smallest index - arr_elem -= elem_size; - while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) { - pivot_index--; - arr_elem -= elem_size; - } return pivot_index; } else if (result < 0) { // element is smaller than pivot, continue search left @@ -661,6 +655,24 @@ return result < 0 ? (pivot_index - 1) : pivot_index; } +size_t cx_array_binary_search_inf( + const void *arr, + size_t size, + size_t elem_size, + const void *elem, + cx_compare_func cmp_func +) { + size_t index = cx_array_binary_search_inf_impl( + arr, size, elem_size, elem, cmp_func); + // in case of equality, report the largest index + const char *e = ((const char *) arr) + (index + 1) * elem_size; + while (index + 1 < size && cmp_func(e, elem) == 0) { + e += elem_size; + index++; + } + return index; +} + size_t cx_array_binary_search( const void *arr, size_t size, @@ -686,16 +698,25 @@ const void *elem, cx_compare_func cmp_func ) { - size_t inf = cx_array_binary_search_inf( + size_t index = cx_array_binary_search_inf_impl( arr, size, elem_size, elem, cmp_func ); - if (inf == size) { - // no infimum means, first element is supremum + const char *e = ((const char *) arr) + index * elem_size; + if (index == size) { + // no infimum means the first element is supremum return 0; - } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) { - return inf; + } else if (cmp_func(e, elem) == 0) { + // found an equal element, search the smallest index + e -= elem_size; // e now contains the element at index-1 + while (index > 0 && cmp_func(e, elem) == 0) { + e -= elem_size; + index--; + } + return index; } else { - return inf + 1; + // we already have the largest index of the infimum (by design) + // the next element is the supremum (or there is no supremum) + return index + 1; } } diff -r 2a4339475bcf -r f4010cda9a2a src/cx/array_list.h --- a/src/cx/array_list.h Sat Nov 22 19:16:27 2025 +0100 +++ b/src/cx/array_list.h Sun Nov 23 12:19:24 2025 +0100 @@ -676,6 +676,9 @@ * in @p arr that is less or equal to @p elem with respect to @p cmp_func. * When no such element exists, @p size is returned. * + * When such an element exists more than once, the largest index of all those + * elements is returned. + * * If @p elem is contained in the array, this is identical to * #cx_array_binary_search(). * @@ -698,6 +701,9 @@ /** * Searches an item in a sorted array. * + * When such an element exists more than once, the largest index of all those + * elements is returned. + * * If the array is not sorted with respect to the @p cmp_func, the behavior * is undefined. * @@ -722,6 +728,9 @@ * in @p arr that is greater or equal to @p elem with respect to @p cmp_func. * When no such element exists, @p size is returned. * + * When such an element exists more than once, the smallest index of all those + * elements is returned. + * * If @p elem is contained in the array, this is identical to * #cx_array_binary_search(). * diff -r 2a4339475bcf -r f4010cda9a2a tests/test_list.c --- a/tests/test_list.c Sat Nov 22 19:16:27 2025 +0100 +++ b/tests/test_list.c Sun Nov 23 12:19:24 2025 +0100 @@ -484,6 +484,61 @@ } } +CX_TEST(test_array_binary_search_with_duplicates) { + int array[18] = { + 40, 50, 55, 55, 55, 57, 57, 58, 60, + 62, 64, 65, 70, 70, 70, 78, 80, 90 + }; + int s; + CX_TEST_DO { + // exact matches (largest index on duplicate) + s = 40; + CX_TEST_ASSERT(0 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 50; + CX_TEST_ASSERT(1 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 55; + CX_TEST_ASSERT(4 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 57; + CX_TEST_ASSERT(6 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 58; + CX_TEST_ASSERT(7 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 65; + CX_TEST_ASSERT(11 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 70; + CX_TEST_ASSERT(14 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 78; + CX_TEST_ASSERT(15 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + + // not found + s = 30; + CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 75; + CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 52; + CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 100; + CX_TEST_ASSERT(18 == cx_array_binary_search(array, 18, sizeof(int), &s, cx_cmp_int)); + + // infimum + s = 55; // also yields an exact match + CX_TEST_ASSERT(4 == cx_array_binary_search_inf(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 56; + CX_TEST_ASSERT(4 == cx_array_binary_search_inf(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 75; + CX_TEST_ASSERT(14 == cx_array_binary_search_inf(array, 18, sizeof(int), &s, cx_cmp_int)); + + // supremum (smallest index) + s = 52; + CX_TEST_ASSERT(2 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 66; + CX_TEST_ASSERT(12 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 75; + CX_TEST_ASSERT(15 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int)); + s = 70; // exact match, we want the smallest index + CX_TEST_ASSERT(12 == cx_array_binary_search_sup(array, 18, sizeof(int), &s, cx_cmp_int)); + } +} + typedef struct node { struct node *next; struct node *prev; @@ -3344,6 +3399,7 @@ cx_test_register(suite, test_array_insert_sorted); cx_test_register(suite, test_array_insert_unique); cx_test_register(suite, test_array_binary_search); + cx_test_register(suite, test_array_binary_search_with_duplicates); cx_test_register(suite, test_list_arl_create); cx_test_register(suite, test_list_arl_create_simple);