Fri, 07 Nov 2025 19:13:28 +0100
implement cxListUnion() - resolves #755
| CHANGELOG | file | annotate | diff | comparison | revisions | |
| docs/Writerside/topics/about.md | file | annotate | diff | comparison | revisions | |
| docs/Writerside/topics/list.h.md | file | annotate | diff | comparison | revisions | |
| src/cx/list.h | file | annotate | diff | comparison | revisions | |
| src/list.c | file | annotate | diff | comparison | revisions | |
| tests/test_list.c | file | annotate | diff | comparison | revisions |
--- a/CHANGELOG Fri Nov 07 18:42:06 2025 +0100 +++ b/CHANGELOG Fri Nov 07 19:13:28 2025 +0100 @@ -7,6 +7,7 @@ + adds support for integer keys to CxHashKey * adds support for comparing arbitrary strings without explicit call to cx_strcast() * adds cxListClone() and cxMapClone() + * adds cxListUnion(), and cxMapUnion() * adds cxListDifference(), cxMapDifference(), and cxMapListDifference() * adds cxListIntersection(), cxMapIntersection(), and cxMapListIntersection() * adds cxListContains() and cxMapContains()
--- a/docs/Writerside/topics/about.md Fri Nov 07 18:42:06 2025 +0100 +++ b/docs/Writerside/topics/about.md Fri Nov 07 19:13:28 2025 +0100 @@ -34,6 +34,7 @@ * adds support for integer keys to CxHashKey * adds support for comparing arbitrary strings without explicit call to cx_strcast() * adds cxListClone() and cxMapClone() +* adds cxListUnion(), and cxMapUnion() * adds cxListDifference(), cxMapDifference(), and cxMapListDifference() * adds cxListIntersection(), cxMapIntersection(), and cxMapListIntersection() * adds cxListContains() and cxMapContains()
--- a/docs/Writerside/topics/list.h.md Fri Nov 07 18:42:06 2025 +0100 +++ b/docs/Writerside/topics/list.h.md Fri Nov 07 19:13:28 2025 +0100 @@ -380,6 +380,12 @@ cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data); + +int cxListUnion(CxList *dst, + const CxList *src, const CxList *other, + cx_clone_func clone_func, + const CxAllocator *clone_allocator, + void *data); ``` With `cxListClone()` you can create deep copies of the elements in a list and insert them into another list. @@ -387,10 +393,17 @@ Depending on the concrete list implementation, `cxListClone()` tries to allocate enough memory up-front, before trying to insert anything. -The function `cxListDifference()` is similar to `cxListClone()`, -except that it only clones elements from the minuend that are _not_ contained in the subtrahend, -while `cxListIntersection()` only clones elements that _are_ contained in both lists. -Both functions are optimized for sorted lists, in which case they will take linear time instead of quadratic time for the operation. +The function `cxListDifference()` clones elements from the `minuend` that are _not_ contained in the `subtrahend`, +while `cxListIntersection()` only clones elements from the `src` that are _also_ contained in the `other` list. +And `cxListUnion()` clones elements from `src` first, and from `other` only when they are _not_ contained in `src`. + +All three functions, for union, difference, and intersection, are optimized for sorted lists. +In that case they will take linear time instead of quadratic time for the operation. +Also, `cxListUnion()` makes sure that the elements from `src` and `other` are cloned interleaving, +so that the result of the union is still sorted. + +However, when the `dst` list already contains elements, all functions will only append the result of the operation to that list. +That means, the `dst` list is only guaranteed to be sorted, when it was empty and the input lists are all sorted. Refer to the documentation of the [clone-function callback](allocator.h.md#clone-function) to learn how to implement it.
--- a/src/cx/list.h Fri Nov 07 18:42:06 2025 +0100 +++ b/src/cx/list.h Fri Nov 07 19:13:28 2025 +0100 @@ -978,6 +978,7 @@ * @param data optional additional data that is passed to the clone function * @retval zero when all elements were successfully cloned * @retval non-zero when an allocation error occurred + * @see cxListUnion() */ cx_attr_nonnull_arg(1, 2, 3) CX_EXPORT int cxListClone(CxList *dst, const CxList *src, @@ -1009,8 +1010,8 @@ /** * Clones elements from a list only if they are also present in another list. * - * This function is optimized for the case when both the @p minuend and the - * @p subtrahend are sorted. + * This function is optimized for the case when both the @p src and the + * @p other list are sorted. * * If the destination list already contains elements, the intersection is appended * to that list. @@ -1028,6 +1029,32 @@ CX_EXPORT int cxListIntersection(CxList *dst, const CxList *src, const CxList *other, cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data); +/** + * Performs a deep clone of one list into another, skipping duplicates. + * + * This function is optimized for the case when both the @p src and the + * @p other list are sorted. + * In that case, the union will also be sorted. + * + * If the destination list already contains elements, the union is appended + * to that list. In that case the destination is not necessarily sorted. + * + * @param dst the destination list + * @param src the primary source list + * @param other the other list, where elements are only cloned from + * when they are not in @p src + * @param clone_func the clone function for the elements + * @param clone_allocator the allocator that is passed to the clone function + * @param data optional additional data that is passed to the clone function + * @retval zero when the elements were successfully cloned + * @retval non-zero when an allocation error occurred + * @see cxListClone() + */ +cx_attr_nonnull_arg(1, 2, 3, 4) +CX_EXPORT int cxListUnion(CxList *dst, const CxList *src, const CxList *other, + cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data); + + #ifdef __cplusplus } // extern "C" #endif
--- a/src/list.c Fri Nov 07 18:42:06 2025 +0100 +++ b/src/list.c Fri Nov 07 19:13:28 2025 +0100 @@ -993,3 +993,88 @@ return 0; } + +int cxListUnion(CxList *dst, + const CxList *src, const CxList *other, + cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data) { + if (clone_allocator == NULL) clone_allocator = cxDefaultAllocator; + + // optimize for sorted collections + if (cxCollectionSorted(src) && cxCollectionSorted(other)) { + bool dst_was_empty = cxCollectionSize(dst) == 0; + + CxIterator src_iter = cxListIterator(src); + CxIterator other_iter = cxListIterator(other); + while (cxIteratorValid(src_iter) || cxIteratorValid(other_iter)) { + void *src_elem, *other_elem; + int d; + if (!cxIteratorValid(src_iter)) { + other_elem = cxIteratorCurrent(other_iter); + d = 1; + } else if (!cxIteratorValid(other_iter)) { + src_elem = cxIteratorCurrent(src_iter); + d = -1; + } else { + src_elem = cxIteratorCurrent(src_iter); + other_elem = cxIteratorCurrent(other_iter); + d = src->collection.cmpfunc(src_elem, other_elem); + } + if (d <= 0) { + // source element is smaller or equal, clone it + void **dst_mem = cxListEmplace(dst); + void *target = cxCollectionStoresPointers(dst) ? NULL : dst_mem; + void* dst_ptr = clone_func(target, src_elem, clone_allocator, data); + if (dst_ptr == NULL) { + cx_list_pop_uninitialized_elements(dst, 1); + return 1; + } + if (cxCollectionStoresPointers(dst)) { + *dst_mem = dst_ptr; + } + cxIteratorNext(src_iter); + // if the other element was equal, skip it + if (d == 0) { + cxIteratorNext(other_iter); + } + } else { + // the other element is smaller, clone it + void **dst_mem = cxListEmplace(dst); + void *target = cxCollectionStoresPointers(dst) ? NULL : dst_mem; + void* dst_ptr = clone_func(target, other_elem, clone_allocator, data); + if (dst_ptr == NULL) { + cx_list_pop_uninitialized_elements(dst, 1); + return 1; + } + if (cxCollectionStoresPointers(dst)) { + *dst_mem = dst_ptr; + } + cxIteratorNext(other_iter); + } + } + + // if dst was empty, it is now guaranteed to be sorted + dst->collection.sorted = dst_was_empty; + } else { + if (cxListClone(dst, src, clone_func, clone_allocator, data)) { + return 1; + } + CxIterator other_iter = cxListIterator(other); + cx_foreach(void *, elem, other_iter) { + if (cxListContains(src, elem)) { + continue; + } + void **dst_mem = cxListEmplace(dst); + void *target = cxCollectionStoresPointers(dst) ? NULL : dst_mem; + void* dst_ptr = clone_func(target, elem, clone_allocator, data); + if (dst_ptr == NULL) { + cx_list_pop_uninitialized_elements(dst, 1); + return 1; + } + if (cxCollectionStoresPointers(dst)) { + *dst_mem = dst_ptr; + } + } + } + + return 0; +}
--- a/tests/test_list.c Fri Nov 07 18:42:06 2025 +0100 +++ b/tests/test_list.c Fri Nov 07 19:13:28 2025 +0100 @@ -2872,6 +2872,110 @@ } } +static CX_TEST_SUBROUTINE(verify_union, bool sorted, bool alloc_fail) { + CxTestingAllocator talloc; + cx_testing_allocator_init(&talloc); + + CxList *dst = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, CX_STORE_POINTERS); + cxDefineAdvancedDestructor(dst, cxFree, &talloc); + CxList *src = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, sizeof(int)); + CxList *other = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, sizeof(int)); + + int dst_data[] = {47, 178, 176, 83}; + int src_data[] = { + 153, 106, 171, 130, 74, 173, 150, 94, 27, 92, 70, 175, 200, 20, 29, 161, 88, 116, 71, 53, 199, 124, 32, 9, 76, + 151, 33, 51, 37, 65, 176, 49, 12, 162, 28, 85, 4, 177, 198, 54, 109, 188, 44, 77, 194, 63, 41, 129, 97, 83 + }; + int other_data[] = { + 75, 137, 176, 111, 85, 27, 197, 141, 46, 103, 69, 146, 49, 79, 63, 130, 154, 45, 38, 139, 193, 90, 64, 142, 115, + 120, 78, 100, 101, 42, 21, 1, 161, 10, 114, 198, 181, 178, 136, 188, 59, 41, 73, 99, 151, 144, 118, 53, 199, 71 + }; + + for (unsigned i = 0 ; i < cx_nmemb(dst_data) ; i++) { + int *x = cxMalloc(&talloc.base, sizeof(int)); + *x = dst_data[i]; + cxListAdd(dst, x); + } + cxListAddArray(src, src_data, 50); + cxListAddArray(other, other_data, 50); + if (sorted) { + cxListSort(dst); + cxListSort(src); + cxListSort(other); + } + + // the 14 elements from the intersection should not appear twice in the destination: + // 27, 41, 49, 53, 63, 71, 85, 130, 151, 161, 176, 188, 198, 199 + // however, the elements that are already in dst may appear twice + size_t expected_len = 90; + int expected_unsorted[] = { + 47, 178, 176, 83, + 153, 106, 171, 130, 74, 173, 150, 94, 27, 92, 70, 175, 200, 20, 29, 161, + 88, 116, 71, 53, 199, 124, 32, 9, 76, 151, 33, 51, 37, 65, 176, 49, 12, + 162, 28, 85, 4, 177, 198, 54, 109, 188, 44, 77, 194, 63, 41, 129, 97, + 83, 75, 137, 111, 197, 141, 46, 103, 69, 146, 79, 154, 45, 38, 139, 193, + 90, 64, 142, 115, 120, 78, 100, 101, 42, 21, 1, 10, 114, 181, 178, 136, + 59, 73, 99, 144, 118 + }; + int expected_sorted[] = { + 47, 83, 176, 178, + 1, 4, 9, 10, 12, 20, 21, 27, 28, 29, 32, 33, 37, 38, 41, 42, 44, 45, + 46, 49, 51, 53, 54, 59, 63, 64, 65, 69, 70, 71, 73, 74, 75, 76, 77, 78, + 79, 83, 85, 88, 90, 92, 94, 97, 99, 100, 101, 103, 106, 109, 111, 114, + 115, 116, 118, 120, 124, 129, 130, 136, 137, 139, 141, 142, 144, 146, + 150, 151, 153, 154, 161, 162, 171, 173, 175, 176, 177, 178, 181, 188, + 193, 194, 197, 198, 199, 200 + }; + + if (alloc_fail) { + test_clone_func_max_enabled = true; + test_clone_func_max_clones = 66; + expected_len = 70; + } + CxList *expected = cxArrayListCreate(cxDefaultAllocator, cx_cmp_int, sizeof(int), expected_len); + cxListAddArray(expected, sorted ? expected_sorted : expected_unsorted, expected_len); + + int result = cxListUnion(dst, src, other, test_clone_func, &talloc.base, NULL); + if (alloc_fail) { + CX_TEST_ASSERT(result != 0); + } else { + CX_TEST_ASSERT(result == 0); + } + test_clone_func_max_enabled = false; + CX_TEST_ASSERT(expected_len == cxListSize(dst)); + CX_TEST_ASSERT(0 == cxListCompare(dst, expected)); + + cxListFree(dst); + cxListFree(src); + cxListFree(other); + cxListFree(expected); + CX_TEST_ASSERT(cx_testing_allocator_verify(&talloc)); +} + +CX_TEST(test_list_union_unsorted) { + CX_TEST_DO { + CX_TEST_CALL_SUBROUTINE(verify_union, false, false); + } +} + +CX_TEST(test_list_union_sorted) { + CX_TEST_DO { + CX_TEST_CALL_SUBROUTINE(verify_union, true, false); + } +} + +CX_TEST(test_list_union_unsorted_alloc_fail) { + CX_TEST_DO { + CX_TEST_CALL_SUBROUTINE(verify_union, false, true); + } +} + +CX_TEST(test_list_union_sorted_alloc_fail) { + CX_TEST_DO { + CX_TEST_CALL_SUBROUTINE(verify_union, true, true); + } +} + CX_TEST(test_list_pointer_list_supports_null) { CxList *list = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, CX_STORE_POINTERS); int x = 47; @@ -3327,6 +3431,10 @@ CxTestSuite *suite = cx_test_suite_new("list collection operations"); // we do not perform the following tests with every combination of list types + cx_test_register(suite, test_list_union_unsorted); + cx_test_register(suite, test_list_union_sorted); + cx_test_register(suite, test_list_union_unsorted_alloc_fail); + cx_test_register(suite, test_list_union_sorted_alloc_fail); cx_test_register(suite, test_list_difference_unsorted); cx_test_register(suite, test_list_difference_sorted); cx_test_register(suite, test_list_difference_unsorted_alloc_fail);