implement cxListUnion() - resolves #755

Fri, 07 Nov 2025 19:13:28 +0100

author
Mike Becker <universe@uap-core.de>
date
Fri, 07 Nov 2025 19:13:28 +0100
changeset 1477
9170a7dff573
parent 1476
79d4c281a63b
child 1478
bba2e5efdca0

implement cxListUnion() - resolves #755

CHANGELOG file | annotate | diff | comparison | revisions
docs/Writerside/topics/about.md file | annotate | diff | comparison | revisions
docs/Writerside/topics/list.h.md file | annotate | diff | comparison | revisions
src/cx/list.h file | annotate | diff | comparison | revisions
src/list.c file | annotate | diff | comparison | revisions
tests/test_list.c file | annotate | diff | comparison | revisions
--- a/CHANGELOG	Fri Nov 07 18:42:06 2025 +0100
+++ b/CHANGELOG	Fri Nov 07 19:13:28 2025 +0100
@@ -7,6 +7,7 @@
  + adds support for integer keys to CxHashKey
  * adds support for comparing arbitrary strings without explicit call to cx_strcast()
  * adds cxListClone() and cxMapClone()
+ * adds cxListUnion(), and cxMapUnion()
  * adds cxListDifference(), cxMapDifference(), and cxMapListDifference()
  * adds cxListIntersection(), cxMapIntersection(), and cxMapListIntersection()
  * adds cxListContains() and cxMapContains()
--- a/docs/Writerside/topics/about.md	Fri Nov 07 18:42:06 2025 +0100
+++ b/docs/Writerside/topics/about.md	Fri Nov 07 19:13:28 2025 +0100
@@ -34,6 +34,7 @@
 * adds support for integer keys to CxHashKey
 * adds support for comparing arbitrary strings without explicit call to cx_strcast()
 * adds cxListClone() and cxMapClone()
+* adds cxListUnion(), and cxMapUnion()
 * adds cxListDifference(), cxMapDifference(), and cxMapListDifference()
 * adds cxListIntersection(), cxMapIntersection(), and cxMapListIntersection()
 * adds cxListContains() and cxMapContains()
--- a/docs/Writerside/topics/list.h.md	Fri Nov 07 18:42:06 2025 +0100
+++ b/docs/Writerside/topics/list.h.md	Fri Nov 07 19:13:28 2025 +0100
@@ -380,6 +380,12 @@
         cx_clone_func clone_func,
         const CxAllocator *clone_allocator,
         void *data);
+        
+int cxListUnion(CxList *dst,
+        const CxList *src,  const CxList *other,
+        cx_clone_func clone_func,
+        const CxAllocator *clone_allocator,
+        void *data);
 ```
 
 With `cxListClone()` you can create deep copies of the elements in a list and insert them into another list.
@@ -387,10 +393,17 @@
 Depending on the concrete list implementation, `cxListClone()` tries to allocate enough memory up-front, before trying
 to insert anything.
 
-The function `cxListDifference()` is similar to `cxListClone()`,
-except that it only clones elements from the minuend that are _not_ contained in the subtrahend,
-while `cxListIntersection()` only clones elements that _are_ contained in both lists.
-Both functions are optimized for sorted lists, in which case they will take linear time instead of quadratic time for the operation.
+The function `cxListDifference()` clones elements from the `minuend` that are _not_ contained in the `subtrahend`,
+while `cxListIntersection()` only clones elements from the `src` that are _also_ contained in the `other` list.
+And `cxListUnion()` clones elements from `src` first, and from `other` only when they are _not_ contained in `src`.
+
+All three functions, for union, difference, and intersection, are optimized for sorted lists.
+In that case they will take linear time instead of quadratic time for the operation.
+Also, `cxListUnion()` makes sure that the elements from `src` and `other` are cloned interleaving,
+so that the result of the union is still sorted.
+
+However, when the `dst` list already contains elements, all functions will only append the result of the operation to that list.
+That means, the `dst` list is only guaranteed to be sorted, when it was empty and the input lists are all sorted.
 
 Refer to the documentation of the [clone-function callback](allocator.h.md#clone-function) to learn how to implement it.
 
--- a/src/cx/list.h	Fri Nov 07 18:42:06 2025 +0100
+++ b/src/cx/list.h	Fri Nov 07 19:13:28 2025 +0100
@@ -978,6 +978,7 @@
  * @param data optional additional data that is passed to the clone function
  * @retval zero when all elements were successfully cloned
  * @retval non-zero when an allocation error occurred
+ * @see cxListUnion()
  */
 cx_attr_nonnull_arg(1, 2, 3)
 CX_EXPORT int cxListClone(CxList *dst, const CxList *src,
@@ -1009,8 +1010,8 @@
 /**
  * Clones elements from a list only if they are also present in another list.
  *
- * This function is optimized for the case when both the @p minuend and the
- * @p subtrahend are sorted.
+ * This function is optimized for the case when both the @p src and the
+ * @p other list are sorted.
  *
  * If the destination list already contains elements, the intersection is appended
  * to that list.
@@ -1028,6 +1029,32 @@
 CX_EXPORT int cxListIntersection(CxList *dst, const CxList *src, const CxList *other,
         cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data);
 
+/**
+ * Performs a deep clone of one list into another, skipping duplicates.
+ *
+ * This function is optimized for the case when both the @p src and the
+ * @p other list are sorted.
+ * In that case, the union will also be sorted.
+ *
+ * If the destination list already contains elements, the union is appended
+ * to that list. In that case the destination is not necessarily sorted.
+ *
+ * @param dst the destination list
+ * @param src the primary source list
+ * @param other the other list, where elements are only cloned from
+ * when they are not in @p src
+ * @param clone_func the clone function for the elements
+ * @param clone_allocator the allocator that is passed to the clone function
+ * @param data optional additional data that is passed to the clone function
+ * @retval zero when the elements were successfully cloned
+ * @retval non-zero when an allocation error occurred
+ * @see cxListClone()
+ */
+cx_attr_nonnull_arg(1, 2, 3, 4)
+CX_EXPORT int cxListUnion(CxList *dst, const CxList *src, const CxList *other,
+        cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data);
+
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
--- a/src/list.c	Fri Nov 07 18:42:06 2025 +0100
+++ b/src/list.c	Fri Nov 07 19:13:28 2025 +0100
@@ -993,3 +993,88 @@
 
     return 0;
 }
+
+int cxListUnion(CxList *dst,
+        const CxList *src, const CxList *other,
+        cx_clone_func clone_func, const CxAllocator *clone_allocator, void *data) {
+    if (clone_allocator == NULL) clone_allocator = cxDefaultAllocator;
+
+    // optimize for sorted collections
+    if (cxCollectionSorted(src) && cxCollectionSorted(other)) {
+        bool dst_was_empty = cxCollectionSize(dst) == 0;
+
+        CxIterator src_iter = cxListIterator(src);
+        CxIterator other_iter = cxListIterator(other);
+        while (cxIteratorValid(src_iter) || cxIteratorValid(other_iter)) {
+            void *src_elem, *other_elem;
+            int d;
+            if (!cxIteratorValid(src_iter)) {
+                other_elem = cxIteratorCurrent(other_iter);
+                d = 1;
+            } else if (!cxIteratorValid(other_iter)) {
+                src_elem = cxIteratorCurrent(src_iter);
+                d = -1;
+            } else {
+                src_elem = cxIteratorCurrent(src_iter);
+                other_elem = cxIteratorCurrent(other_iter);
+                d = src->collection.cmpfunc(src_elem, other_elem);
+            }
+            if (d <= 0) {
+                // source element is smaller or equal, clone it
+                void **dst_mem = cxListEmplace(dst);
+                void *target = cxCollectionStoresPointers(dst) ? NULL : dst_mem;
+                void* dst_ptr = clone_func(target, src_elem, clone_allocator, data);
+                if (dst_ptr == NULL) {
+                    cx_list_pop_uninitialized_elements(dst, 1);
+                    return 1;
+                }
+                if (cxCollectionStoresPointers(dst)) {
+                    *dst_mem = dst_ptr;
+                }
+                cxIteratorNext(src_iter);
+                // if the other element was equal, skip it
+                if (d == 0) {
+                    cxIteratorNext(other_iter);
+                }
+            } else {
+                // the other element is smaller, clone it
+                void **dst_mem = cxListEmplace(dst);
+                void *target = cxCollectionStoresPointers(dst) ? NULL : dst_mem;
+                void* dst_ptr = clone_func(target, other_elem, clone_allocator, data);
+                if (dst_ptr == NULL) {
+                    cx_list_pop_uninitialized_elements(dst, 1);
+                    return 1;
+                }
+                if (cxCollectionStoresPointers(dst)) {
+                    *dst_mem = dst_ptr;
+                }
+                cxIteratorNext(other_iter);
+            }
+        }
+
+        // if dst was empty, it is now guaranteed to be sorted
+        dst->collection.sorted = dst_was_empty;
+    } else {
+        if (cxListClone(dst, src, clone_func, clone_allocator, data)) {
+            return 1;
+        }
+        CxIterator other_iter = cxListIterator(other);
+        cx_foreach(void *, elem, other_iter) {
+            if (cxListContains(src, elem)) {
+                continue;
+            }
+            void **dst_mem = cxListEmplace(dst);
+            void *target = cxCollectionStoresPointers(dst) ? NULL : dst_mem;
+            void* dst_ptr = clone_func(target, elem, clone_allocator, data);
+            if (dst_ptr == NULL) {
+                cx_list_pop_uninitialized_elements(dst, 1);
+                return 1;
+            }
+            if (cxCollectionStoresPointers(dst)) {
+                *dst_mem = dst_ptr;
+            }
+        }
+    }
+
+    return 0;
+}
--- a/tests/test_list.c	Fri Nov 07 18:42:06 2025 +0100
+++ b/tests/test_list.c	Fri Nov 07 19:13:28 2025 +0100
@@ -2872,6 +2872,110 @@
     }
 }
 
+static CX_TEST_SUBROUTINE(verify_union, bool sorted, bool alloc_fail) {
+    CxTestingAllocator talloc;
+    cx_testing_allocator_init(&talloc);
+
+    CxList *dst = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, CX_STORE_POINTERS);
+    cxDefineAdvancedDestructor(dst, cxFree, &talloc);
+    CxList *src = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, sizeof(int));
+    CxList *other = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, sizeof(int));
+
+    int dst_data[] = {47, 178, 176, 83};
+    int src_data[] = {
+        153, 106, 171, 130, 74, 173, 150, 94, 27, 92, 70, 175, 200, 20, 29, 161, 88, 116, 71, 53, 199, 124, 32, 9, 76,
+        151, 33, 51, 37, 65, 176, 49, 12, 162, 28, 85, 4, 177, 198, 54, 109, 188, 44, 77, 194, 63, 41, 129, 97, 83
+    };
+    int other_data[] = {
+        75, 137, 176, 111, 85, 27, 197, 141, 46, 103, 69, 146, 49, 79, 63, 130, 154, 45, 38, 139, 193, 90, 64, 142, 115,
+        120, 78, 100, 101, 42, 21, 1, 161, 10, 114, 198, 181, 178, 136, 188, 59, 41, 73, 99, 151, 144, 118, 53, 199, 71
+    };
+
+    for (unsigned i = 0 ; i < cx_nmemb(dst_data) ; i++) {
+        int *x = cxMalloc(&talloc.base, sizeof(int));
+        *x = dst_data[i];
+        cxListAdd(dst, x);
+    }
+    cxListAddArray(src, src_data, 50);
+    cxListAddArray(other, other_data, 50);
+    if (sorted) {
+        cxListSort(dst);
+        cxListSort(src);
+        cxListSort(other);
+    }
+
+    // the 14 elements from the intersection should not appear twice in the destination:
+    // 27, 41, 49, 53, 63, 71, 85, 130, 151, 161, 176, 188, 198, 199
+    // however, the elements that are already in dst may appear twice
+    size_t expected_len = 90;
+    int expected_unsorted[] = {
+        47, 178, 176, 83,
+        153, 106, 171, 130, 74, 173, 150, 94, 27, 92, 70, 175, 200, 20, 29, 161,
+        88, 116, 71, 53, 199, 124, 32, 9, 76, 151, 33, 51, 37, 65, 176, 49, 12,
+        162, 28, 85, 4, 177, 198, 54, 109, 188, 44, 77, 194, 63, 41, 129, 97,
+        83, 75, 137, 111, 197, 141, 46, 103, 69, 146, 79, 154, 45, 38, 139, 193,
+        90, 64, 142, 115, 120, 78, 100, 101, 42, 21, 1, 10, 114, 181, 178, 136,
+        59, 73, 99, 144, 118
+    };
+    int expected_sorted[] = {
+        47, 83, 176, 178,
+        1, 4, 9, 10, 12, 20, 21, 27, 28, 29, 32, 33, 37, 38, 41, 42, 44, 45,
+        46, 49, 51, 53, 54, 59, 63, 64, 65, 69, 70, 71, 73, 74, 75, 76, 77, 78,
+        79, 83, 85, 88, 90, 92, 94, 97, 99, 100, 101, 103, 106, 109, 111, 114,
+        115, 116, 118, 120, 124, 129, 130, 136, 137, 139, 141, 142, 144, 146,
+        150, 151, 153, 154, 161, 162, 171, 173, 175, 176, 177, 178, 181, 188,
+        193, 194, 197, 198, 199, 200
+    };
+
+    if (alloc_fail) {
+        test_clone_func_max_enabled = true;
+        test_clone_func_max_clones = 66;
+        expected_len = 70;
+    }
+    CxList *expected = cxArrayListCreate(cxDefaultAllocator, cx_cmp_int, sizeof(int), expected_len);
+    cxListAddArray(expected, sorted ? expected_sorted : expected_unsorted, expected_len);
+
+    int result = cxListUnion(dst, src, other, test_clone_func, &talloc.base, NULL);
+    if (alloc_fail) {
+        CX_TEST_ASSERT(result != 0);
+    } else {
+        CX_TEST_ASSERT(result == 0);
+    }
+    test_clone_func_max_enabled = false;
+    CX_TEST_ASSERT(expected_len == cxListSize(dst));
+    CX_TEST_ASSERT(0 == cxListCompare(dst, expected));
+
+    cxListFree(dst);
+    cxListFree(src);
+    cxListFree(other);
+    cxListFree(expected);
+    CX_TEST_ASSERT(cx_testing_allocator_verify(&talloc));
+}
+
+CX_TEST(test_list_union_unsorted) {
+    CX_TEST_DO {
+        CX_TEST_CALL_SUBROUTINE(verify_union, false, false);
+    }
+}
+
+CX_TEST(test_list_union_sorted) {
+    CX_TEST_DO {
+        CX_TEST_CALL_SUBROUTINE(verify_union, true, false);
+    }
+}
+
+CX_TEST(test_list_union_unsorted_alloc_fail) {
+    CX_TEST_DO {
+        CX_TEST_CALL_SUBROUTINE(verify_union, false, true);
+    }
+}
+
+CX_TEST(test_list_union_sorted_alloc_fail) {
+    CX_TEST_DO {
+        CX_TEST_CALL_SUBROUTINE(verify_union, true, true);
+    }
+}
+
 CX_TEST(test_list_pointer_list_supports_null) {
     CxList *list = cxLinkedListCreate(cxDefaultAllocator, cx_cmp_int, CX_STORE_POINTERS);
     int x = 47;
@@ -3327,6 +3431,10 @@
     CxTestSuite *suite = cx_test_suite_new("list collection operations");
 
     // we do not perform the following tests with every combination of list types
+    cx_test_register(suite, test_list_union_unsorted);
+    cx_test_register(suite, test_list_union_sorted);
+    cx_test_register(suite, test_list_union_unsorted_alloc_fail);
+    cx_test_register(suite, test_list_union_sorted_alloc_fail);
     cx_test_register(suite, test_list_difference_unsorted);
     cx_test_register(suite, test_list_difference_sorted);
     cx_test_register(suite, test_list_difference_unsorted_alloc_fail);

mercurial