improve cx_strreplacen() - resolves #623

Thu, 17 Apr 2025 21:45:01 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 17 Apr 2025 21:45:01 +0200
changeset 1300
fcb149ee60ff
parent 1299
5dfce68057ce
child 1301
f81d8b4f40c4

improve cx_strreplacen() - resolves #623

CHANGELOG file | annotate | diff | comparison | revisions
docs/Writerside/topics/about.md file | annotate | diff | comparison | revisions
src/string.c file | annotate | diff | comparison | revisions
--- a/CHANGELOG	Thu Apr 17 20:48:29 2025 +0200
+++ b/CHANGELOG	Thu Apr 17 21:45:01 2025 +0200
@@ -9,6 +9,7 @@
  * adds CX_PRIstr and CX_SFMT macros for formatting UCX strings
  * changes grow strategy for the mempory pool to reduce reallocations
  * changes grow strategy for CxBuffer, which does now take the page size into account
+ * changes the implementation of cx_strreplacen() for improved efficiency
  * fixes unnecessary allocations in cx_strcat() family of functions
  * fixes errno value after failing cxBufferSeek() to be consistently EINVAL
  * fixes implementation of cxBufferTerminate()
--- a/docs/Writerside/topics/about.md	Thu Apr 17 20:48:29 2025 +0200
+++ b/docs/Writerside/topics/about.md	Thu Apr 17 21:45:01 2025 +0200
@@ -34,8 +34,9 @@
 * adds cxBufferShrink()
 * adds cxTreeSize()
 * adds CX_PRIstr and CX_SFMT macros for formatting UCX strings
-* changes grow strategy for the mempory pool to reduce reallocations
+* changes grow strategy for the memory pool to reduce reallocations
 * changes grow strategy for CxBuffer, which does now take the page size into account
+* changes the implementation of cx_strreplacen() for improved efficiency
 * fixes unnecessary allocations in cx_strcat() family of functions
 * fixes errno value after failing cxBufferSeek() to be consistently EINVAL
 * fixes implementation of cxBufferTerminate()
--- a/src/string.c	Thu Apr 17 20:48:29 2025 +0200
+++ b/src/string.c	Thu Apr 17 21:45:01 2025 +0200
@@ -569,27 +569,6 @@
 #endif
 }
 
-#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
-#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
-#endif
-
-struct cx_strreplace_ibuf {
-    size_t *buf;
-    struct cx_strreplace_ibuf *next;
-    unsigned int len;
-};
-
-static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
-    // remember, the first data is on the stack!
-    buf = buf->next;
-    while (buf) {
-        struct cx_strreplace_ibuf *next = buf->next;
-        free(buf->buf);
-        free(buf);
-        buf = next;
-    }
-}
-
 cxmutstr cx_strreplacen_a(
         const CxAllocator *allocator,
         cxstring str,
@@ -597,108 +576,60 @@
         cxstring replacement,
         size_t replmax
 ) {
+    // special cases
+    if (search.length == 0 || search.length > str.length || replmax == 0) {
+        return cx_strdup_a(allocator, str);
+    }
 
-    if (search.length == 0 || search.length > str.length || replmax == 0)
-        return cx_strdup_a(allocator, str);
+    size_t in_len = str.length;
+    size_t search_len = search.length;
+    size_t repl_len = replacement.length;
 
-    // Compute expected buffer length
-    size_t ibufmax = str.length / search.length;
-    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
-    if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
-        ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
+    // first run, count the occurrences
+    // and remember where the first is
+    size_t occurrences = 1;
+    cxstring first = cx_strstr(str, search);
+    if (first.length == 0) {
+        // special case, no replacements
+        return cx_strdup_a(allocator, str);
+    }
+    cxstring tmp = cx_strsubs(first, search_len);
+    while (occurrences < replmax &&
+            (tmp = cx_strstr(tmp, search)).length > 0) {
+        occurrences++;
+        tmp = cx_strsubs(tmp, search_len);
     }
 
-    // First index buffer can be on the stack
-    struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf;
-    size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE];
-    ibuf.buf = ibuf_sbo;
-    ibuf.next = NULL;
-    ibuf.len = 0;
+    // calculate necessary memory
+    signed long long diff_len = (signed long long) repl_len - search_len;
+    size_t out_len = in_len + diff_len * occurrences;
+    cxmutstr out = {
+        cxMalloc(allocator, out_len + 1),
+        out_len
+    };
+    if (out.ptr == NULL) return out;
 
-    // Search occurrences
-    cxstring searchstr = str;
-    size_t found = 0;
-    do {
-        cxstring match = cx_strstr(searchstr, search);
-        if (match.length > 0) {
-            // Allocate next buffer in chain, if required
-            if (curbuf->len == ibuflen) {
-                struct cx_strreplace_ibuf *nextbuf =
-                        calloc(1, sizeof(struct cx_strreplace_ibuf));
-                if (!nextbuf) {
-                    cx_strrepl_free_ibuf(&ibuf);
-                    return cx_mutstrn(NULL, 0);
-                }
-                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
-                if (!nextbuf->buf) {
-                    free(nextbuf);
-                    cx_strrepl_free_ibuf(&ibuf);
-                    return cx_mutstrn(NULL, 0);
-                }
-                curbuf->next = nextbuf;
-                curbuf = nextbuf;
-            }
-
-            // Record match index
-            found++;
-            size_t idx = match.ptr - str.ptr;
-            curbuf->buf[curbuf->len++] = idx;
-            searchstr.ptr = match.ptr + search.length;
-            searchstr.length = str.length - idx - search.length;
-        } else {
-            break;
-        }
-    } while (searchstr.length > 0 && found < replmax);
-
-    // Allocate result string
-    cxmutstr result;
-    {
-        long long adjlen = (long long) replacement.length - (long long) search.length;
-        size_t rcount = 0;
-        curbuf = &ibuf;
-        do {
-            rcount += curbuf->len;
-            curbuf = curbuf->next;
-        } while (curbuf);
-        result.length = str.length + rcount * adjlen;
-        result.ptr = cxMalloc(allocator, result.length + 1);
-        if (!result.ptr) {
-            cx_strrepl_free_ibuf(&ibuf);
-            return cx_mutstrn(NULL, 0);
-        }
+    // second run: perform the replacements
+    // but start where we found the first occurrence
+    const char *inp = str.ptr;
+    tmp = first;
+    char *outp = out.ptr;
+    while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) {
+        size_t copylen = tmp.ptr - inp;
+        memcpy(outp, inp, copylen);
+        outp += copylen;
+        memcpy(outp, replacement.ptr, repl_len);
+        outp += repl_len;
+        inp += copylen + search_len;
+        tmp = cx_strsubs(tmp, search_len);
     }
 
-    // Build result string
-    curbuf = &ibuf;
-    size_t srcidx = 0;
-    char *destptr = result.ptr;
-    do {
-        for (size_t i = 0; i < curbuf->len; i++) {
-            // Copy source part up to next match
-            size_t idx = curbuf->buf[i];
-            size_t srclen = idx - srcidx;
-            if (srclen > 0) {
-                memcpy(destptr, str.ptr + srcidx, srclen);
-                destptr += srclen;
-                srcidx += srclen;
-            }
+    // add the remaining string
+    size_t copylen = in_len - (inp - str.ptr);
+    memcpy(outp, inp, copylen);
+    out.ptr[out_len] = '\0';
 
-            // Copy the replacement and skip the source pattern
-            srcidx += search.length;
-            memcpy(destptr, replacement.ptr, replacement.length);
-            destptr += replacement.length;
-        }
-        curbuf = curbuf->next;
-    } while (curbuf);
-    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
-
-    // Result is guaranteed to be zero-terminated
-    result.ptr[result.length] = '\0';
-
-    // Free index buffer
-    cx_strrepl_free_ibuf(&ibuf);
-
-    return result;
+    return out;
 }
 
 CxStrtokCtx cx_strtok_(

mercurial