Thu, 17 Apr 2025 21:45:01 +0200
improve cx_strreplacen() - resolves #623
CHANGELOG | file | annotate | diff | comparison | revisions | |
docs/Writerside/topics/about.md | file | annotate | diff | comparison | revisions | |
src/string.c | file | annotate | diff | comparison | revisions |
--- a/CHANGELOG Thu Apr 17 20:48:29 2025 +0200 +++ b/CHANGELOG Thu Apr 17 21:45:01 2025 +0200 @@ -9,6 +9,7 @@ * adds CX_PRIstr and CX_SFMT macros for formatting UCX strings * changes grow strategy for the mempory pool to reduce reallocations * changes grow strategy for CxBuffer, which does now take the page size into account + * changes the implementation of cx_strreplacen() for improved efficiency * fixes unnecessary allocations in cx_strcat() family of functions * fixes errno value after failing cxBufferSeek() to be consistently EINVAL * fixes implementation of cxBufferTerminate()
--- a/docs/Writerside/topics/about.md Thu Apr 17 20:48:29 2025 +0200 +++ b/docs/Writerside/topics/about.md Thu Apr 17 21:45:01 2025 +0200 @@ -34,8 +34,9 @@ * adds cxBufferShrink() * adds cxTreeSize() * adds CX_PRIstr and CX_SFMT macros for formatting UCX strings -* changes grow strategy for the mempory pool to reduce reallocations +* changes grow strategy for the memory pool to reduce reallocations * changes grow strategy for CxBuffer, which does now take the page size into account +* changes the implementation of cx_strreplacen() for improved efficiency * fixes unnecessary allocations in cx_strcat() family of functions * fixes errno value after failing cxBufferSeek() to be consistently EINVAL * fixes implementation of cxBufferTerminate()
--- a/src/string.c Thu Apr 17 20:48:29 2025 +0200 +++ b/src/string.c Thu Apr 17 21:45:01 2025 +0200 @@ -569,27 +569,6 @@ #endif } -#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE -#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 -#endif - -struct cx_strreplace_ibuf { - size_t *buf; - struct cx_strreplace_ibuf *next; - unsigned int len; -}; - -static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { - // remember, the first data is on the stack! - buf = buf->next; - while (buf) { - struct cx_strreplace_ibuf *next = buf->next; - free(buf->buf); - free(buf); - buf = next; - } -} - cxmutstr cx_strreplacen_a( const CxAllocator *allocator, cxstring str, @@ -597,108 +576,60 @@ cxstring replacement, size_t replmax ) { + // special cases + if (search.length == 0 || search.length > str.length || replmax == 0) { + return cx_strdup_a(allocator, str); + } - if (search.length == 0 || search.length > str.length || replmax == 0) - return cx_strdup_a(allocator, str); + size_t in_len = str.length; + size_t search_len = search.length; + size_t repl_len = replacement.length; - // Compute expected buffer length - size_t ibufmax = str.length / search.length; - size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; - if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { - ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; + // first run, count the occurrences + // and remember where the first is + size_t occurrences = 1; + cxstring first = cx_strstr(str, search); + if (first.length == 0) { + // special case, no replacements + return cx_strdup_a(allocator, str); + } + cxstring tmp = cx_strsubs(first, search_len); + while (occurrences < replmax && + (tmp = cx_strstr(tmp, search)).length > 0) { + occurrences++; + tmp = cx_strsubs(tmp, search_len); } - // First index buffer can be on the stack - struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf; - size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE]; - ibuf.buf = ibuf_sbo; - ibuf.next = NULL; - ibuf.len = 0; + // calculate necessary memory + signed long long diff_len = (signed long long) repl_len - search_len; + size_t out_len = in_len + diff_len * occurrences; + cxmutstr out = { + cxMalloc(allocator, out_len + 1), + out_len + }; + if (out.ptr == NULL) return out; - // Search occurrences - cxstring searchstr = str; - size_t found = 0; - do { - cxstring match = cx_strstr(searchstr, search); - if (match.length > 0) { - // Allocate next buffer in chain, if required - if (curbuf->len == ibuflen) { - struct cx_strreplace_ibuf *nextbuf = - calloc(1, sizeof(struct cx_strreplace_ibuf)); - if (!nextbuf) { - cx_strrepl_free_ibuf(&ibuf); - return cx_mutstrn(NULL, 0); - } - nextbuf->buf = calloc(ibuflen, sizeof(size_t)); - if (!nextbuf->buf) { - free(nextbuf); - cx_strrepl_free_ibuf(&ibuf); - return cx_mutstrn(NULL, 0); - } - curbuf->next = nextbuf; - curbuf = nextbuf; - } - - // Record match index - found++; - size_t idx = match.ptr - str.ptr; - curbuf->buf[curbuf->len++] = idx; - searchstr.ptr = match.ptr + search.length; - searchstr.length = str.length - idx - search.length; - } else { - break; - } - } while (searchstr.length > 0 && found < replmax); - - // Allocate result string - cxmutstr result; - { - long long adjlen = (long long) replacement.length - (long long) search.length; - size_t rcount = 0; - curbuf = &ibuf; - do { - rcount += curbuf->len; - curbuf = curbuf->next; - } while (curbuf); - result.length = str.length + rcount * adjlen; - result.ptr = cxMalloc(allocator, result.length + 1); - if (!result.ptr) { - cx_strrepl_free_ibuf(&ibuf); - return cx_mutstrn(NULL, 0); - } + // second run: perform the replacements + // but start where we found the first occurrence + const char *inp = str.ptr; + tmp = first; + char *outp = out.ptr; + while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) { + size_t copylen = tmp.ptr - inp; + memcpy(outp, inp, copylen); + outp += copylen; + memcpy(outp, replacement.ptr, repl_len); + outp += repl_len; + inp += copylen + search_len; + tmp = cx_strsubs(tmp, search_len); } - // Build result string - curbuf = &ibuf; - size_t srcidx = 0; - char *destptr = result.ptr; - do { - for (size_t i = 0; i < curbuf->len; i++) { - // Copy source part up to next match - size_t idx = curbuf->buf[i]; - size_t srclen = idx - srcidx; - if (srclen > 0) { - memcpy(destptr, str.ptr + srcidx, srclen); - destptr += srclen; - srcidx += srclen; - } + // add the remaining string + size_t copylen = in_len - (inp - str.ptr); + memcpy(outp, inp, copylen); + out.ptr[out_len] = '\0'; - // Copy the replacement and skip the source pattern - srcidx += search.length; - memcpy(destptr, replacement.ptr, replacement.length); - destptr += replacement.length; - } - curbuf = curbuf->next; - } while (curbuf); - memcpy(destptr, str.ptr + srcidx, str.length - srcidx); - - // Result is guaranteed to be zero-terminated - result.ptr[result.length] = '\0'; - - // Free index buffer - cx_strrepl_free_ibuf(&ibuf); - - return result; + return out; } CxStrtokCtx cx_strtok_(