Sun, 29 Dec 2019 11:29:17 +0100
adds string replace functions
src/string.c | file | annotate | diff | comparison | revisions | |
src/ucx/string.h | file | annotate | diff | comparison | revisions | |
test/main.c | file | annotate | diff | comparison | revisions | |
test/string_tests.c | file | annotate | diff | comparison | revisions | |
test/string_tests.h | file | annotate | diff | comparison | revisions |
--- a/src/string.c Fri Dec 27 11:48:55 2019 +0100
+++ b/src/string.c Sun Dec 29 11:29:17 2019 +0100
@@ -662,6 +662,164 @@
     return ret;
 }
 
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+/*
+ * One link in a chain of index buffers. Each link owns an array of match
+ * offsets (buf) and counts how many of its slots are in use (len).
+ */
+struct scstrreplace_ibuf {
+    size_t* buf;
+    unsigned int len; /* small indices */
+    struct scstrreplace_ibuf* next;
+};
+
+/* Frees every link of an index buffer chain, including the offset arrays. */
+static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
+    while (buf) {
+        struct scstrreplace_ibuf *next = buf->next;
+        free(buf->buf);
+        free(buf);
+        buf = next;
+    }
+}
+
+/*
+ * Returns a copy of str in which up to replmax occurrences of pattern are
+ * replaced by replacement. Matches are collected first, so the result can be
+ * allocated once with its final size. On allocation failure the returned
+ * sstr_t.ptr is NULL.
+ */
+sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
+        scstr_t pattern, scstr_t replacement, size_t replmax) {
+
+    /*
+     * Nothing can be replaced: return an unmodified, zero terminated copy.
+     * The copy comes from the specified allocator, like any other result,
+     * and rejecting replmax == 0 here keeps the index buffers non-empty.
+     */
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0) {
+        sstr_t copy;
+        copy.length = str.length;
+        copy.ptr = almalloc(allocator, str.length + 1);
+        if (!copy.ptr) return sstrn(NULL, 0);
+        if (str.length > 0) {
+            memcpy(copy.ptr, str.ptr, str.length);
+        }
+        copy.ptr[copy.length] = '\0';
+        return copy;
+    }
+
+    /* Compute expected buffer length */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct scstrreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof *firstbuf);
+    if (!firstbuf) return sstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof *firstbuf->buf);
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return sstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    scstr_t searchstr = str;
+    size_t found = 0;
+    do {
+        scstr_t match = scstrscstr(searchstr, pattern);
+        if (match.length == 0) {
+            break;
+        }
+
+        /* Allocate next buffer in chain, if required */
+        if (curbuf->len == ibuflen) {
+            struct scstrreplace_ibuf *nextbuf = calloc(1, sizeof *nextbuf);
+            if (!nextbuf) {
+                /* do not leak the links collected so far */
+                scstrrepl_free_ibuf(firstbuf);
+                return sstrn(NULL, 0);
+            }
+            nextbuf->buf = calloc(ibuflen, sizeof *nextbuf->buf);
+            if (!nextbuf->buf) {
+                free(nextbuf);
+                scstrrepl_free_ibuf(firstbuf);
+                return sstrn(NULL, 0);
+            }
+            curbuf->next = nextbuf;
+            curbuf = nextbuf;
+        }
+
+        /* Record match index and continue behind the match */
+        found++;
+        size_t idx = (size_t) (match.ptr - str.ptr);
+        curbuf->buf[curbuf->len++] = idx;
+        searchstr.ptr = match.ptr + pattern.length;
+        searchstr.length = str.length - idx - pattern.length;
+    } while (searchstr.length > 0 && found < replmax);
+
+    /*
+     * Allocate result string: every replacement drops pattern.length bytes
+     * and adds replacement.length bytes; one extra byte holds the terminator.
+     */
+    sstr_t result;
+    result.length = str.length - found * pattern.length
+            + found * replacement.length;
+    result.ptr = almalloc(allocator, result.length + 1);
+    if (!result.ptr) {
+        scstrrepl_free_ibuf(firstbuf);
+        return sstrn(NULL, 0);
+    }
+
+    /* Build result string */
+    size_t srcidx = 0;
+    char* destptr = result.ptr;
+    for (curbuf = firstbuf; curbuf; curbuf = curbuf->next) {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr+srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Copy the replacement and skip the source pattern */
+            srcidx += pattern.length;
+            if (replacement.length > 0) {
+                memcpy(destptr, replacement.ptr, replacement.length);
+                destptr += replacement.length;
+            }
+        }
+    }
+
+    /* Copy the remainder behind the last match */
+    if (srcidx < str.length) {
+        memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
+    }
+
+    /* Result is guaranteed to be zero terminated */
+    result.ptr[result.length] = '\0';
+
+    /* The match indices are no longer needed */
+    scstrrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+/* Convenience variant that uses ucx_default_allocator(). */
+sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
+        scstr_t replacement, size_t replmax) {
+    return scstrreplacen_a(ucx_default_allocator(),
+            str, pattern, replacement, replmax);
+}
+
+
 // type adjustment functions
 scstr_t ucx_sc2sc(scstr_t str) {
     return str;
--- a/src/ucx/string.h Fri Dec 27 11:48:55 2019 +0100
+++ b/src/ucx/string.h Sun Dec 29 11:29:17 2019 +0100
@@ -1072,6 +1072,128 @@
  */
 #define sstrupper_a(allocator, string) scstrupper_a(allocator, string)
 
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * The resulting string is allocated by the specified allocator. I.e. it
+ * depends on the used allocator, whether the sstr_t.ptr must be freed
+ * manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
+        scstr_t pattern, scstr_t replacement, size_t replmax);
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * The sstr_t.ptr of the resulting string must be freed manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
+        scstr_t replacement, size_t replmax);
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * The resulting string is allocated by the specified allocator. I.e. it
+ * depends on the used allocator, whether the sstr_t.ptr must be freed
+ * manually. If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplacen_a(allocator, str, pattern, replacement, replmax) \
+        scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \
+        SCSTR(replacement), replmax)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * The sstr_t.ptr of the resulting string must be freed manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplacen(str, pattern, replacement, replmax) \
+        scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), replmax)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces all occurrences (replmax is SIZE_MAX, which needs <stdint.h>).
+ *
+ * The resulting string is allocated by the specified allocator. I.e. it
+ * depends on the used allocator, whether the sstr_t.ptr must be freed
+ * manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplace_a(allocator, str, pattern, replacement) \
+        scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \
+        SCSTR(replacement), SIZE_MAX)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces all occurrences (replmax is SIZE_MAX, which needs <stdint.h>).
+ *
+ * The sstr_t.ptr of the resulting string must be freed manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplace(str, pattern, replacement) \
+        scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), SIZE_MAX)
+
 #ifdef __cplusplus
 }
 #endif
--- a/test/main.c Fri Dec 27 11:48:55 2019 +0100 +++ b/test/main.c Sun Dec 29 11:29:17 2019 +0100 @@ -138,6 +138,7 @@ ucx_test_register(suite, test_sstrtrim); ucx_test_register(suite, test_sstrprefixsuffix); ucx_test_register(suite, test_sstrcaseprefixsuffix); + ucx_test_register(suite, test_sstrreplace); /* UcxLogger Tests */ ucx_test_register(suite, test_ucx_logger_new);
--- a/test/string_tests.c Fri Dec 27 11:48:55 2019 +0100
+++ b/test/string_tests.c Sun Dec 29 11:29:17 2019 +0100
@@ -476,3 +476,49 @@
 
     UCX_TEST_END
 }
+
+/* Checks sstrreplace() and sstrreplacen() against fixed expected strings. */
+UCX_TEST(test_sstrreplace) {
+
+    sstr_t str = ST("test ababab string aba");
+    sstr_t longstr = ST("xyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacd");
+    sstr_t notrail = ST("test abab");
+
+    /* single match, replacement longer than the pattern */
+    sstr_t repl = sstrreplace(str, SC("abab"), SC("muchlonger"));
+    sstr_t expected = ST("test muchlongerab string aba");
+
+    /* replmax limits the number of replacements to two */
+    sstr_t repln = sstrreplacen(str, SC("ab"), SC("c"), 2);
+    sstr_t expectedn = ST("test ccab string aba");
+
+    /* long input where every 'a' is a match */
+    sstr_t longrepl = sstrreplace(longstr, SC("a"), SC("z"));
+    sstr_t longexpect = ST("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzcd");
+
+    /* last match ends exactly at the end of the input */
+    sstr_t replnotrail = sstrreplace(notrail, SC("ab"), SC("z"));
+    sstr_t notrailexpect = ST("test zz");
+
+    UCX_TEST_BEGIN
+
+    UCX_TEST_ASSERT(repl.ptr != str.ptr, "result string is not fresh");
+    UCX_TEST_ASSERT(!sstrcmp(repl, expected), "incorrect replacement");
+
+    UCX_TEST_ASSERT(repln.ptr != str.ptr, "result string is not fresh");
+    UCX_TEST_ASSERT(!sstrcmp(repln, expectedn), "incorrect replacement");
+
+    UCX_TEST_ASSERT(!sstrcmp(longrepl, longexpect),
+            "incorrect handling of long strings");
+
+    UCX_TEST_ASSERT(!sstrcmp(replnotrail, notrailexpect),
+            "no trail replacement fails");
+
+    UCX_TEST_END
+
+    /* results of sstrreplace()/sstrreplacen() must be freed manually */
+    free(repl.ptr);
+    free(repln.ptr);
+    free(longrepl.ptr);
+    free(replnotrail.ptr);
+}
\ No newline at end of file