Mercurial > hg > ucx / changeset

--- a/src/cx/string.h	Sat Sep 03 15:11:23 2022 +0200
+++ b/src/cx/string.h	Fri Sep 09 20:19:08 2022 +0200
@@ -78,6 +78,15 @@
  */
 typedef struct cx_string_s cxstring;

+/**
+ * A literal initializer for a UCX string structure.
+ *
+ * The argument MUST be a string (const char*) \em literal.
+ *
+ * @param literal the string literal
+ */
+#define CX_STR(literal) {literal, sizeof(literal) - 1}
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -190,9 +199,28 @@
  *
  * @param str the string to free
  */
+__attribute__((__nonnull__))
 void cx_strfree(cxmutstr *str);

 /**
+ * Passes the pointer in this string to the allocator's free function.
+ *
+ * The pointer in the struct is set to \c NULL and the length is set to zero.
+ *
+ * \note There is no implementation for cxstring, because it is unlikely that
+ * you ever have a \c char \c const* you are really supposed to free. If you
+ * encounter such a situation, you should double-check your code.
+ *
+ * @param alloc the allocator
+ * @param str the string to free
+ */
+__attribute__((__nonnull__))
+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+);
+
+/**
  * Returns the accumulated length of all specified strings.
  *
  * \attention if the count argument is larger than the number of the
@@ -720,7 +748,7 @@
  * The returned string will be allocated by \p allocator.
  *
  * If allocation fails, or the input string is empty,
- * the returned string will point to \c NULL.
+ * the returned string will be empty.
  *
  * @param allocator the allocator to use
  * @param str the string where replacements should be applied
@@ -730,7 +758,7 @@
  * @return the resulting string after applying the replacements
  */
 __attribute__((__warn_unused_result__, __nonnull__))
-cxmutstr cx_strreplace_a(
+cxmutstr cx_strreplacen_a(
         CxAllocator *allocator,
         cxstring str,
         cxstring pattern,
@@ -748,7 +776,7 @@
  * to cx_strfree() eventually.
  *
  * If allocation fails, or the input string is empty,
- * the returned string will point to \c NULL.
+ * the returned string will be empty.
  *
  * @param str the string where replacements should be applied
  * @param pattern the pattern to search for
@@ -756,8 +784,47 @@
  * @param replmax maximum number of replacements
  * @return the resulting string after applying the replacements
  */
-#define cx_strreplace(str, pattern, replacement, replmax) \
-cx_strreplace_a(cxDefaultAllocator, str, pattern, replacement, replmax)
+#define cx_strreplacen(str, pattern, replacement, replmax) \
+cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, replmax)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ *
+ * The returned string will be allocated by \p allocator.
+ *
+ * If allocation fails, or the input string is empty,
+ * the returned string will be empty.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define cx_strreplace_a(allocator, str, pattern, replacement) \
+cx_strreplacen_a(allocator, str, pattern, replacement, SIZE_MAX)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces every occurrence of the pattern.
+ *
+ * The returned string will be allocated by \c malloc() and \em must be passed
+ * to cx_strfree() eventually.
+ *
+ * If allocation fails, or the input string is empty,
+ * the returned string will be empty.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define cx_strreplace(str, pattern, replacement) \
+cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, SIZE_MAX)

 #ifdef __cplusplus
 } // extern "C"
--- a/src/string.c	Sat Sep 03 15:11:23 2022 +0200
+++ b/src/string.c	Fri Sep 09 20:19:08 2022 +0200
@@ -72,6 +72,15 @@
     str->length = 0;
 }

+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+) {
+    cxFree(alloc, str->ptr);
+    str->ptr = NULL;
+    str->length = 0;
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -235,6 +244,11 @@
         return haystack;
     }

+    /* optimize for single-char needles */
+    if (needle.length == 1) {
+        return cx_strchr(haystack, *needle.ptr);
+    }
+
     /*
      * IMPORTANT:
      * Our prefix table contains the prefix length PLUS ONE
@@ -308,8 +322,55 @@
         size_t limit,
         cxstring *output
 ) {
-    // TODO: implement
-    return 0;
+    /* special case: output limit is zero */
+    if (limit == 0) return 0;
+
+    /* special case: delimiter is empty */
+    if (delim.length == 0) {
+        output[0] = string;
+        return 1;
+    }
+
+    /* special cases: delimiter is at least as large as the string */
+    if (delim.length >= string.length) {
+        /* exact match */
+        if (cx_strcmp(string, delim) == 0) {
+            output[0] = cx_strn(string.ptr, 0);
+            output[1] = cx_strn(string.ptr + string.length, 0);
+            return 2;
+        } else /* no match possible */ {
+            output[0] = string;
+            return 1;
+        }
+    }
+
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                /* copy the current string to the array */
+                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
+                output[n - 1] = item;
+                size_t processed = item.length + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached, copy the _full_ remaining string */
+                output[n - 1] = curpos;
+                break;
+            }
+        } else {
+            /* no more matches, copy last string */
+            output[n - 1] = curpos;
+            break;
+        }
+    }
+
+    return n;
 }

 size_t cx_strsplit_a(
@@ -319,8 +380,29 @@
         size_t limit,
         cxstring **output
 ) {
-    // TODO: implement
-    return 0;
+    /* find out how many splits we're going to make and allocate memory */
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                size_t processed = match.ptr - curpos.ptr + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached */
+                break;
+            }
+        } else {
+            /* no more matches */
+            break;
+        }
+    }
+    *output = cxCalloc(allocator, n, sizeof(cxstring));
+    return cx_strsplit(string, delim, n, *output);
 }

 size_t cx_strsplit_m(
@@ -344,7 +426,10 @@
                          delim, limit, (cxstring **) output);
 }

-int cx_strcmp(cxstring s1, cxstring s2) {
+int cx_strcmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
@@ -354,7 +439,10 @@
     }
 }

-int cx_strcasecmp(cxstring s1, cxstring s2) {
+int cx_strcasecmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
@@ -368,7 +456,10 @@
     }
 }

-cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
+cxmutstr cx_strdup_a(
+        CxAllocator *allocator,
+        cxstring string
+) {
     cxmutstr result = {
             cxMalloc(allocator, string.length + 1),
             string.length
@@ -400,18 +491,27 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }

-bool cx_strprefix(cxstring string, cxstring prefix) {
+bool cx_strprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
 }

-bool cx_strsuffix(cxstring string, cxstring suffix) {
+bool cx_strsuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
     return memcmp(string.ptr + string.length - suffix.length,
                   suffix.ptr, suffix.length) == 0;
 }

-bool cx_casestrprefix(cxstring string, cxstring prefix) {
+bool cx_strcaseprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
@@ -420,7 +520,10 @@
 #endif
 }

-bool cx_casestrsuffix(cxstring string, cxstring suffix) {
+bool cx_strcasesuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr+string.length-suffix.length,
@@ -442,3 +545,133 @@
         string.ptr[i] = toupper(string.ptr[i]);
     }
 }
+
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+/*
+ * One node of a singly linked chain of match-index buffers.
+ * Every node stores offsets (relative to the start of the source string)
+ * at which the pattern was found.
+ */
+struct cx_strreplace_ibuf {
+    size_t *buf; /* recorded match offsets */
+    unsigned int len; /* small indices */
+    struct cx_strreplace_ibuf *next;
+};
+
+/* Frees every node of an index-buffer chain; a NULL chain is a no-op. */
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
+    while (buf) {
+        struct cx_strreplace_ibuf *next = buf->next;
+        free(buf->buf);
+        free(buf);
+        buf = next;
+    }
+}
+
+/*
+ * Allocates one zero-initialized chain node with room for `capacity`
+ * offsets. Returns NULL (with nothing left allocated) on failure.
+ */
+static struct cx_strreplace_ibuf *cx_strrepl_new_ibuf(size_t capacity) {
+    struct cx_strreplace_ibuf *node = calloc(1, sizeof *node);
+    if (!node) return NULL;
+    node->buf = calloc(capacity, sizeof *node->buf);
+    if (!node->buf) {
+        free(node);
+        return NULL;
+    }
+    return node;
+}
+
+/*
+ * Replaces up to `replmax` non-overlapping occurrences of `pattern` in `str`
+ * with `replacement`, scanning from left to right.
+ *
+ * If the pattern is empty, longer than the string, or `replmax` is zero,
+ * the result is a plain copy made by cx_strdup_a().
+ *
+ * The temporary index buffers come from the C library allocator; only the
+ * result string is obtained from `allocator`. When any allocation fails,
+ * the function returns cx_mutstrn(NULL, 0).
+ *
+ * The result buffer holds one byte more than `length` and that byte is set
+ * to NUL, matching the length + 1 allocation used by cx_strdup_a().
+ */
+cxmutstr cx_strreplacen_a(
+        CxAllocator *allocator,
+        cxstring str,
+        cxstring pattern,
+        cxstring replacement,
+        size_t replmax
+) {
+
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return cx_strdup_a(allocator, str);
+
+    /* Compute expected buffer length */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct cx_strreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = cx_strrepl_new_ibuf(ibuflen);
+    if (!firstbuf) return cx_mutstrn(NULL, 0);
+
+    /* Search occurrences */
+    cxstring searchstr = str;
+    size_t found = 0;
+    do {
+        cxstring match = cx_strstr(searchstr, pattern);
+        if (match.length == 0) break;
+
+        /* Allocate next buffer in chain, if required */
+        if (curbuf->len == ibuflen) {
+            struct cx_strreplace_ibuf *nextbuf = cx_strrepl_new_ibuf(ibuflen);
+            if (!nextbuf) {
+                cx_strrepl_free_ibuf(firstbuf);
+                return cx_mutstrn(NULL, 0);
+            }
+            curbuf->next = nextbuf;
+            curbuf = nextbuf;
+        }
+
+        /* Record match index and continue right behind the match */
+        found++;
+        size_t idx = match.ptr - str.ptr;
+        curbuf->buf[curbuf->len++] = idx;
+        searchstr.ptr = match.ptr + pattern.length;
+        searchstr.length = str.length - idx - pattern.length;
+    } while (searchstr.length > 0 && found < replmax);
+
+    /*
+     * Allocate result string.
+     * `found` equals the number of recorded indices, and the matches do not
+     * overlap, so found * pattern.length never exceeds str.length. That lets
+     * the length be computed in size_t alone, without the POSIX-only ssize_t.
+     */
+    cxmutstr result;
+    result.length = str.length - found * pattern.length
+                    + found * replacement.length;
+    result.ptr = cxMalloc(allocator, result.length + 1);
+    if (!result.ptr) {
+        cx_strrepl_free_ibuf(firstbuf);
+        return cx_mutstrn(NULL, 0);
+    }
+
+    /* Build result string */
+    curbuf = firstbuf;
+    size_t srcidx = 0;
+    char *destptr = result.ptr;
+    do {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr + srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Skip the source pattern and copy the replacement */
+            srcidx += pattern.length;
+            if (replacement.length > 0) {
+                /* guarded: an empty replacement may carry a NULL pointer */
+                memcpy(destptr, replacement.ptr, replacement.length);
+                destptr += replacement.length;
+            }
+        }
+        curbuf = curbuf->next;
+    } while (curbuf);
+
+    /* Copy whatever follows the last match and terminate the result */
+    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
+    result.ptr[result.length] = '\0';
+
+    /* Free index buffer */
+    cx_strrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+
--- a/test/CMakeLists.txt	Sat Sep 03 15:11:23 2022 +0200
+++ b/test/CMakeLists.txt	Fri Sep 09 20:19:08 2022 +0200
@@ -15,6 +15,7 @@

 add_executable(ucxtest
         test_allocator.cpp
+        test_string.cpp
         test_buffer.cpp
         test_list.cpp
         test_tree.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_string.cpp	Fri Sep 09 20:19:08 2022 +0200
@@ -0,0 +1,531 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cx/string.h"
+#include "util_allocator.h"
+
+#include <gtest/gtest.h>
+
+TEST(String, construct) {
+    cxstring s1 = cx_str("1234");
+    cxstring s2 = cx_strn("abcd", 2);
+    cxmutstr s3 = cx_mutstr((char *) "1234");
+    cxmutstr s4 = cx_mutstrn((char *) "abcd", 2);
+
+    EXPECT_EQ(s1.length, 4);
+    EXPECT_EQ(s2.length, 2);
+    EXPECT_EQ(s3.length, 4);
+    EXPECT_EQ(s4.length, 2);
+}
+
+TEST(String, strfree) {
+    CxTestingAllocator alloc;
+    auto test = (char *) cxMalloc(&alloc, 16);
+    cxmutstr str = cx_mutstrn(test, 16);
+    ASSERT_EQ(str.ptr, test);
+    EXPECT_EQ(str.length, 16);
+    cx_strfree_a(&alloc, &str);
+    EXPECT_EQ(str.ptr, nullptr);
+    EXPECT_EQ(str.length, 0);
+    EXPECT_TRUE(alloc.verify());
+}
+
+TEST(String, strlen) {
+    cxstring s1 = CX_STR("1234");
+    cxstring s2 = CX_STR(".:.:.");
+    cxstring s3 = CX_STR("X");
+
+    size_t len0 = cx_strlen(0);
+    size_t len1 = cx_strlen(1, s1);
+    size_t len2 = cx_strlen(2, s1, s2);
+    size_t len3 = cx_strlen(3, s1, s2, s3);
+
+    EXPECT_EQ(len0, 0);
+    EXPECT_EQ(len1, 4);
+    EXPECT_EQ(len2, 9);
+    EXPECT_EQ(len3, 10);
+}
+
+
+TEST(String, strchr) {
+    cxstring str = CX_STR("I will find you - and I will kill you");
+
+    cxstring notfound = cx_strchr(str, 'x');
+    EXPECT_EQ(notfound.length, 0);
+
+    cxstring result = cx_strchr(str, 'w');
+    EXPECT_EQ(result.length, 35);
+    EXPECT_EQ(strcmp("will find you - and I will kill you", result.ptr), 0);
+}
+
+TEST(String, strrchr) {
+    cxstring str = CX_STR("I will find you - and I will kill you");
+
+    cxstring notfound = cx_strrchr(str, 'x');
+    EXPECT_EQ(notfound.length, 0);
+
+    cxstring result = cx_strrchr(str, 'w');
+    EXPECT_EQ(result.length, 13);
+    EXPECT_EQ(strcmp("will kill you", result.ptr), 0);
+}
+
+TEST(String, strstr) {
+    cxstring str = CX_STR("find the match in this string");
+    cxstring longstr = CX_STR(
+            "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl"
+            "mnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwx"
+            "yzabcdeababababnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghij"
+            "klmnopqrstuvwxyzaababababababababrstuvwxyzabcdefghijklmnopqrstuv"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "wxyz1234567890");
+    cxstring longstrpattern = CX_STR(
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+    );
+    cxstring longstrresult = CX_STR(
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "wxyz1234567890"
+    );
+
+    cxstring notfound = cx_strstr(str, cx_str("no match"));
+    EXPECT_EQ(notfound.length, 0);
+
+    cxstring result = cx_strstr(str, cx_str("match"));
+    EXPECT_EQ(result.length, 20);
+    EXPECT_EQ(strcmp("match in this string", result.ptr), 0);
+
+    result = cx_strstr(str, cx_str(""));
+    EXPECT_EQ(result.length, str.length);
+    EXPECT_EQ(strcmp(str.ptr, result.ptr), 0);
+
+    result = cx_strstr(longstr, longstrpattern);
+    EXPECT_EQ(result.length, longstrresult.length);
+    EXPECT_EQ(strcmp(result.ptr, longstrresult.ptr), 0);
+}
+
+TEST(String, strcmp) {
+    cxstring str = CX_STR("compare this");
+
+    EXPECT_EQ(cx_strcmp(CX_STR(""), CX_STR("")), 0);
+    EXPECT_GT(cx_strcmp(str, CX_STR("")), 0);
+    EXPECT_EQ(cx_strcmp(str, CX_STR("compare this")), 0);
+    EXPECT_NE(cx_strcmp(str, CX_STR("Compare This")), 0);
+    EXPECT_LT(cx_strcmp(str, CX_STR("compare tool")), 0);
+    EXPECT_GT(cx_strcmp(str, CX_STR("compare shit")), 0);
+    EXPECT_LT(cx_strcmp(str, CX_STR("compare this not")), 0);
+    EXPECT_GT(cx_strcmp(str, CX_STR("compare")), 0);
+}
+
+TEST(String, strcasecmp) {
+    cxstring str = CX_STR("compare this");
+
+    EXPECT_EQ(cx_strcasecmp(CX_STR(""), CX_STR("")), 0);
+    EXPECT_GT(cx_strcasecmp(str, CX_STR("")), 0);
+    EXPECT_EQ(cx_strcasecmp(str, CX_STR("compare this")), 0);
+    EXPECT_EQ(cx_strcasecmp(str, CX_STR("Compare This")), 0);
+    EXPECT_LT(cx_strcasecmp(str, CX_STR("compare tool")), 0);
+    EXPECT_GT(cx_strcasecmp(str, CX_STR("compare shit")), 0);
+    EXPECT_LT(cx_strcasecmp(str, CX_STR("compare this not")), 0);
+    EXPECT_GT(cx_strcasecmp(str, CX_STR("compare")), 0);
+}
+
+
+TEST(String, strcat) {
+    cxstring s1 = CX_STR("12");
+    cxstring s2 = CX_STR("34");
+    cxstring s3 = CX_STR("56");
+    cxstring sn = {nullptr, 0};
+
+    CxTestingAllocator alloc;
+
+    cxmutstr t1 = cx_strcat_a(&alloc, 2, s1, s2);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t1), CX_STR("1234")), 0);
+    cx_strfree_a(&alloc, &t1);
+
+    cxmutstr t2 = cx_strcat_a(&alloc, 3, s1, s2, s3);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t2), CX_STR("123456")), 0);
+    cx_strfree_a(&alloc, &t2);
+
+    cxmutstr t3 = cx_strcat_a(&alloc, 6, s1, sn, s2, sn, s3, sn);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t3), CX_STR("123456")), 0);
+    cx_strfree_a(&alloc, &t3);
+
+    cxmutstr t4 = cx_strcat_a(&alloc, 2, sn, sn);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t4), CX_STR("")), 0);
+    cx_strfree_a(&alloc, &t4);
+
+    EXPECT_TRUE(alloc.verify());
+}
+
+TEST(String, strsplit) {
+
+    cxstring test = cx_str("this,is,a,csv,string");
+    size_t capa = 8;
+    cxstring list[8];
+    size_t n;
+
+    /* special case: empty delimiter */
+    n = cx_strsplit(test, cx_str(""), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* no delimiter occurrence */
+    n = cx_strsplit(test, cx_str("z"), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* partially matching delimiter */
+    n = cx_strsplit(test, cx_str("is,not"), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* matching single-char delimiter */
+    n = cx_strsplit(test, cx_str(","), capa, list);
+    ASSERT_EQ(n, 5);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[3], cx_str("csv")), 0);
+    EXPECT_EQ(cx_strcmp(list[4], cx_str("string")), 0);
+
+    /* matching multi-char delimiter */
+    n = cx_strsplit(test, cx_str("is"), capa, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str(",a,csv,string")), 0);
+
+    /* bounded list using single-char delimiter */
+    n = cx_strsplit(test, cx_str(","), 3, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+
+    /* bounded list using multi-char delimiter */
+    n = cx_strsplit(test, cx_str("is"), 2, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+
+    /* start with delimiter */
+    n = cx_strsplit(test, cx_str("this"), capa, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+
+    /* end with delimiter */
+    n = cx_strsplit(test, cx_str("string"), capa, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this,is,a,csv,")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+
+
+    /* string ends with delimiter, but the limit is reached first */
+    n = cx_strsplit(cx_str("a,b,c,"), cx_str(","), 3, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("b")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("c,")), 0);
+
+    /* exact match */
+    n = cx_strsplit(test, cx_str("this,is,a,csv,string"), capa, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+
+    /* string to be split is only a substring of the delimiter */
+    n = cx_strsplit(test, cx_str("this,is,a,csv,string,with,extension"), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* subsequent encounter of delimiter (the string between is empty) */
+    n = cx_strsplit(test, cx_str("is,"), capa, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+}
+
+TEST(String, strsplit_a) {
+    CxTestingAllocator alloc;
+
+    cxstring test = cx_str("this,is,a,csv,string");
+    size_t capa = 8;
+    cxstring *list;
+    size_t n;
+
+    /* special case: empty delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str(""), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* no delimiter occurrence */
+    n = cx_strsplit_a(&alloc, test, cx_str("z"), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* partially matching delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("is,not"), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* matching single-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str(","), capa, &list);
+    ASSERT_EQ(n, 5);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[3], cx_str("csv")), 0);
+    EXPECT_EQ(cx_strcmp(list[4], cx_str("string")), 0);
+    cxFree(&alloc, list);
+
+    /* matching multi-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("is"), capa, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str(",a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* bounded list using single-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str(","), 3, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* bounded list using multi-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("is"), 2, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* start with delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("this"), capa, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* end with delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("string"), capa, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this,is,a,csv,")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    cxFree(&alloc, list);
+
+    /* string ends with delimiter, but the limit is reached first */
+    n = cx_strsplit_a(&alloc, cx_str("a,b,c,"), cx_str(","), 3, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("b")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("c,")), 0);
+    cxFree(&alloc, list);
+
+    /* exact match */
+    n = cx_strsplit_a(&alloc, test, cx_str("this,is,a,csv,string"), capa, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    cxFree(&alloc, list);
+
+    /* string to be split is only a substring of the delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("this,is,a,csv,string,with,extension"), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* subsequent encounter of delimiter (the string between is empty) */
+    n = cx_strsplit_a(&alloc, test, cx_str("is,"), capa, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    EXPECT_TRUE(alloc.verify());
+}
+
+TEST(String, strtrim) {
+    cxstring t1 = cx_strtrim(cx_str("  ein test  \t "));
+    cxstring t2 = cx_strtrim(cx_str("abc"));
+    cxstring t3 = cx_strtrim(cx_str(" 123"));
+    cxstring t4 = cx_strtrim(cx_str("xyz "));
+    cxstring t5 = cx_strtrim(cx_str("   "));
+    cxstring empty = cx_strtrim(cx_str(""));
+
+    EXPECT_EQ(cx_strcmp(t1, cx_str("ein test")), 0);
+    EXPECT_EQ(cx_strcmp(t2, cx_str("abc")), 0);
+    EXPECT_EQ(cx_strcmp(t3, cx_str("123")), 0);
+    EXPECT_EQ(cx_strcmp(t4, cx_str("xyz")), 0);
+    EXPECT_EQ(cx_strcmp(t5, cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(empty, cx_str("")), 0);
+}
+
+TEST(String, strprefix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strprefix(empty, cx_str("pref")));
+    EXPECT_TRUE(cx_strprefix(str, empty));
+    EXPECT_TRUE(cx_strprefix(empty, empty));
+    EXPECT_TRUE(cx_strprefix(str, cx_str("test ")));
+    EXPECT_FALSE(cx_strprefix(str, cx_str("8-) fsck ")));
+}
+
+TEST(String, strsuffix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strsuffix(empty, cx_str("suf")));
+    EXPECT_TRUE(cx_strsuffix(str, empty));
+    EXPECT_TRUE(cx_strsuffix(empty, empty));
+    EXPECT_TRUE(cx_strsuffix(str, cx_str("fix")));
+    EXPECT_FALSE(cx_strsuffix(str, cx_str("fox")));
+}
+
+TEST(String, strcaseprefix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strcaseprefix(empty, cx_str("pREf")));
+    EXPECT_TRUE(cx_strcaseprefix(str, empty));
+    EXPECT_TRUE(cx_strcaseprefix(empty, empty));
+    EXPECT_TRUE(cx_strcaseprefix(str, cx_str("TEST ")));
+    EXPECT_FALSE(cx_strcaseprefix(str, cx_str("8-) fsck ")));
+}
+
+TEST(String, strcasesuffix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strcasesuffix(empty, cx_str("sUf")));
+    EXPECT_TRUE(cx_strcasesuffix(str, empty));
+    EXPECT_TRUE(cx_strcasesuffix(empty, empty));
+    EXPECT_TRUE(cx_strcasesuffix(str, cx_str("FIX")));
+    EXPECT_FALSE(cx_strcasesuffix(str, cx_str("fox")));
+}
+
+TEST(String, strreplace) {
+    cxstring str = CX_STR("test ababab string aba");
+    cxstring longstr = CX_STR(
+            "xyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacd");
+    cxstring notrail = CX_STR("test abab");
+    cxstring empty = CX_STR("");
+    cxstring astr = CX_STR("aaaaaaaaaa");
+    cxstring csstr = CX_STR("test AB ab TEST xyz");
+
+    cxmutstr repl = cx_strreplace(str, cx_str("abab"), cx_str("muchlonger"));
+    cxstring expected = CX_STR("test muchlongerab string aba");
+
+    cxmutstr repln = cx_strreplacen(str, cx_str("ab"), cx_str("c"), 2);
+    cxstring expectedn = CX_STR("test ccab string aba");
+
+    cxmutstr longrepl = cx_strreplace(longstr, cx_str("a"), cx_str("z"));
+    cxstring longexpect = CX_STR(
+            "xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzcd");
+
+    cxmutstr replnotrail = cx_strreplace(notrail, cx_str("ab"), cx_str("z"));
+    cxstring notrailexpect = CX_STR("test zz");
+
+    cxmutstr repleq = cx_strreplace(str, str, cx_str("hello"));
+    cxstring eqexpect = CX_STR("hello");
+
+    cxmutstr replempty1 = cx_strreplace(empty, cx_str("ab"), cx_str("c")); // expect: empty
+    cxmutstr replempty2 = cx_strreplace(str, cx_str("abab"), empty);
+    cxstring emptyexpect2 = CX_STR("test ab string aba");
+
+    cxmutstr replpre = cx_strreplace(str, cx_str("test "), cx_str("TEST "));
+    cxstring preexpected = CX_STR("TEST ababab string aba");
+
+    cxmutstr replan1 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 1);
+    cxstring an1expected = CX_STR("xaaaaaaaaa");
+
+    cxmutstr replan4 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 4);
+    cxstring an4expected = CX_STR("xxxxaaaaaa");
+
+    cxmutstr replan9 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 9);
+    cxstring an9expected = CX_STR("xxxxxxxxxa");
+
+    cxmutstr replan10 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 10);
+    cxstring an10expected = CX_STR("xxxxxxxxxx");
+
+    cxmutstr replcs1 = cx_strreplace(csstr, cx_str("AB"), cx_str("*"));
+    cxstring cs1expected = CX_STR("test * ab TEST xyz");
+
+    cxmutstr replcs2 = cx_strreplace(csstr, cx_str("test"), cx_str("TEST"));
+    cxstring cs2expected = CX_STR("TEST AB ab TEST xyz");
+
+
+    EXPECT_NE(repl.ptr, str.ptr);
+    EXPECT_EQ(cx_strcmp(cx_strcast(repl), expected), 0);
+    EXPECT_NE(repln.ptr, str.ptr);
+    EXPECT_EQ(cx_strcmp(cx_strcast(repln), expectedn), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(longrepl), longexpect), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replnotrail), notrailexpect), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(repleq), eqexpect), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replempty1), empty), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replempty2), emptyexpect2), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replpre), preexpected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan1), an1expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan4), an4expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan9), an9expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan10), an10expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replcs1), cs1expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replcs2), cs2expected), 0);
+
+    cx_strfree(&repl);
+    cx_strfree(&repln);
+    cx_strfree(&longrepl);
+    cx_strfree(&replnotrail);
+    cx_strfree(&repleq);
+    cx_strfree(&replempty1);
+    cx_strfree(&replempty2);
+    cx_strfree(&replpre);
+    cx_strfree(&replan1);
+    cx_strfree(&replan4);
+    cx_strfree(&replan9);
+    cx_strfree(&replan10);
+    cx_strfree(&replcs1);
+    cx_strfree(&replcs2);
+}
src/cx/string.h		file \| annotate \| diff \| comparison \| revisions
src/string.c		file \| annotate \| diff \| comparison \| revisions
test/CMakeLists.txt		file \| annotate \| diff \| comparison \| revisions
test/test_string.cpp		file \| annotate \| diff \| comparison \| revisions