src/string.c

changeset 583
0f3c9662f9b5
parent 582
96fa7fa6af4f
child 590
02a56701a5cb
--- a/src/string.c	Sat Sep 03 15:11:23 2022 +0200
+++ b/src/string.c	Fri Sep 09 20:19:08 2022 +0200
@@ -72,6 +72,15 @@
     str->length = 0;
 }
 
+/**
+ * Releases the character buffer of a string through the given allocator
+ * and resets the string to an empty state (NULL pointer, zero length).
+ *
+ * @param alloc the allocator that is handed to cxFree()
+ * @param str the string whose buffer is released
+ */
+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+) {
+    /* detach the buffer first, so the string never refers to freed memory */
+    void *buffer = str->ptr;
+    str->length = 0;
+    str->ptr = NULL;
+    cxFree(alloc, buffer);
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -235,6 +244,11 @@
         return haystack;
     }
 
+    /* optimize for single-char needles */
+    if (needle.length == 1) {
+        return cx_strchr(haystack, *needle.ptr);
+    }
+
     /*
      * IMPORTANT:
      * Our prefix table contains the prefix length PLUS ONE
@@ -308,8 +322,55 @@
         size_t limit,
         cxstring *output
 ) {
-    // TODO: implement
-    return 0;
+    /* special case: output limit is zero */
+    if (limit == 0) return 0;
+
+    /* special case: delimiter is empty */
+    if (delim.length == 0) {
+        output[0] = string;
+        return 1;
+    }
+
+    /* special cases: delimiter is at least as large as the string */
+    if (delim.length >= string.length) {
+        /* exact match */
+        if (cx_strcmp(string, delim) == 0) {
+            output[0] = cx_strn(string.ptr, 0);
+            output[1] = cx_strn(string.ptr + string.length, 0);
+            return 2;
+        } else /* no match possible */ {
+            output[0] = string;
+            return 1;
+        }
+    }
+
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                /* copy the current string to the array */
+                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
+                output[n - 1] = item;
+                size_t processed = item.length + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached, copy the _full_ remaining string */
+                output[n - 1] = curpos;
+                break;
+            }
+        } else {
+            /* no more matches, copy last string */
+            output[n - 1] = curpos;
+            break;
+        }
+    }
+
+    return n;
 }
 
 size_t cx_strsplit_a(
@@ -319,8 +380,29 @@
         size_t limit,
         cxstring **output
 ) {
-    // TODO: implement
-    return 0;
+    /* find out how many splits we're going to make and allocate memory */
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                size_t processed = match.ptr - curpos.ptr + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached */
+                break;
+            }
+        } else {
+            /* no more matches */
+            break;
+        }
+    }
+    *output = cxCalloc(allocator, n, sizeof(cxstring));
+    return cx_strsplit(string, delim, n, *output);
 }
 
 size_t cx_strsplit_m(
@@ -344,7 +426,10 @@
                          delim, limit, (cxstring **) output);
 }
 
-int cx_strcmp(cxstring s1, cxstring s2) {
+int cx_strcmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
@@ -354,7 +439,10 @@
     }
 }
 
-int cx_strcasecmp(cxstring s1, cxstring s2) {
+int cx_strcasecmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
@@ -368,7 +456,10 @@
     }
 }
 
-cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
+cxmutstr cx_strdup_a(
+        CxAllocator *allocator,
+        cxstring string
+) {
     cxmutstr result = {
             cxMalloc(allocator, string.length + 1),
             string.length
@@ -400,18 +491,27 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-bool cx_strprefix(cxstring string, cxstring prefix) {
+bool cx_strprefix( /* reports whether `string` begins with the bytes of `prefix` */
+        cxstring string, /* the string to inspect */
+        cxstring prefix /* the expected leading bytes */
+) {
     if (string.length < prefix.length) return false; /* a prefix longer than the string cannot match */
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; /* byte-wise, case-sensitive comparison of the first prefix.length bytes */
 }
 
-bool cx_strsuffix(cxstring string, cxstring suffix) {
+bool cx_strsuffix( /* reports whether `string` ends with the bytes of `suffix` */
+        cxstring string, /* the string to inspect */
+        cxstring suffix /* the expected trailing bytes */
+) {
     if (string.length < suffix.length) return false; /* also keeps the offset computed below from underflowing */
     return memcmp(string.ptr + string.length - suffix.length, /* start of the last suffix.length bytes of string */
                   suffix.ptr, suffix.length) == 0;
 }
 
-bool cx_casestrprefix(cxstring string, cxstring prefix) {
+bool cx_strcaseprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
@@ -420,7 +520,10 @@
 #endif
 }
 
-bool cx_casestrsuffix(cxstring string, cxstring suffix) {
+bool cx_strcasesuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr+string.length-suffix.length,
@@ -442,3 +545,133 @@
         string.ptr[i] = toupper(string.ptr[i]);
     }
 }
+
+#define REPLACE_INDEX_BUFFER_MAX 100 /* upper bound for the number of match indices stored in one buffer node */
+
+struct cx_strreplace_ibuf { /* one node in a singly linked chain of match-index buffers */
+    size_t *buf; /* heap array of match offsets; released with free() */
+    unsigned int len; /* number of used entries in buf; capped per node, so a small type suffices */
+    struct cx_strreplace_ibuf *next; /* following node, or NULL for the last node */
+};
+
+/**
+ * Releases a complete chain of index buffers.
+ *
+ * For every node, the index array is freed before the node itself.
+ *
+ * @param buf first node of the chain (NULL is accepted and does nothing)
+ */
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
+    struct cx_strreplace_ibuf *successor;
+    for (; buf != NULL; buf = successor) {
+        /* remember the link before the node is destroyed */
+        successor = buf->next;
+        free(buf->buf);
+        free(buf);
+    }
+}
+
+/**
+ * Replaces up to replmax occurrences of pattern in str with replacement.
+ *
+ * The input string is not modified; the result is a newly allocated string.
+ * Occurrences are searched from left to right and never overlap, because
+ * the search continues behind the end of the previous match.
+ *
+ * The offsets of all matches are collected first (in a chain of temporary
+ * index buffers that live on the C heap), so that the result can be
+ * allocated once with its final size.
+ *
+ * The result buffer holds one byte more than the returned length and that
+ * byte is set to zero, matching cx_strdup_a() which also reserves
+ * length + 1 bytes.
+ *
+ * @param allocator the allocator used for the result string
+ * @param str the string in which the pattern is searched
+ * @param pattern the byte sequence to replace
+ * @param replacement the byte sequence that is inserted instead
+ * @param replmax the maximum number of replacements
+ * @return the resulting string; if nothing can be replaced at all (empty
+ * pattern, pattern longer than str, or replmax of zero) the result of
+ * cx_strdup_a() for str; a string with NULL pointer and zero length if
+ * one of the allocations in this function fails
+ */
+cxmutstr cx_strreplacen_a(
+        CxAllocator *allocator,
+        cxstring str,
+        cxstring pattern,
+        cxstring replacement,
+        size_t replmax
+) {
+
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return cx_strdup_a(allocator, str);
+
+    /*
+     * Compute the capacity of one index buffer:
+     * never more than the matches that fit into str, never more than
+     * replmax, and never more than REPLACE_INDEX_BUFFER_MAX.
+     * The early return above guarantees a capacity of at least one.
+     */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct cx_strreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
+    if (!firstbuf) return cx_mutstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return cx_mutstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    cxstring searchstr = str;
+    size_t found = 0;
+    do {
+        cxstring match = cx_strstr(searchstr, pattern);
+        if (match.length == 0) {
+            /* no further occurrence */
+            break;
+        }
+
+        /* Allocate next buffer in chain, if required */
+        if (curbuf->len == ibuflen) {
+            struct cx_strreplace_ibuf *nextbuf =
+                    calloc(1, sizeof(struct cx_strreplace_ibuf));
+            if (!nextbuf) {
+                cx_strrepl_free_ibuf(firstbuf);
+                return cx_mutstrn(NULL, 0);
+            }
+            nextbuf->buf = calloc(ibuflen, sizeof(size_t));
+            if (!nextbuf->buf) {
+                free(nextbuf);
+                cx_strrepl_free_ibuf(firstbuf);
+                return cx_mutstrn(NULL, 0);
+            }
+            curbuf->next = nextbuf;
+            curbuf = nextbuf;
+        }
+
+        /* Record match index and continue behind the match */
+        found++;
+        size_t idx = match.ptr - str.ptr;
+        curbuf->buf[curbuf->len++] = idx;
+        searchstr.ptr = match.ptr + pattern.length;
+        searchstr.length = str.length - idx - pattern.length;
+    } while (searchstr.length > 0 && found < replmax);
+
+    /*
+     * Allocate result string.
+     * found is exactly the number of recorded indices, and the matches do
+     * not overlap, so found * pattern.length cannot exceed str.length and
+     * the size can be computed without any signed arithmetic.
+     * One extra byte is reserved for the terminating zero; this also keeps
+     * the request from ever being zero bytes large.
+     */
+    cxmutstr result;
+    result.length = str.length - found * pattern.length
+                    + found * replacement.length;
+    result.ptr = cxMalloc(allocator, result.length + 1);
+    if (!result.ptr) {
+        cx_strrepl_free_ibuf(firstbuf);
+        return cx_mutstrn(NULL, 0);
+    }
+
+    /* Build result string */
+    curbuf = firstbuf;
+    size_t srcidx = 0;
+    char *destptr = result.ptr;
+    do {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr + srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Skip the source pattern and copy the replacement */
+            srcidx += pattern.length;
+            if (replacement.length > 0) {
+                /* an empty replacement may carry a NULL pointer */
+                memcpy(destptr, replacement.ptr, replacement.length);
+                destptr += replacement.length;
+            }
+        }
+        curbuf = curbuf->next;
+    } while (curbuf);
+
+    /* Copy what is left behind the last match and terminate the string */
+    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
+    result.ptr[result.length] = '\0';
+
+    /* Free index buffer */
+    cx_strrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+

mercurial