more implementations of string functions

2022-08-31

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2022 23:12:05 +0200 (2022-08-31)
changeset 580
aac47db8da0b
parent 579
bbc46dcd5255
child 581
c067394737ca

more implementations of string functions

src/string.c file | annotate | diff | comparison | revisions
--- a/src/string.c	Tue Aug 30 19:56:07 2022 +0200
+++ b/src/string.c	Wed Aug 31 23:12:05 2022 +0200
@@ -125,5 +125,214 @@
     return result;
 }
 
+cxstring cx_strsubs(
+        cxstring string,
+        size_t start
+) {
+    return cx_strsubsl(string, start, string.length - start);
+}
+
+cxmutstr cx_strsubs_m(
+        cxmutstr string,
+        size_t start
+) {
+    return cx_strsubsl_m(string, start, string.length - start);
+}
+
+cxstring cx_strsubsl(
+        cxstring string,
+        size_t start,
+        size_t length
+) {
+    if (start > string.length) {
+        return (cxstring) {NULL, 0};
+    }
+
+    size_t rem_len = string.length - start;
+    if (length > rem_len) {
+        length = rem_len;
+    }
+
+    return (cxstring) {string.ptr + start, length};
+}
+
+cxmutstr cx_strsubsl_m(
+        cxmutstr string,
+        size_t start,
+        size_t length
+) {
+    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
+    return (cxmutstr) {(char *) result.ptr, result.length};
+}
+
+cxstring cx_strchr(
+        cxstring string,
+        int chr
+) {
+    chr = 0xFF & chr;
+    // TODO: improve by comparing multiple bytes at once
+    cx_for_n(i, string.length) {
+        if (string.ptr[i] == chr) {
+            return cx_strsubs(string, i);
+        }
+    }
+    return (cxstring) {NULL, 0};
+}
+
+cxmutstr cx_strchr_m(
+        cxmutstr string,
+        int chr
+) {
+    cxstring result = cx_strchr(cx_strcast(string), chr);
+    return (cxmutstr) {(char *) result.ptr, result.length};
+}
+
+cxstring cx_strrchr(
+        cxstring string,
+        int chr
+) {
+    chr = 0xFF & chr;
+    size_t i = string.length;
+    while (i > 0) {
+        i--;
+        // TODO: improve by comparing multiple bytes at once
+        if (string.ptr[i] == chr) {
+            return cx_strsubs(string, i);
+        }
+    }
+    return (cxstring) {NULL, 0};
+}
+
+cxmutstr cx_strrchr_m(
+        cxmutstr string,
+        int chr
+) {
+    cxstring result = cx_strrchr(cx_strcast(string), chr);
+    return (cxmutstr) {(char *) result.ptr, result.length};
+}
+
+#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
+    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
+
+#define ptable_w(useheap, ptable, index, src) do {\
+    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
+    else ((size_t*)ptable)[index] = src;\
+    } while (0)
 
 
+cxstring cx_strstr(
+        cxstring haystack,
+        cxstring needle
+) {
+    if (needle.length == 0) {
+        return haystack;
+    }
+
+    /*
+     * IMPORTANT:
+     * Our prefix table contains the prefix length PLUS ONE
+     * this is our decision, because we want to use the full range of size_t.
+     * The original algorithm needs a (-1) at one single place,
+     * and we want to avoid that.
+     */
+
+    /* static prefix table */
+    static uint8_t s_prefix_table[512];
+
+    /* check pattern length and use appropriate prefix table */
+    /* if the pattern exceeds static prefix table, allocate on the heap */
+    register int useheap = needle.length >= 512;
+    register void *ptable = useheap ? calloc(needle.length + 1,
+                                             sizeof(size_t)) : s_prefix_table;
+
+    /* keep counter in registers */
+    register size_t i, j;
+
+    /* fill prefix table */
+    i = 0;
+    j = 0;
+    ptable_w(useheap, ptable, i, j);
+    while (i < needle.length) {
+        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
+            ptable_r(j, useheap, ptable, j - 1);
+        }
+        i++;
+        j++;
+        ptable_w(useheap, ptable, i, j);
+    }
+
+    /* search */
+    cxstring result = {NULL, 0};
+    i = 0;
+    j = 1;
+    while (i < haystack.length) {
+        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
+            ptable_r(j, useheap, ptable, j - 1);
+        }
+        i++;
+        j++;
+        if (j - 1 == needle.length) {
+            size_t start = i - needle.length;
+            result.ptr = haystack.ptr + start;
+            result.length = haystack.length - start;
+            break;
+        }
+    }
+
+    /* if prefix table was allocated on the heap, free it */
+    if (ptable != s_prefix_table) {
+        free(ptable);
+    }
+
+    return result;
+}
+
+cxmutstr cx_strstr_m(
+        cxmutstr haystack,
+        cxstring needle
+) {
+    cxstring result = cx_strstr(cx_strcast(haystack), needle);
+    return (cxmutstr) {(char *) result.ptr, result.length};
+}
+
+size_t cx_strsplit(
+        cxstring string,
+        cxstring delim,
+        size_t limit,
+        cxstring *output
+) {
+    // TODO: implement
+    return 0;
+}
+
+size_t cx_strsplit_a(
+        CxAllocator *allocator,
+        cxstring string,
+        cxstring delim,
+        size_t limit,
+        cxstring **output
+) {
+    // TODO: implement
+    return 0;
+}
+
+size_t cx_strsplit_m(
+        cxmutstr string,
+        cxstring delim,
+        size_t limit,
+        cxmutstr *output
+) {
+    return cx_strsplit(cx_strcast(string),
+                       delim, limit, (cxstring *) output);
+}
+
+size_t cx_strsplit_ma(
+        CxAllocator *allocator,
+        cxmutstr string,
+        cxstring delim,
+        size_t limit,
+        cxmutstr **output
+) {
+    return cx_strsplit_a(allocator, cx_strcast(string),
+                         delim, limit, (cxstring **) output);
+}

mercurial