src/string.c

changeset 251
fae240d633fc
parent 250
b7d1317b138e
child 259
2f5dea574a75
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/string.c	Tue Oct 17 16:15:41 2017 +0200
@@ -0,0 +1,463 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2017 Olaf Wintermann. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ucx/string.h"
+
+#include "ucx/allocator.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <ctype.h>
+
+sstr_t sstr(char *cstring) {
+    sstr_t string;
+    string.ptr = cstring;
+    string.length = strlen(cstring);
+    return string;
+}
+
+sstr_t sstrn(char *cstring, size_t length) {
+    sstr_t string;
+    string.ptr = cstring;
+    string.length = length;
+    return string;
+}
+
+size_t sstrnlen(size_t n, sstr_t s, ...) {
+    va_list ap;
+    size_t size = s.length;
+    va_start(ap, s);
+
+    for (size_t i = 1 ; i < n ; i++) {
+        sstr_t str = va_arg(ap, sstr_t);
+        size += str.length;
+    }
+    va_end(ap);
+
+    return size;
+}
+
+static sstr_t sstrvcat_a(
+        UcxAllocator *a,
+        size_t count,
+        sstr_t s1,
+        sstr_t s2,
+        va_list ap) {
+    sstr_t str;
+    str.ptr = NULL;
+    str.length = 0;
+    if(count < 2) {
+        return str;
+    }
+    
+    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
+    if(!strings) {
+        return str;
+    }
+    
+    // get all args and overall length
+    strings[0] = s1;
+    strings[1] = s2;
+    size_t strlen = s1.length + s2.length;
+    for (size_t i=2;i<count;i++) {
+        sstr_t s = va_arg (ap, sstr_t);
+        strings[i] = s;
+        strlen += s.length;
+    }
+    
+    // create new string
+    str.ptr = (char*) almalloc(a, strlen + 1);
+    str.length = strlen;
+    if(!str.ptr) {
+        free(strings);
+        str.length = 0;
+        return str;
+    }
+    
+    // concatenate strings
+    size_t pos = 0;
+    for (size_t i=0;i<count;i++) {
+        sstr_t s = strings[i];
+        memcpy(str.ptr + pos, s.ptr, s.length);
+        pos += s.length;
+    }
+    
+    str.ptr[str.length] = '\0';
+    
+    free(strings);
+    
+    return str;
+}
+
+sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
+    va_list ap;
+    va_start(ap, s2);
+    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
+    va_end(ap);
+    return s;
+}
+
+sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
+    va_list ap;
+    va_start(ap, s2);
+    sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
+    va_end(ap);
+    return s;
+}
+
+sstr_t sstrsubs(sstr_t s, size_t start) {
+    return sstrsubsl (s, start, s.length-start);
+}
+
+sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
+    sstr_t new_sstr;
+    if (start >= s.length) {
+        new_sstr.ptr = NULL;
+        new_sstr.length = 0;
+    } else {
+        if (length > s.length-start) {
+            length = s.length-start;
+        }
+        new_sstr.ptr = &s.ptr[start];
+        new_sstr.length = length;
+    }
+    return new_sstr;
+}
+
+sstr_t sstrchr(sstr_t s, int c) {
+    for(size_t i=0;i<s.length;i++) {
+        if(s.ptr[i] == c) {
+            return sstrsubs(s, i);
+        }
+    }
+    sstr_t n;
+    n.ptr = NULL;
+    n.length = 0;
+    return n;
+}
+
+sstr_t sstrrchr(sstr_t s, int c) {
+    if (s.length > 0) {
+        for(size_t i=s.length;i>0;i--) {
+            if(s.ptr[i-1] == c) {
+                return sstrsubs(s, i-1);
+            }
+        }
+    }
+    sstr_t n;
+    n.ptr = NULL;
+    n.length = 0;
+    return n;
+}
+
+#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
+    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
+
+#define ptable_w(useheap, ptable, index, src) do {\
+    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
+    else ((size_t*)ptable)[index] = src;\
+    } while (0);
+
+sstr_t sstrstr(sstr_t string, sstr_t match) {
+    if (match.length == 0) {
+        return string;
+    }
+    
+    /* prepare default return value in case of no match */
+    sstr_t result = sstrn(NULL, 0);
+    
+    /*
+     * IMPORTANT:
+     * our prefix table contains the prefix length PLUS ONE
+     * this is our decision, because we want to use the full range of size_t
+     * the original algorithm needs a (-1) at one single place
+     * and we want to avoid that
+     */
+    
+    /* static prefix table */
+    static uint8_t s_prefix_table[256];
+    
+    /* check pattern length and use appropriate prefix table */
+    /* if the pattern exceeds static prefix table, allocate on the heap */
+    register int useheap = match.length > 255;
+    register void* ptable = useheap ?
+        calloc(match.length+1, sizeof(size_t)): s_prefix_table;
+    
+    /* keep counter in registers */
+    register size_t i, j;
+    
+    /* fill prefix table */
+    i = 0; j = 0;
+    ptable_w(useheap, ptable, i, j);
+    while (i < match.length) {
+        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
+            ptable_r(j, useheap, ptable, j-1);
+        }
+        i++; j++;
+        ptable_w(useheap, ptable, i, j);
+    }
+
+    /* search */
+    i = 0; j = 1;
+    while (i < string.length) {
+        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
+            ptable_r(j, useheap, ptable, j-1);
+        }
+        i++; j++;
+        if (j-1 == match.length) {
+            size_t start = i - match.length;
+            result.ptr = string.ptr + start;
+            result.length = string.length - start;
+            break;
+        }
+    }
+
+    /* if prefix table was allocated on the heap, free it */
+    if (ptable != s_prefix_table) {
+        free(ptable);
+    }
+    
+    return result;
+}
+
+#undef ptable_r
+#undef ptable_w
+
+sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
+    return sstrsplit_a(ucx_default_allocator(), s, d, n);
+}
+
+sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
+    if (s.length == 0 || d.length == 0) {
+        *n = -1;
+        return NULL;
+    }
+    
+    /* special cases: delimiter is at least as large as the string */
+    if (d.length >= s.length) {
+        /* exact match */
+        if (sstrcmp(s, d) == 0) {
+            *n = 0;
+            return NULL;
+        } else /* no match possible */ {
+            *n = 1;
+            sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
+            *result = sstrdup_a(allocator, s);
+            return result;
+        }
+    }
+    
+    ssize_t nmax = *n;
+    size_t arrlen = 16;
+    sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
+
+    if (result) {
+        sstr_t curpos = s;
+        ssize_t j = 1;
+        while (1) {
+            sstr_t match;
+            /* optimize for one byte delimiters */
+            if (d.length == 1) {
+                match = curpos;
+                for (size_t i = 0 ; i < curpos.length ; i++) {
+                    if (curpos.ptr[i] == *(d.ptr)) {
+                        match.ptr = curpos.ptr + i;
+                        break;
+                    }
+                    match.length--;
+                }
+            } else {
+                match = sstrstr(curpos, d);
+            }
+            if (match.length > 0) {
+                /* is this our last try? */
+                if (nmax == 0 || j < nmax) {
+                    /* copy the current string to the array */
+                    sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
+                    result[j-1] = sstrdup_a(allocator, item);
+                    size_t processed = item.length + d.length;
+                    curpos.ptr += processed;
+                    curpos.length -= processed;
+
+                    /* allocate memory for the next string */
+                    j++;
+                    if (j > arrlen) {
+                        arrlen *= 2;
+                        sstr_t* reallocated = (sstr_t*) alrealloc(
+                                allocator, result, arrlen*sizeof(sstr_t));
+                        if (reallocated) {
+                            result = reallocated;
+                        } else {
+                            for (ssize_t i = 0 ; i < j-1 ; i++) {
+                                alfree(allocator, result[i].ptr);
+                            }
+                            alfree(allocator, result);
+                            *n = -2;
+                            return NULL;
+                        }
+                    }
+                } else {
+                    /* nmax reached, copy the _full_ remaining string */
+                    result[j-1] = sstrdup_a(allocator, curpos);
+                    break;
+                }
+            } else {
+                /* no more matches, copy last string */
+                result[j-1] = sstrdup_a(allocator, curpos);
+                break;
+            }
+        }
+        *n = j;
+    } else {
+        *n = -2;
+    }
+
+    return result;
+}
+
+int sstrcmp(sstr_t s1, sstr_t s2) {
+    if (s1.length == s2.length) {
+        return memcmp(s1.ptr, s2.ptr, s1.length);
+    } else if (s1.length > s2.length) {
+        return 1;
+    } else {
+        return -1;
+    }
+}
+
+int sstrcasecmp(sstr_t s1, sstr_t s2) {
+    if (s1.length == s2.length) {
+#ifdef _WIN32
+        return _strnicmp(s1.ptr, s2.ptr, s1.length);
+#else
+        return strncasecmp(s1.ptr, s2.ptr, s1.length);
+#endif
+    } else if (s1.length > s2.length) {
+        return 1;
+    } else {
+        return -1;
+    }
+}
+
+sstr_t sstrdup(sstr_t s) {
+    return sstrdup_a(ucx_default_allocator(), s);
+}
+
+sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
+    sstr_t newstring;
+    newstring.ptr = (char*)almalloc(allocator, s.length + 1);
+    if (newstring.ptr) {
+        newstring.length = s.length;
+        newstring.ptr[newstring.length] = 0;
+        
+        memcpy(newstring.ptr, s.ptr, s.length);
+    } else {
+        newstring.length = 0;
+    }
+    
+    return newstring;
+}
+
+sstr_t sstrtrim(sstr_t string) {
+    sstr_t newstr = string;
+    
+    while (newstr.length > 0 && isspace(*newstr.ptr)) {
+        newstr.ptr++;
+        newstr.length--;
+    }
+    while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
+        newstr.length--;
+    }
+    
+    return newstr;
+}
+
+int sstrprefix(sstr_t string, sstr_t prefix) {
+    if (string.length == 0) {
+        return prefix.length == 0;
+    }
+    if (prefix.length == 0) {
+        return 1;
+    }
+    
+    if (prefix.length > string.length) {
+        return 0;
+    } else {
+        return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
+    }
+}
+
+int sstrsuffix(sstr_t string, sstr_t suffix) {
+    if (string.length == 0) {
+        return suffix.length == 0;
+    }
+    if (suffix.length == 0) {
+        return 1;
+    }
+    
+    if (suffix.length > string.length) {
+        return 0;
+    } else {
+        return memcmp(string.ptr+string.length-suffix.length,
+            suffix.ptr, suffix.length) == 0;
+    }
+}
+
+sstr_t sstrlower(sstr_t string) {
+    sstr_t ret = sstrdup(string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        ret.ptr[i] = tolower(ret.ptr[i]);
+    }
+    return ret;
+}
+
+sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
+    sstr_t ret = sstrdup_a(allocator, string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        ret.ptr[i] = tolower(ret.ptr[i]);
+    }
+    return ret;
+}
+
+sstr_t sstrupper(sstr_t string) {
+    sstr_t ret = sstrdup(string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        ret.ptr[i] = toupper(ret.ptr[i]);
+    }
+    return ret;
+}
+
+sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
+    sstr_t ret = sstrdup_a(allocator, string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        ret.ptr[i] = toupper(ret.ptr[i]);
+    }
+    return ret;
+}

mercurial