src/ucx/string.c

changeset 39
ac35daceb24c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ucx/string.c	Tue Aug 23 13:49:38 2016 +0200
@@ -0,0 +1,381 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2015 Olaf Wintermann. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+#include "string.h"
+#include "allocator.h"
+
+/*
+ * Wraps a NUL-terminated C string in an sstr_t.
+ * Nothing is copied: the returned ptr is cstring itself and the length is
+ * whatever strlen() reports for it.
+ */
+sstr_t sstr(char *cstring) {
+    sstr_t wrapped;
+    wrapped.length = strlen(cstring);
+    wrapped.ptr = cstring;
+    return wrapped;
+}
+
+/*
+ * Wraps a character buffer and an explicit length in an sstr_t.
+ * Nothing is copied and the buffer is not inspected.
+ */
+sstr_t sstrn(char *cstring, size_t length) {
+    sstr_t wrapped;
+    wrapped.length = length;
+    wrapped.ptr = cstring;
+    return wrapped;
+}
+
+/*
+ * Sums the lengths of n strings: s plus n-1 further sstr_t values taken
+ * from the variadic arguments. For n <= 1 only the length of s is returned.
+ */
+size_t sstrnlen(size_t n, sstr_t s, ...) {
+    size_t total = s.length;
+    size_t remaining = (n > 0) ? n - 1 : 0;
+    va_list args;
+
+    va_start(args, s);
+    while (remaining > 0) {
+        sstr_t next = va_arg(args, sstr_t);
+        total += next.length;
+        remaining--;
+    }
+    va_end(args);
+
+    return total;
+}
+
+/*
+ * Concatenates count strings - s1, s2 and count-2 further sstr_t values read
+ * from ap - into one NUL-terminated buffer obtained from allocator a.
+ *
+ * Returns a string with ptr == NULL and length == 0 if count < 2, if the
+ * combined length (plus terminator) does not fit into a size_t, or if an
+ * allocation fails. The caller started ap and must call va_end() on it.
+ */
+static sstr_t sstrvcat_a(
+        UcxAllocator *a,
+        size_t count,
+        sstr_t s1,
+        sstr_t s2,
+        va_list ap) {
+    sstr_t str;
+    str.ptr = NULL;
+    str.length = 0;
+    if(count < 2) {
+        return str;
+    }
+    
+    // temporary array: the variadic arguments can only be walked once, but
+    // they are needed twice (length summation and copying)
+    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
+    if(!strings) {
+        return str;
+    }
+    
+    // get all args and overall length
+    strings[0] = s1;
+    strings[1] = s2;
+    size_t total = s1.length + s2.length;
+    if(total < s1.length) {
+        // unsigned wrap-around: the result would not fit into a size_t
+        free(strings);
+        return str;
+    }
+    for (size_t i=2;i<count;i++) {
+        sstr_t s = va_arg (ap, sstr_t);
+        strings[i] = s;
+        total += s.length;
+        if(total < s.length) {
+            free(strings);
+            return str;
+        }
+    }
+    
+    // the terminator byte must fit as well
+    if(total == (size_t)-1) {
+        free(strings);
+        return str;
+    }
+    
+    // create new string
+    str.ptr = (char*) almalloc(a, total + 1);
+    if(!str.ptr) {
+        free(strings);
+        return str;
+    }
+    str.length = total;
+    
+    // concatenate strings
+    size_t pos = 0;
+    for (size_t i=0;i<count;i++) {
+        sstr_t s = strings[i];
+        // skip empty parts: their ptr may be NULL, which memcpy must not get
+        if(s.length > 0) {
+            memcpy(str.ptr + pos, s.ptr, s.length);
+            pos += s.length;
+        }
+    }
+    
+    str.ptr[str.length] = '\0';
+    
+    free(strings);
+    
+    return str;
+}
+
+/*
+ * Concatenates count strings (s1, s2 and count-2 variadic sstr_t values)
+ * into a new string allocated through ucx_default_allocator().
+ * The work is done by sstrvcat_a(); its result is returned unchanged.
+ */
+sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
+    va_list args;
+    sstr_t joined;
+
+    va_start(args, s2);
+    joined = sstrvcat_a(ucx_default_allocator(), count, s1, s2, args);
+    va_end(args);
+
+    return joined;
+}
+
+/*
+ * Concatenates count strings (s1, s2 and count-2 variadic sstr_t values)
+ * into a new string allocated through the allocator a.
+ * The work is done by sstrvcat_a(); its result is returned unchanged.
+ */
+sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
+    va_list args;
+    sstr_t joined;
+
+    va_start(args, s2);
+    joined = sstrvcat_a(a, count, s1, s2, args);
+    va_end(args);
+
+    return joined;
+}
+
+/*
+ * Returns the part of s that begins at index start and runs to the end.
+ * The remaining length is computed here and handed to sstrsubsl(), which
+ * does the actual bounds handling.
+ */
+sstr_t sstrsubs(sstr_t s, size_t start) {
+    size_t rest = s.length - start;
+    return sstrsubsl(s, start, rest);
+}
+
+/*
+ * Returns a view of at most length characters of s, beginning at index
+ * start. No memory is copied; the result points into the buffer of s.
+ *
+ * If start is not inside s, the result has ptr == NULL and length == 0.
+ * A length that reaches past the end of s is clamped to the end.
+ */
+sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
+    sstr_t sub;
+
+    if (start >= s.length) {
+        sub.ptr = NULL;
+        sub.length = 0;
+        return sub;
+    }
+
+    size_t available = s.length - start;
+    sub.ptr = s.ptr + start;
+    sub.length = (length > available) ? available : length;
+    return sub;
+}
+
+/*
+ * Finds the first occurrence of the character c in s.
+ *
+ * Returns the substring of s starting at that occurrence, or a string with
+ * ptr == NULL and length == 0 if c does not occur.
+ *
+ * The search uses memchr(), which compares bytes as unsigned char. This
+ * makes characters >= 0x80 match regardless of whether plain char is signed
+ * and whether c was passed as a negative or a positive value.
+ */
+sstr_t sstrchr(sstr_t s, int c) {
+    // an empty string may carry a NULL ptr, which memchr must not receive
+    if (s.length > 0) {
+        char *hit = (char*) memchr(s.ptr, c, s.length);
+        if (hit) {
+            return sstrsubs(s, (size_t)(hit - s.ptr));
+        }
+    }
+    sstr_t n;
+    n.ptr = NULL;
+    n.length = 0;
+    return n;
+}
+
+/*
+ * Finds the last occurrence of the character c in s.
+ *
+ * Returns the substring of s starting at that occurrence, or a string with
+ * ptr == NULL and length == 0 if c does not occur.
+ *
+ * Both sides of the comparison are converted to unsigned char, so
+ * characters >= 0x80 match regardless of whether plain char is signed and
+ * whether c was passed as a negative or a positive value.
+ */
+sstr_t sstrrchr(sstr_t s, int c) {
+    unsigned char needle = (unsigned char) c;
+    // i is one past the inspected index, so the unsigned counter never wraps
+    for(size_t i=s.length;i>0;i--) {
+        if((unsigned char) s.ptr[i-1] == needle) {
+            return sstrsubs(s, i-1);
+        }
+    }
+    sstr_t n;
+    n.ptr = NULL;
+    n.length = 0;
+    return n;
+}
+
+/*
+ * Finds the first occurrence of match inside string.
+ *
+ * Returns the substring of string starting at that occurrence. An empty
+ * match yields string itself; if match does not occur, the result has
+ * ptr == NULL and length == 0.
+ */
+sstr_t sstrstr(sstr_t string, sstr_t match) {
+    sstr_t notfound;
+    notfound.ptr = NULL;
+    notfound.length = 0;
+
+    if (match.length == 0) {
+        return string;
+    }
+
+    // try every start position until the remaining tail begins with match
+    size_t pos = 0;
+    while (pos < string.length) {
+        sstr_t tail = sstrsubs(string, pos);
+        if (sstrprefix(tail, match)) {
+            return tail;
+        }
+        pos++;
+    }
+
+    return notfound;
+}
+
+/*
+ * Splits s at every occurrence of the delimiter d, allocating the result
+ * through ucx_default_allocator(). All arguments are forwarded unchanged to
+ * sstrsplit_a(), which defines the meaning of *n and of the return value.
+ */
+sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
+    UcxAllocator *default_alloc = ucx_default_allocator();
+    return sstrsplit_a(default_alloc, s, d, n);
+}
+
+/*
+ * Splits s at occurrences of the delimiter d.
+ *
+ * On entry *n holds the maximum number of pieces; the limit only takes
+ * effect when it equals a piece count that can be reached (so 0 never
+ * limits). On return *n is
+ *    -1  if s or d is empty,
+ *     0  if s equals d (NULL is returned),
+ *    -2  if an allocation failed (NULL is returned),
+ *   otherwise the number of pieces in the returned array.
+ *
+ * The array and every piece are allocated separately through allocator;
+ * each piece is a NUL-terminated copy. Once the limit is reached, the rest
+ * of the string, including any further delimiters, forms the last piece.
+ */
+sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
+    if (s.length == 0 || d.length == 0) {
+        *n = -1;
+        return NULL;
+    }
+
+    sstr_t* result;
+    ssize_t nmax = *n;
+    *n = 1;
+
+    /* special case: exact match - no processing needed */
+    if (sstrcmp(s, d) == 0) {
+        *n = 0;
+        return NULL;
+    }
+    /* scratch copy in which the delimiters are overwritten with zeros */
+    sstr_t sv = sstrdup(s);
+    if (sv.length == 0) {
+        /* s is not empty here, so a zero length means sstrdup failed */
+        *n = -2;
+        return NULL;
+    }
+
+    for (size_t i = 0 ; i < s.length ; i++) {
+        /*
+         * Check the limit before searching for another delimiter, so that
+         * a limit of 1 is honoured even if s starts with the delimiter.
+         */
+        if ((*n) == nmax) break;
+        sstr_t substr = sstrsubs(sv, i);
+        if (sstrprefix(substr, d)) {
+            (*n)++;
+            for (size_t j = 0 ; j < d.length ; j++) {
+                sv.ptr[i+j] = 0;
+            }
+            i += d.length - 1; // -1, because the loop will do a i++
+        }
+    }
+    result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n));
+
+    if (result) {
+        char *pptr = sv.ptr;
+        for (ssize_t i = 0 ; i < *n ; i++) {
+            /* each piece ends at the first zeroed delimiter byte */
+            size_t l = strlen(pptr);
+            char* ptr = (char*) almalloc(allocator, l + 1);
+            if (ptr) {
+                memcpy(ptr, pptr, l);
+                ptr[l] = 0;
+
+                result[i] = sstrn(ptr, l);
+                pptr += l + d.length;
+            } else {
+                /* roll back the pieces created so far */
+                for (ssize_t j = i-1 ; j >= 0 ; j--) {
+                    alfree(allocator, result[j].ptr);
+                }
+                alfree(allocator, result);
+                /* never hand the freed array back to the caller */
+                result = NULL;
+                *n = -2;
+                break;
+            }
+        }
+    } else {
+        *n = -2;
+    }
+    
+    /*
+     * NOTE(review): sv was obtained through sstrdup(), i.e. from the default
+     * allocator - presumably malloc-backed, which free() relies on; confirm.
+     */
+    free(sv.ptr);
+
+    return result;
+}
+
+/*
+ * Compares two strings.
+ *
+ * Strings of different length are ordered by length alone (1 if s1 is
+ * longer, -1 if s2 is longer). Strings of equal length are compared
+ * byte-wise with memcmp(), whose result is returned.
+ */
+int sstrcmp(sstr_t s1, sstr_t s2) {
+    if (s1.length != s2.length) {
+        return (s1.length > s2.length) ? 1 : -1;
+    }
+    return memcmp(s1.ptr, s2.ptr, s1.length);
+}
+
+/*
+ * Compares two strings, ignoring case.
+ *
+ * Strings of different length are ordered by length alone (1 if s1 is
+ * longer, -1 if s2 is longer). Strings of equal length are compared with
+ * the platform's length-limited case-insensitive comparison.
+ */
+int sstrcasecmp(sstr_t s1, sstr_t s2) {
+    if (s1.length != s2.length) {
+        return (s1.length > s2.length) ? 1 : -1;
+    }
+#ifdef _WIN32
+    return _strnicmp(s1.ptr, s2.ptr, s1.length);
+#else
+    return strncasecmp(s1.ptr, s2.ptr, s1.length);
+#endif
+}
+
+/*
+ * Duplicates s using ucx_default_allocator().
+ * The result is whatever sstrdup_a() returns for that allocator.
+ */
+sstr_t sstrdup(sstr_t s) {
+    sstr_t copy = sstrdup_a(ucx_default_allocator(), s);
+    return copy;
+}
+
+/*
+ * Duplicates s into a buffer of s.length + 1 bytes obtained from allocator.
+ *
+ * The copy is NUL-terminated. If the allocation fails, the result has a
+ * NULL ptr and length 0.
+ */
+sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
+    sstr_t copy;
+    copy.length = 0;
+    copy.ptr = (char*) almalloc(allocator, s.length + 1);
+
+    if (copy.ptr) {
+        memcpy(copy.ptr, s.ptr, s.length);
+        copy.ptr[s.length] = 0;
+        copy.length = s.length;
+    }
+
+    return copy;
+}
+
+/*
+ * Returns string with leading and trailing whitespace (as classified by
+ * isspace()) removed.
+ *
+ * No memory is copied or modified: the result points into the buffer of
+ * string and merely has an adjusted ptr and length.
+ */
+sstr_t sstrtrim(sstr_t string) {
+    sstr_t newstr = string;
+    
+    // isspace() is only defined for unsigned char values and EOF, so the
+    // possibly negative plain char has to be converted first
+    while (newstr.length > 0
+            && isspace((unsigned char) *newstr.ptr)) {
+        newstr.ptr++;
+        newstr.length--;
+    }
+    while (newstr.length > 0
+            && isspace((unsigned char) newstr.ptr[newstr.length-1])) {
+        newstr.length--;
+    }
+    
+    return newstr;
+}
+
+/*
+ * Checks whether string begins with prefix.
+ *
+ * Returns 1 if it does and 0 otherwise. An empty prefix is a prefix of
+ * every string, including the empty one.
+ */
+int sstrprefix(sstr_t string, sstr_t prefix) {
+    // a prefix longer than the string can never match
+    if (prefix.length > string.length) {
+        return 0;
+    }
+    if (prefix.length == 0) {
+        return 1;
+    }
+    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
+}
+
+/*
+ * Checks whether string ends with suffix.
+ *
+ * Returns 1 if it does and 0 otherwise. An empty suffix is a suffix of
+ * every string, including the empty one.
+ */
+int sstrsuffix(sstr_t string, sstr_t suffix) {
+    // a suffix longer than the string can never match
+    if (suffix.length > string.length) {
+        return 0;
+    }
+    if (suffix.length == 0) {
+        return 1;
+    }
+
+    char *tail = string.ptr + (string.length - suffix.length);
+    return memcmp(tail, suffix.ptr, suffix.length) == 0;
+}
+
+/*
+ * Returns a lower-case copy of string, created with sstrdup().
+ * If the duplication fails, the copy has length 0 and is returned as is.
+ */
+sstr_t sstrlower(sstr_t string) {
+    sstr_t ret = sstrdup(string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        // tolower() is only defined for unsigned char values and EOF
+        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
+    }
+    return ret;
+}
+
+/*
+ * Returns a lower-case copy of string, created with sstrdup_a() on the
+ * given allocator. If the duplication fails, the copy has length 0 and is
+ * returned as is.
+ */
+sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
+    sstr_t ret = sstrdup_a(allocator, string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        // tolower() is only defined for unsigned char values and EOF
+        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
+    }
+    return ret;
+}
+
+/*
+ * Returns an upper-case copy of string, created with sstrdup().
+ * If the duplication fails, the copy has length 0 and is returned as is.
+ */
+sstr_t sstrupper(sstr_t string) {
+    sstr_t ret = sstrdup(string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        // toupper() is only defined for unsigned char values and EOF
+        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
+    }
+    return ret;
+}
+
+/*
+ * Returns an upper-case copy of string, created with sstrdup_a() on the
+ * given allocator. If the duplication fails, the copy has length 0 and is
+ * returned as is.
+ */
+sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
+    sstr_t ret = sstrdup_a(allocator, string);
+    for (size_t i = 0; i < ret.length ; i++) {
+        // toupper() is only defined for unsigned char values and EOF
+        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
+    }
+    return ret;
+}

mercurial