--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/string.c Tue Oct 17 16:15:41 2017 +0200 @@ -0,0 +1,463 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2017 Olaf Wintermann. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ucx/string.h" + +#include "ucx/allocator.h" + +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <stdint.h> +#include <ctype.h> + +sstr_t sstr(char *cstring) { + sstr_t string; + string.ptr = cstring; + string.length = strlen(cstring); + return string; +} + +sstr_t sstrn(char *cstring, size_t length) { + sstr_t string; + string.ptr = cstring; + string.length = length; + return string; +} + +size_t sstrnlen(size_t n, sstr_t s, ...) { + va_list ap; + size_t size = s.length; + va_start(ap, s); + + for (size_t i = 1 ; i < n ; i++) { + sstr_t str = va_arg(ap, sstr_t); + size += str.length; + } + va_end(ap); + + return size; +} + +static sstr_t sstrvcat_a( + UcxAllocator *a, + size_t count, + sstr_t s1, + sstr_t s2, + va_list ap) { + sstr_t str; + str.ptr = NULL; + str.length = 0; + if(count < 2) { + return str; + } + + sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t)); + if(!strings) { + return str; + } + + // get all args and overall length + strings[0] = s1; + strings[1] = s2; + size_t strlen = s1.length + s2.length; + for (size_t i=2;i<count;i++) { + sstr_t s = va_arg (ap, sstr_t); + strings[i] = s; + strlen += s.length; + } + + // create new string + str.ptr = (char*) almalloc(a, strlen + 1); + str.length = strlen; + if(!str.ptr) { + free(strings); + str.length = 0; + return str; + } + + // concatenate strings + size_t pos = 0; + for (size_t i=0;i<count;i++) { + sstr_t s = strings[i]; + memcpy(str.ptr + pos, s.ptr, s.length); + pos += s.length; + } + + str.ptr[str.length] = '\0'; + + free(strings); + + return str; +} + +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) { + va_list ap; + va_start(ap, s2); + sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap); + va_end(ap); + return s; +} + +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) { + va_list ap; + va_start(ap, s2); + sstr_t s = sstrvcat_a(a, count, s1, s2, ap); + va_end(ap); + return s; +} + +sstr_t sstrsubs(sstr_t s, size_t start) { + return sstrsubsl (s, start, s.length-start); +} + +sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { + sstr_t new_sstr; + if (start >= s.length) { + new_sstr.ptr = NULL; + new_sstr.length = 0; + } else { + if (length > s.length-start) { + length = s.length-start; + } + new_sstr.ptr = &s.ptr[start]; + new_sstr.length = length; + } + return new_sstr; +} + +sstr_t sstrchr(sstr_t s, int c) { + for(size_t i=0;i<s.length;i++) { + if(s.ptr[i] == c) { + return sstrsubs(s, i); + } + } + sstr_t n; + n.ptr = NULL; + n.length = 0; + return n; +} + +sstr_t sstrrchr(sstr_t s, int c) { + if (s.length > 0) { + for(size_t i=s.length;i>0;i--) { + if(s.ptr[i-1] == c) { + return sstrsubs(s, i-1); + } + } + } + sstr_t n; + n.ptr = NULL; + n.length = 0; + return n; +} + +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) + +#define ptable_w(useheap, ptable, index, src) do {\ + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ + else ((size_t*)ptable)[index] = src;\ + } while (0); + +sstr_t sstrstr(sstr_t string, sstr_t match) { + if (match.length == 0) { + return string; + } + + /* prepare default return value in case of no match */ + sstr_t result = sstrn(NULL, 0); + + /* + * IMPORTANT: + * our prefix table contains the prefix length PLUS ONE + * this is our decision, because we want to use the full range of size_t + * the original algorithm needs a (-1) at one single place + * and we want to avoid that + */ + + /* static prefix table */ + static uint8_t s_prefix_table[256]; + + /* check pattern length and use appropriate prefix table */ + /* if the pattern exceeds static prefix table, allocate on the heap */ + register int useheap = match.length > 255; + register void* ptable = useheap ? + calloc(match.length+1, sizeof(size_t)): s_prefix_table; + + /* keep counter in registers */ + register size_t i, j; + + /* fill prefix table */ + i = 0; j = 0; + ptable_w(useheap, ptable, i, j); + while (i < match.length) { + while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + ptable_w(useheap, ptable, i, j); + } + + /* search */ + i = 0; j = 1; + while (i < string.length) { + while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + if (j-1 == match.length) { + size_t start = i - match.length; + result.ptr = string.ptr + start; + result.length = string.length - start; + break; + } + } + + /* if prefix table was allocated on the heap, free it */ + if (ptable != s_prefix_table) { + free(ptable); + } + + return result; +} + +#undef ptable_r +#undef ptable_w + +sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { + return sstrsplit_a(ucx_default_allocator(), s, d, n); +} + +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { + if (s.length == 0 || d.length == 0) { + *n = -1; + return NULL; + } + + /* special cases: delimiter is at least as large as the string */ + if (d.length >= s.length) { + /* exact match */ + if (sstrcmp(s, d) == 0) { + *n = 0; + return NULL; + } else /* no match possible */ { + *n = 1; + sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); + *result = sstrdup_a(allocator, s); + return result; + } + } + + ssize_t nmax = *n; + size_t arrlen = 16; + sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); + + if (result) { + sstr_t curpos = s; + ssize_t j = 1; + while (1) { + sstr_t match; + /* optimize for one byte delimiters */ + if (d.length == 1) { + match = curpos; + for (size_t i = 0 ; i < curpos.length ; i++) { + if (curpos.ptr[i] == *(d.ptr)) { + match.ptr = curpos.ptr + i; + break; + } + match.length--; + } + } else { + match = sstrstr(curpos, d); + } + if (match.length > 0) { + /* is this our last try? */ + if (nmax == 0 || j < nmax) { + /* copy the current string to the array */ + sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); + result[j-1] = sstrdup_a(allocator, item); + size_t processed = item.length + d.length; + curpos.ptr += processed; + curpos.length -= processed; + + /* allocate memory for the next string */ + j++; + if (j > arrlen) { + arrlen *= 2; + sstr_t* reallocated = (sstr_t*) alrealloc( + allocator, result, arrlen*sizeof(sstr_t)); + if (reallocated) { + result = reallocated; + } else { + for (ssize_t i = 0 ; i < j-1 ; i++) { + alfree(allocator, result[i].ptr); + } + alfree(allocator, result); + *n = -2; + return NULL; + } + } + } else { + /* nmax reached, copy the _full_ remaining string */ + result[j-1] = sstrdup_a(allocator, curpos); + break; + } + } else { + /* no more matches, copy last string */ + result[j-1] = sstrdup_a(allocator, curpos); + break; + } + } + *n = j; + } else { + *n = -2; + } + + return result; +} + +int sstrcmp(sstr_t s1, sstr_t s2) { + if (s1.length == s2.length) { + return memcmp(s1.ptr, s2.ptr, s1.length); + } else if (s1.length > s2.length) { + return 1; + } else { + return -1; + } +} + +int sstrcasecmp(sstr_t s1, sstr_t s2) { + if (s1.length == s2.length) { +#ifdef _WIN32 + return _strnicmp(s1.ptr, s2.ptr, s1.length); +#else + return strncasecmp(s1.ptr, s2.ptr, s1.length); +#endif + } else if (s1.length > s2.length) { + return 1; + } else { + return -1; + } +} + +sstr_t sstrdup(sstr_t s) { + return sstrdup_a(ucx_default_allocator(), s); +} + +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) { + sstr_t newstring; + newstring.ptr = (char*)almalloc(allocator, s.length + 1); + if (newstring.ptr) { + newstring.length = s.length; + newstring.ptr[newstring.length] = 0; + + memcpy(newstring.ptr, s.ptr, s.length); + } else { + newstring.length = 0; + } + + return newstring; +} + +sstr_t sstrtrim(sstr_t string) { + sstr_t newstr = string; + + while (newstr.length > 0 && isspace(*newstr.ptr)) { + newstr.ptr++; + newstr.length--; + } + while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) { + newstr.length--; + } + + return newstr; +} + +int sstrprefix(sstr_t string, sstr_t prefix) { + if (string.length == 0) { + return prefix.length == 0; + } + if (prefix.length == 0) { + return 1; + } + + if (prefix.length > string.length) { + return 0; + } else { + return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; + } +} + +int sstrsuffix(sstr_t string, sstr_t suffix) { + if (string.length == 0) { + return suffix.length == 0; + } + if (suffix.length == 0) { + return 1; + } + + if (suffix.length > string.length) { + return 0; + } else { + return memcmp(string.ptr+string.length-suffix.length, + suffix.ptr, suffix.length) == 0; + } +} + +sstr_t sstrlower(sstr_t string) { + sstr_t ret = sstrdup(string); + for (size_t i = 0; i < ret.length ; i++) { + ret.ptr[i] = tolower(ret.ptr[i]); + } + return ret; +} + +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { + sstr_t ret = sstrdup_a(allocator, string); + for (size_t i = 0; i < ret.length ; i++) { + ret.ptr[i] = tolower(ret.ptr[i]); + } + return ret; +} + +sstr_t sstrupper(sstr_t string) { + sstr_t ret = sstrdup(string); + for (size_t i = 0; i < ret.length ; i++) { + ret.ptr[i] = toupper(ret.ptr[i]); + } + return ret; +} + +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { + sstr_t ret = sstrdup_a(allocator, string); + for (size_t i = 0; i < ret.length ; i++) { + ret.ptr[i] = toupper(ret.ptr[i]); + } + return ret; +}