Thu, 07 Nov 2019 10:10:36 +0100
removes some bugs by redesigning the array API
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "ucx/string.h" #include "ucx/allocator.h" #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <stdint.h> #include <ctype.h> #ifndef _WIN32 #include <strings.h> /* for strncasecmp() */ #endif /* _WIN32 */ sstr_t sstr(char *cstring) { sstr_t string; string.ptr = cstring; string.length = strlen(cstring); return string; } sstr_t sstrn(char *cstring, size_t length) { sstr_t string; string.ptr = cstring; string.length = length; return string; } scstr_t scstr(const char *cstring) { scstr_t string; string.ptr = cstring; string.length = strlen(cstring); return string; } scstr_t scstrn(const char *cstring, size_t length) { scstr_t string; string.ptr = cstring; string.length = length; return string; } size_t scstrnlen(size_t n, ...) { if (n == 0) return 0; va_list ap; va_start(ap, n); size_t size = 0; for (size_t i = 0 ; i < n ; i++) { scstr_t str = va_arg(ap, scstr_t); if(SIZE_MAX - str.length < size) { size = SIZE_MAX; break; } size += str.length; } va_end(ap); return size; } static sstr_t sstrvcat_a( UcxAllocator *a, size_t count, scstr_t s1, va_list ap) { sstr_t str; str.ptr = NULL; str.length = 0; if(count < 2) { return str; } scstr_t s2 = va_arg (ap, scstr_t); if(((size_t)-1) - s1.length < s2.length) { return str; } scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); if(!strings) { return str; } // get all args and overall length strings[0] = s1; strings[1] = s2; size_t slen = s1.length + s2.length; int error = 0; for (size_t i=2;i<count;i++) { scstr_t s = va_arg (ap, scstr_t); strings[i] = s; if(((size_t)-1) - s.length < slen) { error = 1; break; } slen += s.length; } if(error) { free(strings); return str; } // create new string str.ptr = (char*) almalloc(a, slen + 1); str.length = slen; if(!str.ptr) { free(strings); str.length = 0; return str; } // concatenate strings size_t pos = 0; for (size_t i=0;i<count;i++) { scstr_t s = strings[i]; memcpy(str.ptr + pos, s.ptr, s.length); pos += s.length; } str.ptr[str.length] = '\0'; free(strings); return str; } sstr_t scstrcat(size_t count, scstr_t s1, ...) { va_list ap; va_start(ap, s1); sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); va_end(ap); return s; } sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { va_list ap; va_start(ap, s1); sstr_t s = sstrvcat_a(a, count, s1, ap); va_end(ap); return s; } static int ucx_substring( size_t str_length, size_t start, size_t length, size_t *newlen, size_t *newpos) { *newlen = 0; *newpos = 0; if(start > str_length) { return 0; } if(length > str_length - start) { length = str_length - start; } *newlen = length; *newpos = start; return 1; } sstr_t sstrsubs(sstr_t s, size_t start) { return sstrsubsl (s, start, s.length-start); } sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { size_t pos; sstr_t ret = { NULL, 0 }; if(ucx_substring(s.length, start, length, &ret.length, &pos)) { ret.ptr = s.ptr + pos; } return ret; } scstr_t scstrsubs(scstr_t string, size_t start) { return scstrsubsl(string, start, string.length-start); } scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { size_t pos; scstr_t ret = { NULL, 0 }; if(ucx_substring(s.length, start, length, &ret.length, &pos)) { ret.ptr = s.ptr + pos; } return ret; } static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { for(size_t i=0;i<length;i++) { if(str[i] == chr) { *pos = i; return 1; } } return 0; } static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { if(length > 0) { for(size_t i=length ; i>0 ; i--) { if(str[i-1] == chr) { *pos = i-1; return 1; } } } return 0; } sstr_t sstrchr(sstr_t s, int c) { size_t pos = 0; if(ucx_strchr(s.ptr, s.length, c, &pos)) { return sstrsubs(s, pos); } return sstrn(NULL, 0); } sstr_t sstrrchr(sstr_t s, int c) { size_t pos = 0; if(ucx_strrchr(s.ptr, s.length, c, &pos)) { return sstrsubs(s, pos); } return sstrn(NULL, 0); } scstr_t scstrchr(scstr_t s, int c) { size_t pos = 0; if(ucx_strchr(s.ptr, s.length, c, &pos)) { return scstrsubs(s, pos); } return scstrn(NULL, 0); } scstr_t scstrrchr(scstr_t s, int c) { size_t pos = 0; if(ucx_strrchr(s.ptr, s.length, c, &pos)) { return scstrsubs(s, pos); } return scstrn(NULL, 0); } #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) #define ptable_w(useheap, ptable, index, src) do {\ if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ else ((size_t*)ptable)[index] = src;\ } while (0); static const char* ucx_strstr( const char *str, size_t length, const char *match, size_t matchlen, size_t *newlen) { *newlen = length; if (matchlen == 0) { return str; } const char *result = NULL; size_t resultlen = 0; /* * IMPORTANT: * our prefix table contains the prefix length PLUS ONE * this is our decision, because we want to use the full range of size_t * the original algorithm needs a (-1) at one single place * and we want to avoid that */ /* static prefix table */ static uint8_t s_prefix_table[256]; /* check pattern length and use appropriate prefix table */ /* if the pattern exceeds static prefix table, allocate on the heap */ register int useheap = matchlen > 255; register void* ptable = useheap ? calloc(matchlen+1, sizeof(size_t)): s_prefix_table; /* keep counter in registers */ register size_t i, j; /* fill prefix table */ i = 0; j = 0; ptable_w(useheap, ptable, i, j); while (i < matchlen) { while (j >= 1 && match[j-1] != match[i]) { ptable_r(j, useheap, ptable, j-1); } i++; j++; ptable_w(useheap, ptable, i, j); } /* search */ i = 0; j = 1; while (i < length) { while (j >= 1 && str[i] != match[j-1]) { ptable_r(j, useheap, ptable, j-1); } i++; j++; if (j-1 == matchlen) { size_t start = i - matchlen; result = str + start; resultlen = length - start; break; } } /* if prefix table was allocated on the heap, free it */ if (ptable != s_prefix_table) { free(ptable); } *newlen = resultlen; return result; } sstr_t scstrsstr(sstr_t string, scstr_t match) { sstr_t result; size_t reslen; const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); if(!resstr) { result.ptr = NULL; result.length = 0; return result; } size_t pos = resstr - string.ptr; result.ptr = string.ptr + pos; result.length = reslen; return result; } scstr_t scstrscstr(scstr_t string, scstr_t match) { scstr_t result; size_t reslen; const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); if(!resstr) { result.ptr = NULL; result.length = 0; return result; } size_t pos = resstr - string.ptr; result.ptr = string.ptr + pos; result.length = reslen; return result; } #undef ptable_r #undef ptable_w sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { return scstrsplit_a(ucx_default_allocator(), s, d, n); } sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { if (s.length == 0 || d.length == 0) { *n = -1; return NULL; } /* special cases: delimiter is at least as large as the string */ if (d.length >= s.length) { /* exact match */ if (sstrcmp(s, d) == 0) { *n = 0; return NULL; } else /* no match possible */ { *n = 1; sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); if(result) { *result = sstrdup_a(allocator, s); } else { *n = -2; } return result; } } ssize_t nmax = *n; size_t arrlen = 16; sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); if (result) { scstr_t curpos = s; ssize_t j = 1; while (1) { scstr_t match; /* optimize for one byte delimiters */ if (d.length == 1) { match = curpos; for (size_t i = 0 ; i < curpos.length ; i++) { if (curpos.ptr[i] == *(d.ptr)) { match.ptr = curpos.ptr + i; break; } match.length--; } } else { match = scstrscstr(curpos, d); } if (match.length > 0) { /* is this our last try? */ if (nmax == 0 || j < nmax) { /* copy the current string to the array */ scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); result[j-1] = sstrdup_a(allocator, item); size_t processed = item.length + d.length; curpos.ptr += processed; curpos.length -= processed; /* allocate memory for the next string */ j++; if (j > arrlen) { arrlen *= 2; size_t reallocsz; sstr_t* reallocated = NULL; if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { reallocated = (sstr_t*) alrealloc( allocator, result, reallocsz); } if (reallocated) { result = reallocated; } else { for (ssize_t i = 0 ; i < j-1 ; i++) { alfree(allocator, result[i].ptr); } alfree(allocator, result); *n = -2; return NULL; } } } else { /* nmax reached, copy the _full_ remaining string */ result[j-1] = sstrdup_a(allocator, curpos); break; } } else { /* no more matches, copy last string */ result[j-1] = sstrdup_a(allocator, curpos); break; } } *n = j; } else { *n = -2; } return result; } int scstrcmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { return 1; } else { return -1; } } int scstrcasecmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); #else return strncasecmp(s1.ptr, s2.ptr, s1.length); #endif } else if (s1.length > s2.length) { return 1; } else { return -1; } } sstr_t scstrdup(scstr_t s) { return sstrdup_a(ucx_default_allocator(), s); } sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { sstr_t newstring; newstring.ptr = (char*)almalloc(allocator, s.length + 1); if (newstring.ptr) { newstring.length = s.length; newstring.ptr[newstring.length] = 0; memcpy(newstring.ptr, s.ptr, s.length); } else { newstring.length = 0; } return newstring; } static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { const char *newptr = s; size_t length = len; while(length > 0 && isspace(*newptr)) { newptr++; length--; } while(length > 0 && isspace(newptr[length-1])) { length--; } *newlen = length; return newptr - s; } sstr_t sstrtrim(sstr_t string) { sstr_t newstr; newstr.ptr = string.ptr + ucx_strtrim(string.ptr, string.length, &newstr.length); return newstr; } scstr_t scstrtrim(scstr_t string) { scstr_t newstr; newstr.ptr = string.ptr + ucx_strtrim(string.ptr, string.length, &newstr.length); return newstr; } int scstrprefix(scstr_t string, scstr_t prefix) { if (string.length == 0) { return prefix.length == 0; } if (prefix.length == 0) { return 1; } if (prefix.length > string.length) { return 0; } else { return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; } } int scstrsuffix(scstr_t string, scstr_t suffix) { if (string.length == 0) { return suffix.length == 0; } if (suffix.length == 0) { return 1; } if (suffix.length > string.length) { return 0; } else { return memcmp(string.ptr+string.length-suffix.length, suffix.ptr, suffix.length) == 0; } } int scstrcaseprefix(scstr_t string, scstr_t prefix) { if (string.length == 0) { return prefix.length == 0; } if (prefix.length == 0) { return 1; } if (prefix.length > string.length) { return 0; } else { scstr_t subs = scstrsubsl(string, 0, prefix.length); return scstrcasecmp(subs, prefix) == 0; } } int scstrcasesuffix(scstr_t string, scstr_t suffix) { if (string.length == 0) { return suffix.length == 0; } if (suffix.length == 0) { return 1; } if (suffix.length > string.length) { return 0; } else { scstr_t subs = scstrsubs(string, string.length-suffix.length); return scstrcasecmp(subs, suffix) == 0; } } sstr_t scstrlower(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); } return ret; } sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); } return ret; } sstr_t scstrupper(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); } return ret; } sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); } return ret; } // type adjustment functions scstr_t ucx_sc2sc(scstr_t str) { return str; } scstr_t ucx_ss2sc(sstr_t str) { scstr_t cs; cs.ptr = str.ptr; cs.length = str.length; return cs; } scstr_t ucx_ss2c_s(scstr_t c) { return c; }