Sat, 03 Sep 2022 14:56:07 +0200
implement more string functions
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/string.h"
#include "cx/utils.h"

#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h> /* for free(), calloc(), abort() */
#include <ctype.h>

#ifndef _WIN32

#include <strings.h> /* for strncasecmp() */

#endif /* _WIN32 */

/*
 * Number of prefix table entries cx_strstr() keeps in automatic storage.
 * Needles with at least this many bytes use a heap allocated table instead.
 */
enum {
    CX_STRSTR_LOCAL_TABLE_SIZE = 512
};

/* Wraps a zero-terminated C string; the length is determined with strlen(). */
cxmutstr cx_mutstr(char *cstring) {
    return (cxmutstr) {cstring, strlen(cstring)};
}

/* Wraps a character buffer with an explicitly specified length. */
cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
) {
    return (cxmutstr) {cstring, length};
}

/* Wraps a zero-terminated C string; the length is determined with strlen(). */
cxstring cx_str(const char *cstring) {
    return (cxstring) {cstring, strlen(cstring)};
}

/* Wraps a character buffer with an explicitly specified length. */
cxstring cx_strn(
        const char *cstring,
        size_t length
) {
    return (cxstring) {cstring, length};
}

/* Creates an immutable view on the same memory as the mutable string. */
cxstring cx_strcast(cxmutstr str) {
    return (cxstring) {str.ptr, str.length};
}

/* Releases the buffer with free() and resets the string to {NULL, 0}. */
void cx_strfree(cxmutstr *str) {
    free(str->ptr);
    str->ptr = NULL;
    str->length = 0;
}

/* Sums up the lengths of the `count` cxstring arguments that follow. */
size_t cx_strlen(
        size_t count,
        ...
) {
    if (count == 0) return 0;

    va_list ap;
    va_start(ap, count);
    size_t size = 0;
    cx_for_n(i, count) {
        cxstring str = va_arg(ap, cxstring);
        size += str.length;
    }
    va_end(ap);

    return size;
}

/*
 * Concatenates the `count` cxstring arguments into a new zero-terminated
 * string obtained from the specified allocator.
 *
 * The variadic arguments are traversed twice (once to determine the overall
 * length, once to copy), so no temporary array is required and a count of
 * zero yields an empty string.
 *
 * The process is aborted when the allocator cannot provide the memory.
 */
cxmutstr cx_strcat_a(
        CxAllocator *alloc,
        size_t count,
        ...
) {
    va_list ap;
    va_list ap_copy;
    va_start(ap, count);
    va_copy(ap_copy, ap);

    // first pass: determine the overall length
    size_t slen = 0;
    cx_for_n(i, count) {
        cxstring s = va_arg(ap, cxstring);
        slen += s.length;
    }
    va_end(ap);

    // create new string
    cxmutstr result;
    result.ptr = cxMalloc(alloc, slen + 1);
    result.length = slen;
    if (result.ptr == NULL) abort();

    // second pass: concatenate strings
    size_t pos = 0;
    cx_for_n(i, count) {
        cxstring s = va_arg(ap_copy, cxstring);
        // skip empty strings, their pointer is allowed to be NULL
        if (s.length > 0) {
            memcpy(result.ptr + pos, s.ptr, s.length);
            pos += s.length;
        }
    }
    va_end(ap_copy);

    // terminate string
    result.ptr[result.length] = '\0';

    return result;
}

/* Returns the part of the string that starts at index `start`. */
cxstring cx_strsubs(
        cxstring string,
        size_t start
) {
    // an out-of-range start is rejected by cx_strsubsl() before the
    // (then wrapped around) length is used
    return cx_strsubsl(string, start, string.length - start);
}

/* Mutable variant of cx_strsubs(). */
cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
) {
    return cx_strsubsl_m(string, start, string.length - start);
}

/*
 * Returns at most `length` bytes of the string, starting at index `start`.
 * The length is clamped to the remaining bytes; when `start` is beyond the
 * end of the string, {NULL, 0} is returned.
 */
cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
) {
    if (start > string.length) {
        return (cxstring) {NULL, 0};
    }

    size_t rem_len = string.length - start;
    if (length > rem_len) {
        length = rem_len;
    }

    return (cxstring) {string.ptr + start, length};
}

/* Mutable variant of cx_strsubsl(). */
cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
) {
    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

/*
 * Returns the part of the string starting at the first occurrence of the
 * byte `chr`, or {NULL, 0} when the byte does not occur.
 */
cxstring cx_strchr(
        cxstring string,
        int chr
) {
    // do not hand a possibly NULL pointer to memchr()
    if (string.length == 0) {
        return (cxstring) {NULL, 0};
    }

    // memchr() compares unsigned bytes, so bytes >= 0x80 are found
    // regardless of the signedness of char
    const char *match = memchr(string.ptr, 0xFF & chr, string.length);
    if (match == NULL) {
        return (cxstring) {NULL, 0};
    }
    return cx_strsubs(string, (size_t) (match - string.ptr));
}

/* Mutable variant of cx_strchr(). */
cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

/*
 * Returns the part of the string starting at the last occurrence of the
 * byte `chr`, or {NULL, 0} when the byte does not occur.
 */
cxstring cx_strrchr(
        cxstring string,
        int chr
) {
    // compare unsigned bytes, otherwise bytes >= 0x80 never match
    // on platforms where char is signed
    const unsigned char wanted = (unsigned char) (0xFF & chr);

    size_t i = string.length;
    while (i > 0) {
        i--;
        // TODO: improve by comparing multiple bytes at once
        if ((unsigned char) string.ptr[i] == wanted) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}

/* Mutable variant of cx_strrchr(). */
cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strrchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

/*
 * Straightforward substring search that needs no additional memory.
 * Used by cx_strstr() when the prefix table cannot be allocated.
 * The caller guarantees 0 < needle.length <= haystack.length.
 */
static cxstring cx_strstr_naive(
        cxstring haystack,
        cxstring needle
) {
    const size_t last_start = haystack.length - needle.length;
    for (size_t start = 0; start <= last_start; start++) {
        if (memcmp(haystack.ptr + start, needle.ptr, needle.length) == 0) {
            return (cxstring) {haystack.ptr + start, haystack.length - start};
        }
    }
    return (cxstring) {NULL, 0};
}

/*
 * Returns the part of the haystack starting at the first occurrence of the
 * needle, or {NULL, 0} when the needle does not occur.
 * An empty needle matches at the start, i.e. the haystack itself is returned.
 */
cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
) {
    if (needle.length == 0) {
        return haystack;
    }

    // a needle that is longer than the haystack can never match
    if (needle.length > haystack.length) {
        return (cxstring) {NULL, 0};
    }

    /*
     * IMPORTANT:
     * Our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t.
     * The original algorithm needs a (-1) at one single place,
     * and we want to avoid that.
     */

    /*
     * The table needs needle.length + 1 entries and each entry may be as
     * large as needle.length, hence the entries must be of type size_t.
     * The local table has automatic storage, so concurrent calls do not
     * share any state.
     */
    size_t local_table[CX_STRSTR_LOCAL_TABLE_SIZE];
    size_t *ptable = local_table;

    /* if the needle exceeds the local prefix table, allocate on the heap */
    if (needle.length >= CX_STRSTR_LOCAL_TABLE_SIZE) {
        ptable = calloc(needle.length + 1, sizeof(size_t));
        if (ptable == NULL) {
            /* out of memory: the slower search does not need a table */
            return cx_strstr_naive(haystack, needle);
        }
    }

    /* fill prefix table */
    size_t i = 0;
    size_t j = 0;
    ptable[i] = j;
    while (i < needle.length) {
        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        ptable[i] = j;
    }

    /* search */
    cxstring result = {NULL, 0};
    i = 0;
    j = 1;
    while (i < haystack.length) {
        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        if (j - 1 == needle.length) {
            size_t start = i - needle.length;
            result.ptr = haystack.ptr + start;
            result.length = haystack.length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (ptable != local_table) {
        free(ptable);
    }

    return result;
}

/* Mutable variant of cx_strstr(). */
cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
) {
    cxstring result = cx_strstr(cx_strcast(haystack), needle);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

/* Not implemented yet: always returns 0 and does not touch `output`. */
size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
) {
    // TODO: implement
    return 0;
}

/* Not implemented yet: always returns 0 and does not touch `output`. */
size_t cx_strsplit_a(
        CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
) {
    // TODO: implement
    return 0;
}

/* Mutable variant; forwards to cx_strsplit() with the output array cast. */
size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
) {
    return cx_strsplit(cx_strcast(string),
                       delim, limit, (cxstring *) output);
}

/* Mutable variant; forwards to cx_strsplit_a() with the output cast. */
size_t cx_strsplit_ma(
        CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
) {
    return cx_strsplit_a(allocator, cx_strcast(string),
                         delim, limit, (cxstring **) output);
}

/*
 * Compares two strings.
 * Strings of different length are ordered by their length (1 when s1 is
 * longer, -1 when s1 is shorter); only strings of equal length are
 * compared byte-wise with memcmp().
 */
int cx_strcmp(cxstring s1, cxstring s2) {
    if (s1.length == s2.length) {
        return memcmp(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        return 1;
    } else {
        return -1;
    }
}

/*
 * Compares two strings ignoring case.
 * Strings of different length are ordered by their length (1 when s1 is
 * longer, -1 when s1 is shorter); only strings of equal length are
 * compared with the case-insensitive comparison of the platform.
 */
int cx_strcasecmp(cxstring s1, cxstring s2) {
    if (s1.length == s2.length) {
#ifdef _WIN32
        return _strnicmp(s1.ptr, s2.ptr, s1.length);
#else
        return strncasecmp(s1.ptr, s2.ptr, s1.length);
#endif
    } else if (s1.length > s2.length) {
        return 1;
    } else {
        return -1;
    }
}

/*
 * Creates a zero-terminated copy of the string with memory obtained from
 * the specified allocator. Returns {NULL, 0} when the allocation fails.
 */
cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
    cxmutstr result = {
            cxMalloc(allocator, string.length + 1),
            string.length
    };
    if (result.ptr == NULL) {
        result.length = 0;
        return result;
    }
    memcpy(result.ptr, string.ptr, string.length);
    result.ptr[string.length] = '\0';
    return result;
}

/*
 * Returns a view on the string without leading and trailing characters
 * for which isspace() is true. No memory is copied or modified.
 */
cxstring cx_strtrim(cxstring string) {
    cxstring result = string;
    // TODO: optimize by comparing multiple bytes at once
    // the casts are required: passing a negative char to isspace() is
    // undefined behavior
    while (result.length > 0
           && isspace((unsigned char) *result.ptr)) {
        result.ptr++;
        result.length--;
    }
    while (result.length > 0
           && isspace((unsigned char) result.ptr[result.length - 1])) {
        result.length--;
    }
    return result;
}

/* Mutable variant of cx_strtrim(). */
cxmutstr cx_strtrim_m(cxmutstr string) {
    cxstring result = cx_strtrim(cx_strcast(string));
    return (cxmutstr) {(char *) result.ptr, result.length};
}

/* Checks whether the string starts with the specified prefix. */
bool cx_strprefix(cxstring string, cxstring prefix) {
    if (string.length < prefix.length) return false;
    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

/* Checks whether the string ends with the specified suffix. */
bool cx_strsuffix(cxstring string, cxstring suffix) {
    if (string.length < suffix.length) return false;
    return memcmp(string.ptr + string.length - suffix.length,
                  suffix.ptr, suffix.length) == 0;
}

/* Checks whether the string starts with the prefix, ignoring case. */
bool cx_casestrprefix(cxstring string, cxstring prefix) {
    if (string.length < prefix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
#else
    return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
#endif
}

/* Checks whether the string ends with the suffix, ignoring case. */
bool cx_casestrsuffix(cxstring string, cxstring suffix) {
    if (string.length < suffix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr + string.length - suffix.length,
                     suffix.ptr, suffix.length) == 0;
#else
    return strncasecmp(string.ptr + string.length - suffix.length,
                       suffix.ptr, suffix.length) == 0;
#endif
}