src/string.c

Fri, 24 Oct 2025 20:52:10 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 24 Oct 2025 20:52:10 +0200
changeset 1442
244fb8bc3584
parent 1426
3a89b31f0724
permissions
-rw-r--r--

fix hash map not being able to deal with 64-bit hashes

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#ifdef MEMRCHR_NEED_GNU
#define _GNU_SOURCE
#endif

#include "cx/string.h"

#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <float.h>
#include <ctype.h>

#ifdef _WIN32
#define cx_strcasecmp_impl _strnicmp
#else
#include <strings.h>
#define cx_strcasecmp_impl strncasecmp
#endif

/**
 * Wraps a zero-terminated C string in a cxmutstr without copying it.
 *
 * The length is measured with strlen(); a NULL pointer yields length zero.
 */
cxmutstr cx_mutstr(char *cstring) {
    size_t length = 0;
    if (cstring != NULL) {
        length = strlen(cstring);
    }
    cxmutstr wrapped = {cstring, length};
    return wrapped;
}

/**
 * Wraps a character buffer and an explicit length in a cxmutstr.
 *
 * Neither the pointer nor the length is inspected or validated.
 */
cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
) {
    cxmutstr wrapped = {cstring, length};
    return wrapped;
}

/**
 * Wraps a zero-terminated C string in a read-only cxstring without copying.
 *
 * The length is measured with strlen(); a NULL pointer yields length zero.
 */
cxstring cx_str(const char *cstring) {
    size_t length = 0;
    if (cstring != NULL) {
        length = strlen(cstring);
    }
    cxstring wrapped = {cstring, length};
    return wrapped;
}

/**
 * Wraps a character buffer and an explicit length in a read-only cxstring.
 *
 * Neither the pointer nor the length is inspected or validated.
 */
cxstring cx_strn(
        const char *cstring,
        size_t length
) {
    cxstring wrapped = {cstring, length};
    return wrapped;
}

/**
 * Releases the buffer of a string via cxFreeDefault() and resets the
 * string to {NULL, 0}. A NULL @p str is ignored.
 */
void cx_strfree(cxmutstr *str) {
    if (str != NULL) {
        cxFreeDefault(str->ptr);
        str->length = 0;
        str->ptr = NULL;
    }
}

/**
 * Releases the buffer of a string via cxFree() with the given allocator
 * and resets the string to {NULL, 0}. A NULL @p str is ignored.
 */
void cx_strfree_a(
        const CxAllocator *alloc,
        cxmutstr *str
) {
    if (str != NULL) {
        cxFree(alloc, str->ptr);
        str->length = 0;
        str->ptr = NULL;
    }
}

/**
 * Copies @p src into @p dest, resizing the destination buffer first.
 *
 * The destination buffer is resized with cxReallocate() to hold the source
 * characters plus a zero terminator.
 *
 * @return zero on success, 1 when cxReallocate() reports failure
 */
int cx_strcpy_a(
        const CxAllocator *alloc,
        cxmutstr *dest,
        cxstring src
) {
    size_t len = src.length;

    // one extra byte for the terminator
    if (cxReallocate(alloc, &dest->ptr, len + 1)) {
        return 1;
    }

    char *target = dest->ptr;
    memcpy(target, src.ptr, len);
    target[len] = '\0';
    dest->length = len;

    return 0;
}

/**
 * Sums the lengths of @p count cxstring arguments.
 *
 * When the sum would exceed SIZE_MAX, errno is set to EOVERFLOW and the
 * (wrapped) sum is still returned.
 */
size_t cx_strlen(
        size_t count,
        ...
) {
    size_t total = 0;

    if (count > 0) {
        va_list args;
        va_start(args, count);
        for (size_t remaining = count; remaining > 0; remaining--) {
            cxstring item = va_arg(args, cxstring);
            // flag the overflow, but keep on adding
            if (total > SIZE_MAX - item.length) {
                errno = EOVERFLOW;
            }
            total += item.length;
        }
        va_end(args);
    }

    return total;
}

/**
 * Appends @p count cxstring arguments to @p str.
 *
 * The buffer of @p str is grown with cxRealloc() (or created with cxMalloc()
 * when str.ptr is NULL) and the result is always zero-terminated.
 *
 * @return the concatenated string; @p str itself when @p count is zero;
 * {NULL, 0} when the allocation fails or when the total length (including
 * the terminator) does not fit into size_t - in the latter case errno is
 * set to EOVERFLOW
 */
cxmutstr cx_strcat_ma(
        const CxAllocator *alloc,
        cxmutstr str,
        size_t count,
        ...
) {
    if (count == 0) return str;
    va_list ap;
    va_start(ap, count);
    va_list ap2;
    va_copy(ap2, ap);

    // compute overall length
    bool overflow = false;
    size_t slen = str.length;
    for (size_t i = 0; i < count; i++) {
        cxstring s = va_arg(ap, cxstring);
        // compare against the length that is about to be added;
        // ">=" keeps one byte in reserve for the terminator
        if (s.length >= SIZE_MAX - slen) overflow = true;
        slen += s.length;
    }
    va_end(ap);

    // abort in case of overflow
    if (overflow) {
        va_end(ap2);
        errno = EOVERFLOW;
        return (cxmutstr) { NULL, 0 };
    }

    // reallocate or create new string
    char *newstr;
    if (str.ptr == NULL) {
        newstr = cxMalloc(alloc, slen + 1);
    } else {
        newstr = cxRealloc(alloc, str.ptr, slen + 1);
    }
    if (newstr == NULL) {
        va_end(ap2);
        return (cxmutstr) {NULL, 0};
    }
    str.ptr = newstr;

    // concatenate strings, walking the argument list a second time
    size_t pos = str.length;
    str.length = slen;
    for (size_t i = 0; i < count; i++) {
        cxstring s = va_arg(ap2, cxstring);
        memcpy(str.ptr + pos, s.ptr, s.length);
        pos += s.length;
    }
    va_end(ap2);

    // terminate string
    str.ptr[str.length] = '\0';

    return str;
}

/**
 * Returns the part of @p string that begins at index @p start.
 *
 * Delegates to cx_strsubsl(). If @p start lies beyond the end, the
 * subtraction below wraps around, but cx_strsubsl() rejects such a start
 * before the length is used.
 */
cxstring cx_strsubs(
        cxstring string,
        size_t start
) {
    size_t remaining = string.length - start;
    return cx_strsubsl(string, start, remaining);
}

/**
 * Mutable-string variant of cx_strsubs(): returns the part of @p string
 * that begins at index @p start by delegating to cx_strsubsl_m().
 */
cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
) {
    size_t remaining = string.length - start;
    return cx_strsubsl_m(string, start, remaining);
}

/**
 * Returns a view of at most @p length characters of @p string,
 * beginning at index @p start.
 *
 * @return {NULL, 0} when @p start is greater than the string length;
 * otherwise a view that is clamped to the end of the string
 */
cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
) {
    if (start > string.length) {
        cxstring none = {NULL, 0};
        return none;
    }

    // never hand out more than what is left after start
    size_t available = string.length - start;
    cxstring sub = {
        string.ptr + start,
        length < available ? length : available
    };
    return sub;
}

/**
 * Mutable-string variant of cx_strsubsl(): computes the substring on a
 * read-only view and casts the resulting pointer back to mutable.
 */
cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
) {
    cxstring sub = cx_strsubsl(cx_strcast(string), start, length);
    cxmutstr result = {(char *) sub.ptr, sub.length};
    return result;
}

/**
 * Finds the first occurrence of the byte @p chr (only the low eight bits
 * are used) in @p string.
 *
 * @return the rest of the string starting at the match,
 * or {NULL, 0} when there is no match
 */
cxstring cx_strchr(
        cxstring string,
        int chr
) {
    const char *hit = memchr(string.ptr, chr & 0xFF, string.length);
    if (hit == NULL) {
        cxstring none = {NULL, 0};
        return none;
    }
    size_t offset = (size_t) (hit - string.ptr);
    cxstring tail = {hit, string.length - offset};
    return tail;
}

/**
 * Mutable-string variant of cx_strchr(): searches on a read-only view
 * and casts the resulting pointer back to mutable.
 */
cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
) {
    cxstring found = cx_strchr(cx_strcast(string), chr);
    cxmutstr result = {(char *) found.ptr, found.length};
    return result;
}

/**
 * Finds the last occurrence of the byte @p chr (only the low eight bits
 * are used) in @p string.
 *
 * Uses memrchr() when WITH_MEMRCHR is defined and a manual backwards scan
 * otherwise.
 *
 * @return the rest of the string starting at the match,
 * or {NULL, 0} when there is no match
 */
cxstring cx_strrchr(
    cxstring string,
    int chr
) {
#ifdef WITH_MEMRCHR
    char *ret = memrchr(string.ptr, 0xFF & chr, string.length);
    if (ret == NULL) return (cxstring) {NULL, 0};
    return (cxstring) {ret, string.length - (ret - string.ptr)};
#else
    chr = 0xFF & chr;
    size_t i = string.length;
    while (i > 0) {
        i--;
        // compare as unsigned char: chr is in 0..255, but a plain char
        // may be signed, so bytes >= 0x80 would otherwise never match
        if ((unsigned char) string.ptr[i] == chr) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
#endif
}

/**
 * Mutable-string variant of cx_strrchr(): searches on a read-only view
 * and casts the resulting pointer back to mutable.
 */
cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
) {
    cxstring found = cx_strrchr(cx_strcast(string), chr);
    cxmutstr result = {(char *) found.ptr, found.length};
    return result;
}

// Number of entries in the local (stack) prefix table used by cx_strstr().
// Needles of this length or longer use a heap-allocated table instead.
// The value can be overridden by defining the macro at compile time.
#ifndef CX_STRSTR_SBO_SIZE
#define CX_STRSTR_SBO_SIZE 128
#endif
// the configured size, exported as a constant object
const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;

/**
 * Finds the first occurrence of @p needle in @p haystack.
 *
 * Single-character needles are delegated to cx_strchr(). Longer needles are
 * searched with a prefix table that lives on the stack for short needles
 * and on the heap otherwise. If the heap table cannot be allocated, a plain
 * memcmp() scan that needs no extra memory is used instead.
 *
 * @return the rest of the haystack starting at the match; the haystack
 * itself when the needle is empty; {NULL, 0} when there is no match
 */
cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
) {
    if (needle.length == 0) {
        return haystack;
    }

    // optimize for single-char needles
    if (needle.length == 1) {
        return cx_strchr(haystack, *needle.ptr);
    }

    // a needle that is longer than the haystack can never match
    if (needle.length > haystack.length) {
        return (cxstring) {NULL, 0};
    }

    /*
     * IMPORTANT:
     * Our prefix table contains the prefix length PLUS ONE.
     * This is our decision because we want to use the full range of size_t.
     * The original algorithm needs a (-1) at one single place,
     * and we want to avoid that.
     */

    // local prefix table
    size_t s_prefix_table[CX_STRSTR_SBO_SIZE];

    // check needle length and use appropriate prefix table
    // if the pattern exceeds static prefix table, allocate on the heap
    const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
    register size_t *ptable = useheap
        ? cxCallocDefault(needle.length + 1, sizeof(size_t))
        : s_prefix_table;

    // allocation failed: fall back to a naive scan without a prefix table
    if (ptable == NULL) {
        const size_t last = haystack.length - needle.length;
        for (size_t k = 0; k <= last; k++) {
            if (memcmp(haystack.ptr + k, needle.ptr, needle.length) == 0) {
                return (cxstring) {haystack.ptr + k, haystack.length - k};
            }
        }
        return (cxstring) {NULL, 0};
    }

    // keep counter in registers
    register size_t i, j;

    // fill prefix table
    i = 0;
    j = 0;
    ptable[i] = j;
    while (i < needle.length) {
        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        ptable[i] = j;
    }

    // search
    cxstring result = {NULL, 0};
    i = 0;
    j = 1;
    while (i < haystack.length) {
        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        // j is one ahead of the number of matched needle characters
        if (j - 1 == needle.length) {
            size_t start = i - needle.length;
            result.ptr = haystack.ptr + start;
            result.length = haystack.length - start;
            break;
        }
    }

    // if prefix table was allocated on the heap, free it
    if (useheap) {
        cxFreeDefault(ptable);
    }

    return result;
}

/**
 * Mutable-string variant of cx_strstr(): searches on a read-only view
 * of the haystack and casts the resulting pointer back to mutable.
 */
cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
) {
    cxstring found = cx_strstr(cx_strcast(haystack), needle);
    cxmutstr result = {(char *) found.ptr, found.length};
    return result;
}

/**
 * Splits @p string at every occurrence of @p delim into at most @p limit
 * items that are written to @p output.
 *
 * The items are views into @p string; nothing is copied. When the limit is
 * reached, the last item contains the complete remaining string.
 *
 * @param output array with room for at least @p limit items
 * @return the number of items written (zero when @p limit is zero)
 */
size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
) {
    // special case: output limit is zero
    if (limit == 0) return 0;

    // special case: delimiter is empty
    if (delim.length == 0) {
        output[0] = string;
        return 1;
    }

    // special cases: delimiter is at least as large as the string
    if (delim.length >= string.length) {
        // exact match
        if (cx_strcmp(string, delim) == 0) {
            if (limit < 2) {
                // only one slot available: like in the general case,
                // the last item receives the full remaining string
                output[0] = string;
                return 1;
            }
            output[0] = cx_strn(string.ptr, 0);
            output[1] = cx_strn(string.ptr + string.length, 0);
            return 2;
        } else {
            // no match possible
            output[0] = string;
            return 1;
        }
    }

    size_t n = 0;
    cxstring curpos = string;
    while (1) {
        ++n;
        cxstring match = cx_strstr(curpos, delim);
        if (match.length > 0) {
            // is the limit reached?
            if (n < limit) {
                // copy the current string to the array
                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
                output[n - 1] = item;
                size_t processed = item.length + delim.length;
                curpos.ptr += processed;
                curpos.length -= processed;
            } else {
                // limit reached, copy the _full_ remaining string
                output[n - 1] = curpos;
                break;
            }
        } else {
            // no more matches, copy last string
            output[n - 1] = curpos;
            break;
        }
    }

    return n;
}

/**
 * Like cx_strsplit(), but allocates the output array with @p allocator.
 *
 * A first pass counts the items, then the array is allocated with
 * cxCalloc() and filled by cx_strsplit().
 *
 * @param output receives the allocated array, or NULL when the allocation
 * fails
 * @return the number of items, or zero when the allocation fails
 */
size_t cx_strsplit_a(
        const CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
) {
    // find out how many splits we're going to make and allocate memory
    size_t n = 0;
    if (delim.length == 0) {
        // cx_strsplit() yields exactly one item for an empty delimiter;
        // counting here would only spin until the limit is reached
        n = 1;
    } else {
        cxstring curpos = string;
        while (1) {
            ++n;
            cxstring match = cx_strstr(curpos, delim);
            if (match.length > 0) {
                // is the limit reached?
                if (n < limit) {
                    size_t processed = match.ptr - curpos.ptr + delim.length;
                    curpos.ptr += processed;
                    curpos.length -= processed;
                } else {
                    // limit reached
                    break;
                }
            } else {
                // no more matches
                break;
            }
        }
    }
    *output = cxCalloc(allocator, n, sizeof(cxstring));
    if (*output == NULL) {
        // do not let cx_strsplit() write through a NULL pointer
        return 0;
    }
    return cx_strsplit(string, delim, n, *output);
}

/**
 * Mutable-string variant of cx_strsplit(): the input and the output array
 * are reinterpreted as their read-only counterparts.
 */
size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
) {
    cxstring *items = (cxstring *) output;
    return cx_strsplit(cx_strcast(string), delim, limit, items);
}

/**
 * Mutable-string variant of cx_strsplit_a(): the input and the output
 * pointer are reinterpreted as their read-only counterparts.
 */
size_t cx_strsplit_ma(
        const CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
) {
    cxstring **items = (cxstring **) output;
    return cx_strsplit_a(allocator, cx_strcast(string), delim, limit, items);
}

/**
 * Compares two strings with strncmp() over their common length.
 *
 * When the common part is equal, the longer string is considered greater.
 *
 * @return the strncmp() result when the common parts differ or the lengths
 * are equal; otherwise 1 when @p s1 is longer and -1 when it is shorter
 */
int cx_strcmp_(
        cxstring s1,
        cxstring s2
) {
    const size_t common = s1.length < s2.length ? s1.length : s2.length;
    const int order = strncmp(s1.ptr, s2.ptr, common);
    if (order != 0 || s1.length == s2.length) {
        return order;
    }
    // identical up to the shorter length: the longer string wins
    return s1.length > s2.length ? 1 : -1;
}

/**
 * Compares two strings, ignoring case, with cx_strcasecmp_impl over their
 * common length.
 *
 * When the common part is equal, the longer string is considered greater.
 *
 * @return the comparison result when the common parts differ or the lengths
 * are equal; otherwise 1 when @p s1 is longer and -1 when it is shorter
 */
int cx_strcasecmp_(
        cxstring s1,
        cxstring s2
) {
    const size_t common = s1.length < s2.length ? s1.length : s2.length;
    const int order = cx_strcasecmp_impl(s1.ptr, s2.ptr, common);
    if (order != 0 || s1.length == s2.length) {
        return order;
    }
    // identical up to the shorter length: the longer string wins
    return s1.length > s2.length ? 1 : -1;
}

/**
 * Comparison function with a generic signature: both arguments are
 * treated as pointers to cxstring and compared with cx_strcmp().
 */
int cx_strcmp_p(
        const void *s1,
        const void *s2
) {
    const cxstring *lhs = s1;
    const cxstring *rhs = s2;
    return cx_strcmp(*lhs, *rhs);
}

/**
 * Comparison function with a generic signature: both arguments are
 * treated as pointers to cxstring and compared with cx_strcasecmp().
 */
int cx_strcasecmp_p(
        const void *s1,
        const void *s2
) {
    const cxstring *lhs = s1;
    const cxstring *rhs = s2;
    return cx_strcasecmp(*lhs, *rhs);
}

/**
 * Creates a zero-terminated copy of @p string in memory obtained from
 * cxMalloc() with the given allocator.
 *
 * @return the copy, or {NULL, 0} when the allocation fails
 */
cxmutstr cx_strdup_a_(
        const CxAllocator *allocator,
        cxstring string
) {
    size_t len = string.length;

    // one extra byte for the terminator
    char *copy = cxMalloc(allocator, len + 1);
    if (copy == NULL) {
        cxmutstr failed = {NULL, 0};
        return failed;
    }

    memcpy(copy, string.ptr, len);
    copy[len] = '\0';

    cxmutstr result = {copy, len};
    return result;
}

/**
 * Returns a view of @p string without leading and trailing characters
 * for which isspace() is true. Nothing is copied or modified.
 */
cxstring cx_strtrim(cxstring string) {
    size_t begin = 0;
    size_t end = string.length;

    // skip whitespace at the front
    while (begin < end && isspace((unsigned char) string.ptr[begin])) {
        begin++;
    }
    // drop whitespace at the back
    while (end > begin && isspace((unsigned char) string.ptr[end - 1])) {
        end--;
    }

    cxstring trimmed = {string.ptr + begin, end - begin};
    return trimmed;
}

/**
 * Mutable-string variant of cx_strtrim(): trims a read-only view and
 * casts the resulting pointer back to mutable.
 */
cxmutstr cx_strtrim_m(cxmutstr string) {
    cxstring trimmed = cx_strtrim(cx_strcast(string));
    cxmutstr result = {(char *) trimmed.ptr, trimmed.length};
    return result;
}

/**
 * Checks whether @p string starts with @p prefix (byte-wise comparison).
 */
bool cx_strprefix_(
        cxstring string,
        cxstring prefix
) {
    // a prefix that is longer than the string can never match
    return string.length >= prefix.length
           && memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

/**
 * Checks whether @p string ends with @p suffix (byte-wise comparison).
 */
bool cx_strsuffix_(
        cxstring string,
        cxstring suffix
) {
    // a suffix that is longer than the string can never match
    if (suffix.length > string.length) {
        return false;
    }
    const char *tail = string.ptr + string.length - suffix.length;
    return memcmp(tail, suffix.ptr, suffix.length) == 0;
}

/**
 * Checks whether @p string starts with @p prefix, ignoring case.
 *
 * The comparison uses cx_strcasecmp_impl, which this file maps to
 * _strnicmp on Windows and to strncasecmp elsewhere.
 */
bool cx_strcaseprefix_(
        cxstring string,
        cxstring prefix
) {
    // a prefix that is longer than the string can never match
    if (prefix.length > string.length) {
        return false;
    }
    return cx_strcasecmp_impl(string.ptr, prefix.ptr, prefix.length) == 0;
}

/**
 * Checks whether @p string ends with @p suffix, ignoring case.
 *
 * The comparison uses cx_strcasecmp_impl, which this file maps to
 * _strnicmp on Windows and to strncasecmp elsewhere.
 */
bool cx_strcasesuffix_(
        cxstring string,
        cxstring suffix
) {
    // a suffix that is longer than the string can never match
    if (suffix.length > string.length) {
        return false;
    }
    const char *tail = string.ptr + string.length - suffix.length;
    return cx_strcasecmp_impl(tail, suffix.ptr, suffix.length) == 0;
}

/**
 * Creates a copy of @p str in which up to @p replmax occurrences of
 * @p search are replaced with @p replacement.
 *
 * When nothing can be replaced (empty search string, search string longer
 * than the input, @p replmax of zero, or no occurrence), the result is a
 * plain duplicate created with cx_strdup_a().
 *
 * @return the new zero-terminated string; {NULL, 0} when the allocation
 * fails or when the resulting length does not fit into size_t - in the
 * latter case errno is set to EOVERFLOW
 */
cxmutstr cx_strreplacen_a(
        const CxAllocator *allocator,
        cxstring str,
        cxstring search,
        cxstring replacement,
        size_t replmax
) {
    // special cases
    if (search.length == 0 || search.length > str.length || replmax == 0) {
        return cx_strdup_a(allocator, str);
    }

    size_t in_len = str.length;
    size_t search_len = search.length;
    size_t repl_len = replacement.length;

    // first run, count the occurrences
    // and remember where the first is
    size_t occurrences = 1;
    cxstring first = cx_strstr(str, search);
    if (first.length == 0) {
        // special case, no replacements
        return cx_strdup_a(allocator, str);
    }
    cxstring tmp = cx_strsubs(first, search_len);
    while (occurrences < replmax &&
            (tmp = cx_strstr(tmp, search)).length > 0) {
        occurrences++;
        tmp = cx_strsubs(tmp, search_len);
    }

    // calculate necessary memory, using unsigned arithmetic only
    size_t out_len;
    if (repl_len >= search_len) {
        size_t growth = repl_len - search_len;
        // in_len + growth * occurrences + 1 must fit into size_t
        if (growth > 0 && (in_len == SIZE_MAX
                || occurrences > (SIZE_MAX - 1 - in_len) / growth)) {
            errno = EOVERFLOW;
            return (cxmutstr) {NULL, 0};
        }
        out_len = in_len + growth * occurrences;
    } else {
        // the counted occurrences do not overlap and all lie within str,
        // so the subtraction cannot underflow
        out_len = in_len - (search_len - repl_len) * occurrences;
    }
    cxmutstr out = {
        cxMalloc(allocator, out_len + 1),
        out_len
    };
    if (out.ptr == NULL) {
        // do not report a length for a string that does not exist
        out.length = 0;
        return out;
    }

    // second run: perform the replacements
    // but start where we found the first occurrence
    const char *inp = str.ptr;
    tmp = first;
    char *outp = out.ptr;
    while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) {
        // copy everything up to the match, then the replacement
        size_t copylen = tmp.ptr - inp;
        memcpy(outp, inp, copylen);
        outp += copylen;
        memcpy(outp, replacement.ptr, repl_len);
        outp += repl_len;
        inp += copylen + search_len;
        tmp = cx_strsubs(tmp, search_len);
    }

    // add the remaining string
    size_t copylen = in_len - (inp - str.ptr);
    memcpy(outp, inp, copylen);
    out.ptr[out_len] = '\0';

    return out;
}

/**
 * Creates a tokenization context for @p str with the delimiter @p delim
 * and a maximum of @p limit tokens.
 *
 * All positions and the found-counter start at zero and no additional
 * delimiters are configured.
 */
CxStrtokCtx cx_strtok_(
        cxstring str,
        cxstring delim,
        size_t limit
) {
    CxStrtokCtx ctx = {
        .str = str,
        .delim = delim,
        .limit = limit,
        .pos = 0,
        .next_pos = 0,
        .delim_pos = 0,
        .found = 0,
        .delim_more = NULL,
        .delim_more_count = 0
    };
    return ctx;
}

/**
 * Advances the tokenization context and stores the next token.
 *
 * The token ends at the earliest occurrence of the main delimiter or of
 * any additional delimiter, or at the end of the string when none is found.
 *
 * @return true when a token was stored in @p token; false when the token
 * limit is reached or the previous token already ended the string
 */
bool cx_strtok_next(
        CxStrtokCtx *ctx,
        cxstring *token
) {
    // abort criteria
    if (ctx->found >= ctx->limit) {
        return false;
    }
    if (ctx->delim_pos >= ctx->str.length) {
        return false;
    }

    // only the part after the previous delimiter is searched
    cxstring rest = cx_strsubs(ctx->str, ctx->next_pos);

    // look for the main delimiter; on a hit, capture exactly the delimiter
    cxstring hit = cx_strstr(rest, ctx->delim);
    if (hit.length > 0) {
        hit.length = ctx->delim.length;
    }

    // an additional delimiter wins when it occurs earlier
    for (size_t k = 0; k < ctx->delim_more_count; k++) {
        cxstring alt = ctx->delim_more[k];
        cxstring cand = cx_strstr(rest, alt);
        if (cand.length == 0) {
            continue;
        }
        if (hit.length == 0 || cand.ptr < hit.ptr) {
            hit.ptr = cand.ptr;
            hit.length = alt.length;
        }
    }

    // store the token information and adjust the context
    ctx->found++;
    ctx->pos = ctx->next_pos;
    token->ptr = ctx->str.ptr + ctx->pos;
    if (hit.length == 0) {
        // no delimiter left: the token runs to the end of the string
        ctx->delim_pos = ctx->str.length;
    } else {
        ctx->delim_pos = (size_t) (hit.ptr - ctx->str.ptr);
    }
    token->length = ctx->delim_pos - ctx->pos;
    ctx->next_pos = ctx->delim_pos + hit.length;

    return true;
}

/**
 * Mutable-string variant of cx_strtok_next(): the token pointer is
 * reinterpreted as a pointer to a read-only string.
 */
bool cx_strtok_next_m(
        CxStrtokCtx *ctx,
        cxmutstr *token
) {
    cxstring *view = (cxstring *) token;
    return cx_strtok_next(ctx, view);
}

/**
 * Registers @p count additional delimiters for the tokenization context.
 *
 * Only the pointer is stored; the array is not copied.
 */
void cx_strtok_delim(
        CxStrtokCtx *ctx,
        const cxstring *delim,
        size_t count
) {
    ctx->delim_more_count = count;
    ctx->delim_more = delim;
}

// Function body shared by the narrower signed conversions.
// Expects str, output, base, and groupsep to be in scope: parses with
// cx_strtoll_lc, fails with ERANGE outside of [rmin, rmax], and otherwise
// stores the value converted to rtype.
#define cx_strtoX_signed_impl(rtype, rmin, rmax) \
    long long parsed; \
    if (cx_strtoll_lc(str, &parsed, base, groupsep)) { \
        return -1; \
    } \
    if (parsed < (rmin) || parsed > (rmax)) { \
        errno = ERANGE; \
        return -1; \
    } \
    *output = (rtype) parsed; \
    return 0

// Converts str to a short; the shared macro body parses with cx_strtoll_lc
// and fails with ERANGE for values outside of [SHRT_MIN, SHRT_MAX].
// Returns zero on success and -1 on failure.
int cx_strtos_lc_(cxstring str, short *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(short, SHRT_MIN, SHRT_MAX);
}

// Converts str to an int; the shared macro body parses with cx_strtoll_lc
// and fails with ERANGE for values outside of [INT_MIN, INT_MAX].
// Returns zero on success and -1 on failure.
int cx_strtoi_lc_(cxstring str, int *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int, INT_MIN, INT_MAX);
}

// Converts str to a long; the shared macro body parses with cx_strtoll_lc
// and fails with ERANGE for values outside of [LONG_MIN, LONG_MAX].
// Returns zero on success and -1 on failure.
int cx_strtol_lc_(cxstring str, long *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(long, LONG_MIN, LONG_MAX);
}

/**
 * Converts @p str to a long long.
 *
 * An optional leading minus sign is handled here; the magnitude is parsed
 * with cx_strtoull_lc and then range-checked.
 *
 * @return zero on success; non-zero on failure, with errno set to EINVAL
 * for an empty string, a lone "-", or a "-+" prefix, and to ERANGE when the
 * value does not fit into a long long; failures of cx_strtoull_lc are
 * passed through
 */
int cx_strtoll_lc_(cxstring str, long long *output, int base, const char *groupsep) {
    // strategy: parse as unsigned, check range, negate if required
    bool neg = false;
    size_t start_unsigned = 0;

    // emptiness check
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // test if we have a negative sign character
    if (str.ptr[start_unsigned] == '-') {
        neg = true;
        start_unsigned++;
        // must not be followed by positive sign character
        if (str.length == 1 || str.ptr[start_unsigned] == '+') {
            errno = EINVAL;
            return -1;
        }
    }

    // now parse the number with strtoull
    unsigned long long v;
    cxstring ustr = start_unsigned == 0 ? str
        : cx_strn(str.ptr + start_unsigned, str.length - start_unsigned);
    int ret = cx_strtoull_lc(ustr, &v, base, groupsep);
    if (ret != 0) return ret;

    // largest magnitudes that are representable with either sign
    const unsigned long long max_pos = LLONG_MAX;
    const unsigned long long max_neg =
        max_pos + (unsigned long long) -(LLONG_MIN + LLONG_MAX);

    if (neg) {
        // compare the magnitude directly, so that "-0" is not rejected
        if (v > max_neg) {
            errno = ERANGE;
            return -1;
        }
        // a magnitude above LLONG_MAX can only be LLONG_MIN; negating it
        // as a long long would overflow
        *output = v > max_pos ? LLONG_MIN : -(long long) v;
        return 0;
    } else {
        if (v > max_pos) {
            errno = ERANGE;
            return -1;
        }
        *output = (long long) v;
        return 0;
    }
}

// Converts str to an int8_t; the shared macro body parses with cx_strtoll_lc
// and fails with ERANGE for values outside of [INT8_MIN, INT8_MAX].
// Returns zero on success and -1 on failure.
int cx_strtoi8_lc_(cxstring str, int8_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int8_t, INT8_MIN, INT8_MAX);
}

// Converts str to an int16_t; the shared macro body parses with cx_strtoll_lc
// and fails with ERANGE for values outside of [INT16_MIN, INT16_MAX].
// Returns zero on success and -1 on failure.
int cx_strtoi16_lc_(cxstring str, int16_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int16_t, INT16_MIN, INT16_MAX);
}

// Converts str to an int32_t; the shared macro body parses with cx_strtoll_lc
// and fails with ERANGE for values outside of [INT32_MIN, INT32_MAX].
// Returns zero on success and -1 on failure.
int cx_strtoi32_lc_(cxstring str, int32_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int32_t, INT32_MIN, INT32_MAX);
}

/**
 * Converts @p str to an int64_t by parsing with cx_strtoll_lc.
 *
 * The value is parsed into a local long long and then assigned, so that
 * @p output is never accessed through a pointer to a different type.
 *
 * @return the result of cx_strtoll_lc; @p output is only written on success
 */
int cx_strtoi64_lc_(cxstring str, int64_t *output, int base, const char *groupsep) {
    assert(sizeof(long long) == sizeof(int64_t)); // should be true on all platforms
    long long parsed;
    int ret = cx_strtoll_lc(str, &parsed, base, groupsep);
    if (ret == 0) {
        *output = (int64_t) parsed;
    }
    return ret;
}

// Function body shared by the narrower unsigned conversions.
// Expects str, output, base, and groupsep to be in scope: parses with
// cx_strtou64_lc, fails with ERANGE above rmax, and otherwise stores the
// value converted to rtype.
#define cx_strtoX_unsigned_impl(rtype, rmax) \
    uint64_t parsed; \
    if (cx_strtou64_lc(str, &parsed, base, groupsep)) { \
        return -1; \
    } \
    if (parsed > (rmax)) { \
        errno = ERANGE; \
        return -1; \
    } \
    *output = (rtype) parsed; \
    return 0

// Converts str to an unsigned short; the shared macro body parses with
// cx_strtou64_lc and fails with ERANGE for values above USHRT_MAX.
// Returns zero on success and -1 on failure.
int cx_strtous_lc_(cxstring str, unsigned short *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned short, USHRT_MAX);
}

// Converts str to an unsigned int; the shared macro body parses with
// cx_strtou64_lc and fails with ERANGE for values above UINT_MAX.
// Returns zero on success and -1 on failure.
int cx_strtou_lc_(cxstring str, unsigned int *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned int, UINT_MAX);
}

// Converts str to an unsigned long; the shared macro body parses with
// cx_strtou64_lc and fails with ERANGE for values above ULONG_MAX.
// Returns zero on success and -1 on failure.
int cx_strtoul_lc_(cxstring str, unsigned long *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned long, ULONG_MAX);
}

/**
 * Converts @p str to an unsigned long long.
 *
 * Supported bases are 2, 8, 10, and 16. A leading plus sign is accepted.
 * For base 2 the prefixes "b" and "0b", and for base 16 the prefixes "x",
 * "#", and "0x" are skipped (letters in either case). Characters contained
 * in @p groupsep are ignored; a NULL @p groupsep means no separators.
 *
 * @return zero on success; -1 on failure, with errno set to EINVAL for
 * malformed input or an unsupported base, and to ERANGE on overflow;
 * @p output is only written on success
 */
int cx_strtoull_lc_(cxstring str, unsigned long long *output, int base, const char *groupsep) {
    // some sanity checks
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }
    if (!(base == 2 || base == 8 || base == 10 || base == 16)) {
        errno = EINVAL;
        return -1;
    }
    if (groupsep == NULL) groupsep = "";

    // find the actual start of the number
    if (str.ptr[0] == '+') {
        str.ptr++;
        str.length--;
        if (str.length == 0) {
            errno = EINVAL;
            return -1;
        }
    }
    size_t start = 0;

    // if base is 2 or 16, some leading stuff may appear
    // (OR-ing with 32 folds ASCII upper case letters to lower case)
    if (base == 2) {
        if ((str.ptr[0] | 32) == 'b') {
            start = 1;
        } else if (str.ptr[0] == '0' && str.length > 1) {
            if ((str.ptr[1] | 32) == 'b') {
                start = 2;
            }
        }
    } else if (base == 16) {
        if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') {
            start = 1;
        } else if (str.ptr[0] == '0' && str.length > 1) {
            if ((str.ptr[1] | 32) == 'x') {
                start = 2;
            }
        }
    }

    // check if there are digits left
    if (start >= str.length) {
        errno = EINVAL;
        return -1;
    }

    // now parse the number
    unsigned long long result = 0;
    for (size_t i = start; i < str.length; i++) {
        // ignore group separators
        if (strchr(groupsep, str.ptr[i])) continue;

        // determine the digit value of the character
        // every range is bounded on both sides, so that the characters
        // between '9' and 'A' (':' to '@') are not mistaken for digits
        unsigned char c = (unsigned char) str.ptr[i];
        unsigned digit;
        if (c >= '0' && c <= '9') digit = c - '0';
        else if (c >= 'a' && c <= 'z') digit = 10u + (c - 'a');
        else if (c >= 'A' && c <= 'Z') digit = 10u + (c - 'A');
        else digit = 255;
        if (digit >= (unsigned) base) {
            errno = EINVAL;
            return -1;
        }

        // now combine the digit with what we already have
        // the low byte and the upper bits are multiplied separately,
        // so that an overflow shows up in the upper part
        unsigned long right = (result & 0xff) * base + digit;
        unsigned long long left = (result >> 8) * base + (right >> 8);
        if (left > (ULLONG_MAX >> 8)) {
            errno = ERANGE;
            return -1;
        }
        result = (left << 8) + (right & 0xff);
    }

    *output = result;
    return 0;
}

// Converts str to a uint8_t; the shared macro body parses with
// cx_strtou64_lc and fails with ERANGE for values above UINT8_MAX.
// Returns zero on success and -1 on failure.
int cx_strtou8_lc_(cxstring str, uint8_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint8_t, UINT8_MAX);
}

// Converts str to a uint16_t; the shared macro body parses with
// cx_strtou64_lc and fails with ERANGE for values above UINT16_MAX.
// Returns zero on success and -1 on failure.
int cx_strtou16_lc_(cxstring str, uint16_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint16_t, UINT16_MAX);
}

// Converts str to a uint32_t; the shared macro body parses with
// cx_strtou64_lc and fails with ERANGE for values above UINT32_MAX.
// Returns zero on success and -1 on failure.
int cx_strtou32_lc_(cxstring str, uint32_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint32_t, UINT32_MAX);
}

/**
 * Converts @p str to a uint64_t by parsing with cx_strtoull_lc.
 *
 * The value is parsed into a local unsigned long long and then assigned,
 * so that @p output is never accessed through a pointer to a different type.
 *
 * @return the result of cx_strtoull_lc; @p output is only written on success
 */
int cx_strtou64_lc_(cxstring str, uint64_t *output, int base, const char *groupsep) {
    assert(sizeof(unsigned long long) == sizeof(uint64_t)); // should be true on all platforms
    unsigned long long parsed;
    int ret = cx_strtoull_lc(str, &parsed, base, groupsep);
    if (ret == 0) {
        *output = (uint64_t) parsed;
    }
    return ret;
}

/**
 * Converts @p str to a size_t.
 *
 * Depending on the width of size_t, the value is parsed with the 32-bit
 * or the unsigned long long conversion into a local variable and then
 * assigned, so that @p output is never accessed through a pointer to a
 * different type.
 *
 * @return the result of the underlying conversion; @p output is only
 * written on success
 */
int cx_strtoz_lc_(cxstring str, size_t *output, int base, const char *groupsep) {
#if SIZE_MAX == UINT32_MAX
    uint32_t parsed;
    int ret = cx_strtou32_lc_(str, &parsed, base, groupsep);
#elif SIZE_MAX == UINT64_MAX
    unsigned long long parsed;
    int ret = cx_strtoull_lc_(str, &parsed, base, groupsep);
#else
#error "unsupported size_t size"
#endif
    if (ret == 0) {
        *output = (size_t) parsed;
    }
    return ret;
}

/**
 * Converts @p str to a float by parsing a double with cx_strtod_lc_ and
 * checking that the magnitude is representable.
 *
 * @return zero on success; the result of cx_strtod_lc_ when parsing fails;
 * -1 with errno set to ERANGE when the magnitude is non-zero and lies
 * outside of [FLT_MIN, FLT_MAX]
 */
int cx_strtof_lc_(cxstring str, float *output, char decsep, const char *groupsep) {
    // use string to double and add a range check
    double d;
    int ret = cx_strtod_lc_(str, &d, decsep, groupsep);
    if (ret != 0) return ret;
    // note: FLT_MIN is the smallest POSITIVE number that can be represented,
    // so zero must be exempt from the lower bound
    double magnitude = d < 0 ? -d : d;
    if (magnitude != 0. && (magnitude < FLT_MIN || magnitude > FLT_MAX)) {
        errno = ERANGE;
        return -1;
    }
    *output = (float) d;
    return 0;
}

/**
 * Converts @p str to a double.
 *
 * Accepted format: optional sign, digits, optionally @p decsep followed by
 * digits, optionally 'e' or 'E' followed by an optionally signed decimal
 * exponent. Characters contained in @p groupsep are ignored; a NULL
 * @p groupsep means no separators.
 *
 * @return zero on success; -1 with errno set to EINVAL for malformed input;
 * @p output is only written on success
 */
int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) {
    // TODO: overflow check
    // TODO: increase precision

    // emptiness check
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // like the integer conversions, treat a missing separator list as empty
    if (groupsep == NULL) groupsep = "";

    double result = 0.;
    int sign = 1;

    // check if there is a sign
    if (str.ptr[0] == '-') {
        sign = -1;
        str.ptr++;
        str.length--;
    } else if (str.ptr[0] == '+') {
        str.ptr++;
        str.length--;
    }

    // there must be at least one char to parse
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // parse all digits until we find the decsep
    size_t pos = 0;
    do {
        if (isdigit((unsigned char)str.ptr[pos])) {
            result = result * 10 + (str.ptr[pos] - '0');
        } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
            break;
        }
    } while (++pos < str.length);

    // already done?
    if (pos == str.length) {
        *output = result * sign;
        return 0;
    }

    // is the next char the decsep?
    if (str.ptr[pos] == decsep) {
        pos++;
        // it may end with the decsep, if it did not start with it
        if (pos == str.length) {
            if (str.length == 1) {
                errno = EINVAL;
                return -1;
            } else {
                *output = result * sign;
                return 0;
            }
        }
        // parse everything until exponent or end
        double factor = 1.;
        do {
            if (isdigit((unsigned char)str.ptr[pos])) {
                factor *= 0.1;
                result = result + factor * (str.ptr[pos] - '0');
            } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
                break;
            }
        } while (++pos < str.length);
    }

    // no exponent?
    if (pos == str.length) {
        *output = result * sign;
        return 0;
    }

    // now the next separator MUST be the exponent separator
    // and at least one char must follow
    if ((str.ptr[pos] | 32) != 'e' || str.length <= pos + 1) {
        errno = EINVAL;
        return -1;
    }
    pos++;

    // check if we have a sign for the exponent
    // a negative exponent is applied by multiplying with powers of 0.1
    double factor = 10.;
    if (str.ptr[pos] == '-') {
        factor = .1;
        pos++;
    } else if (str.ptr[pos] == '+') {
        pos++;
    }

    // at least one digit must follow
    if (pos == str.length) {
        errno = EINVAL;
        return -1;
    }

    // parse the exponent
    unsigned int exp = 0;
    do {
        if (isdigit((unsigned char)str.ptr[pos])) {
            exp = 10 * exp + (str.ptr[pos] - '0');
        } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
            errno = EINVAL;
            return -1;
        }
    } while (++pos < str.length);

    // apply the exponent by fast exponentiation
    do {
        if (exp & 1) {
            result *= factor;
        }
        factor *= factor;
    } while ((exp >>= 1) > 0);

    // store the result and exit
    *output = result * sign;
    return 0;
}

mercurial