src/string.c

Fri, 23 May 2025 12:44:24 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 23 May 2025 12:44:24 +0200
changeset 1327
ed75dc1db503
parent 1319
aa1f580f8f59
permissions
-rw-r--r--

make test-compile depend on both static and shared

the shared lib is not needed for the tests,
but when run with coverage, gcov will be confused
when outdated line information is available from
a previous shared build

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "cx/string.h"

#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <float.h>

#ifdef _WIN32
#define cx_strcasecmp_impl _strnicmp
#else
#include <strings.h>
#define cx_strcasecmp_impl strncasecmp
#endif

cxmutstr cx_mutstr(char *cstring) {
    return (cxmutstr) {cstring, strlen(cstring)};
}

cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
) {
    return (cxmutstr) {cstring, length};
}

cxstring cx_str(const char *cstring) {
    return (cxstring) {cstring, strlen(cstring)};
}

cxstring cx_strn(
        const char *cstring,
        size_t length
) {
    return (cxstring) {cstring, length};
}

void cx_strfree(cxmutstr *str) {
    if (str == NULL) return;
    cxFreeDefault(str->ptr);
    str->ptr = NULL;
    str->length = 0;
}

void cx_strfree_a(
        const CxAllocator *alloc,
        cxmutstr *str
) {
    if (str == NULL) return;
    cxFree(alloc, str->ptr);
    str->ptr = NULL;
    str->length = 0;
}

int cx_strcpy_a(
        const CxAllocator *alloc,
        cxmutstr *dest,
        cxstring src
) {
    if (cxReallocate(alloc, &dest->ptr, src.length + 1)) {
        return 1;
    }

    memcpy(dest->ptr, src.ptr, src.length);
    dest->length = src.length;
    dest->ptr[dest->length] = '\0';

    return 0;
}

size_t cx_strlen(
        size_t count,
        ...
) {
    if (count == 0) return 0;

    va_list ap;
    va_start(ap, count);
    size_t size = 0;
    for (size_t i = 0; i < count; i++) {
        cxstring str = va_arg(ap, cxstring);
        if (size > SIZE_MAX - str.length) errno = EOVERFLOW;
        size += str.length;
    }
    va_end(ap);

    return size;
}

cxmutstr cx_strcat_ma(
        const CxAllocator *alloc,
        cxmutstr str,
        size_t count,
        ...
) {
    if (count == 0) return str;
    va_list ap;
    va_start(ap, count);
    va_list ap2;
    va_copy(ap2, ap);

    // compute overall length
    bool overflow = false;
    size_t slen = str.length;
    for (size_t i = 0; i < count; i++) {
        cxstring s = va_arg(ap, cxstring);
        if (slen > SIZE_MAX - str.length) overflow = true;
        slen += s.length;
    }
    va_end(ap);

    // abort in case of overflow
    if (overflow) {
        va_end(ap2);
        errno = EOVERFLOW;
        return (cxmutstr) { NULL, 0 };
    }

    // reallocate or create new string
    char *newstr;
    if (str.ptr == NULL) {
        newstr = cxMalloc(alloc, slen + 1);
    } else {
        newstr = cxRealloc(alloc, str.ptr, slen + 1);
    }
    if (newstr == NULL) {
        va_end(ap2);
        return (cxmutstr) {NULL, 0};
    }
    str.ptr = newstr;

    // concatenate strings
    size_t pos = str.length;
    str.length = slen;
    for (size_t i = 0; i < count; i++) {
        cxstring s = va_arg(ap2, cxstring);
        memcpy(str.ptr + pos, s.ptr, s.length);
        pos += s.length;
    }
    va_end(ap2);

    // terminate string
    str.ptr[str.length] = '\0';

    return str;
}

cxstring cx_strsubs(
        cxstring string,
        size_t start
) {
    return cx_strsubsl(string, start, string.length - start);
}

cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
) {
    return cx_strsubsl_m(string, start, string.length - start);
}

cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
) {
    if (start > string.length) {
        return (cxstring) {NULL, 0};
    }

    size_t rem_len = string.length - start;
    if (length > rem_len) {
        length = rem_len;
    }

    return (cxstring) {string.ptr + start, length};
}

cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
) {
    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

cxstring cx_strchr(
        cxstring string,
        int chr
) {
    char *ret = memchr(string.ptr, 0xFF & chr, string.length);
    if (ret == NULL) return (cxstring) {NULL, 0};
    return (cxstring) {ret, string.length - (ret - string.ptr)};
}

cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

cxstring cx_strrchr(
        cxstring string,
        int chr
) {
    chr = 0xFF & chr;
    size_t i = string.length;
    while (i > 0) {
        i--;
        // TODO: improve by comparing multiple bytes at once
        if (string.ptr[i] == chr) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}

cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strrchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

#ifndef CX_STRSTR_SBO_SIZE
#define CX_STRSTR_SBO_SIZE 128
#endif
const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;

cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
) {
    if (needle.length == 0) {
        return haystack;
    }

    // optimize for single-char needles
    if (needle.length == 1) {
        return cx_strchr(haystack, *needle.ptr);
    }

    /*
     * IMPORTANT:
     * Our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t.
     * The original algorithm needs a (-1) at one single place,
     * and we want to avoid that.
     */

    // local prefix table
    size_t s_prefix_table[CX_STRSTR_SBO_SIZE];

    // check needle length and use appropriate prefix table
    // if the pattern exceeds static prefix table, allocate on the heap
    const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
    register size_t *ptable = useheap
        ? cxCallocDefault(needle.length + 1, sizeof(size_t))
        : s_prefix_table;

    // keep counter in registers
    register size_t i, j;

    // fill prefix table
    i = 0;
    j = 0;
    ptable[i] = j;
    while (i < needle.length) {
        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        ptable[i] = j;
    }

    // search
    cxstring result = {NULL, 0};
    i = 0;
    j = 1;
    while (i < haystack.length) {
        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        if (j - 1 == needle.length) {
            size_t start = i - needle.length;
            result.ptr = haystack.ptr + start;
            result.length = haystack.length - start;
            break;
        }
    }

    // if prefix table was allocated on the heap, free it
    if (useheap) {
        cxFreeDefault(ptable);
    }

    return result;
}

cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
) {
    cxstring result = cx_strstr(cx_strcast(haystack), needle);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
) {
    // special case: output limit is zero
    if (limit == 0) return 0;

    // special case: delimiter is empty
    if (delim.length == 0) {
        output[0] = string;
        return 1;
    }

    // special cases: delimiter is at least as large as the string
    if (delim.length >= string.length) {
        // exact match
        if (cx_strcmp(string, delim) == 0) {
            output[0] = cx_strn(string.ptr, 0);
            output[1] = cx_strn(string.ptr + string.length, 0);
            return 2;
        } else {
            // no match possible
            output[0] = string;
            return 1;
        }
    }

    size_t n = 0;
    cxstring curpos = string;
    while (1) {
        ++n;
        cxstring match = cx_strstr(curpos, delim);
        if (match.length > 0) {
            // is the limit reached?
            if (n < limit) {
                // copy the current string to the array
                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
                output[n - 1] = item;
                size_t processed = item.length + delim.length;
                curpos.ptr += processed;
                curpos.length -= processed;
            } else {
                // limit reached, copy the _full_ remaining string
                output[n - 1] = curpos;
                break;
            }
        } else {
            // no more matches, copy last string
            output[n - 1] = curpos;
            break;
        }
    }

    return n;
}

size_t cx_strsplit_a(
        const CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
) {
    // find out how many splits we're going to make and allocate memory
    size_t n = 0;
    cxstring curpos = string;
    while (1) {
        ++n;
        cxstring match = cx_strstr(curpos, delim);
        if (match.length > 0) {
            // is the limit reached?
            if (n < limit) {
                size_t processed = match.ptr - curpos.ptr + delim.length;
                curpos.ptr += processed;
                curpos.length -= processed;
            } else {
                // limit reached
                break;
            }
        } else {
            // no more matches
            break;
        }
    }
    *output = cxCalloc(allocator, n, sizeof(cxstring));
    return cx_strsplit(string, delim, n, *output);
}

size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
) {
    return cx_strsplit(cx_strcast(string),
                       delim, limit, (cxstring *) output);
}

size_t cx_strsplit_ma(
        const CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
) {
    return cx_strsplit_a(allocator, cx_strcast(string),
                         delim, limit, (cxstring **) output);
}

int cx_strcmp(
        cxstring s1,
        cxstring s2
) {
    if (s1.length == s2.length) {
        return strncmp(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        int r = strncmp(s1.ptr, s2.ptr, s2.length);
        if (r != 0) return r;
        return 1;
    } else {
        int r = strncmp(s1.ptr, s2.ptr, s1.length);
        if (r != 0) return r;
        return -1;
    }
}

int cx_strcasecmp(
        cxstring s1,
        cxstring s2
) {
    if (s1.length == s2.length) {
        return cx_strcasecmp_impl(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        int r = cx_strcasecmp_impl(s1.ptr, s2.ptr, s2.length);
        if (r != 0) return r;
        return 1;
    } else {
        int r = cx_strcasecmp_impl(s1.ptr, s2.ptr, s1.length);
        if (r != 0) return r;
        return -1;
    }
}

int cx_strcmp_p(
        const void *s1,
        const void *s2
) {
    const cxstring *left = s1;
    const cxstring *right = s2;
    return cx_strcmp(*left, *right);
}

int cx_strcasecmp_p(
        const void *s1,
        const void *s2
) {
    const cxstring *left = s1;
    const cxstring *right = s2;
    return cx_strcasecmp(*left, *right);
}

cxmutstr cx_strdup_a_(
        const CxAllocator *allocator,
        cxstring string
) {
    cxmutstr result = {
            cxMalloc(allocator, string.length + 1),
            string.length
    };
    if (result.ptr == NULL) {
        result.length = 0;
        return result;
    }
    memcpy(result.ptr, string.ptr, string.length);
    result.ptr[string.length] = '\0';
    return result;
}

static bool str_isspace(char c) {
    // TODO: remove once UCX has public API for this
    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
}

cxstring cx_strtrim(cxstring string) {
    cxstring result = string;
    // TODO: optimize by comparing multiple bytes at once
    while (result.length > 0 && str_isspace(*result.ptr)) {
        result.ptr++;
        result.length--;
    }
    while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) {
        result.length--;
    }
    return result;
}

cxmutstr cx_strtrim_m(cxmutstr string) {
    cxstring result = cx_strtrim(cx_strcast(string));
    return (cxmutstr) {(char *) result.ptr, result.length};
}

bool cx_strprefix(
        cxstring string,
        cxstring prefix
) {
    if (string.length < prefix.length) return false;
    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

bool cx_strsuffix(
        cxstring string,
        cxstring suffix
) {
    if (string.length < suffix.length) return false;
    return memcmp(string.ptr + string.length - suffix.length,
                  suffix.ptr, suffix.length) == 0;
}

bool cx_strcaseprefix(
        cxstring string,
        cxstring prefix
) {
    if (string.length < prefix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
#else
    return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
#endif
}

bool cx_strcasesuffix(
        cxstring string,
        cxstring suffix
) {
    if (string.length < suffix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr+string.length-suffix.length,
                  suffix.ptr, suffix.length) == 0;
#else
    return strncasecmp(string.ptr + string.length - suffix.length,
                       suffix.ptr, suffix.length) == 0;
#endif
}

cxmutstr cx_strreplacen_a(
        const CxAllocator *allocator,
        cxstring str,
        cxstring search,
        cxstring replacement,
        size_t replmax
) {
    // special cases
    if (search.length == 0 || search.length > str.length || replmax == 0) {
        return cx_strdup_a(allocator, str);
    }

    size_t in_len = str.length;
    size_t search_len = search.length;
    size_t repl_len = replacement.length;

    // first run, count the occurrences
    // and remember where the first is
    size_t occurrences = 1;
    cxstring first = cx_strstr(str, search);
    if (first.length == 0) {
        // special case, no replacements
        return cx_strdup_a(allocator, str);
    }
    cxstring tmp = cx_strsubs(first, search_len);
    while (occurrences < replmax &&
            (tmp = cx_strstr(tmp, search)).length > 0) {
        occurrences++;
        tmp = cx_strsubs(tmp, search_len);
    }

    // calculate necessary memory
    signed long long diff_len = (signed long long) repl_len - search_len;
    size_t out_len = in_len + diff_len * occurrences;
    cxmutstr out = {
        cxMalloc(allocator, out_len + 1),
        out_len
    };
    if (out.ptr == NULL) return out;

    // second run: perform the replacements
    // but start where we found the first occurrence
    const char *inp = str.ptr;
    tmp = first;
    char *outp = out.ptr;
    while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) {
        size_t copylen = tmp.ptr - inp;
        memcpy(outp, inp, copylen);
        outp += copylen;
        memcpy(outp, replacement.ptr, repl_len);
        outp += repl_len;
        inp += copylen + search_len;
        tmp = cx_strsubs(tmp, search_len);
    }

    // add the remaining string
    size_t copylen = in_len - (inp - str.ptr);
    memcpy(outp, inp, copylen);
    out.ptr[out_len] = '\0';

    return out;
}

CxStrtokCtx cx_strtok_(
        cxstring str,
        cxstring delim,
        size_t limit
) {
    CxStrtokCtx ctx;
    ctx.str = str;
    ctx.delim = delim;
    ctx.limit = limit;
    ctx.pos = 0;
    ctx.next_pos = 0;
    ctx.delim_pos = 0;
    ctx.found = 0;
    ctx.delim_more = NULL;
    ctx.delim_more_count = 0;
    return ctx;
}

bool cx_strtok_next(
        CxStrtokCtx *ctx,
        cxstring *token
) {
    // abortion criteria
    if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
        return false;
    }

    // determine the search start
    cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);

    // search the next delimiter
    cxstring delim = cx_strstr(haystack, ctx->delim);

    // if found, make delim capture exactly the delimiter
    if (delim.length > 0) {
        delim.length = ctx->delim.length;
    }

    // if more delimiters are specified, check them now
    if (ctx->delim_more_count > 0) {
        for (size_t i = 0; i < ctx->delim_more_count; i++) {
            cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
            if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
                delim.ptr = d.ptr;
                delim.length = ctx->delim_more[i].length;
            }
        }
    }

    // store the token information and adjust the context
    ctx->found++;
    ctx->pos = ctx->next_pos;
    token->ptr = &ctx->str.ptr[ctx->pos];
    ctx->delim_pos = delim.length == 0 ?
                     ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
    token->length = ctx->delim_pos - ctx->pos;
    ctx->next_pos = ctx->delim_pos + delim.length;

    return true;
}

bool cx_strtok_next_m(
        CxStrtokCtx *ctx,
        cxmutstr *token
) {
    return cx_strtok_next(ctx, (cxstring *) token);
}

void cx_strtok_delim(
        CxStrtokCtx *ctx,
        const cxstring *delim,
        size_t count
) {
    ctx->delim_more = delim;
    ctx->delim_more_count = count;
}

#define cx_strtoX_signed_impl(rtype, rmin, rmax) \
    long long result; \
    if (cx_strtoll_lc(str, &result, base, groupsep)) { \
        return -1; \
    } \
    if (result < rmin || result > rmax) { \
        errno = ERANGE; \
        return -1; \
    } \
    *output = (rtype) result; \
    return 0

int cx_strtos_lc_(cxstring str, short *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(short, SHRT_MIN, SHRT_MAX);
}

int cx_strtoi_lc_(cxstring str, int *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int, INT_MIN, INT_MAX);
}

int cx_strtol_lc_(cxstring str, long *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(long, LONG_MIN, LONG_MAX);
}

int cx_strtoll_lc_(cxstring str, long long *output, int base, const char *groupsep) {
    // strategy: parse as unsigned, check range, negate if required
    bool neg = false;
    size_t start_unsigned = 0;

    // emptiness check
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // test if we have a negative sign character
    if (str.ptr[start_unsigned] == '-') {
        neg = true;
        start_unsigned++;
        // must not be followed by positive sign character
        if (str.length == 1 || str.ptr[start_unsigned] == '+') {
            errno = EINVAL;
            return -1;
        }
    }

    // now parse the number with strtoull
    unsigned long long v;
    cxstring ustr = start_unsigned == 0 ? str
        : cx_strn(str.ptr + start_unsigned, str.length - start_unsigned);
    int ret = cx_strtoull_lc(ustr, &v, base, groupsep);
    if (ret != 0) return ret;
    if (neg) {
        if (v - 1 > LLONG_MAX) {
            errno = ERANGE;
            return -1;
        }
        *output = -(long long) v;
        return 0;
    } else {
        if (v > LLONG_MAX) {
            errno = ERANGE;
            return -1;
        }
        *output = (long long) v;
        return 0;
    }
}

int cx_strtoi8_lc_(cxstring str, int8_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int8_t, INT8_MIN, INT8_MAX);
}

int cx_strtoi16_lc_(cxstring str, int16_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int16_t, INT16_MIN, INT16_MAX);
}

int cx_strtoi32_lc_(cxstring str, int32_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int32_t, INT32_MIN, INT32_MAX);
}

int cx_strtoi64_lc_(cxstring str, int64_t *output, int base, const char *groupsep) {
    assert(sizeof(long long) == sizeof(int64_t)); // should be true on all platforms
    return cx_strtoll_lc(str, (long long*) output, base, groupsep);
}

#define cx_strtoX_unsigned_impl(rtype, rmax) \
    uint64_t result; \
    if (cx_strtou64_lc(str, &result, base, groupsep)) { \
        return -1; \
    } \
    if (result > rmax) { \
        errno = ERANGE; \
        return -1; \
    } \
    *output = (rtype) result; \
    return 0

int cx_strtous_lc_(cxstring str, unsigned short *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned short, USHRT_MAX);
}

int cx_strtou_lc_(cxstring str, unsigned int *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned int, UINT_MAX);
}

int cx_strtoul_lc_(cxstring str, unsigned long *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned long, ULONG_MAX);
}

int cx_strtoull_lc_(cxstring str, unsigned long long *output, int base, const char *groupsep) {
    // some sanity checks
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }
    if (!(base == 2 || base == 8 || base == 10 || base == 16)) {
        errno = EINVAL;
        return -1;
    }
    if (groupsep == NULL) groupsep = "";

    // find the actual start of the number
    if (str.ptr[0] == '+') {
        str.ptr++;
        str.length--;
        if (str.length == 0) {
            errno = EINVAL;
            return -1;
        }
    }
    size_t start = 0;

    // if base is 2 or 16, some leading stuff may appear
    if (base == 2) {
        if ((str.ptr[0] | 32) == 'b') {
            start = 1;
        } else if (str.ptr[0] == '0' && str.length > 1) {
            if ((str.ptr[1] | 32) == 'b') {
                start = 2;
            }
        }
    } else if (base == 16) {
        if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') {
            start = 1;
        } else if (str.ptr[0] == '0' && str.length > 1) {
            if ((str.ptr[1] | 32) == 'x') {
                start = 2;
            }
        }
    }

    // check if there are digits left
    if (start >= str.length) {
        errno = EINVAL;
        return -1;
    }

    // now parse the number
    unsigned long long result = 0;
    for (size_t i = start; i < str.length; i++) {
        // ignore group separators
        if (strchr(groupsep, str.ptr[i])) continue;

        // determine the digit value of the character
        unsigned char c = str.ptr[i];
        if (c >= 'a') c = 10 + (c - 'a');
        else if (c >= 'A') c = 10 + (c - 'A');
        else if (c >= '0') c = c - '0';
        else c = 255;
        if (c >= base) {
            errno = EINVAL;
            return -1;
        }

        // now combine the digit with what we already have
        unsigned long right = (result & 0xff) * base + c;
        unsigned long long left = (result >> 8) * base + (right >> 8);
        if (left > (ULLONG_MAX >> 8)) {
            errno = ERANGE;
            return -1;
        }
        result = (left << 8) + (right & 0xff);
    }

    *output = result;
    return 0;
}

int cx_strtou8_lc_(cxstring str, uint8_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint8_t, UINT8_MAX);
}

int cx_strtou16_lc_(cxstring str, uint16_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint16_t, UINT16_MAX);
}

int cx_strtou32_lc_(cxstring str, uint32_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint32_t, UINT32_MAX);
}

int cx_strtou64_lc_(cxstring str, uint64_t *output, int base, const char *groupsep) {
    assert(sizeof(unsigned long long) == sizeof(uint64_t)); // should be true on all platforms
    return cx_strtoull_lc(str, (unsigned long long*) output, base, groupsep);
}

int cx_strtoz_lc_(cxstring str, size_t *output, int base, const char *groupsep) {
#if SIZE_MAX == UINT32_MAX
    return cx_strtou32_lc_(str, (uint32_t*) output, base, groupsep);
#elif SIZE_MAX == UINT64_MAX
    return cx_strtoull_lc_(str, (unsigned long long *) output, base, groupsep);
#else
#error "unsupported size_t size"
#endif
}

int cx_strtof_lc_(cxstring str, float *output, char decsep, const char *groupsep) {
    // use string to double and add a range check
    double d;
    int ret = cx_strtod_lc_(str, &d, decsep, groupsep);
    if (ret != 0) return ret;
    // note: FLT_MIN is the smallest POSITIVE number that can be represented
    double test = d < 0 ? -d : d;
    if (test < FLT_MIN || test > FLT_MAX) {
        errno = ERANGE;
        return -1;
    }
    *output = (float) d;
    return 0;
}

static bool str_isdigit(char c) {
    // TODO: remove once UCX has public API for this
    return c >= '0' && c <= '9';
}

int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) {
    // TODO: overflow check
    // TODO: increase precision

    // emptiness check
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    double result = 0.;
    int sign = 1;

    // check if there is a sign
    if (str.ptr[0] == '-') {
        sign = -1;
        str.ptr++;
        str.length--;
    } else if (str.ptr[0] == '+') {
        str.ptr++;
        str.length--;
    }

    // there must be at least one char to parse
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // parse all digits until we find the decsep
    size_t pos = 0;
    do {
        if (str_isdigit(str.ptr[pos])) {
            result = result * 10 + (str.ptr[pos] - '0');
        } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
            break;
        }
    } while (++pos < str.length);

    // already done?
    if (pos == str.length) {
        *output = result * sign;
        return 0;
    }

    // is the next char the decsep?
    if (str.ptr[pos] == decsep) {
        pos++;
        // it may end with the decsep, if it did not start with it
        if (pos == str.length) {
            if (str.length == 1) {
                errno = EINVAL;
                return -1;
            } else {
                *output = result * sign;
                return 0;
            }
        }
        // parse everything until exponent or end
        double factor = 1.;
        do {
            if (str_isdigit(str.ptr[pos])) {
                factor *= 0.1;
                result = result + factor * (str.ptr[pos] - '0');
            } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
                break;
            }
        } while (++pos < str.length);
    }

    // no exponent?
    if (pos == str.length) {
        *output = result * sign;
        return 0;
    }

    // now the next separator MUST be the exponent separator
    // and at least one char must follow
    if ((str.ptr[pos] | 32) != 'e' || str.length <= pos + 1) {
        errno = EINVAL;
        return -1;
    }
    pos++;

    // check if we have a sign for the exponent
    double factor = 10.;
    if (str.ptr[pos] == '-') {
        factor = .1;
        pos++;
    } else if (str.ptr[pos] == '+') {
        pos++;
    }

    // at least one digit must follow
    if (pos == str.length) {
        errno = EINVAL;
        return -1;
    }

    // parse the exponent
    unsigned int exp = 0;
    do {
        if (str_isdigit(str.ptr[pos])) {
            exp = 10 * exp + (str.ptr[pos] - '0');
        } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
            errno = EINVAL;
            return -1;
        }
    } while (++pos < str.length);

    // apply the exponent by fast exponentiation
    do {
        if (exp & 1) {
            result *= factor;
        }
        factor *= factor;
    } while ((exp >>= 1) > 0);

    // store the result and exit
    *output = result * sign;
    return 0;
}

mercurial