src/string.c

Wed, 22 Jan 2025 21:02:46 +0100

author
Mike Becker <universe@uap-core.de>
date
Wed, 22 Jan 2025 21:02:46 +0100
changeset 1139
7dfa5bcf39ee
parent 1137
61939929030a
permissions
-rw-r--r--

remove token_escape bool from CxJson struct

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "cx/string.h"

#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <float.h>

#ifdef _WIN32
#define cx_strcasecmp_impl _strnicmp
#else
#include <strings.h>
#define cx_strcasecmp_impl strncasecmp
#endif

cxmutstr cx_mutstr(char *cstring) {
    return (cxmutstr) {cstring, strlen(cstring)};
}

cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
) {
    return (cxmutstr) {cstring, length};
}

cxstring cx_str(const char *cstring) {
    return (cxstring) {cstring, strlen(cstring)};
}

cxstring cx_strn(
        const char *cstring,
        size_t length
) {
    return (cxstring) {cstring, length};
}

void cx_strfree(cxmutstr *str) {
    if (str == NULL) return;
    free(str->ptr);
    str->ptr = NULL;
    str->length = 0;
}

void cx_strfree_a(
        const CxAllocator *alloc,
        cxmutstr *str
) {
    if (str == NULL) return;
    cxFree(alloc, str->ptr);
    str->ptr = NULL;
    str->length = 0;
}

size_t cx_strlen(
        size_t count,
        ...
) {
    if (count == 0) return 0;

    va_list ap;
    va_start(ap, count);
    size_t size = 0;
    for (size_t i = 0; i < count; i++) {
        cxstring str = va_arg(ap, cxstring);
        if (size > SIZE_MAX - str.length) errno = EOVERFLOW;
        size += str.length;
    }
    va_end(ap);

    return size;
}

cxmutstr cx_strcat_ma(
        const CxAllocator *alloc,
        cxmutstr str,
        size_t count,
        ...
) {
    if (count == 0) return str;

    cxstring strings_stack[8];
    cxstring *strings;
    if (count > 8) {
        strings = calloc(count, sizeof(cxstring));
        if (strings == NULL) {
            return (cxmutstr) {NULL, 0};
        }
    } else {
        strings = strings_stack;
    }

    va_list ap;
    va_start(ap, count);

    // get all args and overall length
    bool overflow = false;
    size_t slen = str.length;
    for (size_t i = 0; i < count; i++) {
        cxstring s = va_arg (ap, cxstring);
        strings[i] = s;
        if (slen > SIZE_MAX - str.length) overflow = true;
        slen += s.length;
    }
    va_end(ap);

    // abort in case of overflow
    if (overflow) {
        errno = EOVERFLOW;
        if (strings != strings_stack) {
            free(strings);
        }
        return (cxmutstr) { NULL, 0 };
    }

    // reallocate or create new string
    char *newstr;
    if (str.ptr == NULL) {
        newstr = cxMalloc(alloc, slen + 1);
    } else {
        newstr = cxRealloc(alloc, str.ptr, slen + 1);
    }
    if (newstr == NULL) {
        if (strings != strings_stack) {
            free(strings);
        }
        return (cxmutstr) {NULL, 0};
    }
    str.ptr = newstr;

    // concatenate strings
    size_t pos = str.length;
    str.length = slen;
    for (size_t i = 0; i < count; i++) {
        cxstring s = strings[i];
        memcpy(str.ptr + pos, s.ptr, s.length);
        pos += s.length;
    }

    // terminate string
    str.ptr[str.length] = '\0';

    // free temporary array
    if (strings != strings_stack) {
        free(strings);
    }

    return str;
}

cxstring cx_strsubs(
        cxstring string,
        size_t start
) {
    return cx_strsubsl(string, start, string.length - start);
}

cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
) {
    return cx_strsubsl_m(string, start, string.length - start);
}

cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
) {
    if (start > string.length) {
        return (cxstring) {NULL, 0};
    }

    size_t rem_len = string.length - start;
    if (length > rem_len) {
        length = rem_len;
    }

    return (cxstring) {string.ptr + start, length};
}

cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
) {
    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

cxstring cx_strchr(
        cxstring string,
        int chr
) {
    chr = 0xFF & chr;
    // TODO: improve by comparing multiple bytes at once
    for (size_t i = 0; i < string.length; i++) {
        if (string.ptr[i] == chr) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}

cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

cxstring cx_strrchr(
        cxstring string,
        int chr
) {
    chr = 0xFF & chr;
    size_t i = string.length;
    while (i > 0) {
        i--;
        // TODO: improve by comparing multiple bytes at once
        if (string.ptr[i] == chr) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}

cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strrchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

#ifndef CX_STRSTR_SBO_SIZE
#define CX_STRSTR_SBO_SIZE 128
#endif
const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;

cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
) {
    if (needle.length == 0) {
        return haystack;
    }

    // optimize for single-char needles
    if (needle.length == 1) {
        return cx_strchr(haystack, *needle.ptr);
    }

    /*
     * IMPORTANT:
     * Our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t.
     * The original algorithm needs a (-1) at one single place,
     * and we want to avoid that.
     */

    // local prefix table
    size_t s_prefix_table[CX_STRSTR_SBO_SIZE];

    // check needle length and use appropriate prefix table
    // if the pattern exceeds static prefix table, allocate on the heap
    const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
    register size_t *ptable = useheap ? calloc(needle.length + 1,
                                               sizeof(size_t)) : s_prefix_table;

    // keep counter in registers
    register size_t i, j;

    // fill prefix table
    i = 0;
    j = 0;
    ptable[i] = j;
    while (i < needle.length) {
        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        ptable[i] = j;
    }

    // search
    cxstring result = {NULL, 0};
    i = 0;
    j = 1;
    while (i < haystack.length) {
        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        if (j - 1 == needle.length) {
            size_t start = i - needle.length;
            result.ptr = haystack.ptr + start;
            result.length = haystack.length - start;
            break;
        }
    }

    // if prefix table was allocated on the heap, free it
    if (useheap) {
        free(ptable);
    }

    return result;
}

cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
) {
    cxstring result = cx_strstr(cx_strcast(haystack), needle);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
) {
    // special case: output limit is zero
    if (limit == 0) return 0;

    // special case: delimiter is empty
    if (delim.length == 0) {
        output[0] = string;
        return 1;
    }

    // special cases: delimiter is at least as large as the string
    if (delim.length >= string.length) {
        // exact match
        if (cx_strcmp(string, delim) == 0) {
            output[0] = cx_strn(string.ptr, 0);
            output[1] = cx_strn(string.ptr + string.length, 0);
            return 2;
        } else {
            // no match possible
            output[0] = string;
            return 1;
        }
    }

    size_t n = 0;
    cxstring curpos = string;
    while (1) {
        ++n;
        cxstring match = cx_strstr(curpos, delim);
        if (match.length > 0) {
            // is the limit reached?
            if (n < limit) {
                // copy the current string to the array
                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
                output[n - 1] = item;
                size_t processed = item.length + delim.length;
                curpos.ptr += processed;
                curpos.length -= processed;
            } else {
                // limit reached, copy the _full_ remaining string
                output[n - 1] = curpos;
                break;
            }
        } else {
            // no more matches, copy last string
            output[n - 1] = curpos;
            break;
        }
    }

    return n;
}

size_t cx_strsplit_a(
        const CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
) {
    // find out how many splits we're going to make and allocate memory
    size_t n = 0;
    cxstring curpos = string;
    while (1) {
        ++n;
        cxstring match = cx_strstr(curpos, delim);
        if (match.length > 0) {
            // is the limit reached?
            if (n < limit) {
                size_t processed = match.ptr - curpos.ptr + delim.length;
                curpos.ptr += processed;
                curpos.length -= processed;
            } else {
                // limit reached
                break;
            }
        } else {
            // no more matches
            break;
        }
    }
    *output = cxCalloc(allocator, n, sizeof(cxstring));
    return cx_strsplit(string, delim, n, *output);
}

size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
) {
    return cx_strsplit(cx_strcast(string),
                       delim, limit, (cxstring *) output);
}

size_t cx_strsplit_ma(
        const CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
) {
    return cx_strsplit_a(allocator, cx_strcast(string),
                         delim, limit, (cxstring **) output);
}

int cx_strcmp(
        cxstring s1,
        cxstring s2
) {
    if (s1.length == s2.length) {
        return strncmp(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        int r = strncmp(s1.ptr, s2.ptr, s2.length);
        if (r != 0) return r;
        return 1;
    } else {
        int r = strncmp(s1.ptr, s2.ptr, s1.length);
        if (r != 0) return r;
        return -1;
    }
}

int cx_strcasecmp(
        cxstring s1,
        cxstring s2
) {
    if (s1.length == s2.length) {
        return cx_strcasecmp_impl(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        int r = cx_strcasecmp_impl(s1.ptr, s2.ptr, s2.length);
        if (r != 0) return r;
        return 1;
    } else {
        int r = cx_strcasecmp_impl(s1.ptr, s2.ptr, s1.length);
        if (r != 0) return r;
        return -1;
    }
}

int cx_strcmp_p(
        const void *s1,
        const void *s2
) {
    const cxstring *left = s1;
    const cxstring *right = s2;
    return cx_strcmp(*left, *right);
}

int cx_strcasecmp_p(
        const void *s1,
        const void *s2
) {
    const cxstring *left = s1;
    const cxstring *right = s2;
    return cx_strcasecmp(*left, *right);
}

cxmutstr cx_strdup_a_(
        const CxAllocator *allocator,
        cxstring string
) {
    cxmutstr result = {
            cxMalloc(allocator, string.length + 1),
            string.length
    };
    if (result.ptr == NULL) {
        result.length = 0;
        return result;
    }
    memcpy(result.ptr, string.ptr, string.length);
    result.ptr[string.length] = '\0';
    return result;
}

static bool str_isspace(char c) {
    // TODO: remove once UCX has public API for this
    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
}

cxstring cx_strtrim(cxstring string) {
    cxstring result = string;
    // TODO: optimize by comparing multiple bytes at once
    while (result.length > 0 && str_isspace(*result.ptr)) {
        result.ptr++;
        result.length--;
    }
    while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) {
        result.length--;
    }
    return result;
}

cxmutstr cx_strtrim_m(cxmutstr string) {
    cxstring result = cx_strtrim(cx_strcast(string));
    return (cxmutstr) {(char *) result.ptr, result.length};
}

bool cx_strprefix(
        cxstring string,
        cxstring prefix
) {
    if (string.length < prefix.length) return false;
    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

bool cx_strsuffix(
        cxstring string,
        cxstring suffix
) {
    if (string.length < suffix.length) return false;
    return memcmp(string.ptr + string.length - suffix.length,
                  suffix.ptr, suffix.length) == 0;
}

bool cx_strcaseprefix(
        cxstring string,
        cxstring prefix
) {
    if (string.length < prefix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
#else
    return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
#endif
}

bool cx_strcasesuffix(
        cxstring string,
        cxstring suffix
) {
    if (string.length < suffix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr+string.length-suffix.length,
                  suffix.ptr, suffix.length) == 0;
#else
    return strncasecmp(string.ptr + string.length - suffix.length,
                       suffix.ptr, suffix.length) == 0;
#endif
}

void cx_strlower(cxmutstr string) {
    for (size_t i = 0; i < string.length; i++) {
        if ((unsigned int) (string.ptr[i] - 'A') < 26u) {
            string.ptr[i] += 'a' - 'A';
        }
    }
}

void cx_strupper(cxmutstr string) {
    for (size_t i = 0; i < string.length; i++) {
        if ((unsigned int) (string.ptr[i] - 'a') < 26u) {
            string.ptr[i] += 'A' - 'a';
        }
    }
}

#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
#endif

struct cx_strreplace_ibuf {
    size_t *buf;
    struct cx_strreplace_ibuf *next;
    unsigned int len;
};

static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
    // remember, the first data is on the stack!
    buf = buf->next;
    while (buf) {
        struct cx_strreplace_ibuf *next = buf->next;
        free(buf->buf);
        free(buf);
        buf = next;
    }
}

cxmutstr cx_strreplacen_a(
        const CxAllocator *allocator,
        cxstring str,
        cxstring pattern,
        cxstring replacement,
        size_t replmax
) {

    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
        return cx_strdup_a(allocator, str);

    // Compute expected buffer length
    size_t ibufmax = str.length / pattern.length;
    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
    if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
        ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
    }

    // First index buffer can be on the stack
    struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf;
    size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE];
    ibuf.buf = ibuf_sbo;
    ibuf.next = NULL;
    ibuf.len = 0;

    // Search occurrences
    cxstring searchstr = str;
    size_t found = 0;
    do {
        cxstring match = cx_strstr(searchstr, pattern);
        if (match.length > 0) {
            // Allocate next buffer in chain, if required
            if (curbuf->len == ibuflen) {
                struct cx_strreplace_ibuf *nextbuf =
                        calloc(1, sizeof(struct cx_strreplace_ibuf));
                if (!nextbuf) {
                    cx_strrepl_free_ibuf(&ibuf);
                    return cx_mutstrn(NULL, 0);
                }
                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
                if (!nextbuf->buf) {
                    free(nextbuf);
                    cx_strrepl_free_ibuf(&ibuf);
                    return cx_mutstrn(NULL, 0);
                }
                curbuf->next = nextbuf;
                curbuf = nextbuf;
            }

            // Record match index
            found++;
            size_t idx = match.ptr - str.ptr;
            curbuf->buf[curbuf->len++] = idx;
            searchstr.ptr = match.ptr + pattern.length;
            searchstr.length = str.length - idx - pattern.length;
        } else {
            break;
        }
    } while (searchstr.length > 0 && found < replmax);

    // Allocate result string
    cxmutstr result;
    {
        ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
        size_t rcount = 0;
        curbuf = &ibuf;
        do {
            rcount += curbuf->len;
            curbuf = curbuf->next;
        } while (curbuf);
        result.length = str.length + rcount * adjlen;
        result.ptr = cxMalloc(allocator, result.length + 1);
        if (!result.ptr) {
            cx_strrepl_free_ibuf(&ibuf);
            return cx_mutstrn(NULL, 0);
        }
    }

    // Build result string
    curbuf = &ibuf;
    size_t srcidx = 0;
    char *destptr = result.ptr;
    do {
        for (size_t i = 0; i < curbuf->len; i++) {
            // Copy source part up to next match
            size_t idx = curbuf->buf[i];
            size_t srclen = idx - srcidx;
            if (srclen > 0) {
                memcpy(destptr, str.ptr + srcidx, srclen);
                destptr += srclen;
                srcidx += srclen;
            }

            // Copy the replacement and skip the source pattern
            srcidx += pattern.length;
            memcpy(destptr, replacement.ptr, replacement.length);
            destptr += replacement.length;
        }
        curbuf = curbuf->next;
    } while (curbuf);
    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);

    // Result is guaranteed to be zero-terminated
    result.ptr[result.length] = '\0';

    // Free index buffer
    cx_strrepl_free_ibuf(&ibuf);

    return result;
}

CxStrtokCtx cx_strtok_(
        cxstring str,
        cxstring delim,
        size_t limit
) {
    CxStrtokCtx ctx;
    ctx.str = str;
    ctx.delim = delim;
    ctx.limit = limit;
    ctx.pos = 0;
    ctx.next_pos = 0;
    ctx.delim_pos = 0;
    ctx.found = 0;
    ctx.delim_more = NULL;
    ctx.delim_more_count = 0;
    return ctx;
}

bool cx_strtok_next(
        CxStrtokCtx *ctx,
        cxstring *token
) {
    // abortion criteria
    if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
        return false;
    }

    // determine the search start
    cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);

    // search the next delimiter
    cxstring delim = cx_strstr(haystack, ctx->delim);

    // if found, make delim capture exactly the delimiter
    if (delim.length > 0) {
        delim.length = ctx->delim.length;
    }

    // if more delimiters are specified, check them now
    if (ctx->delim_more_count > 0) {
        for (size_t i = 0; i < ctx->delim_more_count; i++) {
            cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
            if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
                delim.ptr = d.ptr;
                delim.length = ctx->delim_more[i].length;
            }
        }
    }

    // store the token information and adjust the context
    ctx->found++;
    ctx->pos = ctx->next_pos;
    token->ptr = &ctx->str.ptr[ctx->pos];
    ctx->delim_pos = delim.length == 0 ?
                     ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
    token->length = ctx->delim_pos - ctx->pos;
    ctx->next_pos = ctx->delim_pos + delim.length;

    return true;
}

bool cx_strtok_next_m(
        CxStrtokCtx *ctx,
        cxmutstr *token
) {
    return cx_strtok_next(ctx, (cxstring *) token);
}

void cx_strtok_delim(
        CxStrtokCtx *ctx,
        const cxstring *delim,
        size_t count
) {
    ctx->delim_more = delim;
    ctx->delim_more_count = count;
}

#define cx_strtoX_signed_impl(rtype, rmin, rmax) \
    long long result; \
    if (cx_strtoll_lc(str, &result, base, groupsep)) { \
        return -1; \
    } \
    if (result < rmin || result > rmax) { \
        errno = ERANGE; \
        return -1; \
    } \
    *output = (rtype) result; \
    return 0

int cx_strtos_lc_(cxstring str, short *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(short, SHRT_MIN, SHRT_MAX);
}

int cx_strtoi_lc_(cxstring str, int *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int, INT_MIN, INT_MAX);
}

int cx_strtol_lc_(cxstring str, long *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(long, LONG_MIN, LONG_MAX);
}

int cx_strtoll_lc_(cxstring str, long long *output, int base, const char *groupsep) {
    // strategy: parse as unsigned, check range, negate if required
    bool neg = false;
    size_t start_unsigned = 0;

    // trim already, to search for a sign character
    str = cx_strtrim(str);
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // test if we have a negative sign character
    if (str.ptr[start_unsigned] == '-') {
        neg = true;
        start_unsigned++;
        // must not be followed by positive sign character
        if (str.length == 1 || str.ptr[start_unsigned] == '+') {
            errno = EINVAL;
            return -1;
        }
    }

    // now parse the number with strtoull
    unsigned long long v;
    cxstring ustr = start_unsigned == 0 ? str
        : cx_strn(str.ptr + start_unsigned, str.length - start_unsigned);
    int ret = cx_strtoull_lc(ustr, &v, base, groupsep);
    if (ret != 0) return ret;
    if (neg) {
        if (v - 1 > LLONG_MAX) {
            errno = ERANGE;
            return -1;
        }
        *output = -(long long) v;
        return 0;
    } else {
        if (v > LLONG_MAX) {
            errno = ERANGE;
            return -1;
        }
        *output = (long long) v;
        return 0;
    }
}

int cx_strtoi8_lc_(cxstring str, int8_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int8_t, INT8_MIN, INT8_MAX);
}

int cx_strtoi16_lc_(cxstring str, int16_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int16_t, INT16_MIN, INT16_MAX);
}

int cx_strtoi32_lc_(cxstring str, int32_t *output, int base, const char *groupsep) {
    cx_strtoX_signed_impl(int32_t, INT32_MIN, INT32_MAX);
}

int cx_strtoi64_lc_(cxstring str, int64_t *output, int base, const char *groupsep) {
    assert(sizeof(long long) == sizeof(int64_t)); // should be true on all platforms
    return cx_strtoll_lc(str, (long long*) output, base, groupsep);
}

int cx_strtoz_lc_(cxstring str, ssize_t *output, int base, const char *groupsep) {
#if SSIZE_MAX == INT32_MAX
    return cx_strtoi32_lc_(str, (int32_t*) output, base, groupsep);
#elif SSIZE_MAX == INT64_MAX
    return cx_strtoll_lc_(str, (long long*) output, base, groupsep);
#else
#error "unsupported ssize_t size"
#endif
}

#define cx_strtoX_unsigned_impl(rtype, rmax) \
    uint64_t result; \
    if (cx_strtou64_lc(str, &result, base, groupsep)) { \
        return -1; \
    } \
    if (result > rmax) { \
        errno = ERANGE; \
        return -1; \
    } \
    *output = (rtype) result; \
    return 0

int cx_strtous_lc_(cxstring str, unsigned short *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned short, USHRT_MAX);
}

int cx_strtou_lc_(cxstring str, unsigned int *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned int, UINT_MAX);
}

int cx_strtoul_lc_(cxstring str, unsigned long *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(unsigned long, ULONG_MAX);
}

int cx_strtoull_lc_(cxstring str, unsigned long long *output, int base, const char *groupsep) {
    // some sanity checks
    str = cx_strtrim(str);
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }
    if (!(base == 2 || base == 8 || base == 10 || base == 16)) {
        errno = EINVAL;
        return -1;
    }
    if (groupsep == NULL) groupsep = "";

    // find the actual start of the number
    if (str.ptr[0] == '+') {
        str.ptr++;
        str.length--;
        if (str.length == 0) {
            errno = EINVAL;
            return -1;
        }
    }
    size_t start = 0;

    // if base is 2 or 16, some leading stuff may appear
    if (base == 2) {
        if ((str.ptr[0] | 32) == 'b') {
            start = 1;
        } else if (str.ptr[0] == '0' && str.length > 1) {
            if ((str.ptr[1] | 32) == 'b') {
                start = 2;
            }
        }
    } else if (base == 16) {
        if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') {
            start = 1;
        } else if (str.ptr[0] == '0' && str.length > 1) {
            if ((str.ptr[1] | 32) == 'x') {
                start = 2;
            }
        }
    }

    // check if there are digits left
    if (start >= str.length) {
        errno = EINVAL;
        return -1;
    }

    // now parse the number
    unsigned long long result = 0;
    for (size_t i = start; i < str.length; i++) {
        // ignore group separators
        if (strchr(groupsep, str.ptr[i])) continue;

        // determine the digit value of the character
        unsigned char c = str.ptr[i];
        if (c >= 'a') c = 10 + (c - 'a');
        else if (c >= 'A') c = 10 + (c - 'A');
        else if (c >= '0') c = c - '0';
        else c = 255;
        if (c >= base) {
            errno = EINVAL;
            return -1;
        }

        // now combine the digit with what we already have
        unsigned long right = (result & 0xff) * base + c;
        unsigned long long left = (result >> 8) * base + (right >> 8);
        if (left > (ULLONG_MAX >> 8)) {
            errno = ERANGE;
            return -1;
        }
        result = (left << 8) + (right & 0xff);
    }

    *output = result;
    return 0;
}

int cx_strtou8_lc_(cxstring str, uint8_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint8_t, UINT8_MAX);
}

int cx_strtou16_lc_(cxstring str, uint16_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint16_t, UINT16_MAX);
}

int cx_strtou32_lc_(cxstring str, uint32_t *output, int base, const char *groupsep) {
    cx_strtoX_unsigned_impl(uint32_t, UINT32_MAX);
}

int cx_strtou64_lc_(cxstring str, uint64_t *output, int base, const char *groupsep) {
    assert(sizeof(unsigned long long) == sizeof(uint64_t)); // should be true on all platforms
    return cx_strtoull_lc(str, (unsigned long long*) output, base, groupsep);
}

int cx_strtouz_lc_(cxstring str, size_t *output, int base, const char *groupsep) {
#if SIZE_MAX == UINT32_MAX
    return cx_strtou32_lc_(str, (uint32_t*) output, base, groupsep);
#elif SIZE_MAX == UINT64_MAX
    return cx_strtoull_lc_(str, (unsigned long long *) output, base, groupsep);
#else
#error "unsupported size_t size"
#endif
}

int cx_strtof_lc_(cxstring str, float *output, char decsep, const char *groupsep) {
    // use string to double and add a range check
    double d;
    int ret = cx_strtod_lc_(str, &d, decsep, groupsep);
    if (ret != 0) return ret;
    // note: FLT_MIN is the smallest POSITIVE number that can be represented
    double test = d < 0 ? -d : d;
    if (test < FLT_MIN || test > FLT_MAX) {
        errno = ERANGE;
        return -1;
    }
    *output = (float) d;
    return 0;
}

static bool str_isdigit(char c) {
    // TODO: remove once UCX has public API for this
    return c >= '0' && c <= '9';
}

int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) {
    // TODO: overflow check
    // TODO: increase precision

    // trim and check
    str = cx_strtrim(str);
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    double result = 0.;
    int sign = 1;

    // check if there is a sign
    if (str.ptr[0] == '-') {
        sign = -1;
        str.ptr++;
        str.length--;
    } else if (str.ptr[0] == '+') {
        str.ptr++;
        str.length--;
    }

    // there must be at least one char to parse
    if (str.length == 0) {
        errno = EINVAL;
        return -1;
    }

    // parse all digits until we find the decsep
    size_t pos = 0;
    do {
        if (str_isdigit(str.ptr[pos])) {
            result = result * 10 + (str.ptr[pos] - '0');
        } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
            break;
        }
    } while (++pos < str.length);

    // already done?
    if (pos == str.length) {
        *output = result * sign;
        return 0;
    }

    // is the next char the decsep?
    if (str.ptr[pos] == decsep) {
        pos++;
        // it may end with the decsep, if it did not start with it
        if (pos == str.length) {
            if (str.length == 1) {
                errno = EINVAL;
                return -1;
            } else {
                *output = result * sign;
                return 0;
            }
        }
        // parse everything until exponent or end
        double factor = 1.;
        do {
            if (str_isdigit(str.ptr[pos])) {
                factor *= 0.1;
                result = result + factor * (str.ptr[pos] - '0');
            } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
                break;
            }
        } while (++pos < str.length);
    }

    // no exponent?
    if (pos == str.length) {
        *output = result * sign;
        return 0;
    }

    // now the next separator MUST be the exponent separator
    // and at least one char must follow
    if ((str.ptr[pos] | 32) != 'e' || str.length <= pos + 1) {
        errno = EINVAL;
        return -1;
    }
    pos++;

    // check if we have a sign for the exponent
    double factor = 10.;
    if (str.ptr[pos] == '-') {
        factor = .1;
        pos++;
    } else if (str.ptr[pos] == '+') {
        pos++;
    }

    // at least one digit must follow
    if (pos == str.length) {
        errno = EINVAL;
        return -1;
    }

    // parse the exponent
    unsigned int exp = 0;
    do {
        if (str_isdigit(str.ptr[pos])) {
            exp = 10 * exp + (str.ptr[pos] - '0');
        } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
            errno = EINVAL;
            return -1;
        }
    } while (++pos < str.length);

    // apply the exponent by fast exponentiation
    do {
        if (exp & 1) {
            result *= factor;
        }
        factor *= factor;
    } while ((exp >>= 1) > 0);

    // store the result and exit
    *output = result * sign;
    return 0;
}

mercurial