src/string.c

Sat, 03 Sep 2022 14:56:07 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 03 Sep 2022 14:56:07 +0200
changeset 581
c067394737ca
parent 580
aac47db8da0b
child 582
96fa7fa6af4f
permissions
-rw-r--r--

implement more string functions

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/string.h"
#include "cx/utils.h"

#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <ctype.h>

#ifndef _WIN32

#include <strings.h> /* for strncasecmp() */

#endif /* _WIN32 */

cxmutstr cx_mutstr(char *cstring) {
    return (cxmutstr) {cstring, strlen(cstring)};
}

cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
) {
    return (cxmutstr) {cstring, length};
}

cxstring cx_str(const char *cstring) {
    return (cxstring) {cstring, strlen(cstring)};
}

cxstring cx_strn(
        const char *cstring,
        size_t length
) {
    return (cxstring) {cstring, length};
}

cxstring cx_strcast(cxmutstr str) {
    return (cxstring) {str.ptr, str.length};
}

void cx_strfree(cxmutstr *str) {
    free(str->ptr);
    str->ptr = NULL;
    str->length = 0;
}

size_t cx_strlen(
        size_t count,
        ...
) {
    if (count == 0) return 0;

    va_list ap;
    va_start(ap, count);
    size_t size = 0;
    cx_for_n(i, count) {
        cxstring str = va_arg(ap, cxstring);
        size += str.length;
    }
    va_end(ap);

    return size;
}

cxmutstr cx_strcat_a(
        CxAllocator *alloc,
        size_t count,
        ...
) {
    cxstring *strings = calloc(count, sizeof(cxstring));
    if (!strings) abort();

    va_list ap;
    va_start(ap, count);

    // get all args and overall length
    size_t slen = 0;
    cx_for_n(i, count) {
        cxstring s = va_arg (ap, cxstring);
        strings[i] = s;
        slen += s.length;
    }

    // create new string
    cxmutstr result;
    result.ptr = cxMalloc(alloc, slen + 1);
    result.length = slen;
    if (result.ptr == NULL) abort();

    // concatenate strings
    size_t pos = 0;
    cx_for_n(i, count) {
        cxstring s = strings[i];
        memcpy(result.ptr + pos, s.ptr, s.length);
        pos += s.length;
    }

    // terminate string
    result.ptr[result.length] = '\0';

    // free temporary array
    free(strings);

    return result;
}

cxstring cx_strsubs(
        cxstring string,
        size_t start
) {
    return cx_strsubsl(string, start, string.length - start);
}

cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
) {
    return cx_strsubsl_m(string, start, string.length - start);
}

cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
) {
    if (start > string.length) {
        return (cxstring) {NULL, 0};
    }

    size_t rem_len = string.length - start;
    if (length > rem_len) {
        length = rem_len;
    }

    return (cxstring) {string.ptr + start, length};
}

cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
) {
    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

cxstring cx_strchr(
        cxstring string,
        int chr
) {
    chr = 0xFF & chr;
    // TODO: improve by comparing multiple bytes at once
    cx_for_n(i, string.length) {
        if (string.ptr[i] == chr) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}

cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

cxstring cx_strrchr(
        cxstring string,
        int chr
) {
    chr = 0xFF & chr;
    size_t i = string.length;
    while (i > 0) {
        i--;
        // TODO: improve by comparing multiple bytes at once
        if (string.ptr[i] == chr) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}

cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
) {
    cxstring result = cx_strrchr(cx_strcast(string), chr);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
    else ((size_t*)ptable)[index] = src;\
    } while (0)


cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
) {
    if (needle.length == 0) {
        return haystack;
    }

    /*
     * IMPORTANT:
     * Our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t.
     * The original algorithm needs a (-1) at one single place,
     * and we want to avoid that.
     */

    /* static prefix table */
    static uint8_t s_prefix_table[512];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds static prefix table, allocate on the heap */
    register int useheap = needle.length >= 512;
    register void *ptable = useheap ? calloc(needle.length + 1,
                                             sizeof(size_t)) : s_prefix_table;

    /* keep counter in registers */
    register size_t i, j;

    /* fill prefix table */
    i = 0;
    j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < needle.length) {
        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
            ptable_r(j, useheap, ptable, j - 1);
        }
        i++;
        j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    cxstring result = {NULL, 0};
    i = 0;
    j = 1;
    while (i < haystack.length) {
        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
            ptable_r(j, useheap, ptable, j - 1);
        }
        i++;
        j++;
        if (j - 1 == needle.length) {
            size_t start = i - needle.length;
            result.ptr = haystack.ptr + start;
            result.length = haystack.length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (ptable != s_prefix_table) {
        free(ptable);
    }

    return result;
}

cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
) {
    cxstring result = cx_strstr(cx_strcast(haystack), needle);
    return (cxmutstr) {(char *) result.ptr, result.length};
}

size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
) {
    // TODO: implement
    return 0;
}

size_t cx_strsplit_a(
        CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
) {
    // TODO: implement
    return 0;
}

size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
) {
    return cx_strsplit(cx_strcast(string),
                       delim, limit, (cxstring *) output);
}

size_t cx_strsplit_ma(
        CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
) {
    return cx_strsplit_a(allocator, cx_strcast(string),
                         delim, limit, (cxstring **) output);
}

int cx_strcmp(cxstring s1, cxstring s2) {
    if (s1.length == s2.length) {
        return memcmp(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        return 1;
    } else {
        return -1;
    }
}

int cx_strcasecmp(cxstring s1, cxstring s2) {
    if (s1.length == s2.length) {
#ifdef _WIN32
        return _strnicmp(s1.ptr, s2.ptr, s1.length);
#else
        return strncasecmp(s1.ptr, s2.ptr, s1.length);
#endif
    } else if (s1.length > s2.length) {
        return 1;
    } else {
        return -1;
    }
}

cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
    cxmutstr result = {
            cxMalloc(allocator, string.length + 1),
            string.length
    };
    if (result.ptr == NULL) {
        result.length = 0;
        return result;
    }
    memcpy(result.ptr, string.ptr, string.length);
    result.ptr[string.length] = '\0';
    return result;
}

cxstring cx_strtrim(cxstring string) {
    cxstring result = string;
    // TODO: optimize by comparing multiple bytes at once
    while (result.length > 0 && isspace(*result.ptr)) {
        result.ptr++;
        result.length--;
    }
    while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
        result.length--;
    }
    return result;
}

cxmutstr cx_strtrim_m(cxmutstr string) {
    cxstring result = cx_strtrim(cx_strcast(string));
    return (cxmutstr) {(char *) result.ptr, result.length};
}

bool cx_strprefix(cxstring string, cxstring prefix) {
    if (string.length < prefix.length) return false;
    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

bool cx_strsuffix(cxstring string, cxstring suffix) {
    if (string.length < suffix.length) return false;
    return memcmp(string.ptr + string.length - suffix.length,
                  suffix.ptr, suffix.length) == 0;
}

bool cx_casestrprefix(cxstring string, cxstring prefix) {
    if (string.length < prefix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
#else
    return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
#endif
}

bool cx_casestrsuffix(cxstring string, cxstring suffix) {
    if (string.length < suffix.length) return false;
#ifdef _WIN32
    return _strnicmp(string.ptr+string.length-suffix.length,
                  suffix.ptr, suffix.length) == 0;
#else
    return strncasecmp(string.ptr + string.length - suffix.length,
                       suffix.ptr, suffix.length) == 0;
#endif
}

mercurial