2022-08-29
first proposal for the string header
src/CMakeLists.txt | file | annotate | diff | comparison | revisions | |
src/cx/string.h | file | annotate | diff | comparison | revisions | |
src/string.c | file | annotate | diff | comparison | revisions |
--- a/src/CMakeLists.txt Fri Aug 12 16:56:41 2022 +0200 +++ b/src/CMakeLists.txt Mon Aug 29 20:54:42 2022 +0200 @@ -1,6 +1,7 @@ set(sources utils.c allocator.c + string.c list.c linked_list.c tree.c @@ -12,6 +13,7 @@ set(headers cx/common.h cx/utils.h + cx/string.h cx/allocator.h cx/iterator.h cx/list.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cx/string.h Mon Aug 29 20:54:42 2022 +0200 @@ -0,0 +1,771 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/** + * \file string.h + * \brief Strings that know their length. + * \author Mike Becker + * \author Olaf Wintermann + * \version 3.0 + * \copyright 2-Clause BSD License + */ + +#ifndef UCX_STRING_H +#define UCX_STRING_H + +#include "common.h" +#include "allocator.h" + +/** + * The UCX string structure. + */ +struct { + /** + * A pointer to the string. + * \note The string is not necessarily \c NULL terminated. 
+ * Always use the length. + */ + char *ptr; + /** The length of the string */ + size_t length; +} cx_mutstr_s; + +/** + * A mutable string. + */ +typedef struct cx_mutstr_s cxmutstr; + +/** + * The UCX string structure for immutable (constant) strings. + */ +struct { + /** + * A pointer to the immutable string. + * \note The string is not necessarily \c NULL terminated. + * Always use the length. + */ + char const *ptr; + /** The length of the string */ + size_t length; +} cx_string_s; + +/** + * An immutable string. + */ +typedef struct cx_string_s cxstring; + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Wraps a mutable string that must be zero-terminated. + * + * The length is implicitly inferred by using a call to \c strlen(). + * As a special case, a \c NULL argument is treated like an empty string. + * + * \note the wrapped string will share the specified pointer to the string. + * If you do want a copy, use cx_strdup() on the return value of this function. + * + * If you need to wrap a constant string, use cx_str(). + * + * @param cstring the string to wrap, must be zero-terminated (or \c NULL) + * @return the wrapped string + * + * @see cx_mutstrn() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_mutstr(char *cstring); + +/** + * Wraps a string that does not need to be zero-terminated. + * + * The argument may be \c NULL if the length is zero. + * + * \note the wrapped string will share the specified pointer to the string. + * If you do want a copy, use cx_strdup() on the return value of this function. + * + * If you need to wrap a constant string, use cx_strn(). + * + * @param cstring the string to wrap (or \c NULL, if the length is zero) + * @param length the length of the string + * @return the wrapped string + * + * @see cx_mutstr() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_mutstrn( + char *cstring, + size_t length +); + +/** + * Wraps a string that must be zero-terminated. 
+ * + * The length is implicitly inferred by using a call to \c strlen(). + * As a special case, a \c NULL argument is treated like an empty string. + * + * \note the wrapped string will share the specified pointer to the string. + * If you do want a copy, use cx_strdup() on the return value of this function. + * + * If you need to wrap a non-constant string, use cx_mutstr(). + * + * @param cstring the string to wrap, must be zero-terminated (or \c NULL) + * @return the wrapped string + * + * @see cx_strn() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_str(char const *cstring); + + +/** + * Wraps a string that does not need to be zero-terminated. + * + * The argument may be \c NULL if the length is zero. + * + * \note the wrapped string will share the specified pointer to the string. + * If you do want a copy, use cx_strdup() on the return value of this function. + * + * If you need to wrap a non-constant string, use cx_mutstrn(). + * + * @param cstring the string to wrap (or \c NULL, if the length is zero) + * @param length the length of the string + * @return the wrapped string + * + * @see cx_str() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strn( + char const *cstring, + size_t length +); + +/** + * Casts a mutable string to an immutable string. + * + * \note This is not really a cast. Instead, you get a copy + * of the struct with the desired pointer type. Both structs still + * point to the same location, though! + * + * @param str the mutable string to cast + * @return an immutable copy of the string pointer + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strcast(cxmutstr str); + +/** + * Passes the pointer in this string to \c free(). + * + * The pointer in the struct is set to \c NULL and the length is set to zero. + * + * \note There is no implementation for cxstring, because it is unlikely that + * you ever have a \c char \c const* you are really supposed to free.
If you + * encounter such a situation, you should double-check your code. + * + * @param str the string to free + */ +void cx_strfree(cxmutstr *str); + +/** + * Returns the accumulated length of all specified strings. + * + * \attention if the count argument is larger than the number of the + * specified strings, the behavior is undefined. + * + * @param count the total number of specified strings + * @param ... all strings + * @return the accumulated length of all strings + */ +__attribute__((__warn_unused_result__)) +size_t cx_strlen( + size_t count, + ... +); + +/** + * Concatenates two or more strings. + * + * The resulting string will be allocated by the specified allocator. + * So developers \em must eventually free the result with the same allocator. + * + * \note It is guaranteed that there is only one allocation. + * + * @param alloc the allocator to use + * @param count the total number of strings to concatenate + * @param ... all strings + * @return the concatenated string + */ +__attribute__((__warn_unused_result__, __nonnull__)) +cxmutstr cx_strcat_a( + CxAllocator *alloc, + size_t count, + ... +); + +/** + * Concatenates two or more strings. + * + * The resulting string will be allocated by standard \c malloc(). + * So developers \em must pass the return value to cx_strfree() eventually. + * + * @param count the total number of strings to concatenate + * @param ... all strings + * @return the concatenated string + */ +#define cx_strcat(count, ...) \ +cx_strcat_a(cxDefaultAllocator, count, __VA_ARGS__) + +/** + * Returns a substring starting at the specified location. + * + * \attention the new string references the same memory area as the + * input string and is usually \em not zero-terminated. + * Use cx_strdup() to get a copy.
+ * + * @param string input string + * @param start start location of the substring + * @return a substring of \p string starting at \p start + * + * @see cx_strsubsl() + * @see cx_strsubs_m() + * @see cx_strsubsl_m() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strsubs( + cxstring string, + size_t start +); + +/** + * Returns a substring starting at the specified location. + * + * The returned string will be limited to \p length bytes or the number + * of bytes available in \p string, whichever is smaller. + * + * \attention the new string references the same memory area as the + * input string and is usually \em not zero-terminated. + * Use cx_strdup() to get a copy. + * + * @param string input string + * @param start start location of the substring + * @param length the maximum length of the returned string + * @return a substring of \p string starting at \p start + * + * @see cx_strsubs() + * @see cx_strsubs_m() + * @see cx_strsubsl_m() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strsubsl( + cxstring string, + size_t start, + size_t length +); + +/** + * Returns a substring starting at the specified location. + * + * \attention the new string references the same memory area as the + * input string and is usually \em not zero-terminated. + * Use cx_strdup() to get a copy. + * + * @param string input string + * @param start start location of the substring + * @return a substring of \p string starting at \p start + * + * @see cx_strsubsl_m() + * @see cx_strsubs() + * @see cx_strsubsl() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strsubs_m( + cxmutstr string, + size_t start +); + +/** + * Returns a substring starting at the specified location. + * + * The returned string will be limited to \p length bytes or the number + * of bytes available in \p string, whichever is smaller. + * + * \attention the new string references the same memory area as the + * input string and is usually \em not zero-terminated. 
+ * Use cx_strdup() to get a copy. + * + * @param string input string + * @param start start location of the substring + * @param length the maximum length of the returned string + * @return a substring of \p string starting at \p start + * + * @see cx_strsubs_m() + * @see cx_strsubs() + * @see cx_strsubsl() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strsubsl_m( + cxmutstr string, + size_t start, + size_t length +); + +/** + * Returns a substring starting at the location of the first occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. + * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the first location of \p chr + * + * @see cx_strchr_m() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strchr( + cxstring string, + int chr +); + +/** + * Returns a substring starting at the location of the first occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. + * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the first location of \p chr + * + * @see cx_strchr() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strchr_m( + cxmutstr string, + int chr +); + +/** + * Returns a substring starting at the location of the last occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. 
+ * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the last location of \p chr + * + * @see cx_strrchr_m() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strrchr( + cxstring string, + int chr +); + +/** + * Returns a substring starting at the location of the last occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. + * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the last location of \p chr + * + * @see cx_strrchr() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strrchr_m( + cxmutstr string, + int chr +); + +/** + * Returns a substring starting at the location of the first occurrence of the + * specified string. + * + * If \p haystack does not contain \p needle, an empty string is returned. + * + * If \p needle is an empty string, the complete \p haystack is + * returned. + * + * @param haystack the string to be scanned + * @param needle string containing the sequence of characters to match + * @return a substring starting at the first occurrence of + * \p needle, or an empty string, if the sequence is not + * contained + * @see cx_strstr_m() + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strstr( + cxstring haystack, + cxstring needle +); + +/** + * Returns a substring starting at the location of the first occurrence of the + * specified string. + * + * If \p haystack does not contain \p needle, an empty string is returned. + * + * If \p needle is an empty string, the complete \p haystack is + * returned. 
+ * + * @param haystack the string to be scanned + * @param needle string containing the sequence of characters to match + * @return a substring starting at the first occurrence of + * \p needle, or an empty string, if the sequence is not + * contained + * @see cx_strstr() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strstr_m( + cxmutstr haystack, + cxstring needle +); + +/** + * Splits a given string using a delimiter string. + * + * \note The resulting array contains strings that point to the source + * \p string. Use cx_strdup() to get copies. + * + * @param string the string to split + * @param delim the delimiter + * @param limit the maximum number of split items + * @param output a pre-allocated array of at least \p limit length + * @return the actual number of split items + */ +__attribute__((__warn_unused_result__, __nonnull__)) +size_t cx_strsplit( + cxstring string, + cxstring delim, + size_t limit, + cxstring *output +); + +/** + * Splits a given string using a delimiter string. + * + * The array pointed to by \p output will be allocated by \p allocator. + * + * \note The resulting array contains strings that point to the source + * \p string. Use cx_strdup() to get copies. + * + * \attention If allocation fails, the \c NULL pointer will be written to + * \p output and the number returned will be zero. + * + * @param allocator the allocator to use for allocating the resulting array + * @param string the string to split + * @param delim the delimiter + * @param limit the maximum number of split items + * @param output a pointer where the address of the allocated array shall be + * written to + * @return the actual number of split items + */ +__attribute__((__warn_unused_result__, __nonnull__)) +size_t cx_strsplit_a( + CxAllocator *allocator, + cxstring string, + cxstring delim, + size_t limit, + cxstring **output +); + + +/** + * Splits a given string using a delimiter string. 
+ * + * \note The resulting array contains strings that point to the source + * \p string. Use cx_strdup() to get copies. + * + * @param string the string to split + * @param delim the delimiter + * @param limit the maximum number of split items + * @param output a pre-allocated array of at least \p limit length + * @return the actual number of split items + */ +__attribute__((__warn_unused_result__, __nonnull__)) +size_t cx_strsplit_m( + cxmutstr string, + cxstring delim, + size_t limit, + cxmutstr *output +); + +/** + * Splits a given string using a delimiter string. + * + * The array pointed to by \p output will be allocated by \p allocator. + * + * \note The resulting array contains strings that point to the source + * \p string. Use cx_strdup() to get copies. + * + * \attention If allocation fails, the \c NULL pointer will be written to + * \p output and the number returned will be zero. + * + * @param allocator the allocator to use for allocating the resulting array + * @param string the string to split + * @param delim the delimiter + * @param limit the maximum number of split items + * @param output a pointer where the address of the allocated array shall be + * written to + * @return the actual number of split items + */ +__attribute__((__warn_unused_result__, __nonnull__)) +size_t cx_strsplit_ma( + CxAllocator *allocator, + cxmutstr string, + cxstring delim, + size_t limit, + cxmutstr **output +); + +/** + * Compares two strings. + * + * @param s1 the first string + * @param s2 the second string + * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger + * than \p s2, zero if both strings equal + */ +__attribute__((__warn_unused_result__)) +int cx_strcmp( + cxstring s1, + cxstring s2 +); + +/** + * Compares two strings ignoring case. 
+ * + * @param s1 the first string + * @param s2 the second string + * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger + * than \p s2, zero if both strings equal ignoring case + */ +__attribute__((__warn_unused_result__)) +int cx_strcasecmp( + cxstring s1, + cxstring s2 +); + +/** + * Creates a duplicate of the specified string. + * + * The new string will contain a copy allocated by standard + * \c malloc(). So developers \em must pass the return value to cx_strfree(). + * + * \note The returned string is guaranteed to be zero-terminated and can safely + * be passed to other APIs. + * + * @param string the string to duplicate + * @return a duplicate of the string + * @see cx_strdup_a() + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strdup(cxstring string); + +/** + * Creates a duplicate of the specified string. + * + * The new string will contain a copy allocated by \p allocator. + * + * \note The returned string is guaranteed to be zero-terminated and can safely + * be passed to other APIs. + * + * @param allocator the allocator to use + * @param string the string to duplicate + * @return a duplicate of the string + * @see cx_strdup() + */ +__attribute__((__warn_unused_result__, __nonnull__)) +cxmutstr cx_strdup_a( + CxAllocator *allocator, + cxstring string +); + +/** + * Omits leading and trailing spaces. + * + * \note the returned string references the same memory, thus you + * must \em not free the returned memory. + * + * @param string the string that shall be trimmed + * @return the trimmed string + */ +__attribute__((__warn_unused_result__)) +cxstring cx_strtrim(cxstring string); + +/** + * Omits leading and trailing spaces. + * + * \note the returned string references the same memory, thus you + * must \em not free the returned memory. 
+ * + * @param string the string that shall be trimmed + * @return the trimmed string + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strtrim_m(cxmutstr string); + +/** + * Checks, if a string has a specific prefix. + * + * @param string the string to check + * @param prefix the prefix the string should have + * @return \c true, if and only if the string has the specified prefix, + * \c false otherwise + */ +__attribute__((__warn_unused_result__)) +bool cx_strprefix( + cxstring string, + cxstring prefix +); + +/** + * Checks, if a string has a specific suffix. + * + * @param string the string to check + * @param suffix the suffix the string should have + * @return \c true, if and only if the string has the specified suffix, + * \c false otherwise + */ +__attribute__((__warn_unused_result__)) +int cx_strsuffix( + cxstring string, + cxstring suffix +); + +/** + * Checks, if a string has a specific prefix, ignoring the case. + * + * @param string the string to check + * @param prefix the prefix the string should have + * @return \c true, if and only if the string has the specified prefix, + * \c false otherwise + */ +__attribute__((__warn_unused_result__)) +int cx_strcaseprefix( + cxstring string, + cxstring prefix +); + +/** + * Checks, if a string has a specific suffix, ignoring the case. + * + * @param string the string to check + * @param suffix the suffix the string should have + * @return \c true, if and only if the string has the specified suffix, + * \c false otherwise + */ +__attribute__((__warn_unused_result__)) +int cx_strcasesuffix( + cxstring string, + cxstring suffix +); + +/** + * Converts the string to lower case. + * + * The change is made in-place. If you want a copy, use cx_strdup(), first. + * + * @param string the string to modify + * @see cx_strdup() + */ +void cx_strlower(cxmutstr string); + +/** + * Converts the string to upper case. + * + * The change is made in-place. If you want a copy, use cx_strdup(), first. 
+ * + * @param string the string to modify + * @see cx_strdup() + */ +void cx_strupper(cxmutstr string); + +/** + * Replaces a pattern in a string with another string. + * + * The pattern is taken literally and is no regular expression. + * Replaces at most \p replmax occurrences. + * + * The returned string will be allocated by \c malloc() and \em must be passed + * to cx_strfree() eventually. + * + * If allocation fails, or the input string is empty, + * the returned string will point to \c NULL. + * + * @param str the string where replacements should be applied + * @param pattern the pattern to search for + * @param replacement the replacement string + * @param replmax maximum number of replacements + * @return the resulting string after applying the replacements + */ +__attribute__((__warn_unused_result__)) +cxmutstr cx_strreplace( + cxstring str, + cxstring pattern, + cxstring replacement, + size_t replmax +); + +/** + * Replaces a pattern in a string with another string. + * + * The pattern is taken literally and is no regular expression. + * Replaces at most \p replmax occurrences. + * + * The returned string will be allocated by \p allocator. + * + * If allocation fails, or the input string is empty, + * the returned string will point to \c NULL. + * + * @param allocator the allocator to use + * @param str the string where replacements should be applied + * @param pattern the pattern to search for + * @param replacement the replacement string + * @param replmax maximum number of replacements + * @return the resulting string after applying the replacements + */ +__attribute__((__warn_unused_result__, __nonnull__)) +cxmutstr cx_strreplace_a( + CxAllocator *allocator, + cxstring str, + cxstring pattern, + cxstring replacement, + size_t replmax +); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif //UCX_STRING_H
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/string.c Mon Aug 29 20:54:42 2022 +0200 @@ -0,0 +1,29 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cx/string.h"