# HG changeset patch # User Mike Becker # Date 1738085477 -3600 # Node ID 148b7c7ccaf92576a31f77f1b5216ce08cb7224d # Parent 8ff82697f2c3577f50408653aaeb18fd360b8337# Parent 68ff0839bc6a9574f3b5e08df2a0164433041d9f merge recent changes into docs branch diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/Makefile --- a/src/Makefile Sat Jan 25 15:22:01 2025 +0100 +++ b/src/Makefile Tue Jan 28 18:31:17 2025 +0100 @@ -34,13 +34,16 @@ shared: $(build_dir)/libucx$(SHLIB_EXT) -check-coverage: $(SRC:%.c=$(build_dir)/%.gcda) - gcovr --html-details $(build_dir)/coverage.html \ +check-coverage: $(SRC:%.c=$(build_dir)/%.gcda) $(build_dir)/coverage + gcovr --html-details $(build_dir)/coverage/ucx.html \ --object-directory $(build_dir) \ --root $(root_dir)/src \ --exclude-directories $(build_dir)/tests \ $(build_dir) +$(build_dir)/coverage: + $(MKDIR) $@ + $(build_dir)/%.gcda: test -f "$@" diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/array_list.c --- a/src/array_list.c Sat Jan 25 15:22:01 2025 +0100 +++ b/src/array_list.c Tue Jan 28 18:31:17 2025 +0100 @@ -856,32 +856,42 @@ } } -static ssize_t cx_arl_find_remove( +static size_t cx_arl_find_remove( struct cx_list_s *list, const void *elem, bool remove ) { + assert(list != NULL); assert(list->collection.cmpfunc != NULL); - assert(list->collection.size < SIZE_MAX / 2); + if (list->collection.size == 0) return 0; char *cur = ((const cx_array_list *) list)->data; - for (ssize_t i = 0; i < (ssize_t) list->collection.size; i++) { + // optimize with binary search, when sorted + if (list->collection.sorted) { + size_t i = cx_array_binary_search( + cur, + list->collection.size, + list->collection.elem_size, + elem, + list->collection.cmpfunc + ); + if (remove && i < list->collection.size) { + cx_arl_remove(list, i, 1, NULL); + } + return i; + } + + // fallback: linear search + for (size_t i = 0; i < list->collection.size; i++) { if (0 == list->collection.cmpfunc(elem, cur)) { if (remove) { - if (1 == cx_arl_remove(list, i, 1, NULL)) { - return i; - } 
else { - // should be unreachable - return -1; // LCOV_EXCL_LINE - } - } else { - return i; + cx_arl_remove(list, i, 1, NULL); } + return i; } cur += list->collection.elem_size; } - - return -1; + return list->collection.size; } static void cx_arl_sort(struct cx_list_s *list) { diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/cx/collection.h --- a/src/cx/collection.h Sat Jan 25 15:22:01 2025 +0100 +++ b/src/cx/collection.h Tue Jan 28 18:31:17 2025 +0100 @@ -92,6 +92,11 @@ * instead of copies of the actual objects. */ bool store_pointer; + /** + * Indicates if this collection is guaranteed to be sorted. + * Note that the elements can still be sorted, even when the collection is not aware of that. + */ + bool sorted; }; /** diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/cx/common.h --- a/src/cx/common.h Sat Jan 25 15:22:01 2025 +0100 +++ b/src/cx/common.h Tue Jan 28 18:31:17 2025 +0100 @@ -120,22 +120,6 @@ #endif // --------------------------------------------------------------------------- -// Missing Defines -// --------------------------------------------------------------------------- - -#ifndef SSIZE_MAX // not defined in glibc since C23 and MSVC -#if CX_WORDSIZE == 64 -/** - * The maximum representable value in ssize_t. 
- */ -#define SSIZE_MAX 0x7fffffffffffffffll -#else -#define SSIZE_MAX 0x7fffffffl -#endif -#endif - - -// --------------------------------------------------------------------------- // Attribute definitions // --------------------------------------------------------------------------- @@ -365,10 +349,6 @@ // --------------------------------------------------------------------------- #ifdef _MSC_VER -// fix missing ssize_t definition -#include -typedef SSIZE_T ssize_t; - // fix missing _Thread_local support #define _Thread_local __declspec(thread) #endif // _MSC_VER diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/cx/linked_list.h --- a/src/cx/linked_list.h Sat Jan 25 15:22:01 2025 +0100 +++ b/src/cx/linked_list.h Tue Jan 28 18:31:17 2025 +0100 @@ -111,44 +111,25 @@ ); /** - * Finds the index of an element within a linked list. + * Finds the node containing an element within a linked list. * * @param start a pointer to the start node * @param loc_advance the location of the pointer to advance * @param loc_data the location of the @c data pointer within your node struct * @param cmp_func a compare function to compare @p elem against the node data * @param elem a pointer to the element to find - * @return the index of the element or a negative value if it could not be found + * @param found_index an optional pointer where the index of the found node + * (given that @p start has index 0) is stored + * @return the index of the element, if found - unspecified if not found */ -cx_attr_nonnull -ssize_t cx_linked_list_find( +cx_attr_nonnull_arg(1, 4, 5) +void *cx_linked_list_find( const void *start, ptrdiff_t loc_advance, ptrdiff_t loc_data, cx_compare_func cmp_func, - const void *elem -); - -/** - * Finds the node containing an element within a linked list. 
- * - * @param result a pointer to the memory where the node pointer (or @c NULL if the element - * could not be found) shall be stored to - * @param start a pointer to the start node - * @param loc_advance the location of the pointer to advance - * @param loc_data the location of the @c data pointer within your node struct - * @param cmp_func a compare function to compare @p elem against the node data - * @param elem a pointer to the element to find - * @return the index of the element or a negative value if it could not be found - */ -cx_attr_nonnull -ssize_t cx_linked_list_find_node( - void **result, - const void *start, - ptrdiff_t loc_advance, - ptrdiff_t loc_data, - cx_compare_func cmp_func, - const void *elem + const void *elem, + size_t *found_index ); /** diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/cx/list.h --- a/src/cx/list.h Sat Jan 25 15:22:01 2025 +0100 +++ b/src/cx/list.h Tue Jan 28 18:31:17 2025 +0100 @@ -165,7 +165,7 @@ /** * Member function for finding and optionally removing an element. */ - ssize_t (*find_remove)( + size_t (*find_remove)( struct cx_list_s *list, const void *elem, bool remove @@ -362,6 +362,7 @@ CxList *list, const void *elem ) { + list->collection.sorted = false; return list->cl->insert_element(list, list->collection.size, elem); } @@ -387,6 +388,7 @@ const void *array, size_t n ) { + list->collection.sorted = false; return list->cl->insert_array(list, list->collection.size, array, n); } @@ -409,12 +411,15 @@ size_t index, const void *elem ) { + list->collection.sorted = false; return list->cl->insert_element(list, index, elem); } /** * Inserts an item into a sorted list. * + * If the list is not sorted already, the behavior is undefined. + * * @param list the list * @param elem a pointer to the element to add * @retval zero success @@ -425,6 +430,7 @@ CxList *list, const void *elem ) { + list->collection.sorted = true; // guaranteed by definition const void *data = list->collection.store_pointer ? 
&elem : elem; return list->cl->insert_sorted(list, data, 1) == 0; } @@ -455,6 +461,7 @@ const void *array, size_t n ) { + list->collection.sorted = false; return list->cl->insert_array(list, index, array, n); } @@ -470,6 +477,8 @@ * If this list is storing pointers instead of objects @p array is expected to * be an array of pointers. * + * If the list is not sorted already, the behavior is undefined. + * * @param list the list * @param array a pointer to the elements to add * @param n the number of elements to add @@ -481,6 +490,7 @@ const void *array, size_t n ) { + list->collection.sorted = true; // guaranteed by definition return list->cl->insert_sorted(list, array, n); } @@ -505,7 +515,9 @@ CxIterator *iter, const void *elem ) { - return ((struct cx_list_s *) iter->src_handle.m)->cl->insert_iter(iter, elem, 0); + CxList* list = iter->src_handle.m; + list->collection.sorted = false; + return list->cl->insert_iter(iter, elem, 0); } /** @@ -529,7 +541,9 @@ CxIterator *iter, const void *elem ) { - return ((struct cx_list_s *) iter->src_handle.m)->cl->insert_iter(iter, elem, 1); + CxList* list = iter->src_handle.m; + list->collection.sorted = false; + return list->cl->insert_iter(iter, elem, 1); } /** @@ -630,6 +644,7 @@ */ cx_attr_nonnull static inline void cxListClear(CxList *list) { + list->collection.sorted = true; // empty lists are always sorted list->cl->clear(list); } @@ -652,6 +667,7 @@ size_t i, size_t j ) { + list->collection.sorted = false; return list->cl->swap(list, i, j); } @@ -819,12 +835,12 @@ * * @param list the list * @param elem the element to find - * @return the index of the element or a negative - * value when the element is not found + * @return the index of the element or the size of the list when the element is not found + * @see cxListIndexValid() */ cx_attr_nonnull cx_attr_nodiscard -static inline ssize_t cxListFind( +static inline size_t cxListFind( const CxList *list, const void *elem ) { @@ -832,17 +848,32 @@ } /** + * Checks if the 
specified index is within bounds. + * + * @param list the list + * @param index the index + * @retval true if the index is within bounds + * @retval false if the index is out of bounds + */ +cx_attr_nonnull +cx_attr_nodiscard +static inline bool cxListIndexValid(const CxList *list, size_t index) { + return index < list->collection.size; +} + +/** * Removes and returns the index of the first element that equals @p elem. * * Determining equality is performed by the list's comparator function. * * @param list the list * @param elem the element to find and remove - * @return the index of the now removed element or a negative - * value when the element is not found or could not be removed + * @return the index of the now removed element or the list size + * when the element is not found or could not be removed + * @see cxListIndexValid() */ cx_attr_nonnull -static inline ssize_t cxListFindRemove( +static inline size_t cxListFindRemove( CxList *list, const void *elem ) { @@ -859,6 +890,7 @@ cx_attr_nonnull static inline void cxListSort(CxList *list) { list->cl->sort(list); + list->collection.sorted = true; } /** @@ -868,6 +900,8 @@ */ cx_attr_nonnull static inline void cxListReverse(CxList *list) { + // still sorted, but not according to the cmp_func + list->collection.sorted = false; list->cl->reverse(list); } diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/cx/string.h --- a/src/cx/string.h Sat Jan 25 15:22:01 2025 +0100 +++ b/src/cx/string.h Tue Jan 28 18:31:17 2025 +0100 @@ -1296,23 +1296,6 @@ * @retval non-zero conversion was not possible */ cx_attr_access_w(2) cx_attr_nonnull_arg(2) -int cx_strtoz_lc_(cxstring str, ssize_t *output, int base, const char *groupsep); - -/** - * Converts a string to a number. - * - * The function returns non-zero when conversion is not possible. - * In that case the function sets errno to EINVAL when the reason is an invalid character or an unsupported base. - * It sets errno to ERANGE when the target datatype is too small. 
- * - * @param str the string to convert - * @param output a pointer to the integer variable where the result shall be stored - * @param base 2, 8, 10, or 16 - * @param groupsep each character in this string is treated as group separator and ignored during conversion - * @retval zero success - * @retval non-zero conversion was not possible - */ -cx_attr_access_w(2) cx_attr_nonnull_arg(2) int cx_strtous_lc_(cxstring str, unsigned short *output, int base, const char *groupsep); /** @@ -1449,7 +1432,7 @@ * @retval non-zero conversion was not possible */ cx_attr_access_w(2) cx_attr_nonnull_arg(2) -int cx_strtouz_lc_(cxstring str, size_t *output, int base, const char *groupsep); +int cx_strtoz_lc_(cxstring str, size_t *output, int base, const char *groupsep); /** * Converts a string to a single precision floating point number. @@ -1627,22 +1610,6 @@ * @retval zero success * @retval non-zero conversion was not possible */ -#define cx_strtoz_lc(str, output, base, groupsep) cx_strtoz_lc_(cx_strcast(str), output, base, groupsep) - -/** - * Converts a string to a number. - * - * The function returns non-zero when conversion is not possible. - * In that case the function sets errno to EINVAL when the reason is an invalid character or an unsupported base. - * It sets errno to ERANGE when the target datatype is too small. 
- * - * @param str the string to convert - * @param output a pointer to the integer variable where the result shall be stored - * @param base 2, 8, 10, or 16 - * @param groupsep (@c const @c char*) each character in this string is treated as group separator and ignored during conversion - * @retval zero success - * @retval non-zero conversion was not possible - */ #define cx_strtous_lc(str, output, base, groupsep) cx_strtous_lc_(cx_strcast(str), output, base, groupsep) /** @@ -1771,7 +1738,7 @@ * @retval zero success * @retval non-zero conversion was not possible */ -#define cx_strtouz_lc(str, output, base, groupsep) cx_strtouz_lc_(cx_strcast(str), output, base, groupsep) +#define cx_strtoz_lc(str, output, base, groupsep) cx_strtoz_lc_(cx_strcast(str), output, base, groupsep) /** * Converts a string to a number. @@ -1781,7 +1748,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1799,7 +1766,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1817,7 +1784,7 @@ * It sets errno to ERANGE when the target datatype is too small. 
* * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1835,7 +1802,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1853,7 +1820,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1871,7 +1838,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). 
* * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1889,7 +1856,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1907,7 +1874,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1925,7 +1892,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1943,7 +1910,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). 
+ * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1961,7 +1928,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1979,7 +1946,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -1997,7 +1964,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -2015,7 +1982,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. 
- * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -2033,7 +2000,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -2051,7 +2018,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). * * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -2069,7 +2036,7 @@ * It sets errno to ERANGE when the target datatype is too small. * * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). + * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtoz_lc()). 
* * @param str the string to convert * @param output a pointer to the integer variable where the result shall be stored @@ -2080,24 +2047,6 @@ #define cx_strtou64(str, output, base) cx_strtou64_lc_(cx_strcast(str), output, base, ",") /** - * Converts a string to a number. - * - * The function returns non-zero when conversion is not possible. - * In that case the function sets errno to EINVAL when the reason is an invalid character or an unsupported base. - * It sets errno to ERANGE when the target datatype is too small. - * - * The comma character is treated as group separator and ignored during parsing. - * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()). - * - * @param str the string to convert - * @param output a pointer to the integer variable where the result shall be stored - * @param base 2, 8, 10, or 16 - * @retval zero success - * @retval non-zero conversion was not possible - */ -#define cx_strtouz(str, output, base) cx_strtouz_lc_(cx_strcast(str), output, base, ",") - -/** * Converts a string to a single precision floating point number. * * The function returns non-zero when conversion is not possible. 
diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/json.c --- a/src/json.c Sat Jan 25 15:22:01 2025 +0100 +++ b/src/json.c Tue Jan 28 18:31:17 2025 +0100 @@ -353,6 +353,72 @@ return CX_JSON_INCOMPLETE_DATA; } +// converts a Unicode codepoint to utf8 +static unsigned codepoint_to_utf8(uint32_t codepoint, char *output_buf) { + if (codepoint <= 0x7F) { + *output_buf = (char)codepoint; + return 1; + } else if (codepoint <= 0x7FF) { + output_buf[0] = (char)(0xC0 | ((codepoint >> 6) & 0x1F)); + output_buf[1] = (char)(0x80 | (codepoint & 0x3F)); + return 2; + } else if (codepoint <= 0xFFFF) { + output_buf[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F)); + output_buf[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F)); + output_buf[2] = (char)(0x80 | (codepoint & 0x3F)); + return 3; + } else if (codepoint <= 0x10FFFF) { + output_buf[0] = (char)(0xF0 | ((codepoint >> 18) & 0x07)); + output_buf[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F)); + output_buf[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F)); + output_buf[3] = (char)(0x80 | (codepoint & 0x3F)); + return 4; + } + + return 0; // LCOV_EXCL_LINE +} + +// converts a utf16 surrogate pair to utf8 +static inline uint32_t utf16pair_to_codepoint(uint16_t c0, uint16_t c1) { + return ((c0 - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000; +} + +static unsigned unescape_unicode_string(cxstring str, char *utf8buf) { + // str is supposed to start with "\uXXXX" or "\uXXXX\uXXXX" + // remaining bytes in the string are ignored (str may be larger!) 
+ + if (str.length < 6 || str.ptr[0] != '\\' || str.ptr[1] != 'u') { + return 0; + } + + unsigned utf8len = 0; + cxstring ustr1 = { str.ptr + 2, 4}; + uint16_t utf16a, utf16b; + if (!cx_strtou16_lc(ustr1, &utf16a, 16, "")) { + uint32_t codepoint; + if (utf16a < 0xD800 || utf16a > 0xE000) { + // character is in the Basic Multilingual Plane + // and encoded as a single utf16 char + codepoint = utf16a; + utf8len = codepoint_to_utf8(codepoint, utf8buf); + } else if (utf16a >= 0xD800 && utf16a <= 0xDBFF) { + // character is encoded as a surrogate pair + // get next 6 bytes + if (str.length >= 12) { + if (str.ptr[6] == '\\' && str.ptr[7] == 'u') { + cxstring ustr2 = { str.ptr+8, 4 }; + if (!cx_strtou16_lc(ustr2, &utf16b, 16, "") + && utf16b >= 0xDC00 && utf16b <= 0xDFFF) { + codepoint = utf16pair_to_codepoint(utf16a, utf16b); + utf8len = codepoint_to_utf8(codepoint, utf8buf); + } + } + } + } + } + return utf8len; +} + static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { // note: this function expects that str contains the enclosing quotes! @@ -368,6 +434,8 @@ u = false; if (c == 'n') { c = '\n'; + } else if (c == '"') { + c = '"'; } else if (c == 't') { c = '\t'; } else if (c == 'r') { @@ -380,10 +448,31 @@ c = '\f'; } else if (c == 'b') { c = '\b'; + } else if (c == 'u') { + char utf8buf[4]; + unsigned utf8len = unescape_unicode_string( + cx_strn(str.ptr + i - 1, str.length + 1 - i), + utf8buf + ); + if(utf8len > 0) { + i += utf8len < 4 ? 
4 : 10; + // add all bytes from utf8buf except the last char + // to the result (last char will be added below) + utf8len--; + c = utf8buf[utf8len]; + for (unsigned x = 0; x < utf8len; x++) { + result.ptr[result.length++] = utf8buf[x]; + } + } else { + // decoding failed, ignore the entire sequence + result.ptr[result.length++] = '\\'; + } + } else { + // TODO: discuss the behavior for unrecognized escape sequences + // most parsers throw an error here - we just ignore it + result.ptr[result.length++] = '\\'; } - // TODO: support \uXXXX escape sequences - // TODO: discuss the behavior for unrecognized escape sequences - // most parsers throw an error here + result.ptr[result.length++] = c; } else { if (c == '\\') { diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/linked_list.c --- a/src/linked_list.c Sat Jan 25 15:22:01 2025 +0100 +++ b/src/linked_list.c Tue Jan 28 18:31:17 2025 +0100 @@ -56,48 +56,33 @@ return (void *) cur; } -ssize_t cx_linked_list_find( +void *cx_linked_list_find( const void *start, ptrdiff_t loc_advance, ptrdiff_t loc_data, cx_compare_func cmp_func, - const void *elem + const void *elem, + size_t *found_index ) { - void *dummy; - return cx_linked_list_find_node( - &dummy, start, - loc_advance, loc_data, - cmp_func, elem - ); -} - -ssize_t cx_linked_list_find_node( - void **result, - const void *start, - ptrdiff_t loc_advance, - ptrdiff_t loc_data, - cx_compare_func cmp_func, - const void *elem -) { - assert(result != NULL); assert(start != NULL); assert(loc_advance >= 0); assert(loc_data >= 0); assert(cmp_func); - const void *node = start; - ssize_t index = 0; + void *node = (void*) start; + size_t index = 0; do { void *current = ll_data(node); if (cmp_func(current, elem) == 0) { - *result = (void *) node; - return index; + if (found_index != NULL) { + *found_index = index; + } + return node; } node = ll_advance(node); index++; } while (node != NULL); - *result = NULL; - return -1; + return NULL; } void *cx_linked_list_first( @@ -930,35 +915,30 @@ 
return node == NULL ? NULL : node->payload; } -static ssize_t cx_ll_find_remove( +static size_t cx_ll_find_remove( struct cx_list_s *list, const void *elem, bool remove ) { + size_t index; + cx_linked_list *ll = ((cx_linked_list *) list); + cx_linked_list_node *node = cx_linked_list_find( + ll->begin, + CX_LL_LOC_NEXT, CX_LL_LOC_DATA, + list->collection.cmpfunc, elem, + &index + ); + if (node == NULL) { + return list->collection.size; + } if (remove) { - cx_linked_list *ll = ((cx_linked_list *) list); - cx_linked_list_node *node; - ssize_t index = cx_linked_list_find_node( - (void **) &node, - ll->begin, - CX_LL_LOC_NEXT, CX_LL_LOC_DATA, - list->collection.cmpfunc, elem - ); - if (node != NULL) { - cx_invoke_destructor(list, node->payload); - cx_linked_list_remove((void **) &ll->begin, (void **) &ll->end, - CX_LL_LOC_PREV, CX_LL_LOC_NEXT, node); - list->collection.size--; - cxFree(list->collection.allocator, node); - } - return index; - } else { - return cx_linked_list_find( - ((cx_linked_list *) list)->begin, - CX_LL_LOC_NEXT, CX_LL_LOC_DATA, - list->collection.cmpfunc, elem - ); + cx_invoke_destructor(list, node->payload); + cx_linked_list_remove((void **) &ll->begin, (void **) &ll->end, + CX_LL_LOC_PREV, CX_LL_LOC_NEXT, node); + list->collection.size--; + cxFree(list->collection.allocator, node); } + return index; } static void cx_ll_sort(struct cx_list_s *list) { diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/list.c --- a/src/list.c Sat Jan 25 15:22:01 2025 +0100 +++ b/src/list.c Tue Jan 28 18:31:17 2025 +0100 @@ -128,13 +128,13 @@ return ptr == NULL ? 
NULL : *ptr; } -static ssize_t cx_pl_find_remove( +static size_t cx_pl_find_remove( struct cx_list_s *list, const void *elem, bool remove ) { cx_pl_hack_cmpfunc(list); - ssize_t ret = list->climpl->find_remove(list, &elem, remove); + size_t ret = list->climpl->find_remove(list, &elem, remove); cx_pl_unhack_cmpfunc(list); return ret; } @@ -207,12 +207,12 @@ return NULL; } -static ssize_t cx_emptyl_find_remove( +static size_t cx_emptyl_find_remove( cx_attr_unused struct cx_list_s *list, cx_attr_unused const void *elem, cx_attr_unused bool remove ) { - return -1; + return 0; } static bool cx_emptyl_iter_valid(cx_attr_unused const void *iter) { @@ -249,18 +249,19 @@ }; CxList cx_empty_list = { - { - NULL, - NULL, - 0, - 0, - NULL, - NULL, - NULL, - false - }, - &cx_empty_list_class, - NULL + { + NULL, + NULL, + 0, + 0, + NULL, + NULL, + NULL, + false, + true, + }, + &cx_empty_list_class, + NULL }; CxList *const cxEmptyList = &cx_empty_list; diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/map.c --- a/src/map.c Sat Jan 25 15:22:01 2025 +0100 +++ b/src/map.c Tue Jan 28 18:31:17 2025 +0100 @@ -66,17 +66,18 @@ }; CxMap cx_empty_map = { - { - NULL, - NULL, - 0, - 0, - NULL, - NULL, - NULL, - false - }, - &cx_empty_map_class + { + NULL, + NULL, + 0, + 0, + NULL, + NULL, + NULL, + false, + true + }, + &cx_empty_map_class }; CxMap *const cxEmptyMap = &cx_empty_map; diff -r 8ff82697f2c3 -r 148b7c7ccaf9 src/string.c --- a/src/string.c Sat Jan 25 15:22:01 2025 +0100 +++ b/src/string.c Tue Jan 28 18:31:17 2025 +0100 @@ -220,14 +220,9 @@ cxstring string, int chr ) { - chr = 0xFF & chr; - // TODO: improve by comparing multiple bytes at once - for (size_t i = 0; i < string.length; i++) { - if (string.ptr[i] == chr) { - return cx_strsubs(string, i); - } - } - return (cxstring) {NULL, 0}; + char *ret = memchr(string.ptr, 0xFF & chr, string.length); + if (ret == NULL) return (cxstring) {NULL, 0}; + return (cxstring) {ret, string.length - (ret - string.ptr)}; } cxmutstr cx_strchr_m( @@ -693,7 
+688,7 @@ // Allocate result string cxmutstr result; { - ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; + long long adjlen = (long long) replacement.length - (long long) pattern.length; size_t rcount = 0; curbuf = &ibuf; do { @@ -847,8 +842,7 @@ bool neg = false; size_t start_unsigned = 0; - // trim already, to search for a sign character - str = cx_strtrim(str); + // emptiness check if (str.length == 0) { errno = EINVAL; return -1; @@ -905,16 +899,6 @@ return cx_strtoll_lc(str, (long long*) output, base, groupsep); } -int cx_strtoz_lc_(cxstring str, ssize_t *output, int base, const char *groupsep) { -#if SSIZE_MAX == INT32_MAX - return cx_strtoi32_lc_(str, (int32_t*) output, base, groupsep); -#elif SSIZE_MAX == INT64_MAX - return cx_strtoll_lc_(str, (long long*) output, base, groupsep); -#else -#error "unsupported ssize_t size" -#endif -} - #define cx_strtoX_unsigned_impl(rtype, rmax) \ uint64_t result; \ if (cx_strtou64_lc(str, &result, base, groupsep)) { \ @@ -941,7 +925,6 @@ int cx_strtoull_lc_(cxstring str, unsigned long long *output, int base, const char *groupsep) { // some sanity checks - str = cx_strtrim(str); if (str.length == 0) { errno = EINVAL; return -1; @@ -1036,7 +1019,7 @@ return cx_strtoull_lc(str, (unsigned long long*) output, base, groupsep); } -int cx_strtouz_lc_(cxstring str, size_t *output, int base, const char *groupsep) { +int cx_strtoz_lc_(cxstring str, size_t *output, int base, const char *groupsep) { #if SIZE_MAX == UINT32_MAX return cx_strtou32_lc_(str, (uint32_t*) output, base, groupsep); #elif SIZE_MAX == UINT64_MAX @@ -1070,8 +1053,7 @@ // TODO: overflow check // TODO: increase precision - // trim and check - str = cx_strtrim(str); + // emptiness check if (str.length == 0) { errno = EINVAL; return -1; diff -r 8ff82697f2c3 -r 148b7c7ccaf9 tests/test_json.c --- a/tests/test_json.c Sat Jan 25 15:22:01 2025 +0100 +++ b/tests/test_json.c Tue Jan 28 18:31:17 2025 +0100 @@ -120,7 +120,8 @@ 
CX_TEST(test_json_escaped_strings) { cxstring text = cx_str( "{\n" - "\t\"object\":\"{\\n\\t\\\"object\\\":null\\n}\"}\"\n" + "\t\"object\":\"{\\n\\t\\\"object\\\":null\\n}\",\n" + "\t\"ctrl-chars\":\"\\\\foo\\r\\nbar\\f*ring\\/ring*\\b\"\n" "}" ); @@ -138,6 +139,152 @@ cxJsonAsCxString(object), CX_STR("{\n\t\"object\":null\n}")) ); + CxJsonValue *ctrl = cxJsonObjGet(obj, "ctrl-chars"); + CX_TEST_ASSERT(cxJsonIsString(ctrl)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(ctrl), + CX_STR("\\foo\r\nbar\f*ring/ring*\b")) + ); + cxJsonValueFree(obj); + } + cxJsonDestroy(&json); +} + +CX_TEST(test_json_escaped_unicode_strings) { + cxstring text = cx_str( + "{\n" + "\"ascii\":\"\\u0041\\u0053\\u0043\\u0049\\u0049\",\n" + "\"unicode\":\"\\u00df\\u00DF\",\n" + "\"mixed\":\"mixed ä ö \\u00e4 \\u00f6\",\n" + "\"wide\":\"\\u03a3\\u29b0\",\n" + "\"surrogatepair1\":\"\\ud83e\\udff5\",\n" + "\"surrogatepair2\":\"test\\ud83e\\udff1AA\"\n," + "\"mixed2\":\"123\\u03a3\\ud83e\\udfc5\\u00df\"" + "}" + ); + + CxJson json; + cxJsonInit(&json, NULL); + CX_TEST_DO { + cxJsonFill(&json, text); + CxJsonValue *obj; + CxJsonStatus result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsObject(obj)); + + CxJsonValue *ascii = cxJsonObjGet(obj, "ascii"); + CX_TEST_ASSERT(cxJsonIsString(ascii)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(ascii), + CX_STR("ASCII")) + ); + + CxJsonValue *unicode = cxJsonObjGet(obj, "unicode"); + CX_TEST_ASSERT(cxJsonIsString(unicode)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(unicode), + CX_STR("ßß")) + ); + + CxJsonValue *mixed = cxJsonObjGet(obj, "mixed"); + CX_TEST_ASSERT(cxJsonIsString(mixed)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(mixed), + CX_STR("mixed ä ö ä ö")) + ); + + CxJsonValue *wide = cxJsonObjGet(obj, "wide"); + CX_TEST_ASSERT(cxJsonIsString(wide)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(wide), + CX_STR("\u03a3\u29b0")) + ); + + CxJsonValue 
*surrogatepair1 = cxJsonObjGet(obj, "surrogatepair1"); + CX_TEST_ASSERT(cxJsonIsString(surrogatepair1)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(surrogatepair1), + CX_STR("\xf0\x9f\xaf\xb5")) + ); + + CxJsonValue *surrogatepair2 = cxJsonObjGet(obj, "surrogatepair2"); + CX_TEST_ASSERT(cxJsonIsString(surrogatepair2)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(surrogatepair2), + CX_STR("test\xf0\x9f\xaf\xb1" "AA")) + ); + + CxJsonValue *mixed2 = cxJsonObjGet(obj, "mixed2"); + CX_TEST_ASSERT(cxJsonIsString(mixed2)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(mixed2), + CX_STR("123\u03a3\xf0\x9f\xaf\x85ß")) + ); + + cxJsonValueFree(obj); + } + cxJsonDestroy(&json); +} + +CX_TEST(test_json_escaped_unicode_malformed) { + CxJson json; + cxJsonInit(&json, NULL); + CxJsonValue *obj; + CxJsonStatus result; + CX_TEST_DO { + cxJsonFill(&json, "\"too few \\u123 digits\""); + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsString(obj)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(obj), + CX_STR("too few \\u123 digits") + )); + cxJsonValueFree(obj); + cxJsonFill(&json, "\"too many \\u00E456 digits\""); + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsString(obj)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(obj), + CX_STR("too many ä56 digits") + )); + cxJsonValueFree(obj); + cxJsonFill(&json, "\"only high \\uD800 surrogate\""); + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsString(obj)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(obj), + CX_STR("only high \\uD800 surrogate") + )); + cxJsonValueFree(obj); + cxJsonFill(&json, "\"only low \\uDC00 surrogate\""); + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsString(obj)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(obj), + CX_STR("only low 
\\uDC00 surrogate") + )); + cxJsonValueFree(obj); + cxJsonFill(&json, "\"two high \\uD800\\uD800 surrogates\""); + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsString(obj)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(obj), + CX_STR("two high \\uD800\\uD800 surrogates") + )); + cxJsonValueFree(obj); + cxJsonFill(&json, "\"high plus bullshit \\uD800\\u567 foo\""); + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsString(obj)); + CX_TEST_ASSERT(0 == cx_strcmp( + cxJsonAsCxString(obj), + CX_STR("high plus bullshit \\uD800\\u567 foo") + )); cxJsonValueFree(obj); } cxJsonDestroy(&json); @@ -1042,6 +1189,8 @@ cx_test_register(suite, test_json_init_default); cx_test_register(suite, test_json_simple_object); cx_test_register(suite, test_json_escaped_strings); + cx_test_register(suite, test_json_escaped_unicode_strings); + cx_test_register(suite, test_json_escaped_unicode_malformed); cx_test_register(suite, test_json_escaped_end_of_string); cx_test_register(suite, test_json_object_incomplete_token); cx_test_register(suite, test_json_token_wrongly_completed); diff -r 8ff82697f2c3 -r 148b7c7ccaf9 tests/test_list.c --- a/tests/test_list.c Sat Jan 25 15:22:01 2025 +0100 +++ b/tests/test_list.c Tue Jan 28 18:31:17 2025 +0100 @@ -382,22 +382,31 @@ } CX_TEST(test_linked_list_find) { - void *list = create_nodes_test_data(4); + node *list = create_nodes_test_data(4); assign_nodes_test_data(list, 2, 4, 6, 8); CX_TEST_DO { + size_t i = 10; int s; s = 2; - CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s) == 0); + node *n = list; + CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s, &i) == n); + CX_TEST_ASSERT(i == 0); + n = n->next; s = 4; - CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s) == 1); + CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s, &i) == 
n); + CX_TEST_ASSERT(i == 1); + n = n->next; s = 6; - CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s) == 2); + CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s, &i) == n); + CX_TEST_ASSERT(i == 2); + n = n->next; s = 8; - CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s) == 3); + CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s, &i) == n); + CX_TEST_ASSERT(i == 3); s = 10; - CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s) < 0); + CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s, &i) == NULL); s = -2; - CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s) < 0); + CX_TEST_ASSERT(cx_linked_list_find(list, loc_next, loc_data, cx_cmp_int, &s, &i) == NULL); } destroy_nodes_test_data(list); } @@ -967,8 +976,8 @@ CX_TEST(test_empty_list_find) { int x = 42, y = 1337; CX_TEST_DO { - CX_TEST_ASSERT(cxListFind(cxEmptyList, &x) < 0); - CX_TEST_ASSERT(cxListFind(cxEmptyList, &y) < 0); + CX_TEST_ASSERT(cxListFind(cxEmptyList, &x) == 0); + CX_TEST_ASSERT(cxListFind(cxEmptyList, &y) == 0); } } @@ -1536,10 +1545,10 @@ const size_t testdata_len = 250; int *testdata = int_test_data_added_to_list(list, isptrlist, testdata_len); - int exp = rand() % testdata_len; // NOLINT(cert-msc50-cpp) + unsigned exp = rand() % testdata_len; // NOLINT(cert-msc50-cpp) int val = testdata[exp]; // randomly picked number could occur earlier in list - find first position - for (int i = 0 ; i < exp ; i++) { + for (unsigned i = 0 ; i < exp ; i++) { if (testdata[i] == val) { exp = i; break; @@ -1552,7 +1561,35 @@ CX_TEST_ASSERT(cxListFind(list, &val) != exp); int notinlist = -1; - CX_TEST_ASSERT(cxListFindRemove(list, &notinlist) < 0); + CX_TEST_ASSERT(cxListFindRemove(list, &notinlist) == cxListSize(list)); + CX_TEST_ASSERT(cxListSize(list) == testdata_len - 1); + + free(testdata); +}) +
+roll_out_test_combos(find_remove_sorted, { + const size_t testdata_len = 250; + int *testdata = int_test_data_added_to_list(list, isptrlist, testdata_len); + qsort(testdata, testdata_len, sizeof(int), cx_cmp_int); + cxListSort(list); + + unsigned exp = rand() % testdata_len; // NOLINT(cert-msc50-cpp) + int val = testdata[exp]; + // randomly picked number could occur earlier in list - find first position + for (unsigned i = 0 ; i < exp ; i++) { + if (testdata[i] == val) { + exp = i; + break; + } + } + CX_TEST_ASSERT(cxListSize(list) == testdata_len); + CX_TEST_ASSERT(cxListFind(list, &val) == exp); + CX_TEST_ASSERT(cxListFindRemove(list, &val) == exp); + CX_TEST_ASSERT(cxListSize(list) == testdata_len - 1); + CX_TEST_ASSERT(cxListFind(list, &val) != exp); + + int notinlist = -1; + CX_TEST_ASSERT(cxListFindRemove(list, &notinlist) == cxListSize(list)); CX_TEST_ASSERT(cxListSize(list) == testdata_len - 1); free(testdata); @@ -1571,9 +1608,11 @@ int *testdata = int_test_data_added_to_list(list, isptrlist, 128); CX_TEST_ASSERT(cxListSize(list) == len); for (size_t i = 0; i < len; i++) { + CX_TEST_ASSERT(cxListIndexValid(list, i)); CX_TEST_ASSERT(*(int *) cxListAt(list, i) == testdata[i]); } - CX_TEST_ASSERT(cxListAt(list, cxListSize(list)) == NULL); + CX_TEST_ASSERT(!cxListIndexValid(list, len)); + CX_TEST_ASSERT(cxListAt(list, len) == NULL); free(testdata); }) @@ -1620,10 +1659,10 @@ int *testdata = int_test_data_added_to_list(list, isptrlist, testdata_len); for (size_t attempt = 0; attempt < 25; attempt++) { - int exp = rand() % testdata_len; // NOLINT(cert-msc50-cpp) + unsigned exp = rand() % testdata_len; // NOLINT(cert-msc50-cpp) int val = testdata[exp]; // randomly picked number could occur earlier in list - find first position - for (int i = 0 ; i < exp ; i++) { + for (unsigned i = 0 ; i < exp ; i++) { if (testdata[i] == val) { exp = i; break; @@ -1633,7 +1672,7 @@ } int notinlist = -1; - CX_TEST_ASSERT(cxListFind(list, &notinlist) < 0); +
CX_TEST_ASSERT(cxListFind(list, &notinlist) == cxListSize(list)); free(testdata); }) @@ -1924,6 +1963,8 @@ cx_test_register(suite, test_list_parl_remove_array); cx_test_register(suite, test_list_arl_find_remove); cx_test_register(suite, test_list_parl_find_remove); + cx_test_register(suite, test_list_arl_find_remove_sorted); + cx_test_register(suite, test_list_parl_find_remove_sorted); cx_test_register(suite, test_list_arl_clear); cx_test_register(suite, test_list_parl_clear); cx_test_register(suite, test_list_arl_at); @@ -2021,6 +2062,8 @@ cx_test_register(suite, test_list_pll_remove_array); cx_test_register(suite, test_list_ll_find_remove); cx_test_register(suite, test_list_pll_find_remove); + cx_test_register(suite, test_list_ll_find_remove_sorted); + cx_test_register(suite, test_list_pll_find_remove_sorted); cx_test_register(suite, test_list_ll_clear); cx_test_register(suite, test_list_pll_clear); cx_test_register(suite, test_list_ll_at); diff -r 8ff82697f2c3 -r 148b7c7ccaf9 tests/test_string.c --- a/tests/test_string.c Sat Jan 25 15:22:01 2025 +0100 +++ b/tests/test_string.c Tue Jan 28 18:31:17 2025 +0100 @@ -985,8 +985,7 @@ test_strtoint_impl(LL, num, base, i8, INT8_MIN, INT8_MAX); \ test_strtoint_impl(LL, num, base, i16, INT16_MIN, INT16_MAX); \ test_strtoint_impl(LL, num, base, i32, INT32_MIN, INT32_MAX); \ - test_strtoint_impl(LL, num, base, i64, INT64_MIN, INT64_MAX); \ - test_strtoint_impl(LL, num, base, z, -SSIZE_MAX-1, SSIZE_MAX) + test_strtoint_impl(LL, num, base, i64, INT64_MIN, INT64_MAX); #define test_strtoint_rollout_signed(num, base) \ test_strtoint_rollout_signed_impl(num, base); \ @@ -1001,7 +1000,7 @@ test_strtoint_impl(ULL, num, base, u16, 0, UINT16_MAX); \ test_strtoint_impl(ULL, num, base, u32, 0, UINT32_MAX); \ test_strtoint_impl(ULL, num, base, u64, 0, UINT64_MAX); \ - test_strtoint_impl(ULL, num, base, uz, 0, SIZE_MAX) + test_strtoint_impl(ULL, num, base, z, 0, SIZE_MAX) CX_TEST(test_string_to_signed_integer) { short s; @@ -1012,7 +1011,6
@@ int16_t i16; int32_t i32; int64_t i64; - ssize_t z; CX_TEST_DO { // do some brute force tests with all ranges test_strtoint_rollout_signed(47, 10); @@ -1064,28 +1062,28 @@ CX_TEST_ASSERT(i64 == INT64_MIN); // group separators - CX_TEST_ASSERT(0 == cx_strtoi32(cx_str(" -123,456"), &i32, 10)); + CX_TEST_ASSERT(0 == cx_strtoi32(cx_str("-123,456"), &i32, 10)); CX_TEST_ASSERT(i32 == -123456); errno = 0; - CX_TEST_ASSERT(0 != cx_strtoi16_lc(cx_str(" -Xab,cd"), &i16, 16, "'")); + CX_TEST_ASSERT(0 != cx_strtoi16_lc(cx_str("-Xab,cd"), &i16, 16, "'")); CX_TEST_ASSERT(errno == EINVAL); errno = 0; - CX_TEST_ASSERT(0 != cx_strtoi16_lc(cx_str(" -X'ab'cd"), &i16, 16, "'")); + CX_TEST_ASSERT(0 != cx_strtoi16_lc(cx_str("-X'ab'cd"), &i16, 16, "'")); CX_TEST_ASSERT(errno == ERANGE); errno = 0; - CX_TEST_ASSERT(0 == cx_strtoi16_lc(cx_str(" -X'67'89"), &i16, 16, "'")); + CX_TEST_ASSERT(0 == cx_strtoi16_lc(cx_str("-X'67'89"), &i16, 16, "'")); CX_TEST_ASSERT(errno == 0); CX_TEST_ASSERT(i16 == -0x6789); // binary and (unusual notation of) signed binary errno = 0; - CX_TEST_ASSERT(0 != cx_strtoi8_lc(cx_str(" -1010 1011"), &i8, 2, " ")); + CX_TEST_ASSERT(0 != cx_strtoi8_lc(cx_str("-1010 1011"), &i8, 2, " ")); CX_TEST_ASSERT(errno == ERANGE); errno = 0; - CX_TEST_ASSERT(0 != cx_strtoi8_lc(cx_str(" 1010 1011"), &i8, 2, " ")); + CX_TEST_ASSERT(0 != cx_strtoi8_lc(cx_str("1010 1011"), &i8, 2, " ")); CX_TEST_ASSERT(errno == ERANGE); errno = 0; - CX_TEST_ASSERT(0 == cx_strtoi8_lc(cx_str(" -0101 0101"), &i8, 2, " ")); + CX_TEST_ASSERT(0 == cx_strtoi8_lc(cx_str("-0101 0101"), &i8, 2, " ")); CX_TEST_ASSERT(errno == 0); CX_TEST_ASSERT(i8 == -0x55); } @@ -1100,7 +1098,7 @@ uint16_t u16; uint32_t u32; uint64_t u64; - size_t uz; + size_t z; CX_TEST_DO { // do some brute force tests with all ranges test_strtoint_rollout(47, 10); @@ -1132,13 +1130,13 @@ // -------------------------- // group separators - CX_TEST_ASSERT(0 == cx_strtou32(cx_str(" 123,456"), &u32, 10)); + CX_TEST_ASSERT(0 == 
cx_strtou32(cx_str("123,456"), &u32, 10)); CX_TEST_ASSERT(u32 == 123456); errno = 0; - CX_TEST_ASSERT(0 != cx_strtou16_lc(cx_str(" ab,cd"), &u16, 16, "'")); + CX_TEST_ASSERT(0 != cx_strtou16_lc(cx_str("ab,cd"), &u16, 16, "'")); CX_TEST_ASSERT(errno == EINVAL); errno = 0; - CX_TEST_ASSERT(0 == cx_strtou16_lc(cx_str(" ab'cd"), &u16, 16, "'")); + CX_TEST_ASSERT(0 == cx_strtou16_lc(cx_str("ab'cd"), &u16, 16, "'")); CX_TEST_ASSERT(errno == 0); CX_TEST_ASSERT(u16 == 0xabcd); @@ -1147,7 +1145,7 @@ CX_TEST_ASSERT(0 != cx_strtou8_lc(cx_str("1 1010 1011"), &u8, 2, " ")); CX_TEST_ASSERT(errno == ERANGE); errno = 0; - CX_TEST_ASSERT(0 == cx_strtou8_lc(cx_str(" 1010 1011"), &u8, 2, " ")); + CX_TEST_ASSERT(0 == cx_strtou8_lc(cx_str("1010 1011"), &u8, 2, " ")); CX_TEST_ASSERT(errno == 0); CX_TEST_ASSERT(u8 == 0xAB); } @@ -1226,6 +1224,34 @@ } } +CX_TEST(test_string_to_number_notrim) { + long long i; + unsigned long long u; + float f; + double d; + CX_TEST_DO { + CX_TEST_ASSERT(0 != cx_strtoll(cx_str("-42 "), &i, 10)); + CX_TEST_ASSERT(0 != cx_strtoll(cx_str(" -42"), &i, 10)); + CX_TEST_ASSERT(0 == cx_strtoll(cx_str("-42"), &i, 10)); + CX_TEST_ASSERT(i == -42); + + CX_TEST_ASSERT(0 != cx_strtoull(cx_str("42 "), &u, 10)); + CX_TEST_ASSERT(0 != cx_strtoull(cx_str(" 42"), &u, 10)); + CX_TEST_ASSERT(0 == cx_strtoull(cx_str("42"), &u, 10)); + CX_TEST_ASSERT(u == 42); + + CX_TEST_ASSERT(0 != cx_strtof(cx_str("13.37 "), &f)); + CX_TEST_ASSERT(0 != cx_strtof(cx_str(" 13.37"), &f)); + CX_TEST_ASSERT(0 == cx_strtof(cx_str("13.37"), &f)); + CX_TEST_ASSERT(0 == cx_vcmp_float(f, 13.37f)); + + CX_TEST_ASSERT(0 != cx_strtod(cx_str("13.37 "), &d)); + CX_TEST_ASSERT(0 != cx_strtod(cx_str(" 13.37"), &d)); + CX_TEST_ASSERT(0 == cx_strtod(cx_str("13.37"), &d)); + CX_TEST_ASSERT(0 == cx_vcmp_double(d, 13.37)); + } +} + CxTestSuite *cx_test_suite_string(void) { CxTestSuite *suite = cx_test_suite_new("string"); @@ -1268,6 +1294,7 @@ cx_test_register(suite, test_string_to_unsigned_integer); 
cx_test_register(suite, test_string_to_float); cx_test_register(suite, test_string_to_double); + cx_test_register(suite, test_string_to_number_notrim); return suite; }