Fri, 27 Dec 2024 13:01:31 +0100
avoid copying the filled data in the json parser when possible - fixes #530
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "cx/array_list.h" #include "cx/compare.h" #include <assert.h> #include <string.h> #include <errno.h> // Default array reallocator static void *cx_array_default_realloc( void *array, size_t capacity, size_t elem_size, cx_attr_unused CxArrayReallocator *alloc ) { size_t n; if (cx_szmul(capacity, elem_size, &n)) { errno = EOVERFLOW; return NULL; } return realloc(array, n); } CxArrayReallocator cx_array_default_reallocator_impl = { cx_array_default_realloc, NULL, NULL, 0, 0 }; CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl; // Stack-aware array reallocator static void *cx_array_advanced_realloc( void *array, size_t capacity, size_t elem_size, cx_attr_unused CxArrayReallocator *alloc ) { // check for overflow size_t n; if (cx_szmul(capacity, elem_size, &n)) { errno = EOVERFLOW; return NULL; } // retrieve the pointer to the actual allocator const CxAllocator *al = alloc->ptr1; // check if the array is still located on the stack void *newmem; if (array == alloc->ptr2) { newmem = cxMalloc(al, n); if (newmem != NULL && array != NULL) { memcpy(newmem, array, n); } } else { newmem = cxRealloc(al, array, n); } return newmem; } struct cx_array_reallocator_s cx_array_reallocator( const struct cx_allocator_s *allocator, const void *stackmem ) { if (allocator == NULL) { allocator = cxDefaultAllocator; } return (struct cx_array_reallocator_s) { cx_array_advanced_realloc, (void*) allocator, (void*) stackmem, 0, 0 }; } // LOW LEVEL ARRAY LIST FUNCTIONS static size_t cx_array_align_capacity( size_t cap, size_t alignment, size_t max ) { if (cap > max - alignment) { return cap; } else { return cap - (cap % alignment) + alignment; } } int cx_array_reserve( void **array, void *size, void *capacity, unsigned width, size_t elem_size, size_t elem_count, CxArrayReallocator *reallocator ) { // assert pointers assert(array != NULL); assert(size != NULL); assert(capacity != NULL); assert(reallocator != NULL); // determine size and capacity size_t oldcap; size_t oldsize; size_t max_size; if (width == 0 || width == 8*sizeof(size_t)) { oldcap = *(size_t*) capacity; oldsize = *(size_t*) size; max_size = SIZE_MAX; } else if (width == 16) { oldcap = *(uint16_t*) capacity; oldsize = *(uint16_t*) size; max_size = UINT16_MAX; } else if (width == 8) { oldcap = *(uint8_t*) capacity; oldsize = *(uint8_t*) size; max_size = UINT8_MAX; } #if CX_WORDSIZE == 64 else if (width == 32) { oldcap = *(uint32_t*) capacity; oldsize = *(uint32_t*) size; max_size = UINT32_MAX; } #endif else { errno = EINVAL; return 1; } // assert that the array is allocated when it has capacity assert(*array != NULL || oldcap == 0); // check for overflow if (elem_count > max_size - oldsize) { errno = EOVERFLOW; return 1; } // determine new capacity size_t newcap = oldsize + elem_count; // reallocate if possible if (newcap > oldcap) { // calculate new capacity (next number divisible by 16) newcap = cx_array_align_capacity(newcap, 16, max_size); // perform reallocation void *newmem = reallocator->realloc( *array, newcap, elem_size, reallocator ); if (newmem == NULL) { return 1; } // store new pointer *array = newmem; // store new capacity if (width == 0 || width == 8*sizeof(size_t)) { *(size_t*) capacity = newcap; } else if (width == 16) { *(uint16_t*) capacity = (uint16_t) newcap; } else if (width == 8) { *(uint8_t*) capacity = (uint8_t) newcap; } #if CX_WORDSIZE == 64 else if (width == 32) { *(uint32_t*) capacity = (uint32_t) newcap; } #endif } return 0; } int cx_array_copy( void **target, void *size, void *capacity, unsigned width, size_t index, const void *src, size_t elem_size, size_t elem_count, CxArrayReallocator *reallocator ) { // assert pointers assert(target != NULL); assert(size != NULL); assert(capacity != NULL); assert(src != NULL); assert(reallocator != NULL); // determine size and capacity size_t oldcap; size_t oldsize; size_t max_size; if (width == 0 || width == 8*sizeof(size_t)) { oldcap = *(size_t*) capacity; oldsize = *(size_t*) size; max_size = SIZE_MAX; } else if (width == 16) { oldcap = *(uint16_t*) capacity; oldsize = *(uint16_t*) size; max_size = UINT16_MAX; } else if (width == 8) { oldcap = *(uint8_t*) capacity; oldsize = *(uint8_t*) size; max_size = UINT8_MAX; } #if CX_WORDSIZE == 64 else if (width == 32) { oldcap = *(uint32_t*) capacity; oldsize = *(uint32_t*) size; max_size = UINT32_MAX; } #endif else { errno = EINVAL; return 1; } // assert that the array is allocated when it has capacity assert(*target != NULL || oldcap == 0); // check for overflow if (index > max_size || elem_count > max_size - index) { errno = EOVERFLOW; return 1; } // check if resize is required size_t minsize = index + elem_count; size_t newsize = oldsize < minsize ? minsize : oldsize; // reallocate if possible size_t newcap = oldcap; if (newsize > oldcap) { // check, if we need to repair the src pointer uintptr_t targetaddr = (uintptr_t) *target; uintptr_t srcaddr = (uintptr_t) src; bool repairsrc = targetaddr <= srcaddr && srcaddr < targetaddr + oldcap * elem_size; // calculate new capacity (next number divisible by 16) newcap = cx_array_align_capacity(newsize, 16, max_size); assert(newcap > newsize); // perform reallocation void *newmem = reallocator->realloc( *target, newcap, elem_size, reallocator ); if (newmem == NULL) { return 1; } // repair src pointer, if necessary if (repairsrc) { src = ((char *) newmem) + (srcaddr - targetaddr); } // store new pointer *target = newmem; } // determine target pointer char *start = *target; start += index * elem_size; // copy elements and set new size // note: no overflow check here, b/c we cannot get here w/o allocation memmove(start, src, elem_count * elem_size); // if any of size or capacity changed, store them back if (newsize != oldsize || newcap != oldcap) { if (width == 0 || width == 8*sizeof(size_t)) { *(size_t*) capacity = newcap; *(size_t*) size = newsize; } else if (width == 16) { *(uint16_t*) capacity = (uint16_t) newcap; *(uint16_t*) size = (uint16_t) newsize; } else if (width == 8) { *(uint8_t*) capacity = (uint8_t) newcap; *(uint8_t*) size = (uint8_t) newsize; } #if CX_WORDSIZE == 64 else if (width == 32) { *(uint32_t*) capacity = (uint32_t) newcap; *(uint32_t*) size = (uint32_t) newsize; } #endif } // return successfully return 0; } int cx_array_insert_sorted( void **target, size_t *size, size_t *capacity, cx_compare_func cmp_func, const void *sorted_data, size_t elem_size, size_t elem_count, CxArrayReallocator *reallocator ) { // assert pointers assert(target != NULL); assert(size != NULL); assert(capacity != NULL); assert(cmp_func != NULL); assert(sorted_data != NULL); assert(reallocator != NULL); // corner case if (elem_count == 0) return 0; // overflow check if (elem_count > SIZE_MAX - *size) { errno = EOVERFLOW; return 1; } // store some counts size_t old_size = *size; size_t needed_capacity = old_size + elem_count; // if we need more than we have, try a reallocation if (needed_capacity > *capacity) { size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX); void *new_mem = reallocator->realloc( *target, new_capacity, elem_size, reallocator ); if (new_mem == NULL) { // give it up right away, there is no contract // that requires us to insert as much as we can return 1; } *target = new_mem; *capacity = new_capacity; } // now we have guaranteed that we can insert everything size_t new_size = old_size + elem_count; *size = new_size; // declare the source and destination indices/pointers size_t si = 0, di = 0; const char *src = sorted_data; char *dest = *target; // find the first insertion point di = cx_array_binary_search_sup(dest, old_size, elem_size, src, cmp_func); dest += di * elem_size; // move the remaining elements in the array completely to the right // we will call it the "buffer" for parked elements size_t buf_size = old_size - di; size_t bi = new_size - buf_size; char *bptr = ((char *) *target) + bi * elem_size; memmove(bptr, dest, buf_size * elem_size); // while there are both source and buffered elements left, // copy them interleaving while (si < elem_count && bi < new_size) { // determine how many source elements can be inserted size_t copy_len, bytes_copied; copy_len = cx_array_binary_search_sup( src, elem_count - si, elem_size, bptr, cmp_func ); // copy the source elements bytes_copied = copy_len * elem_size; memcpy(dest, src, bytes_copied); dest += bytes_copied; src += bytes_copied; si += copy_len; // when all source elements are in place, we are done if (si >= elem_count) break; // determine how many buffered elements need to be restored copy_len = cx_array_binary_search_sup( bptr, new_size - bi, elem_size, src, cmp_func ); // restore the buffered elements bytes_copied = copy_len * elem_size; memmove(dest, bptr, bytes_copied); dest += bytes_copied; bptr += bytes_copied; bi += copy_len; } // still source elements left? simply append them if (si < elem_count) { memcpy(dest, src, elem_size * (elem_count - si)); } // still buffer elements left? // don't worry, we already moved them to the correct place return 0; } size_t cx_array_binary_search_inf( const void *arr, size_t size, size_t elem_size, const void *elem, cx_compare_func cmp_func ) { // special case: empty array if (size == 0) return 0; // declare a variable that will contain the compare results int result; // cast the array pointer to something we can use offsets with const char *array = arr; // check the first array element result = cmp_func(elem, array); if (result < 0) { return size; } else if (result == 0) { return 0; } // special case: there is only one element and that is smaller if (size == 1) return 0; // check the last array element result = cmp_func(elem, array + elem_size * (size - 1)); if (result >= 0) { return size - 1; } // the element is now guaranteed to be somewhere in the list // so start the binary search size_t left_index = 1; size_t right_index = size - 1; size_t pivot_index; while (left_index <= right_index) { pivot_index = left_index + (right_index - left_index) / 2; const char *arr_elem = array + pivot_index * elem_size; result = cmp_func(elem, arr_elem); if (result == 0) { // found it! return pivot_index; } else if (result < 0) { // element is smaller than pivot, continue search left right_index = pivot_index - 1; } else { // element is larger than pivot, continue search right left_index = pivot_index + 1; } } // report the largest upper bound return result < 0 ? (pivot_index - 1) : pivot_index; } size_t cx_array_binary_search( const void *arr, size_t size, size_t elem_size, const void *elem, cx_compare_func cmp_func ) { size_t index = cx_array_binary_search_inf( arr, size, elem_size, elem, cmp_func ); if (index < size && cmp_func(((const char *) arr) + index * elem_size, elem) == 0) { return index; } else { return size; } } size_t cx_array_binary_search_sup( const void *arr, size_t size, size_t elem_size, const void *elem, cx_compare_func cmp_func ) { size_t inf = cx_array_binary_search_inf( arr, size, elem_size, elem, cmp_func ); if (inf == size) { // no infimum means, first element is supremum return 0; } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) { return inf; } else { return inf + 1; } } #ifndef CX_ARRAY_SWAP_SBO_SIZE #define CX_ARRAY_SWAP_SBO_SIZE 128 #endif const unsigned cx_array_swap_sbo_size = CX_ARRAY_SWAP_SBO_SIZE; void cx_array_swap( void *arr, size_t elem_size, size_t idx1, size_t idx2 ) { assert(arr != NULL); // short circuit if (idx1 == idx2) return; char sbo_mem[CX_ARRAY_SWAP_SBO_SIZE]; void *tmp; // decide if we can use the local buffer if (elem_size > CX_ARRAY_SWAP_SBO_SIZE) { tmp = malloc(elem_size); // we don't want to enforce error handling if (tmp == NULL) abort(); } else { tmp = sbo_mem; } // calculate memory locations char *left = arr, *right = arr; left += idx1 * elem_size; right += idx2 * elem_size; // three-way swap memcpy(tmp, left, elem_size); memcpy(left, right, elem_size); memcpy(right, tmp, elem_size); // free dynamic memory, if it was needed if (tmp != sbo_mem) { free(tmp); } } // HIGH LEVEL ARRAY LIST FUNCTIONS typedef struct { struct cx_list_s base; void *data; size_t capacity; CxArrayReallocator reallocator; } cx_array_list; static void cx_arl_destructor(struct cx_list_s *list) { cx_array_list *arl = (cx_array_list *) list; char *ptr = arl->data; if (list->collection.simple_destructor) { for (size_t i = 0; i < list->collection.size; i++) { cx_invoke_simple_destructor(list, ptr); ptr += list->collection.elem_size; } } if (list->collection.advanced_destructor) { for (size_t i = 0; i < list->collection.size; i++) { cx_invoke_advanced_destructor(list, ptr); ptr += list->collection.elem_size; } } cxFree(list->collection.allocator, arl->data); cxFree(list->collection.allocator, list); } static size_t cx_arl_insert_array( struct cx_list_s *list, size_t index, const void *array, size_t n ) { // out of bounds and special case check if (index > list->collection.size || n == 0) return 0; // get a correctly typed pointer to the list cx_array_list *arl = (cx_array_list *) list; // do we need to move some elements? if (index < list->collection.size) { const char *first_to_move = (const char *) arl->data; first_to_move += index * list->collection.elem_size; size_t elems_to_move = list->collection.size - index; size_t start_of_moved = index + n; if (cx_array_copy( &arl->data, &list->collection.size, &arl->capacity, 0, start_of_moved, first_to_move, list->collection.elem_size, elems_to_move, &arl->reallocator )) { // if moving existing elems is unsuccessful, abort return 0; } } // note that if we had to move the elements, the following operation // is guaranteed to succeed, because we have the memory already allocated // therefore, it is impossible to leave this function with an invalid array // place the new elements if (cx_array_copy( &arl->data, &list->collection.size, &arl->capacity, 0, index, array, list->collection.elem_size, n, &arl->reallocator )) { // array list implementation is "all or nothing" return 0; } else { return n; } } static size_t cx_arl_insert_sorted( struct cx_list_s *list, const void *sorted_data, size_t n ) { // get a correctly typed pointer to the list cx_array_list *arl = (cx_array_list *) list; if (cx_array_insert_sorted( &arl->data, &list->collection.size, &arl->capacity, list->collection.cmpfunc, sorted_data, list->collection.elem_size, n, &arl->reallocator )) { // array list implementation is "all or nothing" return 0; } else { return n; } } static int cx_arl_insert_element( struct cx_list_s *list, size_t index, const void *element ) { return 1 != cx_arl_insert_array(list, index, element, 1); } static int cx_arl_insert_iter( struct cx_iterator_s *iter, const void *elem, int prepend ) { struct cx_list_s *list = iter->src_handle.m; if (iter->index < list->collection.size) { int result = cx_arl_insert_element( list, iter->index + 1 - prepend, elem ); if (result == 0) { iter->elem_count++; if (prepend != 0) { iter->index++; iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size; } } return result; } else { int result = cx_arl_insert_element(list, list->collection.size, elem); if (result == 0) { iter->elem_count++; iter->index = list->collection.size; } return result; } } static size_t cx_arl_remove( struct cx_list_s *list, size_t index, size_t num, void *targetbuf ) { cx_array_list *arl = (cx_array_list *) list; // out-of-bounds check size_t remove; if (index >= list->collection.size) { remove = 0; } else if (index + num > list->collection.size) { remove = list->collection.size - index; } else { remove = num; } // easy exit if (remove == 0) return 0; // destroy or copy contents if (targetbuf == NULL) { for (size_t idx = index; idx < index + remove; idx++) { cx_invoke_destructor( list, ((char *) arl->data) + idx * list->collection.elem_size ); } } else { memcpy( targetbuf, ((char *) arl->data) + index * list->collection.elem_size, remove * list->collection.elem_size ); } // short-circuit removal of last elements if (index + remove == list->collection.size) { list->collection.size -= remove; return remove; } // just move the elements to the left int result = cx_array_copy( &arl->data, &list->collection.size, &arl->capacity, 0, index, ((char *) arl->data) + (index + remove) * list->collection.elem_size, list->collection.elem_size, list->collection.size - index - remove, &arl->reallocator ); // cx_array_copy cannot fail, array cannot grow assert(result == 0); // decrease the size list->collection.size -= remove; return remove; } static void cx_arl_clear(struct cx_list_s *list) { if (list->collection.size == 0) return; cx_array_list *arl = (cx_array_list *) list; char *ptr = arl->data; if (list->collection.simple_destructor) { for (size_t i = 0; i < list->collection.size; i++) { cx_invoke_simple_destructor(list, ptr); ptr += list->collection.elem_size; } } if (list->collection.advanced_destructor) { for (size_t i = 0; i < list->collection.size; i++) { cx_invoke_advanced_destructor(list, ptr); ptr += list->collection.elem_size; } } memset(arl->data, 0, list->collection.size * list->collection.elem_size); list->collection.size = 0; } static int cx_arl_swap( struct cx_list_s *list, size_t i, size_t j ) { if (i >= list->collection.size || j >= list->collection.size) return 1; cx_array_list *arl = (cx_array_list *) list; cx_array_swap(arl->data, list->collection.elem_size, i, j); return 0; } static void *cx_arl_at( const struct cx_list_s *list, size_t index ) { if (index < list->collection.size) { const cx_array_list *arl = (const cx_array_list *) list; char *space = arl->data; return space + index * list->collection.elem_size; } else { return NULL; } } static ssize_t cx_arl_find_remove( struct cx_list_s *list, const void *elem, bool remove ) { assert(list->collection.cmpfunc != NULL); assert(list->collection.size < SIZE_MAX / 2); char *cur = ((const cx_array_list *) list)->data; for (ssize_t i = 0; i < (ssize_t) list->collection.size; i++) { if (0 == list->collection.cmpfunc(elem, cur)) { if (remove) { if (1 == cx_arl_remove(list, i, 1, NULL)) { return i; } else { return -1; } } else { return i; } } cur += list->collection.elem_size; } return -1; } static void cx_arl_sort(struct cx_list_s *list) { assert(list->collection.cmpfunc != NULL); qsort(((cx_array_list *) list)->data, list->collection.size, list->collection.elem_size, list->collection.cmpfunc ); } static int cx_arl_compare( const struct cx_list_s *list, const struct cx_list_s *other ) { assert(list->collection.cmpfunc != NULL); if (list->collection.size == other->collection.size) { const char *left = ((const cx_array_list *) list)->data; const char *right = ((const cx_array_list *) other)->data; for (size_t i = 0; i < list->collection.size; i++) { int d = list->collection.cmpfunc(left, right); if (d != 0) { return d; } left += list->collection.elem_size; right += other->collection.elem_size; } return 0; } else { return list->collection.size < other->collection.size ? -1 : 1; } } static void cx_arl_reverse(struct cx_list_s *list) { if (list->collection.size < 2) return; void *data = ((const cx_array_list *) list)->data; size_t half = list->collection.size / 2; for (size_t i = 0; i < half; i++) { cx_array_swap(data, list->collection.elem_size, i, list->collection.size - 1 - i); } } static bool cx_arl_iter_valid(const void *it) { const struct cx_iterator_s *iter = it; const struct cx_list_s *list = iter->src_handle.c; return iter->index < list->collection.size; } static void *cx_arl_iter_current(const void *it) { const struct cx_iterator_s *iter = it; return iter->elem_handle; } static void cx_arl_iter_next(void *it) { struct cx_iterator_s *iter = it; if (iter->base.remove) { iter->base.remove = false; cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); } else { iter->index++; iter->elem_handle = ((char *) iter->elem_handle) + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size; } } static void cx_arl_iter_prev(void *it) { struct cx_iterator_s *iter = it; const cx_array_list *list = iter->src_handle.c; if (iter->base.remove) { iter->base.remove = false; cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); } iter->index--; if (iter->index < list->base.collection.size) { iter->elem_handle = ((char *) list->data) + iter->index * list->base.collection.elem_size; } } static struct cx_iterator_s cx_arl_iterator( const struct cx_list_s *list, size_t index, bool backwards ) { struct cx_iterator_s iter; iter.index = index; iter.src_handle.c = list; iter.elem_handle = cx_arl_at(list, index); iter.elem_size = list->collection.elem_size; iter.elem_count = list->collection.size; iter.base.valid = cx_arl_iter_valid; iter.base.current = cx_arl_iter_current; iter.base.next = backwards ? cx_arl_iter_prev : cx_arl_iter_next; iter.base.remove = false; iter.base.mutating = false; return iter; } static cx_list_class cx_array_list_class = { cx_arl_destructor, cx_arl_insert_element, cx_arl_insert_array, cx_arl_insert_sorted, cx_arl_insert_iter, cx_arl_remove, cx_arl_clear, cx_arl_swap, cx_arl_at, cx_arl_find_remove, cx_arl_sort, cx_arl_compare, cx_arl_reverse, cx_arl_iterator, }; CxList *cxArrayListCreate( const CxAllocator *allocator, cx_compare_func comparator, size_t elem_size, size_t initial_capacity ) { if (allocator == NULL) { allocator = cxDefaultAllocator; } cx_array_list *list = cxCalloc(allocator, 1, sizeof(cx_array_list)); if (list == NULL) return NULL; list->base.cl = &cx_array_list_class; list->base.collection.allocator = allocator; list->capacity = initial_capacity; if (elem_size > 0) { list->base.collection.elem_size = elem_size; list->base.collection.cmpfunc = comparator; } else { elem_size = sizeof(void *); list->base.collection.cmpfunc = comparator == NULL ? cx_cmp_ptr : comparator; cxListStorePointers((CxList *) list); } // allocate the array after the real elem_size is known list->data = cxCalloc(allocator, initial_capacity, elem_size); if (list->data == NULL) { cxFree(allocator, list); return NULL; } // configure the reallocator list->reallocator = cx_array_reallocator(allocator, NULL); return (CxList *) list; }