Tue, 04 Oct 2022 19:25:07 +0200
fix over-optimization of strstr
1. it's actually less performant to frequently read bytes
   from an array instead of using the native word length
2. the SBO buffer should be local and not static to allow
   multi-threading usage
| src/string.c | file | annotate | diff | comparison | revisions | 
--- a/src/string.c Tue Oct 04 18:55:20 2022 +0200 +++ b/src/string.c Tue Oct 04 19:25:07 2022 +0200 @@ -227,14 +227,7 @@ return (cxmutstr) {(char *) result.ptr, result.length}; } -#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ - ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) - -#define ptable_w(useheap, ptable, index, src) do {\ - if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ - else ((size_t*)ptable)[index] = src;\ - } while (0) - +#define STRSTR_SBO_BUFLEN 512 cxstring cx_strstr( cxstring haystack, @@ -257,14 +250,14 @@ * and we want to avoid that. */ - /* static prefix table */ - static uint8_t s_prefix_table[512]; + /* local prefix table */ + size_t s_prefix_table[STRSTR_SBO_BUFLEN]; - /* check pattern length and use appropriate prefix table */ + /* check needle length and use appropriate prefix table */ /* if the pattern exceeds static prefix table, allocate on the heap */ - register int useheap = needle.length >= 512; - register void *ptable = useheap ? calloc(needle.length + 1, - sizeof(size_t)) : s_prefix_table; + bool useheap = needle.length >= STRSTR_SBO_BUFLEN; + register size_t *ptable = useheap ? calloc(needle.length + 1, + sizeof(size_t)) : s_prefix_table; /* keep counter in registers */ register size_t i, j; @@ -272,14 +265,14 @@ /* fill prefix table */ i = 0; j = 0; - ptable_w(useheap, ptable, i, j); + ptable[i] = j; while (i < needle.length) { while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { - ptable_r(j, useheap, ptable, j - 1); + j = ptable[j - 1]; } i++; j++; - ptable_w(useheap, ptable, i, j); + ptable[i] = j; } /* search */ @@ -288,7 +281,7 @@ j = 1; while (i < haystack.length) { while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { - ptable_r(j, useheap, ptable, j - 1); + j = ptable[j - 1]; } i++; j++;