fix over-optimization of strstr

2022-10-04

author
Mike Becker <universe@uap-core.de>
date
Tue, 04 Oct 2022 19:25:07 +0200 (2022-10-04)
changeset 591
7df0bcaecffa
parent 590
02a56701a5cb
child 592
bb69ef3ad1f3

fix over-optimization of strstr

1. it's actually less performant to frequently read bytes
from an array instead of using the native word length
2. the SBO buffer should be local and not static to allow
multi-threading usage

src/string.c file | annotate | diff | comparison | revisions
--- a/src/string.c	Tue Oct 04 18:55:20 2022 +0200
+++ b/src/string.c	Tue Oct 04 19:25:07 2022 +0200
@@ -227,14 +227,7 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
-    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
-
-#define ptable_w(useheap, ptable, index, src) do {\
-    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
-    else ((size_t*)ptable)[index] = src;\
-    } while (0)
-
+#define STRSTR_SBO_BUFLEN 512
 
 cxstring cx_strstr(
         cxstring haystack,
@@ -257,14 +250,14 @@
      * and we want to avoid that.
      */
 
-    /* static prefix table */
-    static uint8_t s_prefix_table[512];
+    /* local prefix table */
+    size_t s_prefix_table[STRSTR_SBO_BUFLEN];
 
-    /* check pattern length and use appropriate prefix table */
+    /* check needle length and use appropriate prefix table */
     /* if the pattern exceeds static prefix table, allocate on the heap */
-    register int useheap = needle.length >= 512;
-    register void *ptable = useheap ? calloc(needle.length + 1,
-                                             sizeof(size_t)) : s_prefix_table;
+    bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
+    register size_t *ptable = useheap ? calloc(needle.length + 1,
+                                               sizeof(size_t)) : s_prefix_table;
 
     /* keep counter in registers */
     register size_t i, j;
@@ -272,14 +265,14 @@
     /* fill prefix table */
     i = 0;
     j = 0;
-    ptable_w(useheap, ptable, i, j);
+    ptable[i] = j;
     while (i < needle.length) {
         while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
-            ptable_r(j, useheap, ptable, j - 1);
+            j = ptable[j - 1];
         }
         i++;
         j++;
-        ptable_w(useheap, ptable, i, j);
+        ptable[i] = j;
     }
 
     /* search */
@@ -288,7 +281,7 @@
     j = 1;
     while (i < haystack.length) {
         while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
-            ptable_r(j, useheap, ptable, j - 1);
+            j = ptable[j - 1];
         }
         i++;
         j++;

mercurial