optimize asc_memcmp() by enabling the compiler to use SIMD instructions

Tue, 08 Jul 2025 19:32:31 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 08 Jul 2025 19:32:31 +0200
changeset 191
95ad77a4b8e3
parent 190
7f72375bedc8
child 192
160f3300f6b7

optimize asc_memcmp() by enabling the compiler to use SIMD instructions

src/ascension/datatypes.h file | annotate | diff | comparison | revisions
--- a/src/ascension/datatypes.h	Tue Jul 08 17:45:33 2025 +0200
+++ b/src/ascension/datatypes.h	Tue Jul 08 19:32:31 2025 +0200
@@ -133,15 +133,29 @@
 //    General Utility Functions
 // --------------------------------------------------------------------------
 
-static inline bool asc_memcmpz(const void *mem, size_t n) {
+// Returns true iff each of the n bytes at mem equals c (vacuously true for n == 0).
+// Differences are accumulated without early exit to give the compiler a chance to vectorize.
+static inline bool asc_memcmp(const void *mem, char c, size_t n) {
+    const unsigned char cu = (unsigned char) c;
+    size_t csz; // cu replicated into every byte of a word
+    memset(&csz, cu, sizeof(size_t));
     const unsigned char *p = mem;
-    // TODO: for some reason this is not vectorized - find out why!
-    for (size_t i = 0; i < n ; i++) {
-        if (p[i]>0) return false;
+    size_t i = 0;
+    size_t result = 0; // stays 0 as long as every byte inspected so far matched
+    // XOR whole words against csz; memcpy avoids unaligned access and aliasing UB
+    for (; i + sizeof(size_t) <= n; i += sizeof(size_t)) {
+        size_t word;
+        memcpy(&word, p + i, sizeof(size_t));
+        result |= word ^ csz;
     }
-    return true;
+    for (; i < n; i++) { // remaining tail bytes that do not fill a whole word
+        result |= (size_t) (p[i] ^ cu);
+    }
+    return result == 0;
 }
 
+#define asc_memcmpz(mem, n) asc_memcmp(mem, 0, n) // checks n bytes at mem against the byte value 0
+
 static inline int asc_clampi(int v, int min, int max) {
     if (v < min) return min;
     if (v > max) return max;

mercurial