Tue, 08 Jul 2025 19:32:31 +0200
optimize asc_memcmp() by enabling the compiler to use SIMD instructions
src/ascension/datatypes.h | file | annotate | diff | comparison | revisions |
// General Utility Functions
// --------------------------------------------------------------------------

/**
 * Checks whether every byte of a memory region has the same given value.
 *
 * The region is compared one machine word at a time against a word in which
 * every byte equals @p c; any remaining tail bytes are compared individually.
 * Differences are accumulated instead of returning on the first mismatch, so
 * the loops contain no data-dependent branches (the motivation for this form
 * was to let the compiler vectorize the comparison).
 *
 * @param mem start of the region; not dereferenced when @p n is zero
 * @param c   the byte value to look for (converted to unsigned char)
 * @param n   number of bytes to check
 * @return true if all @p n bytes equal @p c (also for n == 0), false otherwise
 */
static inline bool asc_memcmp(const void *mem, char c, size_t n) {
    const unsigned char cu = (unsigned char) c;
    const unsigned char *p = mem;
    const size_t wordsize = sizeof(size_t);

    // Word with every byte set to cu - this is what each chunk must equal
    size_t pattern;
    memset(&pattern, cu, wordsize);

    size_t diff = 0;
    size_t i = 0;

    // Check word-sized chunks; memcpy keeps the load valid for any alignment
    // and avoids reading the bytes through an incompatible pointer type
    for (; n - i >= wordsize; i += wordsize) {
        size_t word;
        memcpy(&word, p + i, wordsize);
        diff |= word ^ pattern;
    }

    // Check remaining bytes
    for (; i < n; i++) {
        diff |= (size_t) (p[i] ^ cu);
    }
    return diff == 0;
}

/** Checks whether a memory region of n bytes consists only of zero bytes. */
#define asc_memcmpz(mem, n) asc_memcmp((mem), 0, (n))