# HG changeset patch
# User Mike Becker <universe@uap-core.de>
# Date 1735403556 -3600
# Node ID e453e717876e9cdf99fc7f16fd80102ed6c422b2
# Parent  8baed9b38bc67c87846d0a85f00965f7a415323c
implement floating point string to number conversions

The current implementation is not very precise yet. That is why the
tests compare results with our own compare functions instead of exact
equality. A future revision should increase the precision.

fixes #532

diff -r 8baed9b38bc6 -r e453e717876e src/string.c
--- a/src/string.c	Sat Dec 28 17:31:28 2024 +0100
+++ b/src/string.c	Sat Dec 28 17:32:36 2024 +0100
@@ -34,6 +34,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <limits.h>
+#include <float.h>
 
 #ifndef _WIN32
 
@@ -961,18 +962,18 @@
 
     // if base is 2 or 16, some leading stuff may appear
     if (base == 2) {
-        if (str.ptr[0] == 'b' || str.ptr[0] == 'B') {
+        if ((str.ptr[0] | 32) == 'b') {
             start = 1;
         } else if (str.ptr[0] == '0' && str.length > 1) {
-            if (str.ptr[1] == 'b' || str.ptr[1] == 'B') {
+            if ((str.ptr[1] | 32) == 'b') {
                 start = 2;
             }
         }
     } else if (base == 16) {
-        if (str.ptr[0] == 'x' || str.ptr[0] == 'X' || str.ptr[0] == '#') {
+        if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') {
             start = 1;
         } else if (str.ptr[0] == '0' && str.length > 1) {
-            if (str.ptr[1] == 'x' || str.ptr[1] == 'X') {
+            if ((str.ptr[1] | 32) == 'x') {
                 start = 2;
             }
         }
@@ -1043,29 +1044,152 @@
 }
 
 int cx_strtof_lc(cxstring str, float *output, char decsep, const char *groupsep) {
-    // TODO: replace temporary implementation
-    (void) groupsep; // unused in temp impl
-    (void) decsep; // unused in temp impl
-    char *s = malloc(str.length + 1);
-    memcpy(s, str.ptr, str.length);
-    s[str.length] = '\0';
-    char *e;
-    *output = strtof(s, &e);
-    int r = !(e && *e == '\0');
-    free(s);
-    return r;
+    // Parses str as a double and then checks that the value fits into
+    // a float. Returns zero on success. A failure of cx_strtod_lc() is
+    // passed through; values out of float range yield -1 and ERANGE.
+    double d;
+    int ret = cx_strtod_lc(str, &d, decsep, groupsep);
+    if (ret != 0) return ret;
+    // note: FLT_MIN is the smallest POSITIVE normalized number, so the
+    // magnitude is tested; zero is exactly representable and must pass
+    double test = d < 0 ? -d : d;
+    if ((test != 0. && test < FLT_MIN) || test > FLT_MAX) {
+        errno = ERANGE;
+        return -1;
+    }
+    *output = (float) d;
+    return 0;
 }
 
 int cx_strtod_lc(cxstring str, double *output, char decsep, const char *groupsep) {
-    // TODO: replace temporary implementation
-    (void) groupsep; // unused in temp impl
-    (void) decsep; // unused in temp impl
-    char *s = malloc(str.length + 1);
-    memcpy(s, str.ptr, str.length);
-    s[str.length] = '\0';
-    char *e;
-    *output = strtod(s, &e);
-    int r = !(e && *e == '\0');
-    free(s);
-    return r;
+    // Converts str to a double after trimming it. Accepted format:
+    //   [+|-] [digits] [decsep [digits]] [(e|E) [+|-] digits]
+    // with at least one digit in the mantissa. Characters from groupsep
+    // are skipped wherever digits are parsed.
+    // Returns zero on success. Otherwise, -1 is returned, errno is set to
+    // EINVAL (malformed input) or ERANGE (overflow), and output is untouched.
+    // TODO: increase precision
+
+    // trim and check
+    str = cx_strtrim(str);
+    if (str.length == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    double result = 0.;
+    int sign = 1;
+
+    // check if there is a sign
+    if (str.ptr[0] == '-') {
+        sign = -1;
+        str.ptr++;
+        str.length--;
+    } else if (str.ptr[0] == '+') {
+        str.ptr++;
+        str.length--;
+    }
+
+    // parse the integer part, skipping group separators
+    // note: strchr() also matches the terminator of groupsep, so a NUL
+    // byte in the input must be excluded explicitly
+    size_t pos = 0;
+    int has_digits = 0;
+    for (; pos < str.length; pos++) {
+        const char c = str.ptr[pos];
+        if (isdigit((unsigned char) c)) {
+            result = result * 10 + (c - '0');
+            has_digits = 1;
+        } else if (c == '\0' || strchr(groupsep, c) == NULL) {
+            break;
+        }
+    }
+
+    // parse the fractional part, if there is one
+    if (pos < str.length && str.ptr[pos] == decsep) {
+        pos++;
+        double factor = 1.;
+        for (; pos < str.length; pos++) {
+            const char c = str.ptr[pos];
+            if (isdigit((unsigned char) c)) {
+                factor *= 0.1;
+                result = result + factor * (c - '0');
+                has_digits = 1;
+            } else if (c == '\0' || strchr(groupsep, c) == NULL) {
+                break;
+            }
+        }
+    }
+
+    // the mantissa needs at least one digit (rejects ".", "e5", ",")
+    if (!has_digits) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (pos < str.length) {
+        // the only thing that may follow is the exponent
+        if ((str.ptr[pos] | 32) != 'e') {
+            errno = EINVAL;
+            return -1;
+        }
+        pos++;
+
+        // check if we have a sign for the exponent
+        double factor = 10.;
+        if (pos < str.length) {
+            if (str.ptr[pos] == '-') {
+                factor = .1;
+                pos++;
+            } else if (str.ptr[pos] == '+') {
+                pos++;
+            }
+        }
+
+        // parse the exponent
+        // note: exponents this large overflow or underflow anyway, so
+        // accumulation stops early to avoid an unsigned wrap-around
+        unsigned int exp = 0;
+        int has_exp_digits = 0;
+        for (; pos < str.length; pos++) {
+            const char c = str.ptr[pos];
+            if (isdigit((unsigned char) c)) {
+                if (exp < 10000) {
+                    exp = 10 * exp + (unsigned int) (c - '0');
+                }
+                has_exp_digits = 1;
+            } else if (c == '\0' || strchr(groupsep, c) == NULL) {
+                errno = EINVAL;
+                return -1;
+            }
+        }
+
+        // at least one digit must follow
+        if (!has_exp_digits) {
+            errno = EINVAL;
+            return -1;
+        }
+
+        // apply the exponent by fast exponentiation
+        // note: zero is skipped, because 0 * inf would yield NaN
+        // once the squared factor overflows
+        if (result != 0.) {
+            do {
+                if (exp & 1) {
+                    result *= factor;
+                }
+                factor *= factor;
+            } while ((exp >>= 1) > 0);
+        }
+    }
+
+    // overflow check (the negated comparison also catches NaN)
+    if (!(result <= DBL_MAX)) {
+        errno = ERANGE;
+        return -1;
+    }
+
+    // store the result and exit
+    *output = result * sign;
+    return 0;
 }
\ No newline at end of file
diff -r 8baed9b38bc6 -r e453e717876e tests/Makefile
--- a/tests/Makefile	Sat Dec 28 17:31:28 2024 +0100
+++ b/tests/Makefile	Sat Dec 28 17:32:36 2024 +0100
@@ -92,7 +92,7 @@
  ../src/cx/json.h ../src/cx/allocator.h ../src/cx/string.h \
  ../src/cx/buffer.h ../src/cx/array_list.h ../src/cx/list.h \
  ../src/cx/collection.h ../src/cx/iterator.h ../src/cx/compare.h \
- ../src/cx/mempool.h
+ ../src/cx/compare.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
@@ -134,7 +134,7 @@
 
 $(TEST_DIR)/test_string$(OBJ_EXT): test_string.c ../src/cx/test.h \
  ../src/cx/common.h util_allocator.h ../src/cx/allocator.h \
- ../src/cx/string.h ../src/cx/allocator.h
+ ../src/cx/string.h ../src/cx/allocator.h ../src/cx/compare.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
diff -r 8baed9b38bc6 -r e453e717876e tests/test_json.c
--- a/tests/test_json.c	Sat Dec 28 17:31:28 2024 +0100
+++ b/tests/test_json.c	Sat Dec 28 17:32:36 2024 +0100
@@ -30,7 +30,7 @@
 #include "cx/test.h"
 
 #include "cx/json.h"
-#include "cx/mempool.h"
+#include "cx/compare.h"
 
 CX_TEST(test_json_init_default) {
     CxJson json;
@@ -86,12 +86,12 @@
         CxJsonValue *longitude = cxJsonObjGet(position, "longitude");
         CX_TEST_ASSERT(cxJsonIsNumber(longitude));
         CX_TEST_ASSERT(!cxJsonIsInteger(longitude));
-        CX_TEST_ASSERT(cxJsonAsDouble(longitude) == -94.7099);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(longitude), -94.7099));
         CX_TEST_ASSERT(cxJsonAsInteger(longitude) == -94);
         CxJsonValue *latitude = cxJsonObjGet(position, "latitude");
         CX_TEST_ASSERT(cxJsonIsNumber(latitude));
         CX_TEST_ASSERT(!cxJsonIsInteger(latitude));
-        CX_TEST_ASSERT(cxJsonAsDouble(latitude) == 51.5539);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(latitude), 51.5539));
         CX_TEST_ASSERT(cxJsonAsInteger(latitude) == 51);
 
         CxJsonValue *timestamp = cxJsonObjGet(obj, "timestamp");
@@ -352,21 +352,21 @@
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == 3.1415);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 3.1415));
         cxJsonValueFree(v);
 
         cxJsonFill(&json, "-47.11e2 ");
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == -4711.0);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), -4711.0));
         cxJsonValueFree(v);
 
         cxJsonFill(&json, "0.815e-3 ");
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == 0.000815);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 0.000815));
         cxJsonValueFree(v);
 
         cxJsonFill(&json, "1.23E4 ");
@@ -382,7 +382,8 @@
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
         // be as precise as possible
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == 1.8446744073709552e+19);
+        // TODO: this might produce format error / out of range in future implementations
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 1.8446744073709552e+19));
         cxJsonValueFree(v);
     }
     cxJsonDestroy(&json);
diff -r 8baed9b38bc6 -r e453e717876e tests/test_string.c
--- a/tests/test_string.c	Sat Dec 28 17:31:28 2024 +0100
+++ b/tests/test_string.c	Sat Dec 28 17:32:36 2024 +0100
@@ -30,6 +30,7 @@
 #include "util_allocator.h"
 
 #include "cx/string.h"
+#include "cx/compare.h"
 
 #include <limits.h>
 #include <errno.h>
@@ -1167,16 +1168,47 @@
     float f;
     CX_TEST_DO {
         CX_TEST_ASSERT(0 == cx_strtof(cx_str("11.3"), &f));
-        CX_TEST_ASSERT(11.3f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(11.3f, f));
+
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("-4.711e+1"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(-47.11f, f));
 
         CX_TEST_ASSERT(0 == cx_strtof(cx_str("1.67262192595e-27"), &f));
-        CX_TEST_ASSERT(1.67262192595e-27f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(1.67262192595e-27f, f));
 
         CX_TEST_ASSERT(0 == cx_strtof_lc(cx_str("138,339.4"), &f, '.', ","));
-        CX_TEST_ASSERT(138339.4f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(138339.4f, f));
 
         CX_TEST_ASSERT(0 == cx_strtof_lc(cx_str("138,339.4"), &f, ',', "."));
-        CX_TEST_ASSERT(138.3394f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(138.3394f, f));
+
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e"), &f));
+        CX_TEST_ASSERT(errno == EINVAL);
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e+"), &f));
+        CX_TEST_ASSERT(errno == EINVAL);
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e-"), &f));
+        CX_TEST_ASSERT(errno == EINVAL);
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("15e-0"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(15.f, f));
+
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("3e38"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(3e38f, f));
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("3e39"), &f));
+        CX_TEST_ASSERT(errno == ERANGE);
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("-3e38"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(-3e38f, f));
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("-3e39"), &f));
+        CX_TEST_ASSERT(errno == ERANGE);
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("1.18e-38"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(1.18e-38f, f));
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("1.17e-38"), &f));
+        CX_TEST_ASSERT(errno == ERANGE);
     }
 }
 
@@ -1184,24 +1216,24 @@
     double d;
     CX_TEST_DO {
         CX_TEST_ASSERT(0 == cx_strtod(cx_str("11.3"), &d));
-        CX_TEST_ASSERT(11.3 == d);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(11.3, d));
+
+        CX_TEST_ASSERT(0 == cx_strtod(cx_str("-13.37"), &d));
+        CX_TEST_ASSERT(0 == cx_vcmp_double(-13.37, d));
+
+        CX_TEST_ASSERT(0 == cx_strtod(cx_str("-4.711e+1"), &d));
+        CX_TEST_ASSERT(0 == cx_vcmp_double(-47.11, d));
 
         CX_TEST_ASSERT(0 == cx_strtod(cx_str("1.67262192595e-27"), &d));
-        CX_TEST_ASSERT(1.67262192595e-27 == d);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(1.67262192595e-27, d));
 
         CX_TEST_ASSERT(0 == cx_strtod_lc(cx_str("138,339.4"), &d, '.', ","));
-        CX_TEST_ASSERT(138339.4 == d);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(138339.4, d));
 
         CX_TEST_ASSERT(0 == cx_strtod_lc(cx_str("138,339.4"), &d, ',', "."));
-        CX_TEST_ASSERT(138.3394 == d);
-    }
-}
+        CX_TEST_ASSERT(0 == cx_vcmp_double(138.3394, d));
 
-CX_TEST(test_string_to_float_german) {
-    float f;
-    CX_TEST_DO {
-        // TODO: implement
-        (void)f;
+        // TODO: test and improve support for big numbers, precision, and out-of-range detection
     }
 }
 
@@ -1248,7 +1280,6 @@
     cx_test_register(suite, test_string_to_unsigned_integer);
     cx_test_register(suite, test_string_to_float);
     cx_test_register(suite, test_string_to_double);
-    cx_test_register(suite, test_string_to_float_german);
 
     return suite;
 }