implement floating point string to number conversions

Sat, 28 Dec 2024 17:32:36 +0100

author
Mike Becker <universe@uap-core.de>
date
Sat, 28 Dec 2024 17:32:36 +0100
changeset 1063
e453e717876e
parent 1062
8baed9b38bc6
child 1064
f3b04cd60776

implement floating point string to number conversions

The current implementation is not very precise. For this reason, the tests
compare the results with our own compare functions (cx_vcmp_float and
cx_vcmp_double) instead of testing for exact equality.
A future revision should increase the precision.

fixes #532

src/string.c file | annotate | diff | comparison | revisions
tests/Makefile file | annotate | diff | comparison | revisions
tests/test_json.c file | annotate | diff | comparison | revisions
tests/test_string.c file | annotate | diff | comparison | revisions
--- a/src/string.c	Sat Dec 28 17:31:28 2024 +0100
+++ b/src/string.c	Sat Dec 28 17:32:36 2024 +0100
@@ -34,6 +34,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <limits.h>
+#include <float.h>
 
 #ifndef _WIN32
 
@@ -961,18 +962,18 @@
 
     // if base is 2 or 16, some leading stuff may appear
     if (base == 2) {
-        if (str.ptr[0] == 'b' || str.ptr[0] == 'B') {
+        if ((str.ptr[0] | 32) == 'b') {
             start = 1;
         } else if (str.ptr[0] == '0' && str.length > 1) {
-            if (str.ptr[1] == 'b' || str.ptr[1] == 'B') {
+            if ((str.ptr[1] | 32) == 'b') {
                 start = 2;
             }
         }
     } else if (base == 16) {
-        if (str.ptr[0] == 'x' || str.ptr[0] == 'X' || str.ptr[0] == '#') {
+        if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') {
             start = 1;
         } else if (str.ptr[0] == '0' && str.length > 1) {
-            if (str.ptr[1] == 'x' || str.ptr[1] == 'X') {
+            if ((str.ptr[1] | 32) == 'x') {
                 start = 2;
             }
         }
@@ -1043,29 +1044,140 @@
 }
 
 int cx_strtof_lc(cxstring str, float *output, char decsep, const char *groupsep) {
-    // TODO: replace temporary implementation
-    (void) groupsep; // unused in temp impl
-    (void) decsep; // unused in temp impl
-    char *s = malloc(str.length + 1);
-    memcpy(s, str.ptr, str.length);
-    s[str.length] = '\0';
-    char *e;
-    *output = strtof(s, &e);
-    int r = !(e && *e == '\0');
-    free(s);
-    return r;
+    // Parses via cx_strtod_lc() and adds a float range check (errno = ERANGE, returns -1).
+    double d;
+    int ret = cx_strtod_lc(str, &d, decsep, groupsep);
+    if (ret != 0) return ret;
+    // FLT_MIN is the smallest POSITIVE normalized float: zero is in range and must pass
+    double test = d < 0 ? -d : d;
+    if ((test != 0. && test < FLT_MIN) || test > FLT_MAX) {
+        errno = ERANGE;
+        return -1;
+    }
+    *output = (float) d;
+    return 0;
 }
 
 int cx_strtod_lc(cxstring str, double *output, char decsep, const char *groupsep) {
-    // TODO: replace temporary implementation
-    (void) groupsep; // unused in temp impl
-    (void) decsep; // unused in temp impl
-    char *s = malloc(str.length + 1);
-    memcpy(s, str.ptr, str.length);
-    s[str.length] = '\0';
-    char *e;
-    *output = strtod(s, &e);
-    int r = !(e && *e == '\0');
-    free(s);
-    return r;
+    // Parses a decimal floating point number (optional sign, digits with optional
+    // group separators, optional fraction after decsep, optional exponent).
+    // Returns zero on success; otherwise returns -1 and sets errno to EINVAL.
+    // The result is written to output only on success.
+    // TODO: overflow check
+    // TODO: increase precision
+
+    // trim and check
+    str = cx_strtrim(str);
+    if (str.length == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    double result = 0.;
+    int sign = 1;
+
+    // check if there is a sign
+    if (str.ptr[0] == '-') {
+        sign = -1;
+        str.ptr++;
+        str.length--;
+    } else if (str.ptr[0] == '+') {
+        str.ptr++;
+        str.length--;
+    }
+
+    // there must be at least one char to parse
+    if (str.length == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    // parse the integer part: digits (and group separators) until something else shows up
+    // note: the cast to unsigned char is required, isdigit() is undefined for negative values
+    size_t pos = 0;
+    size_t digits = 0; // mantissa digits seen so far (integer and fraction part)
+    do {
+        if (isdigit((unsigned char) str.ptr[pos])) {
+            result = result * 10 + (str.ptr[pos] - '0');
+            digits++;
+        } else if (str.ptr[pos] == '\0' || strchr(groupsep, str.ptr[pos]) == NULL) {
+            // strchr() also matches the terminator of groupsep, so a NUL byte
+            // in the input must be excluded explicitly
+            break;
+        }
+    } while (++pos < str.length);
+
+    // is the next char the decsep?
+    if (pos < str.length && str.ptr[pos] == decsep) {
+        pos++;
+        // parse the fraction until exponent or end (may be empty, e.g. "5.")
+        double factor = 1.;
+        for (; pos < str.length; pos++) {
+            if (isdigit((unsigned char) str.ptr[pos])) {
+                factor *= 0.1;
+                result = result + factor * (str.ptr[pos] - '0');
+                digits++;
+            } else if (str.ptr[pos] == '\0' || strchr(groupsep, str.ptr[pos]) == NULL) {
+                break;
+            }
+        }
+    }
+
+    // a number without any digit (e.g. ".", "e5", or only group separators) is malformed
+    if (digits == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    // no exponent?
+    if (pos == str.length) {
+        *output = result * sign;
+        return 0;
+    }
+
+    // now the next separator MUST be the exponent separator
+    // and at least one char must follow
+    if ((str.ptr[pos] | 32) != 'e' || str.length <= pos + 1) {
+        errno = EINVAL;
+        return -1;
+    }
+    pos++;
+
+    // check if we have a sign for the exponent
+    double factor = 10.;
+    if (str.ptr[pos] == '-') {
+        factor = .1;
+        pos++;
+    } else if (str.ptr[pos] == '+') {
+        pos++;
+    }
+
+    // parse the exponent; at least one digit must follow
+    unsigned int exponent = 0;
+    size_t expdigits = 0;
+    for (; pos < str.length; pos++) {
+        if (isdigit((unsigned char) str.ptr[pos])) {
+            exponent = 10 * exponent + (str.ptr[pos] - '0');
+            expdigits++;
+        } else if (str.ptr[pos] == '\0' || strchr(groupsep, str.ptr[pos]) == NULL) {
+            errno = EINVAL;
+            return -1;
+        }
+    }
+    if (expdigits == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    // apply the exponent by fast exponentiation
+    do {
+        if (exponent & 1) {
+            result *= factor;
+        }
+        factor *= factor;
+    } while ((exponent >>= 1) > 0);
+
+    // store the result and exit
+    *output = result * sign;
+    return 0;
 }
\ No newline at end of file
--- a/tests/Makefile	Sat Dec 28 17:31:28 2024 +0100
+++ b/tests/Makefile	Sat Dec 28 17:32:36 2024 +0100
@@ -92,7 +92,7 @@
  ../src/cx/json.h ../src/cx/allocator.h ../src/cx/string.h \
  ../src/cx/buffer.h ../src/cx/array_list.h ../src/cx/list.h \
  ../src/cx/collection.h ../src/cx/iterator.h ../src/cx/compare.h \
- ../src/cx/mempool.h
+ ../src/cx/compare.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
@@ -134,7 +134,7 @@
 
 $(TEST_DIR)/test_string$(OBJ_EXT): test_string.c ../src/cx/test.h \
  ../src/cx/common.h util_allocator.h ../src/cx/allocator.h \
- ../src/cx/string.h ../src/cx/allocator.h
+ ../src/cx/string.h ../src/cx/allocator.h ../src/cx/compare.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
--- a/tests/test_json.c	Sat Dec 28 17:31:28 2024 +0100
+++ b/tests/test_json.c	Sat Dec 28 17:32:36 2024 +0100
@@ -30,7 +30,7 @@
 #include "cx/test.h"
 
 #include "cx/json.h"
-#include "cx/mempool.h"
+#include "cx/compare.h"
 
 CX_TEST(test_json_init_default) {
     CxJson json;
@@ -86,12 +86,12 @@
         CxJsonValue *longitude = cxJsonObjGet(position, "longitude");
         CX_TEST_ASSERT(cxJsonIsNumber(longitude));
         CX_TEST_ASSERT(!cxJsonIsInteger(longitude));
-        CX_TEST_ASSERT(cxJsonAsDouble(longitude) == -94.7099);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(longitude), -94.7099));
         CX_TEST_ASSERT(cxJsonAsInteger(longitude) == -94);
         CxJsonValue *latitude = cxJsonObjGet(position, "latitude");
         CX_TEST_ASSERT(cxJsonIsNumber(latitude));
         CX_TEST_ASSERT(!cxJsonIsInteger(latitude));
-        CX_TEST_ASSERT(cxJsonAsDouble(latitude) == 51.5539);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(latitude), 51.5539));
         CX_TEST_ASSERT(cxJsonAsInteger(latitude) == 51);
 
         CxJsonValue *timestamp = cxJsonObjGet(obj, "timestamp");
@@ -352,21 +352,21 @@
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == 3.1415);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 3.1415));
         cxJsonValueFree(v);
 
         cxJsonFill(&json, "-47.11e2 ");
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == -4711.0);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), -4711.0));
         cxJsonValueFree(v);
 
         cxJsonFill(&json, "0.815e-3 ");
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == 0.000815);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 0.000815));
         cxJsonValueFree(v);
 
         cxJsonFill(&json, "1.23E4 ");
@@ -382,7 +382,8 @@
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
         CX_TEST_ASSERT(cxJsonIsNumber(v));
         // be as precise as possible
-        CX_TEST_ASSERT(cxJsonAsDouble(v) == 1.8446744073709552e+19);
+        // TODO: this might produce format error / out of range in future implementations
+        CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 1.8446744073709552e+19));
         cxJsonValueFree(v);
     }
     cxJsonDestroy(&json);
--- a/tests/test_string.c	Sat Dec 28 17:31:28 2024 +0100
+++ b/tests/test_string.c	Sat Dec 28 17:32:36 2024 +0100
@@ -30,6 +30,7 @@
 #include "util_allocator.h"
 
 #include "cx/string.h"
+#include "cx/compare.h"
 
 #include <limits.h>
 #include <errno.h>
@@ -1167,16 +1168,47 @@
     float f;
     CX_TEST_DO {
         CX_TEST_ASSERT(0 == cx_strtof(cx_str("11.3"), &f));
-        CX_TEST_ASSERT(11.3f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(11.3f, f));
+
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("-4.711e+1"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(-47.11f, f));
 
         CX_TEST_ASSERT(0 == cx_strtof(cx_str("1.67262192595e-27"), &f));
-        CX_TEST_ASSERT(1.67262192595e-27f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(1.67262192595e-27f, f));
 
         CX_TEST_ASSERT(0 == cx_strtof_lc(cx_str("138,339.4"), &f, '.', ","));
-        CX_TEST_ASSERT(138339.4f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(138339.4f, f));
 
         CX_TEST_ASSERT(0 == cx_strtof_lc(cx_str("138,339.4"), &f, ',', "."));
-        CX_TEST_ASSERT(138.3394f == f);
+        CX_TEST_ASSERT(0 == cx_vcmp_float(138.3394f, f));
+
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e"), &f));
+        CX_TEST_ASSERT(errno == EINVAL);
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e+"), &f));
+        CX_TEST_ASSERT(errno == EINVAL);
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e-"), &f));
+        CX_TEST_ASSERT(errno == EINVAL);
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("15e-0"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(15.f, f));
+
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("3e38"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(3e38f, f));
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("3e39"), &f));
+        CX_TEST_ASSERT(errno == ERANGE);
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("-3e38"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(-3e38f, f));
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("-3e39"), &f));
+        CX_TEST_ASSERT(errno == ERANGE);
+        CX_TEST_ASSERT(0 == cx_strtof(cx_str("1.18e-38"), &f));
+        CX_TEST_ASSERT(0 == cx_vcmp_float(1.18e-38f, f));
+        errno = 0;
+        CX_TEST_ASSERT(0 != cx_strtof(cx_str("1.17e-38"), &f));
+        CX_TEST_ASSERT(errno == ERANGE);
     }
 }
 
@@ -1184,24 +1216,24 @@
     double d;
     CX_TEST_DO {
         CX_TEST_ASSERT(0 == cx_strtod(cx_str("11.3"), &d));
-        CX_TEST_ASSERT(11.3 == d);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(11.3, d));
+
+        CX_TEST_ASSERT(0 == cx_strtod(cx_str("-13.37"), &d));
+        CX_TEST_ASSERT(0 == cx_vcmp_double(-13.37, d));
+
+        CX_TEST_ASSERT(0 == cx_strtod(cx_str("-4.711e+1"), &d));
+        CX_TEST_ASSERT(0 == cx_vcmp_double(-47.11, d));
 
         CX_TEST_ASSERT(0 == cx_strtod(cx_str("1.67262192595e-27"), &d));
-        CX_TEST_ASSERT(1.67262192595e-27 == d);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(1.67262192595e-27, d));
 
         CX_TEST_ASSERT(0 == cx_strtod_lc(cx_str("138,339.4"), &d, '.', ","));
-        CX_TEST_ASSERT(138339.4 == d);
+        CX_TEST_ASSERT(0 == cx_vcmp_double(138339.4, d));
 
         CX_TEST_ASSERT(0 == cx_strtod_lc(cx_str("138,339.4"), &d, ',', "."));
-        CX_TEST_ASSERT(138.3394 == d);
-    }
-}
+        CX_TEST_ASSERT(0 == cx_vcmp_double(138.3394, d));
 
-CX_TEST(test_string_to_float_german) {
-    float f;
-    CX_TEST_DO {
-        // TODO: implement
-        (void)f;
+        // TODO: test and improve support for big numbers, precision, and out-of-range detection
     }
 }
 
@@ -1248,7 +1280,6 @@
     cx_test_register(suite, test_string_to_unsigned_integer);
     cx_test_register(suite, test_string_to_float);
     cx_test_register(suite, test_string_to_double);
-    cx_test_register(suite, test_string_to_float_german);
 
     return suite;
 }

mercurial