Sat, 28 Dec 2024 17:32:36 +0100
implement floating point string to number conversions
The current implementation is not extremely precise. That is why
we are only testing against our own compare functions.
A future revision should increase the precision.
fixes #532
src/string.c | file | annotate | diff | comparison | revisions | |
tests/Makefile | file | annotate | diff | comparison | revisions | |
tests/test_json.c | file | annotate | diff | comparison | revisions | |
tests/test_string.c | file | annotate | diff | comparison | revisions |
--- a/src/string.c Sat Dec 28 17:31:28 2024 +0100 +++ b/src/string.c Sat Dec 28 17:32:36 2024 +0100 @@ -34,6 +34,7 @@ #include <assert.h> #include <errno.h> #include <limits.h> +#include <float.h> #ifndef _WIN32 @@ -961,18 +962,18 @@ // if base is 2 or 16, some leading stuff may appear if (base == 2) { - if (str.ptr[0] == 'b' || str.ptr[0] == 'B') { + if ((str.ptr[0] | 32) == 'b') { start = 1; } else if (str.ptr[0] == '0' && str.length > 1) { - if (str.ptr[1] == 'b' || str.ptr[1] == 'B') { + if ((str.ptr[1] | 32) == 'b') { start = 2; } } } else if (base == 16) { - if (str.ptr[0] == 'x' || str.ptr[0] == 'X' || str.ptr[0] == '#') { + if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') { start = 1; } else if (str.ptr[0] == '0' && str.length > 1) { - if (str.ptr[1] == 'x' || str.ptr[1] == 'X') { + if ((str.ptr[1] | 32) == 'x') { start = 2; } } @@ -1043,29 +1044,140 @@ } int cx_strtof_lc(cxstring str, float *output, char decsep, const char *groupsep) { - // TODO: replace temporary implementation - (void) groupsep; // unused in temp impl - (void) decsep; // unused in temp impl - char *s = malloc(str.length + 1); - memcpy(s, str.ptr, str.length); - s[str.length] = '\0'; - char *e; - *output = strtof(s, &e); - int r = !(e && *e == '\0'); - free(s); - return r; + // use string to double and add a range check + double d; + int ret = cx_strtod_lc(str, &d, decsep, groupsep); + if (ret != 0) return ret; + // note: FLT_MIN is the smallest POSITIVE number that can be represented + double test = d < 0 ? -d : d; + if (test < FLT_MIN || test > FLT_MAX) { + errno = ERANGE; + return -1; + } + *output = (float) d; + return 0; } int cx_strtod_lc(cxstring str, double *output, char decsep, const char *groupsep) { - // TODO: replace temporary implementation - (void) groupsep; // unused in temp impl - (void) decsep; // unused in temp impl - char *s = malloc(str.length + 1); - memcpy(s, str.ptr, str.length); - s[str.length] = '\0'; - char *e; - *output = strtod(s, &e); - int r = !(e && *e == '\0'); - free(s); - return r; + // TODO: overflow check + // TODO: increase precision + + // trim and check + str = cx_strtrim(str); + if (str.length == 0) { + errno = EINVAL; + return -1; + } + + double result = 0.; + int sign = 1; + + // check if there is a sign + if (str.ptr[0] == '-') { + sign = -1; + str.ptr++; + str.length--; + } else if (str.ptr[0] == '+') { + str.ptr++; + str.length--; + } + + // there must be at least one char to parse + if (str.length == 0) { + errno = EINVAL; + return -1; + } + + // parse all digits until we find the decsep + size_t pos = 0; + do { + if (isdigit(str.ptr[pos])) { + result = result * 10 + (str.ptr[pos] - '0'); + } else if (strchr(groupsep, str.ptr[pos]) == NULL) { + break; + } + } while (++pos < str.length); + + // already done? + if (pos == str.length) { + *output = result * sign; + return 0; + } + + // is the next char the decsep? + if (str.ptr[pos] == decsep) { + pos++; + // it may end with the decsep, if it did not start with it + if (pos == str.length) { + if (str.length == 1) { + errno = EINVAL; + return -1; + } else { + *output = result * sign; + return 0; + } + } + // parse everything until exponent or end + double factor = 1.; + do { + if (isdigit(str.ptr[pos])) { + factor *= 0.1; + result = result + factor * (str.ptr[pos] - '0'); + } else if (strchr(groupsep, str.ptr[pos]) == NULL) { + break; + } + } while (++pos < str.length); + } + + // no exponent? + if (pos == str.length) { + *output = result * sign; + return 0; + } + + // now the next separator MUST be the exponent separator + // and at least one char must follow + if ((str.ptr[pos] | 32) != 'e' || str.length <= pos + 1) { + errno = EINVAL; + return -1; + } + pos++; + + // check if we have a sign for the exponent + double factor = 10.; + if (str.ptr[pos] == '-') { + factor = .1; + pos++; + } else if (str.ptr[pos] == '+') { + pos++; + } + + // at least one digit must follow + if (pos == str.length) { + errno = EINVAL; + return -1; + } + + // parse the exponent + unsigned int exp = 0; + do { + if (isdigit(str.ptr[pos])) { + exp = 10 * exp + (str.ptr[pos] - '0'); + } else if (strchr(groupsep, str.ptr[pos]) == NULL) { + errno = EINVAL; + return -1; + } + } while (++pos < str.length); + + // apply the exponent by fast exponentiation + do { + if (exp & 1) { + result *= factor; + } + factor *= factor; + } while ((exp >>= 1) > 0); + + // store the result and exit + *output = result * sign; + return 0; } \ No newline at end of file
--- a/tests/Makefile Sat Dec 28 17:31:28 2024 +0100 +++ b/tests/Makefile Sat Dec 28 17:32:36 2024 +0100 @@ -92,7 +92,7 @@ ../src/cx/json.h ../src/cx/allocator.h ../src/cx/string.h \ ../src/cx/buffer.h ../src/cx/array_list.h ../src/cx/list.h \ ../src/cx/collection.h ../src/cx/iterator.h ../src/cx/compare.h \ - ../src/cx/mempool.h + ../src/cx/compare.h @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -I../src -c $< @@ -134,7 +134,7 @@ $(TEST_DIR)/test_string$(OBJ_EXT): test_string.c ../src/cx/test.h \ ../src/cx/common.h util_allocator.h ../src/cx/allocator.h \ - ../src/cx/string.h ../src/cx/allocator.h + ../src/cx/string.h ../src/cx/allocator.h ../src/cx/compare.h @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -I../src -c $<
--- a/tests/test_json.c Sat Dec 28 17:31:28 2024 +0100 +++ b/tests/test_json.c Sat Dec 28 17:32:36 2024 +0100 @@ -30,7 +30,7 @@ #include "cx/test.h" #include "cx/json.h" -#include "cx/mempool.h" +#include "cx/compare.h" CX_TEST(test_json_init_default) { CxJson json; @@ -86,12 +86,12 @@ CxJsonValue *longitude = cxJsonObjGet(position, "longitude"); CX_TEST_ASSERT(cxJsonIsNumber(longitude)); CX_TEST_ASSERT(!cxJsonIsInteger(longitude)); - CX_TEST_ASSERT(cxJsonAsDouble(longitude) == -94.7099); + CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(longitude), -94.7099)); CX_TEST_ASSERT(cxJsonAsInteger(longitude) == -94); CxJsonValue *latitude = cxJsonObjGet(position, "latitude"); CX_TEST_ASSERT(cxJsonIsNumber(latitude)); CX_TEST_ASSERT(!cxJsonIsInteger(latitude)); - CX_TEST_ASSERT(cxJsonAsDouble(latitude) == 51.5539); + CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(latitude), 51.5539)); CX_TEST_ASSERT(cxJsonAsInteger(latitude) == 51); CxJsonValue *timestamp = cxJsonObjGet(obj, "timestamp"); @@ -352,21 +352,21 @@ result = cxJsonNext(&json, &v); CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); CX_TEST_ASSERT(cxJsonIsNumber(v)); - CX_TEST_ASSERT(cxJsonAsDouble(v) == 3.1415); + CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 3.1415)); cxJsonValueFree(v); cxJsonFill(&json, "-47.11e2 "); result = cxJsonNext(&json, &v); CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); CX_TEST_ASSERT(cxJsonIsNumber(v)); - CX_TEST_ASSERT(cxJsonAsDouble(v) == -4711.0); + CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), -4711.0)); cxJsonValueFree(v); cxJsonFill(&json, "0.815e-3 "); result = cxJsonNext(&json, &v); CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); CX_TEST_ASSERT(cxJsonIsNumber(v)); - CX_TEST_ASSERT(cxJsonAsDouble(v) == 0.000815); + CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 0.000815)); cxJsonValueFree(v); cxJsonFill(&json, "1.23E4 "); @@ -382,7 +382,8 @@ CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); CX_TEST_ASSERT(cxJsonIsNumber(v)); // be as precise as possible - CX_TEST_ASSERT(cxJsonAsDouble(v) == 1.8446744073709552e+19); + // TODO: this might produce format error / out of range in future implementations + CX_TEST_ASSERT(0 == cx_vcmp_double(cxJsonAsDouble(v), 1.8446744073709552e+19)); cxJsonValueFree(v); } cxJsonDestroy(&json);
--- a/tests/test_string.c Sat Dec 28 17:31:28 2024 +0100 +++ b/tests/test_string.c Sat Dec 28 17:32:36 2024 +0100 @@ -30,6 +30,7 @@ #include "util_allocator.h" #include "cx/string.h" +#include "cx/compare.h" #include <limits.h> #include <errno.h> @@ -1167,16 +1168,47 @@ float f; CX_TEST_DO { CX_TEST_ASSERT(0 == cx_strtof(cx_str("11.3"), &f)); - CX_TEST_ASSERT(11.3f == f); + CX_TEST_ASSERT(0 == cx_vcmp_float(11.3f, f)); + + CX_TEST_ASSERT(0 == cx_strtof(cx_str("-4.711e+1"), &f)); + CX_TEST_ASSERT(0 == cx_vcmp_float(-47.11f, f)); CX_TEST_ASSERT(0 == cx_strtof(cx_str("1.67262192595e-27"), &f)); - CX_TEST_ASSERT(1.67262192595e-27f == f); + CX_TEST_ASSERT(0 == cx_vcmp_float(1.67262192595e-27f, f)); CX_TEST_ASSERT(0 == cx_strtof_lc(cx_str("138,339.4"), &f, '.', ",")); - CX_TEST_ASSERT(138339.4f == f); + CX_TEST_ASSERT(0 == cx_vcmp_float(138339.4f, f)); CX_TEST_ASSERT(0 == cx_strtof_lc(cx_str("138,339.4"), &f, ',', ".")); - CX_TEST_ASSERT(138.3394f == f); + CX_TEST_ASSERT(0 == cx_vcmp_float(138.3394f, f)); + + errno = 0; + CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e"), &f)); + CX_TEST_ASSERT(errno == EINVAL); + errno = 0; + CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e+"), &f)); + CX_TEST_ASSERT(errno == EINVAL); + errno = 0; + CX_TEST_ASSERT(0 != cx_strtof(cx_str("15e-"), &f)); + CX_TEST_ASSERT(errno == EINVAL); + CX_TEST_ASSERT(0 == cx_strtof(cx_str("15e-0"), &f)); + CX_TEST_ASSERT(0 == cx_vcmp_float(15.f, f)); + + CX_TEST_ASSERT(0 == cx_strtof(cx_str("3e38"), &f)); + CX_TEST_ASSERT(0 == cx_vcmp_float(3e38f, f)); + errno = 0; + CX_TEST_ASSERT(0 != cx_strtof(cx_str("3e39"), &f)); + CX_TEST_ASSERT(errno == ERANGE); + CX_TEST_ASSERT(0 == cx_strtof(cx_str("-3e38"), &f)); + CX_TEST_ASSERT(0 == cx_vcmp_float(-3e38f, f)); + errno = 0; + CX_TEST_ASSERT(0 != cx_strtof(cx_str("-3e39"), &f)); + CX_TEST_ASSERT(errno == ERANGE); + CX_TEST_ASSERT(0 == cx_strtof(cx_str("1.18e-38"), &f)); + CX_TEST_ASSERT(0 == cx_vcmp_float(1.18e-38f, f)); + errno = 0; + CX_TEST_ASSERT(0 != cx_strtof(cx_str("1.17e-38"), &f)); + CX_TEST_ASSERT(errno == ERANGE); } } @@ -1184,24 +1216,24 @@ double d; CX_TEST_DO { CX_TEST_ASSERT(0 == cx_strtod(cx_str("11.3"), &d)); - CX_TEST_ASSERT(11.3 == d); + CX_TEST_ASSERT(0 == cx_vcmp_double(11.3, d)); + + CX_TEST_ASSERT(0 == cx_strtod(cx_str("-13.37"), &d)); + CX_TEST_ASSERT(0 == cx_vcmp_double(-13.37, d)); + + CX_TEST_ASSERT(0 == cx_strtod(cx_str("-4.711e+1"), &d)); + CX_TEST_ASSERT(0 == cx_vcmp_double(-47.11, d)); CX_TEST_ASSERT(0 == cx_strtod(cx_str("1.67262192595e-27"), &d)); - CX_TEST_ASSERT(1.67262192595e-27 == d); + CX_TEST_ASSERT(0 == cx_vcmp_double(1.67262192595e-27, d)); CX_TEST_ASSERT(0 == cx_strtod_lc(cx_str("138,339.4"), &d, '.', ",")); - CX_TEST_ASSERT(138339.4 == d); + CX_TEST_ASSERT(0 == cx_vcmp_double(138339.4, d)); CX_TEST_ASSERT(0 == cx_strtod_lc(cx_str("138,339.4"), &d, ',', ".")); - CX_TEST_ASSERT(138.3394 == d); - } -} + CX_TEST_ASSERT(0 == cx_vcmp_double(138.3394, d)); -CX_TEST(test_string_to_float_german) { - float f; - CX_TEST_DO { - // TODO: implement - (void)f; + // TODO: test and improve support for big numbers, precision, and out-of-range detection } } @@ -1248,7 +1280,6 @@ cx_test_register(suite, test_string_to_unsigned_integer); cx_test_register(suite, test_string_to_float); cx_test_register(suite, test_string_to_double); - cx_test_register(suite, test_string_to_float_german); return suite; }