avoid copying the filled data in the json parser when possible - fixes #530

Fri, 27 Dec 2024 13:01:31 +0100

author
Mike Becker <universe@uap-core.de>
date
Fri, 27 Dec 2024 13:01:31 +0100
changeset 1060
0a7c1bb2372d
parent 1059
154eb64ce746
child 1061
c7d23892eab5

avoid copying the filled data in the json parser when possible - fixes #530

src/cx/json.h file | annotate | diff | comparison | revisions
src/json.c file | annotate | diff | comparison | revisions
tests/test_json.c file | annotate | diff | comparison | revisions
--- a/src/cx/json.h	Fri Dec 27 12:23:14 2024 +0100
+++ b/src/cx/json.h	Fri Dec 27 13:01:31 2024 +0100
@@ -398,6 +398,10 @@
      */
     CX_JSON_OK,
     /**
+     * The input buffer has never been filled.
+     */
+    CX_JSON_NULL_DATA,
+    /**
      * Allocating memory for the internal buffer failed.
      */
     CX_JSON_BUFFER_ALLOC_FAILED,
@@ -455,9 +459,15 @@
 }
 
 /**
- * Adds more data to the input buffer.
+ * Fills the input buffer.
  *
- * The data will be copied.
+ * @remark The JSON interface tries to avoid copying the input data.
+ * When you use this function and cxJsonNext() interleaving,
+ * no copies are performed. However, you must not free the
+ * pointer to the data in that case. When you invoke the fill
+ * function more than once before calling cxJsonNext(),
+ * the additional data is appended - inevitably leading to
+ * an allocation of a new buffer and copying the previous contents.
  *
  * @param json the json interface
  * @param buf the source buffer
@@ -500,12 +510,18 @@
 extern "C" {
 #else // __cplusplus
 /**
- * Adds more data to the input buffer.
+ * Fills the input buffer.
  *
- * The data will be copied.
+ * @remark The JSON interface tries to avoid copying the input data.
+ * When you use this function and cxJsonNext() interleaving,
+ * no copies are performed. However, you must not free the
+ * pointer to the data in that case. When you invoke the fill
+ * function more than once before calling cxJsonNext(),
+ * the additional data is appended - inevitably leading to
+ * an allocation of a new buffer and copying the previous contents.
  *
  * @param json the json interface
- * @param str the string to add to the buffer
+ * @param buf the source string
  * @return zero on success, non-zero on internal allocation error
  * @see cxJsonFilln()
  */
--- a/src/json.c	Fri Dec 27 12:23:14 2024 +0100
+++ b/src/json.c	Fri Dec 27 13:01:31 2024 +0100
@@ -289,35 +289,6 @@
     return result;
 }
 
-static int parse_number(cxmutstr str, void *value, bool asint) {
-    char *endptr = NULL;
-    if (str.length > 30) {
-        return 1;
-    }
-    // the buffer guarantees that we are working on a copied string
-    char c = str.ptr[str.length];
-    str.ptr[str.length] = 0;
-
-    if (asint) {
-        errno = 0;
-        long long v = strtoll(str.ptr, &endptr, 10);
-        if (errno == ERANGE) {
-            return 1;
-        }
-        *((int64_t*)value) = (int64_t) v;
-    } else {
-        // TODO: proper JSON spec number parser
-        // TODO: also return an error when loss of precision is high
-        double v = strtod(str.ptr, &endptr);
-        *((double*)value) = v;
-    }
-
-    // recover from the hack
-    str.ptr[str.length] = c;
-
-    return endptr != &str.ptr[str.length];
-}
-
 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
     CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue));
     if (v == NULL) {
@@ -400,8 +371,6 @@
 
     json->vbuf = json->vbuf_internal;
     json->vbuf_capacity = cx_nmemb(json->vbuf_internal);
-
-    cxBufferInit(&json->buffer, NULL, 256, NULL, CX_BUFFER_AUTO_EXTEND);
 }
 
 void cxJsonDestroy(CxJson *json) {
@@ -417,16 +386,16 @@
 }
 
 int cxJsonFilln(CxJson *json, const char *buf, size_t size) {
-    // we use the UCX buffer to write the data
-    // but reset the position immediately to enable parsing
-    size_t old_pos = json->buffer.pos;
-    cxBufferSeek(&json->buffer, 0, SEEK_END);
-    size_t written = cxBufferWrite(buf, 1, size, &json->buffer);
-    if (0 == cxBufferTerminate(&json->buffer)) {
-        written++;
+    if (cxBufferEof(&json->buffer)) {
+        // reinitialize the buffer
+        cxBufferDestroy(&json->buffer);
+        cxBufferInit(&json->buffer, (char*) buf, size,
+            NULL, CX_BUFFER_AUTO_EXTEND | CX_BUFFER_COPY_ON_WRITE);
+        json->buffer.size = size;
+        return 0;
+    } else {
+        return size != cxBufferAppend(buf, 1, size, &json->buffer);
     }
-    json->buffer.pos = old_pos;
-    return written != size + 1;
 }
 
 static void json_add_state(CxJson *json, int state) {
@@ -508,8 +477,14 @@
                 if (NULL == (vbuf = create_json_value(json, type))) {
                     return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
-                if (parse_number(token.content, &vbuf->value,type == CX_JSON_INTEGER)) {
-                    return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
+                if (type == CX_JSON_INTEGER) {
+                    if (cx_strtoi64(token.content, &vbuf->value.integer, 10)) {
+                        return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
+                    }
+                } else {
+                    if (cx_strtod(token.content, &vbuf->value.number)) {
+                        return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
+                    }
                 }
                 return_rec(CX_JSON_NO_ERROR);
             }
@@ -600,6 +575,11 @@
 }
 
 CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value) {
+    // check if buffer has been filled
+    if (json->buffer.space == NULL) {
+        return CX_JSON_NULL_DATA;
+    }
+
     // initialize output value
     *value = &cx_json_value_nothing;
 
@@ -607,7 +587,6 @@
     CxJsonStatus result;
     do {
         result = json_parse(json);
-        cxBufferShiftLeft(&json->buffer, json->buffer.pos);
         if (result == CX_JSON_NO_ERROR && json->states_size == 1) {
             // final state reached
             assert(json->states[0] == JP_STATE_VALUE_END);
--- a/tests/test_json.c	Fri Dec 27 12:23:14 2024 +0100
+++ b/tests/test_json.c	Fri Dec 27 13:01:31 2024 +0100
@@ -376,6 +376,14 @@
         CX_TEST_ASSERT(cxJsonAsInteger(v) == 12300);
         CX_TEST_ASSERT(cxJsonAsDouble(v) == 12300.0);
         cxJsonValueFree(v);
+
+        cxJsonFill(&json, "18446744073709551615.0123456789 ");
+        result = cxJsonNext(&json, &v);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsNumber(v));
+        // be as precise as possible
+        CX_TEST_ASSERT(cxJsonAsDouble(v) == 1.8446744073709552e+19);
+        cxJsonValueFree(v);
     }
     cxJsonDestroy(&json);
 }
@@ -394,15 +402,6 @@
         CX_TEST_ASSERT(v->type == CX_JSON_NOTHING);
         cxJsonReset(&json);
 
-#if 0 // TODO: discuss if it is intended that this produces -47 as valid value
-        cxJsonFill(&json, "-47,11e2 ");
-        result = cxJsonNext(&json, &v);
-        CX_TEST_ASSERTM(result == CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN,
-                        "decimal separator cannot be a comma");
-        CX_TEST_ASSERT(v->type == CX_JSON_NOTHING);
-        cxJsonReset(&json);
-#endif
-
         cxJsonFill(&json, "0.815e-3.0 ");
         result = cxJsonNext(&json, &v);
         CX_TEST_ASSERTM(result == CX_JSON_FORMAT_ERROR_NUMBER,
@@ -451,13 +450,6 @@
             "30 digit int does not fit into 64-bit int");
         CX_TEST_ASSERT(v->type == CX_JSON_NOTHING);
         cxJsonReset(&json);
-
-        cxJsonFill(&json, "18446744073709551615.0123456789 ");
-        result = cxJsonNext(&json, &v);
-        CX_TEST_ASSERTM(result == CX_JSON_FORMAT_ERROR_NUMBER,
-            "numbers with more than 30 characters are unsupported in general");
-        CX_TEST_ASSERT(v->type == CX_JSON_NOTHING);
-        cxJsonReset(&json);
     }
     cxJsonDestroy(&json);
 }

mercurial