7 weeks ago
simplify how the json parser works
relates to #431 and fixes several errors related to issue #475
src/Makefile | file | annotate | diff | comparison | revisions | |
src/cx/json.h | file | annotate | diff | comparison | revisions | |
src/json.c | file | annotate | diff | comparison | revisions | |
tests/Makefile | file | annotate | diff | comparison | revisions | |
tests/test_json.c | file | annotate | diff | comparison | revisions |
--- a/src/Makefile Thu Dec 05 01:51:47 2024 +0100 +++ b/src/Makefile Thu Dec 05 01:54:12 2024 +0100 @@ -99,8 +99,8 @@ @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -c $< -$(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/string.h \ - cx/allocator.h cx/array_list.h cx/list.h cx/collection.h cx/iterator.h \ +$(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/allocator.h \ + cx/string.h cx/array_list.h cx/list.h cx/collection.h cx/iterator.h \ cx/compare.h @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -c $<
--- a/src/cx/json.h Thu Dec 05 01:51:47 2024 +0100 +++ b/src/cx/json.h Thu Dec 05 01:54:12 2024 +0100 @@ -77,20 +77,8 @@ CX_JSON_FALSE }; -enum cx_json_reader_type { - CX_JSON_READER_OBJECT_BEGIN, - CX_JSON_READER_OBJECT_END, - CX_JSON_READER_ARRAY_BEGIN, - CX_JSON_READER_ARRAY_END, - CX_JSON_READER_STRING, - CX_JSON_READER_INTEGER, - CX_JSON_READER_NUMBER, - CX_JSON_READER_LITERAL -}; - typedef enum cx_json_token_type CxJsonTokenType; typedef enum cx_json_value_type CxJsonValueType; -typedef enum cx_json_reader_type CxJsonReaderType; typedef struct cx_json_s CxJson; typedef struct cx_json_token_s CxJsonToken; @@ -106,47 +94,6 @@ typedef struct cx_json_obj_value_s CxJsonObjValue; -struct cx_json_token_s { - CxJsonTokenType tokentype; - const char *content; - size_t length; - size_t alloc; -}; - -struct cx_json_s { - const CxAllocator *allocator; - const char *buffer; - size_t size; - size_t pos; - - CxJsonToken uncompleted; - int tokenizer_escape; - - int *states; - size_t nstates; - size_t states_alloc; - int states_internal[8]; - - CxJsonToken reader_token; - CxJsonReaderType reader_type; - int value_ready; - char *value_name; - size_t value_name_len; - char *value_str; - size_t value_str_len; - int64_t value_int; - double value_double; - - CxJsonValue **readvalue_stack; - unsigned readvalue_nelm; - unsigned readvalue_alloc; - CxJsonValue *read_value; - int readvalue_initialized; - - unsigned reader_array_alloc; - - int error; -}; struct cx_json_array_s { CX_ARRAY_DECLARE(CxJsonValue*, array); @@ -174,9 +121,54 @@ } value; }; +struct cx_json_token_s { + CxJsonTokenType tokentype; + const char *content; + size_t length; + size_t alloc; +}; -cx_attr_nonnull_arg(2) -void cxJsonInit(const CxAllocator *allocator, CxJson *json); +struct cx_json_s { + const CxAllocator *allocator; + const char *buffer; + size_t size; + size_t pos; + + CxJsonToken uncompleted; + + /** + * A pointer to an intermediate state of the currently parsed value. + * + * Never access this value manually. + */ + CxJsonValue *parsed; + + /** + * State stack. + */ + CX_ARRAY_DECLARE_SIZED(int, states, unsigned); + + /** + * Value buffer stack. + */ + CX_ARRAY_DECLARE_SIZED(CxJsonValue*, vbuf, unsigned); + + /** + * Internally reserved memory for the state stack. + */ + int states_internal[8]; + + /** + * Internally reserved memory for the value buffer stack. + */ + CxJsonValue* vbuf_internal[8]; + + int error; // TODO: currently unused + bool tokenizer_escape; // TODO: check if it can be replaced with look-behind +}; + +cx_attr_nonnull_arg(1) +void cxJsonInit(CxJson *json, const CxAllocator *allocator); cx_attr_nonnull void cxJsonDestroy(CxJson *json); @@ -250,7 +242,6 @@ void cxJsonValueFree(CxJsonValue *value); -// TODO: if the CxJsonValue was a returned value, we could reference cxJsonValueFree() as deallocator cx_attr_nonnull int cxJsonNext(CxJson *json, CxJsonValue **value);
--- a/src/json.c Thu Dec 05 01:51:47 2024 +0100 +++ b/src/json.c Thu Dec 05 01:54:12 2024 +0100 @@ -28,6 +28,7 @@ #include <string.h> #include <ctype.h> +#include <assert.h> #include "cx/json.h" @@ -40,6 +41,11 @@ static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING}; +static void token_destroy(CxJsonToken *token) { + if (token->alloc > 0) { + free((char*) token->content); + } +} static int token_append(CxJsonToken *token, const char *buf, size_t len) { if (len == 0) { @@ -63,22 +69,20 @@ return 0; } -static CxJsonToken get_content(CxJson *p, size_t start, size_t end) { +static CxJsonToken token_create(CxJson *json, size_t start, size_t end) { CxJsonToken token = {0}; - size_t part2 = end - start; - if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) { - token.content = p->buffer + start; - token.length = part2; - } else if (part2 == 0) { - token = p->uncompleted; + size_t len = end - start; + if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { + token.content = json->buffer + start; + token.length = len; } else { - if (token_append(&p->uncompleted, p->buffer + start, end - start)) { + if (token_append(&json->uncompleted, json->buffer + start, len)) { // TODO: this does certainly not lead to correct error handling return (CxJsonToken){0}; } - token = p->uncompleted; + token = json->uncompleted; } - p->uncompleted = (CxJsonToken){0}; + json->uncompleted = (CxJsonToken){0}; return token; } @@ -141,17 +145,6 @@ return type; } -static CxJsonToken get_token(CxJson *p, size_t start, size_t end) { - CxJsonToken token = get_content(p, start, end); - if (token_isliteral(token.content, token.length)) { - token.tokentype = CX_JSON_TOKEN_LITERAL; - } else { - token.tokentype = token_numbertype(token.content, token.length); - } - p->pos = end; - return token; -} - static CxJsonTokenType char2ttype(char c) { switch (c) { case '[': { @@ -184,13 +177,13 @@ return CX_JSON_NO_TOKEN; } -static CxJsonToken json_parser_next_token(CxJson *p) { +static CxJsonToken token_parse_next(CxJson *json) { // current token type and start index - CxJsonTokenType ttype = p->uncompleted.tokentype; - size_t token_start = p->pos; + CxJsonTokenType ttype = json->uncompleted.tokentype; + size_t token_start = json->pos; - for (size_t i = p->pos; i < p->size; i++) { - char c = p->buffer[i]; + for (size_t i = json->pos; i < json->size; i++) { + char c = json->buffer[i]; if (ttype != CX_JSON_TOKEN_STRING) { // currently non-string token @@ -205,7 +198,7 @@ token_start = i; } else if (ctype != CX_JSON_NO_TOKEN) { // single-char token - p->pos = i + 1; + json->pos = i + 1; CxJsonToken token = {ctype, NULL, 0, 0}; return token; } else { @@ -215,30 +208,37 @@ } else { // finish token if (ctype != CX_JSON_NO_TOKEN) { - return get_token(p, token_start, i); + CxJsonToken ret = token_create(json, token_start, i); + if (token_isliteral(ret.content, ret.length)) { + ret.tokentype = CX_JSON_TOKEN_LITERAL; + } else { + ret.tokentype = token_numbertype(ret.content, ret.length); + } + json->pos = i; + return ret; } } } else { // currently inside a string - if (!p->tokenizer_escape) { + if (json->tokenizer_escape) { + json->tokenizer_escape = false; + } else { if (c == '"') { - CxJsonToken ret = get_content(p, token_start, i + 1); + CxJsonToken ret = token_create(json, token_start, i + 1); ret.tokentype = CX_JSON_TOKEN_STRING; - p->pos = i + 1; + json->pos = i + 1; return ret; } else if (c == '\\') { - p->tokenizer_escape = 1; + json->tokenizer_escape = true; } - } else { - p->tokenizer_escape = 0; } } } if (ttype != CX_JSON_NO_TOKEN) { // uncompleted token - size_t uncompeted_len = p->size - token_start; - if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) { + size_t uncompeted_len = json->size - token_start; + if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { // current token is uncompleted // save current token content in p->uncompleted CxJsonToken uncompleted; @@ -247,17 +247,17 @@ uncompleted.alloc = uncompeted_len + 16; char *tmp = malloc(uncompleted.alloc); if (tmp) { - memcpy(tmp, p->buffer + token_start, uncompeted_len); + memcpy(tmp, json->buffer + token_start, uncompeted_len); uncompleted.content = tmp; - p->uncompleted = uncompleted; + json->uncompleted = uncompleted; } else { - p->error = 1; + json->error = 1; } } else { // previously we also had an uncompleted token // combine the uncompleted token with the current token - if (token_append(&p->uncompleted, p->buffer + token_start, uncompeted_len)) { - p->error = 1; + if (token_append(&json->uncompleted, json->buffer + token_start, uncompeted_len)) { + json->error = 1; } } } @@ -273,7 +273,6 @@ result.length = 0; result.ptr = cxMalloc(a, len - 1); if (result.ptr == NULL) { - // TODO: check if this actually leads to correct error handling return result; } @@ -323,401 +322,321 @@ return (endptr != &buf[len]); } -static int add_state(CxJson *p, int state) { - CxArrayReallocator alloc = cx_array_reallocator(NULL, p->states_internal); - size_t size = p->nstates + 1; - size_t capacity = p->states_alloc; - // TODO: fix that nstates does not denote the size of the array - // TODO: replace with a 16 bit (or maybe even 8 bit) version of cx_array_add() - int result = cx_array_add( - &p->states, - &size, - &capacity, - sizeof(int), - &state, - &alloc - ); - if (result == 0) { - p->nstates = size - 1; - p->states_alloc = capacity; - } - return result; -} - -static void end_elm(CxJson *p, CxJsonReaderType type) { - p->reader_type = type; - p->nstates--; -} - -#define JP_STATE_VALUE_BEGIN 0 -#define JP_STATE_VALUE_BEGIN_OBJ 1 -#define JP_STATE_VALUE_BEGIN_AR 2 -#define JP_STATE_ARRAY_SEP_OR_CLOSE 3 -#define JP_STATE_OBJ_NAME_OR_CLOSE 4 -#define JP_STATE_OBJ_NAME 5 -#define JP_STATE_OBJ_COLON 6 -#define JP_STATE_OBJ_SEP_OR_CLOSE 7 - -static int next_state_after_value(int current) { - switch (current) { - default: - return -1; - // after value JSON complete, expect nothing - case JP_STATE_VALUE_BEGIN: - return -1; - // after obj value, expect ',' or '}' - case JP_STATE_VALUE_BEGIN_OBJ: - return JP_STATE_OBJ_SEP_OR_CLOSE; - // after array value, expect ',' or ']' - case JP_STATE_VALUE_BEGIN_AR: - return JP_STATE_ARRAY_SEP_OR_CLOSE; - } -} - -static void clear_valuename(CxJson *p) { - free(p->value_name); - p->value_name = NULL; - p->value_name_len = 0; -} - -static void clear_values(CxJson *p) { - free(p->value_str); - p->value_str = NULL; - p->value_str_len = 0; - p->value_int = 0; - p->value_double = 0; -} - -static int json_read(CxJson *p) { - int state = p->states[p->nstates]; - clear_values(p); - CxJsonToken token = json_parser_next_token(p); - p->reader_token = token; - - p->value_ready = 0; - - if (token.tokentype == CX_JSON_NO_TOKEN) { - return 0; - } - - int ret = 1; - - // 0 JP_STATE_VALUE_BEGIN value begin - // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object) - // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array) - // 3 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose - // 4 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose - // 5 JP_STATE_OBJ_NAME object, expect name - // 6 JP_STATE_OBJ_COLON object, expect ':' - // 7 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose - - if (state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) { - clear_valuename(p); +static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) { + CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue)); + if (v == NULL) { + return NULL; } - if (state < 3) { - // expect value - p->states[p->nstates] = next_state_after_value(state); - p->value_ready = 1; - switch (token.tokentype) { - case CX_JSON_TOKEN_BEGIN_ARRAY: { - p->reader_type = CX_JSON_READER_ARRAY_BEGIN; - ret = add_state(p, JP_STATE_VALUE_BEGIN_AR) ? -1 : 1; - break; - } - case CX_JSON_TOKEN_BEGIN_OBJECT: { - p->reader_type = CX_JSON_READER_OBJECT_BEGIN; - ret = add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE) ? -1 : 1; - break; - } - case CX_JSON_TOKEN_END_ARRAY: { - p->value_ready = 0; - end_elm(p, CX_JSON_READER_ARRAY_END); - break; - } - case CX_JSON_TOKEN_STRING: { - p->reader_type = CX_JSON_READER_STRING; - cxmutstr str = unescape_string(p->allocator, token.content, token.length); - if (str.ptr) { - p->value_str = str.ptr; - p->value_str_len = str.length; - } else { - ret = -1; - } - break; - } - case CX_JSON_TOKEN_INTEGER: { - p->reader_type = CX_JSON_READER_INTEGER; - if (parse_number(token.content, token.length, - &p->value_int, true)) { - ret = -1; - } - break; - } - case CX_JSON_TOKEN_NUMBER: { - p->reader_type = CX_JSON_READER_NUMBER; - if (parse_number(token.content, token.length, - &p->value_double, false)) { - ret = -1; - } - break; - } - case CX_JSON_TOKEN_LITERAL: { - p->reader_type = CX_JSON_READER_LITERAL; - break; - } - default: ret = -1; + // initialize the value + if (type == CX_JSON_ARRAY) { + cx_array_initialize_a(json->allocator, v->value.array.array, 16); + if (v->value.array.array == NULL) { + cxFree(json->allocator, v); + return NULL; + } + } else if (type == CX_JSON_OBJECT) { + cx_array_initialize_a(json->allocator, v->value.object.values, 16); + if (v->value.object.values == NULL) { + cxFree(json->allocator, v); + return NULL; } - } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) { - // expect ',' or ']' - if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { - p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR; - ret = json_read(p); - } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) { - end_elm(p, CX_JSON_READER_ARRAY_END); - } else { - ret = -1; - } - } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) { - if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) { - clear_valuename(p); - end_elm(p, CX_JSON_READER_OBJECT_END); - } else { - // expect string - if (token.tokentype != CX_JSON_TOKEN_STRING) return -1; + } else { + memset(v, 0, sizeof(CxJsonValue)); + } + v->type = type; + v->allocator = json->allocator; - if (p->value_name) free(p->value_name); - cxmutstr valname = unescape_string(p->allocator, token.content, token.length); - p->value_name = valname.ptr; - p->value_name_len = valname.length; - - // next state - p->states[p->nstates] = JP_STATE_OBJ_COLON; - ret = json_read(p); - } - } else if (state == JP_STATE_OBJ_COLON) { - // expect ':' - if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) return -1; - // next state - p->states[p->nstates] = JP_STATE_VALUE_BEGIN_OBJ; - ret = json_read(p); - } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) { - // expect ',' or '}' - if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { - p->states[p->nstates] = JP_STATE_OBJ_NAME; - ret = json_read(p); - } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) { - end_elm(p, CX_JSON_READER_OBJECT_END); + // add the new value to a possible parent + CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL); + if (json->vbuf_size > 0) { + CxJsonValue *parent = json->vbuf[json->vbuf_size - 1]; + if (parent->type == CX_JSON_ARRAY) { + cx_array_simple_add_a(&value_realloc, parent->value.array.array, v); + } else if (parent->type == CX_JSON_OBJECT) { + assert(parent->value.object.values_size > 0); + assert(parent->value.object.values[parent->value.object.values_size - 1].value == NULL); + parent->value.object.values[parent->value.object.values_size - 1].value = v; } else { - ret = -1; + assert(false); } } - - if (token.alloc > 0) { - free((char*)token.content); + + // add the new value to the stack, if it is an array or object + if (type == CX_JSON_ARRAY || type == CX_JSON_OBJECT) { + CxArrayReallocator vbuf_realloc = cx_array_reallocator(NULL, json->vbuf_internal); + if (cx_array_simple_add_a(&vbuf_realloc, json->vbuf, v)) { + cxFree(json->allocator, v); + return NULL; + } } - return ret; -} - -static CxJsonLiteral json_reader_literal(CxJson *p) { - const char *l = p->reader_token.content; - size_t token_len = p->reader_token.length; - if (token_len == 4 && !memcmp(l, "true", 4)) { - return CX_JSON_TRUE; - } else if (token_len == 5 && !memcmp(l, "false", 5)) { - return CX_JSON_FALSE; + // if currently no value is parsed, this is now the value of interest + if (json->parsed == NULL) { + json->parsed = v; } - return CX_JSON_NULL; -} -/* -------------------- read value functions -------------------- */ - -static int setup_read_value(CxJson *p) { - p->readvalue_alloc = PARSER_READVALUE_ALLOC; - p->readvalue_nelm = 0; - p->readvalue_stack = calloc(p->readvalue_alloc, sizeof(CxJsonValue *)); - if (!p->readvalue_stack) return -1; - - p->read_value = NULL; - p->readvalue_stack[0] = NULL; - - return 0; + return v; } -static int add_to_parent(CxJson *p, CxJsonValue *parent, CxJsonValue *v) { - if (!parent) { - return -1; // shouldn't happen but who knows - } - - CxArrayReallocator reallocator = cx_array_reallocator(p->allocator, NULL); - if (parent->type == CX_JSON_OBJECT) { - if (!p->value_name || p->value_name_len == 0) { - return -1; - } - char *valuename = p->value_name; - p->value_name = NULL; - - CxJsonObjValue newvalue; - newvalue.name = valuename; - newvalue.value = v; - - return cx_array_add( - &parent->value.object.values, - &parent->value.object.values_size, - &parent->value.object.values_capacity, - sizeof(CxJsonObjValue), - &newvalue, - &reallocator); - } else if (parent->type == CX_JSON_ARRAY) { - return cx_array_add( - &parent->value.array.array, - &parent->value.array.array_size, - &parent->value.array.array_capacity, - sizeof(CxJsonValue*), - &v, - &reallocator); - } else { - return -1; // should also never happen - } +static int json_obj_add_entry(CxJson *json, char *name) { + CxJsonObjValue kv = {name, NULL}; + assert(json->vbuf_size > 0); + CxJsonValue *parent = json->vbuf[json->vbuf_size - 1]; + assert(parent != NULL); + assert(parent->type == CX_JSON_OBJECT); + CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL); + return cx_array_simple_add_a(&value_realloc, parent->value.object.values, kv); } +#define JP_STATE_VALUE_BEGIN 0 +#define JP_STATE_VALUE_END 10 +#define JP_STATE_VALUE_BEGIN_OBJ 1 +#define JP_STATE_OBJ_SEP_OR_CLOSE 11 +#define JP_STATE_VALUE_BEGIN_AR 2 +#define JP_STATE_ARRAY_SEP_OR_CLOSE 12 +#define JP_STATE_OBJ_NAME_OR_CLOSE 5 +#define JP_STATE_OBJ_NAME 6 +#define JP_STATE_OBJ_COLON 7 -static int readvaluestack_add(CxJson *p, CxJsonValue *v) { - if (p->readvalue_nelm == p->readvalue_alloc) { - p->readvalue_alloc *= 2; - if (cx_reallocate(&p->readvalue_stack, sizeof(CxJsonValue *) * p->readvalue_alloc)) { - return -1; - } - } - p->readvalue_stack[p->readvalue_nelm++] = v; - return 0; -} - -void cxJsonInit(const CxAllocator *allocator, CxJson *json) { +void cxJsonInit(CxJson *json, const CxAllocator *allocator) { if (allocator == NULL) { allocator = cxDefaultAllocator; } memset(json, 0, sizeof(CxJson)); json->allocator = allocator; + json->states = json->states_internal; - json->states_alloc = cx_nmemb(json->states_internal); - // TODO: find better way to configure the initial allocation size for arrays and objects - json->reader_array_alloc = 8; + json->states_capacity = cx_nmemb(json->states_internal); + json->states[0] = JP_STATE_VALUE_BEGIN; + json->states_size = 1; + + json->vbuf = json->vbuf_internal; + json->vbuf_capacity = cx_nmemb(json->vbuf_internal); } -void cxJsonDestroy(CxJson *p) { - if (p->states != p->states_internal) { - free(p->states); +void cxJsonDestroy(CxJson *json) { + if (json->states != json->states_internal) { + free(json->states); } - free(p->readvalue_stack); - cxJsonValueFree(p->read_value); - free(p->value_name); - free(p->value_str); + if (json->vbuf != json->vbuf_internal) { + free(json->vbuf); + } + cxJsonValueFree(json->parsed); + json->parsed = NULL; } -int cxJsonFilln(CxJson *p, const char *buf, size_t size) { +int cxJsonFilln(CxJson *json, const char *buf, size_t size) { // TODO: implement rescue buffer like in CxProperties to allow subsequent fills - p->buffer = buf; - p->size = size; - p->pos = 0; + json->buffer = buf; + json->size = size; + json->pos = 0; return 0; } -int cxJsonNext(CxJson *p, CxJsonValue **value) { - // TODO: replace int with a status enum like in CxProperties +static void json_add_state(CxJson *json, int state) { + // we have guaranteed the necessary space with cx_array_simple_reserve() + // therefore, we can safely add the state in the simplest way possible + json->states[json->states_size++] = state; +} + +#define return_rec(code) \ + token_destroy(&token); \ + return code + +static int json_parse(CxJson *json) { + // Reserve a pointer for a possibly read value + CxJsonValue *vbuf = NULL; - *value = NULL; // TODO: maybe better initialize with NOTHING? - if (!p->readvalue_stack) { - if (setup_read_value(p)) return -1; + // grab the next token + CxJsonToken token = token_parse_next(json); + if (token.tokentype == CX_JSON_NO_TOKEN) { + // nothing found, wait for more data + return 0; + } + + // pop the current state + assert(json->states_size > 0); + int state = json->states[--json->states_size]; + + // guarantee that at least two more states fit on the stack + CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal); + if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) { + return -1; } - while (p->readvalue_nelm > 0 || !p->read_value) { - if (p->value_ready) { - // value available without another read - CxJsonValue *v = cxCalloc(p->allocator, 1, sizeof(CxJsonValue)); - if (!v) return -1; - v->allocator = p->allocator; + + // 0 JP_STATE_VALUE_BEGIN value begin + // 10 JP_STATE_VALUE_END expect value end + // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object) + // 11 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose + // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array) + // 12 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose + // 5 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose + // 6 JP_STATE_OBJ_NAME object, expect name + // 7 JP_STATE_OBJ_COLON object, expect ':' - if (p->readvalue_nelm > 0) { - if (add_to_parent(p, p->readvalue_stack[p->readvalue_nelm - 1], v)) { - free(v); - return -1; + if (state < 3) { + // push expected end state to the stack + json_add_state(json, 10 + state); + switch (token.tokentype) { + case CX_JSON_TOKEN_BEGIN_ARRAY: { + if (create_json_value(json, CX_JSON_ARRAY) == NULL) { + // TODO: error code - no memory + return_rec(-1); + } + json_add_state(json, JP_STATE_VALUE_BEGIN_AR); + return_rec(1); + } + case CX_JSON_TOKEN_BEGIN_OBJECT: { + if (create_json_value(json, CX_JSON_OBJECT) == NULL) { + // TODO: error code - no memory + return_rec(-1); + } + json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE); + return_rec(1); + } + case CX_JSON_TOKEN_STRING: { + if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) { + // TODO: error code - no memory + return_rec(-1); + } + cxmutstr str = unescape_string(json->allocator, token.content, token.length); + if (str.ptr == NULL) { + // TODO: error code - no memory + return_rec(-1); + } + vbuf->value.string = str; + return_rec(1); + } + case CX_JSON_TOKEN_INTEGER: + case CX_JSON_TOKEN_NUMBER: { + int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER; + if (NULL == (vbuf = create_json_value(json, type))) { + // TODO: error code - no memory + return_rec(-1); + } + if (parse_number(token.content, token.length, &vbuf->value,type == CX_JSON_INTEGER)) { + // TODO: error code - format error + return_rec(-1); } - } else { - // set this value as root - p->read_value = v; + return_rec(1); + } + case CX_JSON_TOKEN_LITERAL: { + if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) { + // TODO: error code - no memory + return_rec(-1); + } + const char *l = token.content; + size_t token_len = token.length; + if (token_len == 4 && !memcmp(l, "true", 4)) { + vbuf->value.literal = CX_JSON_TRUE; + } else if (token_len == 5 && !memcmp(l, "false", 5)) { + vbuf->value.literal = CX_JSON_FALSE; + } else { + vbuf->value.literal = CX_JSON_NULL; + } + return_rec(1); + } + default: { + // TODO: error code - unexpected token + return_rec(-1); + } + } + } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) { + // expect ',' or ']' + if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { + json_add_state(json, JP_STATE_VALUE_BEGIN_AR); + return_rec(1); + } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) { + // discard the array from the value buffer + json->vbuf_size--; + return_rec(1); + } else { + // TODO: error code - unexpected token + return_rec(-1); + } + } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) { + if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) { + // discard the obj from the value buffer + json->vbuf_size--; + return_rec(1); + } else { + // expect string + if (token.tokentype != CX_JSON_TOKEN_STRING) { + // TODO: error code - unexpected token + return_rec(-1); } - switch (p->reader_type) { - case CX_JSON_READER_OBJECT_BEGIN: { - v->type = CX_JSON_OBJECT; - if (readvaluestack_add(p, v)) { - return -1; - } - break; - } - case CX_JSON_READER_OBJECT_END: - return -1; // should not happen - case CX_JSON_READER_ARRAY_BEGIN: { - v->type = CX_JSON_ARRAY; - if (readvaluestack_add(p, v)) { - return -1; - } - break; - } - case CX_JSON_READER_ARRAY_END: - return -1; // should not happen - case CX_JSON_READER_STRING: { - v->type = CX_JSON_STRING; - if (p->value_str) { - v->value.string.ptr = p->value_str; - v->value.string.length = p->value_str_len; - p->value_str = NULL; - } - break; - } - case CX_JSON_READER_INTEGER: { - v->type = CX_JSON_INTEGER; - v->value.integer = p->value_int; - break; - } - case CX_JSON_READER_NUMBER: { - v->type = CX_JSON_NUMBER; - v->value.number = p->value_double; - break; - } - case CX_JSON_READER_LITERAL: { - v->type = CX_JSON_LITERAL; - v->value.literal = json_reader_literal(p); - break; - } + // add new entry + cxmutstr name = unescape_string(json->allocator, token.content, token.length); + if (name.ptr == NULL) { + // TODO: error code - no mem + return_rec(-1); } - } else if (p->readvalue_initialized) { - CxJsonReaderType rt = p->reader_type; - if (rt == CX_JSON_READER_OBJECT_END || rt == CX_JSON_READER_ARRAY_END) { - p->readvalue_nelm--; - } - // else: p->value_ready is 1, this will be handled in the next run - } + json_obj_add_entry(json, name.ptr); - if (p->readvalue_nelm > 0 || !p->read_value) { - int r = json_read(p); - if (r != 1) { - p->readvalue_initialized = 0; - return r; - } - p->readvalue_initialized = 1; + // next state + json_add_state(json, JP_STATE_OBJ_COLON); + return_rec(1); + } + } else if (state == JP_STATE_OBJ_COLON) { + // expect ':' + if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) { + // TODO: error code - unexpected token + return_rec(-1); + } + // next state + json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ); + return_rec(1); + } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) { + // expect ',' or '}' + if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { + json_add_state(json, JP_STATE_OBJ_NAME); + return_rec(1); + } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) { + // discard the obj from the value buffer + json->vbuf_size--; + return_rec(1); + } else { + // TODO: error code - unexpected token + return_rec(-1); } + } else { + // should be unreachable + assert(false); + return_rec(-1); } +} + +int cxJsonNext(CxJson *json, CxJsonValue **value) { + // TODO: replace int with a status enum like in CxProperties + + // initialize output value + *value = &cx_json_value_nothing; - *value = p->read_value; - p->readvalue_initialized = 0; - p->read_value = NULL; - p->value_ready = 0; + // parse data + int result; + do { + result = json_parse(json); + if (result == 1 && json->states_size == 1) { + // final state reached + assert(json->states[0] == JP_STATE_VALUE_END); + assert(json->vbuf_size == 0); - return 1; + // write output value + *value = json->parsed; + json->parsed = NULL; + + // re-initialize state machine + json->states[0] = JP_STATE_VALUE_BEGIN; + + return 1; + } + } while (result == 1); + + return result; } void cxJsonValueFree(CxJsonValue *value) {
--- a/tests/Makefile Thu Dec 05 01:51:47 2024 +0100 +++ b/tests/Makefile Thu Dec 05 01:54:12 2024 +0100 @@ -87,10 +87,11 @@ @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -I../src -c $< -$(TEST_DIR)/test_json$(OBJ_EXT): test_json.c ../src/cx/test.h \ - ../src/cx/common.h ../src/cx/json.h ../src/cx/string.h \ - ../src/cx/allocator.h ../src/cx/array_list.h ../src/cx/list.h \ - ../src/cx/collection.h ../src/cx/iterator.h ../src/cx/compare.h +$(TEST_DIR)/test_json$(OBJ_EXT): test_json.c util_allocator.h \ + ../src/cx/allocator.h ../src/cx/common.h ../src/cx/test.h \ + ../src/cx/json.h ../src/cx/allocator.h ../src/cx/string.h \ + ../src/cx/array_list.h ../src/cx/list.h ../src/cx/collection.h \ + ../src/cx/iterator.h ../src/cx/compare.h ../src/cx/mempool.h @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -I../src -c $<
--- a/tests/test_json.c Thu Dec 05 01:51:47 2024 +0100 +++ b/tests/test_json.c Thu Dec 05 01:54:12 2024 +0100 @@ -26,6 +26,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "util_allocator.h" #include "cx/test.h" #include "cx/json.h" @@ -34,11 +35,13 @@ CX_TEST(test_json_init_default) { CxJson json; CX_TEST_DO { - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); CX_TEST_ASSERT(json.states == json.states_internal); - CX_TEST_ASSERT(json.nstates == 0); - CX_TEST_ASSERT(json.states_alloc == 8); - CX_TEST_ASSERT(json.reader_array_alloc == 8); + CX_TEST_ASSERT(json.states_size == 1); + CX_TEST_ASSERT(json.states_capacity >= 8); + CX_TEST_ASSERT(json.vbuf == json.vbuf_internal); + CX_TEST_ASSERT(json.vbuf_size == 0); + CX_TEST_ASSERT(json.vbuf_capacity >= 8); } } @@ -59,7 +62,7 @@ int result; CxJson json; - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); cxJsonFill(&json, text); // parse the big fat object @@ -126,7 +129,7 @@ int result; CxJson json; - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); CxJsonValue *obj; size_t part = 0; @@ -191,12 +194,12 @@ CxJsonValue *obj = NULL; for(int i=0;i<5;i++) { - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); cxJsonFill(&json, tests[i]); result = cxJsonNext(&json, &obj); CX_TEST_ASSERT(result == -1); - CX_TEST_ASSERT(obj == NULL); + CX_TEST_ASSERT(obj != NULL && obj->type == CX_JSON_NOTHING); cxJsonDestroy(&json); } } @@ -207,7 +210,7 @@ CxJsonValue *d1; cxstring text = cx_str("{\"test\": [{},{\"foo\": [[{\"bar\":[4, 2, [null, {\"key\": 47}]]}]]}]}"); CX_TEST_DO { - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); cxJsonFill(&json, text); cxJsonNext(&json, &d1); @@ -241,7 +244,7 @@ CX_TEST_ASSERT(cxJsonAsInteger(d10) == 47); CX_TEST_ASSERT(json.states != json.states_internal); - CX_TEST_ASSERT(json.states_alloc > cx_nmemb(json.states_internal)); + CX_TEST_ASSERT(json.states_capacity > cx_nmemb(json.states_internal)); cxJsonValueFree(d1); cxJsonDestroy(&json); @@ -250,7 +253,7 @@ CX_TEST(test_json_number) { CxJson json; - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); CX_TEST_DO { // TODO: find a better way to terminate values that are not arrays/objects CxJsonValue *v; @@ -273,7 +276,7 @@ CX_TEST(test_json_multiple_values) { CxJson json; - cxJsonInit(NULL, &json); + cxJsonInit(&json, NULL); CX_TEST_DO { CxJsonValue *v; int result; @@ -334,10 +337,10 @@ } CX_TEST(test_json_allocator) { - CxMempool *mp = cxMempoolCreate(64, NULL); - CxJson json; - cxJsonInit(mp->allocator, &json); - + CxTestingAllocator talloc; + cx_testing_allocator_init(&talloc); + CxAllocator *allocator = &talloc.base; + cxstring text = cx_str( "{\n" "\t\"message\":\"success\",\n" @@ -346,22 +349,52 @@ ); CX_TEST_DO { - int result; - CxJson json; - cxJsonInit(mp->allocator, &json); + cxJsonInit(&json, allocator); cxJsonFill(&json, text); CxJsonValue *obj; - result = cxJsonNext(&json, &obj); + int result = cxJsonNext(&json, &obj); CX_TEST_ASSERT(result == 1); - CX_TEST_ASSERT(obj->allocator == mp->allocator); + CX_TEST_ASSERT(obj->allocator == allocator); // this recursively frees everything cxJsonValueFree(obj); cxJsonDestroy(&json); - cxMempoolFree(mp); + + CX_TEST_ASSERT(cx_testing_allocator_verify(&talloc)); } + cx_testing_allocator_destroy(&talloc); +} + +CX_TEST(test_json_allocator_parse_error) { + CxTestingAllocator talloc; + cx_testing_allocator_init(&talloc); + CxAllocator *allocator = &talloc.base; + + cxstring text = cx_str( + "{\n" + "\t\"message\":\"success\"\n" // <-- missing comma + "\t\"data\":[\"value1\",{\"x\":123, \"y\":523 }]\n" + "}" + ); + + CX_TEST_DO { + CxJson json; + cxJsonInit(&json, allocator); + cxJsonFill(&json, text); + + CxJsonValue *obj = NULL; + int result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == -1); + CX_TEST_ASSERT(obj != NULL && obj->type == CX_JSON_NOTHING); + + // clean-up any left-over memory + cxJsonDestroy(&json); + + CX_TEST_ASSERT(cx_testing_allocator_verify(&talloc)); + } + cx_testing_allocator_destroy(&talloc); } CxTestSuite *cx_test_suite_json(void) { @@ -375,6 +408,7 @@ cx_test_register(suite, test_json_number); cx_test_register(suite, test_json_multiple_values); cx_test_register(suite, test_json_allocator); + cx_test_register(suite, test_json_allocator_parse_error); return suite; }