# HG changeset patch # User Mike Becker # Date 1729351511 -7200 # Node ID 10123f4d561809338c49277e2b0399b8c2f8158f # Parent 9b9385fcdfd5416cad8b45ea522c436a21fdd6a0 add first draft of json implementation - relates to #431 diff -r 9b9385fcdfd5 -r 10123f4d5618 src/Makefile --- a/src/Makefile Sat Oct 19 16:28:15 2024 +0200 +++ b/src/Makefile Sat Oct 19 17:25:11 2024 +0200 @@ -25,7 +25,7 @@ SRC = allocator.c array_list.c buffer.c compare.c hash_key.c hash_map.c \ iterator.c linked_list.c list.c map.c mempool.c printf.c string.c tree.c \ - utils.c properties.c + utils.c properties.c json.c OBJ_EXT=.o OBJ=$(SRC:%.c=$(build_dir)/%$(OBJ_EXT)) @@ -99,6 +99,11 @@ @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -c $< +$(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/string.h \ + cx/allocator.h cx/allocator.h + @echo "Compiling $<" + $(CC) -o $@ $(CFLAGS) -c $< + $(build_dir)/linked_list$(OBJ_EXT): linked_list.c cx/linked_list.h \ cx/common.h cx/list.h cx/collection.h cx/allocator.h cx/iterator.h \ cx/compare.h cx/utils.h cx/compare.h @@ -126,8 +131,8 @@ $(CC) -o $@ $(CFLAGS) -c $< $(build_dir)/properties$(OBJ_EXT): properties.c cx/properties.h \ - cx/common.h cx/string.h cx/allocator.h cx/array_list.h cx/list.h \ - cx/collection.h cx/iterator.h cx/compare.h + cx/common.h cx/string.h cx/allocator.h cx/map.h cx/collection.h \ + cx/iterator.h cx/compare.h cx/hash_key.h cx/array_list.h cx/list.h @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -c $< diff -r 9b9385fcdfd5 -r 10123f4d5618 src/cx/json.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cx/json.h Sat Oct 19 17:25:11 2024 +0200 @@ -0,0 +1,299 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/** + * \file json.h + * \brief Interface for parsing data from JSON files. 
+ * \author Mike Becker + * \author Olaf Wintermann + * \copyright 2-Clause BSD License + */ + +#ifndef UCX_JSON_H +#define UCX_JSON_H + +#include "common.h" +#include "string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum cx_json_token_type { + CX_JSON_NO_TOKEN, + CX_JSON_TOKEN_ERROR, + CX_JSON_TOKEN_BEGIN_ARRAY, + CX_JSON_TOKEN_BEGIN_OBJECT, + CX_JSON_TOKEN_END_ARRAY, + CX_JSON_TOKEN_END_OBJECT, + CX_JSON_TOKEN_NAME_SEPARATOR, + CX_JSON_TOKEN_VALUE_SEPARATOR, + CX_JSON_TOKEN_STRING, + CX_JSON_TOKEN_INTEGER, + CX_JSON_TOKEN_NUMBER, + CX_JSON_TOKEN_LITERAL, + CX_JSON_TOKEN_SPACE +}; + +enum cx_json_value_type { + CX_JSON_NOTHING, // this allows us to always return non-NULL values + CX_JSON_OBJECT, + CX_JSON_ARRAY, + CX_JSON_STRING, + CX_JSON_INTEGER, // TODO: the spec does not know integer types + CX_JSON_NUMBER, + CX_JSON_LITERAL +}; + +enum cx_json_literal_type { + CX_JSON_NULL, + CX_JSON_TRUE, + CX_JSON_FALSE +}; + +enum cx_json_reader_type { + CX_JSON_READER_OBJECT_BEGIN, + CX_JSON_READER_OBJECT_END, + CX_JSON_READER_ARRAY_BEGIN, + CX_JSON_READER_ARRAY_END, + CX_JSON_READER_STRING, + CX_JSON_READER_INTEGER, + CX_JSON_READER_NUMBER, + CX_JSON_READER_LITERAL +}; + +typedef enum cx_json_token_type CxJsonTokenType; +typedef enum cx_json_value_type CxJsonValueType; +typedef enum cx_json_literal_type CxJsonLiteralType; +typedef enum cx_json_reader_type CxJsonReaderType; + +typedef struct cx_json_s CxJson; +typedef struct cx_json_token_s CxJsonToken; + +typedef struct cx_json_value_s CxJsonValue; + +typedef struct cx_json_array_s CxJsonArray; +typedef struct cx_json_object_s CxJsonObject; +typedef struct cx_mutstr_s CxJsonString; +typedef struct cx_json_integer_s CxJsonInteger; +typedef struct cx_json_number_s CxJsonNumber; +typedef struct cx_json_literal_s CxJsonLiteral; + +typedef struct cx_json_obj_value_s CxJsonObjValue; + +struct cx_json_token_s { + CxJsonTokenType tokentype; + const char *content; + size_t length; + size_t alloc; +}; + +struct cx_json_s 
{ + const char *buffer; + size_t size; + size_t pos; + + CxJsonToken uncompleted; + int tokenizer_escape; + + int *states; + int nstates; + int states_alloc; + + CxJsonToken reader_token; + CxJsonReaderType reader_type; + int value_ready; + char *value_name; + size_t value_name_len; + char *value_str; + size_t value_str_len; + int64_t value_int; + double value_double; + + CxJsonValue **readvalue_stack; + int readvalue_nelm; + int readvalue_alloc; + CxJsonValue *read_value; + int readvalue_initialized; + + int reader_array_alloc; + + int error; +}; + +struct cx_json_array_s { + CxJsonValue **array; + size_t alloc; + size_t size; +}; + +struct cx_json_object_s { + CxJsonObjValue *values; + size_t alloc; + size_t size; +}; + +struct cx_json_obj_value_s { + char *name; + CxJsonValue *value; +}; + +// TODO: remove single member structs + +struct cx_json_integer_s { + int64_t value; +}; + +struct cx_json_number_s { + double value; +}; + +struct cx_json_literal_s { + CxJsonLiteralType literal; +}; + +struct cx_json_value_s { + CxJsonValueType type; + union { + CxJsonArray array; + CxJsonObject object; + CxJsonString string; + CxJsonInteger integer; + CxJsonNumber number; + CxJsonLiteral literal; + } value; +}; + +// TODO: add support for CxAllocator + +__attribute__((__nonnull__)) +void cxJsonInit(CxJson *json); + +__attribute__((__nonnull__)) +void cxJsonDestroy(CxJson *json); + +__attribute__((__nonnull__)) +void cxJsonFill(CxJson *json, const char *buf, size_t len); + +// TODO: discuss if it is intentional that cxJsonNext() will usually parse an entire file in one go +__attribute__((__nonnull__)) +int cxJsonNext(CxJson *json, CxJsonValue **value); + +void cxJsonValueFree(CxJsonValue *value); + +__attribute__((__nonnull__)) +static inline bool cxJsonIsObject(CxJsonValue *value) { + return value->type == CX_JSON_OBJECT; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsArray(CxJsonValue *value) { + return value->type == CX_JSON_ARRAY; +} + 
+__attribute__((__nonnull__)) +static inline bool cxJsonIsString(CxJsonValue *value) { + return value->type == CX_JSON_STRING; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsNumber(CxJsonValue *value) { + // TODO: this is not good, because an integer is also a number + return value->type == CX_JSON_NUMBER; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsInteger(CxJsonValue *value) { + return value->type == CX_JSON_INTEGER; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsLiteral(CxJsonValue *value) { + return value->type == CX_JSON_LITERAL; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsBool(CxJsonValue *value) { + return cxJsonIsLiteral(value) && value->value.literal.literal != CX_JSON_NULL; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsTrue(CxJsonValue *value) { + return cxJsonIsLiteral(value) && value->value.literal.literal == CX_JSON_TRUE; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsFalse(CxJsonValue *value) { + return cxJsonIsLiteral(value) && value->value.literal.literal == CX_JSON_FALSE; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonIsNull(CxJsonValue *value) { + return cxJsonIsLiteral(value) && value->value.literal.literal == CX_JSON_NULL; +} + +__attribute__((__nonnull__)) +static inline cxmutstr cxJsonAsString(CxJsonValue *value) { + // TODO: do we need a separate method to return this directly as cxstring? 
+ return value->value.string; +} + +__attribute__((__nonnull__)) +static inline double cxJsonAsDouble(CxJsonValue *value) { + return value->value.number.value; +} + +__attribute__((__nonnull__)) +static inline int64_t cxJsonAsInteger(CxJsonValue *value) { + return value->value.integer.value; +} + +__attribute__((__nonnull__)) +static inline bool cxJsonAsBool(CxJsonValue *value) { + return value->value.literal.literal == CX_JSON_TRUE; +} + +__attribute__((__nonnull__)) +static inline size_t cxJsonArrSize(CxJsonValue *value) { + return value->value.array.size; +} + +__attribute__((__nonnull__, __returns_nonnull__)) +CxJsonValue *cxJsonArrGet(CxJsonValue *value, size_t index); + +// TODO: add cxJsonArrIter() + +// TODO: implement cxJsonObjGet as a _Generic with support for cxstring +__attribute__((__nonnull__, __returns_nonnull__)) +CxJsonValue *cxJsonObjGet(CxJsonValue *value, const char* name); + +#ifdef __cplusplus +} +#endif + +#endif /* UCX_JSON_H */ + diff -r 9b9385fcdfd5 -r 10123f4d5618 src/json.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/json.c Sat Oct 19 17:25:11 2024 +0200 @@ -0,0 +1,825 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+
+#include "cx/json.h"
+#include "cx/allocator.h"
+
+/*
+ * RFC 8259
+ * https://tools.ietf.org/html/rfc8259
+ */
+
+#define PARSER_STATES_ALLOC 32
+
+static CxJsonValue cx_json_value_nothing = {CX_JSON_NOTHING, {0}};
+
+
+static int token_append(CxJsonToken *token, const char *buf, size_t len) {
+    if (len == 0) {
+        return 0;
+    }
+
+    size_t newlen = token->length + len;
+    if (token->alloc < newlen) {
+        char *newbuf = realloc(
+                token->alloc == 0 ?
NULL : (char *) token->content, + newlen); + if (!newbuf) { + return 1; + } + token->content = newbuf; + token->alloc = newlen; + } + + memcpy((char *) token->content + token->length, buf, len); + token->length = newlen; + return 0; +} + +static CxJsonToken get_content(CxJson *p, size_t start, size_t end) { + CxJsonToken token = {0}; + size_t part2 = end - start; + if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) { + token.content = p->buffer + start; + token.length = part2; + } else if (part2 == 0) { + token = p->uncompleted; + } else { + if (token_append(&p->uncompleted, p->buffer + start, end - start)) { + // TODO: this does certainly not lead to correct error handling + return (CxJsonToken){0}; + } + token = p->uncompleted; + } + p->uncompleted = (CxJsonToken){0}; + return token; +} + +static int token_isliteral(const char *content, size_t length) { + if (length == 4) { + if (!memcmp(content, "true", 4)) { + return 1; + } else if (!memcmp(content, "null", 4)) { + return 1; + } + } else if (length == 5 && !memcmp(content, "false", 5)) { + return 1; + } + return 0; +} + +static int num_isexp(const char *content, size_t length, size_t pos) { + if (pos >= length) { + return 0; + } + + int ok = 0; + for (size_t i = pos; i < length; i++) { + char c = content[i]; + if (isdigit(c)) { + ok = 1; + } else if (i == pos) { + if (!(c == '+' || c == '-')) { + return 0; + } + } else { + return 0; + } + } + + return ok; +} + +static CxJsonTokenType token_numbertype(const char *content, size_t length) { + if (length == 0) return CX_JSON_TOKEN_ERROR; + + if (content[0] != '-' && !isdigit(content[0])) { + return CX_JSON_TOKEN_ERROR; + } + + CxJsonTokenType type = CX_JSON_TOKEN_INTEGER; + for (size_t i = 1; i < length; i++) { + if (content[i] == '.') { + if (type == CX_JSON_TOKEN_NUMBER) { + return CX_JSON_TOKEN_ERROR; // more than one decimal separator + } + type = CX_JSON_TOKEN_NUMBER; + } else if (content[i] == 'e' || content[i] == 'E') { + return num_isexp(content, length, i + 
1) ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR; + } else if (!isdigit(content[i])) { + return CX_JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep + } + } + + return type; +} + +static CxJsonToken get_token(CxJson *p, size_t start, size_t end) { + CxJsonToken token = get_content(p, start, end); + if (token_isliteral(token.content, token.length)) { + token.tokentype = CX_JSON_TOKEN_LITERAL; + } else { + token.tokentype = token_numbertype(token.content, token.length); + } + p->pos = end; + return token; +} + +static CxJsonTokenType char2ttype(char c) { + switch (c) { + case '[': { + return CX_JSON_TOKEN_BEGIN_ARRAY; + } + case '{': { + return CX_JSON_TOKEN_BEGIN_OBJECT; + } + case ']': { + return CX_JSON_TOKEN_END_ARRAY; + } + case '}': { + return CX_JSON_TOKEN_END_OBJECT; + } + case ':': { + return CX_JSON_TOKEN_NAME_SEPARATOR; + } + case ',': { + return CX_JSON_TOKEN_VALUE_SEPARATOR; + } + case '"': { + return CX_JSON_TOKEN_STRING; + } + default: { + if (isspace(c)) { + return CX_JSON_TOKEN_SPACE; + } + } + } + return CX_JSON_NO_TOKEN; +} + +static CxJsonToken json_parser_next_token(CxJson *p) { + // current token type and start index + CxJsonTokenType ttype = p->uncompleted.tokentype; + size_t token_start = p->pos; + + for (size_t i = p->pos; i < p->size; i++) { + char c = p->buffer[i]; + if (ttype != CX_JSON_TOKEN_STRING) { + // currently non-string token + + CxJsonTokenType ctype = char2ttype(c); // start of new token? 
+ + if (ttype == CX_JSON_NO_TOKEN) { + if (ctype == CX_JSON_TOKEN_SPACE) { + continue; + } else if (ctype == CX_JSON_TOKEN_STRING) { + // begin string + ttype = CX_JSON_TOKEN_STRING; + token_start = i; + } else if (ctype != CX_JSON_NO_TOKEN) { + // single-char token + p->pos = i + 1; + CxJsonToken token = {ctype, NULL, 0, 0}; + return token; + } else { + ttype = CX_JSON_TOKEN_LITERAL; // number or literal + token_start = i; + } + } else { + // finish token + if (ctype != CX_JSON_NO_TOKEN) { + return get_token(p, token_start, i); + } + } + } else { + // currently inside a string + if (!p->tokenizer_escape) { + if (c == '"') { + CxJsonToken ret = get_content(p, token_start, i + 1); + ret.tokentype = CX_JSON_TOKEN_STRING; + p->pos = i + 1; + return ret; + } else if (c == '\\') { + p->tokenizer_escape = 1; + } + } else { + p->tokenizer_escape = 0; + } + } + } + + if (ttype != CX_JSON_NO_TOKEN) { + // uncompleted token + size_t uncompeted_len = p->size - token_start; + if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) { + // current token is uncompleted + // save current token content in p->uncompleted + CxJsonToken uncompleted; + uncompleted.tokentype = ttype; + uncompleted.length = uncompeted_len; + uncompleted.alloc = uncompeted_len + 16; + char *tmp = malloc(uncompleted.alloc); + if (tmp) { + memcpy(tmp, p->buffer + token_start, uncompeted_len); + uncompleted.content = tmp; + p->uncompleted = uncompleted; + } else { + p->error = 1; + } + } else { + // previously we also had an uncompleted token + // combine the uncompleted token with the current token + if (token_append(&p->uncompleted, p->buffer + token_start, uncompeted_len)) { + p->error = 1; + } + } + } + + CxJsonToken ret = {CX_JSON_NO_TOKEN, NULL, 0, 0}; + return ret; +} + +static cxmutstr unescape_string(const char *str, size_t len) { + // TODO: support more escape sequences + // we know that the unescaped string will be shorter by at least 2 chars + cxmutstr result; + result.length = 0; + result.ptr = 
malloc(len - 1);
+    if (result.ptr == NULL) {
+        // TODO: check if this actually leads to correct error handling
+        return result;
+    }
+
+    // u is true while the previous character was an unconsumed backslash
+    bool u = false;
+    // the first and the last character of the token are not copied
+    for (size_t i = 1; i < len - 1; i++) {
+        char c = str[i];
+        if (u) {
+            u = false;
+            if (c == 'n') {
+                c = '\n';
+            } else if (c == 't') {
+                c = '\t';
+            }
+            result.ptr[result.length++] = c;
+        } else {
+            if (c == '\\') {
+                u = true;
+            } else {
+                result.ptr[result.length++] = c;
+            }
+        }
+    }
+    result.ptr[result.length] = 0;
+
+    return result;
+}
+
+/**
+ * Parses the first \p len characters of \p str as a base 10 integer.
+ *
+ * The text is copied to a local, zero-terminated buffer before it is
+ * passed to strtoll(), so \p str does not need to be zero-terminated.
+ *
+ * \param str the text to parse
+ * \param len the number of characters to parse
+ * \param value receives the parsed integer on success
+ * \return zero on success, non-zero when the text is empty, longer than
+ * 30 characters, not entirely consumed by strtoll(), or out of range
+ */
+static int parse_integer(const char *str, size_t len, int64_t *value) {
+    char *endptr = NULL;
+    char buf[32];
+    // an empty text would otherwise be accepted as zero
+    if (len == 0 || len > 30) {
+        return 1;
+    }
+    memcpy(buf, str, len);
+    buf[len] = 0;
+
+    // strtoll() reports an overflow only via errno, so reset it first
+    errno = 0;
+    long long v = strtoll(buf, &endptr, 10);
+    if (endptr != &buf[len] || errno == ERANGE) {
+        return 1;
+    }
+    *value = (int64_t) v;
+
+    return 0;
+}
+
+/**
+ * Parses the first \p len characters of \p str as a floating point number.
+ *
+ * The text is copied to a local, zero-terminated buffer before it is
+ * passed to strtod(), so \p str does not need to be zero-terminated.
+ *
+ * \param str the text to parse
+ * \param len the number of characters to parse
+ * \param value receives the parsed number on success
+ * \return zero on success, non-zero when the text is empty, longer than
+ * 30 characters, or not entirely consumed by strtod()
+ */
+static int parse_number(const char *str, size_t len, double *value) {
+    char *endptr = NULL;
+    char buf[32];
+    // an empty text would otherwise be accepted as zero
+    if (len == 0 || len > 30) {
+        return 1;
+    }
+    memcpy(buf, str, len);
+    buf[len] = 0;
+
+    double v = strtod(buf, &endptr);
+    if (endptr != &buf[len]) {
+        return 1;
+    }
+    *value = v;
+
+    return 0;
+}
+
+/**
+ * Pushes a new state onto the state stack of the parser.
+ *
+ * p->nstates is the index of the current (top) state, so the new state is
+ * written to index nstates + 1 and the stack is grown by
+ * PARSER_STATES_ALLOC entries when that slot does not exist yet.
+ *
+ * \param p the parser
+ * \param state the state to push
+ * \return zero on success, non-zero when the stack could not be grown
+ */
+static int add_state(CxJson *p, int state) {
+    if (p->nstates + 1 >= p->states_alloc) {
+        int newalloc = p->states_alloc + PARSER_STATES_ALLOC;
+        if (cx_reallocate(&p->states, newalloc * sizeof(int))) {
+            return 1;
+        }
+        // only remember the new capacity when the reallocation succeeded
+        p->states_alloc = newalloc;
+    }
+    p->states[++p->nstates] = state;
+    return 0;
+}
+
+/**
+ * Records \p type as the current reader event and pops the top state.
+ *
+ * \param p the parser
+ * \param type the reader event to record
+ */
+static void end_elm(CxJson *p, CxJsonReaderType type) {
+    p->reader_type = type;
+    p->nstates--;
+}
+
+#define JP_STATE_VALUE_BEGIN         0
+#define JP_STATE_VALUE_BEGIN_OBJ     1
+#define JP_STATE_VALUE_BEGIN_AR      2
+#define JP_STATE_ARRAY_SEP_OR_CLOSE  3
+#define JP_STATE_OBJ_NAME_OR_CLOSE   4
+#define JP_STATE_OBJ_NAME            5
+#define JP_STATE_OBJ_COLON           6
+#define JP_STATE_OBJ_SEP_OR_CLOSE    7
+
+static int next_state_after_value(int current) {
+    switch (current) {
+        default:
+            return -1;
+        // after value JSON complete, expect nothing
+        case JP_STATE_VALUE_BEGIN:
+            return -1;
+        // after obj value, expect ',' or '}'
+        case 
JP_STATE_VALUE_BEGIN_OBJ: + return JP_STATE_OBJ_SEP_OR_CLOSE; + // after array value, expect ',' or ']' + case JP_STATE_VALUE_BEGIN_AR: + return JP_STATE_ARRAY_SEP_OR_CLOSE; + } +} + +static void clear_valuename(CxJson *p) { + free(p->value_name); + p->value_name = NULL; + p->value_name_len = 0; +} + +static void clear_values(CxJson *p) { + free(p->value_str); + p->value_str = NULL; + p->value_str_len = 0; + p->value_int = 0; + p->value_double = 0; +} + +static int json_read(CxJson *p) { + int state = p->states[p->nstates]; + clear_values(p); + CxJsonToken token = json_parser_next_token(p); + p->reader_token = token; + + p->value_ready = 0; + + if (token.tokentype == CX_JSON_NO_TOKEN) { + return 0; + } + + int ret = 1; + + // 0 JP_STATE_VALUE_BEGIN value begin + // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object) + // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array) + // 3 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose + // 4 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose + // 5 JP_STATE_OBJ_NAME object, expect name + // 6 JP_STATE_OBJ_COLON object, expect ':' + // 7 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose + + if (state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) { + clear_valuename(p); + } + + if (state < 3) { + // expect value + p->states[p->nstates] = next_state_after_value(state); + p->value_ready = 1; + switch (token.tokentype) { + case CX_JSON_TOKEN_BEGIN_ARRAY: { + p->reader_type = CX_JSON_READER_ARRAY_BEGIN; + if (add_state(p, JP_STATE_VALUE_BEGIN_AR)) return -1; + return 1; + //return json_read(p); + } + case CX_JSON_TOKEN_BEGIN_OBJECT: { + p->reader_type = CX_JSON_READER_OBJECT_BEGIN; + if (add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE)) return -1; + return 1; + //return json_read(p); + } + case CX_JSON_TOKEN_END_ARRAY: { + p->value_ready = 0; + end_elm(p, CX_JSON_READER_ARRAY_END); + break; + } + case CX_JSON_TOKEN_END_OBJECT: { + p->value_ready = 0; + end_elm(p, 
CX_JSON_READER_OBJECT_END); + break; + } + case CX_JSON_TOKEN_STRING: { + p->reader_type = CX_JSON_READER_STRING; + cxmutstr str = unescape_string(token.content, token.length); + if (str.ptr) { + p->value_str = str.ptr; + p->value_str_len = str.length; + } else { + return -1; + } + break; + } + case CX_JSON_TOKEN_INTEGER: { + p->reader_type = CX_JSON_READER_INTEGER; + int64_t value; + if (parse_integer(token.content, token.length, &value)) { + return -1; + } + p->value_int = value; + p->value_double = (double) value; + break; + } + case CX_JSON_TOKEN_NUMBER: { + p->reader_type = CX_JSON_READER_NUMBER; + double value; + if (parse_number(token.content, token.length, &value)) { + return -1; + } + p->value_double = value; + p->value_int = (int64_t) value; + break; + } + case CX_JSON_TOKEN_LITERAL: { + p->reader_type = CX_JSON_READER_LITERAL; + break; + } + default: + return -1; + } + } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) { + // expect ',' or ']' + if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { + p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR; + return json_read(p); + } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) { + end_elm(p, CX_JSON_READER_ARRAY_END); + } else { + return -1; + } + } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) { + if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) { + clear_valuename(p); + end_elm(p, CX_JSON_READER_OBJECT_END); + } else { + // expect string + if (token.tokentype != CX_JSON_TOKEN_STRING) return -1; + + if (p->value_name) free(p->value_name); + cxmutstr valname = unescape_string(token.content, token.length); + p->value_name = valname.ptr; + p->value_name_len = valname.length; + + // next state + p->states[p->nstates] = JP_STATE_OBJ_COLON; + return json_read(p); + } + } else if (state == JP_STATE_OBJ_COLON) { + // expect ':' + if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) return -1; + // next state + p->states[p->nstates] = 1; + 
return json_read(p); + } else if (state == 7) { + // expect ',' or '}]' + if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { + p->states[p->nstates] = JP_STATE_OBJ_NAME; + return json_read(p); + } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) { + end_elm(p, CX_JSON_READER_OBJECT_END); + } else { + return -1; + } + } + + return ret; +} + +static CxJsonLiteralType json_reader_literal(CxJson *p) { + const char *l = p->reader_token.content; + size_t token_len = p->reader_token.length; + if (token_len == 4 && !memcmp(l, "true", 4)) { + return CX_JSON_TRUE; + } else if (token_len == 5 && !memcmp(l, "false", 5)) { + return CX_JSON_FALSE; + } + return CX_JSON_NULL; +} + +/* -------------------- read value functions -------------------- */ + +static int setup_read_value(CxJson *p) { + p->readvalue_alloc = PARSER_STATES_ALLOC; + p->readvalue_nelm = 0; + p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(CxJsonValue *)); + if (!p->readvalue_stack) return -1; + + p->read_value = NULL; + p->readvalue_stack[0] = NULL; + + return 0; +} + +static int obj_init_values(CxJson *p, CxJsonValue *v) { + v->value.object.values = calloc(p->reader_array_alloc, sizeof(CxJsonObjValue)); + if (!v->value.object.values) { + return -1; + } + v->value.object.alloc = p->reader_array_alloc; + v->value.object.size = 0; + + return 0; +} + +static int obj_add_value(CxJson *p, CxJsonValue *parent, CxJsonObjValue v) { + if (!parent->value.object.values) { + if (obj_init_values(p, parent)) { + return -1; + } + } + + if (parent->value.object.size == parent->value.object.alloc) { + parent->value.object.alloc *= 2; + if (cx_reallocate(&parent->value.object.values, + sizeof(CxJsonObjValue) * parent->value.object.alloc)) { + return -1; + } + } + + parent->value.object.values[parent->value.object.size++] = v; + + return 0; +} + +static int array_init(CxJson *p, CxJsonValue *v) { + v->value.array.array = calloc(p->reader_array_alloc, sizeof(CxJsonValue *)); + if (!v->value.array.array) { + 
return -1;
+    }
+    v->value.array.alloc = p->reader_array_alloc;
+    v->value.array.size = 0;
+
+    return 0;
+}
+
+/**
+ * Appends \p v to the array value \p parent.
+ *
+ * The element storage is created on first use and doubled when it is full.
+ *
+ * \param p the parser (provides the initial allocation size)
+ * \param parent the array value that receives the element
+ * \param v the element to append
+ * \return zero on success, -1 when memory allocation failed
+ */
+static int array_add_value(CxJson *p, CxJsonValue *parent, CxJsonValue *v) {
+    if (!parent->value.array.array) {
+        if (array_init(p, parent)) {
+            return -1;
+        }
+    }
+
+    if (parent->value.array.size == parent->value.array.alloc) {
+        size_t newalloc = parent->value.array.alloc * 2;
+        // cx_reallocate() takes the address of the pointer, as at the
+        // other call sites in this file
+        if (cx_reallocate(&parent->value.array.array,
+                          sizeof(CxJsonValue *) * newalloc)) {
+            return -1;
+        }
+        // only remember the new capacity when the reallocation succeeded
+        parent->value.array.alloc = newalloc;
+    }
+
+    parent->value.array.array[parent->value.array.size++] = v;
+
+    return 0;
+}
+
+/**
+ * Adds \p v to \p parent, which must be an object or an array value.
+ *
+ * For objects, the pending member name stored in p->value_name is handed
+ * over to the object. The parser keeps the name when adding fails.
+ *
+ * \param p the parser
+ * \param parent the object or array value that receives \p v
+ * \param v the value to add
+ * \return zero on success, -1 when \p parent is NULL or neither object nor
+ * array, when no member name is pending, or when memory allocation failed
+ */
+static int add_to_parent(CxJson *p, CxJsonValue *parent, CxJsonValue *v) {
+    if (!parent) {
+        return -1; // shouldn't happen but who knows
+    }
+
+    int ret = 0;
+    if (parent->type == CX_JSON_OBJECT) {
+        // an empty name is a valid JSON string, only a missing one is an error
+        if (!p->value_name) {
+            return -1;
+        }
+
+        CxJsonObjValue newvalue;
+        newvalue.name = p->value_name;
+        newvalue.value = v;
+
+        ret = obj_add_value(p, parent, newvalue);
+        if (ret == 0) {
+            // the object owns the name now
+            p->value_name = NULL;
+        }
+    } else if (parent->type == CX_JSON_ARRAY) {
+        ret = array_add_value(p, parent, v);
+    } else {
+        ret = -1; // should also never happen
+    }
+
+    return ret;
+}
+
+
+/**
+ * Pushes \p v onto the stack of values that are currently being read.
+ *
+ * \param p the parser
+ * \param v the object or array value to push
+ * \return zero on success, -1 when the stack could not be grown
+ */
+static int readvaluestack_add(CxJson *p, CxJsonValue *v) {
+    if (p->readvalue_nelm == p->readvalue_alloc) {
+        int newalloc = p->readvalue_alloc * 2;
+        if (cx_reallocate(&p->readvalue_stack, sizeof(CxJsonValue *) * newalloc)) {
+            return -1;
+        }
+        // only remember the new capacity when the reallocation succeeded
+        p->readvalue_alloc = newalloc;
+    }
+    p->readvalue_stack[p->readvalue_nelm++] = v;
+    return 0;
+}
+
+void cxJsonInit(CxJson *json) {
+    memset(json, 0, sizeof(CxJson));
+    // TODO: do not allocate states right away
+    // NOTE: the result of calloc() is not checked here
+    json->states_alloc = PARSER_STATES_ALLOC;
+    json->states = calloc(PARSER_STATES_ALLOC, sizeof(int));
+    // TODO: find better way to configure the initial allocation size for arrays and objects
+    json->reader_array_alloc = 8;
+}
+
+void cxJsonDestroy(CxJson *p) {
+    free(p->states);
+    p->states = NULL;
+    free(p->readvalue_stack);
+    p->readvalue_stack = NULL;
+
+    // release what an unfinished parse run may have left behind
+    free(p->value_name);
+    p->value_name = NULL;
+    p->value_name_len = 0;
+    free(p->value_str);
+    p->value_str = NULL;
+    p->value_str_len = 0;
+    free((char *) p->uncompleted.content);
+    p->uncompleted = (CxJsonToken) {0};
+    // read_value is reset to NULL once a value was handed to the caller,
+    // so this only frees a tree that was never returned
+    cxJsonValueFree(p->read_value);
+    p->read_value = NULL;
+}
+
+void cxJsonFill(CxJson *p, const char *buf, size_t size) {
+    // 
TODO: implement rescue buffer like in CxProperties to allow subsequent fills
+    p->buffer = buf;
+    p->size = size;
+    p->pos = 0;
+}
+
+/**
+ * Reads the next complete value from the filled buffer.
+ *
+ * Objects and arrays are assembled together with all their children, the
+ * values that are still open are tracked on p->readvalue_stack.
+ *
+ * \param p the parser
+ * \param value receives the root of the value tree, or NULL when no
+ * complete value could be read
+ * \return 1 when a value was stored in \p value, otherwise the result of
+ * json_read() (0 when no further token is available) or -1 on error
+ */
+int cxJsonNext(CxJson *p, CxJsonValue **value) {
+    // TODO: replace int with a status enum like in CxProperties
+
+    *value = NULL;
+    if (!p->readvalue_stack) {
+        if (setup_read_value(p)) return -1;
+    }
+
+    while (p->readvalue_nelm > 0 || !p->read_value) {
+        if (p->value_ready) {
+            // value available without another read
+            CxJsonValue *v = calloc(1, sizeof(CxJsonValue));
+            if (!v) return -1;
+
+            if (p->readvalue_nelm > 0) {
+                if (add_to_parent(p, p->readvalue_stack[p->readvalue_nelm - 1], v)) {
+                    // v was not attached to the tree, nobody else frees it
+                    free(v);
+                    return -1;
+                }
+            } else {
+                // set this value as root
+                p->read_value = v;
+            }
+
+            // from here on v is reachable via its parent or p->read_value
+            switch (p->reader_type) {
+                case CX_JSON_READER_OBJECT_BEGIN: {
+                    v->type = CX_JSON_OBJECT;
+                    if (readvaluestack_add(p, v)) {
+                        return -1;
+                    }
+                    break;
+                }
+                case CX_JSON_READER_OBJECT_END:
+                    return -1; // should not happen
+                case CX_JSON_READER_ARRAY_BEGIN: {
+                    v->type = CX_JSON_ARRAY;
+                    if (readvaluestack_add(p, v)) {
+                        return -1;
+                    }
+                    break;
+                }
+                case CX_JSON_READER_ARRAY_END:
+                    return -1; // should not happen
+                case CX_JSON_READER_STRING: {
+                    v->type = CX_JSON_STRING;
+                    if (p->value_str) {
+                        // the value takes ownership of the string
+                        v->value.string.ptr = p->value_str;
+                        v->value.string.length = p->value_str_len;
+                        p->value_str = NULL;
+                    }
+                    break;
+                }
+                case CX_JSON_READER_INTEGER: {
+                    v->type = CX_JSON_INTEGER;
+                    v->value.integer.value = p->value_int;
+                    break;
+                }
+                case CX_JSON_READER_NUMBER: {
+                    v->type = CX_JSON_NUMBER;
+                    v->value.number.value = p->value_double;
+                    break;
+                }
+                case CX_JSON_READER_LITERAL: {
+                    v->type = CX_JSON_LITERAL;
+                    v->value.literal.literal = json_reader_literal(p);
+                    break;
+                }
+            }
+        } else if (p->readvalue_initialized) {
+            CxJsonReaderType rt = p->reader_type;
+            if (rt == CX_JSON_READER_OBJECT_END || rt == CX_JSON_READER_ARRAY_END) {
+                // the object or array on top of the stack is complete
+                p->readvalue_nelm--;
+            }
+            // else: p->value_ready is 1, this will be handled in the next run
+        }
+
+        if (p->readvalue_nelm > 0 || 
!p->read_value) { + int r = json_read(p); + if (r != 1) { + p->readvalue_initialized = 0; + return r; + } + p->readvalue_initialized = 1; + } + } + + *value = p->read_value; + p->readvalue_initialized = 0; + p->read_value = NULL; + + return 1; +} + +void cxJsonValueFree(CxJsonValue *value) { + if (value == NULL || value == &cx_json_value_nothing) return; + + // TODO: discuss if we should keep freeing the stuff recursively + switch (value->type) { + case CX_JSON_OBJECT: { + CxJsonObject obj = value->value.object; + for (size_t i = 0; i < obj.size; i++) { + cxJsonValueFree(obj.values[i].value); + free(obj.values[i].name); + } + free(obj.values); + break; + } + case CX_JSON_ARRAY: { + CxJsonArray array = value->value.array; + for (size_t i = 0; i < array.size; i++) { + cxJsonValueFree(array.array[i]); + } + free(array.array); + break; + } + case CX_JSON_STRING: { + free(value->value.string.ptr); + break; + } + default: { + break; + } + } + free(value); +} + +CxJsonValue *cxJsonArrGet(CxJsonValue *value, size_t index) { + if (index >= value->value.array.size) { + return &cx_json_value_nothing; + } + return value->value.array.array[index]; +} + +CxJsonValue *cxJsonObjGet(CxJsonValue *value, const char *name) { + CxJsonObject *obj = &(value->value.object); + // TODO: think about sorting the object so that we can use binary search here + for (size_t i = 0; i < obj->size; i++) { + // TODO: we might want to store names as cxmutstr + if (0 == strcmp(name, obj->values[i].name)) { + return obj->values[i].value; + } + } + return &cx_json_value_nothing; +} diff -r 9b9385fcdfd5 -r 10123f4d5618 tests/Makefile --- a/tests/Makefile Sat Oct 19 16:28:15 2024 +0200 +++ b/tests/Makefile Sat Oct 19 17:25:11 2024 +0200 @@ -30,7 +30,7 @@ SRC = util_allocator.c test_utils.c test_hash_key.c test_allocator.c \ test_compare.c test_string.c test_buffer.c test_iterator.c \ test_list.c test_tree.c test_hash_map.c test_properties.c \ - test_printf.c test_mempool.c ucxtest.c + test_printf.c 
test_mempool.c test_json.c ucxtest.c OBJ_EXT=.o OBJ=$(SRC:%.c=$(TEST_DIR)/%$(OBJ_EXT)) @@ -83,6 +83,12 @@ @echo "Compiling $<" $(CC) -o $@ $(CFLAGS) -c $< +$(TEST_DIR)/test_json$(OBJ_EXT): test_json.c ../src/cx/test.h \ + ../src/cx/json.h ../src/cx/common.h ../src/cx/string.h \ + ../src/cx/allocator.h + @echo "Compiling $<" + $(CC) -o $@ $(CFLAGS) -c $< + $(TEST_DIR)/test_list$(OBJ_EXT): test_list.c ../src/cx/test.h \ util_allocator.h ../src/cx/allocator.h ../src/cx/common.h \ ../src/cx/compare.h ../src/cx/utils.h ../src/cx/array_list.h \ diff -r 9b9385fcdfd5 -r 10123f4d5618 tests/test_json.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_json.c Sat Oct 19 17:25:11 2024 +0200 @@ -0,0 +1,105 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cx/test.h" + +#include "cx/json.h" + +CX_TEST(test_json_simple_object) { /* Parses one JSON object holding a string, a nested object with two numbers, an integer and a boolean, then checks the type and value of each member. */ + cxstring text = cx_str( + "{\n" + "\t\"message\":\"success\",\n" + "\t\"position\":{\n" + "\t\t\"longitude\":-94.7099,\n" + "\t\t\"latitude\":51.5539\n" + "\t},\n" + "\t\"timestamp\":1729348561,\n" + "\t\"alive\":true\n" + "}" + ); + + CX_TEST_DO { + int result; + + CxJson json; + cxJsonInit(&json); + cxJsonFill(&json, text.ptr, text.length); + + // parse the big fat object + CxJsonValue *obj; + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == 1); + + // check the contents + CX_TEST_ASSERT(cxJsonIsObject(obj)); + + CxJsonValue *message = cxJsonObjGet(obj, "message"); + CX_TEST_ASSERT(cxJsonIsString(message)); + CX_TEST_ASSERT(0 == cx_strcmp( + cx_strcast(cxJsonAsString(message)), + cx_str("success")) + ); + + CxJsonValue *position = cxJsonObjGet(obj, "position"); + CX_TEST_ASSERT(cxJsonIsObject(position)); + CxJsonValue *longitude = cxJsonObjGet(position, "longitude"); + CX_TEST_ASSERT(cxJsonIsNumber(longitude)); + CX_TEST_ASSERT(cxJsonAsDouble(longitude) == -94.7099); /* NOTE(review): exact floating-point equality; presumably the parser yields the same double as the compiler does for this literal - confirm */ + CxJsonValue *latitude = cxJsonObjGet(position, "latitude"); + CX_TEST_ASSERT(cxJsonIsNumber(latitude)); + CX_TEST_ASSERT(cxJsonAsDouble(latitude) == 51.5539); + + CxJsonValue *timestamp = cxJsonObjGet(obj, "timestamp"); + CX_TEST_ASSERT(cxJsonIsInteger(timestamp)); + CX_TEST_ASSERT(cxJsonAsInteger(timestamp) == 1729348561); + + CxJsonValue *alive = 
cxJsonObjGet(obj, "alive"); + CX_TEST_ASSERT(cxJsonIsBool(alive)); + CX_TEST_ASSERT(cxJsonIsTrue(alive)); + CX_TEST_ASSERT(!cxJsonIsFalse(alive)); + CX_TEST_ASSERT(cxJsonAsBool(alive)); + + // this recursively frees everything else + cxJsonValueFree(obj); + + // we only have one object that already contained all the data + result = cxJsonNext(&json, &obj); + CX_TEST_ASSERT(result == 0); /* the second cxJsonNext call is expected to return 0 */ + + cxJsonDestroy(&json); + } +} + +CxTestSuite *cx_test_suite_json(void) { /* Creates the suite named json, registers test_json_simple_object with it and returns the suite. */ + CxTestSuite *suite = cx_test_suite_new("json"); + + cx_test_register(suite, test_json_simple_object); + + return suite; +} + diff -r 9b9385fcdfd5 -r 10123f4d5618 tests/ucxtest.c --- a/tests/ucxtest.c Sat Oct 19 16:28:15 2024 +0200 +++ b/tests/ucxtest.c Sat Oct 19 17:25:11 2024 +0200 @@ -47,6 +47,7 @@ CxTestSuite *cx_test_suite_mempool(void); CxTestSuite *cx_test_suite_hash_map(void); CxTestSuite *cx_test_suite_properties(void); +CxTestSuite *cx_test_suite_json(void); #define run_tests(suite) cx_test_run_stdout(suite); success += (suite)->success; failure += (suite)->failure #define execute_test_suites(...) unsigned success = 0, failure = 0; CxTestSuite* test_suites[] = {__VA_ARGS__}; \ @@ -75,7 +76,8 @@ cx_test_suite_tree_high_level(), cx_test_suite_mempool(), cx_test_suite_hash_map(), - cx_test_suite_properties() + cx_test_suite_properties(), + cx_test_suite_json() ); printf("=== OVERALL RESULT ===\n"); printf(" Total: %u\n Success: %u\n Failure: %u\n",