# HG changeset patch
# User Mike Becker
# Date 1736547068 -3600
# Node ID ff4d7e76f85a3a2d794d0ccfff7a4e4a275ca1ef
# Parent  9fa87f9882babc60f88529451a79d3b48f9812af
implement string escape - resolves #526

diff -r 9fa87f9882ba -r ff4d7e76f85a src/json.c
--- a/src/json.c	Fri Jan 10 15:03:58 2025 +0100
+++ b/src/json.c	Fri Jan 10 23:11:08 2025 +0100
@@ -345,7 +345,7 @@
 
 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
     // TODO: support more escape sequences
-    // we know that the unescaped string will be shorter by at least 2 chars
+    // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed
     cxmutstr result;
     result.length = 0;
     result.ptr = cxMalloc(a, str.length - 1);
@@ -375,6 +375,80 @@
     return result;
 }
 
+/**
+ * Escapes a string so that it can be written as a JSON string (RFC 8259, section 7).
+ *
+ * The quotation mark, the reverse solidus, the solidus, and all ASCII control
+ * characters are escaped. Bytes with the high bit set are copied unchanged, so
+ * multi-byte UTF-8 sequences stay intact instead of being escaped byte by byte.
+ *
+ * If nothing needs to be escaped, the input string is returned as is. Otherwise,
+ * the result lives in memory obtained from malloc(); the callers detect this by
+ * comparing the pointers and release the result with cx_strfree().
+ * If that allocation fails, a string with a NULL pointer is returned.
+ *
+ * @param str the string to escape
+ * @return the input string, a newly allocated escaped copy, or a NULL string
+ */
+static cxmutstr escape_string(cxmutstr str) {
+    CxBuffer buf = {0};
+
+    bool all_printable = true;
+    for (size_t i = 0; i < str.length; i++) {
+        // look at the unsigned byte value, because plain char may be signed
+        const unsigned char c = (unsigned char) str.ptr[i];
+        bool escape = c < 0x20 || c == 0x7f
+                || c == '\\'
+                || c == '"'
+                // TODO: make escaping slash optional
+                || c == '/';
+
+        if (all_printable && escape) {
+            // first character to escape: start the buffer with everything before it
+            size_t capa = str.length + 32;
+            char *space = malloc(capa);
+            if (space == NULL) return cx_mutstrn(NULL, 0);
+            cxBufferInit(&buf, space, capa, NULL, CX_BUFFER_AUTO_EXTEND);
+            cxBufferWrite(str.ptr, 1, i, &buf);
+            all_printable = false;
+        }
+        if (escape) {
+            cxBufferPut(&buf, '\\');
+            if (c == '\"') {
+                cxBufferPut(&buf, '\"');
+            } else if (c == '\n') {
+                cxBufferPut(&buf, 'n');
+            } else if (c == '\t') {
+                cxBufferPut(&buf, 't');
+            } else if (c == '\r') {
+                cxBufferPut(&buf, 'r');
+            } else if (c == '\\') {
+                cxBufferPut(&buf, '\\');
+            } else if (c == '/') {
+                cxBufferPut(&buf, '/');
+            } else if (c == '\f') {
+                cxBufferPut(&buf, 'f');
+            } else if (c == '\b') {
+                cxBufferPut(&buf, 'b');
+            } else {
+                // remaining control characters are written as u00XX after the backslash
+                char code[6];
+                snprintf(code, sizeof(code), "u%04x", (unsigned int) c);
+                cxBufferPutString(&buf, code);
+            }
+        } else if (!all_printable) {
+            cxBufferPut(&buf, c);
+        }
+    }
+    if (!all_printable) {
+        str = cx_mutstrn(buf.space, buf.size);
+    }
+    // NOTE(review): the escaped text is still used after this call, so this
+    // relies on cxBufferDestroy() not freeing caller-provided space - confirm
+    cxBufferDestroy(&buf);
+    return str;
+}
+
 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
     CxJsonValue *v = cxCalloc(json->allocator, 1, sizeof(CxJsonValue));
     if (v == NULL) return NULL; // LCOV_EXCL_LINE
@@ -1084,9 +1158,11 @@
 
                 // the name
                 actual += wfunc("\"", 1, 1, target);
-                // TODO: escape the string
-                actual += wfunc(member->name.ptr, 1,
-                        member->name.length, target);
+                cxmutstr name = escape_string(member->name);
+                actual += wfunc(name.ptr, 1, name.length, target);
+                if (name.ptr != member->name.ptr) {
+                    cx_strfree(&name);
+                }
                 actual += wfunc("\"", 1, 1, target);
                 const char *obj_name_sep = ": ";
                 if (settings->pretty) {
@@ -1152,9 +1228,11 @@
         }
         case CX_JSON_STRING: {
             actual += wfunc("\"", 1, 1, target);
-            // TODO: escape the string
-            actual += wfunc(value->value.string.ptr, 1,
-                    value->value.string.length, target);
+            cxmutstr str = escape_string(value->value.string);
+            actual += wfunc(str.ptr, 1, str.length, target);
+            if (str.ptr != value->value.string.ptr) {
+                cx_strfree(&str);
+            }
             actual += wfunc("\"", 1, 1, target);
             expected += 2 + value->value.string.length;
             break;
diff -r 9fa87f9882ba -r ff4d7e76f85a tests/test_json.c
--- a/tests/test_json.c	Fri Jan 10 15:03:58 2025 +0100
+++ b/tests/test_json.c	Fri Jan 10 23:11:08 2025 +0100
@@ -894,7 +894,7 @@
     cxBufferInit(&buf, NULL, 32, NULL, 0);
     CX_TEST_DO {
         // test default settings (6 digits)
-        cxJsonWrite(&buf,num, cxBufferWriteFunc, &writer);
+        cxJsonWrite(&buf, num, cxBufferWriteFunc, &writer);
         CX_TEST_ASSERT(0 == cx_strcmp(cx_strn(buf.space, buf.size), CX_STR("3.141592")));
 
         // test too many digits
@@ -939,6 +939,50 @@
     cxJsonValueFree(num);
 }
 
+CX_TEST(test_json_write_string_escape) {
+    /**
+     * According to RFC-8259 we have to test the following characters:
+     *    "    quotation mark
+     *    \    reverse solidus
+     *    /    solidus
+     *    b    backspace
+     *    f    form feed
+     *    n    line feed
+     *    r    carriage return
+     *    t    tab
+     * And all other control characters must be encoded uXXXX - in our example the bell character.
+     * Non-ASCII characters must not be escaped - in our example the UTF-8 encoded 'ö' is written unchanged.
+     */
+    CxJsonValue* str = cxJsonCreateString(NULL,
+            "hello\twörld\r\nthis/is\\a \"string\"\b in \a string\f");
+    CxJsonWriter writer = cxJsonWriterCompact();
+    CxBuffer buf;
+    cxBufferInit(&buf, NULL, 128, NULL, 0);
+    CX_TEST_DO {
+        cxJsonWrite(&buf, str, cxBufferWriteFunc, &writer);
+        CX_TEST_ASSERT(0 == cx_strcmp(cx_strn(buf.space, buf.size),
+                CX_STR("\"hello\\twörld\\r\\nthis\\/is\\\\a \\\"string\\\"\\b in \\u0007 string\\f\"")));
+    }
+    cxBufferDestroy(&buf);
+    cxJsonValueFree(str);
+}
+
+CX_TEST(test_json_write_name_escape) {
+    CxJsonValue* obj = cxJsonCreateObj(NULL);
+    cxJsonObjPutLiteral(obj,
+            CX_STR("hello\twörld\r\nthis/is\\a \"string\"\b in \a string\f"), CX_JSON_TRUE);
+    CxJsonWriter writer = cxJsonWriterCompact();
+    CxBuffer buf;
+    cxBufferInit(&buf, NULL, 128, NULL, 0);
+    CX_TEST_DO {
+        cxJsonWrite(&buf, obj, cxBufferWriteFunc, &writer);
+        CX_TEST_ASSERT(0 == cx_strcmp(cx_strn(buf.space, buf.size),
+                CX_STR("{\"hello\\twörld\\r\\nthis\\/is\\\\a \\\"string\\\"\\b in \\u0007 string\\f\":true}")));
+    }
+    cxBufferDestroy(&buf);
+    cxJsonValueFree(obj);
+}
+
 CxTestSuite *cx_test_suite_json(void) {
     CxTestSuite *suite = cx_test_suite_new("json");
 
@@ -962,6 +1006,8 @@
     cx_test_register(suite, test_json_write_pretty_default_tabs);
     cx_test_register(suite, test_json_write_pretty_preserve_order);
     cx_test_register(suite, test_json_write_frac_max_digits);
+    cx_test_register(suite, test_json_write_string_escape);
+    cx_test_register(suite, test_json_write_name_escape);
 
     return suite;
 }