250 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; |
250 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; |
251 } |
251 } |
252 |
252 |
253 // current token type and start index |
253 // current token type and start index |
254 CxJsonTokenType ttype = json->uncompleted.tokentype; |
254 CxJsonTokenType ttype = json->uncompleted.tokentype; |
255 size_t token_start = json->buffer.pos; |
255 size_t token_part_start = json->buffer.pos; |
256 |
256 |
257 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { |
257 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { |
258 char c = json->buffer.space[i]; |
258 char c = json->buffer.space[i]; |
259 if (ttype != CX_JSON_TOKEN_STRING) { |
259 if (ttype != CX_JSON_TOKEN_STRING) { |
260 // currently non-string token |
260 // currently non-string token |
264 json->buffer.pos++; |
264 json->buffer.pos++; |
265 continue; |
265 continue; |
266 } else if (ctype == CX_JSON_TOKEN_STRING) { |
266 } else if (ctype == CX_JSON_TOKEN_STRING) { |
267 // begin string |
267 // begin string |
268 ttype = CX_JSON_TOKEN_STRING; |
268 ttype = CX_JSON_TOKEN_STRING; |
269 token_start = i; |
269 token_part_start = i; |
270 } else if (ctype != CX_JSON_NO_TOKEN) { |
270 } else if (ctype != CX_JSON_NO_TOKEN) { |
271 // single-char token |
271 // single-char token |
272 json->buffer.pos = i + 1; |
272 json->buffer.pos = i + 1; |
273 *result = (CxJsonToken){ctype, false, {NULL, 0}}; |
273 *result = (CxJsonToken){ctype, false, {NULL, 0}}; |
274 return CX_JSON_NO_ERROR; |
274 return CX_JSON_NO_ERROR; |
275 } else { |
275 } else { |
276 ttype = CX_JSON_TOKEN_LITERAL; // number or literal |
276 ttype = CX_JSON_TOKEN_LITERAL; // number or literal |
277 token_start = i; |
277 token_part_start = i; |
278 } |
278 } |
279 } else { |
279 } else { |
280 // finish token |
280 // finish token |
281 if (ctype != CX_JSON_NO_TOKEN) { |
281 if (ctype != CX_JSON_NO_TOKEN) { |
282 *result = token_create(json, false, token_start, i); |
282 *result = token_create(json, false, token_part_start, i); |
283 if (result->tokentype == CX_JSON_NO_TOKEN) { |
283 if (result->tokentype == CX_JSON_NO_TOKEN) { |
284 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
284 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
285 } |
285 } |
286 if (result->tokentype == CX_JSON_TOKEN_ERROR) { |
286 if (result->tokentype == CX_JSON_TOKEN_ERROR) { |
287 return CX_JSON_FORMAT_ERROR_NUMBER; |
287 return CX_JSON_FORMAT_ERROR_NUMBER; |
294 // currently inside a string |
294 // currently inside a string |
295 if (json->tokenizer_escape) { |
295 if (json->tokenizer_escape) { |
296 json->tokenizer_escape = false; |
296 json->tokenizer_escape = false; |
297 } else { |
297 } else { |
298 if (c == '"') { |
298 if (c == '"') { |
299 *result = token_create(json, true, token_start, i + 1); |
299 *result = token_create(json, true, token_part_start, i + 1); |
300 if (result->tokentype == CX_JSON_NO_TOKEN) { |
300 if (result->tokentype == CX_JSON_NO_TOKEN) { |
301 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
301 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
302 } |
302 } |
303 json->buffer.pos = i + 1; |
303 json->buffer.pos = i + 1; |
304 return CX_JSON_NO_ERROR; |
304 return CX_JSON_NO_ERROR; |
309 } |
309 } |
310 } |
310 } |
311 |
311 |
312 if (ttype != CX_JSON_NO_TOKEN) { |
312 if (ttype != CX_JSON_NO_TOKEN) { |
313 // uncompleted token |
313 // uncompleted token |
314 size_t uncompleted_len = json->buffer.size - token_start; |
314 size_t uncompleted_len = json->buffer.size - token_part_start; |
315 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { |
315 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { |
316 // current token is uncompleted |
316 // current token is uncompleted |
317 // save current token content |
317 // save current token content |
318 CxJsonToken uncompleted = { |
318 CxJsonToken uncompleted = { |
319 ttype, true, |
319 ttype, true, |
320 cx_strdup(cx_strn(json->buffer.space + token_start, uncompleted_len)) |
320 cx_strdup(cx_strn(json->buffer.space + token_part_start, uncompleted_len)) |
321 }; |
321 }; |
322 if (uncompleted.content.ptr == NULL) { |
322 if (uncompleted.content.ptr == NULL) { |
323 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
323 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
324 } |
324 } |
325 json->uncompleted = uncompleted; |
325 json->uncompleted = uncompleted; |
326 } else { |
326 } else { |
327 // previously we also had an uncompleted token |
327 // previously we also had an uncompleted token |
328 // combine the uncompleted token with the current token |
328 // combine the uncompleted token with the current token |
329 assert(json->uncompleted.allocated); |
329 assert(json->uncompleted.allocated); |
330 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, |
330 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, |
331 cx_strn(json->buffer.space + token_start, uncompleted_len)); |
331 cx_strn(json->buffer.space + token_part_start, uncompleted_len)); |
332 if (str.ptr == NULL) { |
332 if (str.ptr == NULL) { |
333 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
333 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
334 } |
334 } |
335 json->uncompleted.content = str; |
335 json->uncompleted.content = str; |
336 } |
336 } |
340 |
340 |
341 return CX_JSON_INCOMPLETE_DATA; |
341 return CX_JSON_INCOMPLETE_DATA; |
342 } |
342 } |
343 |
343 |
344 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { |
344 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { |
345 // TODO: support more escape sequences |
345 // note: this function expects that str contains the enclosing quotes! |
346 // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed |
346 |
347 cxmutstr result; |
347 cxmutstr result; |
348 result.length = 0; |
348 result.length = 0; |
349 result.ptr = cxMalloc(a, str.length - 1); |
349 result.ptr = cxMalloc(a, str.length - 1); |
350 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE |
350 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE |
351 |
351 |
356 u = false; |
356 u = false; |
357 if (c == 'n') { |
357 if (c == 'n') { |
358 c = '\n'; |
358 c = '\n'; |
359 } else if (c == 't') { |
359 } else if (c == 't') { |
360 c = '\t'; |
360 c = '\t'; |
361 } |
361 } else if (c == 'r') { |
|
362 c = '\r'; |
|
363 } else if (c == '\\') { |
|
364 c = '\\'; |
|
365 } else if (c == '/') { |
|
366 c = '/'; // always unescape, we don't need settings here |
|
367 } else if (c == 'f') { |
|
368 c = '\f'; |
|
369 } else if (c == 'b') { |
|
370 c = '\b'; |
|
371 } |
|
372 // TODO: support \uXXXX escape sequences |
|
373 // TODO: discuss the behavior for unrecognized escape sequences |
|
374 // most parsers throw an error here |
362 result.ptr[result.length++] = c; |
375 result.ptr[result.length++] = c; |
363 } else { |
376 } else { |
364 if (c == '\\') { |
377 if (c == '\\') { |
365 u = true; |
378 u = true; |
366 } else { |
379 } else { |
372 |
385 |
373 return result; |
386 return result; |
374 } |
387 } |
375 |
388 |
376 static cxmutstr escape_string(cxmutstr str) { |
389 static cxmutstr escape_string(cxmutstr str) { |
|
390 // note: this function produces the string without enclosing quotes |
|
391 // the reason is that we don't want to allocate memory just for that |
377 CxBuffer buf = {0}; |
392 CxBuffer buf = {0}; |
378 |
393 |
379 bool all_printable = true; |
394 bool all_printable = true; |
380 for (size_t i = 0; i < str.length; i++) { |
395 for (size_t i = 0; i < str.length; i++) { |
381 bool escape = !isprint(str.ptr[i]) |
396 bool escape = !isprint(str.ptr[i]) |