| 250 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; |
250 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; |
| 251 } |
251 } |
| 252 |
252 |
| 253 // current token type and start index |
253 // current token type and start index |
| 254 CxJsonTokenType ttype = json->uncompleted.tokentype; |
254 CxJsonTokenType ttype = json->uncompleted.tokentype; |
| 255 size_t token_start = json->buffer.pos; |
255 size_t token_part_start = json->buffer.pos; |
| 256 |
256 |
| 257 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { |
257 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { |
| 258 char c = json->buffer.space[i]; |
258 char c = json->buffer.space[i]; |
| 259 if (ttype != CX_JSON_TOKEN_STRING) { |
259 if (ttype != CX_JSON_TOKEN_STRING) { |
| 260 // currently non-string token |
260 // currently non-string token |
| 264 json->buffer.pos++; |
264 json->buffer.pos++; |
| 265 continue; |
265 continue; |
| 266 } else if (ctype == CX_JSON_TOKEN_STRING) { |
266 } else if (ctype == CX_JSON_TOKEN_STRING) { |
| 267 // begin string |
267 // begin string |
| 268 ttype = CX_JSON_TOKEN_STRING; |
268 ttype = CX_JSON_TOKEN_STRING; |
| 269 token_start = i; |
269 token_part_start = i; |
| 270 } else if (ctype != CX_JSON_NO_TOKEN) { |
270 } else if (ctype != CX_JSON_NO_TOKEN) { |
| 271 // single-char token |
271 // single-char token |
| 272 json->buffer.pos = i + 1; |
272 json->buffer.pos = i + 1; |
| 273 *result = (CxJsonToken){ctype, false, {NULL, 0}}; |
273 *result = (CxJsonToken){ctype, false, {NULL, 0}}; |
| 274 return CX_JSON_NO_ERROR; |
274 return CX_JSON_NO_ERROR; |
| 275 } else { |
275 } else { |
| 276 ttype = CX_JSON_TOKEN_LITERAL; // number or literal |
276 ttype = CX_JSON_TOKEN_LITERAL; // number or literal |
| 277 token_start = i; |
277 token_part_start = i; |
| 278 } |
278 } |
| 279 } else { |
279 } else { |
| 280 // finish token |
280 // finish token |
| 281 if (ctype != CX_JSON_NO_TOKEN) { |
281 if (ctype != CX_JSON_NO_TOKEN) { |
| 282 *result = token_create(json, false, token_start, i); |
282 *result = token_create(json, false, token_part_start, i); |
| 283 if (result->tokentype == CX_JSON_NO_TOKEN) { |
283 if (result->tokentype == CX_JSON_NO_TOKEN) { |
| 284 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
284 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
| 285 } |
285 } |
| 286 if (result->tokentype == CX_JSON_TOKEN_ERROR) { |
286 if (result->tokentype == CX_JSON_TOKEN_ERROR) { |
| 287 return CX_JSON_FORMAT_ERROR_NUMBER; |
287 return CX_JSON_FORMAT_ERROR_NUMBER; |
| 294 // currently inside a string |
294 // currently inside a string |
| 295 if (json->tokenizer_escape) { |
295 if (json->tokenizer_escape) { |
| 296 json->tokenizer_escape = false; |
296 json->tokenizer_escape = false; |
| 297 } else { |
297 } else { |
| 298 if (c == '"') { |
298 if (c == '"') { |
| 299 *result = token_create(json, true, token_start, i + 1); |
299 *result = token_create(json, true, token_part_start, i + 1); |
| 300 if (result->tokentype == CX_JSON_NO_TOKEN) { |
300 if (result->tokentype == CX_JSON_NO_TOKEN) { |
| 301 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
301 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
| 302 } |
302 } |
| 303 json->buffer.pos = i + 1; |
303 json->buffer.pos = i + 1; |
| 304 return CX_JSON_NO_ERROR; |
304 return CX_JSON_NO_ERROR; |
| 309 } |
309 } |
| 310 } |
310 } |
| 311 |
311 |
| 312 if (ttype != CX_JSON_NO_TOKEN) { |
312 if (ttype != CX_JSON_NO_TOKEN) { |
| 313 // uncompleted token |
313 // uncompleted token |
| 314 size_t uncompleted_len = json->buffer.size - token_start; |
314 size_t uncompleted_len = json->buffer.size - token_part_start; |
| 315 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { |
315 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { |
| 316 // current token is uncompleted |
316 // current token is uncompleted |
| 317 // save current token content |
317 // save current token content |
| 318 CxJsonToken uncompleted = { |
318 CxJsonToken uncompleted = { |
| 319 ttype, true, |
319 ttype, true, |
| 320 cx_strdup(cx_strn(json->buffer.space + token_start, uncompleted_len)) |
320 cx_strdup(cx_strn(json->buffer.space + token_part_start, uncompleted_len)) |
| 321 }; |
321 }; |
| 322 if (uncompleted.content.ptr == NULL) { |
322 if (uncompleted.content.ptr == NULL) { |
| 323 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
323 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
| 324 } |
324 } |
| 325 json->uncompleted = uncompleted; |
325 json->uncompleted = uncompleted; |
| 326 } else { |
326 } else { |
| 327 // previously we also had an uncompleted token |
327 // previously we also had an uncompleted token |
| 328 // combine the uncompleted token with the current token |
328 // combine the uncompleted token with the current token |
| 329 assert(json->uncompleted.allocated); |
329 assert(json->uncompleted.allocated); |
| 330 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, |
330 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, |
| 331 cx_strn(json->buffer.space + token_start, uncompleted_len)); |
331 cx_strn(json->buffer.space + token_part_start, uncompleted_len)); |
| 332 if (str.ptr == NULL) { |
332 if (str.ptr == NULL) { |
| 333 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
333 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE |
| 334 } |
334 } |
| 335 json->uncompleted.content = str; |
335 json->uncompleted.content = str; |
| 336 } |
336 } |
| 340 |
340 |
| 341 return CX_JSON_INCOMPLETE_DATA; |
341 return CX_JSON_INCOMPLETE_DATA; |
| 342 } |
342 } |
| 343 |
343 |
| 344 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { |
344 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { |
| 345 // TODO: support more escape sequences |
345 // note: this function expects that str contains the enclosing quotes! |
| 346 // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed |
346 |
| 347 cxmutstr result; |
347 cxmutstr result; |
| 348 result.length = 0; |
348 result.length = 0; |
| 349 result.ptr = cxMalloc(a, str.length - 1); |
349 result.ptr = cxMalloc(a, str.length - 1); |
| 350 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE |
350 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE |
| 351 |
351 |
| 356 u = false; |
356 u = false; |
| 357 if (c == 'n') { |
357 if (c == 'n') { |
| 358 c = '\n'; |
358 c = '\n'; |
| 359 } else if (c == 't') { |
359 } else if (c == 't') { |
| 360 c = '\t'; |
360 c = '\t'; |
| 361 } |
361 } else if (c == 'r') { |
| |
362 c = '\r'; |
| |
363 } else if (c == '\\') { |
| |
364 c = '\\'; |
| |
365 } else if (c == '/') { |
| |
366 c = '/'; // always unescape, we don't need settings here |
| |
367 } else if (c == 'f') { |
| |
368 c = '\f'; |
| |
369 } else if (c == 'b') { |
| |
370 c = '\b'; |
| |
371 } |
| |
372 // TODO: support \uXXXX escape sequences |
| |
373 // TODO: discuss the behavior for unrecognized escape sequences |
| |
374 // most parsers throw an error here |
| 362 result.ptr[result.length++] = c; |
375 result.ptr[result.length++] = c; |
| 363 } else { |
376 } else { |
| 364 if (c == '\\') { |
377 if (c == '\\') { |
| 365 u = true; |
378 u = true; |
| 366 } else { |
379 } else { |
| 372 |
385 |
| 373 return result; |
386 return result; |
| 374 } |
387 } |
| 375 |
388 |
| 376 static cxmutstr escape_string(cxmutstr str) { |
389 static cxmutstr escape_string(cxmutstr str) { |
| |
390 // note: this function produces the string without enclosing quotes |
| |
391 // the reason is that we don't want to allocate memory just for that |
| 377 CxBuffer buf = {0}; |
392 CxBuffer buf = {0}; |
| 378 |
393 |
| 379 bool all_printable = true; |
394 bool all_printable = true; |
| 380 for (size_t i = 0; i < str.length; i++) { |
395 for (size_t i = 0; i < str.length; i++) { |
| 381 bool escape = !isprint(str.ptr[i]) |
396 bool escape = !isprint(str.ptr[i]) |