351 } |
351 } |
352 |
352 |
353 return CX_JSON_INCOMPLETE_DATA; |
353 return CX_JSON_INCOMPLETE_DATA; |
354 } |
354 } |
355 |
355 |
|
/*
 * Encodes a single Unicode code point as UTF-8.
 *
 * codepoint   the code point to encode
 * output_buf  destination buffer; must have room for at least four bytes,
 *             no terminator is written
 *
 * Returns the number of bytes written (1 to 4), or 0 when the value is
 * larger than 0x10FFFF and therefore cannot be encoded. In that case
 * nothing is written to output_buf.
 *
 * Surrogate values (0xD800..0xDFFF) are not filtered here; they are encoded
 * like any other value in the three-byte range.
 */
static int codepoint_to_utf8(uint32_t codepoint, char *output_buf) {
    // write through unsigned char so that byte values >= 0x80 are stored
    // without an implementation-defined conversion to a signed char
    unsigned char *out = (unsigned char *) output_buf;

    if (codepoint <= 0x7F) {
        // 0xxxxxxx
        out[0] = (unsigned char) codepoint;
        return 1;
    } else if (codepoint <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        out[0] = (unsigned char) (0xC0 | ((codepoint >> 6) & 0x1F));
        out[1] = (unsigned char) (0x80 | (codepoint & 0x3F));
        return 2;
    } else if (codepoint <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        out[0] = (unsigned char) (0xE0 | ((codepoint >> 12) & 0x0F));
        out[1] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3F));
        out[2] = (unsigned char) (0x80 | (codepoint & 0x3F));
        return 3;
    } else if (codepoint <= 0x10FFFF) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        out[0] = (unsigned char) (0xF0 | ((codepoint >> 18) & 0x07));
        out[1] = (unsigned char) (0x80 | ((codepoint >> 12) & 0x3F));
        out[2] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3F));
        out[3] = (unsigned char) (0x80 | (codepoint & 0x3F));
        return 4;
    }

    // beyond the Unicode range: not encodable
    return 0;
}
|
373 |
356 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { |
374 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { |
357 // note: this function expects that str contains the enclosing quotes! |
375 // note: this function expects that str contains the enclosing quotes! |
358 |
376 |
359 cxmutstr result; |
377 cxmutstr result; |
360 result.length = 0; |
378 result.length = 0; |
378 c = '/'; // always unescape, we don't need settings here |
396 c = '/'; // always unescape, we don't need settings here |
379 } else if (c == 'f') { |
397 } else if (c == 'f') { |
380 c = '\f'; |
398 c = '\f'; |
381 } else if (c == 'b') { |
399 } else if (c == 'b') { |
382 c = '\b'; |
400 c = '\b'; |
383 } |
401 } else if (c == 'u') { |
384 // TODO: support \uXXXX escape sequences |
402 if (i+4 < str.length) { |
|
403 cxstring codepoint_str = { str.ptr + i + 1, 4}; |
|
404 uint32_t codepoint; |
|
405 if(!cx_strtou32_lc_(codepoint_str, &codepoint, 16, "")) { |
|
406 char utf8buf[4]; |
|
407 int utf8len = codepoint_to_utf8(codepoint, utf8buf); |
|
408 if(utf8len > 0) { |
|
409 // add all bytes from utf8buf except the last char |
|
410 // to the result |
|
411 utf8len--; |
|
412 c = utf8buf[utf8len]; |
|
413 for(int i=0;i<utf8len;i++) { |
|
414 result.ptr[result.length++] = utf8buf[i]; |
|
415 } |
|
416 } |
|
417 i += 4; |
|
418 } |
|
419 } |
|
420 } |
|
421 |
385 // TODO: discuss the behavior for unrecognized escape sequences |
422 // TODO: discuss the behavior for unrecognized escape sequences |
386 // most parsers throw an error here |
423 // most parsers throw an error here |
387 result.ptr[result.length++] = c; |
424 result.ptr[result.length++] = c; |
388 } else { |
425 } else { |
389 if (c == '\\') { |
426 if (c == '\\') { |