|    351     } | 
   351     } | 
|    352  | 
   352  | 
|    353     return CX_JSON_INCOMPLETE_DATA; | 
   353     return CX_JSON_INCOMPLETE_DATA; | 
|    354 } | 
   354 } | 
|    355  | 
   355  | 
|         | 
   356 // converts a unicode codepoint (up to U+10FFFF) to utf8 | 
|    356 static int codepoint_to_utf8(uint32_t codepoint, char *output_buf) { | 
   357 static int codepoint_to_utf8(uint32_t codepoint, char *output_buf) { | 
|    357     if (codepoint <= 0x7F) { | 
   358     if (codepoint <= 0x7F) { | 
|    358         *output_buf = (char)codepoint; | 
   359         *output_buf = (char)codepoint; | 
|    359         return 1; | 
   360         return 1; | 
|    360     } else if (codepoint <= 0x7FF) { | 
   361     } else if (codepoint <= 0x7FF) { | 
|    364     } else if (codepoint <= 0xFFFF) { | 
   365     } else if (codepoint <= 0xFFFF) { | 
|    365         output_buf[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F)); | 
   366         output_buf[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F)); | 
|    366         output_buf[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F)); | 
   367         output_buf[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F)); | 
|    367         output_buf[2] = (char)(0x80 | (codepoint & 0x3F)); | 
   368         output_buf[2] = (char)(0x80 | (codepoint & 0x3F)); | 
|    368         return 3; | 
   369         return 3; | 
|         | 
   370     } else if (codepoint <= 0x10FFFF) { | 
|         | 
   371         output_buf[0] = (char)(0xF0 | ((codepoint >> 18) & 0x07)); | 
|         | 
   372         output_buf[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F)); | 
|         | 
   373         output_buf[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F)); | 
|         | 
   374         output_buf[3] = (char)(0x80 | (codepoint & 0x3F)); | 
|         | 
   375         return 4; | 
|    369     } | 
   376     } | 
|    370      | 
   377      | 
|    371     return 0; | 
   378     return 0; | 
|         | 
   379 } | 
|         | 
   380  | 
|         | 
   381 // converts a utf16 surrogate pair to a unicode codepoint | 
|         | 
   382 static inline uint32_t utf16pair_to_codepoint(uint16_t c0, uint16_t c1) { | 
|         | 
   383     return ((c0 - 0xD800) << 10) + (c1 - 0xDC00) + 0x10000; | 
|    372 } | 
   384 } | 
|    373  | 
   385  | 
|    374 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { | 
   386 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { | 
|    375     // note: this function expects that str contains the enclosing quotes! | 
   387     // note: this function expects that str contains the enclosing quotes! | 
|    376  | 
   388  | 
|    400                 c = '\b'; | 
   412                 c = '\b'; | 
|    401             } else if (c == 'u') { | 
   413             } else if (c == 'u') { | 
|    402                 if (i+4 < str.length - 1) { | 
   414                 if (i+4 < str.length - 1) { | 
|    403                     cxstring codepoint_str = { str.ptr + i + 1, 4}; | 
   415                     cxstring codepoint_str = { str.ptr + i + 1, 4}; | 
|    404                     uint32_t codepoint; | 
   416                     uint32_t codepoint; | 
|    405                     if(!cx_strtou32_lc(codepoint_str, &codepoint, 16, "")) { | 
   417                     if (!cx_strtou32_lc(codepoint_str, &codepoint, 16, "")) { | 
|    406                         char utf8buf[4]; | 
   418                         char utf8buf[4]; | 
|    407                         int utf8len = codepoint_to_utf8(codepoint, utf8buf); | 
   419                         int utf8len = 0; | 
|         | 
   420                         if (codepoint >= 0xD800 && codepoint <= 0xDFFF) { | 
|         | 
   421                             // character is encoded as a surrogate pair | 
|         | 
   422                             // get next 6 bytes | 
|         | 
   423                             if (i + 10 < str.length - 1) { | 
|         | 
   424                                 char *surrogate2 = str.ptr+i+5; | 
|         | 
   425                                 if (surrogate2[0] == '\\' && surrogate2[1] == 'u') { | 
|         | 
   426                                     cxstring c2_str = { surrogate2 + 2, 4 }; | 
|         | 
   427                                     uint32_t c2; | 
|         | 
   428                                     if (!cx_strtou32_lc(c2_str, &c2, 16, "")) { | 
|         | 
   429                                         codepoint = utf16pair_to_codepoint((uint16_t)codepoint, c2); | 
|         | 
   430                                         utf8len = codepoint_to_utf8(codepoint, utf8buf); | 
|         | 
   431                                         i += 6; | 
|         | 
   432                                     } | 
|         | 
   433                                 } | 
|         | 
   434                             } | 
|         | 
   435                         } else { | 
|         | 
   436                             // character is in the Basic Multilingual Plane | 
|         | 
   437                             // and encoded as a single utf16 char | 
|         | 
   438                             utf8len = codepoint_to_utf8(codepoint, utf8buf); | 
|         | 
   439                         } | 
|    408                         if(utf8len > 0) { | 
   440                         if(utf8len > 0) { | 
|    409                             // add all bytes from utf8buf except the last char | 
   441                             // add all bytes from utf8buf except the last char | 
|    410                             // to the result | 
   442                             // to the result | 
|    411                             utf8len--; | 
   443                             utf8len--; | 
|    412                             c = utf8buf[utf8len]; | 
   444                             c = utf8buf[utf8len]; | 
|    413                             for(int i=0;i<utf8len;i++) { | 
   445                             for(int x=0;x<utf8len;x++) { | 
|    414                                 result.ptr[result.length++] = utf8buf[i]; | 
   446                                 result.ptr[result.length++] = utf8buf[x]; | 
|    415                             } | 
   447                             } | 
|    416                         } | 
   448                         } | 
|    417                         i += 4; | 
   449                         i += 4; | 
|    418                     } | 
   450                     } | 
|    419                 } | 
   451                 } |