| 73 } |
73 } |
| 74 } |
74 } |
| 75 return 1; |
75 return 1; |
| 76 } |
76 } |
| 77 |
77 |
| |
/*
 * Checks whether the text at the beginning of str looks like a numeric
 * literal and returns its length in bytes.
 *
 * Recognized: an optional leading sign, the prefixes 0x/0X and 0b/0B,
 * the grouping character ('), one decimal point, one exponent
 * (e/E, or p/P after a hex prefix) optionally followed by a sign,
 * and a trailing type suffix taken from the tables below.
 *
 * This function is not precise, but a good over-approximation.
 *
 * str must be NUL-terminated; scanning also stops at a line feed.
 *
 * Returns the number of characters that belong to the literal,
 * or 0 when no digit was seen or when the digits are directly followed
 * by letters that do not start with a known suffix.
 */
static size_t check_number(const char *str) {
    size_t i = 0;
    if (str[0] == '+' || str[0] == '-') {
        i++;
    }
    bool hex = str[i] == '0' && (str[i + 1] == 'x' || str[i + 1] == 'X');
    bool bin = str[i] == '0' && (str[i + 1] == 'b' || str[i + 1] == 'B');
    if (hex || bin) {
        i += 2;
    }
    bool flt = false;
    bool exp = false;
    bool dot = false;
    bool digit_seen = false;
    /* a literal may start with the decimal point */
    if (str[i] == '.') {
        dot = true;
        flt = true;
        i++;
    }
    /* hexadecimal floats use p/P, because e/E are hex digits */
    char exp_char_low = hex ? 'p' : 'e';
    char exp_char_up = hex ? 'P' : 'E';
    while (str[i] != '\0' && str[i] != '\n') {
        /* ignore grouping char */
        if (str[i] == '\'') {
            i++;
            continue;
        }
        /* binary is always integer, nothing else allowed */
        if (bin) {
            if (str[i] != '0' && str[i] != '1') {
                break;
            } else {
                i++;
                digit_seen = true;
            }
        } else {
            /* detect decimal and exponent separators */
            if ((!dot && str[i] == '.') ||
                (!exp && digit_seen &&
                    (str[i] == exp_char_low || str[i] == exp_char_up)
                )
            ) {
                if (str[i] == '.') {
                    dot = true;
                } else {
                    exp = true;
                    /* a sign may directly follow */
                    if (str[i + 1] == '+' || str[i + 1] == '-') {
                        i++;
                    }
                }
                flt = true;
                i++;
                continue;
            }
            /* check for allowed digits */
            if ((str[i] >= '0' && str[i] <= '9') || (hex && (
                    (str[i] >= 'a' && str[i] <= 'f')
                    || (str[i] >= 'A' && str[i] <= 'F')
                ))) {
                digit_seen = true;
                i++;
            } else {
                break;
            }
        }
    }
    /* have we seen at least one digit? */
    if (!digit_seen) return 0;

    /* check if we are already done (over-approximation) */
    /* the cast is required: passing a negative char to isalpha is undefined */
    if (!isalpha((unsigned char) str[i])) return i;

    /* check suffixes (must check with decreasing length) */
    static const char *const flt_suffixes[] = {
        "f128", "bf16", "F128", "BF16",
        "f16", "f32", "f64", "F16", "F32", "F64",
        "df", "DF", "dd", "DD", "dl", "DL",
        "d", "D", "f", "l", "F", "L",
    };
    static const char *const int_suffixes[] = {
        "ull", "ULL",
        "ul", "UL", "ll", "LL", "wb", "WB",
        "u", "U", "l", "L",
    };
    /* derive the table sizes so they cannot get out of sync with the tables */
    const size_t flt_suffixes_len = sizeof flt_suffixes / sizeof flt_suffixes[0];
    const size_t int_suffixes_len = sizeof int_suffixes / sizeof int_suffixes[0];
    const char *const *allowed_suffixes = flt ? flt_suffixes : int_suffixes;
    const size_t allowed_suffixes_len = flt ? flt_suffixes_len : int_suffixes_len;
    const char *testee = str + i;
    for (size_t j = 0; j < allowed_suffixes_len; j++) {
        const char *suffix = allowed_suffixes[j];
        size_t suffix_len = strlen(suffix);
        /* strncmp stops at the terminator of testee, so we never read
         * beyond the end of the input when the remainder is shorter
         * than the suffix */
        if (strncmp(testee, suffix, suffix_len) == 0) {
            return i + suffix_len;
        }
    }
    /* no suffix matched */
    return 0;
}
| |
178 |
| 78 /* Plaintext Highlighter */ |
179 /* Plaintext Highlighter */ |
| 79 |
180 |
| 80 void c2html_plain_highlighter(char const *src, CxBuffer *dest, |
181 void c2html_plain_highlighter(char const *src, CxBuffer *dest, |
| 81 c2html_highlighter_data *hd) { |
182 c2html_highlighter_data *hd) { |
| 82 while (*src && *src != '\n') { |
183 while (*src && *src != '\n') { |
| 244 put_htmlescaped(dest, c); |
345 put_htmlescaped(dest, c); |
| 245 } |
346 } |
| 246 } else { |
347 } else { |
| 247 if (isstring) { |
348 if (isstring) { |
| 248 put_htmlescaped(dest, c); |
349 put_htmlescaped(dest, c); |
| |
350 } else if (wbuf->size == 0 && |
| |
351 (isdigit(c) || c == '+' || c == '-' || c == '.') |
| |
352 ) { |
| |
353 /* might be a number */ |
| |
354 size_t numlen = check_number(src+sp); |
| |
355 if (numlen > 0) { |
| |
356 start_span("number"); |
| |
357 put_htmlescapedstr(dest, cx_strn(src+sp, numlen)); |
| |
358 stop_span; |
| |
359 sp += numlen - 1; |
| |
360 c = src[sp]; |
| |
361 continue; |
| |
362 } else { |
| |
363 /* start a new buffered word */ |
| |
364 cxBufferPut(wbuf, c); |
| |
365 } |
| 249 } else if (isalnum(c) || c == '_' || c == '#') { |
366 } else if (isalnum(c) || c == '_' || c == '#') { |
| 250 /* buffer the current word */ |
367 /* buffer the current word */ |
| 251 cxBufferPut(wbuf, c); |
368 cxBufferPut(wbuf, c); |
| 252 } else { |
369 } else { |
| 253 /* write buffered word, if any */ |
370 /* write buffered word, if any */ |
| 365 put_htmlescaped(dest, c); |
488 put_htmlescaped(dest, c); |
| 366 } |
489 } |
| 367 } else { |
490 } else { |
| 368 if (isstring) { |
491 if (isstring) { |
| 369 put_htmlescaped(dest, c); |
492 put_htmlescaped(dest, c); |
| |
493 } else if (wbuf->size == 0 && |
| |
494 (isdigit(c) || c == '+' || c == '-' || c == '.') |
| |
495 ) { |
| |
496 /* might be a number */ |
| |
497 size_t numlen = check_number(src+sp); |
| |
498 if (numlen > 0) { |
| |
499 cxBufferPutString(dest, |
| |
500 "<span class=\"c2html-number\">"); |
| |
501 put_htmlescapedstr(dest, cx_strn(src+sp, numlen)); |
| |
502 cxBufferPutString(dest, "</span>"); |
| |
503 sp += numlen - 1; |
| |
504 c = src[sp]; |
| |
505 continue; |
| |
506 } else { |
| |
507 /* start a new buffered word */ |
| |
508 cxBufferPut(wbuf, c); |
| |
509 } |
| 370 } else if (isalnum(c) || c == '_' || c == '@') { |
510 } else if (isalnum(c) || c == '_' || c == '@') { |
| 371 /* buffer the current word */ |
511 /* buffer the current word */ |
| 372 cxBufferPut(wbuf, c); |
512 cxBufferPut(wbuf, c); |
| 373 } else { |
513 } else { |
| 374 /* write buffered word, if any */ |
514 /* write buffered word, if any */ |