73 } |
73 } |
74 } |
74 } |
75 return 1; |
75 return 1; |
76 } |
76 } |
77 |
77 |
|
/**
 * Checks whether the text at the start of @p str looks like a numeric literal.
 *
 * Recognized are an optional sign, an optional 0x/0X or 0b/0B prefix, digits
 * (hexadecimal digits after a hex prefix) with ' as grouping character, one
 * decimal point, one exponent ('e'/'E', or 'p'/'P' after a hex prefix) with
 * an optional sign, and a trailing integer or floating point suffix.
 * Scanning stops at the string terminator or at a line break.
 *
 * This function is not precise, but a good over-approximation.
 *
 * @param str the zero-terminated text to examine
 * @return the number of characters that belong to the number,
 *         or zero when the text does not look like a number
 */
static size_t check_number(const char *str) {
    size_t i = 0;

    /* optional sign */
    if (str[0] == '+' || str[0] == '-') {
        i++;
    }

    /* optional radix prefix */
    bool hex = str[i] == '0' && (str[i + 1] == 'x' || str[i + 1] == 'X');
    bool bin = str[i] == '0' && (str[i + 1] == 'b' || str[i + 1] == 'B');
    if (hex || bin) {
        i += 2;
    }

    bool flt = false;
    bool exp_seen = false;
    bool dot = false;
    bool digit_seen = false;

    /* a number may start with the decimal point */
    if (str[i] == '.') {
        dot = true;
        flt = true;
        i++;
    }

    /* hexadecimal floats use p as exponent char, because e is a digit */
    char exp_char_low = hex ? 'p' : 'e';
    char exp_char_up = hex ? 'P' : 'E';

    while (str[i] != '\0' && str[i] != '\n') {
        /* ignore grouping char */
        if (str[i] == '\'') {
            i++;
            continue;
        }

        /* binary is always integer, nothing else allowed */
        if (bin) {
            if (str[i] != '0' && str[i] != '1') {
                break;
            }
            i++;
            digit_seen = true;
            continue;
        }

        /* detect decimal and exponent separators */
        bool is_dot = !dot && str[i] == '.';
        bool is_exp = !exp_seen && digit_seen &&
                (str[i] == exp_char_low || str[i] == exp_char_up);
        if (is_dot || is_exp) {
            if (str[i] == '.') {
                dot = true;
            } else {
                exp_seen = true;
                /* a sign may directly follow */
                if (str[i + 1] == '+' || str[i + 1] == '-') {
                    i++;
                }
            }
            flt = true;
            i++;
            continue;
        }

        /* check for allowed digits */
        bool is_dec_digit = str[i] >= '0' && str[i] <= '9';
        bool is_hex_digit = hex && (
                (str[i] >= 'a' && str[i] <= 'f')
                || (str[i] >= 'A' && str[i] <= 'F'));
        if (!is_dec_digit && !is_hex_digit) {
            break;
        }
        digit_seen = true;
        i++;
    }

    /* have we seen at least one digit? */
    if (!digit_seen) return 0;

    /* check if we are already done (over-approximation) */
    /* the cast is required: passing a negative char to isalpha() is undefined */
    if (!isalpha((unsigned char) str[i])) return i;

    /* check suffixes (must check with decreasing length) */
    static const char *const flt_suffixes[] = {
        "f128", "bf16", "F128", "BF16",
        "f16", "f32", "f64", "F16", "F32", "F64",
        "df", "DF", "dd", "DD", "dl", "DL",
        "d", "D", "f", "l", "F", "L",
    };
    static const char *const int_suffixes[] = {
        "ull", "ULL",
        "ul", "UL", "ll", "LL", "wb", "WB",
        "u", "U", "l", "L",
    };
    /* derive the counts from the tables, so they cannot get out of sync */
    const size_t flt_suffixes_len = sizeof flt_suffixes / sizeof flt_suffixes[0];
    const size_t int_suffixes_len = sizeof int_suffixes / sizeof int_suffixes[0];

    const char *const *allowed_suffixes = flt ? flt_suffixes : int_suffixes;
    const size_t allowed_suffixes_len = flt ? flt_suffixes_len : int_suffixes_len;
    const char *testee = str + i;
    for (size_t j = 0; j < allowed_suffixes_len; j++) {
        const char *suffix = allowed_suffixes[j];
        size_t suffix_len = strlen(suffix);
        /* strncmp stops at the terminator of the input, so a suffix that is */
        /* longer than the remaining text never causes an out-of-bounds read */
        if (strncmp(testee, suffix, suffix_len) == 0) {
            return i + suffix_len;
        }
    }

    /* no suffix matched */
    return 0;
}
|
178 |
78 /* Plaintext Highlighter */ |
179 /* Plaintext Highlighter */ |
79 |
180 |
80 void c2html_plain_highlighter(char const *src, CxBuffer *dest, |
181 void c2html_plain_highlighter(char const *src, CxBuffer *dest, |
81 c2html_highlighter_data *hd) { |
182 c2html_highlighter_data *hd) { |
82 while (*src && *src != '\n') { |
183 while (*src && *src != '\n') { |
244 put_htmlescaped(dest, c); |
345 put_htmlescaped(dest, c); |
245 } |
346 } |
246 } else { |
347 } else { |
247 if (isstring) { |
348 if (isstring) { |
248 put_htmlescaped(dest, c); |
349 put_htmlescaped(dest, c); |
|
350 } else if (wbuf->size == 0 && |
|
351 (isdigit(c) || c == '+' || c == '-' || c == '.') |
|
352 ) { |
|
353 /* might be a number */ |
|
354 size_t numlen = check_number(src+sp); |
|
355 if (numlen > 0) { |
|
356 start_span("number"); |
|
357 put_htmlescapedstr(dest, cx_strn(src+sp, numlen)); |
|
358 stop_span; |
|
359 sp += numlen - 1; |
|
360 c = src[sp]; |
|
361 continue; |
|
362 } else { |
|
363 /* start a new buffered word */ |
|
364 cxBufferPut(wbuf, c); |
|
365 } |
249 } else if (isalnum(c) || c == '_' || c == '#') { |
366 } else if (isalnum(c) || c == '_' || c == '#') { |
250 /* buffer the current word */ |
367 /* buffer the current word */ |
251 cxBufferPut(wbuf, c); |
368 cxBufferPut(wbuf, c); |
252 } else { |
369 } else { |
253 /* write buffered word, if any */ |
370 /* write buffered word, if any */ |
365 put_htmlescaped(dest, c); |
488 put_htmlescaped(dest, c); |
366 } |
489 } |
367 } else { |
490 } else { |
368 if (isstring) { |
491 if (isstring) { |
369 put_htmlescaped(dest, c); |
492 put_htmlescaped(dest, c); |
|
493 } else if (wbuf->size == 0 && |
|
494 (isdigit(c) || c == '+' || c == '-' || c == '.') |
|
495 ) { |
|
496 /* might be a number */ |
|
497 size_t numlen = check_number(src+sp); |
|
498 if (numlen > 0) { |
|
499 cxBufferPutString(dest, |
|
500 "<span class=\"c2html-number\">"); |
|
501 put_htmlescapedstr(dest, cx_strn(src+sp, numlen)); |
|
502 cxBufferPutString(dest, "</span>"); |
|
503 sp += numlen - 1; |
|
504 c = src[sp]; |
|
505 continue; |
|
506 } else { |
|
507 /* start a new buffered word */ |
|
508 cxBufferPut(wbuf, c); |
|
509 } |
370 } else if (isalnum(c) || c == '_' || c == '@') { |
510 } else if (isalnum(c) || c == '_' || c == '@') { |
371 /* buffer the current word */ |
511 /* buffer the current word */ |
372 cxBufferPut(wbuf, c); |
512 cxBufferPut(wbuf, c); |
373 } else { |
513 } else { |
374 /* write buffered word, if any */ |
514 /* write buffered word, if any */ |