c2html: comparison src/highlighter.c

-:98adda6171d1
+:2c8514b3891b
 }
 }
 return 1;
 }
/*
 * Tests whether the text at the beginning of str looks like a numeric
 * literal and reports how many characters belong to it.
 *
 * This is deliberately an over-approximation and not a precise lexer:
 * an optional sign, a "0x"/"0X" or "0b"/"0B" prefix, digits, one decimal
 * point, one exponent ('e'/'E', or 'p'/'P' after a hex prefix) with an
 * optional sign, apostrophes as grouping characters, and one of the known
 * type suffixes are accepted. Scanning stops at the terminating NUL or at
 * a line feed.
 *
 * str:     NUL-terminated text to examine
 * returns: length of the recognized literal in characters (sign, prefix
 *          and suffix included), or zero when no digit was found or when
 *          the digits are directly followed by letters that are not an
 *          allowed suffix
 */
static size_t check_number(const char *str) {
    /* suffix tables - longer suffixes first, so the longest one matches */
    static const char *const flt_suffixes[] = {
        "f128", "bf16", "F128", "BF16",
        "f16", "f32", "f64", "F16", "F32", "F64",
        "df", "DF", "dd", "DD", "dl", "DL",
        "d", "D", "f", "l", "F", "L",
    };
    static const char *const int_suffixes[] = {
        "ull", "ULL",
        "ul", "UL", "ll", "LL", "wb", "WB",
        "u", "U", "l", "L",
    };

    size_t i = 0;

    /* optional sign */
    if (str[0] == '+' || str[0] == '-') {
        i++;
    }

    /* optional radix prefix */
    const bool hex = str[i] == '0' && (str[i + 1] == 'x' || str[i + 1] == 'X');
    const bool bin = str[i] == '0' && (str[i + 1] == 'b' || str[i + 1] == 'B');
    if (hex || bin) {
        i += 2;
    }

    bool flt = false;
    bool exp_seen = false;
    bool dot = false;
    bool digit_seen = false;

    /* a literal may start with the decimal point */
    if (str[i] == '.') {
        dot = true;
        flt = true;
        i++;
    }

    /* after a hex prefix the exponent is introduced by 'p', otherwise by 'e' */
    const char exp_lower = hex ? 'p' : 'e';
    const char exp_upper = hex ? 'P' : 'E';

    while (str[i] != '\0' && str[i] != '\n') {
        const char c = str[i];

        /* ignore grouping char */
        if (c == '\'') {
            i++;
            continue;
        }

        /* binary is always integer, nothing else allowed */
        if (bin) {
            if (c != '0' && c != '1') {
                break;
            }
            digit_seen = true;
            i++;
            continue;
        }

        /* first decimal point turns the literal into a float */
        if (!dot && c == '.') {
            dot = true;
            flt = true;
            i++;
            continue;
        }

        /* first exponent marker (only valid after at least one digit) */
        if (!exp_seen && digit_seen && (c == exp_lower || c == exp_upper)) {
            exp_seen = true;
            flt = true;
            /* a sign may directly follow */
            if (str[i + 1] == '+' || str[i + 1] == '-') {
                i++;
            }
            i++;
            continue;
        }

        /* check for allowed digits */
        const bool dec_digit = c >= '0' && c <= '9';
        const bool hex_digit = hex &&
            ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
        if (!dec_digit && !hex_digit) {
            break;
        }
        digit_seen = true;
        i++;
    }

    /* have we seen at least one digit? */
    if (!digit_seen) {
        return 0;
    }

    /*
     * check if we are already done (over-approximation)
     * the cast is required: passing a negative char to isalpha() is undefined
     */
    if (!isalpha((unsigned char) str[i])) {
        return i;
    }

    /* letters follow - they must form one of the allowed suffixes */
    const char *const *suffixes;
    size_t suffix_count;
    if (flt) {
        suffixes = flt_suffixes;
        suffix_count = sizeof flt_suffixes / sizeof flt_suffixes[0];
    } else {
        suffixes = int_suffixes;
        suffix_count = sizeof int_suffixes / sizeof int_suffixes[0];
    }
    for (size_t j = 0; j < suffix_count; j++) {
        const size_t len = strlen(suffixes[j]);
        /* strncmp stops at the NUL of str, so we never read past its end */
        if (strncmp(str + i, suffixes[j], len) == 0) {
            return i + len;
        }
    }

    /* no suffix matched */
    return 0;
}
 /* Plaintext Highlighter */
 void c2html_plain_highlighter(char const *src, CxBuffer *dest,
 c2html_highlighter_data *hd) {
 while (*src && *src != '\n') {
 put_htmlescaped(dest, c);
 }
 } else {
 if (isstring) {
 put_htmlescaped(dest, c);
+} else if (wbuf->size == 0 &&
+(isdigit(c) ||  c == '+' || c == '-' || c == '.')
+) {
+/* might be a number */
+size_t numlen = check_number(src+sp);
+if (numlen > 0) {
+start_span("number");
+put_htmlescapedstr(dest, cx_strn(src+sp, numlen));
+stop_span;
+sp += numlen - 1;
+c = src[sp];
+continue;
+} else {
+/* start a new buffered word */
+cxBufferPut(wbuf, c);
+}
 } else if (isalnum(c) ||  c == '_' || c == '#') {
 /* buffer the current word */
 cxBufferPut(wbuf, c);
 } else {
 /* write buffered word, if any */
 }
 put_htmlescapedstr(dest, word);
 if (closespan) {
 stop_span;
 }
-}
-wbuf->pos = wbuf->size = 0; /* reset word buffer */
+/* reset word buffer */
+wbuf->pos = wbuf->size = 0;
+/* re-test current char */
+c = src[--sp];
+continue;
+}
 /* write current character */
 put_htmlescaped(dest, c);
 }
 }
 put_htmlescaped(dest, c);
 }
 } else {
 if (isstring) {
 put_htmlescaped(dest, c);
+} else if (wbuf->size == 0 &&
+(isdigit(c) ||  c == '+' || c == '-' || c == '.')
+) {
+/* might be a number */
+size_t numlen = check_number(src+sp);
+if (numlen > 0) {
+cxBufferPutString(dest,
+"<span class=\"c2html-number\">");
+put_htmlescapedstr(dest, cx_strn(src+sp, numlen));
+cxBufferPutString(dest, "</span>");
+sp += numlen - 1;
+c = src[sp];
+continue;
+} else {
+/* start a new buffered word */
+cxBufferPut(wbuf, c);
+}
 } else if (isalnum(c) || c == '_' || c == '@') {
 /* buffer the current word */
 cxBufferPut(wbuf, c);
 } else {
 /* write buffered word, if any */
 put_htmlescapedstr(dest, word);
 if (closespan) {
 cxBufferPutString(dest, "</span>");
 }
-}
-wbuf->pos = wbuf->size = 0; /* reset buffer */
+/* reset word buffer */
+wbuf->pos = wbuf->size = 0;
+/* re-test current char */
+c = src[--sp];
+continue;
+}
 /* write current character */
 put_htmlescaped(dest, c);
 }
 }

Mercurial > hg > c2html / file comparison

comparison: src/highlighter.c

src/highlighter.c