Sun, 02 Mar 2025 16:06:24 +0100
add number highlighting
fixes #393
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2016 Mike Becker. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include "highlighter.h" #include <string.h> #include <ctype.h> #include <cx/string.h> #include <cx/printf.h> static void put_htmlescaped(CxBuffer *dest, char c) { if (c == '>') { cxBufferPutString(dest, ">"); } else if (c == '<') { cxBufferPutString(dest, "<"); } else if (c == '&') { cxBufferPutString(dest, "&"); } else if (c) { cxBufferPut(dest, c); } } static void put_htmlescapedstr(CxBuffer *dest, cxstring s) { for (int i = 0 ; i < s.length ; i++) { put_htmlescaped(dest, s.ptr[i]); } } static int check_keyword(cxstring word, const char** keywords) { for (int i = 0 ; keywords[i] ; i++) { if (cx_strcmp(word, cx_str(keywords[i])) == 0) { return 1; } } return 0; } static int check_capsonly(cxstring word) { if (!isupper(word.ptr[0]) && word.ptr[0] != '_') { return 0; } for (size_t i = 1 ; i < word.length ; i++) { if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i]) && word.ptr[i] != '_') { return 0; } } return 1; } static size_t check_number(const char *str) { /* this function is not precise, but a good over-approximation */ size_t i = 0; if (str[0] == '+' || str[0] == '-') { i++; } bool hex = str[i] == '0' && (str[i + 1] == 'x' || str[i + 1] == 'X'); bool bin = str[i] == '0' && (str[i + 1] == 'b' || str[i + 1] == 'B'); if (hex || bin) { i += 2; } bool flt = false; bool exp = false; bool dot = false; bool digit_seen = false; if (str[i] == '.') { dot = true; flt = true; i++; } char exp_char_low = hex ? 'p' : 'e'; char exp_char_up = hex ? 'P' : 'E'; while (str[i] != '\0' && str[i] != '\n') { /* ignore grouping char */ if (str[i] == '\'') { i++; continue; } /* binary is always integer, nothing else allowed */ if (bin) { if (str[i] != '0' && str[i] != '1') { break; } else { i++; digit_seen = true; } } else { /* detect decimal and exponent separators */ if ((!dot && str[i] == '.') || (!exp && digit_seen && (str[i] == exp_char_low || str[i] == exp_char_up) ) ) { if (str[i] == '.') { dot = true; } else { exp = true; /* a sign may directly follow */ if (str[i+1] == '+' || str[i+1] == '-') { i++; } } flt = true; i++; continue; } /* check for allowed digits */ if ((str[i] >= '0' && str[i] <= '9') || (hex && ( (str[i] >= 'a' && str[i] <= 'f') || (str[i] >= 'A' && str[i] <= 'F') ))) { digit_seen = true; i++; } else { break; } } } /* have we seen at least one digit? */ if (!digit_seen) return 0; /* check if we are already done (over-approximation) */ if (!isalpha(str[i])) return i; /* check suffixes (must check with decreasing length) */ const char *const flt_suffixes[] = { "f128", "bf16", "F128", "BF16", "f16", "f32", "f64", "F16", "F32", "F64", "df", "DF", "dd", "DD", "dl", "DL", "d", "D", "f", "l", "F", "L", }; const unsigned flt_suffixes_len = 22; const char *const int_suffixes[] = { "ull", "ULL", "ul", "UL", "ll", "LL", "wb", "WB", "u", "U", "l", "L", }; const unsigned int_suffixes_len = 12; const char * const *allowed_suffixes = flt ? flt_suffixes : int_suffixes; const unsigned allowed_suffixes_len = flt ? flt_suffixes_len : int_suffixes_len; for (unsigned j = 0 ; j < allowed_suffixes_len ; j++) { cxstring suffix = cx_str(allowed_suffixes[j]); const char *testee = str+i; if (memcmp(testee, suffix.ptr, suffix.length) == 0) { return i+suffix.length; } } /* no suffix matched */ return 0; } /* Plaintext Highlighter */ void c2html_plain_highlighter(char const *src, CxBuffer *dest, c2html_highlighter_data *hd) { while (*src && *src != '\n') { if (*src != '\r') { put_htmlescaped(dest, *src); } src++; } cxBufferPut(dest, '\n'); } /* C Highlighter */ static const char* ckeywords[] = { "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while", NULL }; void c2html_c_highlighter(char const *src, CxBuffer *dest, c2html_highlighter_data *hd) { /* reset buffers without clearing them */ hd->primary_buffer.size = hd->primary_buffer.pos = 0; hd->secondary_buffer.size = hd->secondary_buffer.pos = 0; /* alias the buffers for better handling */ CxBuffer *wbuf = &hd->primary_buffer; CxBuffer *ifilebuf = &hd->secondary_buffer; /* local information */ size_t sp = SIZE_MAX; int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0; char quote = '\0'; int isescaping = 0; int continuation_enabled = 0; const char* current_highlight = NULL; /* define convenience macros */ #define start_span(cl) \ current_highlight = cl; \ cx_bprintf(dest, "<span class=\"c2html-%s\">", current_highlight) #define stop_span \ current_highlight = NULL;\ cxBufferPutString(dest, "</span>") /* continue a multi line comment highlighting */ if (hd->multiline_comment) { iscomment = 1; start_span("comment"); } /* continue highlighting in case of line continuation */ if (hd->continue_highlight) { start_span(hd->continue_highlight); isinclude = hd->continuation_info & 0x1; isstring = (hd->continuation_info & 0x2) >> 1; iscomment = (hd->continuation_info & 0x4) >> 2; if (hd->continuation_info & 0x10) { quote = '\''; } else if (hd->continuation_info & 0x20) { quote = '\"'; } hd->continue_highlight = NULL; hd->continuation_info = 0; } char c; do { c = src[++sp]; if (c == '\r') continue; /* line continuation */ if (c == '\\') { /* currently do not support continuations in user includes */ // TODO: also support user includes if (!parseinclude) { continuation_enabled = 1; } } else if (continuation_enabled) { if (!isspace(c)) { continuation_enabled = 0; } else if (c == '\n') { cxBufferPut(dest, '\n'); hd->continue_highlight = current_highlight; hd->continuation_info = \ isinclude | \ (isstring << 1) | \ (iscomment << 2); if (quote == '\'') { hd->continuation_info |= 0x10; } else if (quote == '\"') { hd->continuation_info |= 0x20; } stop_span; continue; } } /* comments */ if (!isstring && c == '/') { if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { iscomment = 0; hd->multiline_comment = 0; cxBufferPut(dest, '/'); stop_span; continue; } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { iscomment = 1; hd->multiline_comment = (src[sp+1] == '*'); start_span("comment"); } } if (iscomment) { if (c == '\n') { stop_span; cxBufferPut(dest, '\n'); } else { put_htmlescaped(dest, c); } } else if (isinclude) { if (c == '<') { start_span("stdinclude"); cxBufferPutString(dest, "<"); } else if (c == '\"') { if (parseinclude) { cxBufferPutString(dest, "\">"); cxBufferWrite(ifilebuf->space, 1, ifilebuf->size, dest); cxBufferPutString(dest, "\"</a>"); parseinclude = 0; } else { cxBufferPutString(dest, "<a class=\"c2html-userinclude\" href=\""); cxBufferPut(ifilebuf, '\"'); parseinclude = 1; } } else if (c == '>') { cxBufferPutString(dest, ">"); stop_span; } else { if (parseinclude) { cxBufferPut(ifilebuf, c); } put_htmlescaped(dest, c); } } else { /* strings */ if (!isescaping && (c == '\'' || c == '\"')) { if (isstring) { put_htmlescaped(dest, c); if (c == quote) { isstring = 0; stop_span; } else { put_htmlescaped(dest, c); } } else { isstring = 1; quote = c; start_span("string"); put_htmlescaped(dest, c); } } else { if (isstring) { put_htmlescaped(dest, c); } else if (wbuf->size == 0 && (isdigit(c) || c == '+' || c == '-' || c == '.') ) { /* might be a number */ size_t numlen = check_number(src+sp); if (numlen > 0) { start_span("number"); put_htmlescapedstr(dest, cx_strn(src+sp, numlen)); stop_span; sp += numlen - 1; c = src[sp]; continue; } else { /* start a new buffered word */ cxBufferPut(wbuf, c); } } else if (isalnum(c) || c == '_' || c == '#') { /* buffer the current word */ cxBufferPut(wbuf, c); } else { /* write buffered word, if any */ if (wbuf->size > 0) { cxstring word = cx_strn(wbuf->space, wbuf->size); int closespan = 1; cxstring typesuffix = CX_STR("_t"); if (check_keyword(word, ckeywords)) { start_span("keyword"); } else if (cx_strsuffix(word, typesuffix)) { start_span("type"); } else if (word.ptr[0] == '#') { isinclude = !cx_strcmp(word, CX_STR("#include")); start_span("directive"); } else if (check_capsonly(word)) { start_span("macroconst"); } else { closespan = 0; } put_htmlescapedstr(dest, word); if (closespan) { stop_span; } /* reset word buffer */ wbuf->pos = wbuf->size = 0; /* re-test current char */ c = src[--sp]; continue; } /* write current character */ put_htmlescaped(dest, c); } } isescaping = !isescaping & (c == '\\'); } } while (c && c != '\n'); #undef start_span #undef stop_span } /* Java Highlighter */ static const char* jkeywords[] = { "abstract", "continue", "for", "new", "switch", "assert", "default", "goto", "package", "synchronized", "boolean", "do", "if", "private", "this", "break", "double", "implements", "protected", "throw", "byte", "else", "import", "public", "throws", "case", "enum", "instanceof", "return", "transient", "catch", "extends", "int", "short", "try", "char", "final", "interface", "static", "void", "class", "finally", "long", "strictfp", "volatile", "const", "float", "native", "super", "while", NULL }; void c2html_java_highlighter(char const *src, CxBuffer *dest, c2html_highlighter_data *hd) { /* reset buffers without clearing them */ hd->primary_buffer.size = hd->primary_buffer.pos = 0; hd->secondary_buffer.size = hd->secondary_buffer.pos = 0; /* alias the buffers for better handling */ CxBuffer *wbuf = &hd->primary_buffer; /* local information */ size_t sp = SIZE_MAX; int isstring = 0, iscomment = 0, isimport = 0; char quote = '\0'; int isescaping = 0; if (hd->multiline_comment) { iscomment = 1; cxBufferPutString(dest, "<span class=\"c2html-comment\">"); } char c; do { c = src[++sp]; if (c == '\r') continue; /* comments */ if (!isstring && c == '/') { if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { iscomment = 0; hd->multiline_comment = 0; cxBufferPutString(dest, "/</span>"); continue; } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { iscomment = 1; hd->multiline_comment = (src[sp+1] == '*'); cxBufferPutString(dest, "<span class=\"c2html-comment\">"); } } if (iscomment) { if (c == '\n') { cxBufferPutString(dest, "</span>\n"); } else { put_htmlescaped(dest, c); } } else if (isimport) { /* TODO: local imports */ } else { /* strings */ if (!isescaping && (c == '\'' || c == '\"')) { if (isstring) { put_htmlescaped(dest, c); if (c == quote) { isstring = 0; cxBufferPutString(dest, "</span>"); } else { put_htmlescaped(dest, c); } } else { isstring = 1; quote = c; cxBufferPutString(dest, "<span class=\"c2html-string\">"); put_htmlescaped(dest, c); } } else { if (isstring) { put_htmlescaped(dest, c); } else if (wbuf->size == 0 && (isdigit(c) || c == '+' || c == '-' || c == '.') ) { /* might be a number */ size_t numlen = check_number(src+sp); if (numlen > 0) { cxBufferPutString(dest, "<span class=\"c2html-number\">"); put_htmlescapedstr(dest, cx_strn(src+sp, numlen)); cxBufferPutString(dest, "</span>"); sp += numlen - 1; c = src[sp]; continue; } else { /* start a new buffered word */ cxBufferPut(wbuf, c); } } else if (isalnum(c) || c == '_' || c == '@') { /* buffer the current word */ cxBufferPut(wbuf, c); } else { /* write buffered word, if any */ if (wbuf->size > 0) { cxstring word = cx_strn(wbuf->space, wbuf->size); int closespan = 1; if (check_keyword(word, jkeywords)) { cxBufferPutString(dest, "<span class=\"c2html-keyword\">"); } else if (isupper(word.ptr[0])) { cxBufferPutString(dest, "<span class=\"c2html-type\">"); } else if (word.ptr[0] == '@') { cxBufferPutString(dest, "<span class=\"c2html-directive\">"); } else if (check_capsonly(word)) { cxBufferPutString(dest, "<span class=\"c2html-macroconst\">"); } else { closespan = 0; } put_htmlescapedstr(dest, word); if (closespan) { cxBufferPutString(dest, "</span>"); } /* reset word buffer */ wbuf->pos = wbuf->size = 0; /* re-test current char */ c = src[--sp]; continue; } /* write current character */ put_htmlescaped(dest, c); } } isescaping = !isescaping & (c == '\\'); } } while (c && c != '\n'); }