src/c2html.c

Fri, 30 Aug 2013 11:23:44 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 30 Aug 2013 11:23:44 +0200
changeset 20
ebbf0776c1bc
parent 19
2e812df2b231
child 21
537aec525835
permissions
-rw-r--r--

replaced function static variables with struct members

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2013 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>

#define INPUTBUF_SIZE 2048
#define WORDBUF_SIZE 64

/* NULL-terminated list of the C keywords that get keyword highlighting. */
const char* ckeywords[] = {
    "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",
    "double",   "else",     "enum",     "extern",
    "float",    "for",      "goto",     "if",
    "int",      "long",     "register", "return",
    "short",    "signed",   "sizeof",   "static",
    "struct",   "switch",   "typedef",  "union",
    "unsigned", "void",     "volatile", "while",
    NULL
};

/* NULL-terminated list of the Java keywords that get keyword highlighting. */
const char* jkeywords[] = {
    "abstract", "continue",   "for",        "new",       "switch",
    "assert",   "default",    "goto",       "package",   "synchronized",
    "boolean",  "do",         "if",         "private",   "this",
    "break",    "double",     "implements", "protected", "throw",
    "byte",     "else",       "import",     "public",    "throws",
    "case",     "enum",       "instanceof", "return",    "transient",
    "catch",    "extends",    "int",        "short",     "try",
    "char",     "final",      "interface",  "static",    "void",
    "class",    "finally",    "long",       "strictfp",  "volatile",
    "const",    "float",      "native",     "super",     "while",
    NULL
};

/*
 * Non-zero (1) if c may be part of a word: alphanumeric, '_', or one of the
 * directive prefixes '#' (C) and '@' (Java).
 * The argument is parenthesized so compound expressions expand correctly and
 * is converted to unsigned char before it reaches isalnum(), as <ctype.h>
 * requires. Note that c is still evaluated more than once.
 */
#define iswordcharacter(c) \
    (isalnum((unsigned char)(c)) || (c)=='_' || (c)=='#' || (c)=='@')

/*
 * Tells whether a word looks like a C type name, i.e. ends in "_t".
 *
 * word: the word to check (at least len characters)
 * len:  number of characters in word
 * Returns non-zero if the last two characters are "_t", zero otherwise.
 * Words shorter than two characters can never match; they are rejected
 * up front so that no character before the start of word is read.
 */
int isctype(char *word, size_t len) {
    if (len < 2) {
        return 0;
    }
    return (word[len-2] == '_' && word[len-1] == 't');
}

/*
 * Tells whether a word is a C preprocessor directive.
 * Returns 1 if the first character of word is '#', 0 otherwise.
 */
int iscdirective(char *word) {
    const char first = *word;
    return first == '#';
}

/*
 * Tells whether a word looks like a Java type name, i.e. starts with an
 * upper case letter.
 *
 * word: the word to check (must contain at least one character or the
 *       terminating NUL)
 * len:  unused; present so the function matches the istype callback signature
 * Returns non-zero if the first character is upper case, zero otherwise.
 */
int isjtype(char *word, size_t len) {
    (void) len;
    /* <ctype.h> functions need a value representable as unsigned char */
    return isupper((unsigned char) word[0]);
}

/*
 * Tells whether a word is a Java annotation.
 * Returns 1 if the first character of word is '@', 0 otherwise.
 */
int isjdirective(char *word) {
    const char first = *word;
    return (first == '@');
}

/* Forward typedef, needed because the parser callback takes a highlighter_t*. */
typedef struct _highlighter_t highlighter_t;

/*
 * Language specific configuration plus the parser state that has to survive
 * from one line to the next.
 */
struct _highlighter_t {
    /* NULL-terminated list of keywords */
    const char** keywords;
    /* predicate (word, length): non-zero if the word is a type name */
    int(*istype)(char*,size_t);
    /* predicate (word): non-zero if the word is a directive */
    int(*isdirective)(char*);
    /* line parser (src, dest, highlighter) invoked by parseline() */
    void(*parser)(char*,char*,highlighter_t*);
    /* non-zero while a multi-line comment is still open */
    int iscommentml;
    /* buffer for the word that is currently being read */
    char word[WORDBUF_SIZE];
    /* buffer for the file name of a quoted include */
    char includefile[FILENAME_MAX];
};

/* Settings derived from the command line arguments. */
typedef struct {
    /* output file name; NULL selects stdout */
    char* outfilename;
    /* input file name */
    char* infilename;
    /* non-zero: highlight the source, zero: emit the lines as plain text */
    int highlight;
} settings_t;

/* Growable array holding the lines of the input file. */
typedef struct {
    /* number of lines currently stored */
    size_t count;
    /* number of line pointers the lines array has room for */
    size_t capacity;
    /* largest width passed to addline() so far */
    size_t maxlinewidth;
    /* the stored lines, each one an individually allocated string */
    char** lines;
} inputfile_t;

/*
 * Allocates an empty line buffer.
 *
 * capacity: initial number of line slots
 * Returns the new buffer, or NULL if memory could not be allocated.
 * The buffer must be released with freeinputfilebuffer().
 */
inputfile_t *inputfilebuffer(size_t capacity) {
    inputfile_t *inputfile = malloc(sizeof *inputfile);
    if (inputfile == NULL) {
        return NULL;
    }

    inputfile->lines = malloc(capacity * sizeof *inputfile->lines);
    /* malloc(0) may legitimately return NULL, so only treat it as an
     * error when slots were actually requested */
    if (inputfile->lines == NULL && capacity > 0) {
        free(inputfile);
        return NULL;
    }
    inputfile->capacity = capacity;
    inputfile->count = 0;
    inputfile->maxlinewidth = 0;

    return inputfile;
}

/*
 * Appends a copy of a line to the buffer, growing the buffer if necessary.
 *
 * inputfile: the buffer to append to
 * line:      the characters of the line (need not be NUL-terminated)
 * width:     number of characters to copy from line
 *
 * The stored copy is always NUL-terminated. As the function has no way to
 * report an error, running out of memory terminates the program.
 */
void addline(inputfile_t *inputfile, char* line, size_t width) {
    char *l = malloc(width+1);
    if (l == NULL) {
        perror("c2html");
        exit(EXIT_FAILURE);
    }
    memcpy(l, line, width);
    l[width] = 0;

    if (inputfile->count >= inputfile->capacity) {
        /* doubling a capacity of zero would never grow the array */
        size_t newcap = inputfile->capacity ? inputfile->capacity << 1 : 1;
        /* realloc takes a size in bytes, not a number of elements */
        char **grown = realloc(inputfile->lines, newcap * sizeof *grown);
        if (grown == NULL) {
            perror("c2html");
            free(l);
            exit(EXIT_FAILURE);
        }
        inputfile->lines = grown;
        inputfile->capacity = newcap;
    }

    inputfile->lines[inputfile->count] = l;
    inputfile->maxlinewidth =
        width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
    inputfile->count++;
}

/*
 * Releases a line buffer together with all lines stored in it.
 * Passing NULL is allowed and does nothing, like free().
 */
void freeinputfilebuffer(inputfile_t *inputfile) {
    if (inputfile == NULL) {
        return;
    }
    /* count is a size_t, so the index has to be one as well */
    for (size_t i = 0 ; i < inputfile->count ; i++) {
        free(inputfile->lines[i]);
    }
    free(inputfile->lines);
    free(inputfile);
}

/*
 * Reads a file and splits it into lines.
 *
 * filename: path of the file to read
 * Returns a buffer holding all lines, each one terminated by '\n', or NULL
 * if the file cannot be opened or read or if memory runs out. The caller
 * releases the result with freeinputfilebuffer().
 *
 * A last line that lacks a trailing newline is stored as well; a newline is
 * appended to it so that every stored line ends the same way.
 */
inputfile_t *readinput(char *filename) {

    int fd = open(filename, O_RDONLY);
    if (fd == -1) return NULL;

    char buf[INPUTBUF_SIZE];
    ssize_t r;

    size_t maxlinewidth = 256;
    size_t col = 0;
    char *line = malloc(maxlinewidth);
    inputfile_t *inputfile = inputfilebuffer(512);
    if (line == NULL || inputfile == NULL) {
        goto fail;
    }

    while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
        for (ssize_t i = 0 ; i < r ; i++) {
            /* keep some headroom for the newline and the terminator */
            if (col >= maxlinewidth-4) {
                char *grown = realloc(line, maxlinewidth << 1);
                if (grown == NULL) {
                    goto fail;
                }
                line = grown;
                maxlinewidth <<= 1;
            }

            if (buf[i] == '\n') {
                line[col++] = '\n';
                line[col] = 0;
                addline(inputfile, line, col);
                col = 0;
            } else {
                line[col++] = buf[i];
            }
        }
    }
    if (r < 0) {
        goto fail;
    }

    /* flush a final line that is not terminated by a newline */
    if (col > 0) {
        line[col++] = '\n';
        line[col] = 0;
        addline(inputfile, line, col);
    }

    free(line);
    close(fd);

    return inputfile;

fail:
    free(line);
    if (inputfile != NULL) {
        freeinputfilebuffer(inputfile);
    }
    close(fd);
    return NULL;
}

/*
 * Writes a character to the output buffer, replacing the characters that
 * have a special meaning in HTML text ('<', '>' and '&') by their entities.
 *
 * dest: output buffer; must have room for up to five more characters at dp
 * dp:   position in dest to write to
 * c:    the character to write
 * Returns the position right after the written characters.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;
    switch (c) {
        case '>': entity = "&gt;"; break;
        case '<': entity = "&lt;"; break;
        /* an unescaped '&' would turn source text like "&lt;" into markup */
        case '&': entity = "&amp;"; break;
        default:
            dest[dp++] = c;
            return dp;
    }

    while (*entity) {
        dest[dp++] = *entity++;
    }
    return dp;
}

/*
 * Looks a word up in a keyword list.
 *
 * word:     the word to look for
 * keywords: NULL-terminated list of keywords
 * Returns 1 if the word equals one of the keywords (comparing at most
 * WORDBUF_SIZE characters), 0 otherwise.
 */
int iskeyword(char *word, const char** keywords) {
    const char **entry = keywords;
    while (*entry != NULL) {
        if (!strncmp(*entry, word, WORDBUF_SIZE)) {
            return 1;
        }
        entry++;
    }
    return 0;
}

/*
 * Tells whether a word consists of upper case letters and underscores only,
 * which is how macro constants are recognized.
 *
 * word: the word to check
 * wp:   number of characters in word
 * Returns 1 if every one of the wp characters is an upper case letter or
 * '_' (which is trivially the case for wp == 0), 0 otherwise.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* <ctype.h> functions need a value representable as unsigned char */
        if (!isupper((unsigned char) word[i]) && word[i] != '_') {
            return 0;
        }
    }
    return 1;
}

/*
 * Highlights one line by handing it to the parser the highlighter has been
 * configured with.
 *
 * src:  the source line
 * dest: buffer receiving the result
 * hltr: the highlighter; its parser member is called with these arguments
 */
void parseline(char *src, char *dest, highlighter_t *hltr) {
    void (*const handler)(char*, char*, highlighter_t*) = hltr->parser;
    handler(src, dest, hltr);
}

/*
 * Line parser for C and Java sources: copies one line to dest and wraps
 * comments, strings, keywords, types, directives, macro constants and
 * include file names in HTML elements.
 *
 * src:  NUL-terminated source line
 * dest: buffer receiving the NUL-terminated result; it is NOT bounds
 *       checked, so the caller must size it for the inserted markup
 * hltr: keyword list and predicates to use; hltr->iscommentml is read and
 *       updated so that a multi-line comment is continued on the next line,
 *       hltr->word and hltr->includefile are used as scratch buffers
 *
 * A word is only written once the non-word character following it has been
 * seen, so src is expected to end with such a character (e.g. '\n').
 */
void cjparseline(char *src, char *dest, highlighter_t *hltr) {
    size_t sp = 0, dp = 0;
    /* indent: the newline is left to the main loop, so that a blank line
     * inside a multi-line comment still gets its span closed */
    while (src[sp] != '\n' && isspace((unsigned char) src[sp])) {
        dest[dp++] = src[sp++];
    }

    memset(hltr->word, 0, WORDBUF_SIZE);
    size_t wp = 0, ifp = 0;
    int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
    int isescaping = 0;
    /* the quote character that opened the current string or char literal */
    char quote = 0;

    /* continue a multi-line comment that was opened on an earlier line */
    if (hltr->iscommentml) {
        iscomment = 1;
        memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
        dp += 29;
    }

    for (char c = src[sp] ; c ; c=src[++sp]) {
        /* comments */
        if (c == '/') {
            if (hltr->iscommentml && sp > 0 && src[sp-1] == '*') {
                /* end of a multi-line comment */
                iscomment = 0;
                hltr->iscommentml = 0;
                memcpy(&(dest[dp]), "/</span>", 8);
                dp += 8;
                continue;
            } else if (!iscomment && !isstring
                    && (src[sp+1] == '/' || src[sp+1] == '*')) {
                /* start of a comment; a slash pair inside a string
                 * literal (e.g. an URL) does not count */
                iscomment = 1;
                hltr->iscommentml = (src[sp+1] == '*');
                memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
                dp += 29;
            }
        }

        if (iscomment) {
            /* every line closes its own span before the line break */
            if (c == '\n') {
                memcpy(&(dest[dp]), "</span>", 7);
                dp += 7;
            }
            dp = writeescapedchar(dest, dp, c);
        } else if (isinclude) {
            /* remainder of a line that started with #include */
            if (c == '<') {
                memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
                dp += 32;
                dp = writeescapedchar(dest, dp, c);
            } else if (c == '\"') {
                if (parseinclude) {
                    /* closing quote: finish the href attribute, then
                     * repeat the quoted file name as the link text */
                    dest[dp++] = '\"';
                    dest[dp++] = '>';
                    memcpy(&(dest[dp]), hltr->includefile, ifp);
                    dp += ifp;

                    dp = writeescapedchar(dest, dp, c);
                    memcpy(&(dest[dp]), "</a>", 4);
                    dp += 4;
                    parseinclude = 0;
                } else {
                    /* opening quote: the characters that follow are
                     * written as the href value and recorded as well */
                    memcpy(&(dest[dp]),
                        "<a class=\"c2html-userinclude\" href=", 35);
                    dp += 35;
                    dp = writeescapedchar(dest, dp, c);
                    ifp = 0;
                    hltr->includefile[ifp++] = '\"';
                    parseinclude = 1;
                }
            } else if (c == '>') {
                dp = writeescapedchar(dest, dp, c);
                memcpy(&(dest[dp]), "</span>", 7);
                dp += 7;
            } else {
                /* never write past the end of the file name buffer;
                 * an overlong name is cut off in the link text */
                if (parseinclude && ifp < FILENAME_MAX) {
                    hltr->includefile[ifp++] = c;
                }
                dp = writeescapedchar(dest, dp, c);
            }
        } else {
            /* strings: a literal is only closed by the same quote
             * character that opened it, so that '"' and "'" work */
            if (!isescaping && (c == '\'' || c == '\"')
                    && (!isstring || c == quote)) {
                isstring ^= 1;
                if (isstring) {
                    quote = c;
                    memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
                    dp += 28;
                    dp = writeescapedchar(dest, dp, c);
                } else {
                    dp = writeescapedchar(dest, dp, c);
                    memcpy(&(dest[dp]), "</span>", 7);
                    dp += 7;
                }
            } else {
                if (isstring) {
                    dp = writeescapedchar(dest, dp, c);
                } else if (!iswordcharacter(c)) {
                    /* end of a word: classify and write it; words that
                     * did not fit the buffer were written already */
                    if (wp > 0 && wp < WORDBUF_SIZE) {
                        int closespan = 1;
                        if (iskeyword(hltr->word, hltr->keywords)) {
                            memcpy(&(dest[dp]),
                                "<span class=\"c2html-keyword\">", 29);
                            dp += 29;
                        } else if (hltr->istype(hltr->word, wp)) {
                            memcpy(&(dest[dp]),
                                "<span class=\"c2html-type\">", 26);
                            dp += 26;
                        } else if (hltr->isdirective(hltr->word)) {
                            isinclude = !strncmp(
                                "#include", hltr->word, WORDBUF_SIZE);
                            memcpy(&(dest[dp]),
                                "<span class=\"c2html-directive\">", 31);
                            dp += 31;
                        } else if (iscapsonly(hltr->word, wp)) {
                            memcpy(&(dest[dp]),
                                "<span class=\"c2html-macroconst\">", 32);
                            dp += 32;
                        } else {
                            closespan = 0;
                        }
                        for (size_t i = 0 ; i < wp ; i++) {
                            dp = writeescapedchar(dest, dp, hltr->word[i]);
                        }
                        if (closespan) {
                            memcpy(&(dest[dp]), "</span>", 7);
                            dp += 7;
                        }
                    }
                    memset(hltr->word, 0, WORDBUF_SIZE);
                    wp = 0;
                    dp = writeescapedchar(dest, dp, c);
                } else {
                    /* read word */
                    if (wp < WORDBUF_SIZE) {
                        hltr->word[wp++] = c;
                    } else if (wp == WORDBUF_SIZE) {
                        /* the word is too long to be classified: write
                         * the buffered part without highlighting and let
                         * the remaining characters pass through */
                        for (size_t i = 0 ; i < WORDBUF_SIZE ; i++) {
                            dp = writeescapedchar(dest, dp, hltr->word[i]);
                        }
                        wp++;
                        dp = writeescapedchar(dest, dp, c);
                    } else {
                        dp = writeescapedchar(dest, dp, c);
                    }
                }
            }

            /* a backslash escapes the next character, unless the
             * backslash itself has been escaped */
            isescaping = !isescaping & (c == '\\');
        }
    }
    dest[dp] = 0;
}

/* Prints the usage information to stdout. */
void printhelp() {
    static const char usage[] =
        "Formats source code using HTML.\n"
        "\n"
        "Usage:\n"
        "  c2html [Options] FILE\n"
        "\n"
        " Options:\n"
        "  -h                    Prints this help message\n"
        "  -j                    Highlight Java instead of C source code\n"
        "  -o <output>           Output file (stdout, if not specified)\n"
        "  -p                    Disable highlighting (plain text)\n"
        "\n";

    fputs(usage, stdout);
}

/*
 * Computes the number of decimal digits needed to print a line count.
 *
 * lnc: the number of lines
 * Returns the number of digits of lnc, which is 1 for zero.
 */
int lnint(size_t lnc) {
    int w = 1;
    /* dividing the count (rather than multiplying a power of ten) gets
     * exact powers of ten right and cannot overflow */
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}

/*
 * Program entry point: parses the options, reads the input file and writes
 * it as an HTML <pre> block with line numbers to the output file or stdout.
 *
 * Returns 0 on success and 1 if the arguments are invalid or the input or
 * output file cannot be processed.
 */
int main(int argc, char** argv) {
    settings_t settings;
    settings.outfilename = NULL;
    settings.infilename = NULL;
    settings.highlight = 1;

    /* C highlighting is the default */
    highlighter_t highlighter;
    memset(&highlighter, 0, sizeof(highlighter));
    highlighter.isdirective = iscdirective;
    highlighter.istype = isctype;
    highlighter.keywords = ckeywords;
    highlighter.parser = cjparseline;

    /* getopt() returns an int; with a plain char the comparison with -1
     * never succeeds on platforms where char is unsigned */
    int optc;
    while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
        switch (optc) {
            case 'o':
                /* "-o -" explicitly selects stdout */
                if (!(optarg[0] == '-' && optarg[1] == 0)) {
                    settings.outfilename = optarg;
                }
                break;
            case 'j':
                highlighter.isdirective = isjdirective;
                highlighter.istype = isjtype;
                highlighter.keywords = jkeywords;
                break;
            case 'p':
                settings.highlight = 0;
                break;
            case 'h':
                printhelp();
                return 0;
            default:
                return 1;
        }
    }

    /* exactly one input file is expected after the options */
    if (optind != argc-1) {
        printhelp();
        return 1;
    }
    settings.infilename = argv[optind];

    inputfile_t *inputfile = readinput(settings.infilename);
    if (inputfile == NULL) {
        fprintf(stderr, "c2html: cannot read input file: %s\n",
            settings.infilename);
        return 1;
    }

    int status = 0;
    FILE *fout = stdout;
    char *line = NULL;

    if (settings.highlight) {
        /* the parser does not check bounds; the factor leaves room for the
         * markup inserted around the tokens of the longest line (the +1
         * keeps the request non-zero for an empty file) */
        line = malloc(inputfile->maxlinewidth*64 + 1);
        if (line == NULL) {
            perror("c2html");
            freeinputfilebuffer(inputfile);
            return 1;
        }
    }

    if (settings.outfilename) {
        fout = fopen(settings.outfilename, "w");
        if (fout == NULL) {
            perror(settings.outfilename);
            free(line);
            freeinputfilebuffer(inputfile);
            return 1;
        }
    }

    fprintf(fout, "<pre>\n");
    int lnw = lnint(inputfile->count);
    for (size_t i = 0 ; i < inputfile->count ; i++) {
        /* NOTE(review): in plain mode the line is written as is, i.e.
         * without escaping HTML special characters */
        const char *out = inputfile->lines[i];
        if (settings.highlight) {
            parseline(inputfile->lines[i], line, &highlighter);
            out = line;
        }
        fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
            lnw, (int) (i+1), out);
    }
    fprintf(fout, "</pre>\n");

    if (fout != stdout) {
        /* fclose flushes, so a failure here means lost output */
        if (fclose(fout) != 0) {
            perror(settings.outfilename);
            status = 1;
        }
    }

    free(line);
    freeinputfilebuffer(inputfile);

    return status;
}

mercurial