added regexp_parser struct and compile function

2012-01-26

author
Mike Becker <universe@uap-core.de>
date
Thu, 26 Jan 2012 15:55:52 +0100 (2012-01-26)
changeset 27
95a958e3de88
parent 26
853a1181884b
child 28
72a98cbcb9f1

added regexp_parser struct and compile function

bfile_heuristics.h file | annotate | diff | comparison | revisions
cline.c file | annotate | diff | comparison | revisions
regex_parser.c file | annotate | diff | comparison | revisions
regex_parser.h file | annotate | diff | comparison | revisions
scanner.c file | annotate | diff | comparison | revisions
settings.c file | annotate | diff | comparison | revisions
settings.h file | annotate | diff | comparison | revisions
string_list.h file | annotate | diff | comparison | revisions
--- a/bfile_heuristics.h	Thu Dec 01 17:06:27 2011 +0100
+++ b/bfile_heuristics.h	Thu Jan 26 15:55:52 2012 +0100
@@ -16,9 +16,9 @@
 #define BFILE_HIGH_ACCURACY    0x04
 
 typedef struct {
-  int level;
-  int bcount; /* 'binary' character count */
-  int tcount; /* total count */
+  unsigned int level;
+  unsigned int bcount; /* 'binary' character count */
+  unsigned int tcount; /* total count */
 } bfile_heuristics_t;
 
 #ifdef _cplusplus
--- a/cline.c	Thu Dec 01 17:06:27 2011 +0100
+++ b/cline.c	Thu Jan 26 15:55:52 2012 +0100
@@ -10,17 +10,20 @@
 #include "settings.h"
 #include "arguments.h"
 #include "stream.h"
+#include "regex_parser.h"
 
 void printHelpText() {
   const char* helpText = 
     "\nUsage:"
-    "\n      cline [-hrmvV][-s suffix][-b level][<directory>]"
-    "\n      cline [-hrmvV][-S suffix][-b level][<directory>]"
+    "\n      cline [Options] [Directory]"
+    "\n      cline [Options] [Directory]"
     "\n\nCounts the line terminator characters (\\n) within all"
     " files in the specified\ndirectory."
     "\n\nOptions:"
     "\n  -b <level>          - binary file heuristics level (default medium)"
     "\n                        One of: ignore low medium high"
+    "\n  -e <start> <end>    - Excludes lines between <start> and <end>"
+    "\n                        You may use this option multiple times"
     "\n  -h, --help          - this help text"
     "\n  -m                  - print information about matching files only"
     "\n  -s <suffixes>       - only count files with these suffixes (separated"
@@ -35,7 +38,10 @@
     "\n  cline ./\n"
     "So each file in the working directory is counted. If you want to count C"
     "\nsource code in your working directory and its subdirectories, type:"
-    "\n  cline -rs .c\n";
+    "\n  cline -rs .c\n"
+    "\nIf you want to exclude comment lines, you may use the -e option."
+    "\nAfter a line matches the regex pattern <start> any following line is"
+    "\nnot counted unless a line matches the <end> pattern.";
     
   printf(helpText);
 }
@@ -68,7 +74,7 @@
 
   for (int t = 1 ; t < argc ; t++) {
 
-    int argflags = checkArgument(argv[t], "hsSrRmvVb");
+    int argflags = checkArgument(argv[t], "hsSrRmvVbe");
 
     /* s, S */
     if ((argflags & 6) > 0) {
@@ -132,6 +138,13 @@
         return exit_with_help(settings, 1);
       }
     }
+    if ((argflags & 512) > 0) {
+      if (t + 2 >= argc) {
+        return exit_with_help(settings, 1);
+      }
+      t++; add_string(settings->regex->pattern_list, argv[t]);
+      t++; add_string(settings->regex->pattern_list, argv[t]);
+    }
     /* Path */
     if (argflags == 0) {
       if (registerArgument(&checked, 1024)) {
@@ -154,6 +167,7 @@
   }
 
   /* Scan directory */
+  regex_compile_all(settings->regex);
   int lines = scanDirectory((scanner_t){directory, 0}, settings);
   destroy_settings_t(settings);
 
@@ -163,11 +177,10 @@
   }
   printf("\n%73d lines\n", lines);
 
-  if (settings->confusing_lnlen) {
-    /* TODO: display this only when the regexp parser is used */
+  if (settings->confusing_lnlen && settings->regex->pattern_list->count > 0) {
     printf("\nSome files contain too long lines.\n"
-      "The regexp parser currently supports a maximum line length of 2048."
-      "\nThe result might be wrong.\n");
+      "The regex parser currently supports a maximum line length of %d."
+      "\nThe result might be wrong.\n", REGEX_MAX_LINELENGTH);
   }
 
   if (!settings->verbose) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex_parser.c	Thu Jan 26 15:55:52 2012 +0100
@@ -0,0 +1,108 @@
+/*
+ * regex_parser.c
+ *
+ *  Created on: 26.01.2012
+ *      Author: fox3049
+ */
+
+#include "regex_parser.h"
+
+/*
+ * Marks the end of every compiled_patterns table. Failed patterns are
+ * stored as NULL, so a distinct sentinel is needed to recover the table
+ * length without an extra field in regex_parser_t. It is never compiled
+ * and never freed.
+ */
+static regex_t regex_table_end;
+
+/*
+ * Releases every compiled pattern of the parser together with the table
+ * that holds them and resets compiled_patterns to NULL.
+ * Does nothing when no table has been built yet.
+ */
+static void release_compiled_patterns(regex_parser_t* parser) {
+  regex_t** table = parser->compiled_patterns;
+  if (table != NULL) {
+    for (size_t i = 0 ; table[i] != &regex_table_end ; i++) {
+      if (table[i] != NULL) {
+        regfree(table[i]);
+        free(table[i]);
+      }
+    }
+    free(table);
+    parser->compiled_patterns = NULL;
+  }
+}
+
+/*
+ * Allocates a parser with a fresh pattern list, zeroed match state and
+ * no compiled patterns.
+ * Returns NULL when the parser itself cannot be allocated.
+ */
+regex_parser_t* new_regex_parser_t() {
+  regex_parser_t* ret = malloc(sizeof(regex_parser_t));
+  if (ret != NULL) {
+    ret->pattern_list = new_string_list_t();
+    ret->matched_lines = 0;
+    ret->pattern_match = 0;
+    ret->compiled_patterns = NULL;
+  }
+  return ret;
+}
+
+/*
+ * Frees the compiled patterns, the pattern list and the parser itself.
+ * Passing NULL is allowed and does nothing.
+ */
+void destroy_regex_parser_t(regex_parser_t* parser) {
+  if (parser != NULL) {
+    release_compiled_patterns(parser);
+    destroy_string_list_t(parser->pattern_list);
+    free(parser);
+  }
+}
+
+/*
+ * Returns true while pattern_match is greater than zero.
+ */
+bool regex_parser_matching(regex_parser_t* parser) {
+  return parser->pattern_match > 0;
+}
+
+/*
+ * Compiles every entry of pattern_list as an extended regular
+ * expression (without sub-match reporting) into compiled_patterns.
+ * Entry i of the table belongs to pattern i; a pattern that cannot be
+ * compiled is reported on stderr and leaves a NULL entry.
+ * A previously built table is released first. Nothing happens when the
+ * pattern list is empty.
+ */
+void regex_compile_all(regex_parser_t* parser) {
+  size_t pcount = parser->pattern_list->count;
+  if (pcount > 0) {
+    release_compiled_patterns(parser);
+
+    /* one extra slot for the end marker; calloc leaves the rest NULL */
+    regex_t** table = calloc(pcount + 1, sizeof(regex_t*));
+    if (table == NULL) {
+      fprintf(stderr, "Cannot allocate memory for regex patterns\n");
+      return;
+    }
+    table[pcount] = &regex_table_end;
+
+    for (size_t i = 0 ; i < pcount ; i++) {
+      /* every pattern needs its own regex_t - a shared one would be
+         overwritten by the next regcomp call */
+      regex_t* re = malloc(sizeof(regex_t));
+      if (re != NULL && regcomp(re, parser->pattern_list->items[i],
+          REG_EXTENDED|REG_NOSUB) == 0) {
+        table[i] = re;
+      } else {
+        fprintf(stderr, "Cannot compile: %s\n",
+            (parser->pattern_list->items[i]));
+        free(re); /* contents are undefined after a failed regcomp */
+      }
+    }
+    parser->compiled_patterns = table;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex_parser.h	Thu Jan 26 15:55:52 2012 +0100
@@ -0,0 +1,48 @@
+/*
+ * regex_parser.h
+ *
+ *  Created on: 26.01.2012
+ *      Author: Mike
+ */
+
+#ifndef REGEX_PARSER_H_
+#define REGEX_PARSER_H_
+
+/* maximum line length the regex parser supports */
+#define REGEX_MAX_LINELENGTH           2048
+
+#include <sys/types.h>
+#include <stdbool.h>
+#include <regex.h>
+#include "string_list.h"
+
+typedef struct {
+  string_list_t* pattern_list; /* even entries: start ; odd entries: end */
+  regex_t** compiled_patterns; /* built by regex_compile_all - entry i
+                                  belongs to pattern i, NULL if that
+                                  pattern could not be compiled */
+  unsigned int pattern_match; /* save position of end pattern to match -
+                                 0 when a start pattern shall match first */
+  unsigned int matched_lines;
+} regex_parser_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Creates a parser with a new pattern list; NULL on allocation failure. */
+regex_parser_t* new_regex_parser_t();
+/* Destroys the pattern list and frees the parser. */
+void destroy_regex_parser_t(regex_parser_t*);
+
+/* Returns true when pattern_match is greater than zero. */
+bool regex_parser_matching(regex_parser_t*);
+/* Compiles all patterns of pattern_list into compiled_patterns.
+   Patterns that cannot be compiled are reported on stderr. */
+void regex_compile_all(regex_parser_t*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* REGEX_PARSER_H_ */
--- a/scanner.c	Thu Dec 01 17:06:27 2011 +0100
+++ b/scanner.c	Thu Jan 26 15:55:52 2012 +0100
@@ -9,6 +9,7 @@
 #include "scanner.h"
 #include "suffix_fnc.h"
 #include "bfile_heuristics.h"
+#include "regex_parser.h"
 #include <sys/stat.h>
 
 int scanDirectory(scanner_t scanner, settings_t* settings) {
@@ -61,7 +62,7 @@
         lines = 0;
         bfile = false;
         bfile_reset(settings->bfileHeuristics);
-        char line_buffer[2048];
+        char line_buffer[REGEX_MAX_LINELENGTH];
         int line_buffer_offset = 0;
 
         FILE *file = fopen(filename, "r");
@@ -83,7 +84,7 @@
             line_buffer_offset = 0;
             lines++;
           } else {
-            if (line_buffer_offset < 2048) {
+            if (line_buffer_offset < REGEX_MAX_LINELENGTH) {
               line_buffer[line_buffer_offset] = a;
               line_buffer_offset++;
             } else {
--- a/settings.c	Thu Dec 01 17:06:27 2011 +0100
+++ b/settings.c	Thu Jan 26 15:55:52 2012 +0100
@@ -21,13 +21,15 @@
     settings->suffixList         = new_string_list_t();
     settings->verbose            = true;
     settings->bfileHeuristics    = new_bfile_heuristics_t();
-    settings->confusing_lnlen   = false;
+    settings->confusing_lnlen    = false;
+    settings->regex              = new_regex_parser_t();
   }
 
   return settings;
 }
 
 void destroy_settings_t(settings_t* settings) {
+  destroy_regex_parser_t(settings->regex);
   destroy_string_list_t(settings->suffixList);
   destroy_bfile_heuristics_t(settings->bfileHeuristics);
   free(settings);
--- a/settings.h	Thu Dec 01 17:06:27 2011 +0100
+++ b/settings.h	Thu Jan 26 15:55:52 2012 +0100
@@ -11,9 +11,11 @@
 #include "stdinc.h"
 #include "string_list.h"
 #include "bfile_heuristics.h"
+#include "regex_parser.h"
 
 typedef struct _settings {
   string_list_t* suffixList;
+  regex_parser_t* regex;
   bfile_heuristics_t* bfileHeuristics;
   char fileSeparator;
   bool recursive;
--- a/string_list.h	Thu Dec 01 17:06:27 2011 +0100
+++ b/string_list.h	Thu Jan 26 15:55:52 2012 +0100
@@ -11,7 +11,7 @@
 #include "stdinc.h"
 
 typedef struct _string_list {
-  int count;
+  size_t count;
   char** items;
 } string_list_t;
 

mercurial