src/regex_parser.c

Mon, 27 Jul 2020 17:19:56 +0200

author
Mike Becker <universe@uap-core.de>
date
Mon, 27 Jul 2020 17:19:56 +0200
changeset 61
9c8d768f0244
parent 57
68018eac46c3
child 66
be2084398c37
permissions
-rw-r--r--

adds option to compute individual sums

27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
1 /*
34
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
57
68018eac46c3 adds simple tiny test suite and updates license headers
Mike Becker <universe@uap-core.de>
parents: 54
diff changeset
3 * Copyright 2018 Mike Becker. All rights reserved.
34
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
4 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
5 * Redistribution and use in source and binary forms, with or without
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
6 * modification, are permitted provided that the following conditions are met:
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
7 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
8 * 1. Redistributions of source code must retain the above copyright
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
9 * notice, this list of conditions and the following disclaimer.
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
10 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
11 * 2. Redistributions in binary form must reproduce the above copyright
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
12 * notice, this list of conditions and the following disclaimer in the
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
13 * documentation and/or other materials provided with the distribution.
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
14 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
57
68018eac46c3 adds simple tiny test suite and updates license headers
Mike Becker <universe@uap-core.de>
parents: 54
diff changeset
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
25 */
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
26
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
27 #include "regex_parser.h"
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
28
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
29 regex_parser_t* new_regex_parser_t() {
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
30 regex_parser_t* ret = malloc(sizeof(regex_parser_t));
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
31 if (ret != NULL) {
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
32 ret->pattern_list = new_string_list_t();
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
33 ret->matched_lines = 0;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
34 ret->pattern_match = 0;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
35 ret->compiled_patterns = NULL;
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
36 ret->compiled_pattern_count = 0;
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
37 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
38 return ret;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
39 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
40
54
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
41 void regex_parser_reset(regex_parser_t* parser) {
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
42 parser->pattern_match = parser->matched_lines = 0;
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
43 }
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
44
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
45 void regex_destcomppats(regex_parser_t* parser) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
46 if (parser->compiled_patterns != NULL) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
47 for (int i = 0 ; i < parser->compiled_pattern_count ; i++) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
48 if (parser->compiled_patterns[i] != NULL) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
49 free(parser->compiled_patterns[i]);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
50 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
51 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
52 free(parser->compiled_patterns);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
53 parser->compiled_patterns = NULL;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
54 parser->compiled_pattern_count = 0;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
55 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
56 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
57
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
58 void destroy_regex_parser_t(regex_parser_t* parser) {
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
59 regex_destcomppats(parser);
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
60 destroy_string_list_t(parser->pattern_list);
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
61 free(parser);
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
62 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
63
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
64 bool regex_parser_matching(regex_parser_t* parser) {
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
65 return parser->pattern_match > 0;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
66 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
67
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
68 int regex_parser_do(regex_parser_t* parser, char* input) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
69 int err = REG_NOMATCH;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
70 if (parser->compiled_pattern_count > 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
71 regmatch_t match;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
72
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
73 if (regex_parser_matching(parser)) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
74 parser->matched_lines++;
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
75
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
76 err = regexec(parser->compiled_patterns[parser->pattern_match],
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
77 input, 1, &match, 0);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
78 if (err > 0 && err != REG_NOMATCH) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
79 fprintf(stderr, "Regex-Error: 0x%08x", err);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
80 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
81 if (err == 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
82 parser->pattern_match = 0;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
83 /* do not match line, if it does not end with the pattern */
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
84 if (match.rm_eo < strlen(input)) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
85 parser->matched_lines--;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
86 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
87 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
88 } else {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
89 for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
90 err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
91 if (err > 0 && err != REG_NOMATCH) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
92 fprintf(stderr, "Regex-Error: 0x%08x", err);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
93 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
94 if (err == 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
95 parser->pattern_match = i+1;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
96 parser->matched_lines = 0;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
97 /* Check, if end pattern is also in this line */
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
98 regex_parser_do(parser, input);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
99 /* do not match line, if it does not start with the pattern */
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
100 if (match.rm_so > 0 && parser->matched_lines > 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
101 parser->matched_lines--;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
102 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
103 break;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
104 }
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
105 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
106 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
107 }
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
108 return err;
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
109 }
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
110
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
111 bool regex_compile_all(regex_parser_t* parser) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
112 bool success = true;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
113 size_t pcount = parser->pattern_list->count;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
114 if (pcount > 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
115 regex_destcomppats(parser);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
116 parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
117 parser->compiled_pattern_count = pcount;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
118
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
119 regex_t* re;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
120 for (int i = 0 ; i < pcount ; i++) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
121 re = malloc(sizeof(regex_t));
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
122 if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
123 parser->compiled_patterns[i] = re;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
124 } else {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
125 fprintf(stderr, "Cannot compile pattern: %s\n",
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
126 (parser->pattern_list->items[i]));
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
127 parser->compiled_patterns[i] = NULL;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
128 success = false;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
129 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
130 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
131 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
132 return success;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
133 }

mercurial