scanner.c

Thu, 26 Jan 2012 15:55:52 +0100

author
Mike Becker <universe@uap-core.de>
date
Thu, 26 Jan 2012 15:55:52 +0100
changeset 27
95a958e3de88
parent 25
802c5382f499
child 28
72a98cbcb9f1
permissions
-rw-r--r--

added regexp_parser struct and compile function

10
ecf787666f44 refactored sources
Mike Becker <universe@uap-core.de>
parents: 8
diff changeset
1 /*
20
43725438ac50 Changed author comments + added signatures for upcoming bfile heuristics
Mike Becker <universe@uap-core.de>
parents: 18
diff changeset
2 * scanner.c
10
ecf787666f44 refactored sources
Mike Becker <universe@uap-core.de>
parents: 8
diff changeset
3 *
ecf787666f44 refactored sources
Mike Becker <universe@uap-core.de>
parents: 8
diff changeset
4 * Created on: 23.05.2011
20
43725438ac50 Changed author comments + added signatures for upcoming bfile heuristics
Mike Becker <universe@uap-core.de>
parents: 18
diff changeset
5 * Author: Mike
10
ecf787666f44 refactored sources
Mike Becker <universe@uap-core.de>
parents: 8
diff changeset
6 */
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
7
8
28319b20968c encapsulated common operations
Mike Becker <universe@uap-core.de>
parents: 6
diff changeset
8
10
ecf787666f44 refactored sources
Mike Becker <universe@uap-core.de>
parents: 8
diff changeset
9 #include "scanner.h"
ecf787666f44 refactored sources
Mike Becker <universe@uap-core.de>
parents: 8
diff changeset
10 #include "suffix_fnc.h"
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
11 #include "bfile_heuristics.h"
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents: 25
diff changeset
12 #include "regex_parser.h"
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
13 #include <sys/stat.h>
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
14
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
15 int scanDirectory(scanner_t scanner, settings_t* settings) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
16
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
17 DIR *dirf;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
18 struct dirent *entry;
18
cae1294702aa Increased warning level, fixed resulting warnings, changed std form c99 to gnu99
Mike Becker <universe@uap-core.de>
parents: 16
diff changeset
19 int lines, a;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
20 int lineSum = 0;
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
21 bool bfile;
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
22 struct stat statbuf;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
23
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
24 if ((dirf = opendir(scanner.dir)) == NULL) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
25 printf(scanner.dir);
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
26 perror(" Directory access failed");
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
27 return 0;
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
28 }
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
29
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
30 while ((entry = readdir(dirf)) != NULL) {
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
31 if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
32 /* Construct tree view and absolute pathname strings */
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
33 char entryname[strlen(entry->d_name)+scanner.spaces];
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
34 for (int t = 0 ; t < scanner.spaces ; t++) {
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
35 entryname[t]=' ';
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
36 }
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
37 entryname[scanner.spaces] = 0;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
38 strcat(entryname, entry->d_name);
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
39
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
40 char filename[(1+strlen(scanner.dir)+strlen(entry->d_name))];
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
41 strcpy(filename, scanner.dir);
14
ee9333c91dda some minor fixes + makefile now injects revisionnumber into cline.h
Mike Becker <universe@uap-core.de>
parents: 10
diff changeset
42 strncat(filename, &settings->fileSeparator, 1);
ee9333c91dda some minor fixes + makefile now injects revisionnumber into cline.h
Mike Becker <universe@uap-core.de>
parents: 10
diff changeset
43 strcat(filename, entry->d_name);
ee9333c91dda some minor fixes + makefile now injects revisionnumber into cline.h
Mike Becker <universe@uap-core.de>
parents: 10
diff changeset
44
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
45 /* Check for subdirectory */
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
46 if (stat(filename, &statbuf) == 0) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
47 if (!(statbuf.st_mode & S_IFREG)) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
48 printf("%-60s\n", entryname);
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
49 if (settings->recursive && (statbuf.st_mode & S_IFDIR)) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
50 lineSum += scanDirectory(
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
51 (scanner_t) {filename, scanner.spaces+1}, settings);
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
52 }
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
53 continue;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
54 }
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
55 } else {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
56 perror(" Error in stat call");
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
57 continue;
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
58 }
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
59
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
60 if (testSuffix(filename, settings)) {
25
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
61 /* Count lines */
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
62 lines = 0;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
63 bfile = false;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
64 bfile_reset(settings->bfileHeuristics);
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents: 25
diff changeset
65 char line_buffer[REGEX_MAX_LINELENGTH];
25
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
66 int line_buffer_offset = 0;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
67
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
68 FILE *file = fopen(filename, "r");
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
69 if (file == NULL) {
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
70 printf(entryname);
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
71 perror(" File acces failed");
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
72 continue;
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
73 }
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
74
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
75 do {
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
76 a = fgetc(file);
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
77
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
78 bfile = bfile_check(settings->bfileHeuristics, a);
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
79
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
80 if (a == 10) {
25
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
81 line_buffer[line_buffer_offset] = 0;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
82 /* TODO: do regex parsing */
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
83
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
84 line_buffer_offset = 0;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
85 lines++;
25
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
86 } else {
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents: 25
diff changeset
87 if (line_buffer_offset < REGEX_MAX_LINELENGTH) {
25
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
88 line_buffer[line_buffer_offset] = a;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
89 line_buffer_offset++;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
90 } else {
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
91 line_buffer[line_buffer_offset-1] = 0;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
92 settings->confusing_lnlen = true;
802c5382f499 Added line buffer (and warning message - there is no regexp parser, though)
Mike Becker <universe@uap-core.de>
parents: 23
diff changeset
93 }
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
94 }
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
95 } while (!bfile && a != EOF);
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
96 fclose(file);
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
97
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
98 /* Print and sum line count */
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
99 if (bfile) {
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
100 if (!settings->matchesOnly) {
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
101 printf("%-60s%19s\n", entryname, "binary");
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
102 }
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
103 } else {
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
104 lineSum += lines;
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
105 printf("%-60s%13d lines\n", entryname, lines);
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
106 }
16
bc9a0fefd892 fixed makefile to run safely on compile errors + added -V option to cline
Mike Becker <universe@uap-core.de>
parents: 14
diff changeset
107 } else {
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
108 if (!settings->matchesOnly) {
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
109 /* Print hint */
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
110 printf("%-60s%19s\n", entryname, "no match");
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
111 }
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
112 }
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
113 }
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
114 }
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
115
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
116 closedir(dirf);
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
117
3
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
118 return lineSum;
510d6b198dde Moved some functions to functions.c
Mike Becker <universe@uap-core.de>
parents: 1
diff changeset
119 }

mercurial