bfile_heuristics.c

Thu, 01 Dec 2011 17:04:30 +0100

author
Mike Becker <universe@uap-core.de>
date
Thu, 01 Dec 2011 17:04:30 +0100
changeset 25
802c5382f499
parent 23
778388400f7b
permissions
-rw-r--r--

Added line buffer (and warning message - there is no regexp parser, though)

20
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
1 /*
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
2 * bfile_heuristics.c
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
3 *
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
4 * Created on: 20.10.2011
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
5 * Author: Mike
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
6 */
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
7
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
8 #include "bfile_heuristics.h"
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
9 #include <ctype.h>
20
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
10
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
11 bfile_heuristics_t *new_bfile_heuristics_t() {
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
12 bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
13 ret->level = BFILE_MEDIUM_ACCURACY;
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
14 bfile_reset(ret);
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
15 return ret;
20
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
16 }
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
17
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
18 void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
19 free(def);
20
43725438ac50 Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff changeset
20 }
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
21
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
22 void bfile_reset(bfile_heuristics_t *def) {
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
23 def->bcount = 0;
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
24 def->tcount = 0;
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
25 }
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
26
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
27 bool bfile_check(bfile_heuristics_t *def, int next_char) {
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
28 bool ret = false;
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
29 if (def->level != BFILE_IGNORE) {
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
30 def->tcount++;
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
31 if (!isprint(next_char) && !isspace(next_char)) {
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
32 def->bcount++;
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
33 }
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
34
23
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
35 if (def->tcount > 1) { /* empty files are text files */
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
36 switch (def->level) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
37 case BFILE_LOW_ACCURACY:
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
38 if (def->tcount > 15 || next_char == EOF) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
39 ret = (1.0*def->bcount)/def->tcount > 0.32;
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
40 }
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
41 break;
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
42 case BFILE_HIGH_ACCURACY:
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
43 if (def->tcount > 500 || next_char == EOF) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
44 ret = (1.0*def->bcount)/def->tcount > 0.1;
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
45 }
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
46 break;
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
47 default: /* BFILE_MEDIUM_ACCURACY */
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
48 if (def->tcount > 100 || next_char == EOF) {
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
49 ret = (1.0*def->bcount)/def->tcount > 0.1;
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
50 }
778388400f7b encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents: 22
diff changeset
51 break;
22
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
52 }
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
53 }
4508da679ffb completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents: 21
diff changeset
54 }
21
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
55
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
56 return ret;
91e0890464b0 implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents: 20
diff changeset
57 }

mercurial