bfile_heuristics.c

changeset 22
4508da679ffb
parent 21
91e0890464b0
child 23
778388400f7b
--- a/bfile_heuristics.c	Thu Oct 20 15:21:53 2011 +0200
+++ b/bfile_heuristics.c	Thu Oct 20 17:29:23 2011 +0200
@@ -6,22 +6,49 @@
  */
 
 #include "bfile_heuristics.h"
+#include <ctype.h>
 
 bfile_heuristics_t *new_bfile_heuristics_t() {
   bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
   ret->level = BFILE_MEDIUM_ACCURACY;
-  /* TODO: check why this fails */
-  /* ret->ccount = calloc(256, sizeof(int)); */
+  bfile_reset(ret);
   return ret;
 }
 
 void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
-  free(def->ccount);
   free(def);
 }
 
+/*
+ * Clear both running counters of the given state: tcount (characters
+ * examined) and bcount (characters that were neither printable nor
+ * whitespace).  The configured level is left as it is.
+ */
+void bfile_reset(bfile_heuristics_t *def) {
+  def->tcount = 0;
+  def->bcount = 0;
+}
+
 bool bfile_check(bfile_heuristics_t *def, int next_char) {
   bool ret = false;
+  if (def->level != BFILE_IGNORE) {
+    def->tcount++;
+    if (!isprint(next_char) && !isspace(next_char)) {
+      def->bcount++;
+    }
+
+    switch (def->level) {
+    case BFILE_LOW_ACCURACY:
+      if (def->tcount > 15 || next_char == EOF) {
+        ret = (1.0*def->bcount)/def->tcount > 0.32;
+      }
+      break;
+    case BFILE_HIGH_ACCURACY:
+      if (def->tcount > 500 || next_char == EOF) {
+        ret = (1.0*def->bcount)/def->tcount > 0.1;
+      }
+      break;
+    default: /* BFILE_MEDIUM_ACCURACY */
+      if (def->tcount > 100 || next_char == EOF) {
+        ret = (1.0*def->bcount)/def->tcount > 0.1;
+      }
+    }
+  }
 
   return ret;
 }

mercurial