src/bfile_heuristics.c

changeset 34
fa9bda32de17
parent 23
778388400f7b
child 36
a7ff583e153f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/bfile_heuristics.c	Fri Dec 28 15:44:28 2012 +0100
@@ -0,0 +1,81 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
+ * Copyright 2011 Mike Becker. All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * bfile_heuristics.c
+ *
+ *  Created on: 20.10.2011
+ *      Author: Mike
+ */
+
+#include "bfile_heuristics.h"
+#include <ctype.h>
+
+/**
+ * Allocates a heuristics object, sets its level to BFILE_MEDIUM_ACCURACY
+ * and clears its counters via bfile_reset().
+ *
+ * @return the new object, or NULL if the allocation failed; the memory is
+ *         released with destroy_bfile_heuristics_t()
+ */
+bfile_heuristics_t *new_bfile_heuristics_t() {
+  bfile_heuristics_t *ret = malloc(sizeof *ret);
+  if (ret == NULL) {
+    /* out of memory: report it to the caller instead of dereferencing NULL */
+    return NULL;
+  }
+  ret->level = BFILE_MEDIUM_ACCURACY;
+  bfile_reset(ret);
+  return ret;
+}
+
+/**
+ * Releases a heuristics object by handing it to free().
+ *
+ * @param def the object to release
+ */
+void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
+  free(def);
+}
+
+/**
+ * Clears both counters of a heuristics object; the configured level is
+ * left untouched.
+ *
+ * @param def the heuristics object whose counters are set to zero
+ */
+void bfile_reset(bfile_heuristics_t *def) {
+  def->tcount = 0; /* characters seen in total */
+  def->bcount = 0; /* characters that were neither printable nor whitespace */
+}
+
+/**
+ * Feeds the next character of a file into the heuristics and reports
+ * whether the file looks like a binary file.
+ *
+ * A character that is neither printable nor whitespace counts as binary.
+ * A verdict is only computed once more than a level-dependent number of
+ * characters has been seen, or when EOF is passed:
+ *
+ *   BFILE_LOW_ACCURACY   more than  15 characters, binary share above 0.32
+ *   BFILE_HIGH_ACCURACY  more than 500 characters, binary share above 0.1
+ *   any other level      more than 100 characters, binary share above 0.1
+ *
+ * EOF only triggers the final verdict; it is not counted as a character,
+ * so it cannot skew the result for short files. A file without any
+ * characters is treated as text.
+ *
+ * @param def       the heuristics state; nothing is counted or checked if
+ *                  its level is BFILE_IGNORE
+ * @param next_char the next character of the file, or EOF
+ * @return true if the characters seen so far classify the file as binary,
+ *         false otherwise
+ */
+bool bfile_check(bfile_heuristics_t *def, int next_char) {
+  int min_chars;
+  double max_ratio;
+  bool at_eof = (next_char == EOF);
+
+  if (def->level == BFILE_IGNORE) {
+    return false;
+  }
+
+  if (!at_eof) {
+    /* <ctype.h> functions are only defined for unsigned char values */
+    int c = (unsigned char) next_char;
+    def->tcount++;
+    if (!isprint(c) && !isspace(c)) {
+      def->bcount++;
+    }
+  }
+
+  if (def->tcount == 0) { /* empty files are text files */
+    return false;
+  }
+
+  switch (def->level) {
+  case BFILE_LOW_ACCURACY:
+    min_chars = 15;
+    max_ratio = 0.32;
+    break;
+  case BFILE_HIGH_ACCURACY:
+    min_chars = 500;
+    max_ratio = 0.1;
+    break;
+  default: /* BFILE_MEDIUM_ACCURACY */
+    min_chars = 100;
+    max_ratio = 0.1;
+    break;
+  }
+
+  if (def->tcount > min_chars || at_eof) {
+    return (1.0*def->bcount)/def->tcount > max_ratio;
+  }
+
+  return false;
+}

mercurial