summaryrefslogtreecommitdiff
path: root/afuzzy.c
diff options
context:
space:
mode:
authorChristian Wieninger <winni@debian.(none)>2007-11-11 15:40:28 +0100
committerChristian Wieninger <winni@debian.(none)>2007-11-11 15:40:28 +0100
commit8d4f8607dc1558ce73eb4c376bdbf78ddb65da83 (patch)
treed0c5dde81a36ab2e8a2edc7c1e6922556518b312 /afuzzy.c
downloadvdr-plugin-epgsearch-8d4f8607dc1558ce73eb4c376bdbf78ddb65da83.tar.gz
vdr-plugin-epgsearch-8d4f8607dc1558ce73eb4c376bdbf78ddb65da83.tar.bz2
Initial commit
Diffstat (limited to 'afuzzy.c')
-rw-r--r--afuzzy.c271
1 files changed, 271 insertions, 0 deletions
diff --git a/afuzzy.c b/afuzzy.c
new file mode 100644
index 0000000..1a6792a
--- /dev/null
+++ b/afuzzy.c
@@ -0,0 +1,271 @@
+/*
+Copyright (C) 2004-2007 Christian Wieninger
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+Or, point your browser to http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+
+The author can be reached at cwieninger@gmx.de
+
+The project's page is at http://winni.vdr-developer.org/epgsearch
+*/
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "afuzzy.h"
+
+static int afuzzy_checkFLT(const char *t, AFUZZY *fuzzy);
+
+/******************************************************************************
+FUNCTION afuzzy_init()
+ Initialization of the fuzzy search routine. This applies to the consequent
+ calls of the afuzzy_CheckRTR (whole string matching) and afuzzy_CheckSUB
+ (substring match) routines. afuzzy_init() should be called for each
+ new pattern or error length. The search is case sensitive
+
+ARGUMENTS:
+ p Pattern
+ kerr Number of possible errors. Shouldn't exceed pattern length
+ UseFilter Use agrep filter algorithm that speeds up search.
+ fuzzy pointer to the structure that will be later passes to Check*
+ (the first 6 elements should be NULLs for the first call)
+
+RETURN VALUE:
+ none
+
+ALGORITHM
+ see. the article on agrep algorithms.
+ The only change is accounting transpositions as one edit operation .
+******************************************************************************/
+void afuzzy_init(const char *p, int kerr, int UseFilter, AFUZZY *fuzzy)
+{
+ int cnt, p_len, i, j, l, d, m, dd;
+ char PatFilter[sizeof(Uint)*8 + 1];
+
+ fuzzy->k = kerr;
+ m = strlen(p);
+ fuzzy->FilterSet = 0;
+ memset(fuzzy->Map, 0 , sizeof(fuzzy->Map) );
+
+ if (fuzzy->S)
+ free(fuzzy->S);
+ if (fuzzy->R)
+ free(fuzzy->R);
+ if (fuzzy->R1)
+ free(fuzzy->R1);
+ if (fuzzy->RP)
+ free(fuzzy->RP);
+ if (fuzzy->RI)
+ free(fuzzy->RI);
+ if (fuzzy->FilterS)
+ free(fuzzy->FilterS);
+
+ fuzzy->FilterS = NULL;
+ fuzzy->S = (Uint *)calloc(m + 1, sizeof(Uint));
+ fuzzy->R = (Uint *)calloc(fuzzy->k + 1, sizeof(Uint));
+ fuzzy->R1 = (Uint *)calloc(fuzzy->k + 1, sizeof(Uint));
+ fuzzy->RI = (Uint *)calloc(fuzzy->k + 1, sizeof(Uint));
+ fuzzy->RP = (Uint *)calloc(fuzzy->k + 1, sizeof(Uint));
+
+ for (i = 0, cnt = 0; i < m; i++)
+ {
+ l = fuzzy->Map[(unsigned char)p[i]];
+ if (!l)
+ {
+ l = fuzzy->Map[(unsigned char)p[i]] = ++cnt;
+ fuzzy->S[l] = 0;
+ }
+ fuzzy->S[l] |= 1 << i;
+ }
+
+
+ for (d = 0; d <= fuzzy->k; d++)
+ fuzzy->RI[d] = (1 << d) - 1;
+
+ fuzzy->mask_ok = (1 << (m - 1));
+ fuzzy->r_size = sizeof(Uint) * (fuzzy->k + 1);
+ p_len = m;
+
+ if (p_len > (int) sizeof(Uint)*8)
+ p_len = (int) sizeof(Uint)*8;
+
+ /* If k is zero then no filter is needed! */
+ if (fuzzy->k && (p_len >= 2*(fuzzy->k + 1)) )
+ {
+ if (UseFilter)
+ {
+ fuzzy->FilterSet = 1;
+ memset(fuzzy->FilterMap, 0 , sizeof(fuzzy->FilterMap) );
+ fuzzy->FilterS = (Uint *)calloc(m + 1, sizeof(Uint));
+
+ /* Not let's fill the interleaved pattern */
+ dd = p_len / (fuzzy->k + 1);
+ p_len = dd * (fuzzy->k + 1);
+
+ for (i = 0, cnt = 0; i < dd; i++)
+ for (j = 0; j < fuzzy->k + 1; j++, cnt++)
+ PatFilter[cnt] = (unsigned char)p[j*dd + i];
+ PatFilter[p_len] = 0;
+
+ for (i = 0, cnt = 0; i < p_len; i++)
+ {
+ l = fuzzy->FilterMap[(unsigned char)PatFilter[i]];
+ if (!l)
+ {
+ l = fuzzy->FilterMap[(unsigned char)PatFilter[i]] = ++cnt;
+ fuzzy->FilterS[l] = 0;
+ }
+ fuzzy->FilterS[l] |= 1 << i;
+ }
+ fuzzy->filter_ok = 0;
+ for (i = p_len - fuzzy->k - 1; i <= p_len - 1; i++) /* k+1 times */
+ fuzzy->filter_ok |= 1 << i;
+
+ /* k+1 first bits set to 1 */
+ fuzzy->filter_shift = (1 << (fuzzy->k + 2)) - 1;
+ }
+ }
+}
+
+/******************************************************************************
+FUNCTION afuzzy_free()
+ Cleaning up after previous afuzzy_init() call.
+
+ARGUMENTS:
+ fuzzy pointer to the afuzzy parameters structure
+
+RETURN VALUE:
+ none
+******************************************************************************/
+void afuzzy_free(AFUZZY *fuzzy)
+{
+ if (fuzzy->S)
+ {
+ free(fuzzy->S);
+ fuzzy->S = NULL;
+ }
+ if (fuzzy->R)
+ {
+ free(fuzzy->R);
+ fuzzy->R = NULL;
+ }
+ if (fuzzy->R1)
+ {
+ free(fuzzy->R1);
+ fuzzy->R1 = NULL;
+ }
+ if (fuzzy->RP)
+ {
+ free(fuzzy->RP);
+ fuzzy->RP = NULL;
+ }
+ if (fuzzy->RI)
+ {
+ free(fuzzy->RI);
+ fuzzy->RI = NULL;
+ }
+ if (fuzzy->FilterS)
+ {
+ free(fuzzy->FilterS);
+ fuzzy->FilterS = NULL;
+ }
+}
+
+
+/******************************************************************************
+FUNCTION afuzzy_CheckSUB()
+ Perform a fuzzy pattern substring matching. afuzzy_init() should be
+ called previously to initialize the pattern and error length.
+ Positive result means that some part of the string given matches the
+ pattern with no more than afuzzy->k errors (1 error = 1 letter
+ replacement or transposition)
+
+ARGUMENTS:
+ t the string to test
+ fuzzy pointer to the afuzzy parameters structure
+
+RETURN VALUE:
+ 0 - no match
+ > 0 - strings match
+
+ALGORITHM
+ ????????????????
+******************************************************************************/
+int afuzzy_checkSUB(const char *t, AFUZZY *fuzzy)
+{
+ register char c;
+ register int j, d;
+
+ /* For eficciency this case should be little bit optimized */
+ if (!fuzzy->k)
+ {
+ Uint R = 0, R1;
+
+ for (j = 0; (c = t[j]) != '\0'; j++)
+ {
+ R1 = ( ((R<<1) | 1) & fuzzy->S[fuzzy->Map[(unsigned char)c]]);
+ R = R1;
+
+ if (R1 & fuzzy->mask_ok)
+ return 1;
+ } /* end for (register int j = 0 ... */
+ return 0;
+ }
+
+ if (fuzzy->FilterSet && !afuzzy_checkFLT(t, fuzzy))
+ return 0;
+
+ memcpy(fuzzy->R, fuzzy->RI, fuzzy->r_size); /* R = RI */
+
+ for (j = 0; (c = t[j]); j++)
+ {
+ for (d = 0; d <= fuzzy->k; d++)
+ {
+ fuzzy->R1[d] = (((fuzzy->R[d]<<1) | 1) &
+ fuzzy->S[fuzzy->Map[(unsigned char)c]]);
+ if (d > 0)
+ fuzzy->R1[d] |= ((fuzzy->R[d-1] | fuzzy->R1[d-1])<<1) | 1 |
+ fuzzy->R[d-1];
+ }
+ if (fuzzy->R1[fuzzy->k] & fuzzy->mask_ok)
+ return j;
+
+ memcpy(fuzzy->R, fuzzy->R1, fuzzy->r_size);
+
+ } /* end for (register int j = 0 ... */
+
+ return 0;
+}
+
+static int afuzzy_checkFLT(const char *t, AFUZZY *fuzzy)
+{
+ register Uint FilterR = 0;
+ register Uint FilterR1;
+ register int j;
+
+ for (j = 0; t[j] != '\0'; j++)
+ {
+ FilterR1 = ( ((FilterR<<(fuzzy->k+1)) | fuzzy->filter_shift) &
+ fuzzy->FilterS[fuzzy->FilterMap[(unsigned char)t[j]]]);
+ if (FilterR1 & fuzzy->filter_ok)
+ return 1;
+ FilterR = FilterR1;
+ } /* end for (register int j = 0 ... */
+
+ return 0;
+}
+
+