summaryrefslogtreecommitdiff
path: root/levenshtein.c
blob: 538c6f4942701c3f16e862e4f923dccb836e2217 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
 * levenshtein.c
 *
 * See the README file for copyright information 
 *
 */

#include <string>
#include <algorithm>
#include <vector>

#include "levenshtein.h"

//***************************************************************************
// lvDistance
//***************************************************************************

int lvDistance(const std::string source, const std::string target, int maxPer, int& maxDist)
{
   // Step 1
   
   const int n = source.length();
   const int m = target.length();
   
   if (!n) return m;
   if (!m) return n;

   if (maxPer != na)
   {
      maxDist = ((double)n) / 100.0 * ((double)maxPer);
      
      if (abs(n-m) > maxDist)
         return maxDist+1;
   }
   
   // Good form to declare a TYPEDEF
   
   typedef std::vector< std::vector<int> > Tmatrix; 
   
   Tmatrix matrix(n+1);
   
   // Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't
   // allow for allocation on declaration of 2.nd dimension of vec of vec
   
   for (int i = 0; i <= n; i++) 
      matrix[i].resize(m+1);
   
   // Step 2
   
   for (int i = 0; i <= n; i++) 
      matrix[i][0] = i;
   
   for (int j = 0; j <= m; j++) 
      matrix[0][j] = j;
   
   // Step 3
   
   for (int i = 1; i <= n; i++) 
   {
      const char s_i = source[i-1];
      
      // Step 4
      
      for (int j = 1; j <= m; j++) 
      {
         const char t_j = target[j-1];
         
         // Step 5
         
         int cost;
         
         if (s_i == t_j) 
            cost = 0;
         else 
            cost = 1;
         
         // Step 6
         
         const int above = matrix[i-1][j];
         const int left = matrix[i][j-1];
         const int diag = matrix[i-1][j-1];
         int cell = std::min(above + 1, std::min(left + 1, diag + cost));
         
         // Step 6A: Cover transposition, in addition to deletion,
         // insertion and substitution. This step is taken from:
         // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's 
         // Enhanced Dynamic Programming ASM Algorithm"
         // (http://www.acm.org/~hlb/publications/asm/asm.html)
         
         if (i>2 && j>2) 
         {
            int trans = matrix[i-2][j-2]+1;
            
            if (source[i-2] != t_j) trans++;
            if (s_i != target[j-2]) trans++;

            if (cell > trans) cell = trans;
         }
         
         matrix[i][j] = cell;
      }
   }
   
   // Step 7
   
   return matrix[n][m];
}