blob: 538c6f4942701c3f16e862e4f923dccb836e2217 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
/*
* levenshtein.c
*
* See the README file for copyright information
*
*/
#include <string>
#include <algorithm>
#include <vector>
#include "levenshtein.h"
//***************************************************************************
// lvDistance
//***************************************************************************
int lvDistance(const std::string source, const std::string target, int maxPer, int& maxDist)
{
// Step 1
const int n = source.length();
const int m = target.length();
if (!n) return m;
if (!m) return n;
if (maxPer != na)
{
maxDist = ((double)n) / 100.0 * ((double)maxPer);
if (abs(n-m) > maxDist)
return maxDist+1;
}
// Good form to declare a TYPEDEF
typedef std::vector< std::vector<int> > Tmatrix;
Tmatrix matrix(n+1);
// Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't
// allow for allocation on declaration of 2.nd dimension of vec of vec
for (int i = 0; i <= n; i++)
matrix[i].resize(m+1);
// Step 2
for (int i = 0; i <= n; i++)
matrix[i][0] = i;
for (int j = 0; j <= m; j++)
matrix[0][j] = j;
// Step 3
for (int i = 1; i <= n; i++)
{
const char s_i = source[i-1];
// Step 4
for (int j = 1; j <= m; j++)
{
const char t_j = target[j-1];
// Step 5
int cost;
if (s_i == t_j)
cost = 0;
else
cost = 1;
// Step 6
const int above = matrix[i-1][j];
const int left = matrix[i][j-1];
const int diag = matrix[i-1][j-1];
int cell = std::min(above + 1, std::min(left + 1, diag + cost));
// Step 6A: Cover transposition, in addition to deletion,
// insertion and substitution. This step is taken from:
// Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
// Enhanced Dynamic Programming ASM Algorithm"
// (http://www.acm.org/~hlb/publications/asm/asm.html)
if (i>2 && j>2)
{
int trans = matrix[i-2][j-2]+1;
if (source[i-2] != t_j) trans++;
if (s_i != target[j-2]) trans++;
if (cell > trans) cell = trans;
}
matrix[i][j] = cell;
}
}
// Step 7
return matrix[n][m];
}
|