-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwagner_fischer.py
More file actions
109 lines (91 loc) · 4.31 KB
/
wagner_fischer.py
File metadata and controls
109 lines (91 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Costs for the operations
INS_COST = 1
DEL_COST = 1
SUB_COST = 2
def find_minimum_edit_distance(source_string, target_string) :
# Create a dp matrix of dimension (source_string + 1) x (destination_matrix + 1)
dp = [[0] * (len(source_string) + 1) for i in range(len(target_string) + 1)]
# Initialize the required values of the matrix
for i in range(1, len(target_string) + 1) :
dp[i][0] = dp[i - 1][0] + INS_COST
for i in range(1, len(source_string) + 1) :
dp[0][i] = dp[0][i - 1] + DEL_COST
# Maintain the record of opertions done
# Record is one tuple. Eg : (INSERT, 'a') or (SUBSTITUTE, 'e', 'r') or (DELETE, 'j')
operations_performed = []
# Build the matrix following the algorithm
for i in range(1, len(target_string) + 1) :
for j in range(1, len(source_string) + 1) :
if source_string[j - 1] == target_string[i - 1] :
dp[i][j] = dp[i - 1][j - 1]
else :
dp[i][j] = min(dp[i - 1][j] + INS_COST, \
dp[i - 1][j - 1] + SUB_COST, \
dp[i][j - 1] + DEL_COST)
# Initialization for backtracking
i = len(target_string)
j = len(source_string)
# Backtrack to record the operation performed
while (i != 0 and j != 0) :
# If the character of the source string is equal to the character of the destination string,
# no operation is performed
if target_string[i - 1] == source_string[j - 1] :
i -= 1
j -= 1
else :
# Check if the current element is derived from the upper-left diagonal element
if dp[i][j] == dp[i - 1][j - 1] + SUB_COST :
operations_performed.append(('SUBSTITUTE', source_string[j - 1], target_string[i - 1]))
i -= 1
j -= 1
# Check if the current element is derived from the upper element
elif dp[i][j] == dp[i - 1][j] + INS_COST :
operations_performed.append(('INSERT', target_string[i - 1]))
i -= 1
# Check if the current element is derived from the left element
else :
operations_performed.append(('DELETE', source_string[j - 1]))
j -= 1
# If we reach top-most row of the matrix
while (j != 0) :
operations_performed.append(('DELETE', source_string[j - 1]))
j -= 1
# If we reach left-most column of the matrix
while (i != 0) :
operations_performed.append(('INSERT', target_string[i - 1]))
i -= 1
# Reverse the list of operations performed as we have operations in reverse
# order because of backtracking
operations_performed.reverse()
return [dp[len(target_string)][len(source_string)], operations_performed]
if __name__ == "__main__":
# Get the source and target string
print("Enter the source string :")
source_string = input().strip()
print("Enter the target string :")
target_string = input().strip()
# Find the minimum edit distance and the operation performed
distance, operations_performed = find_minimum_edit_distance(source_string, target_string)
# Count the number of individual operations
insertions, deletions, substitutions = 0, 0, 0
for i in operations_performed :
if i[0] == 'INSERT' :
insertions += 1
elif i[0] == 'DELETE' :
deletions += 1
else :
substitutions += 1
# Print the results
print("Minimum edit distance : {}".format(distance))
print("Number of insertions : {}".format(insertions))
print("Number of deletions : {}".format(deletions))
print("Number of substitutions : {}".format(substitutions))
print("Total number of operations : {}".format(insertions + deletions + substitutions))
print("Actual Operations :")
for i in range(len(operations_performed)) :
if operations_performed[i][0] == 'INSERT' :
print("{}) {} : {}".format(i + 1, operations_performed[i][0], operations_performed[i][1]))
elif operations_performed[i][0] == 'DELETE' :
print("{}) {} : {}".format(i + 1, operations_performed[i][0], operations_performed[i][1]))
else :
print("{}) {} : {} by {}".format(i + 1, operations_performed[i][0], operations_performed[i][1], operations_performed[i][2]))