-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathedit_distance.py
More file actions
88 lines (67 loc) · 2.58 KB
/
edit_distance.py
File metadata and controls
88 lines (67 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def calculate_edit_distance(str1, str2):
'''
Calculate the edit distance between two strings.
An edit is defined as one of three actions, a deletion,
a replacement, or an addition.
'''
# operation enums
MATCH, INSERT, DELETE = 0, 1, 2
# three possible operations @ each point
opt = [0,0,0]
# 2D array to hold all edit distance data
distance = [[0] * (len(str1)+1) for _ in range(len(str2)+1)]
# 2D array to hold parent least cost relationships
parent = [[0] * (len(str1)+1) for _ in range(len(str2)+1)]
str1 = " " + str1
str2 = " " + str2
# initial values
for i in range(len(str2)):
distance[i][0] = i
parent[i][0] = DELETE
for j in range(len(str1)):
distance[0][j] = j
parent[0][j] = INSERT
distance[0][0] = 0
parent[0][0] = -1
# go through every letter combination
for i in range(1, len(str2)):
for j in range(1, len(str1)):
opt = [0,0,0]
# populate with edit data
if j > 0:
opt[INSERT] = distance[i][j-1] + 1 # indel
if i > 0:
opt[DELETE] = distance[i-1][j] + 1 # indel
if j > 0 and i > 0:
opt[MATCH] = distance[i-1][j-1] + (0 if str1[j] == str2[i] else 1) # match or substitution
# find min cost operation
lowest_cost = min(opt)
parent_opt = opt.index(lowest_cost)
# print(opt, lowest_cost, parent_opt)
distance[i][j] = lowest_cost
parent[i][j] = parent_opt
# for i in range(len(distance)):
# print(distance[i])
# print('-----')
# for i in range(len(parent)):
# print(parent[i])
# traceback
current_pos = (len(str2)-1, len(str1)-1)
D,I,S = 'Delete','Insert','Substitute'
trace_stack = []
while parent[current_pos[0]][current_pos[1]] != -1:
parent_val = parent[current_pos[0]][current_pos[1]]
if parent_val == 0:
if str2[current_pos[0]] == str1[current_pos[1]]:
# trace_stack.append(M)
pass
else:
trace_stack.append(S + ' ' + str1[current_pos[1]])
current_pos = (current_pos[0]-1, current_pos[1]-1)
elif parent_val == 1:
trace_stack.append(I + ' ' + str1[current_pos[1]])
current_pos = (current_pos[0], current_pos[1]-1)
else:
trace_stack.append(D + ' ' + str2[current_pos[0]])
current_pos = (current_pos[0]-1, current_pos[1])
return trace_stack[::-1]