问题描述
给定两个字符串s和t,求利用删除、插入、替换三种操作将s替换为t串的最小编辑次数,即最短编辑距离。
动态规划代码求解
import numpy
def Minimum_Edit_Distance(s, t):
"""
:param s: 字符串s
:param t: 字符串t
:return: s和t的最小编辑距离
"""
n = len(s)
m = len(t)
D = [[0 for i in range(m + 1)] for j in range(n + 1)]
Rec = [[None for i in range(m + 1)] for j in range(n + 1)]
for i in range(1, n + 1):
D[i][0] = i
Rec[i][0] = "U"
for j in range(1, m + 1):
D[0][j] = j
Rec[0][j] = "L"
for i in range(0, n):
for j in range(0, m):
c = 0
if s[i] != t[j]:
c = 1
replace = D[i][j] + c
delete = D[i][j + 1] + 1
insert = D[i + 1][j] + 1
if replace == min(delete, insert, replace):
D[i + 1][j + 1] = D[i][j] + c
Rec[i + 1][j + 1] = "LU"
elif insert == min(delete, insert, replace):
D[i + 1][j + 1] = D[i + 1][j] + 1
Rec[i + 1][j + 1] = "L"
else:
D[i + 1][j + 1] = D[i][j + 1] + 1
Rec[i + 1][j + 1] = "U"
def Print_MED(Rec, s, t, i, j):
"""
:param Rec: 追踪矩阵Rec
:param s: 字符串s
:param t: 字符串t
:param i: 位置索引i
:param j: 位置索引j
:return: 操作序列
"""
if i < 0 and j < 0:
return None
if Rec[ i + 1][j + 1] == "LU":
Print_MED(Rec, s, t, i - 1, j - 1)
if s[i] == t[j]:
print("无需操作")
else:
print(f"用{t[j]}替换{s[i]}")
elif Rec[i + 1][j + 1] == "U":
Print_MED(Rec, s, t, i - 1, j)
print(f"删除{s[i]}")
else:
Print_MED(Rec, s, t, i, j - 1)
print(f"插入{t[j]}")
return f"{Print_MED(Rec, s, t, i, j)}\n递推公式表:\n{numpy.asmatrix(D)}\n 追踪数组表:\n{numpy.asmatrix(Rec)}")
if __name__ == '__main__':
s = ["A", "B", "C", "B", "D", "A", "B"]
t = ["B", "D", "C", "A", "B", "A"]
print(Minimum_Edit_Distance(s, t))
|