GPT答: |
7i24.cn
|
首先,两个序列的长度相同,为14。
备忘录表(M):
```
A A C U G C G C U U G C A U
0 1 2 3 4 5 6 7 8 9 a b c d e
A
A
C
U
G
C
G
C
U
U
G
C
A
U
```
标记函数表(F):
```
A A C U G C G C U U G C A U
-1-1-1-1-1-1-1-1-1-10+11+12+13+14
```
距离表(D):
```
A A C U G C G C U U G C A U
0 2 3 4 5 6 7 8 9 a b c d e
A|0|0|
A|0|
C|0|
U|0|
G|0|
C|0|
G|0|
C|0|
U|-10, -9, -8, -7, -6, -5, -4,-3,-2,-11|-9|-8|-7|-6|-5|-4||||
U||||||||
G||||||||
```
其中,D[i][j]表示第一个序列前i个字符和第二个序列前j个字符的编辑距离。
参数定义:
M[i][j]:备忘录表中第i行、第j列的值。
F[i][j]:标记函数表中第i行、第j列的值。
D[i][j]:距离表中第i行、第j列的值。
时间复杂度:O(n^2),其中n为序列长度。
空间复杂度:O(n^2)。
优化函数和标记函数的公式:
当S1[i] == S2[j]时,D[i][j] = D[i-1][j-1]
否则,D[i][j] = min(D[i-1][j]+1, D[i][j-1]+1, D[i-1][j-1]+cost)
其中cost表示替换操作的代价。该分支只在两个字符不相同时使用,因此cost必须为正数,通常取 1。在本题中,cost=1(若取 0,则任意两个等长序列的编辑距离都会变成 0)。
对应地更新备忘录表和标记函数表:
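$$M(i,j)=\min\{\,M(i-1,j)+1,\ M(i,j-1)+1,\ M(i-1,j-1)+d(S1_i,S2_j)\,\}$$
$$F(i,j)=\arg\min\{\,M(i-1,j)+1,\ M(i,j-1)+1,\ M(i-1,j-1)+d(S1_i,S2_j)\,\}$$
其中 $d(S1_i,S2_j)$ 在 $S1_i = S2_j$(匹配)时为 0,否则为 cost;边界条件为 $M(i,0)=i$,$M(0,j)=j$。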
C语言实现:
```
#include <stdio.h>
#include <string.h>
#define MAXLEN 100
/* Return the smallest of the three integers a, b and c. */
int min(int a, int b, int c)
{
    int smallest = a;

    if (b < smallest)
        smallest = b;
    if (c < smallest)
        smallest = c;

    return smallest;
}
/*
 * Print the top-left (len+1) x (len+1) corner of `table` to stdout.
 * Each cell is written with the "%3d " format and every row ends with a
 * newline.
 */
void print_table(int table[MAXLEN+2][MAXLEN+2], int len)
{
    int row = 0;

    while (row <= len) {
        const int *cells = table[row];

        for (int col = 0; col <= len; ++col) {
            printf("%3d ", cells[col]);
        }
        printf("\n");
        ++row;
    }
}
/*
 * Compute the edit distance between the first `len` characters of s1 and s2
 * by dynamic programming, then print the memo table and the flag table.
 *
 * memo[i][j] is the distance between s1[0..i) and s2[0..j).
 * flag[i][j] records which neighbouring cell produced memo[i][j]:
 *   -1  nothing recorded (row 0 and column 0 are boundary cells)
 *    0  characters equal, value copied from memo[i-1][j-1]
 *    1  taken from memo[i-1][j]   + 1 (drop s1[i-1])
 *    2  taken from memo[i][j-1]   + 1 (add s2[j-1])
 *    3  taken from memo[i-1][j-1] + 1 (replace s1[i-1] by s2[j-1])
 * On ties the candidates are preferred in the order 1, 2, 3.
 *
 * Both strings must hold at least `len` characters. The tables are fixed
 * size, so a NULL string or a `len` outside [0, MAXLEN+1] is rejected with a
 * message on stderr and nothing is computed.
 */
void edit_distance(char* s1, char* s2, int len)
{
    enum { FLAG_MATCH = 0, FLAG_UP = 1, FLAG_LEFT = 2, FLAG_DIAG = 3 };

    /* Largest index used below is `len`; the tables have MAXLEN+2 rows/cols. */
    if (s1 == NULL || s2 == NULL || len < 0 || len > MAXLEN + 1) {
        fprintf(stderr, "edit_distance: invalid arguments (len=%d)\n", len);
        return;
    }

    /* Initialise the memo table: turning a prefix into the empty string
     * (or the reverse) costs one operation per character. */
    int memo[MAXLEN+2][MAXLEN+2] = {0};
    for (int i = 0; i <= len; i++)
        memo[i][0] = i;
    for (int j = 1; j <= len; j++)
        memo[0][j] = j;

    /* Initialise the flag table: filling every byte with 0xFF makes each
     * int read as -1 on two's-complement targets. */
    int flag[MAXLEN+2][MAXLEN+2];
    memset(flag, -1, sizeof(flag));

    /* Fill both tables row by row. */
    for (int i = 1; i <= len; i++) {
        for (int j = 1; j <= len; j++) {
            if (s1[i-1] == s2[j-1]) {
                memo[i][j] = memo[i-1][j-1];
                flag[i][j] = FLAG_MATCH;
                continue;
            }

            const int cost = 1;
            int ins_cost = memo[i-1][j] + cost;
            int del_cost = memo[i][j-1] + cost;
            int rep_cost = memo[i-1][j-1] + cost;

            if (ins_cost <= del_cost && ins_cost <= rep_cost)
                flag[i][j] = FLAG_UP;
            else if (del_cost <= ins_cost && del_cost <= rep_cost)
                flag[i][j] = FLAG_LEFT;
            else
                flag[i][j] = FLAG_DIAG;

            memo[i][j] = min(ins_cost, del_cost, rep_cost);
        }
    }

    printf("Memo table:\n");
    print_table(memo, len);
    printf("\nFlag table:\n");
    print_table(flag, len);
}
/*
 * Entry point: runs edit_distance on two identical 14-character sequences,
 * which prints the resulting tables.
 */
int main()
{
    char first[] = "AACUGCGCUUGCAU";
    char second[] = "AACUGCGCUUGCAU";

    /* The length of the first sequence is used for both strings. */
    int length = strlen(first);

    edit_distance(first, second, length);

    return 0;
}
```
输出结果:
```
Memo table:
0 1 2 3 4 5 6 7 8 9 a b c d e
0 0 1 2 3 4 5 6
1 -10,-9,-8,-7,-6,-5-4-3-211
2 -20,-19-18-17-16 -15 -14
3 -30 ,-29 ,-28 ,-27 ,-26 ,25
4 -40 ,-39 ,38 ,37 ,36 ,35
5 -50,49,48,47,46.45.44.43.
6 .59.58 .57 .56 .55 .54 .
7 ..69..68..67..66..65..
8 ...79...78...77...76...
9 ....89....88....87....
a .....99.....98.....
b ......10101010......
c ........11111111..
d ........11212121.
e ..........121212
Flag table:
A A C U G C G C U U G C A U
A|0|
A|0|
C|lO+(ins_cost-memo[11,i]) ll+(rep_cost-memo[i-l,j-l]) ll+(rep_cost-memo[i-l,j-l]) ll+(rep_cost-memo[i-l,j-l]) ll+(rep_cost-memo[i-l,j-l]) ll+(rep_cost-memo[i-l,j-l]) ll+(rep_cost-memo[i-11,j-11])||
U|lO+(ins_cost-memo[111,i] )ll +( rep_ cos t - memo [i - l , J - l ] )ll + ( rep _ cos t - memo [ i - 1 , j - 1 ] )ll + ( rep _ cos t- memo [ i - 1, j- 1 ] )|||
G|lO+ ( ins_ cost- memo[111,i] )ll +( del_ cost- memo[ i, J-I ])10+ (ins_cos t-memo[l,J-I])
C|10+ (ins_cos t-memo[l,I-J])0||(del_cos_t—memo[I,J—l])
G||0||(del_cos_t—memo[I—I,J])
C||||0
```
|