#!/apps/bin/perl -w # This program computes the edit distance between two strings # computes the EDIT matrix my $string1 = shift; die "Where is the string?\n" unless defined($string1); my $string2 = shift; die "Where is the second string?\n" unless defined($string2); #print edit($string1,$string2); #print "\n"; $edit1 = edit_di($string1,$string2); print $edit1; print "\n"; $lcs = (length($string1)+length($string2)-$edit1)/2; print "LCS: $lcs\n"; # end Main sub edit { my @EDIT; my @DECODING; my $x = shift; # first string my $y = shift; # scond string my $m = length $x; # length of fisr string my $n = length $y; # length of second string # initialization for($i=0; $i<= $m; $i++) { $EDIT[$i][0]=$i; $DECODING[$i][0]=0; } for($j=0; $j<= $n; $j++) { $EDIT[0][$j]=$j; $DECODING[0][$j]=1; } # dynamic programming for($i=1; $i<= $m; $i++) { for($j=1;$j<= $n; $j++) { @aux = ($EDIT[$i-1][$j]+1, $EDIT[$i][$j-1]+1, $EDIT[$i-1][$j-1]+delta(substr($x,$i-1,1),substr($y,$j-1,1)) ); $pos = where_min(@aux); $EDIT[$i][$j] = $aux[$pos]; $DECODING[$i][$j] = $pos; } } # print decoding $row = $m; $col = $n; my @instructions = (); while($row>=1 && $col>=1) { if($DECODING[$row][$col]==0) { $row=$row-1; $instruction = "delete ".substr($x,$row,1); } elsif($DECODING[$row][$col]==1) { $col=$col-1; $instruction = "insert ".substr($y,$col,1); } elsif($DECODING[$row][$col]==2) { $row=$row-1; $col=$col-1; if($EDIT[$row][$col]==$EDIT[$row+1][$col+1]) { $instruction="no change"; } else { $instruction = "change ".substr($x,$row,1)." by ".substr($y,$col,1); } } else { die "Invalid Code\n"; } push @instructions, $instruction; } # row==0 OR col==0 if($row==0 && $col==0) { push @instructions,"no change"; } elsif($row==1) { # row ==1 and col == 0 push @instructions, "delete ".substr($x,$row-1,1); } else { # row == 0 and col == 1 push @instructions, "delete ".substr($y,$col-1,1); } foreach $e (reverse @instructions) { print "$e\n"; } return $EDIT[$m][$n]; } sub edit_di { my @EDIT; my @DECODING; my $x = shift; # first string my $y = shift; # scond string my $m = length $x; # length of fisr string my $n = length $y; # length of second string # initialization for($i=0; $i<= $m; $i++) { $EDIT[$i][0]=$i; $DECODING[$i][0]=0; } for($j=0; $j<= $n; $j++) { $EDIT[0][$j]=$j; $DECODING[0][$j]=1; } # dynamic programming for($i=1; $i<= $m; $i++) { for($j=1;$j<= $n; $j++) { @aux = ($EDIT[$i-1][$j]+1, $EDIT[$i][$j-1]+1, $EDIT[$i-1][$j-1]+delta_di(substr($x,$i-1,1),substr($y,$j-1,1)) ); $pos = where_min(@aux); $EDIT[$i][$j] = $aux[$pos]; $DECODING[$i][$j] = $pos; } } # print decoding $row = $m; $col = $n; my @instructions = (); my $count = 0; while($row>=1 && $col>=1) { if($DECODING[$row][$col]==0) { $row=$row-1; $instruction = "delete ".substr($x,$row,1); $count++; } elsif($DECODING[$row][$col]==1) { $col=$col-1; $instruction = "insert ".substr($y,$col,1); $count++; } elsif($DECODING[$row][$col]==2) { $row=$row-1; $col=$col-1; if($EDIT[$row][$col]==$EDIT[$row+1][$col+1]) { $instruction="no change"; } else { $instruction = "change ".substr($x,$row,1)." by ".substr($y,$col,1); $count=$count+2; } } else { die "Invalid Code\n"; } push @instructions, $instruction; } # row==0 OR col==0 if ($row==1 || $row==1) { if($row==1) { # row ==1 and col == 0 # push @instructions, "delete ".substr($x,$row-1,1); $count++; } else { # row == 0 and col == 1 # push @instructions, "delete ".substr($y,$col-1,1); $count++; } } my $flag=0;my $max=0; my $aux=0; foreach $e (reverse @instructions) { print "$e\n"; if($e eq "no change") { if($flag==1) { $aux++; } else { $aux=1;$flag=1; } } else { $max = ($max < $aux) ? $aux : $max; $flag=0; $aux=0; } } print "LONG: $max\n"; return $count; } sub where_min { my @a = @_; my $l = $#a; my $i; my $min = 0; for($i=0;$i<=$l;$i++) { if($a[$min]>$a[$i]) { $min = $i; } } return $min; } sub min { my @a = @_; my $l = $#a; my $i; my $min = $a[0]; for($i=0;$i<=$l;$i++) { if($min>$a[$i]) { $min = $a[$i]; } } return $min; } sub delta { my $a = shift; my $b = shift; return ($a eq $b ? 0 : 1); } sub delta_di { my $a = shift; my $b = shift; return ($a eq $b ? 0 : 2); }