You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
cointop/pkg/levenshtein/levenshtein.go

99 lines
2.4 KiB
Go

package levenshtein
import (
"math"
"strings"
)
// Distance Levenshtein distance
// The Levenshtein distance between two strings is defined as the minimum
// number of edits needed to transform one string into the other, with the
// allowable edit operations being insertion, deletion, or substitution of
// a single character
// http://en.wikipedia.org/wiki/Levenshtein_distance
//
// This implemention is optimized to use O(min(m,n)) space.
// It is based on the optimized C version found here:
// http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
func Distance(str1, str2 string) int {
var cost, lastdiag, olddiag int
s1 := []rune(str1)
s2 := []rune(str2)
lenS1 := len(s1)
lenS2 := len(s2)
column := make([]int, lenS1+1)
for y := 1; y <= lenS1; y++ {
column[y] = y
}
for x := 1; x <= lenS2; x++ {
column[0] = x
lastdiag = x - 1
for y := 1; y <= lenS1; y++ {
olddiag = column[y]
cost = 0
if s1[y-1] != s2[x-1] {
cost = 1
}
column[y] = min(
column[y]+1,
column[y-1]+1,
lastdiag+cost)
lastdiag = olddiag
}
}
return column[lenS1]
}
// DamerauLevenshteinDistance calculates the damerau-levenshtein distance between s1 and s2.
// Reference: [Damerau-Levenshtein Distance](http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance)
// Note that this calculation's result isn't normalized. (not between 0 and 1.)
// and if s1 and s2 are exactly the same, the result is 0.
func DamerauLevenshteinDistance(s1, s2 string) int {
if s1 == s2 {
return 0
}
s1Array := strings.Split(s1, "")
s2Array := strings.Split(s2, "")
lenS1Array := len(s1Array)
lenS2Array := len(s2Array)
m := make([][]int, lenS1Array+1)
var cost int
for i := range m {
m[i] = make([]int, lenS2Array+1)
}
for i := 0; i < lenS1Array+1; i++ {
for j := 0; j < lenS2Array+1; j++ {
if i == 0 {
m[i][j] = j
} else if j == 0 {
m[i][j] = i
} else {
cost = 0
if s1Array[i-1] != s2Array[j-1] {
cost = 1
}
m[i][j] = min(m[i-1][j]+1, m[i][j-1]+1, m[i-1][j-1]+cost)
if i > 1 && j > 1 && s1Array[i-1] == s2Array[j-2] && s1Array[i-2] == s2Array[j-1] {
m[i][j] = min(m[i][j], m[i-2][j-2]+cost)
}
}
}
}
return m[lenS1Array][lenS2Array]
}
// min returns the minimum number of passed int slices.
func min(is ...int) int {
min := int(math.MaxInt64)
for _, v := range is {
if min > v {
min = v
}
}
return min
}