#cs ---------------------------------------------------------------------------- Project Name: Phonetic Algorithms Description: Phonetic Algorithms AutoIt Version: v3.3.6.1 Author: David Williams (willichan) Email: david@optionsintegrated.com website: http://optionsintegrated.com Company: Options Integrated Creation Date: 8/4/2010 #ce ---------------------------------------------------------------------------- #include-once ;;;; All of these functions are case-insensitive ; +------------------+ ; | Public Functions | ; +------------------+ Func _SoundexNARA($sSoundexString, $iSoundexLength=4) ; soundex as prescribed by the U.S. National Archives & Records Administration Local $iLoop, $sLookup Local $sReturnValue $sSoundexString = StringUpper(StringStripWS($sSoundexString, 8)) $sReturnValue = StringLeft($sSoundexString, 1) For $iLoop = 2 To StringLen($sSoundexString) $sLookup = _SoundexLookup(StringMid($sSoundexString, $iLoop, 1)) If $sLookup = _SoundexLookup(StringMid($sSoundexString, $iLoop - 1, 1)) Then ContinueLoop If $sLookup < 0 Then ContinueLoop If $iLoop > 2 Then If _SoundexLookup(StringMid($sSoundexString, $iLoop - 2, 1)) > 0 Then If _SoundexLookup(StringMid($sSoundexString, $iLoop - 1, 1)) = -2 Then ContinueLoop EndIf EndIf $sReturnValue &= $sLookup If StringLen($sReturnValue) >= $iSoundexLength Then ExitLoop Next While StringLen($sReturnValue) < $iSoundexLength $sReturnValue &= "0" WEnd Return $sReturnValue EndFunc Func _SoundexNum($sSoundexString, $iSoundexLength=4) ; soundex with numbers only result Local $iLoop, $sLookup Local $sReturnValue $sSoundexString = StringUpper(StringStripWS($sSoundexString, 8)) $sReturnValue = _SoundexLookup(StringLeft($sSoundexString, 1)) If $sReturnValue < 0 Then $sReturnValue = 0 For $iLoop = 2 To StringLen($sSoundexString) $sLookup = _SoundexLookup(StringMid($sSoundexString, $iLoop, 1)) If $sLookup = _SoundexLookup(StringMid($sSoundexString, $iLoop - 1, 1)) Then ContinueLoop If $sLookup < 0 Then ContinueLoop $sReturnValue &= $sLookup If StringLen($sReturnValue) >= $iSoundexLength Then ExitLoop Next While StringLen($sReturnValue) < $iSoundexLength $sReturnValue &= "0" WEnd Return $sReturnValue EndFunc Func _LevenshteinDistance($sString1, $sString2) ;Evaluates how many additions/subtractions/substitutions are needed to make the strings identical $sString1 = StringUpper(StringStripWS($sString1, 3)) $sString2 = StringUpper(StringStripWS($sString2, 3)) Local $aiScores[StringLen($sString1) + 1][StringLen($sString2) + 1] Local $iI, $iJ Local $aiEval[2] For $iI = 0 To StringLen($sString1) $aiScores[$iI][0] = 1 Next For $iJ = 0 To StringLen($sString2) $aiScores[0][$iJ] = $iJ Next For $iI = 1 To StringLen($sString1) For $iJ = 1 To StringLen($sString2) If StringMid($sString1, $iI, 1) = StringMid($sString2, $iJ, 1) Then $aiScores[$iI][$iJ] = $aiScores[$iI - 1][$iJ - 1] Else $aiEval[0] = $aiScores[$iI - 1][$iJ] + 1 $aiEval[1] = $aiScores[$iI][$iJ - 1] + 1 If $aiEval[1] < $aiEval[0] Then $aiEval[0] = $aiEval[1] $aiEval[1] = $aiScores[$iI - 1][$iJ - 1] + 1 If $aiEval[1] < $aiEval[0] Then $aiEval[0] = $aiEval[1] $aiScores[$iI][$iJ] = $aiEval[0] EndIf Next Next Return $aiScores[StringLen($sString1)][StringLen($sString2)] EndFunc Func _DamerauLevenshteinDistance($sString1, $sString2) ;Same as _LevenshteinDistance, but also accounts for transposed (switched & adjacent) characters Local $aiScores[StringLen($sString1) + 1][StringLen($sString2) + 1] Local $iI, $iJ Local $aiEval[2] For $iI = 0 To StringLen($sString1) $aiScores[$iI][0] = 1 Next For $iJ = 0 To StringLen($sString2) $aiScores[0][$iJ] = $iJ Next For $iI = 1 To StringLen($sString1) For $iJ = 1 To StringLen($sString2) If StringMid($sString1, $iI, 1) = StringMid($sString2, $iJ, 1) Then $aiScores[$iI][$iJ] = $aiScores[$iI - 1][$iJ - 1] Else $aiEval[0] = $aiScores[$iI - 1][$iJ] + 1 $aiEval[1] = $aiScores[$iI][$iJ - 1] + 1 If $aiEval[1] < $aiEval[0] Then $aiEval[0] = $aiEval[1] $aiEval[1] = $aiScores[$iI - 1][$iJ - 1] + 1 If $aiEval[1] < $aiEval[0] Then $aiEval[0] = $aiEval[1] $aiScores[$iI][$iJ] = $aiEval[0] If ($iI > 1) And ($iJ > 1) And (StringMid($sString1, $iI, 1) = StringMid($sString2, $iJ - 1, 1)) And (StringMid($sString1, $iI - 1, 1) = StringMid($sString2, $iJ, 1)) Then $aiEval[0] = $aiScores[$iI][$iJ] $aiEval[1] = $aiScores[$iI - 2][$iJ - 2] + 1 If $aiEval[0] < $aiEval[1] Then $aiScores[$iI][$iJ] = $aiEval[0] Else $aiScores[$iI][$iJ] = $aiEval[1] EndIf EndIf EndIf Next Next Return $aiScores[StringLen($sString1)][StringLen($sString2)] EndFunc ; +-------------------+ ; | Private Functions | ; +-------------------+ Func _SoundexLookup($sChar) Switch StringUpper($sChar) Case "B", "F", "P", "V" Return 1 Case "C", "G", "J", "K", "Q", "S", "X", "Z" Return 2 Case "D", "T" Return 3 Case "L" Return 4 Case "M", "N" Return 5 Case "R" Return 6 Case "A", "E", "I", "O", "U", "Y" ;Vowels Return -1 Case "H", "W" ; Special case consonants Return -2 Case Else ; Unknown character Return -3 EndSwitch EndFunc