11Imports System
22Imports System.Drawing
3+ Imports System.Text.RegularExpressions
34
45Public Class B2SData
56
@@ -655,11 +656,122 @@ Public Class B2SData
''' <summary>
''' Returns a shortened form of a file name by cutting it at the first space.
''' </summary>
''' <param name="longFileName">The full file name; may be Nothing.</param>
''' <returns>
''' The leading part of <paramref name="longFileName"/> that precedes the first
''' space (minus one further character, see the review note below), or the input
''' unchanged when it is Nothing, contains no space, or starts with a space.
''' </returns>
Public Shared Function ShortFileName(ByVal longFileName As String) As String
    ' Nothing has no space to cut at; hand it back untouched.
    If longFileName Is Nothing Then
        Return longFileName
    End If

    Dim spaceIndex As Integer = longFileName.IndexOf(" ")

    ' spaceIndex = -1: no space, nothing to cut.
    ' spaceIndex = 0 : a leading space would ask Substring for a length of -1,
    '                  which throws ArgumentOutOfRangeException, so return the
    '                  name unchanged instead.
    If spaceIndex <= 0 Then
        Return longFileName
    End If

    ' NOTE(review): the "- 1" also drops the character immediately before the
    ' space (e.g. "abcd efg" -> "abc"). It is kept because callers may rely on
    ' it, but it looks like a leftover from cutting at a parenthesis - confirm.
    Return longFileName.Substring(0, spaceIndex - 1)
End Function
664665
665- End Class
''' <summary>
''' Fuzzy string matching helpers: Levenshtein edit distance, token-based
''' Jaccard similarity, and a combined "best match" search over candidates.
''' </summary>
Public Class FuzzyFileName

    ''' <summary>
    ''' Computes the Levenshtein (edit) distance between two strings using two
    ''' rolling rows instead of a full matrix.
    ''' </summary>
    ''' <param name="s">First string; Nothing is treated as empty.</param>
    ''' <param name="t">Second string; Nothing is treated as empty.</param>
    ''' <returns>The number of single-character insertions, deletions or substitutions needed to turn one string into the other.</returns>
    Public Shared Function LevenshteinDistance(ByVal s As String, ByVal t As String) As Integer
        If s Is Nothing Then s = String.Empty
        If t Is Nothing Then t = String.Empty

        Dim n As Integer = s.Length
        Dim m As Integer = t.Length

        ' If one of the strings is empty, the distance is the other's length.
        If n = 0 Then Return m
        If m = 0 Then Return n

        ' Make s the shorter string so the work rows are as small as possible.
        If n > m Then
            Dim temp As String = s
            s = t
            t = temp
            n = s.Length
            m = t.Length
        End If

        ' Two work rows of distances, indexed 0..n over the shorter string.
        Dim previousRow(n) As Integer
        Dim currentRow(n) As Integer

        ' Distance from the empty prefix of t to each prefix of s.
        For i As Integer = 0 To n
            previousRow(i) = i
        Next

        For j As Integer = 1 To m
            currentRow(0) = j
            For i As Integer = 1 To n
                Dim cost As Integer = If(s(i - 1) = t(j - 1), 0, 1)
                ' Minimum of insertion, deletion and substitution.
                currentRow(i) = Math.Min(Math.Min(currentRow(i - 1) + 1, previousRow(i) + 1), previousRow(i - 1) + cost)
            Next

            ' Swap rows; the old previous row is reused as scratch space.
            Dim tempRow() As Integer = previousRow
            previousRow = currentRow
            currentRow = tempRow
        Next

        ' After the final swap the result lives in previousRow.
        Return previousRow(n)
    End Function

    ''' <summary>
    ''' Expresses the Levenshtein distance of two strings as a similarity percentage.
    ''' </summary>
    ''' <param name="s">First string; Nothing is treated as empty.</param>
    ''' <param name="t">Second string; Nothing is treated as empty.</param>
    ''' <returns>A value from 0.0 (nothing in common) to 100.0 (identical); 100.0 when both are empty.</returns>
    Public Shared Function PercentageMatch(ByVal s As String, ByVal t As String) As Double
        If s Is Nothing Then s = String.Empty
        If t Is Nothing Then t = String.Empty

        Dim maxLength As Integer = Math.Max(s.Length, t.Length)
        If maxLength = 0 Then
            Return 100.0
        End If

        Dim distance As Integer = LevenshteinDistance(s, t)
        Return (1.0 - CDbl(distance) / maxLength) * 100.0
    End Function

    ''' <summary>
    ''' Normalizes a string for comparison: lower-cases it, removes every
    ''' character that is neither a word character nor whitespace, and trims.
    ''' </summary>
    ''' <param name="input">The string to normalize; Nothing is treated as empty.</param>
    ''' <returns>The normalized string (never Nothing).</returns>
    Public Shared Function NormalizeString(ByVal input As String) As String
        If input Is Nothing Then
            Return String.Empty
        End If

        ' Invariant casing keeps results independent of the user's culture
        ' (e.g. the Turkish dotted/dotless I).
        Dim normalized As String = input.ToLowerInvariant()
        normalized = Regex.Replace(normalized, "[^\w\s]", "")
        Return normalized.Trim()
    End Function

    ''' <summary>
    ''' Splits a string into tokens separated by space characters.
    ''' </summary>
    ''' <param name="input">The string to split; Nothing yields an empty list.</param>
    ''' <returns>The non-empty tokens in their original order.</returns>
    Public Shared Function TokenizeString(ByVal input As String) As List(Of String)
        If input Is Nothing Then
            Return New List(Of String)()
        End If

        Return New List(Of String)(input.Split(New Char() {" "c}, StringSplitOptions.RemoveEmptyEntries))
    End Function

    ''' <summary>
    ''' Computes the Jaccard similarity (|intersection| / |union|) of two token lists,
    ''' treating each list as a set.
    ''' </summary>
    ''' <param name="tokens1">First token list; Nothing is treated as empty.</param>
    ''' <param name="tokens2">Second token list; Nothing is treated as empty.</param>
    ''' <returns>A value from 0.0 to 1.0; 1.0 when both lists are empty.</returns>
    Public Shared Function JaccardSimilarity(ByVal tokens1 As List(Of String), ByVal tokens2 As List(Of String)) As Double
        If tokens1 Is Nothing Then tokens1 = New List(Of String)()
        If tokens2 Is Nothing Then tokens2 = New List(Of String)()

        Dim unionCount As Integer = tokens1.Union(tokens2).Count()

        ' Two empty sets are identical. Without this guard 0/0 yields NaN, which
        ' poisons every later comparison of the score.
        If unionCount = 0 Then
            Return 1.0
        End If

        Dim intersectionCount As Integer = tokens1.Intersect(tokens2).Count()
        Return CDbl(intersectionCount) / unionCount
    End Function

    ''' <summary>
    ''' Finds the candidate that best matches the target, scoring each candidate
    ''' by the average of its token overlap (Jaccard) and its character
    ''' similarity (Levenshtein percentage), both on a 0..100 scale.
    ''' </summary>
    ''' <param name="target">The string to match against.</param>
    ''' <param name="candidates">Candidate strings; Nothing entries are skipped.</param>
    ''' <returns>
    ''' The original (un-normalized) candidate with the highest score; the first
    ''' one wins on ties. String.Empty when no candidate scores above zero or
    ''' <paramref name="candidates"/> is Nothing.
    ''' </returns>
    Public Shared Function FindBestMatch(ByVal target As String, ByVal candidates As List(Of String)) As String
        Dim bestMatch As String = String.Empty
        If candidates Is Nothing Then
            Return bestMatch
        End If

        Dim highestMatchScore As Double = 0.0

        ' The target is normalized and tokenized once, outside the loop.
        Dim normalizedTarget As String = NormalizeString(target)
        Dim targetTokens As List(Of String) = TokenizeString(normalizedTarget)

        For Each candidate As String In candidates
            If candidate Is Nothing Then
                Continue For
            End If

            Dim normalizedCandidate As String = NormalizeString(candidate)
            Dim candidateTokens As List(Of String) = TokenizeString(normalizedCandidate)

            ' Jaccard is 0..1 while PercentageMatch is 0..100; bring both onto
            ' the percentage scale so each contributes equally to the average.
            Dim jaccardScore As Double = JaccardSimilarity(targetTokens, candidateTokens) * 100.0
            Dim levenshteinScore As Double = PercentageMatch(normalizedTarget, normalizedCandidate)

            ' Equal weights; adjust here if one measure should dominate.
            Dim combinedScore As Double = (jaccardScore + levenshteinScore) / 2.0

            If combinedScore > highestMatchScore Then
                highestMatchScore = combinedScore
                bestMatch = candidate
            End If
        Next

        Return bestMatch
    End Function
End Class
777+ End Class
0 commit comments