...pudding - diary

この日記は https://yapud.hatenablog.com/ に引っ越し中


2008-12-15

_ [Software][Notes] LotusScript で Trigram の Compare

先日の日記に書いた String::Trigram の件、これに含まれてる Compare が、俺にはたいそう便利で。

LotusScript 内で使いたくなったので書いてみた。

ngramCompare(n As Integer, Str1 As String, Str2 As String)

n : 数値。n-gram の n。とりあえず 3 でいいんじゃね?

Str1 : 比較文字列1

Str2 : 比較文字列2

Str1 と Str2 を n-gram で比較して、類似度を 0〜1 の間で返します。

Sub Initialize
	
	Dim Str1 As String
	Dim Str2 As String
	Dim n    As Integer
	
	n    = 3
	Str1 = "kangaroo"
	Str2 = "kanagaroo"
	
	Print ngramCompare(n,Str1,Str2)*100
	'61.53846....
End Sub
 
Function ngramCompare (n As Integer, Str1 As String, Str2 As String)
	
	Dim Str1len As Integer
	Dim Str2len As Integer
	Dim i,j,k   As Integer
	Dim acount  As Integer
	Dim hcount  As Integer
	
	Dim Str1c   As String
	Dim Str2c   As String
	
	Dim Str1tmp As String
	Dim Str2tmp As String
	
	Str1c = Strconv(Str1,(1+4+32+512))
	Str2c = Strconv(Str2,(1+4+32+512))
	
	For k=1 To n-1
		Str1c = "_" & Str1c & "_"
		Str2c = "_" & Str2c & "_"
	Next
	
	Str1len = Len(Str1c)
	Str2len = Len(Str2c)
	
	acount=0
	hcount=0
	
	For i = 1 To (Str1len-n+1) 
		Str1tmp = Mid(Str1c,i,n)
		
		For j = 1 To (Str2len-n+1) 
			Str2tmp = Mid(Str2c,j,n)
			If Str1tmp = Str2tmp Then
				hcount = hcount + 1
			End If
		Next
		
	Next
	
	acount = ((Str1len-n+1) + (Str2len-n+1)) - hcount
	
	ngramCompare = hcount / acount 
	
End Function


2008年
12月
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30 31
Twitter : @moriya_jp