最近有人問到關於兩個字段求相似度的函數,所以就寫了一篇關於相似度的函數,分別是“簡單的模糊匹配”,“順序匹配”,“一對一位置匹配”。在平時的這種函數可能會需要用到,可能業務需求不一樣,這裡只給出參照,實際情況可以相對修改。
本文所有的兩個字段比較都是除以比較字段本身,例如A與B比較,找出的長度除以A的長度,因為考慮如果A的長度大於B的長度,相似度會超100%,例如‘abbc’,’ab’.
如果大家想除以B的長度,只需要在語句末尾將‘SET @num=@num*1.0/LEN(@Cloumna)’修改成‘SET @num=@num*1.0/LEN(@Cloumnb)’
1.兩個字段簡單相似
---兩個字段簡單相似
ALTER FUNCTION DBO.FN_Resemble
(@Cloumna NVARCHAR(MAX),
@Cloumnb NVARCHAR(MAX)
)
RETURNS FLOAT
AS
BEGIN
DECLARE @num FLOAT,@len int
SET @Cloumna=ISNULL(@Cloumna,0)
SET @Cloumnb=ISNULL(@Cloumnb,0)
SET @len=1
SET @num=0
WHILE(LEN(@Cloumna)<>0 AND LEN(@CloumnB)<>0)
BEGIN
WHILE(@len<=LEN(@Cloumna))
BEGIN
DECLARE @a NVARCHAR(4)
SET @a=''
SET @a=SUBSTRING(@Cloumna,@len,1)
IF(CHARINDEX(@a,@CloumnB)>0)
BEGIN
SET @num=@num+1
END
SET @len=@len+1
END
SET @num=@num*1.0/LEN(@Cloumna)
BREAK
END
RETURN @num
END
----測試代碼
SELECT DBO.FN_Resemble('ABDC321G','ABDC123G')
2.兩個字段順序相似
---兩個字段順序相似
CREATE FUNCTION DBO.FN_Resemble_order
(@Cloumna NVARCHAR(MAX),
@Cloumnb NVARCHAR(MAX)
)
RETURNS FLOAT
AS
BEGIN
DECLARE @num FLOAT,@len int
SET @Cloumna=ISNULL(@Cloumna,0)
SET @Cloumnb=ISNULL(@Cloumnb,0)
SET @len=1
SET @num=0
WHILE(LEN(@Cloumna)<>0 AND LEN(@CloumnB)<>0)
BEGIN
DECLARE @a NVARCHAR(4)
DECLARE @b NVARCHAR(4)
IF(LEN(@Cloumna)>=LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@CloumnB))
BEGIN
SET @a=''
SET @a=SUBSTRING(@Cloumna,@len,1)
SET @b=''
SET @b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET @num=@num+1
END
ELSE
BEGIN
break
END
SET @len=@len+1
END
END
ELSE IF (LEN(@Cloumna)<LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@Cloumna))
BEGIN
SET @a=''
SET @a=SUBSTRING(@Cloumna,@len,1)
SET @b=''
SET @b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET @num=@num+1
END
ELSE
BEGIN
break
END
SET @len=@len+1
END
END
SET @num=@num*1.0/LEN(@Cloumna)
BREAK
END
RETURN @num
END
go
----測試代碼
SELECT DBO.FN_Resemble_order('ABDC456G','ABDC123G')
3.兩個字段一對一相似
---兩個字段一對一相似
CREATE FUNCTION DBO.FN_Resemble_onebyone
(@Cloumna NVARCHAR(MAX),
@Cloumnb NVARCHAR(MAX)
)
RETURNS FLOAT
AS
BEGIN
DECLARE @num FLOAT,@len int
SET @Cloumna=ISNULL(@Cloumna,0)
SET @Cloumnb=ISNULL(@Cloumnb,0)
SET @len=1
SET @num=0
WHILE(LEN(@Cloumna)<>0 AND LEN(@CloumnB)<>0)
BEGIN
DECLARE @a NVARCHAR(4)
DECLARE @b NVARCHAR(4)
IF(LEN(@Cloumna)>=LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@CloumnB))
BEGIN
SET @a=''
SET @a=SUBSTRING(@Cloumna,@len,1)
SET @b=''
SET @b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET @num=@num+1
END
SET @len=@len+1
END
END
ELSE IF (LEN(@Cloumna)<LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@Cloumna))
BEGIN
SET @a=''
SET @a=SUBSTRING(@Cloumna,@len,1)
SET @b=''
SET @b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET @num=@num+1
END
SET @len=@len+1
END
END
SET @num=@num*1.0/LEN(@Cloumna)
BREAK
END
RETURN @num
END
----測試代碼
SELECT DBO.FN_Resemble_onebyone('ABDC456G','ABDC123G')