SQL Server對比兩字符串的相似度(函數算法)


一、概述

   最近有人問到關於兩個字符串求相似度的函數,所以就寫了本篇文章,分別是“簡單的模糊匹配”,“順序匹配”,“一對一位置匹配”。在平時的這種函數可能會需要用到,業務需求不一樣,這里只給出參照,實際情況可以相應修改。本文所有的兩個字段比較都是除以比較字段本身,例如A與B比較,找出的長度除以A的長度,因為考慮如果A的長度大於B的長度,相似度會超100%,例如‘abbc’,'ab'.

如果大家想除以B的長度,只需要在語句末尾將‘SET @num=@num*1.0/LEN(@Cloumna)’修改成‘SET @num=@num*1.0/LEN(@Cloumnb)

 

1.兩個字符串簡單相似 

---兩個字段簡單相似
CREATE FUNCTION DBO.FN_Resemble
    (@Cloumna NVARCHAR(MAX),
     @Cloumnb NVARCHAR(MAX)
    )
RETURNS FLOAT
AS
BEGIN
    DECLARE @num FLOAT,@len int
    SET @Cloumna=ISNULL(@Cloumna,0)
    SET @Cloumnb=ISNULL(@Cloumnb,0)
    SET @len=1
    SET @num=0
    WHILE(LEN(@Cloumna)<>0 AND LEN(@CloumnB)<>0)
    BEGIN
        WHILE(@len<=LEN(@Cloumna))
        BEGIN
            DECLARE @a NVARCHAR(4)
            SET @a=''
            SET @a=SUBSTRING(@Cloumna,@len,1)
            IF(CHARINDEX(@a,@CloumnB)>0)
            BEGIN
            SET @num=@num+1
            END
        SET  @len=@len+1   
        END
    SET @num=@num*1.0/LEN(@Cloumna)
    BREAK
    END
    
    RETURN @num
END


----測試代碼
SELECT DBO.FN_Resemble('ABDC321G','ABDC123G'

2.兩個字符串順序相似

---兩個字段順序相似
CREATE FUNCTION DBO.FN_Resemble_order
    (@Cloumna NVARCHAR(MAX),
     @Cloumnb NVARCHAR(MAX)
    )
RETURNS FLOAT
AS
BEGIN
    DECLARE @num FLOAT,@len int
    SET @Cloumna=ISNULL(@Cloumna,0)
    SET @Cloumnb=ISNULL(@Cloumnb,0)
    SET @len=1
    SET @num=0
    WHILE(LEN(@Cloumna)<>0 AND LEN(@CloumnB)<>0)
    BEGIN
    DECLARE @a NVARCHAR(4)
    DECLARE @b NVARCHAR(4)
        IF(LEN(@Cloumna)>=LEN(@CloumnB))
        BEGIN
            WHILE(@len<=LEN(@CloumnB))
            BEGIN
                
                SET @a=''
                SET @a=SUBSTRING(@Cloumna,@len,1)
                SET @b=''
                SET @b=SUBSTRING(@CloumnB,@len,1)
                IF(@a=@b)
                BEGIN
                SET @num=@num+1
                END
                    ELSE
                    BEGIN
                        break
                    END
            SET  @len=@len+1   
            END
        END
        ELSE IF    (LEN(@Cloumna)<LEN(@CloumnB))
        BEGIN
            WHILE(@len<=LEN(@Cloumna))
                BEGIN
                    SET @a=''
                    SET @a=SUBSTRING(@Cloumna,@len,1)
                    SET @b=''
                    SET @b=SUBSTRING(@CloumnB,@len,1)
                    IF(@a=@b)
                    BEGIN
                        SET @num=@num+1
                    END
                    ELSE
                        BEGIN
                            break
                        END
                SET  @len=@len+1   
            END
        
        END
    SET @num=@num*1.0/LEN(@Cloumna)
    BREAK
    END
    RETURN @num
END
go

----測試代碼
SELECT DBO.FN_Resemble_order('ABDC456G','ABDC123G')

3.兩個字符串一對一相似

---兩個字段一對一相似
CREATE FUNCTION DBO.FN_Resemble_onebyone
    (@Cloumna NVARCHAR(MAX),
     @Cloumnb NVARCHAR(MAX)
    )
RETURNS FLOAT
AS
BEGIN
    DECLARE @num FLOAT,@len int
    SET @Cloumna=ISNULL(@Cloumna,0)
    SET @Cloumnb=ISNULL(@Cloumnb,0)
    SET @len=1
    SET @num=0
    WHILE(LEN(@Cloumna)<>0 AND LEN(@CloumnB)<>0)
    BEGIN
    DECLARE @a NVARCHAR(4)
    DECLARE @b NVARCHAR(4)
        IF(LEN(@Cloumna)>=LEN(@CloumnB))
        BEGIN
            WHILE(@len<=LEN(@CloumnB))
            BEGIN
                
                SET @a=''
                SET @a=SUBSTRING(@Cloumna,@len,1)
                SET @b=''
                SET @b=SUBSTRING(@CloumnB,@len,1)
                IF(@a=@b)
                BEGIN
                SET @num=@num+1
                END
            SET  @len=@len+1   
            END
        END
        ELSE IF    (LEN(@Cloumna)<LEN(@CloumnB))
        BEGIN
            WHILE(@len<=LEN(@Cloumna))
                BEGIN
                    SET @a=''
                    SET @a=SUBSTRING(@Cloumna,@len,1)
                    SET @b=''
                    SET @b=SUBSTRING(@CloumnB,@len,1)
                    IF(@a=@b)
                    BEGIN
                        SET @num=@num+1
                    END
                SET  @len=@len+1   
            END
        
        END
    SET @num=@num*1.0/LEN(@Cloumna)
    BREAK
    END
    RETURN @num
END

----測試代碼
SELECT DBO.FN_Resemble_onebyone('ABDC456G','ABDC123G'

4.對比兩個版本號的大小

如果前面比后面的大返回1,小返回-1,相等返回0

ALTER FUNCTION FNStrCompare
(@Val1  VARCHAR(50),---比較字符串1
 @Val2  VARCHAR(50),---比較字符串2
 @Break VARCHAR(10) ---分隔符
)
RETURNS INT
AS
BEGIN
DECLARE @Num1 INT
DECLARE @Num2 INT
DECLARE @Val1Num INT
DECLARE @Val2Num INT
DECLARE @a INT
IF CHARINDEX(@Break,@Val1)>0 AND CHARINDEX(@Break,@Val2)>0
    BEGIN
        WHILE LEN(@Val1)>0 AND LEN(@Val2)>0
        BEGIN
            IF CHARINDEX(@Break,@Val1)>0 AND CHARINDEX(@Break,@Val2)>0
            BEGIN
                    SET @Num1=CHARINDEX(@Break,@Val1)-1
                    SET @Val1Num=LEFT(@Val1,@Num1)
                    SET @Val1=SUBSTRING(@Val1,@Num1+2,LEN(@Val1))

                    SET @Num2=CHARINDEX(@Break,@Val2)-1
                    SET @Val2Num=LEFT(@Val2,@Num2)
                    SET @Val2=SUBSTRING(@Val2,@Num1+2,LEN(@Val2))

            END
            ELSE
            BEGIN
                    SET @Val1Num=CONVERT(INT,@Val1)
                    SET @Val2Num=CONVERT(INT,@Val2)

                    IF @Val1Num=@Val2Num
                    BEGIN
                        SET @a=0
                        BREAK
                    END

            END

            IF @Val1Num>@Val2Num
            BEGIN
                 SET @a=1
                 BREAK
            END
            IF @Val1Num<@Val2Num
            BEGIN
                 SET @a=-1
                 BREAK
            END

        END
    END
ELSE
    BEGIN
        SET @Val1Num=CONVERT(INT,@Val1)
        SET @Val2Num=CONVERT(INT,@Val2)
        IF @Val1Num>@Val2Num
        BEGIN
                SET @a=1
        END
        IF @Val1Num<@Val2Num
        BEGIN
                SET @a=-1
        END
        IF @Val1Num=@Val2Num
        BEGIN
                SET @a=0
        END

    END

RETURN @a

END

 

 執行

SELECT chenmh.dbo.FNStrCompare('1.15.1','1.15.1','.')

SELECT chenmh.dbo.FNStrCompare('1.15.2','1.15.1','.')

SELECT chenmh.dbo.FNStrCompare('1.15.2','2.3.1','.')

SELECT chenmh.dbo.FNStrCompare('1.08.2','1.15.1','.')

SELECT dbo.FNStrCompare('1','2','.')

 

 

 

  

 

備注:

    作者:pursuer.chen

    博客:http://www.cnblogs.com/chenmh

本站點所有隨筆都是原創,歡迎大家轉載;但轉載時必須注明文章來源,且在文章開頭明顯處給明鏈接,否則保留追究責任的權利。

《歡迎交流討論》


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM