开发者

How to pick Duplicate Records from SQL Table?

SQL Table

How do I pick out duplicate records? Do I need to use a cursor?

The following is my query:

-- Every transaction row from the view, sorted by generic code.
SELECT
    t.RxNbr,
    t.[Fill Date],
    t.NDC,
    t.GenericCode
FROM vw_TempTransaction AS t
ORDER BY t.GenericCode ASC

If the same combination of RxNbr and [Fill Date] appears in more than one row, I need to pick out those RxNbr / [Fill Date] records.


I'd do as:

-- Step 1: collect every (RxNbr, [Fill Date]) pair that occurs more than once.
WITH repeated_keys AS (
    SELECT RxNbr, [Fill Date]
    FROM vw_TempTransaction
    GROUP BY RxNbr, [Fill Date]
    HAVING COUNT(*) > 1
)
-- Step 2: return the full rows whose key pair is in that list.
SELECT
    src.RxNbr,
    src.[Fill Date],
    src.NDC,
    src.GenericCode
FROM vw_TempTransaction AS src
INNER JOIN repeated_keys AS rk
    ON  src.RxNbr = rk.RxNbr
    AND src.[Fill Date] = rk.[Fill Date]
ORDER BY src.GenericCode ASC


-- Same result without a CTE: join the view to a derived table that lists
-- the (RxNbr, [Fill Date]) pairs occurring more than once.
select
    txn.RxNbr,
    txn.[Fill Date],
    txn.NDC,
    txn.GenericCode
from vw_TempTransaction as txn
inner join
(
    select RxNbr, [Fill Date]
    from vw_TempTransaction
    group by RxNbr, [Fill Date]
    having count(*) > 1
) as dup
    on  txn.RxNbr = dup.RxNbr
    and txn.[Fill Date] = dup.[Fill Date]
order by txn.GenericCode asc


The following example uses a checksum to find (and remove) the duplicate records:

-- Demo data: three copies of (1,'aa'), two copies of (3,'cc'), one (2,'bb').
CREATE TABLE #t1
(
    ID    INT NULL,
    VALUE VARCHAR(2)
)

-- Load all six rows with one statement.
INSERT INTO #t1 (ID, VALUE)
SELECT 1, 'aa' UNION ALL
SELECT 2, 'bb' UNION ALL
SELECT 1, 'aa' UNION ALL
SELECT 1, 'aa' UNION ALL
SELECT 3, 'cc' UNION ALL
SELECT 3, 'cc'
GO

-- BINARY_CHECKSUM(<column names>): list the columns that together define a duplicate.
-- To compare the full row instead, replace BINARY_CHECKSUM(<column names>) with BINARY_CHECKSUM(*).
-- NOTE(review): Microsoft's documentation states that BINARY_CHECKSUM is not
-- guaranteed to be collision-free, so two different rows could share a checksum
-- and be treated as duplicates here -- confirm this is acceptable for your data.

-- Loop-based approach for SQL Server 2000+.
-- Save the checksum and row count of every checksum value that occurs more than once.
SELECT BINARY_CHECKSUM(ID, VALUE) AS ChkSum, COUNT(*) AS Cnt 
INTO #t2 
FROM #t1 
GROUP BY BINARY_CHECKSUM(ID, VALUE) HAVING COUNT(*)>1

DECLARE @ChkSum BIGINT, @rc INT
-- Pick the first checksum to process; @rc is its row count minus one
-- because one copy of each duplicated row must be kept.
SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2

-- Keep going until every saved checksum has been processed.
WHILE EXISTS (SELECT * FROM #t2)
BEGIN    
    -- Limit the next statements to @rc rows (one less than the duplicate count).
    SET ROWCOUNT @rc
    DELETE FROM #t1 WHERE BINARY_CHECKSUM(ID, VALUE) = @ChkSum 
    -- Remove the processed checksum from the work table.
    DELETE #t2 WHERE ChkSum = @ChkSum 
    -- Pick the next checksum to process; if #t2 is now empty the variables
    -- keep their old values and the WHILE condition ends the loop.
    SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2    
END 
-- Reset the row limit so later statements are not restricted.
SET ROWCOUNT 0
GO

-- Show the rows that remain in #t1.
SELECT ID, VALUE
FROM #t1

-- For SQL Server 2005+: a CTE with ROW_NUMBER().
-- Number the rows inside each group of identical (ID, VALUE) pairs and delete
-- every row after the first, leaving exactly one copy of each duplicate.
--
-- The partition uses the real columns rather than BINARY_CHECKSUM(ID, VALUE):
-- two different rows can produce the same checksum, and partitioning on the
-- checksum alone would then delete a row that is not a duplicate.
-- Rows are compared using the columns' collation, so under a case-insensitive
-- collation 'aa' and 'AA' count as the same value.
;WITH Numbered
AS
(
    -- The ORDER BY is required by ROW_NUMBER(); which copy gets RN = 1 does not
    -- matter because all rows in a partition are identical on (ID, VALUE).
    SELECT ROW_NUMBER() OVER (PARTITION BY ID, VALUE ORDER BY ID) AS RN
    FROM #t1
)
DELETE FROM Numbered WHERE RN > 1;
GO

-- Show the de-duplicated rows.
SELECT ID, VALUE
FROM #t1

-- Clean up the temp tables used by the example.
DROP TABLE #t1;
DROP TABLE #t2;
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜