Remove duplicates - SQL server 2000-2005
Only for MSSQL 2000-2005 by SQL query and no stored procedures
No cursors and temp tables
The table
create table my_table
(
row_id int identity(1,1),
_key varchar(20),
_total decimal(18,2)
)
Insert the data
insert into m开发者_如何学运维y_table (_key,_total) Values('qwe',10)
insert into my_table(_key,_total) Values ('qwe',10)
insert into my_table(_key,_total) Values ('asd',10)
insert into my_table(_key,_total) Values('asd',10)
insert into my_table(_key,_total) Values('zxc',10)
insert into my_table(_key,_total) Values('zxc',10)
insert into my_table(_key,_total) Values('qwe',100)
insert into my_table(_key,_total) Values('qwe',100)
insert into my_table(_key,_total) Values('asd',100)
insert into my_table(_key,_total) Values('asd',100)
insert into my_table(_key,_total) Values('zxc',100)
insert into my_table(_key,_total) Values('zxc',100)
insert into my_table(_key,_total) Values('qwe',50)
insert into my_table(_key,_total) Values('qwe',50)
insert into my_table(_key,_total) Values('asd',50)
insert into my_table(_key,_total) Values('asd',50)
insert into my_table(_key,_total) Values('zxc',50)
insert into my_table(_key,_total) Values('zxc',50)
I need to delete duplicates by _key field and leave rows with maximum _total and among them maximum row_id.
I need to get this result set
8 qwe 100.00
10 asd 100.00
12 zxc 100.00
To delete them, which is what I figured "remove duplicates" means:
DELETE FROM dbo.my_table
WHERE NOT EXISTS(SELECT NULL
FROM dbo.my_table x
JOIN (SELECT _key,
MAX(_total) AS max_total
FROM dbo.my_table
GROUP BY _key) y ON y._key = x._key
AND y.max_total = x._total
WHERE x._key = my_table._key
AND x._total = my_table._total
GROUP BY x._key, x._total
HAVING MAX(x.row_id) = my_table.row_id)
The query to select the rows you list is:
SELECT MAX(x.row_id),
x._key,
x._total
FROM dbo.my_table x
JOIN (SELECT _key,
MAX(_total) AS max_total
FROM dbo.my_table
GROUP BY _key) y ON y._key = x._key
AND y.max_total = x._total
GROUP BY x._key, x._total
If you first express what you want in plain language, it becomes easier
- find the max row_id per (max total per key)
...which becomes...
- find the max total per key
- join back an find the max row_id per (key, max(total)) combination
I haven't run this... I'll leave that for you to find any missing commas etc
SELECT
MAX(row_id) AS MaxRowid, M._key, M._total
FROM
(
SELECT
_key, max(_total) AS TTotal
FROM
my_table
GROUP BY
_key
) foo
JOIN
my_table M ON foo._key = M._key AND foo.TTotal = M._total
GROUP BY
M._key, M._total
For SQL Server 2005+ I could use a CTE for variety
;WITH cFoo AS
(
SELECT
row_id, _key, _total,
ROW_NUMBER() OVER (PARTITION BY _key ORDER BY _total DESC, row_id DESC) AS bar
)
SELECT
row_id, _key, _total
FROM
cFoo
WHERE
bar = 1
You could achieve this with the use of two queries:
--Delete all lesser total duplicates
delete T1
from My_Table T1, My_Table T2
where T1._key = T2._key
and T1._total < T2._total
--Delete all equal total with lesser row_id duplicates
delete T1
from My_Table T1, My_Table T2
where T1._key = T2._key
and T1.row_id < T2.row_id
精彩评论