Delete rows following a duplicate
I have a list of user login and logout stamps. Unfortunately a LOGIN entry might not always be followed by a LOGOUT entry.
I wish to delete any row which has the same [event] and [user_id] as previous row when ordered by [event_date] Any suggestions on how to do this?Example table
CREATE TABLE #LOG (
[id] int IDENTITY(1,1),
[user_id] int,
[event] varchar(50),
[event_date] datetime
);
INSERT INTO #LOG ([user_id], [event], [event_date])
SELECT 1,'LOGIN',{ts '2010-12-15 15:31:59'}
UNION ALL SELECT 1,'LOGOUT',{ts '2010-12-15 15:32:55'}
UNION ALL SELECT 1,'LOGIN',{ts '2010-12-15 15:38:04'}
UNION ALL SELECT 1,'LOGOUT',{ts '2010-12-15 15:38:17'}
UNION ALL SELECT 1,'LOGOUT',{ts '2010-12-1开发者_StackOverflow5 15:38:45'} -- Delete
UNION ALL SELECT 2,'LOGIN',{ts '2010-12-15 16:59:39'}
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:00:08'}
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:00:39'} -- Delete
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:01:16'} -- Delete
UNION ALL SELECT 2,'LOGIN',{ts '2010-12-15 17:01:38'}
UNION ALL SELECT 2,'LOGIN',{ts '2010-12-15 17:02:26'} -- Delete
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:02:39'}
;WITH T1 AS
(
SELECT * ,
ROW_NUMBER() OVER (ORDER BY event_date)-
ROW_NUMBER() OVER (PARTITION BY [user_id], [event]
ORDER BY event_date) AS Grp
FROM #LOG
),T2 AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY [user_id], [event], Grp
ORDER BY event_date) RN
FROM T1
)
DELETE FROM T2
WHERE RN > 1
Using the ROW_NUMBER functionality of SQL Server would be an option
SQL Statement
;WITH q AS (
SELECT Rownumber = ROW_NUMBER() OVER (ORDER BY user_id, event_date)
, user_id
, event
, event_date
FROM #LOG
)
DELETE FROM #LOG
FROM #LOG l
INNER JOIN (
SELECT q2.*
FROM q q1
INNER JOIN q q2 ON q2.Rownumber = q1.Rownumber + 1
AND q2.user_id = q1.user_id
AND q2.event = q1.event
) q ON q.user_id = l.user_id
AND q.event_date = l.event_date
SELECT *
FROM #LOG
My understanding is that you want to delete entries such that the pattern is always In,Out,In,Out,etc
.
This means a record is deleted if the preceding record (when order by user_id, then event_date), is of the same event.
There are two options I'd use to go about this...
DELETE
#log
WHERE
event = (
SELECT
TOP 1
event
FROM
#log AS [preceding]
WHERE
[preceding].user_id = #log.user_id
AND [preceding].event_date < #log.event_date
ORDER BY
[preceding].event_date DESC
)
Or...
WITH ordered_log AS (
SELECT
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY event_date) AS user_event_id,
*
FROM
#log
)
DELETE
ordered_log
FROM
ordered_log
INNER JOIN
ordered_log AS [preceding]
ON [preceding].login_id = [ordered_log].login_id
AND [preceding].user_event_id = [ordered_log].user_event_id - 1
WHERE
[preceding].event = [ordered_log].event
Either way, I highly recommend an Index covering user_id
then event_date
.
Note: The first version does not cope with the possibility of two events having the same timestamp. The latter, however, does.
If you have to delete duplicate row. Then no need to set the order by clause.
Try below
Delete l from #LOG l
Inner Join
(
Select id from #LOG l
Inner Join(
Select user_id, event from #LOG
group by user_id, event
having COUNT(user_id) > 1 and COUNT(event) > 1
)T
on (l.user_id = t.user_id) and (l.event = t.event)
)T
on T.id = l.id
精彩评论