Getting clusters of rows close together in time
I have a table basically like so
ID | ItemID | Start | End |
---------------------------------------------------------------
1 234 10/20/09 8:34:22 10/20/09 8:35:10
2 274 10/20/09 8:35:30 10/20/09 8:36:27
3 272 10/21/09 12:15:00 10/21/09 12:17:00
4 112 10/21/09 12:20:14 10/21/09 12:21:21
5 15 10/21/09 12:22:39 10/21/09 12:24:15
There are two "clusters" of entries here, 1-2 and 3-5 separated by a gap in time, specifically > 30 minutes is what I'm interested in.
What I would like is the first and last rows of the cluster of entries. This is fairly easy to achieve by retrieving all the rows and looping through them in order of start time, but I'd like to have it in SQL if possible.
I'm using SQL Server 2008, thanks.
Edit:
Each row would contain
first.* , las开发者_StackOverflowt.*
where first is the first row in the cluster and last is the last row.
the results of this table would be
1 234 10/20/09 8:34:22 10/20/09 8:35:10 2 274 10/20/09 8:35:30 10/20/09 8:36:27
3 272 10/21/09 12:15:00 10/21/09 12:17:00 5 15 10/21/09 12:22:39 10/21/09 12:24:15
try this out:
DECLARE @YourTable table (ID int, ItemID int, StartD datetime, EndD datetime)
INSERT @YourTable VALUES (1,234,'10/20/09 8:34:22' ,'10/20/09 8:35:10' )
INSERT @YourTable VALUES (2,274,'10/20/09 8:35:30' ,'10/20/09 8:36:27' )
INSERT @YourTable VALUES (3,272,'10/21/09 12:15:00','10/21/09 12:17:00')
INSERT @YourTable VALUES (4,112,'10/21/09 12:20:14','10/21/09 12:21:21')
INSERT @YourTable VALUES (5,15 ,'10/21/09 12:22:39','10/21/09 12:24:15')
;WITH AggValues AS
(SELECT
MAX(ID) AS MaxID, COUNT(ID) AS CountOf, MIN(ID) AS MinID
FROM @YourTable
)
, NumberRows AS
(SELECT --generate a first row to help get a range
0 AS ID, a.StartD-1 AS StartD, a.EndD-1 AS EndD, 0 AS RowNumber
FROM @YourTable a
INNER JOIN AggValues dt ON a.ID=dt.MinID
UNION
SELECT --get all actual rows
ID, StartD, EndD, ROW_NUMBER() OVER(ORDER BY ID) AS RowNumber
FROM @YourTable
UNION
SELECT --generate a last row to help get a range
dt2.MaxID+1 AS ID, a.StartD+1 AS StartD, a.EndD+1 AS EndD, dt2.CountOf+1 AS RowNumber
FROM @YourTable a
INNER JOIN AggValues dt2 ON a.ID=dt2.MaxID
)
, FindGaps AS
(SELECT
a.ID,DATEDIFF(minute,b.StartD,a.EndD) AS Diff, a.RowNumber, c.RowNumber AS PreviousRowNumber
FROM NumberRows a
LEFT OUTER JOIN NumberRows b ON a.RowNumber=b.RowNumber+1
LEFT OUTER JOIN NumberRows c ON a.RowNumber-1=c.RowNumber
)
, Gaps AS
(SELECT
f.ID,f.RowNumber, f.PreviousRowNumber, ROW_NUMBER() OVER(ORDER BY ID) AS GapRowNumber
FROM FindGaps f
WHERE f.Diff>30
)
, Results AS
(SELECT
g.ID,n.ID AS IDEnd
FROM Gaps g
LEFT OUTER JOIN Gaps x ON g.GapRowNumber+1=x.GapRowNumber
LEFT OUTER JOIN NumberRows n ON x.PreviousRowNumber=n.RowNumber
WHERE n.ID IS NOT NULL
)
SELECT
a.*,b.*
from Results r
LEFT OUTER JOIN @YourTable a ON r.ID=a.ID
LEFT OUTER JOIN @YourTable b ON r.IDEnd=b.ID
OUTPUT:
ID ItemID StartD EndD ID ItemID StartD EndD
----------- ----------- ----------------------- ----------------------- ----------- ----------- ----------------------- -----------------------
1 234 2009-10-20 08:34:22.000 2009-10-20 08:35:10.000 2 274 2009-10-20 08:35:30.000 2009-10-20 08:36:27.000
3 272 2009-10-21 12:15:00.000 2009-10-21 12:17:00.000 5 15 2009-10-21 12:22:39.000 2009-10-21 12:24:15.000
(2 row(s) affected)
精彩评论