SQL duration time calculation
I have a table of historic bus positions at a given time, recorded once per second. The schema looks like this:
BusID int not null,
BreadcrumbID int开发者_Go百科 not null identity (1, 1),
BusStopID int null,
Timestamp datetime not null
I want to generate a bus stop schedule based on historic trips. A bus is "at a stop" if its BusStopID
corresponds to the stop, and is not "at a stop" if the BusStopID
is null.
I need to generate the average times the bus is at each stop. So basically, I need to do the following:
- Identify the times that a bus is at a stop -- a simple
where
clause does the trick - Identify the average times the bus is at a stop. For my purposes, I'm defining a discrete "stop time" as a window of plus/minus 10 minutes; if a bus stops one day from 10:04 - 10:08, another day at 10:06 - 10:08, and a third day at 10:14 - 10:18, those would be the same stop, but if it stops at 10:45 - 10:48, that would be a different stop occurrence.
- Filter out "noise" -- i.e. stops times that only happened a few times but never again
I'm completely at a loss as to how to accomplish the second and third bullet. Please help!
On a number of occasions I've done something similar. Essentially, grouping based on separations within a complex ordering. The basics of the approach I use, with regards to this problem, are as follows:
- Build a table of all time ranges of interest.
- Find the starting time for each group of time ranges of interest.
- Find the ending time for each group of time ranges of interest.
- Join the starting and ending times to the list of time ranges, and group.
Or, in more detail: (each of these steps could be a part of one big CTE, but I've broken it down into temp tables for ease of reading...)
Step 1: Find the list of all time ranges of interest (I used a method similar to the one linked to by @Brad). NOTE: as @Manfred Sorg pointed out, this assumes there are no "missing seconds" in a bus's data. If there is a break in the timestamps, this code will interpret the single range as two (or more) distinct ranges.
;with stopSeconds as (
select BusID, BusStopID, TimeStamp,
[date] = cast(datediff(dd,0,TimeStamp) as datetime),
[grp] = dateadd(ss, -row_number() over(partition by BusID order by TimeStamp), TimeStamp)
from #test
where BusStopID is not null
)
select BusID, BusStopID, date,
[sTime] = dateadd(ss,datediff(ss,date,min(TimeStamp)), 0),
[eTime] = dateadd(ss,datediff(ss,date,max(TimeStamp)), 0),
[secondsOfStop] = datediff(ss, min(TimeStamp), max(Timestamp)),
[sOrd] = row_number() over(partition by BusID, BusStopID order by datediff(ss,date,min(TimeStamp))),
[eOrd] = row_number() over(partition by BusID, BusStopID order by datediff(ss,date,max(TimeStamp)))
into #ranges
from stopSeconds
group by BusID, BusStopID, date, grp
Step 2: Find the earliest time for each stop
select this.BusID, this.BusStopID, this.sTime minSTime,
[stopOrder] = row_number() over(partition by this.BusID, this.BusStopID order by this.sTime)
into #starts
from #ranges this
left join #ranges prev on this.BusID = prev.BusID
and this.BusStopID = prev.BusStopID
and this.sOrd = prev.sOrd+1
and this.sTime between dateadd(mi,-10,prev.sTime) and dateadd(mi,10,prev.sTime)
where prev.BusID is null
Step 3: Find the latest time for each stop
select this.BusID, this.BusStopID, this.eTime maxETime,
[stopOrder] = row_number() over(partition by this.BusID, this.BusStopID order by this.eTime)
into #ends
from #ranges this
left join #ranges next on this.BusID = next.BusID
and this.BusStopID = next.BusStopID
and this.eOrd = next.eOrd-1
and this.eTime between dateadd(mi,-10,next.eTime) and dateadd(mi,10,next.eTime)
where next.BusID is null
Step 4: Join everything together
select r.BusID, r.BusStopID,
[avgLengthOfStop] = avg(datediff(ss,r.sTime,r.eTime)),
[earliestStop] = min(r.sTime),
[latestDepart] = max(r.eTime)
from #starts s
join #ends e on s.BusID=e.BusID
and s.BusStopID=e.BusStopID
and s.stopOrder=e.stopOrder
join #ranges r on r.BusID=s.BusID
and r.BusStopID=s.BusStopID
and r.sTime between s.minSTime and e.maxETime
and r.eTime between s.minSTime and e.maxETime
group by r.BusID, r.BusStopID, s.stopOrder
having count(distinct r.date) > 1 --filters out the "noise"
Finally, to be complete, tidy up:
drop table #ends
drop table #starts
drop table #ranges
This post I just saw may help you. (Sql Server Central)
Fresh answer...
Try this out:
DECLARE @stopWindowMinutes INT
SET @stopWindowMinutes = 10
--
;
WITH test_data
AS ( SELECT 1 [BusStopId]
,'2010-01-01 10:00:04' 1674995515
UNION SELECT 1,'2010-01-01 10:00:05'
UNION SELECT 1,'2010-01-01 10:00:06'
UNION SELECT 1,'2010-01-01 10:00:07'
UNION SELECT 1,'2010-01-01 10:00:08'
UNION SELECT 1,'2010-01-02 10:00:06'
UNION SELECT 1,'2010-01-02 10:00:07'
UNION SELECT 1,'2010-01-02 10:00:08'
UNION SELECT 2,'2010-01-01 10:00:06'
UNION SELECT 2,'2010-01-01 10:00:07'
UNION SELECT 2,'2010-01-01 10:00:08'
UNION SELECT 2,'2010-01-01 10:00:09'
UNION SELECT 2,'2010-01-01 10:00:10'
UNION SELECT 2,'2010-01-01 10:00:09'
UNION SELECT 2,'2010-01-01 10:00:10'
UNION SELECT 2,'2010-01-01 10:00:11'
UNION SELECT 1,'2010-01-02 10:33:43'
UNION SELECT 1,'2010-01-02 10:33:44'
UNION SELECT 1,'2010-01-02 10:33:45'
UNION SELECT 1,'2010-01-02 10:33:46'
)
SELECT DISTINCT
[BusStopId]
,[AvgStop]
FROM ( SELECT [a].[BusStopId]
,( SELECT MIN([b].1674995515)
FROM [test_data] b
WHERE [a].[BusStopId] = [b].[BusStopId]
AND CONVERT(VARCHAR(10), [a].1674995515, 120) = CONVERT(VARCHAR(10), [b].1674995515, 120)
AND [b].1674995515 BETWEEN DATEADD(SECOND, -@stopWindowMinutes * 60,
[a].1674995515)
AND DATEADD(SECOND, @stopWindowMinutes * 60, [a].1674995515) -- w/i X minutes
) [MinStop]
,( SELECT MAX([b].1674995515)
FROM [test_data] b
WHERE [a].[BusStopId] = [b].[BusStopId]
AND CONVERT(VARCHAR(10), [a].1674995515, 120) = CONVERT(VARCHAR(10), [b].1674995515, 120)
AND [b].1674995515 BETWEEN DATEADD(SECOND, -@stopWindowMinutes * 60,
[a].1674995515)
AND DATEADD(SECOND, @stopWindowMinutes * 60, [a].1674995515) -- w/i X minutes
) [MaxStop]
,( SELECT DATEADD(second,
AVG(DATEDIFF(second, CONVERT(VARCHAR(10), [b].1674995515, 120),
[b].1674995515)),
CONVERT(VARCHAR(10), MIN([b].1674995515), 120))
FROM [test_data] b
WHERE [a].[BusStopId] = [b].[BusStopId]
AND CONVERT(VARCHAR(10), [a].1674995515, 120) = CONVERT(VARCHAR(10), [b].1674995515, 120)
AND [b].1674995515 BETWEEN DATEADD(SECOND, -@stopWindowMinutes * 60,
[a].1674995515)
AND DATEADD(SECOND, @stopWindowMinutes * 60, [a].1674995515) -- w/i X minutes
) [AvgStop]
FROM [test_data] a
WHERE CONVERT(VARCHAR(10), 1674995515, 120) = CONVERT(VARCHAR(10), 1674995515, 120)
GROUP BY [a].[BusStopId]
,[a].1674995515
) subset1
As is often the case, these kind of problems are easier to solve and maintain by breaking them into bite-sized pieces:
-- Split into Date and minutes-since-midnight
WITH observed(dates,arrival,busstop,bus) AS (
SELECT
CONVERT(CHAR(8), TimeStamp, 112),
DATEPART(HOUR,TimeStamp) * 60 + DATEPART(MINUTE,TimeStamp),
busstopid,
busid
FROM
History
),
-- Identify times at stop subsequent to arrival at that stop
atstop(dates,stoptime,busstop,bus) AS (
SELECT
a.dates,
a.arrival,
a.busstop,
a.bus
FROM
observed a
WHERE
EXISTS (
SELECT
*
FROM
observed b
WHERE
a.dates = b.dates AND
a.busstop = b.busstop AND
a.bus = b.bus AND
a.arrival - b.arrival BETWEEN 1 AND 10
)
),
-- Isolate actual arrivals at stops, excluding waiting at stops
dailyhalts(dates,arrival,busstop,bus) AS (
SELECT
a.dates,a.arrival,a.busstop,a.bus
FROM
observed a
WHERE
arrival NOT IN (
SELECT
stoptime
FROM
atstop b
WHERE
a.dates = b.dates AND
a.busstop = b.busstop AND
a.bus = b.bus
)
),
-- Merge arrivals across all dates
timetable(busstop,bus,arrival) AS (
SELECT
a.busstop, a.bus, a.arrival
FROM
dailyhalts a
WHERE
NOT EXISTS (
SELECT
*
FROM
dailyhalts h
WHERE
a.busstop = h.busstop AND
a.bus = h.bus AND
a.arrival - h.arrival BETWEEN 1 AND 10
)
GROUP BY
a.busstop, a.bus, a.arrival
)
-- Print timetable for a given day
SELECT
a.busstop, a.bus, a.arrival, DATEADD(minute,AVG(b.arrival),'2010/01/01')
FROM
timetable a INNER JOIN
observed b ON
a.busstop = b.busstop AND
a.bus = b.bus AND
b.arrival BETWEEN a.arrival AND a.arrival + 10
GROUP BY
a.busstop, a.bus, a.arrival
Input:
ID BusID BusStopID TimeStamp
1 1 1 2010-01-01 10:00:00.000
2 1 1 2010-01-01 10:01:00.000
3 1 1 2010-01-01 10:02:00.000
4 1 2 2010-01-01 11:00:00.000
5 1 3 2010-01-01 12:00:00.000
6 1 3 2010-01-01 12:01:00.000
7 1 3 2010-01-01 12:02:00.000
8 1 3 2010-01-01 12:03:00.000
9 1 1 2010-01-02 11:00:00.000
10 1 1 2010-01-02 11:03:00.000
11 1 1 2010-01-02 11:07:00.000
12 1 2 2010-01-02 12:00:00.000
13 1 3 2010-01-02 13:00:00.000
14 1 3 2010-01-02 13:01:00.000
15 1 1 2010-01-03 10:03:00.000
16 1 1 2010-01-03 10:05:00.000
Output:
busstop bus arrival (No column name)
1 1 600 2010-01-01 10:02:00.000
1 1 660 2010-01-01 11:03:00.000
2 1 660 2010-01-01 11:00:00.000
2 1 720 2010-01-01 12:00:00.000
3 1 720 2010-01-01 12:01:00.000
3 1 780 2010-01-01 13:00:00.000
精彩评论