开发者

How to avoid overlapping date ranges when using a grouping clause?

I have a situation where I need to find time spans between value changes. I tried a simple group by clause but it eliminates overlapping changes. Consider the following example:

create table #items (
      code varchar(4)
    , class varchar(4)
    , txdate datetime
)

insert into #items (code, class, txdate) values ('A', 'C', '2010-01-01');
insert into #items (code, class, txdate) values ('A', 'C', '2010-01-02');
insert into #items (code, class, txdate) values ('A', 'C', '2010-01-03');
insert into #items (code, class, txdate) values ('A', 'D', '2010-01-04');
insert into #items (code, class, txdate) values ('A', 'D', '2010-01-05');
insert into #items (code, class, txdate) values ('A', 'C', '2010-01-06');
insert into #items (开发者_开发百科code, class, txdate) values ('A', 'C', '2010-01-07');
insert into #items (code, class, txdate) values ('A', 'D', '2010-01-08');
insert into #items (code, class, txdate) values ('A', 'D', '2010-01-09');

select code
, class
, min(txdate) mindate
, max(txdate) maxdate
from #items
group by code, class

This returns the following results (notice the overlapping date ranges):

|code|class|mindate   |maxdate   |
----------------------------------
|A   |C    |2010-01-01|2010-01-07|
|A   |D    |2010-01-04|2010-01-09|

I would like to have the query return the following:

|code|class|mindate   |maxdate   |
----------------------------------
|A   |C    |2010-01-01|2010-01-03|
|A   |D    |2010-01-04|2010-01-05|
|A   |C    |2010-01-06|2010-01-07|
|A   |D    |2010-01-08|2010-01-09|

Any ideas and suggestions?


EDIT: As pointed out in the comments, this is still not quite right.

;with cteNtile as (
    select code, class, txdate, 
           ntile((select count(*) from (select NULL as dummy from #items group by code, class) a)) over(partition by code, class order by txdate) as tilenum
        from #items
)
select code, class, MIN(txdate) as mindate, MAX(txdate) as maxdate
    from cteNtile
    group by code, class, tilenum
    order by mindate, maxdate


Here is the query that can give you the desired result.

;WITH items1 AS (
SELECT ROW_NUMBER() OVER (ORDER BY txdate) rowid, code, class, txdate
from #items
),
items2 AS (
SELECT ROW_NUMBER() OVER (ORDER BY rowid) id, rowid, i1.Code, i1.Class, i1.txdate
FROM items1 i1
WHERE NOT EXISTS (SELECT 1 FROM items1 i2
                  WHERE i2.txdate < i1.txdate
                  AND i2.class = i1.class
                  AND i2.Code = i1.Code 
                  AND i2.rowid+1=i1.rowid)
)
SELECT items2.code, items2.class, items2.txdate mindate, items1.txdate maxdate
FROM items2,  items2 items3, items1 
WHERE (items2.id+1=items3.id AND items3.rowid-1=items1.rowid)
OR items2.rowid = (SELECT MAX(t.rowid) FROM items1 t)
UNION 
SELECT items2.code, items2.class, MAX(items2.txdate) mindate, MAX(items1.txdate) maxdate
FROM items2, items1 
WHERE items1.class = items2.class
GROUP BY items1.class, items2.class, items2.code, items2.class
ORDER BY items2.txdate


I think you can´t do this with a simple select statement.

You can use a cursor to iterate over the rows and identify the "class" changes.


After researching SQL SERVER ISLANDS as suggested by @KM, I came up with the following query which seems to work well when additional class codes are added to the dataset.

select a.code, a.class, a.txdate as mindate, b.txdate as maxdate
from (
    --Find minimum island
    select code
        , class
        , txdate
        , row_number() over (order by code, class, txdate) as n
    from #items tb1
    where not exists (
        select *
        from #items tb2
        where datediff(d, tb1.txdate, tb2.txdate) = -1      
          and tb1.class = tb2.class
          and tb1.code = tb2.code
    ) 
) as a
inner join (
    --Find maximum island 
    select code
        , class
        , txdate
        , row_number() over (order by code, class, txdate) as n
    from #items tb1
    where not exists (
        select *
        from #items tb2
        where datediff(d, tb1.txdate, tb2.txdate) = 1   
          and tb1.class = tb2.class
          and tb1.code = tb2.code
    ) 
) as b on a.n = b.n

The only caveat to this approach is that the number of entries in the minimum set needs to match the number of entries in the maximum set. So far I have not been able to do anything that would make this not true. However, I did not test null values or performance.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜