开发者

Count n-tuples in SQL

I have a table with two int columns, the combination of them is unique.

ids  idk
---------
1    2
1    3
1    4

2    2
2    3
2    4

3    2
3    5
3    4

4    2
4    3

What I want is to get the biggest set of idks which is common to at least 2 ids.

The result here should look like this:

(2,3,4) - 2x
(2,4) - 3x
(2,3) - 3x
(3,4) - 2x
(2,5) - 1x

As you see, the number of common idks is the most important thing. If the number is the same #(2,4) = #(2,5) then开发者_开发技巧 the criterion is the number of occurencies.

It seems to me rather like a algorithm which needs to be programmed but I may be wrong and SQL is capable of it.


Interesting question! I think I've found a way to solve it in SQL Server. Still it's rather complicated:

  • Create a stored procedure that calculates combinations
  • Call the stored procedure to create a list of combinations for each ids
  • Increate the occurance by 1 for each combination that was previously found
  • Insert new combinations into a result table

Here's the full code:

set nocount on

-- Helper table for sp_Combinations
if OBJECT_ID('tempdb..#combos') is not null
    drop table #combos
create table #combos (
    comboid int default 1,
    basecomboid int,
    combosize int default 1,
    val int,
    primary key (comboid, val))


if OBJECT_ID('dbo.sp_Combinations') is null
    exec ('create procedure dbo.sp_Combinations as select 1')
go
-- Creates a list of all combinations of values in #combos,
-- based on a starting list with comboid = 1
alter procedure dbo.sp_Combinations
as begin
    delete from #combos where comboid <> 1

    declare @n int
    select @n = count(*) from #combos

    update #combos set combosize = (select count(*) from #combos)

    if @n = 1
        return

    -- Add individual numbers
    insert  #combos 
            (comboid, basecomboid, val) 
    select  row_number() over (order by val) + 1
    ,       1
    ,       val 
    from    #combos 
    where   comboid = 1

    declare @k int
    set @k = 1
    while @k + 1 < @n
        begin
        declare @max_combo_id int
        select @max_combo_id = max(comboid) from #combos

        -- Add one new member to each combination of size @k
        -- The new member must be larger than all existing members
        -- F.e. for @n = 4 and @k = 1:
        --       (1) --> (1,2) (1,3) (1,4)
        --       (2) --> (2,3) (2,4)
        --       (3) --> (3,4)
        --       (4) --> no new combinations
        ;with b as (
            select  val
            from    #combos
            where   comboid = 1
        ), g as (
            select  comboid
            ,       max(val) as maxval
            from    #combos c
            where   combosize = @k
            group by
                    comboid
        )
        insert  #combos (comboid, basecomboid, combosize, val)
        select  row_number() over (order by g.comboid) + @max_combo_id
        ,       g.comboid
        ,       @k + 1
        ,       b.val
        from    b
        join    g
        on      b.val > g.maxval

        -- Add the members of the base combo
        insert  #combos (comboid, basecomboid, combosize, val)
        select  l.comboid
        ,       l.basecomboid
        ,       l.combosize
        ,       b.val
        from    #combos l
        join    #combos b
        on      b.comboid = l.basecomboid
        where   l.combosize = @k + 1

        set @k = @k + 1
        end
end
go
go

-- Input table
declare @t table (ids int, idk int)
insert into @t (ids, idk) values 
    (1, 2), (1, 3), (1, 4), 
    (2, 2), (2, 3), (2, 4), 
    (3, 2), (3, 5), (3, 4), 
    (4, 2), (4, 3);

-- Valid combinations with number of occurrences
declare @combinations table (comboid int, val int, cnt int)

-- Iterate over all ids    
declare cur cursor for select distinct ids from @t
open cur
declare @ids int
while 1=1
    begin
    fetch from cur into @ids
    if @@FETCH_STATUS <> 0
        break

    -- Calculate combinations for this ids
    truncate table #combos
    insert into #combos (comboid, val) select 1, idk from @t where ids = @ids
    exec dbo.sp_Combinations

    -- Increase count of existing combinations
    update  u
    set     cnt = cnt + 1
    from    @combinations u
    where   exists
            (
            select  *
            from    @combinations a
            full join
                    #combos b
            on      a.val = b.val
            where   a.comboid = u.comboid
            group by
                    a.comboid
            having  max(case when a.val is null then 1 else 0 end) = 0
                    and max(case when b.val is null then 1 else 0 end) = 0
            )

    -- Insert new combinations
    declare @max_combo_id int
    select @max_combo_id = isnull(max(comboid),0) from @combinations

    insert  @combinations
            (comboid, val, cnt)
    select  n.comboid + @max_combo_id
    ,       n.val
    ,       1
    from    #combos n
    where   not exists
            (
            select  *
            from    @combinations a
            full join
                    #combos b
            on      a.val = b.val
            where   b.comboid = n.comboid
            group by
                    b.comboid
            having  max(case when a.val is null then 1 else 0 end) = 0
                    and max(case when b.val is null then 1 else 0 end) = 0
            )
    end

close cur
deallocate cur

-- Display result
select  x.r as combination
,       cnt as occurrences
from    (
        select  distinct comboid
        ,       cnt
        from    @combinations
        ) a
cross apply
        (
        select  val as [text()]
        from    @combinations c
        where   c.comboid = a.comboid
        for xml path('')
        ) x(r)
where   a.cnt > 1
order by
        len(x.r) desc
,       cnt desc

For your example data set, this prints:

combination occurrences
234         2
23          3
24          3
34          2
2           4
3           3
4           3
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜