Count n-tuples in SQL
I have a table with two int columns, the combination of them is unique.
ids idk
---------
1 2
1 3
1 4
2 2
2 3
2 4
3 2
3 5
3 4
4 2
4 3
What I want is to get the biggest set of idks which is common to at least 2 ids.
The result here should look like this:
(2,3,4) - 2x
(2,4) - 3x
(2,3) - 3x
(3,4) - 2x
(2,5) - 1x
As you see, the number of common idks is the most important thing. If the number is the same #(2,4) = #(2,5) then开发者_开发技巧 the criterion is the number of occurencies.
It seems to me rather like a algorithm which needs to be programmed but I may be wrong and SQL is capable of it.
Interesting question! I think I've found a way to solve it in SQL Server. Still it's rather complicated:
- Create a stored procedure that calculates combinations
- Call the stored procedure to create a list of combinations for each
ids
- Increate the occurance by 1 for each combination that was previously found
- Insert new combinations into a result table
Here's the full code:
set nocount on
-- Helper table for sp_Combinations
if OBJECT_ID('tempdb..#combos') is not null
drop table #combos
create table #combos (
comboid int default 1,
basecomboid int,
combosize int default 1,
val int,
primary key (comboid, val))
if OBJECT_ID('dbo.sp_Combinations') is null
exec ('create procedure dbo.sp_Combinations as select 1')
go
-- Creates a list of all combinations of values in #combos,
-- based on a starting list with comboid = 1
alter procedure dbo.sp_Combinations
as begin
delete from #combos where comboid <> 1
declare @n int
select @n = count(*) from #combos
update #combos set combosize = (select count(*) from #combos)
if @n = 1
return
-- Add individual numbers
insert #combos
(comboid, basecomboid, val)
select row_number() over (order by val) + 1
, 1
, val
from #combos
where comboid = 1
declare @k int
set @k = 1
while @k + 1 < @n
begin
declare @max_combo_id int
select @max_combo_id = max(comboid) from #combos
-- Add one new member to each combination of size @k
-- The new member must be larger than all existing members
-- F.e. for @n = 4 and @k = 1:
-- (1) --> (1,2) (1,3) (1,4)
-- (2) --> (2,3) (2,4)
-- (3) --> (3,4)
-- (4) --> no new combinations
;with b as (
select val
from #combos
where comboid = 1
), g as (
select comboid
, max(val) as maxval
from #combos c
where combosize = @k
group by
comboid
)
insert #combos (comboid, basecomboid, combosize, val)
select row_number() over (order by g.comboid) + @max_combo_id
, g.comboid
, @k + 1
, b.val
from b
join g
on b.val > g.maxval
-- Add the members of the base combo
insert #combos (comboid, basecomboid, combosize, val)
select l.comboid
, l.basecomboid
, l.combosize
, b.val
from #combos l
join #combos b
on b.comboid = l.basecomboid
where l.combosize = @k + 1
set @k = @k + 1
end
end
go
go
-- Input table
declare @t table (ids int, idk int)
insert into @t (ids, idk) values
(1, 2), (1, 3), (1, 4),
(2, 2), (2, 3), (2, 4),
(3, 2), (3, 5), (3, 4),
(4, 2), (4, 3);
-- Valid combinations with number of occurrences
declare @combinations table (comboid int, val int, cnt int)
-- Iterate over all ids
declare cur cursor for select distinct ids from @t
open cur
declare @ids int
while 1=1
begin
fetch from cur into @ids
if @@FETCH_STATUS <> 0
break
-- Calculate combinations for this ids
truncate table #combos
insert into #combos (comboid, val) select 1, idk from @t where ids = @ids
exec dbo.sp_Combinations
-- Increase count of existing combinations
update u
set cnt = cnt + 1
from @combinations u
where exists
(
select *
from @combinations a
full join
#combos b
on a.val = b.val
where a.comboid = u.comboid
group by
a.comboid
having max(case when a.val is null then 1 else 0 end) = 0
and max(case when b.val is null then 1 else 0 end) = 0
)
-- Insert new combinations
declare @max_combo_id int
select @max_combo_id = isnull(max(comboid),0) from @combinations
insert @combinations
(comboid, val, cnt)
select n.comboid + @max_combo_id
, n.val
, 1
from #combos n
where not exists
(
select *
from @combinations a
full join
#combos b
on a.val = b.val
where b.comboid = n.comboid
group by
b.comboid
having max(case when a.val is null then 1 else 0 end) = 0
and max(case when b.val is null then 1 else 0 end) = 0
)
end
close cur
deallocate cur
-- Display result
select x.r as combination
, cnt as occurrences
from (
select distinct comboid
, cnt
from @combinations
) a
cross apply
(
select val as [text()]
from @combinations c
where c.comboid = a.comboid
for xml path('')
) x(r)
where a.cnt > 1
order by
len(x.r) desc
, cnt desc
For your example data set, this prints:
combination occurrences
234 2
23 3
24 3
34 2
2 4
3 3
4 3
精彩评论