Join One Table To Get Random Rows from 2nd Table
I have two tables: #tmpParams, which lists 29 combinations of Disability and Grade. The 2nd table, #tmpSource contains many students with their StudentID, Disability, and Grade. I need to select 29 random rows from #tmpSource, joining on #tmpParams by Disability and Grade.
In other words, someone handed me a list of parameters for 29 students (Disability and Grade) and they want 29 random students from #tmpSource, including StudentID, matching on the two fields.
This is the DDL:
IF OBJECT_ID('tempdb..#tmpParams') IS NOT NULL
BEGIN
DROP TABLE #tmpParams
END
GO
IF OBJECT_ID('tempdb..#tmpSource') IS NOT NULL
BEGIN
DROP TABLE #tmpSource
END
GO
/* Create parameter table */
CREATE TABLE #tmpParams
(
Disability varchar(10)
,Grade varchar(10)
)
GO
/* populate it */
INSERT INTO #tmpParams
(Disability, Grade)
SELECT '13 - AUT' ,'Grade 1' UNION ALL
SELECT '08 - SL' ,'Grade 1' UNION ALL
SELECT '11 - PSD' ,'Grade 1' UNION ALL
SELECT '04 - SLD' ,'Grade 2' UNION ALL
SELECT '04 - SLD' ,'Grade 2' UNION ALL
SELECT '08 - SL' ,'Grade 2' UNION ALL
SELECT '08 - SL' ,'Grade 2' UNION ALL
SELECT '08 - SL' ,'Grade 2' UNION ALL
SELECT '14 - TBI' ,'Grade 2' UNION ALL
SELECT '07 - PD' ,'Grade 2' UNION ALL
SELECT '08 - SL' ,'Grade 3' UNION ALL
SELECT '08 - SL' ,'Grade 3' UNION ALL
SELECT '08 - SL' ,'Grade 3' UNION ALL
SELECT '08 - SL' ,'Grade 3' UNION ALL
SELECT '08 - SL' ,'Grade 3' UNION ALL
SELECT '07 - PD' ,'Grade 3' UNION ALL
SELECT '04 - SLD' ,'Grade 4' UNION ALL
SELECT '04 - SLD' ,'Grade 4' UNION ALL
SELECT '08 - SL' ,'Grade 4' UNION ALL
SELECT '08 - SL' ,'Grade 4' UNION ALL
SELECT '08 - SL' ,'Grade 4' UNION ALL
SELECT '08 - SL' ,'Grade 4' UNION ALL
SELECT '08 - SL' ,'Grade 4' UNION ALL
SELECT '04 - SLD' ,'Grade 5' UNION ALL
SELECT '04 - SLD' ,'Grade 5' UNION ALL
SELECT '04 - SLD' ,'Grade 5' UNION ALL
SELECT '04 - SLD' ,'Grade 5' UNION ALL
SELECT '08 - SL' ,'Grade 5' UNION ALL
SELECT '08 - SL' ,'Grade 5';
GO
/* Create table to hold source */
CREATE TABLE #tmpSource
(
StudentID int
,Disability varchar(10)
,Grade varchar(10)
)
GO
INSERT INTO #tmpSource
(StudentID, Disability, Grade)
SELECT '3', '04 - SLD', 'Grade 2' UNION ALL
SELECT '31', '04 - SLD', 'Grade 2' UNION ALL
SELECT '39', '04 - SLD', 'Grade 2' UNION ALL
SELECT '46', '04 - SLD', 'Grade 2' UNION ALL
SELECT '10', '04 - SLD', 'Grade 4' UNION ALL
SELECT '13', '04 - SLD', 'Grade 4' UNION ALL
SELECT '25', '04 - SLD', 'Grade 4' UNION ALL
SELECT '33', '04 - SLD', 'Grade 4' UNION ALL
SELECT '57', '04 - SLD', 'Grade 4' UNION ALL
SELECT '60', '04 - SLD', 'Grade 4' UNION ALL
SELECT '8', '04 - SLD', 'Grade 5' UNION ALL
SELECT '19', '04 - SLD', 'Grade 5' UNION ALL
SELECT '23', '04 - SLD', 'Grade 5' UNION ALL
SELECT '51', '04 - SLD', 'Grade 5' UNION ALL
SELECT '55', '04 - SLD', 'Grade 5' UNION ALL
SELECT '16', '07 - PD', 'Grade 2' UNION ALL
SELECT '28', '07 - PD', 'Grade 2' UNION ALL
SELECT '36', '07 - PD', 'Grade 2' UNION ALL
SELECT '43', '07 - PD', 'Grade 2' UNION ALL
SELECT '11', '07 - PD', 'Grade 3' UNION ALL
SELECT '14', '07 - PD', 'Grade 3' UNION ALL
SELECT '26', '07 - PD', 'Grade 3' UNION ALL
SELECT '34', '07 - PD', 'Grade 3' UNION ALL
SELECT '58', '07 - PD', 'Grade 3' UNION ALL
SELECT '61', '07 - PD', 'Grade 3' UNION ALL
SELECT '5', '08 - SL', 'Grade 1' UNION ALL
SELECT '21', '08 - SL', 'Grade 1' UNION ALL
SELECT '41', '08 - SL', 'Grade 1' UNION ALL
SELECT '48', '08 - SL', 'Grade 1' UNION ALL
SELECT '2', '08 - SL', 'Grade 2' UNION ALL
SELECT '30', '08 - SL', 'Grade 2' UNION ALL
SELECT '38', '08 - SL', 'Grade 2' UNION ALL
SELECT '45', '08 - SL', 'Grade 2' UNION ALL
SELECT '12', '08 - SL', 'Grade 3' UNION ALL
SELECT '15', '08 - SL', 'Grade 3' UNION ALL
SELECT '27', '08 - SL', 'Grade 3' UNION ALL
SELECT '35', '08 - SL', 'Grade 3' UNION ALL
SELECT '59', '08 - SL', 'Grade 3' UNION ALL
SELECT '62', '08 - SL', 'Grade 3' UNION ALL
SELECT '9', '08 - SL', 'Grade 4' UNION ALL
SELECT '20', '08 - SL', 'Grade 4' UNION ALL
SELECT '24', '08 - SL', 'Grade 4' 开发者_运维技巧UNION ALL
SELECT '52', '08 - SL', 'Grade 4' UNION ALL
SELECT '56', '08 - SL', 'Grade 4' UNION ALL
SELECT '7', '08 - SL', 'Grade 5' UNION ALL
SELECT '18', '08 - SL', 'Grade 5' UNION ALL
SELECT '22', '08 - SL', 'Grade 5' UNION ALL
SELECT '50', '08 - SL', 'Grade 5' UNION ALL
SELECT '54', '08 - SL', 'Grade 5' UNION ALL
SELECT '4', '11 - PSD', 'Grade 1' UNION ALL
SELECT '32', '11 - PSD', 'Grade 1' UNION ALL
SELECT '40', '11 - PSD', 'Grade 1' UNION ALL
SELECT '47', '11 - PSD', 'Grade 1' UNION ALL
SELECT '6', '13 - AUT', 'Grade 1' UNION ALL
SELECT '17', '13 - AUT', 'Grade 1' UNION ALL
SELECT '42', '13 - AUT', 'Grade 1' UNION ALL
SELECT '49', '13 - AUT', 'Grade 1' UNION ALL
SELECT '53', '13 - AUT', 'Grade 1' UNION ALL
SELECT '1', '14 - TBI', 'Grade 2' UNION ALL
SELECT '29', '14 - TBI', 'Grade 2' UNION ALL
SELECT '37', '14 - TBI', 'Grade 2' UNION ALL
SELECT '44', '14 - TBI', 'Grade 2';
GO
That populates 29 students into #tmpParams and 62 into #tmpSource. My real-world scenario is, of course, much more complex.
I can get 29 random students from #tmpSource easily enough...but how do I JOIN on #tmpParams to get 29 students that match on the two fields?
SELECT TOP 29
TS.StudentID
,TS.Disability
,TS.Grade
FROM #tmpSource AS TS
ORDER BY NEWID();
GO
As always, thank you for your help, and please let me know if I can clarify something or make this easier to read.
I may be missing something but why not
SELECT TOP 29 TS.StudentID,
TS.Disability,
TS.Grade
FROM #tmpParams P
JOIN #tmpSource TS
ON TS.Disability = P.Disability
AND TS.Grade = P.Grade
ORDER BY NEWID()
Edit
You want this?
SELECT TS.StudentID,
TS.Disability,
TS.Grade
FROM #tmpParams P
CROSS APPLY (SELECT TOP 1 *
FROM #tmpSource TS
WHERE TS.Disability = P.Disability
AND TS.Grade = P.Grade
ORDER BY NEWID()) TS
Edit 2
You want this?
;WITH T1
AS (SELECT *,
Row_number() OVER (PARTITION BY Disability, Grade ORDER BY Newid()) RN
FROM #tmpSource),
T2
AS (SELECT Disability,
Grade,
Row_number() OVER (PARTITION BY Disability, Grade
ORDER BY (SELECT 0)) AS RN
FROM #tmpParams)
SELECT T1.Disability,
T1.Grade,
T1.StudentID
FROM T1
JOIN T2
ON T1.Disability = T2.Disability
AND T2.Grade = T1.Grade
AND T1.RN = T2.RN
with sample as
(
select p.disability, p.grade, count(*) as numOfSamples from #tmpParams P
group by p.disability, p.grade
),
rownumedStuds as
(
select std.id, std.disability, std.grade, row_number() over (partition by std.disability, std.grade order by newID()) num
)
select a.id,a.disability, a.grade
from sample b
inner join rownumedStuds a on a.grade = b.grade and a.disability = b.disability
where num <= numOfSamples
Basically it does calculate number of buckets and number of matches then randomizes each group (if this is perfomance heavy You might loop join it with only the categoriez that are need in the rownumedStuds to randomize only groups You need.
the actual select I hope is pretty straighforward
regards luke
I may have figured this out, although it doesn't appear very elegant:
/* Add identity column to #tmpParams */
ALTER TABLE #tmpParams ADD ID INT IDENTITY (1,1)
/* drop #tmpResults if exists */
IF OBJECT_ID('tempdb..#tmpResults') IS NOT NULL
BEGIN
DROP TABLE #tmpResults
END
GO
/* Create table to hold results */
CREATE TABLE #tmpResults
(
StudentID int
,Disability varchar(10)
,Grade varchar(10)
)
GO
DECLARE @i AS INT; -- set variable to parse through
SET @i = 1
WHILE @i <= (SELECT MAX(ID) FROM #tmpParams) -- Max number in table
BEGIN
INSERT INTO #tmpResults
SELECT TOP 1
TS.StudentID
,TS.Disability
,TS.Grade
FROM #tmpSource AS TS
JOIN #tmpParams AS TP
ON TS.Disability = TP.Disability
AND TS.Grade = TP.Grade
WHERE TP.ID = @i
AND TS.StudentID NOT IN (SELECT StudentID FROM #tmpResults) -- so no duplicates
ORDER BY NEWID()
SET @i = @i + 1
END;
GO
Thanks for your help, Martin, it got me on the right track.
精彩评论