Sql server 2008 r2 - redundant rows
The first query
--645 rows
SELECT *
FROM (
SELECT DISTINCT
cu.*,
ROW_NUMBER() OVER ( ORDER BY cu.Id ) AS RowNum
FROM Customers cu
LEFT JOIN dbo.CustomerCategories cc
ON cc.CustomerId = cu.Id
LEFT JOIN dbo.CustomerServices cs
ON cs.CustomerId = cu.Id
WHERE ( @FullName IS NULL
OR cu.FullName LIKE @FullName
)
AND ( @CategoriesIdXml IS NULL
OR cc.CategoryId IN ( SELECT *
FROM @CategoriesList )
)
AND ( @ServicesIdXml IS NULL
OR cs.ServiceId IN ( SELECT *
FROM @ServicesList )
)
) AS _
WHERE RowNum BETWEEN ( @PageIndex - 1 ) * @PageSize + 1
AND @PageIndex * @PageSiz开发者_JAVA技巧e
The second query
--41 rows
SELECT *
FROM (
SELECT DISTINCT
cu.*,
ROW_NUMBER() OVER ( ORDER BY cu.Id ) AS RowNum
FROM Customers cu
-- LEFT JOIN dbo.CustomerCategories cc
-- ON cc.CustomerId = cu.Id
-- LEFT JOIN dbo.CustomerServices cs
-- ON cs.CustomerId = cu.Id
--WHERE ( @FullName IS NULL
-- OR cu.FullName LIKE @FullName
-- )
-- AND ( @CategoriesIdXml IS NULL
-- OR cc.CategoryId IN ( SELECT *
-- FROM @CategoriesList )
-- )
-- AND ( @ServicesIdXml IS NULL
-- OR cs.ServiceId IN ( SELECT *
-- FROM @ServicesList )
-- )
) AS _
WHERE RowNum BETWEEN ( @PageIndex - 1 ) * @PageSize + 1
AND @PageIndex * @PageSize
The second query returns right result set (41 rows), but the first returns 645 rows which is wrong. But I use DISTINCT
in both queries and I wonder why first returns too much rows.
How do I avoid it?
The DISTINCT is being applied after the creation of the ROW_NUMBER()
As ROW_NUMBER() is different for every row, every row is unique by definition. This means that you appear to have a few options.
Apply the Distinct in one query, then wrap another around it for ROW_NUMBER()
SELECT
*
FROM
(
SELECT
*,
ROW_NUMBER() OVER (ORDER BY id) AS row_num
FROM
(
SELECT DISTINCT
cu.*
FROM
<your query>
)
AS raw_data
)
AS ordered_data
WHERE
RowNum BETWEEN ( @PageIndex - 1 ) * @PageSize + 1
AND @PageIndex * @PageSize
Use GROUP BY instead of DISTINCT
SELECT
*
FROM
(
SELECT DISTINCT
cu.*,
ROW_NUMBER() OVER (ORDER BY id) AS row_num
FROM
<your query>
GROUP BY
cu.id,
cu.field1,
cu.field2,
etc, etc
)
AS ordered_data
WHERE
RowNum BETWEEN ( @PageIndex - 1 ) * @PageSize + 1
AND @PageIndex * @PageSize
ROW_NUMBER
is not right, use DENSE_RANK
instead.
You can see difference here : Difference between ROW_NUMBER, RANK and DENSE_RANK
ROW_NUMBER
will give you different number for the same Customer, and this is not what you want, you need the same value so that your distinct could work.
精彩评论