开发者

Nested Linq performance

We have 2 tables (tblSerials and tblRequests). The question is how can we make this code to execute faster? As you can see "from request in ctx.tblRequests" parts are very similar. Any other better solution?

var list = from s in ctx.tblSerials
        orderby s.CreateDate descending
        select new
        {
         SerialGUID = s.SerialGUID,
         CreateDate = s.CreateDate,
         NumberOfAllRequests = 
            (from request in ctx.tblRequests
              where request.Seri开发者_如何学CalGUID.ToLower().Equals(s.SerialGUID.ToLower())
              select request.RequestGUID).Count(),
         NumberOfAllRequestsDistinctByMachine = 
            (from request in ctx.tblRequests
              where request.SerialGUID.ToLower().Equals(s.SerialGUID.ToLower())
              select request.MachineCode).Distinct().Count(),                    
        };


If you don't care about seeing the results that have 0 count in tblRequests you could use this query:

tblSerials
    .Join(
        tblRequests,
        x => x.SerialGUID.ToLower(),
        x => x.SerialGUID.ToLower(),
        (o,i) => new { Serial = o, Request = i }
    ).GroupBy(x => x.Serial)
    .Select(x => new {
        SerialGUID = x.Key.SerialGUID,
        CreateDate = x.Key.CreateDate,
        NumberOfAllRequests = x.Select(y => y.Request.RequestGUID).Count(),
        NumberOfAllRequestsDistinctByMachine = x.Select(y => y.Request.MachineCode).Distinct().Count()
    }).OrderByDescending(x => x.CreateDate);

If you did want those results you could easily pull one of the tables into memory so you could use a multi-line lambda function:

tblSerials
    .ToList().Select(x => {
    var requests = tblRequests.Where(y => y.SerialGUID.ToLower().Equals(x.SerialGUID.ToLower()));
    return  new {
                SerialGUID = x.SerialGUID,
                CreateDate = x.CreateDate,
                NumberOfAllRequests = requests.Count(),
                NumberOfAllRequestsDistinctByMachine = requests.Select(y => y.MachineCode).Distinct().Count()
            };
    }).OrderByDescending(x => x.CreateDate);

Or if tblSerials is too large you could use an outer join:

(from s in tblSerials
from r in tblRequests
    .Where(x => x.SerialGUID.ToLower().Equals(s.SerialGUID.ToLower()))
    .DefaultIfEmpty()
select new { Serial = s, Request = r })
.GroupBy(x => x.Serial)
    .Select(x => new {
        SerialGUID = x.Key.SerialGUID,
        CreateDate = x.Key.CreateDate,
        NumberOfAllRequests = x.Any(y => y.Request != null) ? x.Select(y => y.Request.RequestGUID).Count() : 0,
        NumberOfAllRequestsDistinctByMachine = x.Any(y => y.Request != null) ? x.Select(y => y.Request.MachineCode).Distinct().Count() : 0
    }).OrderByDescending(x => x.CreateDate);

NOTE: I do not actually have any performance comparisons and cannot guarantee that any of these are faster than yours, they are just suggestions.

(if you have linqpad installed you can compare the results of all of these queries and your own with some dummy data in the following script:

var tblSerials = new [] {
new { CreateDate = DateTime.Today.AddDays(-2), SerialGUID = "foo" },
new { CreateDate = DateTime.Today.AddDays(-3), SerialGUID = "bar" },
new { CreateDate = DateTime.Today.AddDays(-2), SerialGUID = "foobar" },
new { CreateDate = DateTime.Today.AddDays(-1), SerialGUID = "foo" }
};

var tblRequests = new [] {
new { SerialGUID = "foo", RequestGUID = "hi", MachineCode = "1" },
new { SerialGUID = "bar", RequestGUID = "yo", MachineCode = "2" },
new { SerialGUID = "foo", RequestGUID = "hello", MachineCode = "1" },
new { SerialGUID = "baz", RequestGUID = "yeah", MachineCode = "3" }
};

tblSerials
    .Join(
        tblRequests,
        x => x.SerialGUID.ToLower(),
        x => x.SerialGUID.ToLower(),
        (o,i) => new { Serial = o, Request = i }
    ).GroupBy(x => x.Serial)
    .Select(x => new {
        SerialGUID = x.Key.SerialGUID,
        CreateDate = x.Key.CreateDate,
        NumberOfAllRequests = x.Select(y => y.Request.RequestGUID).Count(),
        NumberOfAllRequestsDistinctByMachine = x.Select(y => y.Request.MachineCode).Distinct().Count()
    }).OrderByDescending(x => x.CreateDate).Dump();

(from s in tblSerials
from r in tblRequests
    .Where(x => x.SerialGUID.ToLower().Equals(s.SerialGUID.ToLower()))
    .DefaultIfEmpty()
select new { Serial = s, Request = r })
.GroupBy(x => x.Serial)
    .Select(x => new {
        SerialGUID = x.Key.SerialGUID,
        CreateDate = x.Key.CreateDate,
        NumberOfAllRequests = x.Any(y => y.Request != null) ? x.Select(y => y.Request.RequestGUID).Count() : 0,
        NumberOfAllRequestsDistinctByMachine = x.Any(y => y.Request != null) ? x.Select(y => y.Request.MachineCode).Distinct().Count() : 0
    }).OrderByDescending(x => x.CreateDate).Dump();

tblSerials
    .ToList().Select(x => {
    var requests = tblRequests.Where(y => y.SerialGUID.ToLower().Equals(x.SerialGUID.ToLower()));
    return  new {
                SerialGUID = x.SerialGUID,
                CreateDate = x.CreateDate,
                NumberOfAllRequests = requests.Count(),
                NumberOfAllRequestsDistinctByMachine = requests.Select(y => y.MachineCode).Distinct().Count()
            };
    }).OrderByDescending(x => x.CreateDate).Dump();

var list = from s in tblSerials
        orderby s.CreateDate descending
        select new
        {
         SerialGUID = s.SerialGUID,
         CreateDate = s.CreateDate,
         NumberOfAllRequests = 
            (from request in tblRequests
              where request.SerialGUID.ToLower().Equals(s.SerialGUID.ToLower())
              select request.RequestGUID).Count(),
         NumberOfAllRequestsDistinctByMachine = 
            (from request in tblRequests
              where request.SerialGUID.ToLower().Equals(s.SerialGUID.ToLower())
              select request.MachineCode).Distinct().Count(),                    
        };

list.Dump();


My experience is that Linq to SQL always means a big performance hit, especially with many requests, so consider the option of using stored procedures on the database server instead, and just taking the result into a new List with ToList() on the (possibly, depending on what methods you use) Queryable result.


If performance is your goal, what does the query plan generated by SQL Server look like? Step five of this article provides some help in how to find the query plan. It's possible that after you accept some of the changes in the other answers that the slowdown will not be because of L2Sql, but because of an improperly optomized database structure.

For example, is SerialGuid indexed? If there are enough entries in the table, getting the query to search an index can provide an order of magnitude speed up.


If you join with tblRequests, you could change your select into this:

select new
{
    SerialGUID = s.SerialGUID,
    CreateDate = s.CreateDate,
    RequestGUID = request.RequestGUID,
    MachineCode = request.MachineCode
};

Then, you have a single query. After retrieving these results, you can then 'manually' aggregate them over the result set and do you Count() and Distinct().Count() yourself.


The simplest way to make these fast would be to bring the data into memory. However, even though it is often quite likely that query result will fit into memory, the source data may not. So the trick is to limit the data loaded into memory but processing where clauses on the database, but do your join clauses in memory.

In this case, though, there are no where clauses to push to the database, so I'm going to assume that the entire tblSerials & tblRequests will fit into memory.

So here's what to do:

var serials =
    ctx.tblSerials.ToArray();

var requests =
    ctx.tblRequests.ToArray();

var list =
    from s in serials 
    orderby s.CreateDate descending
    let rs =
        (from request in requests
         where request.SerialGUID.ToLower().Equals(s.SerialGUID.ToLower())
         select request).ToArray()
    let NumberOfAllRequests =
        rs.Count()
    let NumberOfAllRequestsDistinctByMachine =
        rs.Select(r => r.MachineCode).Distinct().Count()
    select new
    {
        s.SerialGUID,
        s.CreateDate,
        NumberOfAllRequests,
        NumberOfAllRequestsDistinctByMachine,
    };

That should be the fastest way to do your query - provided that it can load in to memory.


I'm not sure if it's smart enough to create joins using the nested queries, you could try using joins instead to see if it helps.

like so:

    var requestsByMachine = (from request in ctx.tblRequests
                            group request by request.MachineCode into g
                            select new { serial = request.SerialGuid, amount = g.Distinct().Count() }).ToArray();

    var requestsByGuid = (from request in ctx.tblRequests
                         group request by request.requestGUID into g
                         select new { serial = request.SerialGuid, amount = g.Count()}).ToArray();

    var list = from s in ctx.tblSerials.ToArray()
               join r1 in requestsByGuid on s.SerialGUID equals r1.serial
               join r2 in requestsByMachine on s.SerialGUID equals r2.serial
               orderby s.CreateDate descending
               select new
               {
                   SerialGUID = s.SerialGUID,
                   CreateDate = s.CreateDate,
                   NumberOfAllRequests = r1.amount,
                   NumberOfAllRequestsDistinctByMachine = r2.amount
               };

Can't try it myself at the moment, but you could check in sql profiler to see if creates a single query.

Edit: Modified to query in memory

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜