fast load big object graph from DB
Hi
I have my own data structure written in C# like:public class ElectricScheme
{
public List<Element> Elements { get; set; }
public List<Net> Nets { get; set; }
}
public class Element
{
public string IdName { get; set; }
public string Func { get; set; }
public string Name { get; set; }
public BaseElementType Type { get; set; }
public List<Pin> Pins { get; set; }
}
public class Pin
{
public string IdName { get; set; }
public string Name { get; set; }
public BasePinType PinType { get; set; }
public BasePinDirection PinDirection { get; set; }
}
public class Net
{
public string IdName { get; set; }
public string Name { get; set; }
public List<Tuple<Element,Pin>> ConnectionPoints { get; set; }
}
Where Elements count ~19000, each element contain >=3 Pin,
Nets count ~20000, each net contain >=3 pair (Element, Pin)Parse txt (file size ~17mb) file takes 5 minutes. Serilization / Deserialization by default serializ开发者_C百科er ~3 minutes. Load from DB 20 minutes and not loaded... I use Entity Framework like
public ElectricScheme LoadScheme(int schemeId)
{
var eScheme = (from s in container.ElectricSchemesSet
where s.IdElectricScheme.Equals(schemeId)
select s).FirstOrDefault();
if (eScheme == null) return null;
container.LoadProperty(eScheme, "Elements");
container.LoadProperty(eScheme, "Nets");
container.LoadProperty(eScheme, "Elements.Pins");
return eScheme;
}
The problem is dependencies between Element and Pin... (for ~19000 elements ~95000 pins)
Any ideas?
Task solution:
Database structure(MS SQL CE):
-- Creating table 'ElectricSchemesSet'
CREATE TABLE [ElectricSchemesSet] (
[IdElectricScheme] int NOT NULL,
[Name] nvarchar(4000) NOT NULL,
[SourceFileHash] nvarchar(4000) NOT NULL,
[CreatedDate] datetime NOT NULL,
[LastUpdatedDate] datetime NOT NULL
);
-- Creating table 'Nets'
CREATE TABLE [Nets] (
[IdNet] int NOT NULL,
[IdName] nvarchar(4000) NOT NULL,
[Name] nvarchar(4000) NOT NULL,
[ElectricSchemes_IdElectricScheme] int NOT NULL
);
-- Creating table 'Elements'
CREATE TABLE [Elements] (
[IdElement] int NOT NULL,
[Func] nvarchar(4000) NULL,
[IdName] nvarchar(4000) NOT NULL,
[Name] nvarchar(4000) NOT NULL,
[Type] nvarchar(4000) NOT NULL,
[ElectricSchemes_IdElectricScheme] int NOT NULL
);
-- Creating table 'Pins'
CREATE TABLE [Pins] (
[IdPin] int NOT NULL,
[IdName] nvarchar(4000) NOT NULL,
[Name] nvarchar(4000) NOT NULL,
[Direction] nvarchar(4000) NOT NULL,
[Type] nvarchar(4000) NOT NULL,
[ElementsIdElement] int NOT NULL,
[ElectricSchemesIdElectricScheme] int NOT NULL
);
-- Creating table 'NetConnections'
CREATE TABLE [NetConnections] (
[IdNetConnections] int NOT NULL,
[NetsIdNet] int NOT NULL,
[ElementsIdElement] int NOT NULL,
[PinsIdPin] int NOT NULL,
[ElectricSchemesIdElectricScheme] int NOT NULL
);
Loading using Entity Framework (2.08min in single thread, 1.48min for dual core) :
public ElectricScheme LoadScheme(int schemeId)
{
var eScheme = (from s in container.ElectricSchemesSet
where s.IdElectricScheme.Equals(schemeId)
select s).FirstOrDefault();
if (eScheme == null)
{
return null;
}
TaskFactory tf = new TaskFactory();
Elements[] elems = null;
IGrouping<int, Pins>[] pins = null;
List<Element> mElements = null;
var loadElements = tf.StartNew(() =>
elems = (from e in container.Elements
where e.ElectricSchemes.IdElectricScheme.Equals(schemeId)
select e).ToArray());
var loadPins = tf.StartNew(() =>
pins = (from p in container.Pins
where p.ElectricSchemesIdElectricScheme.Equals(schemeId)
select p)
.GroupBy(x => x.ElementsIdElement).ToArray());
var buildElements = tf.ContinueWhenAll(
new Task[] {loadElements, loadPins},
delegate { mElements = Builder.BuildElement(elems, pins); });
Nets[] net = null;
IGrouping<int, NetConnections>[] nConn = null;
List<Net> mNet = null;
var loadNet =tf.StartNew(() =>
net = (from n in container.Nets
where n.ElectricSchemes.IdElectricScheme.Equals(schemeId)
select n).ToArray());
var loadConn = tf.StartNew(() =>
nConn = (from c in container.NetConnections
where c.ElectricScheme.IdElectricScheme.Equals(schemeId)
select c)
.GroupBy(x => x.NetsIdNet).ToArray());
var buildNet = tf.ContinueWhenAll(
new Task[] {loadNet, loadConn},
delegate { mNet = Builder.BuildNet(net, nConn); });
ElectricScheme scheme = null;
var buildScheme = tf.ContinueWhenAll(new Task[] {buildElements, buildNet},
delegate { scheme = Builder.BuildScheme(mNet, mElements, eScheme.IdElectricScheme); });
buildScheme.Wait();
return scheme;
}
Save()(45seconds):
public void SaveUsingPureSql(string connectionString)
{
using (SqlCeConnection connection = new SqlCeConnection(connectionString))
{
connection.Open();
using (var transaction = connection.BeginTransaction())
{
SaveSchemeToTable(connection, transaction);
SaveElementsAndPinsToTable(connection, transaction);
SaveNetWithConnectionsToTable(connection, transaction);
transaction.Commit();
}
}
}
private void SaveNetWithConnectionsToTable(SqlCeConnection conn, SqlCeTransaction transaction)
{
string insertNetsQuery = "INSERT INTO Nets VALUES (@id,@IdName,@Name,@ElectricSchemes_IdElectricSchemes);";
string insertNetConnectionQuery = "INSERT INTO NetConnections VALUES (@id,@NetsIdNet,@ElementsIdElemtnt,@PinsIdPin,@ElectricSchemesIdElectricScheme);";
SqlCeCommand netsCommand = new SqlCeCommand(insertNetsQuery, conn, transaction);
SqlCeCommand netConnectionCommand = new SqlCeCommand(insertNetConnectionQuery, conn, transaction);
netsCommand.Parameters.Add("@id", SqlDbType.BigInt);
netsCommand.Parameters.Add("@ElectricSchemes_IdElectricSchemes", SqlDbType.BigInt);
netsCommand.Parameters.Add("@IdName", SqlDbType.NVarChar);
netsCommand.Parameters.Add("@Name", SqlDbType.NVarChar);
netConnectionCommand.Parameters.Add("@id", SqlDbType.BigInt);
netConnectionCommand.Parameters.Add("@NetsIdNet", SqlDbType.BigInt);
netConnectionCommand.Parameters.Add("@ElementsIdElemtnt", SqlDbType.BigInt);
netConnectionCommand.Parameters.Add("@PinsIdPin", SqlDbType.BigInt);
netConnectionCommand.Parameters.Add("@ElectricSchemesIdElectricScheme", SqlDbType.BigInt);
foreach (var net in scheme.Nets)
{
net.Id = lastNetId++;
netsCommand.Parameters["@id"].Value = net.Id;
netsCommand.Parameters["@ElectricSchemes_IdElectricSchemes"].Value = lastSchemeId;
netsCommand.Parameters["@IdName"].Value = net.IdName;
netsCommand.Parameters["@Name"].Value = net.Name;
netsCommand.ExecuteNonQuery();
foreach (var point in net.ConnectionPoints)
{
netConnectionCommand.Parameters["@id"].Value = lastPinConnectionId++;
netConnectionCommand.Parameters["@NetsIdNet"].Value = net.Id;
netConnectionCommand.Parameters["@ElementsIdElemtnt"].Value = point.Item1.Id;
netConnectionCommand.Parameters["@PinsIdPin"].Value = point.Item2.Id;
netConnectionCommand.Parameters["@ElectricSchemesIdElectricScheme"].Value = lastSchemeId;
netConnectionCommand.ExecuteNonQuery();
}
}
}
private void SaveSchemeToTable(SqlCeConnection conn, SqlCeTransaction transaction)
{
string query = "INSERT INTO ElectricSchemesSet VALUES (@id,@name,@SourceFileHash,@CreatedDate,@LastUpdatedDate);";
SqlCeCommand command = new SqlCeCommand(query,conn,transaction);
ValueType timeNow = DateTime.Now;
scheme.Id = lastSchemeId;
command.Parameters.AddWithValue("@id", scheme.Id);
command.Parameters.AddWithValue("@name", name);
command.Parameters.AddWithValue("@SourceFileHash", sourceFileHash);
command.Parameters.AddWithValue("@CreatedDate", timeNow);
command.Parameters.AddWithValue("@LastUpdatedDate", timeNow);
command.ExecuteNonQuery();
}
private void SaveElementsAndPinsToTable(SqlCeConnection conn, SqlCeTransaction transaction)
{
string insertElementQuery =
"INSERT INTO Elements VALUES (@id,@Func,@IdName,@Name,@Type,@ElectricSchemes_IdElectricSchemes);";
string insertPinQuery =
"INSERT INTO Pins VALUES (@id,@IdName,@Name,@Direction,@Type,@ElementIdElement, @ElectricSchemesIdElectricSchemes);";
var elementCommand = new SqlCeCommand(insertElementQuery, conn, transaction);
elementCommand.Parameters.Add("@id", SqlDbType.BigInt);
elementCommand.Parameters.Add("@ElectricSchemes_IdElectricSchemes", SqlDbType.BigInt);
elementCommand.Parameters.Add("@Func", SqlDbType.NVarChar);
elementCommand.Parameters.Add("@IdName", SqlDbType.NVarChar);
elementCommand.Parameters.Add("@Name", SqlDbType.NVarChar);
elementCommand.Parameters.Add("@Type", SqlDbType.NVarChar);
var pinCommand = new SqlCeCommand(insertPinQuery, conn, transaction);
pinCommand.Parameters.Add("@id", SqlDbType.BigInt);
pinCommand.Parameters.Add("@ElementIdElement", SqlDbType.BigInt);
pinCommand.Parameters.Add("@ElectricSchemesIdElectricSchemes", SqlDbType.BigInt);
pinCommand.Parameters.Add("@IdName", SqlDbType.NVarChar);
pinCommand.Parameters.Add("@Name", SqlDbType.NVarChar);
pinCommand.Parameters.Add("@Direction", SqlDbType.NVarChar);
pinCommand.Parameters.Add("@Type", SqlDbType.NVarChar);
foreach (var element in scheme.Elements)
{
element.Id = lastElementId++;
elementCommand.Parameters["@id"].Value = element.Id;
elementCommand.Parameters["@ElectricSchemes_IdElectricSchemes"].Value = lastSchemeId;
elementCommand.Parameters["@Func"].Value = element.Func;
elementCommand.Parameters["@IdName"].Value = element.IdName;
elementCommand.Parameters["@Name"].Value = element.Name;
elementCommand.Parameters["@Type"].Value = element.Type.ToString();
elementCommand.ExecuteNonQuery();
foreach (var pin in element.Pins)
{
pin.Id = lastPinId++;
pinCommand.Parameters["@id"].Value = pin.Id;
pinCommand.Parameters["@ElementIdElement"].Value = element.Id;
pinCommand.Parameters["@ElectricSchemesIdElectricSchemes"].Value = lastSchemeId;
pinCommand.Parameters["@IdName"].Value = pin.IdName;
pinCommand.Parameters["@Name"].Value = pin.Name;
pinCommand.Parameters["@Direction"].Value = pin.PinDirection.ToString();
pinCommand.Parameters["@Type"].Value = pin.PinType.ToString();
pinCommand.ExecuteNonQuery();
}
}
}
I see your post is a little older, but I will add another approach for posterity ;-)
It might be faster than your fetch and will only help you for fetches, not saves...
You could create a stored procedure that returns multiple result sets and use EF to automatically load the unrelated collections(in my case my sp is names searchJobs):
var conn = Database.DefaultConnectionFactory.CreateConnection(GetConnectionString(context));
try
{
conn.Open();
using (var cmd = conn.CreateCommand())
{
cmd.CommandText = "SearchJobs";
cmd.CommandType = CommandType.StoredProcedure;
AddParameter(cmd, "serviceProviderId", filter.ServiceProviderId);
AddParameter(cmd, "service", filter.Service);
AddParameter(cmd, "statuses", string.Join(",", filter.Statuses));
AddParameter(cmd, "deadlineFrom", filter.DeadlineFrom);
AddParameter(cmd, "deadlineTo", filter.DeadlineTo);
using (var rdr = cmd.ExecuteReader())
{
//I just need a read-only list for viewing, so I won’t worry about change tracking
//the objects need to be read out of the DbReader before it is closed
jobs = context.ObjectContext.Translate<Job>(rdr, GetEntitySetName<Job>(context), MergeOption.AppendOnly).ToList();
rdr.NextResult();
documentRequests = context.ObjectContext.Translate<DocumentRequest>(rdr, GetEntitySetName<DocumentRequest>(context), MergeOption.AppendOnly).ToList();
rdr.NextResult();
serviceorders = context.ObjectContext.Translate<ServiceOrder>(rdr, GetEntitySetName<ServiceOrder>(context), MergeOption.AppendOnly).ToList();
}
fetch.Stop();
}
}
finally
{
conn.Close();
}
Then iterate over the unrelated collections and wire them up using simple code, here you could use multi-threading(like your example) to speed things up.
foreach (var job in jobs)
{
job.ServiceOrders = serviceorders.Where(s => s.Job_Id == job.Id).ToList();
job.Request = documentRequests.FirstOrDefault(d => d.Id == job.OfficerRequest_Id);
}
精彩评论