follow loading progress of huge XML files
I try to follow the loading progress of big XML files (I'm not the provider of these files) in dotnet (C#, framework 3.5 SP1) : from 1 MB to 300 MB over a network file share.
I use an XmlReader for loading, instead of calling the XmlDocument.Load method directly, to speed up the loading process.
However, I have found nothing on the internet or in the documentation about how to follow this loading progress: no delegates or events seem to exist. Is there any way to perform this task? Having the same kind of functionality for saving XML would be nice to have.
Thanks
Assuming you are reading from a stream here is a (non-perfect) example of how to do it... Basically the ProgressStreamWrapper wraps the file stream and raises an event whenever Position is changed.
/// <summary>
/// Console demo: reads an XML file through a <see cref="ProgressStreamWrapper"/>
/// and prints the percentage consumed each time the wrapper reports a position change.
/// </summary>
class Program
{
    /// <summary>
    /// Opens the sample file, walks it node by node with an XmlReader and
    /// prints progress to the console, then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("Reading big file...");

        // The using blocks release the file handle deterministically,
        // even if the XML turns out to be malformed and Read() throws.
        using (FileStream fileStream = File.OpenRead("c:\\temp\\bigfile.xml"))
        using (ProgressStreamWrapper progressStreamWrapper = new ProgressStreamWrapper(fileStream))
        {
            // Position / Length is computed in floating point so the ratio is not truncated to 0.
            progressStreamWrapper.PositionChanged += (o, ea) => Console.WriteLine((double) progressStreamWrapper.Position / progressStreamWrapper.Length * 100 + "% complete");

            using (XmlReader xmlReader = XmlReader.Create(progressStreamWrapper))
            {
                while (xmlReader.Read())
                {
                    //read the xml document
                }
            }
        }

        Console.WriteLine("DONE");
        Console.ReadLine();
    }
}
/// <summary>
/// Pass-through <see cref="Stream"/> decorator: every operation is forwarded to
/// <see cref="InnerStream"/>, and <see cref="PositionChanged"/> is raised after each
/// operation that may move the stream position (reads, writes, seeks, Position set).
/// Disposing or closing the wrapper also disposes the inner stream.
/// </summary>
public class ProgressStreamWrapper : Stream
{
    /// <summary>Wraps <paramref name="innerStream"/>.</summary>
    /// <param name="innerStream">The stream that performs the actual I/O.</param>
    /// <exception cref="ArgumentNullException"><paramref name="innerStream"/> is null.</exception>
    public ProgressStreamWrapper(Stream innerStream)
    {
        // Fail here rather than with a NullReferenceException on the first read.
        if (innerStream == null)
        {
            throw new ArgumentNullException("innerStream");
        }
        InnerStream = innerStream;
    }

    /// <summary>The wrapped stream that all calls are forwarded to.</summary>
    public Stream InnerStream { get; private set; }

    /// <summary>Raised after every operation that may have changed the position.</summary>
    public event EventHandler PositionChanged;

    /// <summary>Raises <see cref="PositionChanged"/>.</summary>
    protected virtual void OnPositionChanged()
    {
        // Copy to a local so a concurrent unsubscribe cannot null the delegate
        // between the check and the invocation.
        EventHandler handler = PositionChanged;
        if (handler != null)
        {
            handler(this, EventArgs.Empty);
        }
    }

    /// <summary>
    /// Releases the inner stream. Stream.Close() and Stream.Dispose() both end up
    /// here, so this single override replaces the former Close()/IDisposable.Dispose pair.
    /// </summary>
    /// <param name="disposing">true when called from Dispose()/Close().</param>
    protected override void Dispose(bool disposing)
    {
        try
        {
            if (disposing)
            {
                InnerStream.Dispose();
            }
        }
        finally
        {
            base.Dispose(disposing);
        }
    }

    /// <summary>Flushes the inner stream.</summary>
    public override void Flush()
    {
        InnerStream.Flush();
    }

    /// <summary>Starts an asynchronous read on the inner stream.</summary>
    public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback callback, object state)
    {
        return InnerStream.BeginRead(buffer, offset, count, callback, state);
    }

    /// <summary>Completes an asynchronous read and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The number of bytes read, as reported by the inner stream.</returns>
    public override int EndRead(IAsyncResult asyncResult)
    {
        int endRead = InnerStream.EndRead(asyncResult);
        OnPositionChanged();
        return endRead;
    }

    /// <summary>Starts an asynchronous write on the inner stream.</summary>
    public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback callback, object state)
    {
        return InnerStream.BeginWrite(buffer, offset, count, callback, state);
    }

    /// <summary>Completes an asynchronous write and raises <see cref="PositionChanged"/>.</summary>
    public override void EndWrite(IAsyncResult asyncResult)
    {
        InnerStream.EndWrite(asyncResult);
        OnPositionChanged();
    }

    /// <summary>Seeks on the inner stream and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The new position reported by the inner stream.</returns>
    public override long Seek(long offset, SeekOrigin origin)
    {
        long seek = InnerStream.Seek(offset, origin);
        OnPositionChanged();
        return seek;
    }

    /// <summary>Sets the length of the inner stream. No event is raised.</summary>
    public override void SetLength(long value)
    {
        InnerStream.SetLength(value);
    }

    /// <summary>Reads from the inner stream and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The number of bytes read, as reported by the inner stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        int read = InnerStream.Read(buffer, offset, count);
        OnPositionChanged();
        return read;
    }

    /// <summary>Reads one byte from the inner stream and raises <see cref="PositionChanged"/>.</summary>
    /// <returns>The value returned by the inner stream's ReadByte.</returns>
    public override int ReadByte()
    {
        int readByte = InnerStream.ReadByte();
        OnPositionChanged();
        return readByte;
    }

    /// <summary>Writes to the inner stream and raises <see cref="PositionChanged"/>.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        InnerStream.Write(buffer, offset, count);
        OnPositionChanged();
    }

    /// <summary>Writes one byte to the inner stream and raises <see cref="PositionChanged"/>.</summary>
    public override void WriteByte(byte value)
    {
        InnerStream.WriteByte(value);
        OnPositionChanged();
    }

    /// <summary>Forwards to the inner stream.</summary>
    public override bool CanRead
    {
        get { return InnerStream.CanRead; }
    }

    /// <summary>Forwards to the inner stream.</summary>
    public override bool CanSeek
    {
        get { return InnerStream.CanSeek; }
    }

    /// <summary>Forwards to the inner stream.</summary>
    public override bool CanTimeout
    {
        get { return InnerStream.CanTimeout; }
    }

    /// <summary>Forwards to the inner stream.</summary>
    public override bool CanWrite
    {
        get { return InnerStream.CanWrite; }
    }

    /// <summary>Length of the inner stream.</summary>
    public override long Length
    {
        get { return InnerStream.Length; }
    }

    /// <summary>
    /// Position of the inner stream. Setting it raises <see cref="PositionChanged"/>.
    /// </summary>
    public override long Position
    {
        get { return InnerStream.Position; }
        set
        {
            InnerStream.Position = value;
            OnPositionChanged();
        }
    }

    /// <summary>Read timeout of the inner stream.</summary>
    public override int ReadTimeout
    {
        get { return InnerStream.ReadTimeout; }
        set { InnerStream.ReadTimeout = value; }
    }

    /// <summary>Write timeout of the inner stream.</summary>
    public override int WriteTimeout
    {
        get { return InnerStream.WriteTimeout; }
        set { InnerStream.WriteTimeout = value; }
    }
}
With the inbuilt loaders there isn't much; you could, however, write an intercepting stream - load your document from this stream, and expose the Position
via events? i.e. raise an event in the Read
method (at intervals)?
Here's an example that supports updates during both read and write:
using System;
using System.IO;
using System.Xml;
/// <summary>
/// Stream decorator that forwards all I/O to a base stream and raises
/// <see cref="ProgressChanged"/> roughly every <see cref="UpdateInterval"/> bytes
/// read or written, instead of on every single call.
/// Disposing this stream disposes the base stream.
/// </summary>
class ChattyStream : Stream
{
    // Null once this stream has been disposed.
    private Stream baseStream;

    // pos: bytes transferred since the last notification; updateInterval: notification threshold.
    int pos, updateInterval;

    /// <summary>Wraps <paramref name="baseStream"/> with an update interval of 1000 bytes.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null.</exception>
    public ChattyStream(Stream baseStream)
    {
        if (baseStream == null)
        {
            throw new ArgumentNullException("baseStream");
        }
        this.baseStream = baseStream;
        updateInterval = 1000;
    }

    /// <summary>Raised when at least <see cref="UpdateInterval"/> bytes have been transferred since the last notification.</summary>
    public event EventHandler ProgressChanged;

    /// <summary>Raises <see cref="ProgressChanged"/>.</summary>
    protected virtual void OnProgressChanged()
    {
        var handler = ProgressChanged;
        if (handler != null)
        {
            handler(this, EventArgs.Empty);
        }
    }

    /// <summary>Throws <see cref="ObjectDisposedException"/> if this stream has been disposed.</summary>
    private void CheckDisposed()
    {
        if (baseStream == null)
        {
            throw new ObjectDisposedException(GetType().Name);
        }
    }

    /// <summary>The wrapped stream; throws <see cref="ObjectDisposedException"/> after disposal.</summary>
    protected Stream BaseStream
    {
        get { CheckDisposed(); return baseStream; }
    }

    /// <summary>Number of transferred bytes between two notifications. Must be positive.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The value is zero or negative.</exception>
    public int UpdateInterval
    {
        get { return updateInterval; }
        set
        {
            if (value <= 0)
            {
                throw new ArgumentOutOfRangeException("value");
            }
            updateInterval = value;
        }
    }

    /// <summary>
    /// Adds <paramref name="value"/> transferred bytes to the running counter and raises
    /// one notification when the counter reaches the interval. Non-positive values are ignored.
    /// </summary>
    protected void Increment(int value)
    {
        if (value > 0)
        {
            pos += value;
            if (pos >= updateInterval)
            {
                OnProgressChanged();
                // Keep the remainder so the next notification stays aligned to the interval.
                pos = pos % updateInterval;
            }
        }
    }

    /// <summary>Reads from the base stream and counts the bytes actually read.</summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        int result = BaseStream.Read(buffer, offset, count);
        Increment(result);
        return result;
    }

    /// <summary>Writes to the base stream and counts <paramref name="count"/> bytes.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        BaseStream.Write(buffer, offset, count);
        Increment(count);
    }

    /// <summary>Reads one byte; counts it only when a byte was returned (value &gt;= 0).</summary>
    public override int ReadByte()
    {
        int val = BaseStream.ReadByte();
        if (val >= 0)
        {
            Increment(1);
        }
        return val;
    }

    /// <summary>Writes one byte to the base stream and counts it.</summary>
    public override void WriteByte(byte value)
    {
        BaseStream.WriteByte(value);
        Increment(1);
    }

    /// <summary>Sets the length of the base stream.</summary>
    public override void SetLength(long value)
    {
        BaseStream.SetLength(value);
    }

    /// <summary>Flushes the base stream.</summary>
    public override void Flush()
    {
        BaseStream.Flush();
    }

    /// <summary>Position of the base stream. Seeking does not affect the progress counter.</summary>
    public override long Position
    {
        get { return BaseStream.Position; }
        set { BaseStream.Position = value; }
    }

    /// <summary>Seeks on the base stream. Seeking does not affect the progress counter.</summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        return BaseStream.Seek(offset, origin);
    }

    /// <summary>Length of the base stream.</summary>
    public override long Length
    {
        get { return BaseStream.Length; }
    }

    // The capability properties read the field directly instead of BaseStream:
    // on a closed stream they must report false rather than throw.

    /// <summary>true if the base stream is writable; false after disposal.</summary>
    public override bool CanWrite
    {
        get { return baseStream != null && baseStream.CanWrite; }
    }

    /// <summary>true if the base stream is readable; false after disposal.</summary>
    public override bool CanRead
    {
        get { return baseStream != null && baseStream.CanRead; }
    }

    /// <summary>true if the base stream is seekable; false after disposal.</summary>
    public override bool CanSeek
    {
        get { return baseStream != null && baseStream.CanSeek; }
    }

    /// <summary>
    /// Disposes the base stream. Stream.Close() routes through this method,
    /// so no separate Close() override is needed.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing && baseStream != null)
        {
            baseStream.Dispose();
        }
        baseStream = null;
        base.Dispose(disposing);
    }
}
/// <summary>
/// Demo for ChattyStream: writes a sample XML file and then loads it back,
/// printing the stream position on every progress notification.
/// </summary>
static class Program
{
    static void Main()
    {
        // Step 1: generate a reasonably large XML file to work with.
        const string path = "bigfile";
        if (File.Exists(path))
        {
            File.Delete(path);
        }

        using (ChattyStream output = new ChattyStream(File.Create(path)))
        {
            output.ProgressChanged += (sender, e) => Console.WriteLine("Writing: " + output.Position);

            using (XmlWriter xml = XmlWriter.Create(output))
            {
                xml.WriteStartDocument();
                xml.WriteStartElement("xml");
                int index = 0;
                while (index < 50000)
                {
                    xml.WriteElementString("add", index.ToString());
                    index++;
                }
                xml.WriteEndElement();
                xml.WriteEndDocument();
            }

            output.Close();
        }

        // Step 2: load the same file back through a second ChattyStream.
        using (ChattyStream input = new ChattyStream(File.OpenRead(path)))
        {
            input.ProgressChanged += (sender, e) => Console.WriteLine("Reading: " + input.Position);

            // Any stream-based API works here: XmlReader, XmlDocument, XDocument, etc.
            XmlDocument document = new XmlDocument();
            document.Load(input);
        }
    }
}
How about using DataSet.Read()?
or,
// Create the document and load it from the source identified by "file".
XmlDocument doc = new XmlDocument();
doc.Load(file);
// Loop through all the child nodes of the root and build the list of Product objects.
List<Product> products = new List<Product>();
foreach (XmlNode node in doc.DocumentElement.ChildNodes)
{
    // ChildNodes can also contain comments, text or processing instructions;
    // casting those to XmlElement would throw InvalidCastException, so skip them.
    XmlElement element = node as XmlElement;
    if (element == null)
    {
        continue;
    }
    Product newProduct = new Product();
    // XML is machine-readable data: parse with the invariant culture so the result
    // does not depend on the regional settings of the machine running the code.
    newProduct.ID = Int32.Parse(element.GetAttribute("ID"), System.Globalization.CultureInfo.InvariantCulture);
    newProduct.Name = element.GetAttribute("Name");
    // If there were more than one child node, you would probably use
    // another foreach loop here and move through the
    // element.ChildNodes collection.
    // NOTE(review): assumes every product element has at least one child node
    // holding the price; confirm against the actual document schema.
    newProduct.Price = Decimal.Parse(element.ChildNodes[0].InnerText, System.Globalization.CultureInfo.InvariantCulture);
    products.Add(newProduct);
}
精彩评论