How do I read data from a spreadsheet using the OpenXML Format SDK?
I need to read data from a single worksheet in an Excel 2007 workbook using the Open XML SDK 2.0. I have spent a lot of time searching for basic guidelines to doing this, but I have only found help on creating spreadsheets.
How do I iterate rows in a worksheet and then iterate the cells in each row, using this SDK?
The other answer seemed more like a meta-answer. I have been struggling with this since using LINQ does work with separated document parts. The following code includes a wrapper function to get the value from a Cell, resolving any possible string lookups.
/// <summary>
/// Opens the workbook at the hard-coded path read-only, walks every sheet listed in
/// the workbook, and writes each row's cell values to the debug output.
/// </summary>
/// <remarks>
/// Sheets whose relationship does not resolve to a <see cref="WorksheetPart"/> are
/// reported as "NOT FOUND!" and skipped. The closing message reports the number of
/// rows visited across all worksheets.
/// </remarks>
public void ExcelDocTest()
{
    Debug.WriteLine("Running through sheet.");
    int rowsComplete = 0;
    using (SpreadsheetDocument spreadsheetDocument =
        SpreadsheetDocument.Open(@"path\to\Spreadsheet.xlsx", false))
    {
        WorkbookPart workBookPart = spreadsheetDocument.WorkbookPart;
        foreach (Sheet s in workBookPart.Workbook.Descendants<Sheet>())
        {
            // The "as" cast yields null when the referenced part is not a WorksheetPart.
            WorksheetPart wsPart = workBookPart.GetPartById(s.Id) as WorksheetPart;
            Debug.WriteLine("Worksheet {1}:{2} - id({0}) {3}", s.Id, s.SheetId, s.Name,
                wsPart == null ? "NOT FOUND!" : "found.");
            if (wsPart == null)
            {
                continue;
            }

            // Rows are streamed straight from the worksheet; there is no need to
            // materialize them into an array first.
            int positionInSheet = 0;
            foreach (Row row in wsPart.Worksheet.Descendants<Row>())
            {
                rowsComplete++;
                positionInSheet++;

                bool emptyRow = true;
                List<string> rowData = new List<string>();
                foreach (Cell c in row.Elements<Cell>())
                {
                    string value = GetCellValue(c);
                    emptyRow = emptyRow && string.IsNullOrWhiteSpace(value);
                    rowData.Add(value);
                }

                // Label the line with the row's own index. Formatting the Row object
                // itself would not print a row number. RowIndex may be null, in which
                // case fall back to the row's position among the rows present.
                object rowLabel = row.RowIndex != null
                    ? (object)row.RowIndex.Value
                    : positionInSheet;
                Debug.WriteLine("Row {0}: {1}", rowLabel,
                    emptyRow ? "EMPTY!" : string.Join(", ", rowData));
            }
        }
    }
    Debug.WriteLine("Done, processed {0} rows.", rowsComplete);
}
/// <summary>
/// Returns the text of a cell, resolving shared-string indexes against the
/// workbook's shared string table and mapping boolean cells to "TRUE"/"FALSE".
/// </summary>
/// <param name="cell">The cell to read; may be null.</param>
/// <returns>
/// Null when <paramref name="cell"/> is null; otherwise the cell's text. For a
/// shared-string cell whose workbook has no shared string table, the raw index
/// text is returned unchanged.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The cell is a shared-string cell but is not attached to a worksheet part, so
/// the shared string table cannot be located.
/// </exception>
/// <exception cref="FormatException">
/// The shared-string index stored in the cell is not a valid integer.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The shared-string index is outside the shared string table.
/// </exception>
public static string GetCellValue(Cell cell)
{
    if (cell == null)
    {
        return null;
    }

    // Prefer the <v> element when present: InnerText concatenates the text of every
    // child, so a formula cell would otherwise yield formula text glued to its value.
    // Cells without <v> (e.g. inline strings) still fall back to InnerText.
    string value = cell.CellValue != null ? cell.CellValue.Text : cell.InnerText;

    if (cell.DataType == null)
    {
        return value;
    }

    // NOTE(review): equality tests are used instead of a switch so the code does not
    // depend on CellValues members being compile-time constants - confirm against the
    // SDK version in use.
    if (cell.DataType.Value == CellValues.SharedString)
    {
        // The cell only stores an index; the text lives in the workbook-level
        // shared string table, reached through the owning worksheet's part.
        Worksheet ws = cell.Ancestors<Worksheet>().FirstOrDefault();
        if (ws == null || ws.WorksheetPart == null)
        {
            throw new InvalidOperationException("Unable to find parent worksheet.");
        }

        SpreadsheetDocument ssDoc = ws.WorksheetPart.OpenXmlPackage as SpreadsheetDocument;
        SharedStringTablePart sstPart = null;
        if (ssDoc != null && ssDoc.WorkbookPart != null)
        {
            sstPart = ssDoc.WorkbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
        }

        if (sstPart != null && sstPart.SharedStringTable != null)
        {
            // The index is machine-written, so parse it culture-independently.
            int index = int.Parse(
                value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture);

            // Index only the string items, not every child element of the table.
            value = sstPart.SharedStringTable
                .Elements<SharedStringItem>()
                .ElementAt(index)
                .InnerText;
        }
    }
    else if (cell.DataType.Value == CellValues.Boolean)
    {
        // Only "0" is false; every other stored value is reported as true.
        value = value == "0" ? "FALSE" : "TRUE";
    }

    return value;
}
Edit: Thanks @Nitin-Jadhav for the correction to GetCellValue().
The way I do this is with LINQ. There are lots of samples around on this subject, ranging from using the SDK to working with pure Open XML (no SDK). Take a look at:
- Office Open XML Formats: Retrieving Excel 2007 Cell Values (uses pure OpenXML, not SDK, but the concepts are really close)
- Using LINQ to Query Tables in Excel 2007 (uses Open XML SDK, assumes ListObject)
- Reading Data from SpreadsheetML (probably best "overall introduction" article)
精彩评论