using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.IO.Packaging;
using System.IO;
namespace WpfSamples {
///
/// Helper class for reading Excel 2007 file format
///
public class ExcelHelper {
///
/// Retrieves an excel table from a given sheet in an xlsx file
///
/// xlsx file name
/// Sheet name (e.g. "Sheet1"
/// table[rows][cols] containing all the table information for the given sheet in the given file
/// Cells with no information are returned as null. The table starts at the first element containing data in the sheet.
public static List> XLGetTable(string fileName, string sheetName) {
// Return the value of the specified cell.
const string documentRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
const string worksheetSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
const string sharedStringSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
List> table = new List>();
// Retrieve the stream containing the requested
// worksheet's info:
using (Package xlPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read)) {
PackagePart documentPart = null;
Uri documentUri = null;
// Get the main document part (workbook.xml).
foreach (System.IO.Packaging.PackageRelationship relationship in xlPackage.GetRelationshipsByType(documentRelationshipType)) {
// There should only be one document part in the package.
documentUri = PackUriHelper.ResolvePartUri(new Uri("/", UriKind.Relative), relationship.TargetUri);
documentPart = xlPackage.GetPart(documentUri);
// There should only be one instance, but get out no matter what.
break;
}
if (documentPart == null) {
return table;
}
// Load the contents of the workbook.
XmlDocument doc = new XmlDocument();
doc.Load(documentPart.GetStream());
// Create a namespace manager, so you can search.
// Add a prefix (d) for the default namespace.
NameTable nt = new NameTable();
XmlNamespaceManager nsManager = new XmlNamespaceManager(nt);
nsManager.AddNamespace("d", worksheetSchema);
nsManager.AddNamespace("s", sharedStringSchema);
string searchString = string.Format("//d:sheet[@name='{0}']", sheetName);
XmlNode sheetNode = doc.SelectSingleNode(searchString, nsManager);
if (sheetNode == null) {
// sheet not found
return table;
}
// Get the relId attribute:
XmlAttribute relationAttribute = sheetNode.Attributes["r:id"];
if (relationAttribute == null) {
return table;
}
string relId = relationAttribute.Value;
// First, get the relation between the document and the sheet.
PackageRelationship sheetRelation = documentPart.GetRelationship(relId);
Uri sheetUri = PackUriHelper.ResolvePartUri(documentUri, sheetRelation.TargetUri);
PackagePart sheetPart = xlPackage.GetPart(sheetUri);
// Load the contents of the workbook.
XmlDocument sheetDoc = new XmlDocument(nt);
sheetDoc.Load(sheetPart.GetStream());
XmlNode dimensionNode = sheetDoc.SelectSingleNode("//d:dimension", nsManager);
if (dimensionNode == null) {
throw new InvalidDataException("Dimension information not found in excel spreadsheet file");
}
XmlAttribute dimensionRefAttr = dimensionNode.Attributes["ref"];
string dimensions;
if (dimensionRefAttr == null) {
throw new InvalidDataException("Dimension ref information not found in excel spreadsheet file");
}
dimensions = dimensionRefAttr.Value;
int startRow, startCol, endRow, endCol, rows, cols;
ParseDimensions(dimensions, out startRow, out startCol, out endRow, out endCol);
rows = endRow - startRow + 1;
cols = endCol - startCol + 1;
if ((rows == 0) || (cols == 0)) {
return table;
}
// uncomment the lines below to output the sheet information in a separate xml file
// XmlTextWriter writer = new XmlTextWriter(@"output.xml", System.Text.Encoding.UTF8);
// sheetDoc.WriteTo(writer);
// writer.Close();
XmlNode sheetDataNode = sheetDoc.SelectSingleNode("//d:sheetData", nsManager);
if (sheetDataNode == null) {
throw new InvalidDataException("Sheet data information not found in excel spreadsheet file");
}
table.Capacity = rows;
for (int i = 0; i < rows; i++) {
List colList = new List(cols);
for (int j = 0; j < cols; j++) {
colList.Add(null);
}
table.Add(colList);
}
foreach (XmlNode row in sheetDataNode.ChildNodes) {
if (row.LocalName != "row") {
continue;
}
XmlAttribute rowRAttr = row.Attributes["r"];
if (rowRAttr == null) {
throw new InvalidDataException("Dimension ref information not found in excel spreadsheet file");
}
int rowIndex = int.Parse(rowRAttr.Value) - startRow;
// rowIndex is valid index inside List> table
List colList = table[rowIndex]; // columns for this table
foreach (XmlNode col in row.ChildNodes) {
if (col.LocalName != "c") {
continue;
}
XmlAttribute rAttr = col.Attributes["r"];
if (rAttr == null) {
throw new InvalidDataException("No r attribute found for col in xlxs data");
}
int colIndex, tempI;
GetColNumber(rAttr.Value, out colIndex, out tempI);
colIndex -= startCol;
colList[colIndex] = GetValue(col, nsManager, documentPart, documentUri, xlPackage, nt);
}
}
// uncomment the code below to find the value of a specific cell directly (e.g. A1)
//XmlNode cellNode = sheetDoc.SelectSingleNode(string.Format("//d:sheetData/d:row/d:c[@r='{0}']", "A1"), nsManager);
//if (cellNode != null) {
// string cellValue = GetValue(cellNode, nsManager, documentPart, documentUri, xlPackage, nt);
//}
}
return table;
}
///
/// Retrieves the value as string of a cell node in an xlsx file
///
private static string GetValue(XmlNode cellNode, XmlNamespaceManager nsManager, PackagePart documentPart, Uri documentUri, Package xlPackage, NameTable nt) {
const string sharedStringsRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
const string sharedStringSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
string cellValue = null;
// Retrieve the value. The value may be stored within
// this element. If the "t" attribute contains "s", then
// the cell contains a shared string, and you must look
// up the value individually.
XmlAttribute typeAttr = cellNode.Attributes["t"];
string cellType = string.Empty;
if (typeAttr != null) {
cellType = typeAttr.Value;
}
XmlNode valueNode = cellNode.SelectSingleNode("d:v", nsManager);
if (valueNode != null) {
cellValue = valueNode.InnerText;
}
// Check the cell type. At this point, this code only checks
// for booleans and strings individually.
if (cellType == "b") {
if (cellValue == "1") {
cellValue = "TRUE";
}
else {
cellValue = "FALSE";
}
}
else if (cellType == "s") {
// Go retrieve the actual string from the associated string file.
foreach (System.IO.Packaging.PackageRelationship stringRelationship in documentPart.GetRelationshipsByType(sharedStringsRelationshipType)) {
// There should only be one shared string reference, so you'll exit this loop immediately.
Uri sharedStringsUri = PackUriHelper.ResolvePartUri(documentUri, stringRelationship.TargetUri);
PackagePart stringPart = xlPackage.GetPart(sharedStringsUri);
if (stringPart != null) {
// Load the contents of the shared strings.
XmlDocument stringDoc = new XmlDocument(nt);
stringDoc.Load(stringPart.GetStream());
// Add the string schema to the namespace manager:
nsManager.AddNamespace("s", sharedStringSchema);
int requestedString = Convert.ToInt32(cellValue);
string strSearch = string.Format("//s:sst/s:si[{0}]", requestedString + 1);
XmlNode stringNode = stringDoc.SelectSingleNode(strSearch, nsManager);
if (stringNode != null) {
cellValue = stringNode.InnerText;
}
}
}
}
return cellValue;
}
///
/// Returns column index given an excel column as string (e.g. "AB" -> 28)
///
private static int ColNumFromStr(string str) {
str = str.ToUpper();
int result = 0;
int strLen = str.Length;
int mul = 1;
for (int i = strLen - 1; i >= 0; i--) {
result += (mul * (str[i] - 'A' + 1));
mul *= 26;
}
return result;
}
///
/// Retrieves the column number given a string from Excel (e.g. A5 or C4)
///
private static void GetColNumber(string rowColStr, out int col, out int indexAfterColInStr) {
int len = rowColStr.Length;
for (indexAfterColInStr = 0; indexAfterColInStr < len; indexAfterColInStr++) {
if (!char.IsLetter(rowColStr[indexAfterColInStr])) {
break;
}
}
if (indexAfterColInStr == 0) {
throw new InvalidDataException("xlsx GetColNumber(): invalid rowColStr");
}
col = ColNumFromStr(rowColStr.Substring(0, indexAfterColInStr));
}
///
/// Converts row and col string in Excel (something like C1 or A4) to 1-based indexes
///
private static void GetRowColNumbers(string rowColStr, out int row, out int col) {
int i = 0;
while (char.IsLetter(rowColStr[i])) {
i++;
}
int indexAfterColInStr;
GetColNumber(rowColStr, out col, out indexAfterColInStr);
row = int.Parse(rowColStr.Substring(indexAfterColInStr));
}
///
/// Converts dimensions in Excel (e.g. C3:D4) to 1-based indexes
///
private static void ParseDimensions(string dimensions, out int startRow, out int startCol,
out int endRow, out int endCol) {
int colonIndex = dimensions.IndexOf(':');
if (colonIndex < 0) {
throw new InvalidDataException("Invalid xlss dimension information: colon not found");
}
GetRowColNumbers(dimensions.Substring(0, colonIndex), out startRow, out startCol);
GetRowColNumbers(dimensions.Substring(colonIndex + 1), out endRow, out endCol);
}
}
}