Blog - Title

The OpenXmlDocument, WordprocessingML, and SpreadsheetML Classes (Dec 19, 2007)

The OpenXmlDocument, WordprocessingML, and SpreadsheetML Classes (Dec 19, 2007)

  • Comments 3

This page presents a new version of the OpenXmlDocument, WordprocessingML, and SpreadsheetML classes.

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.IO;
using System.IO.Packaging;

namespace Microsoft.Examples.LtxOpenXml
{
    class Relationship
    {
        public string Id { get; set; }
        public string RelationshipType { get; set; }
        public string ContentType { get; set; }
        public System.IO.Packaging.TargetMode TargetMode { get; set; }
        public Uri SourceUri { get; set; }
        public Uri TargetUri { get; set; }
        public PackagePart PackagePart { get; set; }
        public XDocument XDocument { get; set; }
        public List<Relationship> Relationships { get; set; }
    }

    class OpenXmlDocument : IDisposable
    {
        public string Name { get; set; }
        public Package Package { get; set; }
        public List<Relationship> Relationships { get; set; }
        public XNamespace RelationshipNamespace =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships";

        private bool disposed = false;

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        private void Dispose(bool disposing)
        {
            if (!this.disposed)
            {
                if (disposing)
                    Package.Close();
                disposed = true;
            }
        }

        private XDocument LoadXDocument(string relationshipType,
            PackagePart part)
        {
            if (XmlContentTypes.Contains(part.ContentType))
                return XDocument.Load(XmlReader.Create(part.GetStream()));
            else
                return null;
        }

        private List<Relationship> CreateRelationshipList(
            PackageRelationshipCollection prc)
        {
            return (
                from pr in prc
                where pr.TargetMode == TargetMode.Internal
                let uri = PackUriHelper.ResolvePartUri(
                    new Uri(pr.SourceUri.ToString(), UriKind.Relative),
                    pr.TargetUri)
                let part = pr.Package.GetPart(uri)
                let contentType = part.ContentType
                let xdoc = LoadXDocument(pr.RelationshipType, part)
                let partRelationshipList =
                    CreateRelationshipList(part.GetRelationships())
                select new Relationship
                {
                    Id = pr.Id,
                    RelationshipType = pr.RelationshipType,
                    TargetMode = pr.TargetMode,
                    ContentType = part.ContentType,
                    SourceUri = pr.SourceUri,
                    TargetUri = pr.TargetUri,
                    PackagePart = part,
                    XDocument = xdoc,
                    Relationships = partRelationshipList
                }
            ).ToList();
        }

        public OpenXmlDocument(string name)
        {
            Name = name;
            Package = Package.Open(Name, FileMode.Open, FileAccess.Read);
            Relationships = CreateRelationshipList(
                Package.GetRelationships());
        }

        public HashSet<string> XmlContentTypes = new HashSet<string>
        {
            "application/vnd.openxmlformats-officedocument.custom-properties+xml",
            "application/vnd.openxmlformats-officedocument.customXmlProperties+xml",
            "application/vnd.openxmlformats-officedocument.drawing+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.chart+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml",
            "application/vnd.openxmlformats-officedocument.extended-properties+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.presentationProperties+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.tags+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
            "application/vnd.openxmlformats-officedocument.theme+xml",
            "application/vnd.openxmlformats-officedocument.themeOverride+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
            "application/vnd.openxmlformats-package.core-properties+xml",
            "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml",
            "application/xml"
        };
    }

    class WordprocessingML : OpenXmlDocument
    {
        public class Comment
        {
            public int Id { get; set; }
            public string Text { get; set; }
            public string Author { get; set; }
            public Paragraph Parent { get; set; }
            public Comment(Paragraph parent) { Parent = parent; }
        }

        public class Paragraph
        {
            public XElement ParagraphElement { get; set; }
            public string StyleName { get; set; }
            public string Text { get; set; }
            public IEnumerable<Comment> Comments()
            {
                XNamespace w = Parent.WordprocessingMLNamespace;
                XElement p = ParagraphElement;

                var commentIds = p
                                 .Elements(w + "commentRangeStart")
                                 .Attributes(w + "id")
                                 .Select(c => (int)c);

                return
                    commentIds
                    .Select(i =>
                        new Comment(this)
                        {
                            Id = i,
                            Author =
                                Parent.CommentsRelationship.XDocument
                                .Root
                                .Elements(w + "comment")
                                .Where(c => (int)c.Attribute(w + "id") == i)
                                .First()
                                .Attribute(w + "author")
                                .Value,
                            Text =
                                Parent.CommentsRelationship.XDocument
                                .Root
                                .Elements(w + "comment")
                                .Where(c => (int)c.Attribute(w + "id") == i)
                                .First()
                                .Descendants(w + "p")
                                .Select(run => run
                                               .Descendants(w + "t")
                                               .StringConcatenate(e => (string)e)
                                               + "\n")
                                .Aggregate(new StringBuilder(), (sb, v) => sb.Append(v), sb => sb.ToString())
                                .Trim()
                        }
                    );
            }
            public WordprocessingML Parent { get; set; }
            public Paragraph(WordprocessingML parent) { Parent = parent; }
        }

        public const string DocumentRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
        public const string StylesRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
        public const string CommentsRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
        public XNamespace WordprocessingMLNamespace =
            "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

        public Relationship DocumentRelationship { get; set; }
        public Relationship StylesRelationship { get; set; }
        public Relationship CommentsRelationship { get; set; }

        public WordprocessingML(string name)
            : base(name)
        {
            DocumentRelationship =
                (
                    from dr in this.Relationships
                    where dr.RelationshipType == DocumentRelationshipType
                    select dr
                ).FirstOrDefault();

            StylesRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == StylesRelationshipType
                    select sr
                ).FirstOrDefault();

            CommentsRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == CommentsRelationshipType
                    select sr
                ).FirstOrDefault();
        }

        public string DefaultStyle()
        {
            // the following assignment is cheap - the XNamespace is already atomized
            XNamespace w = WordprocessingMLNamespace;
            return (string)(
                from style in this.StylesRelationship
                              .XDocument.Root.Elements(w + "style")
                where (string)style.Attribute(w + "type") == "paragraph" &&
                      (string)style.Attribute(w + "default") == "1"
                select style
            ).First().Attribute(w + "styleId");
        }

        public IEnumerable<Paragraph> Paragraphs()
        {
            // a good convention to use is to name the XNamespace
            // variable with the same name as the namespace prefix,
            // and to name XName variables with the local name of the element
            XNamespace w = WordprocessingMLNamespace;
            XName r = w + "r";
            XName ins = w + "ins";
            string defaultStyle = this.DefaultStyle();

            // query for all paragraphs in the document.
            return
                from p in this.DocumentRelationship.XDocument
                             .Root
                             .Element(w + "body")
                             .Descendants(w + "p")
                let styleNode = p
                                .Elements(w + "pPr")
                                .Elements(w + "pStyle")
                                .FirstOrDefault()
                select new Paragraph(this)
                {
                    ParagraphElement = p,
                    StyleName = styleNode != null ?
                        (string)styleNode.Attribute(w + "val") :
                        defaultStyle,
                    // in the following query, we need to select both
                    // the r and ins elements in order to assemble the text
                    // properly for paragraphs that have tracked changes.
                    Text = p
                           .Elements()
                           .Where(z => z.Name == r || z.Name == ins)
                           .Descendants(w + "t")
                           .StringConcatenate(element => (string)element)
                };
        }
    }

    class SpreadsheetML : OpenXmlDocument
    {
        public const string DocumentRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
        public const string StylesRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
        public const string CalcChainRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/calcChain";
        public const string WorksheetRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet";
        public const string SharedStringsRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";

        public Relationship DocumentRelationship { get; set; }
        public Relationship StylesRelationship { get; set; }
        public Relationship CalcChainRelationship { get; set; }
        public Relationship WorksheetRelationship { get; set; }
        public Relationship SharedStringsRelationship { get; set; }
        public XNamespace SpreadsheetMLNamespace =
            "http://schemas.openxmlformats.org/spreadsheetml/2006/main";

        public SpreadsheetML(string name) : base(name)
        {
            DocumentRelationship =
                (
                    from dr in this.Relationships
                    where dr.RelationshipType == DocumentRelationshipType
                    select dr
                ).FirstOrDefault();

            StylesRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == StylesRelationshipType
                    select sr
                ).FirstOrDefault();

            CalcChainRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == CalcChainRelationshipType
                    select sr
                ).FirstOrDefault();

            SharedStringsRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == SharedStringsRelationshipType
                    select sr
                ).FirstOrDefault();
        }

        public IEnumerable<Sheet> Sheets()
        {
            XNamespace s = this.SpreadsheetMLNamespace;
            XNamespace r = this.RelationshipNamespace;

            return
                from sheet in this.DocumentRelationship
                              .XDocument
                              .Root
                              .Element(s + "sheets")
                              .Elements(s + "sheet")
                select new Sheet(this)
                {
                    Name = (string)sheet.Attribute("name"),
                    SheetId = (string)sheet.Attribute("sheetId"),
                    Id = (string)sheet.Attribute(r + "id"),
                    XDocument =
                        (
                            from rel in this.DocumentRelationship.Relationships
                            where rel.Id == (string)sheet.Attribute(r + "id")
                            select rel.XDocument
                        ).FirstOrDefault()

                };
        }

        public class Sheet
        {
            public string Name { get; set; }
            public string Id { get; set; }
            public string SheetId { get; set; }
            public XDocument XDocument { get; set; }
            public IEnumerable<Row> Rows()
            {
                XNamespace s = Parent.SpreadsheetMLNamespace;
                XNamespace r = Parent.RelationshipNamespace;

                XDocument sheetXDocument =
                    (
                        from rel in Parent.DocumentRelationship.Relationships
                        where rel.Id == this.Id
                        select rel.XDocument
                    ).First();

                return
                    from row in sheetXDocument
                                .Root
                                .Element(s + "sheetData")
                                .Elements(s + "row")
                    select new Row(this)
                    {
                        RowElement = row,
                        RowId = (string)row.Attribute("r"),
                        Spans = (string)row.Attribute("spans")
                    };
            }
            public SpreadsheetML Parent { get; set; }
            public Sheet(SpreadsheetML parent) { Parent = parent; }
        }

        public class Row
        {
            public XElement RowElement { get; set; }
            public string RowId { get; set; }
            public string Spans { get; set; }
            public IEnumerable<Cell> Cells()
            {
                XNamespace s = Parent.Parent.SpreadsheetMLNamespace;
                XNamespace r = Parent.Parent.RelationshipNamespace;
                Relationship ssr = Parent.Parent.SharedStringsRelationship;
                return
                    from cell in this.RowElement.Elements(s + "c")
                    let cellType = (string)cell.Attribute("t")
                    let sharedStringIndex = (int?)cell.Element(s + "v")
                    let sharedString = cellType == "s" ?
                        ssr
                        .XDocument
                        .Root
                        .Elements(s + "si")
                        .Skip((int)sharedStringIndex)
                        .First()
                        .Descendants(s + "t")
                        .StringConcatenate(e => (string)e)
                        : null
                    select new Cell(this)
                    {
                        CellElement = cell,
                        Row = (string)RowElement.Attribute("r"),
                        Column = (string)cell.Attribute("r"),
                        Type = (string)cell.Attribute("t"),
                        Formula = (string)cell.Element(s + "f"),
                        Value = (string)cell.Element(s + "v"),
                        SharedString = sharedString
                    };
            }
            public Sheet Parent { get; set; }
            public Row(Sheet parent) { Parent = parent; }
        }

        public class Cell
        {
            public XElement CellElement { get; set; }
            public string Row { get; set; }
            public string Column { get; set; }
            public string Type { get; set; }
            public string Value { get; set; }
            public string Formula { get; set; }
            public string SharedString { get; set; }
            public Row Parent { get; set; }
            public Cell(Row parent) { Parent = parent; }
        }
    }

    public static class LocalExtensions
    {
        public static string StringConcatenate<T>(this IEnumerable<T> source,
            Func<T, string> func)
        {
            StringBuilder sb = new StringBuilder();
            foreach (T item in source)
                sb.Append(func(item));
            return sb.ToString();
        }
    }
}

Leave a Comment
  • Please add 8 and 8 and type the answer here:
  • Post
  • This post describes the SpreadsheetML class (derives from the OpenXmlDocument class), which presents

  • Hi Eric,

    do you have an actual/updated Version of the SpreadsheetML?

    (for the Open XML SDK 2.0 and VS 2008)

    regards

    Christoph

  • Hi Christoph,

    I haven't updated this code for the Open XML SDK V2.  But you can be sure that I'll post it as soon as I am able.

    -Eric

Page 1 of 1 (3 items)