Welcome to MSDN Blogs Sign in | Join | Help
The WordprocessingML Class

This page presents a refinement of the OpenXmlDocument class: a new class, WordprocessingML, that derives from OpenXmlDocument. The WordprocessingML class adds functionality that is specific to WordprocessingML documents, including:

·         Some constant strings that contain the DocumentRelationshipType, the StylesRelationshipType, and the CommentsRelationshipType.

·         An XNamespace object that contains the main XML namespace for WordprocessingML documents.

·         Initialized properties that find the main DocumentRelationship object, the StylesRelationship object, and the CommentsRelationship object. The Relationship class is declared in the following source code, and represents a node in the object graph that contains an entire OpenXML document. 

·         A DefaultStyle method that queries for the default style of the document.

·         A Paragraphs method that enumerates all paragraphs in the document. The Paragraph class contains the XElement node of the paragraph (for further querying if necessary), the style of the paragraph, the text of the paragraph, and a collection of comments for the paragraph. It needs to be a collection because a paragraph can have more than one comment.

Following is a simple example that shows the use of the WordprocessingML class:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using Microsoft.Examples.LtxOpenXml;

/// <summary>
/// Sample console program: prints the style and text of every paragraph in a
/// WordprocessingML document, followed by the comments attached to that paragraph.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point.
    /// </summary>
    /// <param name="args">
    /// Optional. The first argument is the path of the document to read;
    /// when no argument is given, "Test.docx" is used.
    /// </param>
    public static void Main(string[] args)
    {
        // allow the document to be chosen on the command line, but keep the
        // original default so that running without arguments behaves as before
        string filename = (args != null && args.Length > 0) ? args[0] : "Test.docx";

        using (WordprocessingML doc = new WordprocessingML(filename))
        {
            foreach (var p in doc.Paragraphs())
            {
                // StyleName can be null (for example a pStyle element without a
                // val attribute), so do not call PadRight on it directly
                Console.WriteLine("Style: {0}   Text: >{1}<",
                    (p.StyleName ?? string.Empty).PadRight(16), p.Text);

                // Comments is null for paragraphs that have no comments
                if (p.Comments == null)
                    continue;

                foreach (var c in p.Comments)
                {
                    Console.WriteLine("  Id: {0}", c.Id);
                    Console.WriteLine("  Author: {0}", c.Author);
                    Console.WriteLine("  Text: >{0}<", c.Text);
                }
            }
        }
    }
}

 

This is the complete listing of both the OpenXmlDocument class and the WordprocessingML class:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.IO;
using System.IO.Packaging;

 

namespace Microsoft.Examples.LtxOpenXml
{
    class Comment
    {
        public int Id { get; set; }
        public string Text { get; set; }
        public string Author { get; set; }
    }

 

    class Paragraph
    {
        public XElement ParagraphElement { get; set; }
        public string StyleName { get; set; }
        public string Text { get; set; }
        public IEnumerable<Comment> Comments { get; set; }
    }

 

    class Relationship
    {
        public string Id { get; set; }
        public string RelationshipType { get; set; }
        public string ContentType { get; set; }
        public System.IO.Packaging.TargetMode TargetMode { get; set; }
        public Uri SourceUri { get; set; }
        public Uri TargetUri { get; set; }
        public PackagePart PackagePart { get; set; }
        public XDocument XDocument { get; set; }
        public List<Relationship> Relationships { get; set; }
    }

 

    class OpenXmlDocument : IDisposable
    {
        // public
        public string Name { get; set; }
        public Package Package { get; set; }
        public List<Relationship> Relationships { get; set; }

 

        // private
        private bool disposed = false;

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

 

        private void Dispose(bool disposing)
        {
            if (!this.disposed)
            {
                if (disposing)
                    Package.Close();
                disposed = true;
            }
        }

 

        private XDocument LoadXDocument(string relationshipType,
            PackagePart part)
        {
            if (XmlContentTypes.Contains(part.ContentType))
                return XDocument.Load(XmlReader.Create(part.GetStream()));
            else
                return null;
        }

 

        private List<Relationship> CreateRelationshipList(
            PackageRelationshipCollection prc)
        {
            return (
                from pr in prc
                where pr.TargetMode == TargetMode.Internal
                let uri = PackUriHelper.ResolvePartUri(
                    new Uri(pr.SourceUri.ToString(), UriKind.Relative),
                    pr.TargetUri)
                let part = pr.Package.GetPart(uri)
                let contentType = part.ContentType
                let xdoc = LoadXDocument(pr.RelationshipType, part)
                let partRelationshipList =
                    CreateRelationshipList(part.GetRelationships())
                select new Relationship
                {
                    Id = pr.Id,
                    RelationshipType = pr.RelationshipType,
                    TargetMode = pr.TargetMode,
                    ContentType = part.ContentType,
                    SourceUri = pr.SourceUri,
                    TargetUri = pr.TargetUri,
                    PackagePart = part,
                    XDocument = xdoc,
                    Relationships = partRelationshipList
                }
            ).ToList();
        }

 

        public OpenXmlDocument(string name)
        {
            Name = name;
            Package = Package.Open(Name, FileMode.Open, FileAccess.Read);
            Relationships = CreateRelationshipList(
                Package.GetRelationships());
        }

 

        public HashSet<string> XmlContentTypes = new HashSet<string>
        {
            "application/vnd.openxmlformats-officedocument.custom-properties+xml",
            "application/vnd.openxmlformats-officedocument.customXmlProperties+xml",
            "application/vnd.openxmlformats-officedocument.drawing+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.chart+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml",
            "application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml",
            "application/vnd.openxmlformats-officedocument.extended-properties+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.presentationProperties+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.tags+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
            "application/vnd.openxmlformats-officedocument.theme+xml",
            "application/vnd.openxmlformats-officedocument.themeOverride+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
            "application/vnd.openxmlformats-package.core-properties+xml",
            "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml",
            "application/xml"
        };
    }

 

    class WordprocessingML : OpenXmlDocument
    {
        public const string DocumentRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
        public const string StylesRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
        public const string CommentsRelationshipType =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";

        public Relationship DocumentRelationship { get; set; }
        public Relationship StylesRelationship { get; set; }
        public Relationship CommentsRelationship { get; set; }
        public XNamespace WordprocessingMLNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

 

        public WordprocessingML(string name)
            : base(name)
        {
            DocumentRelationship =
                (
                    from dr in this.Relationships
                    where dr.RelationshipType == DocumentRelationshipType
                    select dr
                ).FirstOrDefault();

 

            StylesRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == StylesRelationshipType
                    select sr
                ).FirstOrDefault();

 

            CommentsRelationship =
                (
                    from sr in this.DocumentRelationship.Relationships
                    where sr.RelationshipType == CommentsRelationshipType
                    select sr
                ).FirstOrDefault();
        }

 

        public string DefaultStyle()
        {
            // the following assignment is cheap - the XNamespace is already atomized
            XNamespace w = WordprocessingMLNamespace;
            return (string)(
                from style in this.StylesRelationship
                              .XDocument.Root.Elements(w + "style")
                where (string)style.Attribute(w + "type") == "paragraph" &&
                      (string)style.Attribute(w + "default") == "1"
                select style
            ).First().Attribute(w + "styleId");
        }

 

        public IEnumerable<Paragraph> Paragraphs()
        {
            // a good convention to use is to name the XNamespace
            // variable with the same name as the namespace prefix,
            // and to name XName variables with the local name of the element

            XNamespace w = WordprocessingMLNamespace;
            XName r = w + "r";
            XName ins = w + "ins";
            string defaultStyle = this.DefaultStyle();

            // query for all paragraphs in the document.
            return
                from p in this.DocumentRelationship.XDocument
                             .Root
                             .Element(w + "body")
                             .Descendants(w + "p")
                let styleNode = p
                                .Elements(w + "pPr")
                                .Elements(w + "pStyle")
                                .FirstOrDefault()
                // if there are no comments for the paragraph, commentIds will be null
                let commentIds = p
                                 .Elements(w + "commentRangeStart")
                                 .Any() ?
                                 p
                                 .Elements(w + "commentRangeStart")
                                 .Attributes(w + "id")
                                 .Select(c => (int)c)
                                 :
                                 null
                select new Paragraph
                {
                    ParagraphElement = p,
                    StyleName = styleNode != null ?
                        (string)styleNode.Attribute(w + "val") :
                        defaultStyle,
                    // in the following query, we need to select both
                    // the r and ins elements in order to assemble the text
                    // properly for paragraphs that have tracked changes.
                    Text = p
                           .Elements()
                           .Where(z => z.Name == r || z.Name == ins)
                           .Descendants(w + "t")
                           .StringConcatenate(element => (string)element),
                    Comments = commentIds != null ?
                        commentIds
                        .Select(i =>
                            new Comment
                            {
                                Id = i,
                                Author =
                                    CommentsRelationship.XDocument
                                    .Root
                                    .Elements(w + "comment")
                                    .Where(c => (int)c.Attribute(w + "id") == i)
                                    .First()
                                    .Attribute(w + "author")
                                    .Value,
                                Text =
                                    CommentsRelationship.XDocument
                                    .Root
                                    .Elements(w + "comment")
                                    .Where(c => (int)c.Attribute(w + "id") == i)
                                    .First()
                                    .Descendants(w + "p")
                                    .Select(run => run
                                                   .Descendants(w + "t")
                                                   .StringConcatenate(e => (string)e)
                                                   + "\n")
                                    .Aggregate(new StringBuilder(), (sb, v) => sb.Append(v), sb => sb.ToString())
                                    .Trim()
                            }
                        )
                        .ToList()
                        :
                        null
                };
        }
    }

 

    public static class LocalExtensions
    {
        public static string StringConcatenate<T>(this IEnumerable<T> source,
            Func<T, string> func)
        {
            StringBuilder sb = new StringBuilder();
            foreach (T item in source)
                sb.Append(func(item));
            return sb.ToString();
        }
    }
}

 

Posted: Thursday, December 13, 2007 12:27 PM by EricWhite
Filed under: ,

Comments

Edje said:

Hi Eric,

Could you comment on how to add support for CustomXMLParts to this class?

Ed Richard

# February 18, 2008 12:32 AM

Eric White's Blog said:

(July 10, 2008 - I've written a new blog post on a better way to accomplish this.) This post presents

# July 10, 2008 11:14 PM
Leave a Comment

(required) 

(required) 

(optional)

(required) 

  
Enter Code Here: Required

Comment Notification

If you would like to receive an email when updates are made to this post, please register here

Subscribe to this post's comments using RSS

Page view tracker