The WordprocessingML Class
This page presents a refinement of the OpenXmlDocument class: a new class, WordprocessingML, that derives from OpenXmlDocument. The WordprocessingML class adds functionality that is specific to WordprocessingML documents, including:
· Constant strings that hold the relationship-type URIs: DocumentRelationshipType, StylesRelationshipType, and CommentsRelationshipType.
· An XNamespace object that contains the main XML namespace for WordprocessingML documents.
· Properties, initialized in the constructor, that hold the main DocumentRelationship object, the StylesRelationship object, and the CommentsRelationship object. The Relationship class is declared in the following source code; each instance represents a node in the object graph that contains an entire OpenXML document.
· A DefaultStyle method that queries for the default style of the document.
· A Paragraphs method that enumerates all paragraphs in the document. The Paragraph class contains the XElement node of the paragraph (for further querying if necessary), the style of the paragraph, the text of the paragraph, and a collection of comments for the paragraph. The comments are a collection because a paragraph can have more than one comment.
Following is a simple example that shows the use of the WordprocessingML class:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using Microsoft.Examples.LtxOpenXml;
// Example driver: opens Test.docx and prints every paragraph's style and
// text, followed by the comments attached to that paragraph.
class Program
{
    public static void Main(string[] args)
    {
        string filename = "Test.docx";
        using (WordprocessingML doc = new WordprocessingML(filename))
        {
            foreach (var p in doc.Paragraphs())
            {
                // StyleName is read from the w:val attribute of w:pStyle and is
                // null when that attribute is absent, so guard before padding.
                string styleName = p.StyleName ?? string.Empty;
                Console.WriteLine("Style: {0} Text: >{1}<",
                    styleName.PadRight(16), p.Text);

                // Comments is null for paragraphs that have no comments.
                if (p.Comments == null)
                {
                    continue;
                }

                foreach (var c in p.Comments)
                {
                    Console.WriteLine(" Id: {0}", c.Id);
                    Console.WriteLine(" Author: {0}", c.Author);
                    Console.WriteLine(" Text: >{0}<", c.Text);
                }
            }
        }
    }
}
This is the complete listing of both the OpenXmlDocument class and the WordprocessingML class:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.IO;
using System.IO.Packaging;
namespace Microsoft.Examples.LtxOpenXml
{
/// <summary>
/// A single comment attached to a paragraph of a WordprocessingML document.
/// </summary>
internal class Comment
{
    /// <summary>Numeric identifier of the comment (the value of its w:id attribute).</summary>
    public int Id { get; set; }

    /// <summary>Text content of the comment.</summary>
    public string Text { get; set; }

    /// <summary>Author of the comment (the value of its w:author attribute).</summary>
    public string Author { get; set; }
}
/// <summary>
/// One paragraph of a WordprocessingML document, as produced by
/// WordprocessingML.Paragraphs().
/// </summary>
internal class Paragraph
{
    /// <summary>The w:p element itself, kept for any further querying.</summary>
    public XElement ParagraphElement { get; set; }

    /// <summary>
    /// Style id of the paragraph; the document's default paragraph style is
    /// used when the paragraph does not name one.
    /// </summary>
    public string StyleName { get; set; }

    /// <summary>Concatenated text of the paragraph.</summary>
    public string Text { get; set; }

    /// <summary>
    /// Comments attached to the paragraph. A paragraph can carry several
    /// comments; the value is null when the paragraph has none.
    /// </summary>
    public IEnumerable<Comment> Comments { get; set; }
}
/// <summary>
/// A node in the object graph that mirrors the relationships of an Open XML
/// package: one package relationship together with the part it targets and
/// that part's own relationships.
/// </summary>
internal class Relationship
{
    /// <summary>Identifier of the package relationship.</summary>
    public string Id { get; set; }

    /// <summary>Relationship type URI.</summary>
    public string RelationshipType { get; set; }

    /// <summary>Content type of the targeted part.</summary>
    public string ContentType { get; set; }

    /// <summary>Target mode of the relationship.</summary>
    public System.IO.Packaging.TargetMode TargetMode { get; set; }

    /// <summary>URI of the relationship's source.</summary>
    public Uri SourceUri { get; set; }

    /// <summary>URI of the relationship's target.</summary>
    public Uri TargetUri { get; set; }

    /// <summary>The part that the relationship targets.</summary>
    public PackagePart PackagePart { get; set; }

    /// <summary>
    /// Parsed XML of the targeted part, or null when the part's content type
    /// is not one of the recognised XML content types.
    /// </summary>
    public XDocument XDocument { get; set; }

    /// <summary>Relationships that originate from the targeted part.</summary>
    public List<Relationship> Relationships { get; set; }
}
/// <summary>
/// Opens an Open XML package read-only and loads its internal relationships
/// into a graph of <see cref="Relationship"/> objects. Parts whose content
/// type is listed in <see cref="XmlContentTypes"/> are parsed into an
/// XDocument. Dispose the instance to close the underlying package.
/// </summary>
class OpenXmlDocument : IDisposable
{
    // public
    /// <summary>Path of the package file, as passed to the constructor.</summary>
    public string Name { get; set; }

    /// <summary>The opened package.</summary>
    public Package Package { get; set; }

    /// <summary>Package-level relationships; each one carries its own child relationships.</summary>
    public List<Relationship> Relationships { get; set; }

    // private
    private bool disposed = false;

    /// <summary>Closes the underlying package. Safe to call more than once.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    private void Dispose(bool disposing)
    {
        if (!this.disposed)
        {
            // Package has a public setter, so it may have been set to null.
            if (disposing && Package != null)
            {
                Package.Close();
            }
            disposed = true;
        }
    }

    /// <summary>
    /// Parses the part into an XDocument when its content type is one of the
    /// recognised XML content types; otherwise returns null.
    /// </summary>
    /// <param name="relationshipType">Relationship type of the part (currently unused).</param>
    /// <param name="part">The part to load.</param>
    private XDocument LoadXDocument(string relationshipType,
        PackagePart part)
    {
        if (!XmlContentTypes.Contains(part.ContentType))
        {
            return null;
        }

        // XDocument.Load reads the whole document into memory, so the stream
        // and reader can be released as soon as it returns.
        using (Stream partStream = part.GetStream())
        using (XmlReader reader = XmlReader.Create(partStream))
        {
            return XDocument.Load(reader);
        }
    }

    /// <summary>
    /// Builds a Relationship node for every internal relationship in the
    /// collection, recursing into the relationships of each targeted part.
    /// </summary>
    /// <param name="prc">The relationships to convert.</param>
    /// <returns>One node per internal relationship; external ones are skipped.</returns>
    private List<Relationship> CreateRelationshipList(
        PackageRelationshipCollection prc)
    {
        // NOTE(review): the recursion has no guard against parts whose
        // relationships refer back to each other; confirm that the packages
        // this is used on cannot contain such cycles.
        return (
            from pr in prc
            where pr.TargetMode == TargetMode.Internal
            let uri = PackUriHelper.ResolvePartUri(
                new Uri(pr.SourceUri.ToString(), UriKind.Relative),
                pr.TargetUri)
            let part = pr.Package.GetPart(uri)
            let xdoc = LoadXDocument(pr.RelationshipType, part)
            let partRelationshipList =
                CreateRelationshipList(part.GetRelationships())
            select new Relationship
            {
                Id = pr.Id,
                RelationshipType = pr.RelationshipType,
                TargetMode = pr.TargetMode,
                ContentType = part.ContentType,
                SourceUri = pr.SourceUri,
                TargetUri = pr.TargetUri,
                PackagePart = part,
                XDocument = xdoc,
                Relationships = partRelationshipList
            }
        ).ToList();
    }

    /// <summary>
    /// Opens the package at <paramref name="name"/> for reading and loads its
    /// relationship graph.
    /// </summary>
    /// <param name="name">Path of the package file.</param>
    public OpenXmlDocument(string name)
    {
        Name = name;
        Package = Package.Open(Name, FileMode.Open, FileAccess.Read);
        try
        {
            Relationships = CreateRelationshipList(
                Package.GetRelationships());
        }
        catch
        {
            // The caller never receives the instance when the constructor
            // throws, so nobody else can close the package.
            Package.Close();
            throw;
        }
    }

    /// <summary>Content types of parts that are loaded as XML.</summary>
    public HashSet<string> XmlContentTypes = new HashSet<string>
    {
        "application/vnd.openxmlformats-officedocument.custom-properties+xml",
        "application/vnd.openxmlformats-officedocument.customXmlProperties+xml",
        "application/vnd.openxmlformats-officedocument.drawing+xml",
        "application/vnd.openxmlformats-officedocument.drawingml.chart+xml",
        "application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml",
        "application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml",
        "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml",
        "application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml",
        "application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml",
        "application/vnd.openxmlformats-officedocument.extended-properties+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.presentationProperties+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.tags+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml",
        "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
        "application/vnd.openxmlformats-officedocument.theme+xml",
        "application/vnd.openxmlformats-officedocument.themeOverride+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
        "application/vnd.openxmlformats-package.core-properties+xml",
        "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml",
        "application/xml"
    };
}
/// <summary>
/// An <see cref="OpenXmlDocument"/> specialised for WordprocessingML
/// documents. It locates the main document, styles and comments parts and
/// offers queries for the default paragraph style and for the document's
/// paragraphs.
/// </summary>
class WordprocessingML : OpenXmlDocument
{
    public const string DocumentRelationshipType =
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
    public const string StylesRelationshipType =
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
    public const string CommentsRelationshipType =
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";

    /// <summary>Relationship that targets the main document part.</summary>
    public Relationship DocumentRelationship { get; set; }

    /// <summary>Relationship that targets the styles part, or null when there is none.</summary>
    public Relationship StylesRelationship { get; set; }

    /// <summary>Relationship that targets the comments part, or null when there is none.</summary>
    public Relationship CommentsRelationship { get; set; }

    public XNamespace WordprocessingMLNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Opens the document and locates its main, styles and comments parts.
    /// </summary>
    /// <param name="name">Path of the document file.</param>
    /// <exception cref="InvalidDataException">
    /// The package has no main document relationship.
    /// </exception>
    public WordprocessingML(string name)
        : base(name)
    {
        DocumentRelationship =
            FindRelationship(this.Relationships, DocumentRelationshipType);
        if (DocumentRelationship == null)
        {
            // The base constructor has already opened the package; close it,
            // because the caller never receives an instance to dispose.
            Dispose();
            throw new InvalidDataException(
                "The package does not contain a main document part: " + name);
        }

        StylesRelationship =
            FindRelationship(DocumentRelationship.Relationships, StylesRelationshipType);
        CommentsRelationship =
            FindRelationship(DocumentRelationship.Relationships, CommentsRelationshipType);
    }

    /// <summary>
    /// Returns the style id of the default paragraph style, or null when the
    /// document has no styles part or declares no default paragraph style.
    /// </summary>
    public string DefaultStyle()
    {
        // the following assignment is cheap - the XNamespace is already atomized
        XNamespace w = WordprocessingMLNamespace;

        XElement stylesRoot = RootOf(StylesRelationship);
        if (stylesRoot == null)
        {
            return null;
        }

        XElement defaultStyle = stylesRoot
            .Elements(w + "style")
            .FirstOrDefault(style =>
                (string)style.Attribute(w + "type") == "paragraph" &&
                IsOn((string)style.Attribute(w + "default")));

        return defaultStyle != null
            ? (string)defaultStyle.Attribute(w + "styleId")
            : null;
    }

    /// <summary>
    /// Enumerates every paragraph in the document body with its style, its
    /// text and its comments.
    /// </summary>
    /// <returns>
    /// A deferred query. Paragraph.Comments is null for a paragraph that has
    /// no w:commentRangeStart child element.
    /// </returns>
    public IEnumerable<Paragraph> Paragraphs()
    {
        // a good convention to use is to name the XNamespace
        // variable with the same name as the namespace prefix,
        // and to name XName variables with the local name of the element
        XNamespace w = WordprocessingMLNamespace;
        XName r = w + "r";
        XName ins = w + "ins";
        string defaultStyle = this.DefaultStyle();

        XElement documentRoot = RootOf(DocumentRelationship);
        XElement body = documentRoot != null ? documentRoot.Element(w + "body") : null;
        if (body == null)
        {
            return Enumerable.Empty<Paragraph>();
        }

        // index the comments once, instead of scanning the comments part
        // twice for every comment of every paragraph
        Dictionary<int, XElement> commentsById = IndexCommentsById();

        // query for all paragraphs in the document.
        return
            from p in body.Descendants(w + "p")
            let styleNode = p
                .Elements(w + "pPr")
                .Elements(w + "pStyle")
                .FirstOrDefault()
            let commentRangeStarts = p
                .Elements(w + "commentRangeStart")
                .ToList()
            select new Paragraph
            {
                ParagraphElement = p,
                StyleName = styleNode != null ?
                    (string)styleNode.Attribute(w + "val") :
                    defaultStyle,
                // in the following query, we need to select both
                // the r and ins elements in order to assemble the text
                // properly for paragraphs that have tracked changes.
                Text = p
                    .Elements()
                    .Where(z => z.Name == r || z.Name == ins)
                    .Descendants(w + "t")
                    .StringConcatenate(element => (string)element),
                // if there are no comments for the paragraph, Comments is null
                Comments = commentRangeStarts.Count > 0 ?
                    commentRangeStarts
                        .Attributes(w + "id")
                        .Select(id => CreateComment((int)id, commentsById))
                        .ToList()
                    :
                    null
            };
    }

    // Returns the first relationship of the given type, or null if there is none.
    private static Relationship FindRelationship(
        IEnumerable<Relationship> relationships, string relationshipType)
    {
        return relationships
            .FirstOrDefault(rel => rel.RelationshipType == relationshipType);
    }

    // Returns the root element of the part behind the relationship, or null
    // when the relationship is missing or its part was not loaded as XML.
    private static XElement RootOf(Relationship relationship)
    {
        if (relationship == null || relationship.XDocument == null)
        {
            return null;
        }
        return relationship.XDocument.Root;
    }

    // True for the attribute values that switch an on/off property on.
    private static bool IsOn(string value)
    {
        return value == "1" || value == "true" || value == "on";
    }

    // Maps comment id to its w:comment element. Comments without an id are
    // skipped, and the first element wins when an id occurs more than once.
    private Dictionary<int, XElement> IndexCommentsById()
    {
        XNamespace w = WordprocessingMLNamespace;
        Dictionary<int, XElement> index = new Dictionary<int, XElement>();

        XElement commentsRoot = RootOf(CommentsRelationship);
        if (commentsRoot == null)
        {
            return index;
        }

        foreach (XElement comment in commentsRoot.Elements(w + "comment"))
        {
            XAttribute idAttribute = comment.Attribute(w + "id");
            if (idAttribute == null)
            {
                continue;
            }

            int id = (int)idAttribute;
            if (!index.ContainsKey(id))
            {
                index.Add(id, comment);
            }
        }
        return index;
    }

    // Builds the Comment for an id referenced from a paragraph. Author and
    // Text are empty strings when the comment (or its author) cannot be found.
    private Comment CreateComment(int id, Dictionary<int, XElement> commentsById)
    {
        XNamespace w = WordprocessingMLNamespace;

        XElement commentElement;
        if (!commentsById.TryGetValue(id, out commentElement))
        {
            return new Comment { Id = id, Author = string.Empty, Text = string.Empty };
        }

        return new Comment
        {
            Id = id,
            Author = (string)commentElement.Attribute(w + "author") ?? string.Empty,
            // one line per paragraph of the comment; surrounding white space
            // (including the final newline) is trimmed
            Text = commentElement
                .Descendants(w + "p")
                .StringConcatenate(paragraph =>
                    paragraph
                        .Descendants(w + "t")
                        .StringConcatenate(t => (string)t)
                    + "\n")
                .Trim()
        };
    }
}
/// <summary>Extension methods used by the document classes.</summary>
public static class LocalExtensions
{
    /// <summary>
    /// Projects every item of the sequence to a string and concatenates the
    /// results in order, without a separator.
    /// </summary>
    /// <param name="source">The items to concatenate.</param>
    /// <param name="func">Projection from an item to its string form.</param>
    /// <returns>The concatenated string; empty when the sequence is empty.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="func"/> is null.
    /// </exception>
    public static string StringConcatenate<T>(this IEnumerable<T> source,
        Func<T, string> func)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (func == null)
        {
            throw new ArgumentNullException("func");
        }

        StringBuilder sb = new StringBuilder();
        foreach (T item in source)
        {
            sb.Append(func(item));
        }
        return sb.ToString();
    }
}
}