using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Xml;
using System.Xml.Linq;
using Microsoft.Office.DocumentFormat.OpenXml.Packaging;

/// <summary>
/// A run of consecutive source items that all produced the same key.
/// Instances are created by <see cref="LocalExtensions.GroupAdjacent{TSource, TKey}"/>.
/// </summary>
/// <typeparam name="TSource">Type of the grouped items.</typeparam>
/// <typeparam name="TKey">Type of the key shared by the items.</typeparam>
public class GroupOfAdjacent<TSource, TKey> : IEnumerable<TSource>, IGrouping<TKey, TSource>
{
    /// <summary>The key shared by every item in this group.</summary>
    public TKey Key { get; set; }

    // Backing list; the constructor stores the caller's list as-is (no copy).
    private List<TSource> GroupList { get; set; }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        // Forward to the generic enumerator so both interfaces yield the same sequence.
        return ((System.Collections.Generic.IEnumerable<TSource>)this).GetEnumerator();
    }

    System.Collections.Generic.IEnumerator<TSource> System.Collections.Generic.IEnumerable<TSource>.GetEnumerator()
    {
        foreach (var s in GroupList)
        {
            yield return s;
        }
    }

    /// <summary>Creates a group over <paramref name="source"/> identified by <paramref name="key"/>.</summary>
    /// <param name="source">The items belonging to the group, in order.</param>
    /// <param name="key">The key common to all items.</param>
    public GroupOfAdjacent(List<TSource> source, TKey key)
    {
        GroupList = source;
        Key = key;
    }
}

/// <summary>LINQ helper extensions used by the sample program.</summary>
public static class LocalExtensions
{
    /// <summary>
    /// Builds a path of local element names from the outermost ancestor down to
    /// <paramref name="el"/>. Every name is followed by "/", so the result ends
    /// with a trailing slash (for example "a/b/c/").
    /// </summary>
    /// <param name="el">The element whose path is wanted.</param>
    /// <returns>The slash-terminated path of local names.</returns>
    public static string GetPath(this XElement el)
    {
        // AncestorsAndSelf yields the element first and its ancestors after it,
        // so each name is prepended to put the outermost ancestor on the left.
        return el
            .AncestorsAndSelf()
            .Aggregate("", (seed, i) => i.Name.LocalName + "/" + seed);
    }

    /// <summary>Concatenates all strings in <paramref name="source"/> with no separator.</summary>
    /// <param name="source">The strings to join.</param>
    /// <returns>The concatenation; an empty string when the sequence is empty.</returns>
    public static string StringConcatenate(this IEnumerable<string> source)
    {
        return source.Aggregate(
            new StringBuilder(),
            (s, i) => s.Append(i),
            s => s.ToString());
    }

    /// <summary>
    /// Projects every item to a string with <paramref name="projectionFunc"/> and
    /// concatenates the results with no separator.
    /// </summary>
    /// <typeparam name="T">Type of the source items.</typeparam>
    /// <param name="source">The items to project and join.</param>
    /// <param name="projectionFunc">Converts an item to the text to append.</param>
    /// <returns>The concatenation; an empty string when the sequence is empty.</returns>
    public static string StringConcatenate<T>(
        this IEnumerable<T> source,
        Func<T, string> projectionFunc)
    {
        return source.Aggregate(
            new StringBuilder(),
            (s, i) => s.Append(projectionFunc(i)),
            s => s.ToString());
    }

    /// <summary>
    /// Groups consecutive items that produce equal keys. Unlike <c>GroupBy</c>,
    /// a key that reappears later in the sequence starts a new group.
    /// Evaluation is deferred (iterator method).
    /// </summary>
    /// <typeparam name="TSource">Type of the source items.</typeparam>
    /// <typeparam name="TKey">Type of the key.</typeparam>
    /// <param name="source">The sequence to split into runs.</param>
    /// <param name="keySelector">Computes the key for an item; may return null.</param>
    /// <returns>One group per run of adjacent equal keys, in source order.</returns>
    public static IEnumerable<IGrouping<TKey, TSource>> GroupAdjacent<TSource, TKey>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector)
    {
        // The default comparer treats null keys as equal to each other instead
        // of throwing, which calling k.Equals(last) directly would do.
        EqualityComparer<TKey> comparer = EqualityComparer<TKey>.Default;

        TKey last = default(TKey);
        bool haveLast = false;
        List<TSource> list = new List<TSource>();

        foreach (TSource s in source)
        {
            TKey k = keySelector(s);
            if (haveLast && !comparer.Equals(k, last))
            {
                // Key changed: emit the finished run and start a new one.
                yield return new GroupOfAdjacent<TSource, TKey>(list, last);
                list = new List<TSource>();
            }

            list.Add(s);
            last = k;
            haveLast = true;
        }

        // Emit the final run; nothing is emitted for an empty source.
        if (haveLast)
        {
            yield return new GroupOfAdjacent<TSource, TKey>(list, last);
        }
    }
}

/// <summary>
/// Sample program: prints every block of adjacent "Code"-styled paragraphs in
/// SampleDoc.docx together with the text of the comment attached to it.
/// </summary>
class Program
{
    // WordprocessingML main namespace, used to build element and attribute names.
    static readonly XNamespace w =
        "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>Loads the XML content of a package part into an <see cref="XDocument"/>.</summary>
    /// <param name="part">The part whose stream is read.</param>
    /// <returns>The parsed document.</returns>
    public static XDocument LoadXDocument(OpenXmlPart part)
    {
        // Dispose both the stream reader and the XML reader once parsing is done.
        using (StreamReader streamReader = new StreamReader(part.GetStream()))
        using (XmlReader xmlReader = XmlReader.Create(streamReader))
        {
            return XDocument.Load(xmlReader);
        }
    }

    /// <summary>Returns the style id referenced by a paragraph's w:pPr/w:pStyle element.</summary>
    /// <param name="para">A w:p element.</param>
    /// <returns>The w:val attribute value, or null when the paragraph names no style.</returns>
    public static string GetParagraphStyle(XElement para)
    {
        return (string)para.Elements(w + "pPr")
            .Elements(w + "pStyle")
            .Attributes(w + "val")
            .FirstOrDefault();
    }

    /// <summary>
    /// Returns the text of the comment with the given id. The w:t text of each
    /// comment paragraph is concatenated and followed by "\n".
    /// </summary>
    /// <param name="commentsDoc">The parsed comments part.</param>
    /// <param name="id">The w:id of the wanted comment.</param>
    /// <returns>The comment text, one line per comment paragraph.</returns>
    /// <exception cref="InvalidOperationException">No comment has the given id.</exception>
    public static string GetCommentText(XDocument commentsDoc, string id)
    {
        XElement commentNode = commentsDoc.Root
            .Elements(w + "comment")
            .First(c => (string)c.Attribute(w + "id") == id);

        return commentNode.Elements(w + "p")
            .StringConcatenate(node =>
                node.Descendants(w + "t")
                    .Select(t => (string)t)
                    .StringConcatenate() + "\n");
    }

    static void Main(string[] args)
    {
        const string filename = "SampleDoc.docx";

        // The document is only read, so open it read-only rather than editable.
        using (WordprocessingDocument wordDoc =
            WordprocessingDocument.Open(filename, false))
        {
            MainDocumentPart mainPart = wordDoc.MainDocumentPart;
            StyleDefinitionsPart stylePart = mainPart.StyleDefinitionsPart;
            CommentsPart commentsPart = mainPart.CommentsPart;

            XDocument mainPartDoc = LoadXDocument(mainPart);
            XDocument styleDoc = LoadXDocument(stylePart);

            // A document without comments has no comments part; treat that as
            // "no comment text" instead of failing while loading it.
            XDocument commentsDoc =
                commentsPart == null ? null : LoadXDocument(commentsPart);

            // Style id of the style flagged as the default paragraph style.
            string defaultStyle = (string)styleDoc.Root
                .Elements(w + "style")
                .Where(style =>
                    (string)style.Attribute(w + "type") == "paragraph" &&
                    (string)style.Attribute(w + "default") == "1")
                .First()
                .Attribute(w + "styleId");

            // Pair every paragraph with its style, falling back to the default.
            var paragraphs = mainPartDoc.Root
                .Element(w + "body")
                .Descendants(w + "p")
                .Select(p => new
                {
                    ParagraphNode = p,
                    Style = GetParagraphStyle(p) ?? defaultStyle
                });

            XName r = w + "r";
            XName ins = w + "ins";

            // Paragraph text is taken from w:t elements under the paragraph's
            // direct w:r and w:ins children only.
            var paragraphsWithText = paragraphs.Select(p => new
            {
                ParagraphNode = p.ParagraphNode,
                Style = p.Style,
                Text = p.ParagraphNode
                    .Elements()
                    .Where(z => z.Name == r || z.Name == ins)
                    .Descendants(w + "t")
                    .StringConcatenate(s => (string)s)
            });

            var groupedCodeParagraphs = paragraphsWithText
                .GroupAdjacent(p => p.Style)
                .Where(g => g.Key == "Code");

            var groupedCodeWithComments = groupedCodeParagraphs.Select(g =>
            {
                // Id of the first comment range opened directly inside the
                // group's paragraphs; null when the block carries no comment.
                string id = (string)g.Select(p => p.ParagraphNode)
                    .Elements(w + "commentRangeStart")
                    .Attributes(w + "id")
                    .FirstOrDefault();

                return new
                {
                    ParagraphGroup = g,
                    Comment = (id == null || commentsDoc == null)
                        ? string.Empty
                        : GetCommentText(commentsDoc, id)
                };
            });

            foreach (var group in groupedCodeWithComments)
            {
                Console.WriteLine("Code Block");
                Console.WriteLine("==========");
                foreach (var paragraph in group.ParagraphGroup)
                {
                    Console.WriteLine(paragraph.Text);
                }
                Console.WriteLine();
                Console.WriteLine("Meta Data");
                Console.WriteLine("=========");
                Console.WriteLine(group.Comment);
                Console.WriteLine();
            }
        }
    }
}