Blog Map
[Table of Contents] [Next Topic] [Blog Map] This blog is inactive. New blog: EricWhite.com/blog
The following code is attached to this page.
Imports System.IOImports System.XmlImports System.TextImports DocumentFormat.OpenXml.Packaging Public Class GroupOfAdjacent(Of TElement, TKey) Implements IEnumerable(Of TElement) Private _key As TKey Private _groupList As List(Of TElement) Public Property GroupList() As List(Of TElement) Get Return _groupList End Get Set(ByVal value As List(Of TElement)) _groupList = value End Set End Property Public ReadOnly Property Key() As TKey Get Return _key End Get End Property Public Function GetEnumerator() As System.Collections.Generic.IEnumerator(Of TElement) _ Implements System.Collections.Generic.IEnumerable(Of TElement).GetEnumerator Return _groupList.GetEnumerator End Function Public Function GetEnumerator1() As System.Collections.IEnumerator _ Implements System.Collections.IEnumerable.GetEnumerator Return _groupList.GetEnumerator End Function Public Sub New(ByVal key As TKey) _key = key _groupList = New List(Of TElement) End SubEnd Class Module Module1 <System.Runtime.CompilerServices.Extension()> _ Public Function GroupAdjacent(Of TElement, TKey)(ByVal source As IEnumerable(Of TElement), _ ByVal keySelector As Func(Of TElement, TKey)) As List(Of GroupOfAdjacent(Of TElement, TKey)) Dim lastKey As TKey = Nothing Dim currentGroup As GroupOfAdjacent(Of TElement, TKey) = Nothing Dim allGroups As List(Of GroupOfAdjacent(Of TElement, TKey)) = New List(Of GroupOfAdjacent(Of TElement, TKey))() For Each item In source Dim thisKey As TKey = keySelector(item) If lastKey IsNot Nothing And Not thisKey.Equals(lastKey) Then allGroups.Add(currentGroup) End If If Not thisKey.Equals(lastKey) Then currentGroup = New GroupOfAdjacent(Of TElement, TKey)(keySelector(item)) End If currentGroup.GroupList.Add(item) lastKey = thisKey Next If lastKey IsNot Nothing Then allGroups.Add(currentGroup) End If Return allGroups End Function <System.Runtime.CompilerServices.Extension()> _ Public Function GetPath(ByVal el As XElement) As String Return el _ .AncestorsAndSelf _ .InDocumentOrder _ .Aggregate("", Function(seed, i) seed & "/" & i.Name.LocalName) End Function <System.Runtime.CompilerServices.Extension()> _ Function StringConcatenate(ByVal source As IEnumerable(Of String)) _ As String Return source.Aggregate(New StringBuilder, _ Function(sb, i) sb.Append(i), _ Function(sb) sb.ToString) End Function <System.Runtime.CompilerServices.Extension()> _ Function StringConcatenate(Of T) _ (ByVal source As IEnumerable(Of T), ByVal projectionFunc As Func(Of T, String)) _ As String Return source.Aggregate(New StringBuilder, _ Function(sb, i) sb.Append(projectionFunc(i)), _ Function(sb) sb.ToString) End Function Public Function LoadXDocument(ByVal part As OpenXmlPart) _ As XDocument Using streamReader As StreamReader = New StreamReader(part.GetStream()) Using xmlReader As XmlReader = xmlReader.Create(streamReader) Return XDocument.Load(xmlReader) End Using End Using End Function Public Function GetParagraphStyle(ByVal para As XElement, _ ByVal defaultStyle As String) As String Dim w As XNamespace = _ "http://schemas.openxmlformats.org/wordprocessingml/2006/main" Dim paraStyle = CStr(para.Elements(w + "pPr") _ .Elements(w + "pStyle") _ .Attributes(w + "val") _ .FirstOrDefault()) If (paraStyle Is Nothing) Then Return defaultStyle Else Return paraStyle End If End Function Public Function GetComment(ByVal commentsDoc As XDocument, ByVal p As XElement) As String Dim w As XNamespace = _ "http://schemas.openxmlformats.org/wordprocessingml/2006/main" Dim id = _ CStr(p.Elements(w + "commentRangeStart") _ .First() _ .Attribute(w + "id")) Dim commentNode = commentsDoc.Root() _ .Elements(w + "comment") _ .Where(Function(c) CStr(c.Attribute(w + "id")) = id) _ .First() Dim comment = commentNode _ .Elements(w + "p") _ .StringConcatenate(Function(node) node _ .Descendants(w + "t") _ .Select(Function(t) CStr(t)) _ .StringConcatenate() & "\n") Return comment End Function Sub Main() Dim w As XNamespace = _ "http://schemas.openxmlformats.org/wordprocessingml/2006/main" Dim filename As String = "SampleDoc.docx" Using wordDoc As WordprocessingDocument = _ WordprocessingDocument.Open(filename, True) Dim mainPart As MainDocumentPart = _ wordDoc.MainDocumentPart Dim styleDefinitionPart As StyleDefinitionsPart = _ mainPart.StyleDefinitionsPart Dim commentsPart As WordprocessingCommentsPart = _ mainPart.WordprocessingCommentsPart Dim mainPartDoc As XDocument = LoadXDocument(mainPart) Dim styleDoc As XDocument = LoadXDocument(styleDefinitionPart) Dim commentsDoc As XDocument = LoadXDocument(commentsPart) Dim defaultStyle As String = _ CStr( _ ( _ From style In styleDoc.Root _ .Elements(w + "style") _ Where ( _ CStr(style.Attribute(w + "type")) = "paragraph" And _ CStr(style.Attribute(w + "default")) = "1") _ ) _ .First() _ .Attribute(w + "styleId") _ ) Dim paragraphs = _ mainPartDoc.Root _ .Element(w + "body") _ .Descendants(w + "p") _ .Select(Function(p) _ New With { _ .ParagraphNode = p, _ .Style = GetParagraphStyle(p, defaultStyle) _ } _ ) Dim r As XName = w + "r" Dim ins As XName = w + "ins" Dim paragraphsWithText = _ paragraphs.Select(Function(p) _ New With { _ .ParagraphNode = p.ParagraphNode, _ .Style = p.Style, _ .Text = p.ParagraphNode _ .Elements() _ .Where(Function(z) z.Name = r Or z.Name = ins) _ .Descendants(w + "t") _ .StringConcatenate(Function(s) CStr(s)) _ } _ ) Dim groupedCodeParagraphs = paragraphsWithText _ .GroupAdjacent(Function(p) p.Style) _ .Where(Function(g) g.Key = "Code") Dim groupedCodeWithComments = _ groupedCodeParagraphs.Select(Function(g) New With { _ .ParagraphGroup = g, _ .Comment = GetComment(commentsDoc, g.First().ParagraphNode) _ } _ ) For Each group In groupedCodeWithComments Console.WriteLine("Code Block") Console.WriteLine("==========") For Each paragraph In group.ParagraphGroup Console.WriteLine(paragraph.Text) Next Console.WriteLine() Console.WriteLine("Meta Data") Console.WriteLine("=========") Console.WriteLine(group.Comment) Console.WriteLine() Next End Using End SubEnd Module
[Table of Contents] [Next Topic] [Blog Map]