Blog - Title

Complete Listing of ParseWordML - VB

Complete Listing of ParseWordML - VB

  • Comments 0

[Table of Contents] [Next Topic]

The following code is attached to this page.

This blog is inactive.
New blog: EricWhite.com/blog

Blog TOC

Imports System.IO
Imports System.Xml
Imports System.Text
Imports DocumentFormat.OpenXml.Packaging
 
Public Class GroupOfAdjacent(Of TElement, TKey)
    Implements IEnumerable(Of TElement)
 
    Private _key As TKey
    Private _groupList As List(Of TElement)
 
    Public Property GroupList() As List(Of TElement)
        Get
            Return _groupList
        End Get
        Set(ByVal value As List(Of TElement))
            _groupList = value
        End Set
    End Property
 
    Public ReadOnly Property Key() As TKey
        Get
            Return _key
        End Get
    End Property
 
    Public Function GetEnumerator() As System.Collections.Generic.IEnumerator(Of TElement) _
            Implements System.Collections.Generic.IEnumerable(Of TElement).GetEnumerator
        Return _groupList.GetEnumerator
    End Function
 
    Public Function GetEnumerator1() As System.Collections.IEnumerator _
            Implements System.Collections.IEnumerable.GetEnumerator
        Return _groupList.GetEnumerator
    End Function
 
    Public Sub New(ByVal key As TKey)
        _key = key
        _groupList = New List(Of TElement)
    End Sub
End Class
 
Module Module1
    <System.Runtime.CompilerServices.Extension()> _
    Public Function GroupAdjacent(Of TElement, TKey)(ByVal source As IEnumerable(Of TElement), _
            ByVal keySelector As Func(Of TElement, TKey)) As List(Of GroupOfAdjacent(Of TElement, TKey))
        Dim lastKey As TKey = Nothing
        Dim currentGroup As GroupOfAdjacent(Of TElement, TKey) = Nothing
        Dim allGroups As List(Of GroupOfAdjacent(Of TElement, TKey)) = New List(Of GroupOfAdjacent(Of TElement, TKey))()
        For Each item In source
            Dim thisKey As TKey = keySelector(item)
            If lastKey IsNot Nothing And Not thisKey.Equals(lastKey) Then
                allGroups.Add(currentGroup)
            End If
            If Not thisKey.Equals(lastKey) Then
                currentGroup = New GroupOfAdjacent(Of TElement, TKey)(keySelector(item))
            End If
            currentGroup.GroupList.Add(item)
            lastKey = thisKey
        Next
        If lastKey IsNot Nothing Then
            allGroups.Add(currentGroup)
        End If
        Return allGroups
    End Function
 
    <System.Runtime.CompilerServices.Extension()> _
    Public Function GetPath(ByVal el As XElement) As String
        Return el _
            .AncestorsAndSelf _
            .InDocumentOrder _
            .Aggregate("", Function(seed, i) seed & "/" & i.Name.LocalName)
    End Function
 
    <System.Runtime.CompilerServices.Extension()> _
    Function StringConcatenate(ByVal source As IEnumerable(Of String)) _
        As String
        Return source.Aggregate(New StringBuilder, _
            Function(sb, i) sb.Append(i), _
            Function(sb) sb.ToString)
    End Function
 
    <System.Runtime.CompilerServices.Extension()> _
    Function StringConcatenate(Of T) _
            (ByVal source As IEnumerable(Of T), ByVal projectionFunc As Func(Of T, String)) _
            As String
        Return source.Aggregate(New StringBuilder, _
            Function(sb, i) sb.Append(projectionFunc(i)), _
            Function(sb) sb.ToString)
    End Function
 
    Public Function LoadXDocument(ByVal part As OpenXmlPart) _
            As XDocument
        Using streamReader As StreamReader = New StreamReader(part.GetStream())
            Using xmlReader As XmlReader = xmlReader.Create(streamReader)
                Return XDocument.Load(xmlReader)
            End Using
        End Using
    End Function
 
    Public Function GetParagraphStyle(ByVal para As XElement, _
                                      ByVal defaultStyle As String) As String
        Dim w As XNamespace = _
            "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        Dim paraStyle = CStr(para.Elements(w + "pPr") _
                       .Elements(w + "pStyle") _
                       .Attributes(w + "val") _
                       .FirstOrDefault())
        If (paraStyle Is Nothing) Then
            Return defaultStyle
        Else
            Return paraStyle
        End If
    End Function
 
    Public Function GetComment(ByVal commentsDoc As XDocument, ByVal p As XElement) As String
        Dim w As XNamespace = _
            "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
 
        Dim id = _
            CStr(p.Elements(w + "commentRangeStart") _
                .First() _
                .Attribute(w + "id"))
 
        Dim commentNode = commentsDoc.Root() _
            .Elements(w + "comment") _
            .Where(Function(c) CStr(c.Attribute(w + "id")) = id) _
            .First()
 
        Dim comment = commentNode _
            .Elements(w + "p") _
            .StringConcatenate(Function(node) node _
                               .Descendants(w + "t") _
                               .Select(Function(t) CStr(t)) _
                               .StringConcatenate() & "\n")
 
        Return comment
    End Function
 
    Sub Main()
        Dim w As XNamespace = _
            "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        Dim filename As String = "SampleDoc.docx"
        Using wordDoc As WordprocessingDocument = _
            WordprocessingDocument.Open(filename, True)
            Dim mainPart As MainDocumentPart = _
                wordDoc.MainDocumentPart
            Dim styleDefinitionPart As StyleDefinitionsPart = _
                mainPart.StyleDefinitionsPart
            Dim commentsPart As WordprocessingCommentsPart = _
                mainPart.WordprocessingCommentsPart
            Dim mainPartDoc As XDocument = LoadXDocument(mainPart)
            Dim styleDoc As XDocument = LoadXDocument(styleDefinitionPart)
            Dim commentsDoc As XDocument = LoadXDocument(commentsPart)
 
            Dim defaultStyle As String = _
                CStr( _
                        ( _
                            From style In styleDoc.Root _
                                .Elements(w + "style") _
                            Where ( _
                                CStr(style.Attribute(w + "type")) = "paragraph" And _
                                CStr(style.Attribute(w + "default")) = "1") _
                        ) _
                        .First() _
                        .Attribute(w + "styleId") _
                    )
 
            Dim paragraphs = _
                mainPartDoc.Root _
                    .Element(w + "body") _
                    .Descendants(w + "p") _
                    .Select(Function(p) _
                        New With { _
                            .ParagraphNode = p, _
                            .Style = GetParagraphStyle(p, defaultStyle) _
                        } _
                    )
 
            Dim r As XName = w + "r"
            Dim ins As XName = w + "ins"
 
            Dim paragraphsWithText = _
                paragraphs.Select(Function(p) _
                    New With { _
                        .ParagraphNode = p.ParagraphNode, _
                        .Style = p.Style, _
                        .Text = p.ParagraphNode _
                            .Elements() _
                            .Where(Function(z) z.Name = r Or z.Name = ins) _
                            .Descendants(w + "t") _
                            .StringConcatenate(Function(s) CStr(s)) _
                    } _
                )
 
            Dim groupedCodeParagraphs = paragraphsWithText _
                .GroupAdjacent(Function(p) p.Style) _
                .Where(Function(g) g.Key = "Code")
 
            Dim groupedCodeWithComments = _
                groupedCodeParagraphs.Select(Function(g) New With { _
                        .ParagraphGroup = g, _
                        .Comment = GetComment(commentsDoc, g.First().ParagraphNode) _
                    } _
                )
 
            For Each group In groupedCodeWithComments
                Console.WriteLine("Code Block")
                Console.WriteLine("==========")
                For Each paragraph In group.ParagraphGroup
                    Console.WriteLine(paragraph.Text)
                Next
                Console.WriteLine()
                Console.WriteLine("Meta Data")
                Console.WriteLine("=========")
                Console.WriteLine(group.Comment)
                Console.WriteLine()
            Next
        End Using
    End Sub
End Module
 

[Table of Contents] [Next Topic] [Blog Map]

Attachment: ParseWordML.cs
Leave a Comment
  • Please add 3 and 6 and type the answer here:
  • Post