Welcome to MSDN Blogs Sign in | Join | Help
Complete Listing of ParseWordML

[Table of Contents]  [Next Topic]

The following code is attached to this page.

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.IO;

using System.Xml;

using System.Xml.Linq;

using Microsoft.Office.DocumentFormat.OpenXml.Packaging;

 

/// <summary>
/// A list of source elements paired with the key they share, exposed as an
/// <see cref="IGrouping{TKey, TSource}"/> so it can be consumed like the
/// groups produced by LINQ grouping operators.
/// </summary>
/// <typeparam name="TSource">Type of the grouped elements.</typeparam>
/// <typeparam name="TKey">Type of the key shared by the elements.</typeparam>
public class GroupOfAdjacent<TSource, TKey> :
    IEnumerable<TSource>, IGrouping<TKey, TSource>
{
    /// <summary>
    /// Creates a group over <paramref name="source"/>. The list is stored by
    /// reference, not copied.
    /// </summary>
    /// <param name="source">The elements belonging to the group.</param>
    /// <param name="key">The key associated with the group.</param>
    public GroupOfAdjacent(List<TSource> source, TKey key)
    {
        Key = key;
        GroupList = source;
    }

    /// <summary>The key associated with this group.</summary>
    public TKey Key { get; set; }

    /// <summary>Backing list holding the group's elements.</summary>
    private List<TSource> GroupList { get; set; }

    /// <summary>Enumerates the elements of the backing list in order.</summary>
    IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator()
    {
        foreach (TSource member in GroupList)
        {
            yield return member;
        }
    }

    /// <summary>Non-generic enumeration; forwards to the generic enumerator.</summary>
    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        IEnumerable<TSource> typedView = this;
        return typedView.GetEnumerator();
    }
}

 

/// <summary>
/// Extension methods used by the WordprocessingML parsing sample: element
/// path formatting, string concatenation over sequences, and grouping of
/// adjacent elements by key.
/// </summary>
public static class LocalExtensions
{
    /// <summary>
    /// Builds a slash-terminated path of local element names from the
    /// outermost ancestor down to <paramref name="el"/>, for example
    /// <c>"document/body/p/"</c>. Namespaces are not included.
    /// </summary>
    /// <param name="el">The element whose path is wanted.</param>
    /// <returns>The path, with a trailing "/".</returns>
    public static string GetPath(this XElement el)
    {
        // AncestorsAndSelf yields the element first and the root last, so each
        // step prepends the next-outer name to the path built so far.
        return
            el
            .AncestorsAndSelf()
            .Aggregate("", (seed, i) => i.Name.LocalName + "/" + seed);
    }

    /// <summary>
    /// Concatenates all strings in <paramref name="source"/> in order.
    /// </summary>
    /// <param name="source">The strings to join; null items contribute nothing.</param>
    /// <returns>The concatenation, or an empty string for an empty sequence.</returns>
    public static string StringConcatenate(
        this IEnumerable<string> source)
    {
        return source.Aggregate(
            new StringBuilder(),
            (s, i) => s.Append(i),
            s => s.ToString());
    }

    /// <summary>
    /// Projects every item of <paramref name="source"/> to a string and
    /// concatenates the results in order.
    /// </summary>
    /// <param name="source">The items to project.</param>
    /// <param name="projectionFunc">Maps an item to the text it contributes.</param>
    /// <returns>The concatenation, or an empty string for an empty sequence.</returns>
    public static string StringConcatenate<T>(
        this IEnumerable<T> source,
        Func<T, string> projectionFunc)
    {
        return source.Aggregate(
            new StringBuilder(),
            (s, i) => s.Append(projectionFunc(i)),
            s => s.ToString());
    }

    /// <summary>
    /// Splits <paramref name="source"/> into groups of consecutive elements
    /// that share the same key. Unlike GroupBy, equal keys that are separated
    /// by a different key end up in separate groups. Evaluation is deferred
    /// and groups are yielded in source order.
    /// </summary>
    /// <param name="source">The sequence to partition.</param>
    /// <param name="keySelector">Computes the key of an element; may return null.</param>
    /// <returns>One group per run of adjacent elements with equal keys.</returns>
    public static IEnumerable<IGrouping<TKey, TSource>> GroupAdjacent<TSource, TKey>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector)
    {
        // The default comparer is null-safe; calling k.Equals(last) directly
        // would throw NullReferenceException whenever a key is null.
        EqualityComparer<TKey> comparer = EqualityComparer<TKey>.Default;

        TKey last = default(TKey);
        bool haveLast = false;
        List<TSource> list = new List<TSource>();

        foreach (TSource s in source)
        {
            TKey k = keySelector(s);

            // A key change closes the current run and starts a new one.
            if (haveLast && !comparer.Equals(k, last))
            {
                yield return new GroupOfAdjacent<TSource, TKey>(list, last);
                list = new List<TSource>();
            }

            list.Add(s);
            last = k;
            haveLast = true;
        }

        // Emit the final run; nothing is emitted for an empty source.
        if (haveLast)
        {
            yield return new GroupOfAdjacent<TSource, TKey>(list, last);
        }
    }
}

 

/// <summary>
/// Console sample that reads SampleDoc.docx, finds runs of adjacent
/// paragraphs whose style is "Code", and prints the text of each run together
/// with the text of the comment whose range starts inside that run.
/// </summary>
class Program
{
    // WordprocessingML main namespace, used to build every XName below.
    static readonly XNamespace w =
      "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Loads the XML content of a package part into an XDocument.
    /// </summary>
    /// <param name="part">The part whose stream is parsed.</param>
    /// <returns>The parsed document.</returns>
    public static XDocument LoadXDocument(OpenXmlPart part)
    {
        // Both readers are disposed once the document is fully loaded.
        using (StreamReader streamReader = new StreamReader(part.GetStream()))
        using (XmlReader xmlReader = XmlReader.Create(streamReader))
        {
            return XDocument.Load(xmlReader);
        }
    }

    /// <summary>
    /// Returns the value of the first w:pPr/w:pStyle/@w:val under a paragraph.
    /// </summary>
    /// <param name="para">A w:p element.</param>
    /// <returns>The style id, or null when the paragraph declares none.</returns>
    public static string GetParagraphStyle(XElement para)
    {
        return (string)para.Elements(w + "pPr")
                           .Elements(w + "pStyle")
                           .Attributes(w + "val")
                           .FirstOrDefault();
    }

    /// <summary>
    /// Returns the text of the comment with the given id: the w:t text of each
    /// of its paragraphs, with a "\n" appended after every paragraph.
    /// </summary>
    /// <param name="commentsDoc">The comments part loaded as XML.</param>
    /// <param name="id">The w:id value of the wanted comment.</param>
    /// <returns>The comment text.</returns>
    /// <exception cref="InvalidOperationException">
    /// No w:comment element has the requested id.
    /// </exception>
    public static string GetCommentText(XDocument commentsDoc, string id)
    {
        var commentNode =
            commentsDoc.Root
                       .Elements(w + "comment")
                       .Where(c => (string)c.Attribute(w + "id") == id)
                       .First();

        var comment =
            commentNode.Elements(w + "p")
                       .StringConcatenate(node =>
                           node.Descendants(w + "t")
                               .Select(t => (string)t)
                               .StringConcatenate()
                           + "\n");
        return comment;
    }

    /// <summary>
    /// Entry point: prints every block of "Code" paragraphs in SampleDoc.docx
    /// followed by its comment text.
    /// </summary>
    static void Main(string[] args)
    {
        const string filename = "SampleDoc.docx";

        // Open read-only (isEditable = false): nothing here modifies the package.
        using (WordprocessingDocument wordDoc =
            WordprocessingDocument.Open(filename, false))
        {
            MainDocumentPart mainPart = wordDoc.MainDocumentPart;
            StyleDefinitionsPart stylePart = mainPart.StyleDefinitionsPart;
            CommentsPart commentsPart = mainPart.CommentsPart;
            XDocument mainPartDoc = LoadXDocument(mainPart);

            // The styles and comments parts are optional in a package, so a
            // missing part is treated as "no styles" / "no comments".
            XDocument styleDoc =
                stylePart == null ? null : LoadXDocument(stylePart);
            XDocument commentsDoc =
                commentsPart == null ? null : LoadXDocument(commentsPart);

            // Style id applied to paragraphs that do not name a style. Falls
            // back to an empty string so grouping keys are never null.
            string defaultStyle = string.Empty;
            if (styleDoc != null)
            {
                XElement defaultStyleElement =
                    styleDoc.Root
                            .Elements(w + "style")
                            .Where(style =>
                                (string)style.Attribute(w + "type") == "paragraph" &&
                                (string)style.Attribute(w + "default") == "1")
                            .FirstOrDefault();

                if (defaultStyleElement != null)
                {
                    defaultStyle =
                        (string)defaultStyleElement.Attribute(w + "styleId")
                        ?? string.Empty;
                }
            }

            // Pair every paragraph in the body with its effective style id.
            var paragraphs =
                mainPartDoc.Root
                           .Element(w + "body")
                           .Descendants(w + "p")
                .Select(p =>
                    new
                    {
                        ParagraphNode = p,
                        Style = GetParagraphStyle(p) ?? defaultStyle
                    }
                );

            XName r = w + "r";
            XName ins = w + "ins";

            // Paragraph text is taken from w:t elements under the paragraph's
            // direct w:r and w:ins children only.
            var paragraphsWithText =
                paragraphs.Select(p =>
                    new
                    {
                        ParagraphNode = p.ParagraphNode,
                        Style = p.Style,
                        Text = p.ParagraphNode
                                .Elements()
                                .Where(z => z.Name == r || z.Name == ins)
                                .Descendants(w + "t")
                                .StringConcatenate(s => (string)s)
                    }
                );

            var groupedCodeParagraphs =
                paragraphsWithText.GroupAdjacent(p => p.Style)
                                  .Where(g => g.Key == "Code");

            // Attach to each code block the comment whose range starts in one
            // of its paragraphs; blocks without a comment get empty text
            // instead of aborting the whole run.
            var groupedCodeWithComments =
                groupedCodeParagraphs.Select(g =>
                    {
                        XElement rangeStart =
                            g.Select(p => p.ParagraphNode)
                             .Elements(w + "commentRangeStart")
                             .FirstOrDefault();

                        string id = rangeStart == null
                            ? null
                            : (string)rangeStart.Attribute(w + "id");

                        return new
                        {
                            ParagraphGroup = g,
                            Comment = (id == null || commentsDoc == null)
                                ? string.Empty
                                : GetCommentText(commentsDoc, id)
                        };
                    }
                );

            foreach (var group in groupedCodeWithComments)
            {
                Console.WriteLine("Code Block");
                Console.WriteLine("==========");
                foreach (var paragraph in group.ParagraphGroup)
                {
                    Console.WriteLine(paragraph.Text);
                }
                Console.WriteLine();
                Console.WriteLine("Meta Data");
                Console.WriteLine("=========");
                Console.WriteLine(group.Comment);
                Console.WriteLine();
            }
        }
    }
}

 

[Table of Contents]  [Next Topic]

 

Posted: Wednesday, October 04, 2006 5:32 AM by EricWhite
Filed under:

Attachment(s): ParseWordML.cs
Leave a Comment

(required) 

(required) 

(optional)

(required) 

Comment Notification

If you would like to receive an email when updates are made to this post, please register here

Subscribe to this post's comments using RSS

Page view tracker