Marco Dorantes' WebLog

"Computer science is no more about computers than astronomy is about telescopes" -Edsger W. Dijkstra

Microsoft Word 2003 – one XML InfoSet, a Microsoft Word 2003 rendition

Microsoft Word 2003 – one XML InfoSet, a Microsoft Word 2003 rendition

  • Comments 1

So do you want a Microsoft Word 2003 rendition instead of the HTML rendition of my previous post? Why not? A mind can be downloaded to a variety of formats: HTML, Word, PDF, PS, RTF, flat, CSV, Excel, Visio, etc.

 

According to the Office 2003 XML Reference Schemas, a Microsoft Word 2003 XML document has the following basic top-level elements:

 

<?xml version="1.0" standalone="yes"?>

<?mso-application progid="Word.Document"?>

<!-- Skeleton of a Word 2003 XML document: the w:wordDocument root declares the
     namespace prefixes (including ns0 for the custom http://marcod.org/Doc1
     vocabulary) and holds five top-level children, all left empty here. -->

<w:wordDocument

  xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"

  xmlns:v="urn:schemas-microsoft-com:vml"

  xmlns:w10="urn:schemas-microsoft-com:office:word"

  xmlns:sl="http://schemas.microsoft.com/schemaLibrary/2003/core"

  xmlns:aml="http://schemas.microsoft.com/aml/2001/core"

  xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint"

  xmlns:o="urn:schemas-microsoft-com:office:office"

  xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"

  xmlns:ns0="http://marcod.org/Doc1"

  w:macrosPresent="no"

  w:embeddedObjPresent="no"

  w:ocxPresent="no"

  xml:space="preserve">

 

  <o:DocumentProperties/>

  <w:fonts/>

  <w:styles/>

  <w:docPr/>

  <w:body/>

</w:wordDocument>

 

So our basic Microsoft Word 2003 rendition could be produced by an XSLT stylesheet like the following:

 

<?xml version="1.0" encoding="UTF-8" ?>

<x:stylesheet version="1.0" xmlns:x="http://www.w3.org/1999/XSL/Transform" xmlns:t="http://marcod.org/Doc1">

 

<!-- Serialize the result tree as XML 1.0.
     encoding is pinned to UTF-8 so the output no longer depends on the
     processor's default choice (XSLT 1.0 lets a processor pick UTF-8 or UTF-16).
     standalone="yes" makes the emitted XML declaration match the one used by
     the Word 2003 XML skeleton this stylesheet targets. -->
<x:output method="xml" version="1.0" encoding="UTF-8" standalone="yes"/>

 

<!--
  Root template: builds the complete Word 2003 XML document.
  It emits, in order:
    1. the mso-application processing instruction with progid="Word.Document";
    2. the w:wordDocument root element with its namespace declarations;
    3. an empty o:DocumentProperties, the default fonts (Times New Roman) and the
       style table (Normal, Heading1, Heading2, DefaultParagraphFont, TableNormal,
       NoList), followed by an empty w:docPr;
    4. the body: one wx:sect holding an ns0:Doc1 element whose first paragraph is
       the source /t:Doc1/t:title in the Heading1 style, followed by whatever the
       templates applied to /t:Doc1/t:section produce.
  The t prefix (bound on x:stylesheet) and the ns0 prefix used here name the same
  namespace, http://marcod.org/Doc1: t addresses the source document, ns0 labels
  the custom elements written to the result.
  NOTE(review): w:wordDocument carries xml:space="preserve", so under the XSLT 1.0
  whitespace-stripping rules the whitespace-only text between its child elements
  is presumably kept and copied to the output; for that reason no comments are
  placed inside the element. Confirm against the processor in use.
-->
<x:template match="/">

 

<x:processing-instruction name="mso-application">

<x:text>progid="Word.Document"</x:text>

</x:processing-instruction>

 

<w:wordDocument

  xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"

  xmlns:v="urn:schemas-microsoft-com:vml"

  xmlns:w10="urn:schemas-microsoft-com:office:word"

  xmlns:sl="http://schemas.microsoft.com/schemaLibrary/2003/core"

  xmlns:aml="http://schemas.microsoft.com/aml/2001/core"

  xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint"

  xmlns:o="urn:schemas-microsoft-com:office:office"

  xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"

  xmlns:ns0="http://marcod.org/Doc1"

  w:macrosPresent="no"

  w:embeddedObjPresent="no"

  w:ocxPresent="no"

  xml:space="preserve">

 

  <o:DocumentProperties/>

  <w:fonts>

    <w:defaultFonts w:ascii="Times New Roman" w:fareast="Times New Roman" w:h-ansi="Times New Roman" w:cs="Times New Roman"/>

  </w:fonts>

  <w:styles>

    <w:versionOfBuiltInStylenames w:val="4"/>

    <w:latentStyles w:defLockedState="off" w:latentStyleCount="156"/>

    <w:style w:type="paragraph" w:default="on" w:styleId="Normal">

      <w:name w:val="Normal"/>

      <w:rPr>

        <wx:font wx:val="Times New Roman"/>

        <w:sz w:val="24"/>

        <w:sz-cs w:val="24"/>

        <w:lang w:val="EN-US" w:fareast="EN-US" w:bidi="AR-SA"/>

      </w:rPr>

    </w:style>

    <w:style w:type="paragraph" w:styleId="Heading1">

      <w:name w:val="heading 1"/>

      <wx:uiName wx:val="Heading 1"/>

      <w:basedOn w:val="Normal"/>

      <w:next w:val="Normal"/>

      <w:rsid w:val="00B01077"/>

      <w:pPr>

        <w:pStyle w:val="Heading1"/>

        <w:keepNext/>

        <w:spacing w:before="240" w:after="60"/>

        <w:outlineLvl w:val="0"/>

      </w:pPr>

      <w:rPr>

        <w:rFonts w:ascii="Arial" w:h-ansi="Arial" w:cs="Arial"/>

        <wx:font wx:val="Arial"/>

        <w:b/>

        <w:b-cs/>

        <w:kern w:val="32"/>

        <w:sz w:val="32"/>

        <w:sz-cs w:val="32"/>

      </w:rPr>

    </w:style>

    <w:style w:type="paragraph" w:styleId="Heading2">

      <w:name w:val="heading 2"/>

      <wx:uiName wx:val="Heading 2"/>

      <w:basedOn w:val="Normal"/>

      <w:next w:val="Normal"/>

      <w:autoRedefine/>

      <w:rsid w:val="00B01077"/>

      <w:pPr>

        <w:pStyle w:val="Heading2"/>

        <w:keepNext/>

        <w:spacing w:before="240" w:after="60"/>

        <w:outlineLvl w:val="1"/>

      </w:pPr>

      <w:rPr>

        <w:rFonts w:ascii="Arial" w:h-ansi="Arial" w:cs="Arial"/>

        <wx:font wx:val="Arial"/>

        <w:b/>

        <w:b-cs/>

        <w:i-cs/>

        <w:sz w:val="28"/>

        <w:sz-cs w:val="28"/>

      </w:rPr>

    </w:style>

    <w:style w:type="character" w:default="on" w:styleId="DefaultParagraphFont">

      <w:name w:val="Default Paragraph Font"/>

      <w:semiHidden/>

    </w:style>

    <w:style w:type="table" w:default="on" w:styleId="TableNormal">

      <w:name w:val="Normal Table"/>

      <wx:uiName wx:val="Table Normal"/>

      <w:semiHidden/>

      <w:rPr>

        <wx:font wx:val="Times New Roman"/>

      </w:rPr>

      <w:tblPr>

        <w:tblInd w:w="0" w:type="dxa"/>

        <w:tblCellMar>

          <w:top w:w="0" w:type="dxa"/>

          <w:left w:w="108" w:type="dxa"/>

          <w:bottom w:w="0" w:type="dxa"/>

          <w:right w:w="108" w:type="dxa"/>

        </w:tblCellMar>

      </w:tblPr>

    </w:style>

    <w:style w:type="list" w:default="on" w:styleId="NoList">

      <w:name w:val="No List"/>

      <w:semiHidden/>

    </w:style>

  </w:styles>

  <w:docPr/>

  <w:body>

    <wx:sect>

      <ns0:Doc1>

        <w:p>

          <w:pPr>

            <w:pStyle w:val="Heading1"/>

          </w:pPr>

          <ns0:title>

            <w:r>

              <w:t><x:value-of select="/t:Doc1/t:title" /></w:t>

            </w:r>

          </ns0:title>

        </w:p>

        <x:apply-templates select="/t:Doc1/t:section" />

      </ns0:Doc1>

    </wx:sect>

  </w:body>

 

</w:wordDocument>

 

</x:template>

 

<!-- Section template: wraps each source t:section in an ns0:section element,
     writes its t:subtitle as a paragraph in the Heading2 style, and then
     applies templates to the section's t:p children. -->
<x:template match="t:section">
  <ns0:section xmlns:ns0="http://marcod.org/Doc1">
    <!-- Heading paragraph carrying the subtitle text -->
    <w:p xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml">
      <w:pPr><w:pStyle w:val="Heading2"/></w:pPr>
      <ns0:subtitle>
        <w:r><w:t><x:value-of select="t:subtitle"/></w:t></w:r>
      </ns0:subtitle>
    </w:p>
    <!-- Body paragraphs of this section -->
    <x:apply-templates select="t:p"/>
  </ns0:section>
</x:template>

 

<!-- Paragraph template: wraps each source t:p in an ns0:p element containing
     one Word paragraph with the string value of the source paragraph,
     followed by an empty Word paragraph. -->
<x:template match="t:p">
  <ns0:p xmlns:ns0="http://marcod.org/Doc1">
    <w:p xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml">
      <w:r><w:t><x:value-of select="."/></w:t></w:r>
    </w:p>
    <!-- Empty paragraph emitted after the text paragraph -->
    <w:p xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"/>
  </ns0:p>
</x:template>

 

</x:stylesheet>

 

So far, so good, but I am somewhat uncomfortable with tying this rendition to one particular XML schema. A more general solution perhaps involves some more programming (great!) with the MSXML parser, the System.Xml namespace, or XSLT meta-programming. We will see.

 

Leave a Comment
  • Please add 4 and 2 and type the answer here:
  • Post