Thursday, 23 October 2014

CSV to XML with a Quick and Dirty XSLT

Issue

A CSV file has to be converted into XML.

Resolution

The following XSLT uses a simple method of tokenization to generate the XML from plain separated text, the separator being defined by the parameter 'seperator' (the parameter name is spelt this way in the stylesheet). The example below uses a tab character.

Other parameters allow the definition of whether a header row is included (header-row), plus the customised naming of the various elements that generate the table, row and cell structure.

The transformation is XSLT 2.0 and can be invoked with Saxon using the following command line, where thisXSLT.xsl is the code below:

java -jar saxon.jar -it:main -xsl:thisXSLT.xsl -o:result.xml "csvFile=myfile.csv"

XSLT

<xsl:stylesheet 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
  xmlns:fn="http://www.w3.org/2005/xpath-functions" 
  xmlns:local="http://www.griffmonster.org" 
  xmlns:xs="http://www.w3.org/2001/XMLSchema" 
 
  version="2.0"
  exclude-result-prefixes="xsl xs fn local">
 <xsl:output indent="yes" encoding="UTF-8" method="xml"/>
 
 <!--
 
 a more complex routine is available at http://rosettacode.org/wiki/Csv-to-xml.xslt
 
 -->

 <!-- Location of the delimited text file to convert; it is handed to
      local:csv-to-xml by the main template. No default is declared. -->
 <xsl:param name="csvFile" as="xs:string" />
 <!-- Header switch. The default 'true' means the first line of the file
      supplies the column titles; an empty string turns header handling off. -->
 <xsl:param name="header-row" as="xs:string" select="'true'" />
 <!-- String that separates the cells within a line; the default is a tab
      character. The spelling of the name is kept because callers pass the
      parameter by name. -->
 <xsl:param name="seperator" as="xs:string"  select="'&#9;'"/>
 <!-- Name of the single element that wraps all generated rows. -->
 <xsl:param name="tableName" as="xs:string"  select="'legislation'"/>
 <!-- Name of the element generated for each line of the file. -->
 <xsl:param name="rowName" as="xs:string"  select="'item'"/>
 <!-- Name of the element generated for each cell of a line. -->
 <xsl:param name="cellName" as="xs:string"  select="'data'"/>
 
 <!-- Entry point: matches the document node of a source document and can also
      be invoked directly as the named template "main". It converts the file
      named by $csvFile and writes the generated tree to the result. -->
 <xsl:template match="/" name="main">
  <xsl:variable name="converted" as="node()+"
    select="local:csv-to-xml($csvFile)" />
  <xsl:copy-of select="$converted" />
 </xsl:template>

 <!--
  local:unparsed-text-lines($href)

  Returns the lines of the text resource at $href as a sequence of strings.

  When the processor offers the built-in unparsed-text-lines function (checked
  at compile time through use-when) that function is used. Otherwise the file
  is read with unparsed-text() and split on CRLF, CR or LF; a single empty
  string left behind by a final line terminator is dropped.

  The return type is xs:string* rather than xs:string+ because an empty file
  produces an empty sequence in both branches, which would otherwise be
  reported as a type error instead of simply yielding no lines.
 -->
 <xsl:function name="local:unparsed-text-lines" as="xs:string*">
  <xsl:param name="href" as="xs:string" />
  <xsl:sequence use-when="function-available('unparsed-text-lines')" 
    select="fn:unparsed-text-lines($href)" />
  <xsl:sequence use-when="not(function-available('unparsed-text-lines'))" 
    select="tokenize(unparsed-text($href), '\r\n|\r|\n')[not(position()=last() and .='')]" />
 </xsl:function>

 <!--
  local:csv-to-xml($href)

  Reads the delimited text file at $href and returns one element named
  $tableName that contains one $rowName element per data line, each holding
  one $cellName element per cell.

  Header handling: the global $header-row parameter switches it on unless its
  value (ignoring case and surrounding white space) is empty, 'false', '0' or
  'no'. When it is on, the first line is not emitted as a row; its cells are
  used as the value of a title attribute on the cell in the same column.
  A cell with no matching header cell gets an empty title attribute.

  The global $seperator parameter is treated as a literal string: regular
  expression metacharacters in it are escaped before it is given to
  tokenize(), so separators such as | or . work as expected.

  A blank line produces an empty $rowName element.
 -->
 <xsl:function name="local:csv-to-xml" as="node()+">
  <xsl:param name="href" as="xs:string" />

  <!-- Read the file once and reuse the lines for header and data rows. -->
  <xsl:variable name="lines" as="xs:string*"
    select="local:unparsed-text-lines($href)"/>

  <!-- tokenize() takes a regular expression, so escape the separator to make
       it match literally. -->
  <xsl:variable name="separator-regex" as="xs:string"
    select="replace($seperator, '(\.|\[|\]|\\|\||\-|\^|\$|\?|\*|\+|\{|\}|\(|\))', '\\$1')"/>

  <!-- Explicit boolean so that values such as 'false' really disable the header. -->
  <xsl:variable name="has-header" as="xs:boolean"
    select="not(lower-case(normalize-space($header-row)) = ('', 'false', '0', 'no'))"/>

  <!-- Column titles taken from the first line; empty when there is no header. -->
  <xsl:variable name="header-cells" as="xs:string*"
    select="if ($has-header) then
       tokenize($lines[1], $separator-regex)
      else ()"/>

  <xsl:element name="{$tableName}">
   <!-- Skip the first line only when it is the header. -->
   <xsl:for-each select="if ($has-header) then subsequence($lines, 2) else $lines">
    <!-- xs:string* because tokenizing a blank line yields an empty sequence. -->
    <xsl:variable name="cells" as="xs:string*"
      select="tokenize(., $separator-regex)"/>
    <xsl:element name="{$rowName}">
     <xsl:for-each select="$cells">
      <!-- Remember the column number; the context changes inside the predicate below. -->
      <xsl:variable name="column" as="xs:integer" select="position()"/>
      <xsl:element name="{$cellName}">
       <xsl:if test="$has-header">
        <xsl:attribute name="title" select="$header-cells[$column]"/>
       </xsl:if>
       <xsl:value-of select="."/>
      </xsl:element>
     </xsl:for-each>
    </xsl:element>
   </xsl:for-each>
  </xsl:element>
 </xsl:function>
</xsl:stylesheet>

No comments:

Post a Comment