Thursday, 23 October 2014

CSV to XML with a Quick and Dirty XSLT


A CSV file has to be converted into XML.


The following XSLT uses a simple method of tokenization to generate XML from plain separated text. The separator is defined by the parameter 'seperator' (note that this is how the parameter name is spelled in the stylesheet). The example below uses a tab character.

Other parameters define whether a header row is included (header-row) and customise the names of the elements that make up the table, row and cell structure (tableName, rowName and cellName).

The transformation is XSLT 2.0 and can be invoked with Saxon using the following command line, where thisXSLT.xsl is the code below:

java -jar saxon.jar -it:main -xsl:thisXSLT.xsl -o:result.xml "csvFile=myfile.csv"


  <!--
    CSV to XML: reads a separated-text file and emits one table element
    containing one row element per line and one cell element per field.
    Intended to be run with a named initial template, e.g. Saxon's -it:main.

    NOTE(review): the start tag of xsl:stylesheet was truncated in this copy
    of the listing. The xsl, xs and fn bindings below are the standard ones;
    the URI bound to "local" is a placeholder (the original value is unknown).
    Any non-empty URI works, because it is only used to name the two
    stylesheet functions defined here.
  -->
  <xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:fn="http://www.w3.org/2005/xpath-functions"
    xmlns:local="urn:x-local:csv-to-xml"
    exclude-result-prefixes="xsl xs fn local">

    <xsl:output indent="yes" encoding="UTF-8" method="xml"/>

    <!-- a more complex routine is available at [link missing from this copy of the listing] -->

    <!-- URI of the file to convert; passed to unparsed-text(). -->
    <xsl:param name="csvFile" as="xs:string" />
    <!-- Whether the first line holds column titles. The values '', 'false',
         'no' and '0' (case-insensitive) switch the header off; any other
         value switches it on. -->
    <xsl:param name="header-row" as="xs:string" select="'true'" />
    <!-- Field separator. It is passed to tokenize() and is therefore
         interpreted as a regular expression. Default: a tab character. -->
    <xsl:param name="seperator" as="xs:string"  select="'&#9;'"/>
    <!-- Names of the generated table, row and cell elements. -->
    <xsl:param name="tableName" as="xs:string"  select="'legislation'"/>
    <xsl:param name="rowName" as="xs:string"  select="'item'"/>
    <xsl:param name="cellName" as="xs:string"  select="'data'"/>

    <!-- Entry point: usable as the named initial template "main" or by
         matching the document node of any source document. -->
    <xsl:template match="/" name="main">
      <xsl:copy-of select="local:csv-to-xml($csvFile)" />
    </xsl:template>

    <!-- if this function is available from xslt 3 then use it otherwise use the makeshift expression  -->
    <!-- Returns the lines of the text resource at $href. -->
    <xsl:function name="local:unparsed-text-lines" as="xs:string+">
      <xsl:param name="href" as="xs:string" />
      <xsl:sequence use-when="function-available('unparsed-text-lines')"
        select="fn:unparsed-text-lines($href)" />
      <!-- Fallback: split on any line ending and drop the empty token that a
           trailing newline would otherwise produce. -->
      <xsl:sequence use-when="not(function-available('unparsed-text-lines'))"
        select="tokenize(unparsed-text($href), '\r\n|\r|\n')[not(position()=last() and .='')]" />
    </xsl:function>

    <!-- Converts the separated-text file at $href into the table element.
         When a header row is in use, the first line is not emitted as a row;
         its fields become the title attribute of the cell in the same
         column of every other row. -->
    <xsl:function name="local:csv-to-xml" as="node()+">
      <xsl:param name="href" as="xs:string" />
      <!-- Read and split the file once; it is needed for both the header
           and the rows. -->
      <xsl:variable name="lines" as="xs:string*"
        select="local:unparsed-text-lines($href)"/>
      <!-- $header-row here is the global parameter. -->
      <xsl:variable name="has-header" as="xs:boolean"
        select="not(lower-case(normalize-space($header-row)) = ('', 'false', 'no', '0'))"/>
      <xsl:variable name="header-cells" as="xs:string*"
        select="if ($has-header) then
           tokenize($lines[1], $seperator)
          else ()"/>
      <xsl:element name="{$tableName}">
        <xsl:for-each select="$lines">
          <xsl:choose>
            <xsl:when test="position() = 1 and exists($header-cells)">
              <!-- Header line: consumed for titles only, no row is output. -->
            </xsl:when>
            <xsl:otherwise>
              <xsl:element name="{$rowName}">
                <!-- xs:string* rather than xs:string+: tokenize() returns an
                     empty sequence for a blank line, which then yields an
                     empty row instead of a type error. -->
                <xsl:variable name="tokens" as="xs:string*" select="tokenize(., $seperator)"/>
                <xsl:for-each select="$tokens">
                  <!-- Column index, used to look up the matching header field. -->
                  <xsl:variable name="position" as="xs:integer"
                    select="position()"/>
                  <xsl:variable name="celltitle" as="xs:string?"
                    select="if (exists($header-cells)) then
                       $header-cells[$position]
                      else ()"/>
                  <xsl:element name="{$cellName}">
                    <xsl:if test="exists($header-cells)">
                      <xsl:attribute name="title" select="$celltitle"/>
                    </xsl:if>
                    <xsl:value-of select="."/>
                  </xsl:element>
                </xsl:for-each>
              </xsl:element>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each>
      </xsl:element>
    </xsl:function>

  </xsl:stylesheet>

No comments:

Post a Comment