XSLT stylesheet to convert unescaped HTML to XML
This was used to extract the HTML from a Blogger feed, which was used across http://griffmonster-walks.blogspot.co.uk/feeds/posts/full
<!--
  Turns the escaped HTML carried in atom:content elements into markup in
  the result document, decoding HTML named character entities on the way.

  Namespace notes:
    * atom is the Atom syndication namespace; it has to be declared here
      because a template below matches on atom:content.
    * exclude-result-prefixes stops the helper namespaces from being copied
      onto literal result elements in the output.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:atom="http://www.w3.org/2005/Atom"
                xmlns:functx="http://www.functx.com"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="atom functx xs">
<!-- Serialise without an XML declaration and with indentation. -->
<xsl:output omit-xml-declaration="yes" indent="yes" />
<!--
  Lookup table of HTML 4.01 named character entities.

  Each entity element pairs the literal text of an entity reference as it
  appears in the HTML string (@name, for example the six characters of an
  nbsp reference) with the text that replaces it (@char).

  The @name values are handed to the XPath replace() function as regular
  expressions, so they must stay free of regex metacharacters; the @char
  values are used as replacement strings, so they must stay free of
  dollar signs and backslashes.

  Every @name starts with an escaped ampersand (&amp;amp;) so that this
  stylesheet is itself well-formed XML. Invisible or easily confused
  characters are written as numeric character references.
-->
<xsl:variable name="htmlEntities" as="element()*">
  <!-- Latin-1 -->
  <entity name="&amp;nbsp;" char="&#160;"/> <!-- no-break space, u+00A0 -->
  <entity name="&amp;iexcl;" char="¡"/> <!-- inverted exclamation mark -->
  <entity name="&amp;cent;" char="¢"/> <!-- cent sign -->
  <entity name="&amp;pound;" char="£"/> <!-- pound sterling sign -->
  <entity name="&amp;curren;" char="¤"/> <!-- general currency sign -->
  <entity name="&amp;yen;" char="¥"/> <!-- yen sign -->
  <entity name="&amp;brvbar;" char="¦"/> <!-- broken (vertical) bar -->
  <entity name="&amp;sect;" char="§"/> <!-- section sign -->
  <entity name="&amp;uml;" char="¨"/> <!-- umlaut (dieresis) -->
  <entity name="&amp;copy;" char="©"/> <!-- copyright sign -->
  <entity name="&amp;ordf;" char="ª"/> <!-- ordinal indicator, feminine -->
  <entity name="&amp;laquo;" char="«"/> <!-- angle quotation mark, left -->
  <entity name="&amp;not;" char="¬"/> <!-- not sign -->
  <entity name="&amp;shy;" char="&#173;"/> <!-- soft hyphen, u+00AD -->
  <entity name="&amp;reg;" char="®"/> <!-- registered sign -->
  <entity name="&amp;macr;" char="¯"/> <!-- macron -->
  <entity name="&amp;deg;" char="°"/> <!-- degree sign -->
  <entity name="&amp;plusmn;" char="±"/> <!-- plus-or-minus sign -->
  <entity name="&amp;sup2;" char="²"/> <!-- superscript two -->
  <entity name="&amp;sup3;" char="³"/> <!-- superscript three -->
  <entity name="&amp;acute;" char="´"/> <!-- acute accent -->
  <entity name="&amp;micro;" char="µ"/> <!-- micro sign -->
  <entity name="&amp;para;" char="¶"/> <!-- pilcrow (paragraph sign) -->
  <entity name="&amp;middot;" char="·"/> <!-- middle dot -->
  <entity name="&amp;cedil;" char="¸"/> <!-- cedilla -->
  <entity name="&amp;sup1;" char="¹"/> <!-- superscript one -->
  <entity name="&amp;ordm;" char="º"/> <!-- ordinal indicator, masculine -->
  <entity name="&amp;raquo;" char="»"/> <!-- angle quotation mark, right -->
  <entity name="&amp;frac14;" char="¼"/> <!-- fraction one-quarter -->
  <entity name="&amp;frac12;" char="½"/> <!-- fraction one-half -->
  <entity name="&amp;frac34;" char="¾"/> <!-- fraction three-quarters -->
  <entity name="&amp;iquest;" char="¿"/> <!-- inverted question mark -->
  <entity name="&amp;Agrave;" char="À"/> <!-- capital A, grave accent -->
  <entity name="&amp;Aacute;" char="Á"/> <!-- capital A, acute accent -->
  <entity name="&amp;Acirc;" char="Â"/> <!-- capital A, circumflex accent -->
  <entity name="&amp;Atilde;" char="Ã"/> <!-- capital A, tilde -->
  <entity name="&amp;Auml;" char="Ä"/> <!-- capital A, dieresis or umlaut mark -->
  <entity name="&amp;Aring;" char="Å"/> <!-- capital A, ring -->
  <entity name="&amp;AElig;" char="Æ"/> <!-- capital AE diphthong (ligature) -->
  <entity name="&amp;Ccedil;" char="Ç"/> <!-- capital C, cedilla -->
  <entity name="&amp;Egrave;" char="È"/> <!-- capital E, grave accent -->
  <entity name="&amp;Eacute;" char="É"/> <!-- capital E, acute accent -->
  <entity name="&amp;Ecirc;" char="Ê"/> <!-- capital E, circumflex accent -->
  <entity name="&amp;Euml;" char="Ë"/> <!-- capital E, dieresis or umlaut mark -->
  <entity name="&amp;Igrave;" char="Ì"/> <!-- capital I, grave accent -->
  <entity name="&amp;Iacute;" char="Í"/> <!-- capital I, acute accent -->
  <entity name="&amp;Icirc;" char="Î"/> <!-- capital I, circumflex accent -->
  <entity name="&amp;Iuml;" char="Ï"/> <!-- capital I, dieresis or umlaut mark -->
  <entity name="&amp;ETH;" char="Ð"/> <!-- capital Eth, Icelandic -->
  <entity name="&amp;Ntilde;" char="Ñ"/> <!-- capital N, tilde -->
  <entity name="&amp;Ograve;" char="Ò"/> <!-- capital O, grave accent -->
  <entity name="&amp;Oacute;" char="Ó"/> <!-- capital O, acute accent -->
  <entity name="&amp;Ocirc;" char="Ô"/> <!-- capital O, circumflex accent -->
  <entity name="&amp;Otilde;" char="Õ"/> <!-- capital O, tilde -->
  <entity name="&amp;Ouml;" char="Ö"/> <!-- capital O, dieresis or umlaut mark -->
  <entity name="&amp;times;" char="×"/> <!-- multiply sign -->
  <entity name="&amp;Oslash;" char="Ø"/> <!-- capital O, slash -->
  <entity name="&amp;Ugrave;" char="Ù"/> <!-- capital U, grave accent -->
  <entity name="&amp;Uacute;" char="Ú"/> <!-- capital U, acute accent -->
  <entity name="&amp;Ucirc;" char="Û"/> <!-- capital U, circumflex accent -->
  <entity name="&amp;Uuml;" char="Ü"/> <!-- capital U, dieresis or umlaut mark -->
  <entity name="&amp;Yacute;" char="Ý"/> <!-- capital Y, acute accent -->
  <entity name="&amp;THORN;" char="Þ"/> <!-- capital THORN, Icelandic -->
  <entity name="&amp;szlig;" char="ß"/> <!-- small sharp s, German (sz ligature) -->
  <entity name="&amp;agrave;" char="à"/> <!-- small a, grave accent -->
  <entity name="&amp;aacute;" char="á"/> <!-- small a, acute accent -->
  <entity name="&amp;acirc;" char="â"/> <!-- small a, circumflex accent -->
  <entity name="&amp;atilde;" char="ã"/> <!-- small a, tilde -->
  <entity name="&amp;auml;" char="ä"/> <!-- small a, dieresis or umlaut mark -->
  <entity name="&amp;aring;" char="å"/> <!-- small a, ring -->
  <entity name="&amp;aelig;" char="æ"/> <!-- small ae diphthong (ligature) -->
  <entity name="&amp;ccedil;" char="ç"/> <!-- small c, cedilla -->
  <entity name="&amp;egrave;" char="è"/> <!-- small e, grave accent -->
  <entity name="&amp;eacute;" char="é"/> <!-- small e, acute accent -->
  <entity name="&amp;ecirc;" char="ê"/> <!-- small e, circumflex accent -->
  <entity name="&amp;euml;" char="ë"/> <!-- small e, dieresis or umlaut mark -->
  <entity name="&amp;igrave;" char="ì"/> <!-- small i, grave accent -->
  <entity name="&amp;iacute;" char="í"/> <!-- small i, acute accent -->
  <entity name="&amp;icirc;" char="î"/> <!-- small i, circumflex accent -->
  <entity name="&amp;iuml;" char="ï"/> <!-- small i, dieresis or umlaut mark -->
  <entity name="&amp;eth;" char="ð"/> <!-- small eth, Icelandic -->
  <entity name="&amp;ntilde;" char="ñ"/> <!-- small n, tilde -->
  <entity name="&amp;ograve;" char="ò"/> <!-- small o, grave accent -->
  <entity name="&amp;oacute;" char="ó"/> <!-- small o, acute accent -->
  <entity name="&amp;ocirc;" char="ô"/> <!-- small o, circumflex accent -->
  <entity name="&amp;otilde;" char="õ"/> <!-- small o, tilde -->
  <entity name="&amp;ouml;" char="ö"/> <!-- small o, dieresis or umlaut mark -->
  <entity name="&amp;divide;" char="÷"/> <!-- divide sign -->
  <entity name="&amp;oslash;" char="ø"/> <!-- small o, slash -->
  <entity name="&amp;ugrave;" char="ù"/> <!-- small u, grave accent -->
  <entity name="&amp;uacute;" char="ú"/> <!-- small u, acute accent -->
  <entity name="&amp;ucirc;" char="û"/> <!-- small u, circumflex accent -->
  <entity name="&amp;uuml;" char="ü"/> <!-- small u, dieresis or umlaut mark -->
  <entity name="&amp;yacute;" char="ý"/> <!-- small y, acute accent -->
  <entity name="&amp;thorn;" char="þ"/> <!-- small thorn, Icelandic -->
  <entity name="&amp;yuml;" char="ÿ"/> <!-- small y, dieresis or umlaut mark -->
  <!-- Latin Extended-B -->
  <entity name="&amp;fnof;" char="ƒ"/> <!-- latin small f with hook, =function, =florin, u+0192 ISOtech -->
  <!-- Greek -->
  <entity name="&amp;Alpha;" char="Α"/> <!-- greek capital letter alpha, u+0391 -->
  <entity name="&amp;Beta;" char="Β"/> <!-- greek capital letter beta, u+0392 -->
  <entity name="&amp;Gamma;" char="Γ"/> <!-- greek capital letter gamma, u+0393 ISOgrk3 -->
  <entity name="&amp;Delta;" char="Δ"/> <!-- greek capital letter delta, u+0394 ISOgrk3 -->
  <entity name="&amp;Epsilon;" char="Ε"/> <!-- greek capital letter epsilon, u+0395 -->
  <entity name="&amp;Zeta;" char="Ζ"/> <!-- greek capital letter zeta, u+0396 -->
  <entity name="&amp;Eta;" char="Η"/> <!-- greek capital letter eta, u+0397 -->
  <entity name="&amp;Theta;" char="Θ"/> <!-- greek capital letter theta, u+0398 ISOgrk3 -->
  <entity name="&amp;Iota;" char="Ι"/> <!-- greek capital letter iota, u+0399 -->
  <entity name="&amp;Kappa;" char="Κ"/> <!-- greek capital letter kappa, u+039A -->
  <entity name="&amp;Lambda;" char="Λ"/> <!-- greek capital letter lambda, u+039B ISOgrk3 -->
  <entity name="&amp;Mu;" char="Μ"/> <!-- greek capital letter mu, u+039C -->
  <entity name="&amp;Nu;" char="Ν"/> <!-- greek capital letter nu, u+039D -->
  <entity name="&amp;Xi;" char="Ξ"/> <!-- greek capital letter xi, u+039E ISOgrk3 -->
  <entity name="&amp;Omicron;" char="Ο"/> <!-- greek capital letter omicron, u+039F -->
  <entity name="&amp;Pi;" char="Π"/> <!-- greek capital letter pi, u+03A0 ISOgrk3 -->
  <entity name="&amp;Rho;" char="Ρ"/> <!-- greek capital letter rho, u+03A1 -->
  <!-- (there is no Sigmaf, and no u+03A2 character either) -->
  <entity name="&amp;Sigma;" char="Σ"/> <!-- greek capital letter sigma, u+03A3 ISOgrk3 -->
  <entity name="&amp;Tau;" char="Τ"/> <!-- greek capital letter tau, u+03A4 -->
  <entity name="&amp;Upsilon;" char="Υ"/> <!-- greek capital letter upsilon, u+03A5 ISOgrk3 -->
  <entity name="&amp;Phi;" char="Φ"/> <!-- greek capital letter phi, u+03A6 ISOgrk3 -->
  <entity name="&amp;Chi;" char="Χ"/> <!-- greek capital letter chi, u+03A7 -->
  <entity name="&amp;Psi;" char="Ψ"/> <!-- greek capital letter psi, u+03A8 ISOgrk3 -->
  <entity name="&amp;Omega;" char="Ω"/> <!-- greek capital letter omega, u+03A9 ISOgrk3 -->
  <entity name="&amp;alpha;" char="α"/> <!-- greek small letter alpha, u+03B1 ISOgrk3 -->
  <entity name="&amp;beta;" char="β"/> <!-- greek small letter beta, u+03B2 ISOgrk3 -->
  <entity name="&amp;gamma;" char="γ"/> <!-- greek small letter gamma, u+03B3 ISOgrk3 -->
  <entity name="&amp;delta;" char="δ"/> <!-- greek small letter delta, u+03B4 ISOgrk3 -->
  <entity name="&amp;epsilon;" char="ε"/> <!-- greek small letter epsilon, u+03B5 ISOgrk3 -->
  <entity name="&amp;zeta;" char="ζ"/> <!-- greek small letter zeta, u+03B6 ISOgrk3 -->
  <entity name="&amp;eta;" char="η"/> <!-- greek small letter eta, u+03B7 ISOgrk3 -->
  <entity name="&amp;theta;" char="θ"/> <!-- greek small letter theta, u+03B8 ISOgrk3 -->
  <entity name="&amp;iota;" char="ι"/> <!-- greek small letter iota, u+03B9 ISOgrk3 -->
  <entity name="&amp;kappa;" char="κ"/> <!-- greek small letter kappa, u+03BA ISOgrk3 -->
  <entity name="&amp;lambda;" char="λ"/> <!-- greek small letter lambda, u+03BB ISOgrk3 -->
  <entity name="&amp;mu;" char="μ"/> <!-- greek small letter mu, u+03BC ISOgrk3 -->
  <entity name="&amp;nu;" char="ν"/> <!-- greek small letter nu, u+03BD ISOgrk3 -->
  <entity name="&amp;xi;" char="ξ"/> <!-- greek small letter xi, u+03BE ISOgrk3 -->
  <entity name="&amp;omicron;" char="ο"/> <!-- greek small letter omicron, u+03BF NEW -->
  <entity name="&amp;pi;" char="π"/> <!-- greek small letter pi, u+03C0 ISOgrk3 -->
  <entity name="&amp;rho;" char="ρ"/> <!-- greek small letter rho, u+03C1 ISOgrk3 -->
  <entity name="&amp;sigmaf;" char="ς"/> <!-- greek small letter final sigma, u+03C2 ISOgrk3 -->
  <entity name="&amp;sigma;" char="σ"/> <!-- greek small letter sigma, u+03C3 ISOgrk3 -->
  <entity name="&amp;tau;" char="τ"/> <!-- greek small letter tau, u+03C4 ISOgrk3 -->
  <entity name="&amp;upsilon;" char="υ"/> <!-- greek small letter upsilon, u+03C5 ISOgrk3 -->
  <entity name="&amp;phi;" char="φ"/> <!-- greek small letter phi, u+03C6 ISOgrk3 -->
  <entity name="&amp;chi;" char="χ"/> <!-- greek small letter chi, u+03C7 ISOgrk3 -->
  <entity name="&amp;psi;" char="ψ"/> <!-- greek small letter psi, u+03C8 ISOgrk3 -->
  <entity name="&amp;omega;" char="ω"/> <!-- greek small letter omega, u+03C9 ISOgrk3 -->
  <entity name="&amp;thetasym;" char="ϑ"/> <!-- greek small letter theta symbol, u+03D1 NEW -->
  <entity name="&amp;upsih;" char="ϒ"/> <!-- greek upsilon with hook symbol, u+03D2 NEW -->
  <entity name="&amp;piv;" char="ϖ"/> <!-- greek pi symbol, u+03D6 ISOgrk3 -->
  <!-- General Punctuation -->
  <entity name="&amp;bull;" char="•"/> <!-- bullet, =black small circle, u+2022 ISOpub -->
  <!-- bullet is NOT the same as bullet operator, u+2219 -->
  <entity name="&amp;hellip;" char="…"/> <!-- horizontal ellipsis, =three dot leader, u+2026 ISOpub -->
  <entity name="&amp;prime;" char="′"/> <!-- prime, =minutes, =feet, u+2032 ISOtech -->
  <entity name="&amp;Prime;" char="″"/> <!-- double prime, =seconds, =inches, u+2033 ISOtech -->
  <entity name="&amp;oline;" char="‾"/> <!-- overline, =spacing overscore, u+203E NEW -->
  <entity name="&amp;frasl;" char="⁄"/> <!-- fraction slash, u+2044 NEW -->
  <!-- Letterlike Symbols -->
  <entity name="&amp;weierp;" char="℘"/> <!-- script capital P, =power set, =Weierstrass p, u+2118 ISOamso -->
  <entity name="&amp;image;" char="ℑ"/> <!-- blackletter capital I, =imaginary part, u+2111 ISOamso -->
  <entity name="&amp;real;" char="ℜ"/> <!-- blackletter capital R, =real part symbol, u+211C ISOamso -->
  <entity name="&amp;trade;" char="™"/> <!-- trade mark sign, u+2122 ISOnum -->
  <entity name="&amp;alefsym;" char="ℵ"/> <!-- alef symbol, =first transfinite cardinal, u+2135 NEW -->
  <!-- alef symbol is NOT the same as hebrew letter alef, u+05D0 although the same glyph
       could be used to depict both characters -->
  <!-- Arrows -->
  <entity name="&amp;larr;" char="←"/> <!-- leftwards arrow, u+2190 ISOnum -->
  <entity name="&amp;uarr;" char="↑"/> <!-- upwards arrow, u+2191 ISOnum -->
  <entity name="&amp;rarr;" char="→"/> <!-- rightwards arrow, u+2192 ISOnum -->
  <entity name="&amp;darr;" char="↓"/> <!-- downwards arrow, u+2193 ISOnum -->
  <entity name="&amp;harr;" char="↔"/> <!-- left right arrow, u+2194 ISOamsa -->
  <entity name="&amp;crarr;" char="↵"/> <!-- downwards arrow with corner leftwards, =carriage return, u+21B5 NEW -->
  <entity name="&amp;lArr;" char="⇐"/> <!-- leftwards double arrow, u+21D0 ISOtech -->
  <!-- Unicode does not say that lArr is the same as the 'is implied by' arrow but also
       does not have any other character for that function. So ? lArr can be used for
       'is implied by' as ISOtech suggests -->
  <entity name="&amp;uArr;" char="⇑"/> <!-- upwards double arrow, u+21D1 ISOamsa -->
  <entity name="&amp;rArr;" char="⇒"/> <!-- rightwards double arrow, u+21D2 ISOtech -->
  <!-- Unicode does not say this is the 'implies' character but does not have another
       character with this function so ? rArr can be used for 'implies' as ISOtech suggests -->
  <entity name="&amp;dArr;" char="⇓"/> <!-- downwards double arrow, u+21D3 ISOamsa -->
  <entity name="&amp;hArr;" char="⇔"/> <!-- left right double arrow, u+21D4 ISOamsa -->
  <!-- Mathematical Operators -->
  <entity name="&amp;forall;" char="∀"/> <!-- for all, u+2200 ISOtech -->
  <entity name="&amp;part;" char="∂"/> <!-- partial differential, u+2202 ISOtech -->
  <entity name="&amp;exist;" char="∃"/> <!-- there exists, u+2203 ISOtech -->
  <entity name="&amp;empty;" char="∅"/> <!-- empty set, =null set, =diameter, u+2205 ISOamso -->
  <entity name="&amp;nabla;" char="∇"/> <!-- nabla, =backward difference, u+2207 ISOtech -->
  <entity name="&amp;isin;" char="∈"/> <!-- element of, u+2208 ISOtech -->
  <entity name="&amp;notin;" char="∉"/> <!-- not an element of, u+2209 ISOtech -->
  <entity name="&amp;ni;" char="∋"/> <!-- contains as member, u+220B ISOtech -->
  <!-- should there be a more memorable name than 'ni'? -->
  <entity name="&amp;prod;" char="∏"/> <!-- n-ary product, =product sign, u+220F ISOamsb -->
  <!-- prod is NOT the same character as u+03A0 'greek capital letter pi' though the same
       glyph might be used for both -->
  <entity name="&amp;sum;" char="∑"/> <!-- n-ary summation, u+2211 ISOamsb -->
  <!-- sum is NOT the same character as u+03A3 'greek capital letter sigma' though the same
       glyph might be used for both -->
  <entity name="&amp;minus;" char="−"/> <!-- minus sign, u+2212 ISOtech -->
  <entity name="&amp;lowast;" char="∗"/> <!-- asterisk operator, u+2217 ISOtech -->
  <entity name="&amp;radic;" char="√"/> <!-- square root, =radical sign, u+221A ISOtech -->
  <entity name="&amp;prop;" char="∝"/> <!-- proportional to, u+221D ISOtech -->
  <entity name="&amp;infin;" char="∞"/> <!-- infinity, u+221E ISOtech -->
  <entity name="&amp;ang;" char="∠"/> <!-- angle, u+2220 ISOamso -->
  <entity name="&amp;and;" char="∧"/> <!-- logical and, =wedge, u+2227 ISOtech -->
  <entity name="&amp;or;" char="∨"/> <!-- logical or, =vee, u+2228 ISOtech -->
  <entity name="&amp;cap;" char="∩"/> <!-- intersection, =cap, u+2229 ISOtech -->
  <entity name="&amp;cup;" char="∪"/> <!-- union, =cup, u+222A ISOtech -->
  <entity name="&amp;int;" char="∫"/> <!-- integral, u+222B ISOtech -->
  <entity name="&amp;there4;" char="∴"/> <!-- therefore, u+2234 ISOtech -->
  <entity name="&amp;sim;" char="∼"/> <!-- tilde operator, =varies with, =similar to, u+223C ISOtech -->
  <!-- tilde operator is NOT the same character as the tilde, u+007E, although the same
       glyph might be used to represent both -->
  <entity name="&amp;cong;" char="≅"/> <!-- approximately equal to, u+2245 ISOtech -->
  <entity name="&amp;asymp;" char="≈"/> <!-- almost equal to, =asymptotic to, u+2248 ISOamsr -->
  <entity name="&amp;ne;" char="≠"/> <!-- not equal to, u+2260 ISOtech -->
  <entity name="&amp;equiv;" char="≡"/> <!-- identical to, u+2261 ISOtech -->
  <entity name="&amp;le;" char="≤"/> <!-- less-than or equal to, u+2264 ISOtech -->
  <entity name="&amp;ge;" char="≥"/> <!-- greater-than or equal to, u+2265 ISOtech -->
  <entity name="&amp;sub;" char="⊂"/> <!-- subset of, u+2282 ISOtech -->
  <entity name="&amp;sup;" char="⊃"/> <!-- superset of, u+2283 ISOtech -->
  <!-- note that nsup, 'not a superset of, u+2285' is not covered by the Symbol font
       encoding and is not included. Should it be, for symmetry? It is in ISOamsn -->
  <entity name="&amp;nsub;" char="⊄"/> <!-- not a subset of, u+2284 ISOamsn -->
  <entity name="&amp;sube;" char="⊆"/> <!-- subset of or equal to, u+2286 ISOtech -->
  <entity name="&amp;supe;" char="⊇"/> <!-- superset of or equal to, u+2287 ISOtech -->
  <entity name="&amp;oplus;" char="⊕"/> <!-- circled plus, =direct sum, u+2295 ISOamsb -->
  <entity name="&amp;otimes;" char="⊗"/> <!-- circled times, =vector product, u+2297 ISOamsb -->
  <entity name="&amp;perp;" char="⊥"/> <!-- up tack, =orthogonal to, =perpendicular, u+22A5 ISOtech -->
  <entity name="&amp;sdot;" char="⋅"/> <!-- dot operator, u+22C5 ISOamsb -->
  <!-- dot operator is NOT the same character as u+00B7 middle dot -->
  <!-- Miscellaneous Technical -->
  <entity name="&amp;lceil;" char="⌈"/> <!-- left ceiling, =apl upstile, u+2308, ISOamsc -->
  <entity name="&amp;rceil;" char="⌉"/> <!-- right ceiling, u+2309, ISOamsc -->
  <entity name="&amp;lfloor;" char="⌊"/> <!-- left floor, =apl downstile, u+230A, ISOamsc -->
  <entity name="&amp;rfloor;" char="⌋"/> <!-- right floor, u+230B, ISOamsc -->
  <!-- lang and rang are written as numeric references: u+2329 and u+232A are easily
       swapped for look-alike brackets by editors and Unicode normalisation -->
  <entity name="&amp;lang;" char="&#x2329;"/> <!-- left-pointing angle bracket, =bra, u+2329 ISOtech -->
  <!-- lang is NOT the same character as u+003C 'less than'
       or u+2039 'single left-pointing angle quotation mark' -->
  <entity name="&amp;rang;" char="&#x232A;"/> <!-- right-pointing angle bracket, =ket, u+232A ISOtech -->
  <!-- rang is NOT the same character as u+003E 'greater than'
       or u+203A 'single right-pointing angle quotation mark' -->
  <!-- Geometric Shapes -->
  <entity name="&amp;loz;" char="◊"/> <!-- lozenge, u+25CA ISOpub -->
  <!-- Miscellaneous Symbols -->
  <entity name="&amp;spades;" char="♠"/> <!-- black spade suit, u+2660 ISOpub -->
  <!-- black here seems to mean filled as opposed to hollow -->
  <entity name="&amp;clubs;" char="♣"/> <!-- black club suit, =shamrock, u+2663 ISOpub -->
  <entity name="&amp;hearts;" char="♥"/> <!-- black heart suit, =valentine, u+2665 ISOpub -->
  <entity name="&amp;diams;" char="♦"/> <!-- black diamond suit, u+2666 ISOpub -->
  <!-- C0 Controls and Basic Latin -->
  <entity name="&amp;quot;" char="&quot;"/> <!-- quotation mark, =apl quote, u+0022 ISOnum -->
  <!-- the ampersand entity maps to itself, so ampersands stay escaped in the result -->
  <entity name="&amp;amp;" char="&amp;amp;"/> <!-- ampersand, u+0026 ISOnum -->
  <!-- NOTE(review): the next two entries decode to raw angle brackets; where the result
       is written with output escaping disabled this can yield markup that is not
       well-formed. Confirm that this is intended. -->
  <entity name="&amp;lt;" char="&lt;"/> <!-- less-than sign, u+003C ISOnum -->
  <entity name="&amp;gt;" char="&gt;"/> <!-- greater-than sign, u+003E ISOnum -->
  <!-- Latin Extended-A -->
  <entity name="&amp;OElig;" char="Œ"/> <!-- latin capital ligature oe, u+0152 ISOlat2 -->
  <entity name="&amp;oelig;" char="œ"/> <!-- latin small ligature oe, u+0153 ISOlat2 -->
  <!-- ligature is a misnomer, this is a separate character in some languages -->
  <entity name="&amp;Scaron;" char="Š"/> <!-- latin capital letter s with caron, u+0160 ISOlat2 -->
  <entity name="&amp;scaron;" char="š"/> <!-- latin small letter s with caron, u+0161 ISOlat2 -->
  <entity name="&amp;Yuml;" char="Ÿ"/> <!-- latin capital letter y with diaeresis, u+0178 ISOlat2 -->
  <!-- Spacing Modifier Letters -->
  <entity name="&amp;circ;" char="ˆ"/> <!-- modifier letter circumflex accent, u+02C6 ISOpub -->
  <entity name="&amp;tilde;" char="˜"/> <!-- small tilde, u+02DC ISOdia -->
  <!-- General Punctuation -->
  <entity name="&amp;ensp;" char="&#8194;"/> <!-- en space, u+2002 ISOpub -->
  <entity name="&amp;emsp;" char="&#8195;"/> <!-- em space, u+2003 ISOpub -->
  <entity name="&amp;thinsp;" char="&#8201;"/> <!-- thin space, u+2009 ISOpub -->
  <entity name="&amp;zwnj;" char="&#8204;"/> <!-- zero width non-joiner, u+200C NEW RFC 2070 -->
  <entity name="&amp;zwj;" char="&#8205;"/> <!-- zero width joiner, u+200D NEW RFC 2070 -->
  <entity name="&amp;lrm;" char="&#8206;"/> <!-- left-to-right mark, u+200E NEW RFC 2070 -->
  <entity name="&amp;rlm;" char="&#8207;"/> <!-- right-to-left mark, u+200F NEW RFC 2070 -->
  <entity name="&amp;ndash;" char="–"/> <!-- en dash, u+2013 ISOpub -->
  <entity name="&amp;mdash;" char="—"/> <!-- em dash, u+2014 ISOpub -->
  <entity name="&amp;lsquo;" char="‘"/> <!-- left single quotation mark, u+2018 ISOnum -->
  <entity name="&amp;rsquo;" char="’"/> <!-- right single quotation mark, u+2019 ISOnum -->
  <entity name="&amp;sbquo;" char="‚"/> <!-- single low-9 quotation mark, u+201A NEW -->
  <entity name="&amp;ldquo;" char="“"/> <!-- left double quotation mark, u+201C ISOnum -->
  <entity name="&amp;rdquo;" char="”"/> <!-- right double quotation mark, u+201D ISOnum -->
  <entity name="&amp;bdquo;" char="„"/> <!-- double low-9 quotation mark, u+201E NEW -->
  <entity name="&amp;dagger;" char="†"/> <!-- dagger, u+2020 ISOpub -->
  <entity name="&amp;Dagger;" char="‡"/> <!-- double dagger, u+2021 ISOpub -->
  <entity name="&amp;permil;" char="‰"/> <!-- per mille sign, u+2030 ISOtech -->
  <entity name="&amp;lsaquo;" char="‹"/> <!-- single left-pointing angle quotation mark, u+2039 ISO proposed -->
  <!-- lsaquo is proposed but not yet ISO standardised -->
  <entity name="&amp;rsaquo;" char="›"/> <!-- single right-pointing angle quotation mark, u+203A ISO proposed -->
  <!-- rsaquo is proposed but not yet ISO standardised -->
  <entity name="&amp;euro;" char="€"/> <!-- euro sign, u+20AC NEW -->
</xsl:variable>
<!-- Entry point: hand every child node of the document node to the templates below. -->
<xsl:template match="/">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!--
  Structural copy of an element: the element itself is copied, then its
  attributes and child elements are processed in turn.
  Text, comment and processing-instruction children are not selected.
  NOTE(review): this means text content of copied elements is not carried
  into the result; confirm that this is intended.
-->
<xsl:template match="*">
  <xsl:copy>
    <xsl:apply-templates select="@* | *"/>
  </xsl:copy>
</xsl:template>
<!-- Attributes are copied unchanged. -->
<xsl:template match="@*">
  <xsl:copy/>
</xsl:template>
<!--
  Replaces each atom:content element with a no-namespace content element.
  The string value of the element is run through functx:replace-multi using
  the @name / @char columns of $htmlEntities, and the result is written with
  output escaping disabled so that the markup it contains is emitted as
  markup rather than as escaped text.
  Attributes of the matched element are not copied.
-->
<xsl:template match="atom:content">
  <xsl:variable name="decoded" as="xs:string?"
                select="functx:replace-multi(., $htmlEntities/@name, $htmlEntities/@char)"/>
  <content>
    <xsl:value-of select="$decoded" disable-output-escaping="yes"/>
  </content>
</xsl:template>
<!--
  functx:replace-multi
  Applies a list of replacements to a string, one after another.

  arg        : the string to work on (may be the empty sequence)
  changeFrom : patterns, each passed to replace() as a regular expression
  changeTo   : replacement strings, paired with changeFrom by position;
               a pattern with no partner is replaced by the empty string

  Returns $arg unchanged once no patterns are left; otherwise replaces the
  first pattern and recurses on the remaining pairs.
-->
<xsl:function name="functx:replace-multi" as="xs:string?">
  <xsl:param name="arg" as="xs:string?"/>
  <xsl:param name="changeFrom" as="xs:string*"/>
  <xsl:param name="changeTo" as="xs:string*"/>
  <xsl:choose>
    <xsl:when test="empty($changeFrom)">
      <xsl:sequence select="$arg"/>
    </xsl:when>
    <xsl:otherwise>
      <!-- fall back to '' when changeTo is shorter than changeFrom -->
      <xsl:variable name="replacement" as="xs:string" select="($changeTo[1], '')[1]"/>
      <xsl:sequence select="functx:replace-multi(
                              replace($arg, $changeFrom[1], $replacement),
                              subsequence($changeFrom, 2),
                              subsequence($changeTo, 2))"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!--
  functx:if-absent
  Returns $arg when it contains at least one item, otherwise $value.

  arg   : the sequence to test
  value : the fallback returned when $arg is the empty sequence
-->
<xsl:function name="functx:if-absent" as="item()*">
  <xsl:param name="arg" as="item()*"/>
  <xsl:param name="value" as="item()*"/>
  <xsl:choose>
    <xsl:when test="empty($arg)">
      <xsl:sequence select="$value"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:sequence select="$arg"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
</xsl:stylesheet>
No comments:
Post a Comment