Convert XML file to CSV
After having converted a messed up XML using regex, I now need to change it yet again. This source file
<product>
<s开发者_如何学Goku>SP00001</sku>
<PID_OWNER_SellerID>StoreName</PID_OWNER_SellerID>
<EANCode>8711983489813</EANCode>
<DeliveryDays>2</DeliveryDays>
</product>
Has to become a CSV file, but like this:
sku field value
SP00001 PID_OWNER_SellerID StoreName
SP00001 EANCode 8711983489813
SP00001 DeliveryDays 2
I take it this is outside of regex' scope and has to be done with XSL?
It's generally a bad idea to attempt to parse XML using regular expressions, as there's an infinite way to format an XML document that's structurally the same, and yet will bamboozle your regular expressions.
For files that aren't massive, definitely use XSL, remembering to specify 'text' as your output method. Don't forget that you can invoke an XSL process programmatically if you must -- most languages let you do that.
For huge files, then consider writing a small program that uses a streaming API (e.g. SAX or one of the push-parser APIs).
Here is some XSLT for you...
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:msxsl="urn:schemas-microsoft-com:xslt"
exclude-result-prefixes="msxsl"
>
<xsl:output method="text" indent="yes"/>
<xsl:template match="/">
<xsl:call-template name="headerRow" />
<xsl:apply-templates select="//product" />
</xsl:template>
<xsl:template name="headerRow">
<xsl:call-template name="rightpad">
<xsl:with-param name="fieldvalue" select="'sku'"/>
<xsl:with-param name="fieldsize" select="number(11)"/>
</xsl:call-template>
<xsl:call-template name="rightpad">
<xsl:with-param name="fieldvalue" select="'field'"/>
<xsl:with-param name="fieldsize" select="number(22)"/>
</xsl:call-template>
<xsl:text>value
</xsl:text>
</xsl:template>
<xsl:template match="product">
<xsl:for-each select="node()[local-name(.) != 'sku']">
<xsl:call-template name="rightpad">
<xsl:with-param name="fieldvalue" select="../sku"/>
<xsl:with-param name="fieldsize" select="number(11)"/>
</xsl:call-template>
<xsl:call-template name="rightpad">
<xsl:with-param name="fieldvalue" select="local-name(.)"/>
<xsl:with-param name="fieldsize" select="number(22)"/>
</xsl:call-template>
<xsl:value-of select="."/>
<xsl:text>
</xsl:text>
</xsl:for-each>
</xsl:template>
<xsl:template name="rightpad">
<xsl:param name="fieldvalue" select="string('')"/>
<xsl:param name="fieldsize" select="0"/>
<xsl:variable name="padded"
select="concat($fieldvalue, ' ')" />
<xsl:variable name="result"
select="substring($padded,1,$fieldsize)" />
<xsl:value-of select="$result"/>
</xsl:template>
</xsl:stylesheet>
XPathDocumemt x = new XPathDocument("yourdoc.xml");
XPathNavigator n = x.CreateNavigator();
XPathNodeIterator i = n.Select("root/product");
List<string> fields = new List<string>() { "PID_OWNER_SellerID", "EANCode", "DeliveryDays" }
using (TextWriter w = File.CreateText("c:\\yourfile.csv"))
{
w.WriteLine("sku, field, value");
while (i.MoveNext())
{
foreach (string field in fields)
{
w.WriteLine(string.Format("{0}, {1}, {2}", i.Current.SelectSingleNode("sku").value, field, i.Current.selectSingleNode(field).Value));
}
}
}
This stylesheet will produce the output in the format specified:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="UTF-8" omit-xml-declaration="yes"/>
<!--Spacing between column1 and column2 is 11 characters-->
<xsl:variable name="col1-spaces" select="' '" />
<!--Spacing between column2 and column3 is 22 characters-->
<xsl:variable name="col2-spaces" select="concat($col1-spaces, $col1-spaces)" />
<xsl:template match="/">
<!--Generate the heading row first, then apply templates-->
<xsl:text>sku</xsl:text>
<!--Add enough spaces after to align the next column-->
<xsl:value-of select="substring($col1-spaces, 3)"/>
<xsl:text>field</xsl:text>
<!--Add enough spaces after to align the next column-->
<xsl:value-of select="substring($col2-spaces, 5)"/>
<xsl:text>value </xsl:text>
<xsl:apply-templates />
</xsl:template>
<!--Do nothing with sku elements-->
<xsl:template match="sku" />
<!--For all elements that are children of product, except for sku, do this-->
<xsl:template match="product/*[not(self::sku)]">
<xsl:value-of select="preceding-sibling::sku"/>
<!--Calculate how many spaces are needed using the length of the value of sku -->
<xsl:value-of select="substring($col1-separator, string-length(preceding-sibling::sku))"/>
<xsl:value-of select="local-name()" />
<!--Calculate how many spaces are needed using the length of the name of the current element-->
<xsl:value-of select="substring($col2-separator, string-length(local-name()))"/>
<xsl:value-of select="." />
<xsl:text> </xsl:text>
</xsl:template>
</xsl:stylesheet>
精彩评论