Transpose XML using Muenchian method - multiple groupings
I need to transpose an XML file such as this:
<?xml version="1.0" encoding="UTF-8"?>
<products>
<product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC1" manufacturer="Some Company 1" />
<product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC1" manufacturer="Some Company 1" />
<product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC2" manufacturer="Some Company 2" />
<product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC2" manufacturer="Some Company 2" />
<product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC3" manufacturer="Some Company 3" />
<product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC3" manufacturer="Some Company 3" />
<product id="11" name="Widget 2" price="21.99" category_code="V" category="Video Games" manufacturer_code="SC4" manufacturer="Some Company 4" />
<product id="12" name="Widget 3" price="10.99" category_code="T" category="Toys" manufacturer_code="SC1" manufacturer="Some Company 2" />
</products>
to a comma delimited text file, or a properly formated HTML table that includes only one line for each product, something like this:
id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3
10, Widget 1, 15.99, T, Toys, E, Electronics, SC1, Some Company 1, SC2, Some Company 2, SC3, Some Company 3
11, Widget 2, 21.99, V, Video Games,,, SC4, Some Company 4,,,,
12, Widget 3, 10.99, T, Toys,,, SC1, Some Company 2,,,,
As you'd notice, the XML data can be thought of as the result of three tables joined together: product, product_category, and product_manufacturer. Each product can belong to multiple categories and have multiple manufacturers. Of course the real data that I'm dealing with is more complex and in a totally different domain, but this sample depicts the problem properly.
I have very limited knowledge of XSLT, and with help of SO and other resources on the Interent, have put together a stylesheet that partly provides me what I need:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:key name="key_product_group" match="product" use="@id"/>
<xsl:key name="key_category_group" match="product" use="concat(
@id,
@category_code,
@category)"/>
<xsl:key name="key_manufacturer_group" match="product" use="concat(
@id,
@manufacturer_code,
@manufacturer)"/>
<xsl:variable name="var_max_category_group" >
<xsl:for-each select="//product[generate-id(.) = generate-id(key('key_product_group',@id) )]">
<xsl:sort select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))])" order="descending"/>
<xsl:if test="position() = 1">
<xsl:value-of select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))])"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:variable name="var_max_manufacturer_group">
<xsl:for-each select="//product[generate-id(.) = generate-id(key('key_product_group',@id) )]">
<xsl:sort select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@ca开发者_StackOverflow中文版tegory_code,
@category)))])" order="descending"/>
<xsl:if test="position() = 1">
<xsl:value-of select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@manufacturer_code,
@manufacturer)))])"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:template match="/">
<xsl:text>id,</xsl:text>
<xsl:text>name,</xsl:text>
<xsl:text>price,</xsl:text>
<xsl:call-template name="loop_pcat">
<xsl:with-param name="count" select="$var_max_category_group"/>
</xsl:call-template>
<xsl:call-template name="loop_pmf">
<xsl:with-param name="count" select="$var_max_manufacturer_group"/>
</xsl:call-template>
<br></br>
<xsl:variable name="var_group"
select="//product[generate-id(.) = generate-id(key('key_product_group',@id)[1])]"/>
<xsl:for-each select="$var_group">
<xsl:sort order="ascending" select="@id"/>
<xsl:value-of select="@id"/>,
<xsl:value-of select="@name"/>,
<xsl:value-of select="@price"/>,
<xsl:for-each select="key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))]">
<xsl:value-of select="@category_code"/>,
<xsl:value-of select="@category"/>,
</xsl:for-each>
<xsl:for-each select="key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@manufacturer_code,
@manufacturer)))]">
<xsl:value-of select="@manufacturer_code"/>,
<xsl:value-of select="@manufacturer"/>,
</xsl:for-each>
<br></br>
</xsl:for-each>
</xsl:template>
<xsl:template name="loop_pcat">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="concat('category_code_',$limit - $count)"/>
<xsl:text>,</xsl:text>
<xsl:value-of select="concat('category_',$limit - $count)"/>
<xsl:text>,</xsl:text>
<xsl:call-template name="loop_pcat">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pmf">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="concat('manufacturer_code_',$limit - $count)"/>
<xsl:text>,</xsl:text>
<xsl:value-of select="concat('manufacturer_',$limit - $count)"/>
<xsl:text>,</xsl:text>
<xsl:call-template name="loop_pmf">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
The above stylesheet produces the following results:
id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3,
10, Widget 1, 15.99, T, Toys, E, Electronics, SC1, Some Company 1, SC2, Some Company 2, SC3, Some Company 3,
11, Widget 2, 21.99, V, Video Games, SC4, Some Company 4,
12, Widget 3, 10.99, T, Toys, SC1, Some Company 2,
The output has at least one major problem: all columns do not exist in each line, for example line 2 and 3 are missing category_code_2, and category_2, and manufacture_code and manufacturer 2 and 3. I'm sure there are other problems with the stylesheet as well, and I have no idea how it would perform on a relatively large xml file, but for now I would greatly appreciate your help with making the stylesheet produce the desired output format.
Thank you
MRSA
I had already begun with my own implementation, but then you posted and I was able to reuse some of your code to speed things up a bit. Main differences are:
- in the templates, delimiters are added in front instead of after data; that way, you can end your lines properly.
- the keys only use the ids and the codes (not the names).
- the concatenation for the keys also uses a delimiter (
'#'
). - the main template only provides the header line.
- the product template has simpler calls to the data templates.
- modified the data templates accordingly…
Here is my resulting stylesheet and its output:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" version="1.0" encoding="UTF-8"/>
<xsl:key name="byProduct" match="product" use="@id"/>
<xsl:key name="byProductAndCategory" match="product" use="concat(@id,'#',@category_code)"/>
<xsl:key name="byProductAndManufacturer" match="product" use="concat(@id,'#',@manufacturer_code)"/>
<xsl:variable name="delimiter" select="','"/>
<xsl:variable name="eol" select="'
'"/>
<xsl:variable name="maxManufacturers">
<xsl:for-each select="//product[count(.|key('byProduct',@id)[1])=1]">
<xsl:sort select="count(key('byProductAndCategory',concat(@id,'#',@category_code)))"/>
<xsl:if test="position()=last()">
<xsl:value-of select="count(key('byProductAndCategory',concat(@id,'#',@category_code)))"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:variable name="maxCategories">
<xsl:for-each select="//product[count(.|key('byProduct',@id)[1])=1]">
<xsl:sort select="count(key('byProductAndManufacturer',concat(@id,'#',@manufacturer_code)))"/>
<xsl:if test="position()=last()">
<xsl:value-of select="count(key('byProductAndManufacturer',concat(@id,'#',@manufacturer_code)))"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:template match="/">
<!--create the file header-->
<xsl:text>id</xsl:text>
<xsl:value-of select="$delimiter"/>
<xsl:text>name</xsl:text>
<xsl:value-of select="$delimiter"/>
<xsl:text>price</xsl:text>
<!--recursive loop for each group to add appropriate number of columns, i.e. maximum number of repetitions-->
<xsl:call-template name="loop_pcat_header">
<xsl:with-param name="count" select="$maxCategories"/>
</xsl:call-template>
<!--recursive loop for each group to add appropriate number of columns, i.e. maximum number of repetitions-->
<xsl:call-template name="loop_pmf_header">
<xsl:with-param name="count" select="$maxManufacturers"/>
</xsl:call-template>
<xsl:value-of select="$eol"/>
<xsl:apply-templates select="//product[count(.|key('byProduct',@id)[1])=1]"/>
</xsl:template>
<xsl:template match="product">
<!-- id -->
<xsl:value-of select="@id"/>
<xsl:value-of select="$delimiter"/>
<!-- name -->
<xsl:value-of select="@name"/>
<xsl:value-of select="$delimiter"/>
<!-- price -->
<xsl:value-of select="@price"/>
<!-- category stuff -->
<xsl:call-template name="loop_pcat_data">
<xsl:with-param name="data" select="key('byProductAndManufacturer',concat(@id,'#',@manufacturer_code))"/>
<xsl:with-param name="count" select="$maxCategories"/>
</xsl:call-template>
<!-- manufacturer stuff -->
<xsl:call-template name="loop_pmf_data">
<xsl:with-param name="data" select="key('byProductAndCategory',concat(@id,'#',@category_code))"/>
<xsl:with-param name="count" select="$maxManufacturers"/>
</xsl:call-template>
<xsl:value-of select="$eol"/>
</xsl:template>
<!--recursive loop templates for file header and data-->
<xsl:template name="loop_pcat_header">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="concat('category_code_',$limit - $count)"/>
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="concat('category_',$limit - $count)"/>
<xsl:call-template name="loop_pcat_header">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pmf_header">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="concat('manufacturer_code_',$limit - $count)"/>
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="concat('manufacturer_',$limit - $count)"/>
<xsl:call-template name="loop_pmf_header">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pcat_data">
<xsl:param name="data"/>
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="$data[$limit - $count]/@category_code"/>
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="$data[$limit - $count]/@category"/>
<xsl:call-template name="loop_pcat_data">
<xsl:with-param name="data" select="$data"/>
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pmf_data">
<xsl:param name="data"/>
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="$data[$limit - $count]/@manufacturer_code"/>
<xsl:value-of select="$delimiter"/>
<xsl:value-of select="$data[$limit - $count]/@manufacturer"/>
<xsl:call-template name="loop_pmf_data">
<xsl:with-param name="data" select="$data"/>
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
The requested output:
id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3
10,Widget 1,15.99,T,Toys,E,Electronics,SC1,Some Company 1,SC2,Some Company 2,SC3,Some Company 3
11,Widget 2,21.99,V,Video Games,,,SC4,Some Company 4,,,,
12,Widget 3,10.99,T,Toys,,,SC1,Some Company 2,,,,
So this is what I came up with, I still have to take care of extra comma at the end, also deal with special characters such as single or double qoutes in the attribute values. Any help with optimizing the code is greatly appreciated.
The following stylesheet
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" version="1.0" encoding="UTF-8" indent="yes"/>
<!--Generate keys, first is the product group-->
<xsl:key name="key_product_group" match="product" use="@id"/>
<!--Here is keys for groupings that are needed, note concatenation of attributes-->
<xsl:key name="key_category_group" match="product" use="concat(
@id,
@category_code,
@category)"/>
<!--another key-->
<xsl:key name="key_manufacturer_group" match="product" use="concat(
@id,
@manufacturer_code,
@manufacturer)"/>
<!--we need the maximum number of distinct groups for each product so that can format the layout properly-->
<xsl:variable name="var_max_category_group" >
<xsl:for-each select="//product[generate-id(.) = generate-id(key('key_product_group',@id) )]">
<xsl:sort select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))])" order="descending"/>
<xsl:if test="position() = 1">
<xsl:value-of select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))])"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<!--maximum for the second group-->
<xsl:variable name="var_max_manufacturer_group">
<xsl:for-each select="//product[generate-id(.) = generate-id(key('key_product_group',@id) )]">
<xsl:sort select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@category_code,
@category)))])" order="descending"/>
<xsl:if test="position() = 1">
<xsl:value-of select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@manufacturer_code,
@manufacturer)))])"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<!--delmiter and line break characters defined here-->
<xsl:variable name="var_delimiter" select="','"/>
<xsl:variable name="var_line_break" select="'
'"/>
<!--main procedure starts here-->
<xsl:template match="/">
<!--create the file header-->
<xsl:text>id</xsl:text>
<xsl:value-of select="$var_delimiter"/>
<xsl:text>name</xsl:text>
<xsl:value-of select="$var_delimiter"/>
<xsl:text>price</xsl:text>
<xsl:value-of select="$var_delimiter"/>
<!--recursive loop for each group to add appropriate number of columns, i.e. maximum number of repetitions-->
<xsl:call-template name="loop_pcat_header">
<xsl:with-param name="count" select="$var_max_category_group"/>
</xsl:call-template>
<!--recursive loop for each group to add appropriate number of columns, i.e. maximum number of repetitions-->
<xsl:call-template name="loop_pmf_header">
<xsl:with-param name="count" select="$var_max_manufacturer_group"/>
</xsl:call-template>
<xsl:value-of select="$var_line_break"/>
<!--select products and order ascending, the ordering is not really needed-->
<xsl:variable name="var_group"
select="//product[generate-id(.) = generate-id(key('key_product_group',@id)[1])]"/>
<xsl:for-each select="$var_group">
<xsl:sort order="ascending" select="@id"/>
<!--select non-repeatable attributes-->
<xsl:value-of select="@id"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="@name"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="@price"/>
<xsl:value-of select="$var_delimiter"/>
<!--select repeatable groups for each product-->
<xsl:for-each select="key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))]">
<xsl:value-of select="@category_code"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="@category"/>
<xsl:value-of select="$var_delimiter"/>
<!--count number of groups for each product and add blanks for the difference between the max and current instance-->
<xsl:variable name="var_max_category_group_instance" select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
concat(@id,
@category_code,
@category)))])"/>
<xsl:call-template name="loop_pcat_data">
<xsl:with-param name="count" select="$var_max_category_group - $var_max_category_group_instance"/>
</xsl:call-template>
</xsl:for-each>
<!--select repeatable groups for each product-->
<xsl:for-each select="key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@manufacturer_code,
@manufacturer)))]">
<xsl:value-of select="@manufacturer_code"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="@manufacturer"/>
<xsl:value-of select="$var_delimiter"/>
<!--count number of groups for each product and add blanks for the difference between the max and current instance-->
<xsl:variable name="var_max_manufacturer_group_instance" select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
concat(@id,
@manufacturer_code,
@manufacturer)))])"/>
<xsl:call-template name="loop_pmf_data">
<xsl:with-param name="count" select="$var_max_manufacturer_group - $var_max_manufacturer_group_instance"/>
</xsl:call-template>
</xsl:for-each>
<xsl:value-of select="$var_line_break"/>
</xsl:for-each>
</xsl:template>
<!--recursive loop templates for file header and data-->
<xsl:template name="loop_pcat_header">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="concat('category_code_',$limit - $count)"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="concat('category_',$limit - $count)"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:call-template name="loop_pcat_header">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pcat_data">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:call-template name="loop_pcat_data">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pmf_header">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="concat('manufacturer_code_',$limit - $count)"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="concat('manufacturer_',$limit - $count)"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:call-template name="loop_pmf_header">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<xsl:template name="loop_pmf_data">
<xsl:param name="count" select="1"/>
<xsl:param name="limit" select="$count+1"/>
<xsl:if test="$count > 0">
<xsl:value-of select="$var_delimiter"/>
<xsl:value-of select="$var_delimiter"/>
<xsl:call-template name="loop_pmf_data">
<xsl:with-param name="count" select="$count - 1"/>
<xsl:with-param name="limit" select="$limit"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
when applied to this XML
<?xml version="1.0" encoding="UTF-8"?>
<products>
<product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC1" manufacturer="Some Company 1" />
<product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC1" manufacturer="Some Company 1" />
<product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC2" manufacturer="Some Company 2" />
<product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC2" manufacturer="Some Company 2" />
<product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC3" manufacturer="Some Company 3" />
<product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC3" manufacturer="Some Company 3" />
<product id="11" name="Widget 2" price="21.99" category_code="V" category="Video Games" manufacturer_code="SC4" manufacturer="Some Company 4" />
<product id="12" name="Widget 3" price="10.99" category_code="T" category="Toys" manufacturer_code="SC1" manufacturer="Some Company 2" />
</products>
generates the following output:
id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3,
10,Widget 1,15.99,T,Toys,E,Electronics,SC1,Some Company 1,SC2,Some Company 2,SC3,Some Company 3,
11,Widget 2,21.99,V,Video Games,,,SC4,Some Company 4,,,,,
12,Widget 3,10.99,T,Toys,,,SC1,Some Company 2,,,,,
精彩评论