XSLT conversion creating new qnames from elements
I need to convert a table of data that arrives as XML like the following, where c1 is column 1, c2 is column 2, and so on. The report_header row holds the column names:
<?xml version="1.0" encoding="UTF-8"?>
<report>
<report_header>
<c1>desc</c1>
<c2>prname</c2>
<c3>prnum</c3>
<c4>cdate</c4>
<c5>phase</c5>
<c6>stype</c6>
<c7>status</c7>
<c8>parent</c8>
<c9>location</c9>
</report_header>
<report_row>
<c1></c1>
<c2>IT Project Message Validation</c2>
<c3>IT-0000021</c3>
<c4>12/14/2010 09:56 AM</c4>
<c5>Preparation</c5>
<c6>IT Projects</c6>
<c7>Active</c7>
<c8>IT</c8>
<c9>/IT/BIOMED</c9>
</report_row>
<report_row>
<c1></c1>
<c2>David, Michael John Morning QA Test</c2>
<c3>IT-0000020</c3>
<c4>12/14/2010 08:12 AM</c4>
<c5>Preparation</c5>
<c6>IT Projects</c6>
<c7>Active</c7>
<c8>IT</c8>
<c9>/IT/BIOMED</c9>
</report_row>
</report>
into
<?xml version="1.0" encoding="UTF-8"?>
<report>
<report_row>
<desc></desc>
<prname>IT Project Message Validation</prname>
<prnum>IT-0000021</prnum>
<cdate>12/14/2010 09:56 AM</cdate>
<phase>Preparation</phase>
<stype>IT Projects</stype>
<status>Active</status>
<parent>IT</parent>
<location>/IT/BIOMED</location>
</report_row>
<report_row>
<desc></desc>
<prname>David, Michael John Morning QA Test</prname>
<prnum>IT-0000020</prnum>
<cdate>12/14/2010 08:12 AM</cdate>
<phase>Preparation</phase>
<stype>IT Projects</stype>
<status>Active</status>
<parent>IT</parent>
<location>/IT/BIOMED</location>
</report_row>
</report>
my current xslt looks like this
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renames every cN cell of each report_row to the text stored in the
  same-named cN element of /report/report_header.
  Columns c1 through c9 are handled by one hard-coded template each.
-->
<xsl:transform version="1.0"
               xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
               xmlns:exslt="http://exslt.org/common">

  <xsl:output method="xml" indent="yes"/>

  <!-- Document root: wrap the data rows in a new report element.
       Only report_row elements are selected, so report_header is never copied. -->
  <xsl:template match="/">
    <report>
      <xsl:apply-templates select="report/report_row"/>
    </report>
  </xsl:template>

  <!-- One data row: the cells are processed column by column, in fixed order. -->
  <xsl:template match="/report/report_row">
    <report_row>
      <xsl:apply-templates select="c1"/>
      <xsl:apply-templates select="c2"/>
      <xsl:apply-templates select="c3"/>
      <xsl:apply-templates select="c4"/>
      <xsl:apply-templates select="c5"/>
      <xsl:apply-templates select="c6"/>
      <xsl:apply-templates select="c7"/>
      <xsl:apply-templates select="c8"/>
      <xsl:apply-templates select="c9"/>
    </report_row>
  </xsl:template>

  <!-- Per-column templates: the element name comes from the header row,
       the content is the string value of the matched cell. -->
  <xsl:template match="c1">
    <xsl:element name="{/report/report_header/c1}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c2">
    <xsl:element name="{/report/report_header/c2}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c3">
    <xsl:element name="{/report/report_header/c3}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c4">
    <xsl:element name="{/report/report_header/c4}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c5">
    <xsl:element name="{/report/report_header/c5}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c6">
    <xsl:element name="{/report/report_header/c6}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c7">
    <xsl:element name="{/report/report_header/c7}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c8">
    <xsl:element name="{/report/report_header/c8}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

  <xsl:template match="c9">
    <xsl:element name="{/report/report_header/c9}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

</xsl:transform>
My transform works only if I assume a maximum number of columns and that every column header is already a legal QName.
It started failing when I received more columns than the limit of 100 I had assumed, and when column headers contained spaces.
How do I write a transform that uses wildcards instead, and how do I strip spaces and other illegal characters from the column headers so that they become legal QNames?
Thanks
This transformation:
<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Characters that are kept unchanged in a generated element name. -->
 <xsl:variable name="vAlphanum" select=
  "concat('ABCDEFGHIJKLMNOPQRSTUVWXYZ',
          'abcdefghijklmnopqrstuvwxyz',
          '_0123456789'
          )
  "/>

 <!-- Replacement characters for translate(): every character outside
      $vAlphanum becomes an underscore. translate() pairs characters by
      position, so a disallowed character whose first occurrence lies
      beyond the length of this string is deleted instead of replaced;
      the resulting name is still valid. -->
 <xsl:variable name="vReps" select=
 "'_____________________________________'"/>

 <!-- Maps a column code (the header element name, e.g. c2) to the text
      node holding that column's display name. -->
 <xsl:key name="kColNameByCode"
      match="report_header/*/text()"
      use="name(..)"/>

 <!-- Identity rule: everything not handled below is copied as-is. -->
 <xsl:template match="node()|@*">
  <xsl:copy>
   <xsl:apply-templates select="node()|@*"/>
  </xsl:copy>
 </xsl:template>

 <!-- Data cell: re-create the element under a name derived from the
      header text of the same column. -->
 <xsl:template match="report_row/*">
  <xsl:variable name="vNameText" select=
       "key('kColNameByCode', name())"/>

  <!-- The inner translate() yields exactly the characters of the header
       that are NOT in $vAlphanum; the outer one maps them to underscores. -->
  <xsl:variable name="vCleaned" select=
   "translate($vNameText,
              translate($vNameText,$vAlphanum,''),
              $vReps)
   "/>

  <!-- An XML name may be neither empty nor start with a digit.
       substring() returns '' for an empty string and contains(x, '') is
       always true, so this single test covers both cases. -->
  <xsl:variable name="vElName">
   <xsl:if test="contains('0123456789', substring($vCleaned, 1, 1))">_</xsl:if>
   <xsl:value-of select="$vCleaned"/>
  </xsl:variable>

  <xsl:element name="{$vElName}">
   <xsl:value-of select="."/>
  </xsl:element>
 </xsl:template>

 <!-- The header row only supplies names; it is not copied to the output. -->
 <xsl:template match="report_header"/>
</xsl:stylesheet>
when applied to the provided XML document:
<report>
<report_header>
<c1>desc</c1>
<c2>pr name</c2>
<c3>pr num</c3>
<c4>cdate</c4>
<c5>phase</c5>
<c6>stype</c6>
<c7>status</c7>
<c8>parent</c8>
<c9>location</c9>
</report_header>
<report_row>
<c1></c1>
<c2>IT Project Message Validation</c2>
<c3>IT-0000021</c3>
<c4>12/14/2010 09:56 AM</c4>
<c5>Preparation</c5>
<c6>IT Projects</c6>
<c7>Active</c7>
<c8>IT</c8>
<c9>/IT/BIOMED</c9>
</report_row>
<report_row>
<c1></c1>
<c2>David, Michael John Morning QA Test</c2>
<c3>IT-0000020</c3>
<c4>12/14/2010 08:12 AM</c4>
<c5>Preparation</c5>
<c6>IT Projects</c6>
<c7>Active</c7>
<c8>IT</c8>
<c9>/IT/BIOMED</c9>
</report_row>
</report>
produces the wanted, correct result:
<report>
<report_row>
<desc/>
<pr_name>IT Project Message Validation</pr_name>
<pr_num>IT-0000021</pr_num>
<cdate>12/14/2010 09:56 AM</cdate>
<phase>Preparation</phase>
<stype>IT Projects</stype>
<status>Active</status>
<parent>IT</parent>
<location>/IT/BIOMED</location>
</report_row>
<report_row>
<desc/>
<pr_name>David, Michael John Morning QA Test</pr_name>
<pr_num>IT-0000020</pr_num>
<cdate>12/14/2010 08:12 AM</cdate>
<phase>Preparation</phase>
<stype>IT Projects</stype>
<status>Active</status>
<parent>IT</parent>
<location>/IT/BIOMED</location>
</report_row>
</report>
Do note:
The transformation successfully converts any text with any number of different non-alphanumeric characters to a syntactically-correct XML name.
Efficiency is achieved using keys.
I know this has already been answered, but I figured I'd include a StAX version in ColdFusion, since the question was originally tagged as such. It may help anyone who runs into out-of-memory (OOM) errors using XSLT on large files:
<!--- see: http://today.java.net/pub/a/today/2006/07/20/introduction-to-stax.html --->
<!---
  Streaming (StAX) rewrite of source.xml into destination.xml, both located
  next to this template.
  * Children of report_header are read into the "headers" struct
    (column code => cleaned column name) and are not written out.
  * Children of every report_row are written under the cleaned name
    looked up by their own element name.
  The underlying file streams are closed in cffinally: closing the StAX
  reader/writer does not close the streams they wrap, so without this the
  file handles would leak and buffered output might not be on disk when
  destination.xml is read back below.
--->
<cftry>
	<cfset XMLOutputFactory = createObject("java", "javax.xml.stream.XMLOutputFactory").newInstance()>
	<cfset fos = createObject("java", "java.io.FileOutputStream").init("#getDirectoryFromPath(getCurrentTemplatePath())#/destination.xml")>
	<cfset bos = createObject("java", "java.io.BufferedOutputStream").init(fos)>
	<cfset writer = XMLOutputFactory.createXMLStreamWriter(bos)>
	<cfset fis = createObject("java", "java.io.FileInputStream").init("#getDirectoryFromPath(getCurrentTemplatePath())#/source.xml")>
	<cfset bis = createObject("java", "java.io.BufferedInputStream").init(fis)>
	<cfset XMLInputFactory = createObject("java", "javax.xml.stream.XMLInputFactory").newInstance()>
	<cfset reader = XMLInputFactory.createXMLStreamReader(bis)>
	<cfset headers = {}>
	<cfset isHeaderRow = true>
	<cfloop condition="#reader.hasNext()#">
		<cfset event = reader.next()>
		<cfif event EQ reader.START_ELEMENT>
			<cfswitch expression="#reader.getLocalName()#">
				<cfcase value="report">
					<cfset isHeaderRow = false>
					<cfset writer.writeStartElement(reader.getLocalName())>
				</cfcase>
				<cfcase value="report_header">
					<cfset isHeaderRow = true>
				</cfcase>
				<cfcase value="report_row">
					<cfset writer.writeStartElement(reader.getLocalName())>
					<cfset isHeaderRow = false>
				</cfcase>
				<!--- cX node --->
				<cfdefaultcase>
					<cfif isHeaderRow>
						<!--- remember the column code before getElementText() advances the reader --->
						<cfset colKey = reader.getLocalName()>
						<!--- alphanumerics and underscores only --->
						<cfset colName = rereplacenocase(reader.getElementText(), "[^A-Z0-9\_]*", "", "all")>
						<!--- an XML name may be neither empty nor start with a digit --->
						<cfif NOT reFind("^[A-Za-z_]", colName)>
							<cfset colName = "_" & colName>
						</cfif>
						<cfset headers[colKey] = colName>
					<cfelse>
						<!--- getElementText() also consumes the cell's END_ELEMENT, so the element is closed here --->
						<cfset writer.writeStartElement(headers[reader.getLocalName()])>
						<cfset writer.writeCharacters(reader.getElementText())>
						<cfset writer.writeEndElement()>
					</cfif>
				</cfdefaultcase>
			</cfswitch>
		<cfelseif event EQ reader.END_ELEMENT>
			<!--- the end of report_header has no counterpart in the output --->
			<cfif isHeaderRow><cfcontinue/></cfif>
			<cfset writer.writeEndElement()>
		</cfif>
	</cfloop>
	<cfset reader.close()>
	<cfset writer.flush()>
	<cfset writer.close()>
	<cfcatch type="any">
		<!--- nothing is handled here; the catch exists only so cleanup can run in cffinally --->
		<cfrethrow>
	</cfcatch>
	<cffinally>
		<!--- closing a buffered stream also flushes and closes the file stream it wraps --->
		<cfif isDefined("bis")>
			<cfset bis.close()>
		<cfelseif isDefined("fis")>
			<cfset fis.close()>
		</cfif>
		<cfif isDefined("bos")>
			<cfset bos.close()>
		<cfelseif isDefined("fos")>
			<cfset fos.close()>
		</cfif>
	</cffinally>
</cftry>
<!--- don't do this w/a large file b/c you'll get an OOM error --->
<cffile action="read" file="#getDirectoryFromPath(getCurrentTemplatePath())#/destination.xml" variable="transformed">
<cfdump var="#transformed#">
You can use the translate function to strip undesired characters from the name.
You can use a wildcard template to match any child element; give it a mode so that it does not interfere with any regular (unmoded) wildcard template. And you can use the local-name function to look up the matching header element by element name.
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renames every child of each report_row to the text of the same-named
  element under /report/report_header, after removing spaces and
  parentheses from that text. Works for any number of columns.
-->
<xsl:transform version="1.0"
               xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
               xmlns:exslt="http://exslt.org/common"
               exclude-result-prefixes="exslt">
  <!-- exclude-result-prefixes keeps the unused exslt namespace declaration
       off the literal result elements in the output. -->

  <xsl:output method="xml" indent="yes"/>

  <!-- Document root: only data rows are processed; the header row is
       used for lookups and never copied. -->
  <xsl:template match="/">
    <report>
      <xsl:apply-templates select="/report/report_row"/>
    </report>
  </xsl:template>

  <!-- One data row. select="*" restricts processing to child elements so
       that whitespace text nodes between the cells are not pushed through
       the built-in text rule and copied into the output. -->
  <xsl:template match="/report/report_row">
    <report_row>
      <xsl:apply-templates select="*" mode="wildcard"/>
    </report_row>
  </xsl:template>

  <!-- Any cell: look up the header element with the same local name and
       use its text, minus spaces and parentheses, as the new element name. -->
  <xsl:template match="*" mode="wildcard">
    <xsl:variable name="elemname" select="local-name()"/>
    <xsl:variable name="elemcontent" select="/report/report_header/*[local-name()=$elemname]"/>
    <xsl:element name='{translate($elemcontent," ()","")}'>
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

</xsl:transform>
Consider the following stylesheet:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <xsl:output indent="yes" method="xml"/>
  <xsl:strip-space elements="*"/>

  <!-- Index of header cells by their element name (the column code). -->
  <xsl:key name="headerByColumn" match="report_header/*" use="local-name()"/>

  <!-- The header row is dropped from the output. -->
  <xsl:template match="report_header"/>

  <!-- Data cell: rebuild it under the header text of the same column,
       with each space turned into an underscore. Its children are
       processed by the remaining rules. -->
  <xsl:template match="report_row/*">
    <xsl:variable name="headerCell" select="key('headerByColumn', local-name())"/>
    <xsl:element name="{translate($headerCell, ' ', '_')}">
      <xsl:apply-templates/>
    </xsl:element>
  </xsl:template>

  <!-- Identity rule for everything else. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
Applied to an XML with spaces in headers:
<report>
<report_header>
<c1>desc</c1>
<c2>pr name</c2>
<c3>pr num</c3>
<c4>cdate</c4>
<c5>phase</c5>
<c6>stype</c6>
<c7>status</c7>
<c8>parent</c8>
<c9>location</c9>
</report_header>
<report_row>
<c1></c1>
<c2>IT Project Message Validation</c2>
<c3>IT-0000021</c3>
<c4>12/14/2010 09:56 AM</c4>
<c5>Preparation</c5>
<c6>IT Projects</c6>
<c7>Active</c7>
<c8>IT</c8>
<c9>/IT/BIOMED</c9>
</report_row>
<report_row>
<c1></c1>
<c2>David, Michael John Morning QA Test</c2>
<c3>IT-0000020</c3>
<c4>12/14/2010 08:12 AM</c4>
<c5>Preparation</c5>
<c6>IT Projects</c6>
<c7>Active</c7>
<c8>IT</c8>
<c9>/IT/BIOMED</c9>
</report_row>
</report>
It produces this result:
<report>
<report_row>
<desc/>
<pr_name>IT Project Message Validation</pr_name>
<pr_num>IT-0000021</pr_num>
<cdate>12/14/2010 09:56 AM</cdate>
<phase>Preparation</phase>
<stype>IT Projects</stype>
<status>Active</status>
<parent>IT</parent>
<location>/IT/BIOMED</location>
</report_row>
<report_row>
<desc/>
<pr_name>David, Michael John Morning QA Test</pr_name>
<pr_num>IT-0000020</pr_num>
<cdate>12/14/2010 08:12 AM</cdate>
<phase>Preparation</phase>
<stype>IT Projects</stype>
<status>Active</status>
<parent>IT</parent>
<location>/IT/BIOMED</location>
</report_row>
</report>
精彩评论