开发者

XSLT conversion creating new qnames from elements

I need to convert a table of data that arrives as XML like the following, where c1 is column 1, c2 is column 2, and so on.

<?xml version="1.0" encoding="UTF-8"?>
<report>
    <report_header>
        <c1>desc</c1>
        <c2>prname</c2>
        <c3>prnum</c3>
        <c4>cdate</c4>
        <c5>phase</c5>
        <c6>stype</c6>
        <c7>status</c7>
        <c8>parent</c8>
        <c9>location</c9>
    </report_header>
    <report_row>
        <c1></c1>
        <c2>IT Project Message Validation</c2>
        <c3>IT-0000021</c3>
        <c4>12/14/2010 09:56 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
    <report_row>
        <c1></c1>
        <c2>David, Michael John Morning QA Test</c2>
        <c3>IT-0000020</c3>
        <c4>12/14/2010 08:12 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
</report>

into

<?xml version="1.0" encoding="UTF-8"?>
<report>
    <report_row>
        <desc></desc>
        <prname>IT Project Message Validation</prname>
        <prnum>IT-0000021</prnum>
        <cdate>12/14/2010 09:56 AM</cdate>
        <phase>Preparation</phase>
        <stype>IT Projects</stype>
        <status>Active</status>
        <parent>IT</parent>
        <location>/IT/BIOMED</location>
    </report_row>
    <report_row>
        <desc></desc>
        <prname>David, Michael John Morning QA Test</prname>
        <prnum>IT-0000020</prnum>
        <cdate>12/14/2010 08:12 AM</cdate>
        <phase>Preparation</phase>
        <stype>IT Projects</stype>
        <status>Active</status>
        <parent>IT</parent>
        <location>/IT/BIOMED</location>
    </report_row>
</report>

my current xslt looks like this

<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Rebuilds /report so that each report_row cell (c1 .. c9) is emitted as an
  element named after the text of the same-numbered cell in report_header.
  One explicit template exists per column, so only c1 .. c9 are handled.
  The exslt prefix is declared below but not used anywhere in this stylesheet.
-->
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  xmlns:exslt="http://exslt.org/common">
<xsl:output method="xml" indent="yes"/>

<!-- Entry point: emit a new report root and process only the data rows;
     report_header is never applied, so it does not reach the output. -->
<xsl:template match="/">
    <report>
        <xsl:apply-templates select="/report/report_row"/>          
    </report>
</xsl:template>

<!-- One output report_row per input report_row; the columns are applied
     one by one in a fixed order. -->
<xsl:template match="/report/report_row">
<report_row>
    <xsl:apply-templates select="c1"/>
    <xsl:apply-templates select="c2"/>
    <xsl:apply-templates select="c3"/>
    <xsl:apply-templates select="c4"/>
    <xsl:apply-templates select="c5"/>
    <xsl:apply-templates select="c6"/>
    <xsl:apply-templates select="c7"/>
    <xsl:apply-templates select="c8"/>
    <xsl:apply-templates select="c9"/>
</report_row>
</xsl:template> 

<!-- Per-column templates: the element name is taken verbatim from the header
     text (no sanitising), and the content is the string value of the cell. -->
<xsl:template match="c1">   
    <xsl:element name="{/report/report_header/c1}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c2">   
    <xsl:element name="{/report/report_header/c2}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template> 
<xsl:template match="c3">   
    <xsl:element name="{/report/report_header/c3}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c4">   
    <xsl:element name="{/report/report_header/c4}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template> 
<xsl:template match="c5">   
    <xsl:element name="{/report/report_header/c5}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template> 
<xsl:template match="c6">   
    <xsl:element name="{/report/report_header/c6}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c7">   
    <xsl:element name="{/report/report_header/c7}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c8">   
    <xsl:element name="{/report/report_header/c8}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c9">   
    <xsl:element name="{/report/report_header/c9}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>

</xsl:transform>

My transform works if I assume a maximal number of columns and the column headers can be legal qnames.

It started failing when I received more columns than the limit of 100 I had assumed, and when column headers contained spaces.

How do I create a transform that uses wildcards instead and how do I strip spaces and illegal characters from the column headers to make them legal qnames?

Thanks


This transformation:

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <!--
   Copies the report, dropping report_header and renaming every cell of
   every report_row after the header text stored under the same cell name
   (c1, c2, ...). Header text is sanitised so that the result is always a
   usable element name.
 -->
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Characters that are kept verbatim in a generated element name. -->
 <xsl:variable name="vAlphanum" select=
  "concat('ABCDEFGHIJKLMNOPQRSTUVWXYZ',
          'abcdefghijklmnopqrstuvwxyz',
          '_0123456789'
         )
  "/>

 <!--
   Replacement pool for translate(): the n-th unwanted character of a header
   is mapped to the n-th underscore. Unwanted characters whose position lies
   beyond the length of this pool are deleted instead of replaced, which
   still yields a legal name.
 -->
 <xsl:variable name="vReps" select=
  "'_____________________________________'"/>

 <!-- Header text nodes, indexed by the name of the header cell holding them. -->
 <xsl:key name="kColNameByCode"
       match="report_header/*/text()"
       use="name(..)"/>

 <!-- Identity rule: everything not matched more specifically is copied. -->
 <xsl:template match="node()|@*">
  <xsl:copy>
   <xsl:apply-templates select="node()|@*"/>
  </xsl:copy>
 </xsl:template>

 <!-- Data cell: emit an element named after the sanitised header text. -->
 <xsl:template match="report_row/*">
  <xsl:variable name="vNameText" select=
   "key('kColNameByCode', name())"/>

  <!-- The inner translate() isolates the unwanted characters; the outer
       one maps each of them to an underscore. -->
  <xsl:variable name="vCleaned" select=
  "translate($vNameText,
             translate($vNameText,$vAlphanum,''),
             $vReps)
  "/>

  <xsl:variable name="vElName">
   <xsl:choose>
    <!-- No header text for this column: keep the cell's own name, since an
         empty name would make xsl:element fail. -->
    <xsl:when test="$vCleaned = ''">
     <xsl:value-of select="name()"/>
    </xsl:when>
    <!-- An XML name may not start with a digit; prefix an underscore. -->
    <xsl:when test="contains('0123456789', substring($vCleaned, 1, 1))">
     <xsl:value-of select="concat('_', $vCleaned)"/>
    </xsl:when>
    <xsl:otherwise>
     <xsl:value-of select="$vCleaned"/>
    </xsl:otherwise>
   </xsl:choose>
  </xsl:variable>

  <xsl:element name="{$vElName}">
    <xsl:value-of select="."/>
  </xsl:element>
 </xsl:template>

 <!-- The header row only supplies names; it is not part of the output. -->
 <xsl:template match="report_header"/>
</xsl:stylesheet>

when applied to the provided XML document:

<report>
    <report_header>
        <c1>desc</c1>
        <c2>pr name</c2>
        <c3>pr num</c3>
        <c4>cdate</c4>
        <c5>phase</c5>
        <c6>stype</c6>
        <c7>status</c7>
        <c8>parent</c8>
        <c9>location</c9>
    </report_header>
    <report_row>
        <c1></c1>
        <c2>IT Project Message Validation</c2>
        <c3>IT-0000021</c3>
        <c4>12/14/2010 09:56 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
    <report_row>
        <c1></c1>
        <c2>David, Michael John Morning QA Test</c2>
        <c3>IT-0000020</c3>
        <c4>12/14/2010 08:12 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
</report>

produces the wanted, correct result:

<report>
   <report_row>
      <desc/>
      <pr_name>IT Project Message Validation</pr_name>
      <pr_num>IT-0000021</pr_num>
      <cdate>12/14/2010 09:56 AM</cdate>
      <phase>Preparation</phase>
      <stype>IT Projects</stype>
      <status>Active</status>
      <parent>IT</parent>
      <location>/IT/BIOMED</location>
   </report_row>
   <report_row>
      <desc/>
      <pr_name>David, Michael John Morning QA Test</pr_name>
      <pr_num>IT-0000020</pr_num>
      <cdate>12/14/2010 08:12 AM</cdate>
      <phase>Preparation</phase>
      <stype>IT Projects</stype>
      <status>Active</status>
      <parent>IT</parent>
      <location>/IT/BIOMED</location>
   </report_row>
</report>

Do note:

  1. The transformation successfully converts any text with any number of different non-alphanumeric characters to a syntactically-correct XML name.

  2. Efficiency is achieved using keys.


I know this has already been answered, but I figure I'd include a StAX version in ColdFusion, seeing as the question was originally tagged as such. Will serve posterity if bitten by OoM errors using XSLT:

<!---
    Streaming (StAX) conversion of source.xml into destination.xml, both
    located next to this template. Cells inside report_header define the
    output element name for the same-named cells inside each report_row;
    the report_header itself is not written to the output.

    see: http://today.java.net/pub/a/today/2006/07/20/introduction-to-stax.html
--->
<cfset baseDir = getDirectoryFromPath(getCurrentTemplatePath())>

<cfset XMLOutputFactory = createObject("java", "javax.xml.stream.XMLOutputFactory").newInstance()>
<cfset fos = createObject("java", "java.io.FileOutputStream").init("#baseDir#/destination.xml")>
<cfset bos = createObject("java", "java.io.BufferedOutputStream").init(fos)>
<cfset writer = XMLOutputFactory.createXMLStreamWriter(bos)>

<cfset fis = createObject("java", "java.io.FileInputStream").init("#baseDir#/source.xml")>
<cfset bis = createObject("java", "java.io.BufferedInputStream").init(fis)>
<cfset XMLInputFactory = createObject("java", "javax.xml.stream.XMLInputFactory").newInstance()>
<cfset reader = XMLInputFactory.createXMLStreamReader(bis)>

<!--- maps a cell name (c1, c2, ...) to its sanitised column name --->
<cfset headers = {}>
<cfset isHeaderRow = true>

<cftry>
    <!--- the condition is a bare expression so it is re-evaluated on every pass --->
    <cfloop condition="reader.hasNext()">
        <cfset event = reader.next()>
        <cfif event EQ reader.START_ELEMENT>
            <cfset elementName = reader.getLocalName()>
            <cfswitch expression="#elementName#">
                <cfcase value="report">
                    <cfset isHeaderRow = false>
                    <cfset writer.writeStartElement(elementName)>
                </cfcase>
                <cfcase value="report_header">
                    <cfset isHeaderRow = true>
                </cfcase>
                <cfcase value="report_row">
                    <cfset isHeaderRow = false>
                    <cfset writer.writeStartElement(elementName)>
                </cfcase>
                <!--- cX node --->
                <cfdefaultcase>
                    <cfif isHeaderRow>
                        <!--- alphanumerics and underscores only --->
                        <cfset columnName = rereplacenocase(reader.getElementText(), "[^A-Z0-9_]+", "", "all")>
                        <!--- an XML name must start with a letter or underscore; this also covers an empty result --->
                        <cfif NOT refindnocase("^[A-Z_]", columnName)>
                            <cfset columnName = "_" & columnName>
                        </cfif>
                        <cfset headers[elementName] = columnName>
                    <cfelse>
                        <!--- a cell without a header entry keeps its own name instead of raising a missing-key error --->
                        <cfif structKeyExists(headers, elementName)>
                            <cfset writer.writeStartElement(headers[elementName])>
                        <cfelse>
                            <cfset writer.writeStartElement(elementName)>
                        </cfif>
                        <!--- getElementText() consumes the text and the cell's end tag, so the end tag is written here --->
                        <cfset writer.writeCharacters(reader.getElementText())>
                        <cfset writer.writeEndElement()>
                    </cfif>
                </cfdefaultcase>
            </cfswitch>
        <cfelseif event EQ reader.END_ELEMENT>
            <cfif reader.getLocalName() EQ "report_header">
                <!--- the header was never opened in the output; leaving header mode here lets the root be closed even when there are no rows --->
                <cfset isHeaderRow = false>
            <cfelse>
                <cfset writer.writeEndElement()>
            </cfif>
        </cfif>
    </cfloop>

    <cfset writer.flush()>
<cffinally>
    <!--- closing the StAX reader/writer does not close the underlying streams, so close those explicitly --->
    <cfset reader.close()>
    <cfset writer.close()>
    <cfset bis.close()>
    <cfset bos.close()>
</cffinally>
</cftry>

<!--- don't do this w/a large file b/c you'll get an OOM error --->
<cffile action="read" file="#baseDir#/destination.xml" variable="transformed">
<cfdump var="#transformed#">


You can use the translate function to strip undesired characters from the name.

You can use a regular wildcard template to match any child element; use a mode to prevent it from getting in the way of other wildcard templates. And you can use the local-name function to look up the header element by element name.

<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Rebuilds /report so that each cell of each report_row is renamed after the
  text of the report_header cell with the same local name. Works for any
  number of columns. Only the xsl namespace is declared, so no stray
  namespace declaration is copied onto the output elements.
-->
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes"/>

<!-- Characters allowed at the start of a generated element name. -->
<xsl:variable name="nameStartChars"
    select="'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'"/>
<!-- Characters allowed anywhere in a generated element name. -->
<xsl:variable name="nameChars"
    select="concat($nameStartChars, '0123456789-.')"/>

<!-- Entry point: new report root containing only the data rows. -->
<xsl:template match="/">
    <report>
        <xsl:apply-templates select="/report/report_row"/>
    </report>
</xsl:template>

<!-- Only element children are processed, so whitespace between the cells
     is not pushed through the wildcard mode. -->
<xsl:template match="/report/report_row">
<report_row>
    <xsl:apply-templates select="*" mode="wildcard"/>
</report_row>
</xsl:template>

<!-- Any cell: look up its header text and turn it into a legal name. -->
<xsl:template match="*" mode="wildcard">
    <xsl:variable name="elemname" select="local-name()"/>
    <xsl:variable name="elemcontent" select="/report/report_header/*[local-name()=$elemname]"/>
    <!-- The inner translate() collects every character that is not allowed;
         the outer one deletes exactly those characters. -->
    <xsl:variable name="stripped"
        select="translate($elemcontent, translate($elemcontent, $nameChars, ''), '')"/>
    <xsl:variable name="safename">
        <xsl:choose>
            <!-- No usable header text: keep the cell's own name. -->
            <xsl:when test="$stripped = ''">
                <xsl:value-of select="$elemname"/>
            </xsl:when>
            <!-- A name may not start with a digit, hyphen or dot. -->
            <xsl:when test="not(contains($nameStartChars, substring($stripped, 1, 1)))">
                <xsl:value-of select="concat('_', $stripped)"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$stripped"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:variable>
    <xsl:element name="{$safename}"><xsl:value-of select="."/></xsl:element>
</xsl:template>

</xsl:transform>


Consider the following stylesheet:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!--
  Copies the input, removes report_header, and renames each report_row cell
  to the header text of the same-named column with spaces turned into
  underscores.
-->
<xsl:output indent="yes" method="xml"/>
<xsl:strip-space elements="*"/>

<!-- Header cells indexed by their local name (c1, c2, ...). -->
<xsl:key name="headerByColumn" use="local-name()" match="report_header/*"/>

<!-- The header row is consumed for naming only and produces no output. -->
<xsl:template match="report_header"/>

<!-- Data cell: output element named after the matching header cell. -->
<xsl:template match="report_row/*">
    <xsl:variable name="label" select="key('headerByColumn', local-name())"/>
    <xsl:element name="{translate($label, ' ', '_')}">
        <xsl:apply-templates/>
    </xsl:element>
</xsl:template>

<!-- Identity rule for everything else. -->
<xsl:template match="@* | node()">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>

</xsl:stylesheet>

Applied to an XML with spaces in headers:

<report>
    <report_header>
        <c1>desc</c1>
        <c2>pr name</c2>
        <c3>pr num</c3>
        <c4>cdate</c4>
        <c5>phase</c5>
        <c6>stype</c6>
        <c7>status</c7>
        <c8>parent</c8>
        <c9>location</c9>
    </report_header>
    <report_row>
        <c1></c1>
        <c2>IT Project Message Validation</c2>
        <c3>IT-0000021</c3>
        <c4>12/14/2010 09:56 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
    <report_row>
        <c1></c1>
        <c2>David, Michael John Morning QA Test</c2>
        <c3>IT-0000020</c3>
        <c4>12/14/2010 08:12 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
</report>

It produces this result:

<report>
    <report_row>
       <desc/>
       <pr_name>IT Project Message Validation</pr_name>
       <pr_num>IT-0000021</pr_num>
       <cdate>12/14/2010 09:56 AM</cdate>
       <phase>Preparation</phase>
       <stype>IT Projects</stype>
       <status>Active</status>
       <parent>IT</parent>
       <location>/IT/BIOMED</location>
    </report_row>
    <report_row>
       <desc/>
       <pr_name>David, Michael John Morning QA Test</pr_name>
       <pr_num>IT-0000020</pr_num>
       <cdate>12/14/2010 08:12 AM</cdate>
       <phase>Preparation</phase>
       <stype>IT Projects</stype>
       <status>Active</status>
       <parent>IT</parent>
       <location>/IT/BIOMED</location>
    </report_row>
</report>
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜