开发者

Sitemap as folder structure

Hey guys, I'm looking for a way to show a sitemap as a folder structure.

So currently the sitemap looks like this:

  • http://mydomain.com/
  • http://mydomain.com/category
  • http://mydomain.com/category/product1
  • http://mydomain.com/category/product2
  • http://mydomain.com/other-category/product1
  • http://mydomain.com/other-category/product2

But instead, I'm looking for this:

  • http://mydomain.com/

    • category

      • product1
      • product2
    • other-category
      • product1
      • product2

Is there a convenient way to do this?

Sitemap code

...
<url>
  <loc>http://mydomain.com</loc>
  <changefreq>weekly</changefreq>
  <priority>1.00</priority>
</url>
<url>
  <loc>http://mydomain.com/category</loc>
  <changefreq>weekly</changefreq>
  <priority>0.80</priority>
</url>
...

** XSLT Code **

...
<ul> 
  <xsl:for-each select="xna:urlset/xna:url"> 
    <li><xsl:value-of select="xna:loc"/></li> 
  </xsl:for-each> 
</ul> 
...


I. This XSLT 1.0 solution:

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:ext="http://exslt.org/common"
 exclude-result-prefixes="ext">
 <!--
   Two-pass XSLT 1.0 transform that renders the loc URLs of the input as a
   nested HTML list: first URL part, then second part, then third part.

   Pass 1 (default mode) rebuilds every loc as a loc element holding one
   word child per "/"-separated part of the text that follows 'http://'.
   Pass 2 (modes group, group2, group3) applies Muenchian grouping to that
   temporary tree. The temporary tree is a result tree fragment, so the
   EXSLT node-set() extension function is needed to navigate it.
 -->

 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Grouping keys over loc elements, built from their word children:
      first word only, first two words, and first three words. The '+'
      character is the separator between the parts of a compound key. -->
 <xsl:key name="kLocByDomain" match="loc"
  use="word[1]"/>
 <xsl:key name="kLocByDomainAndCat" match="loc"
  use="concat(word[1], '+', word[2])"/>

 <xsl:key name="kLocByDomainCatProduct" match="loc"
  use="concat(word[1], '+', word[2], '+', word[3])"/>

 <!-- Entry point for the source document element: runs pass 1 into a
      variable, then starts pass 2 on the generated urls element. -->
 <xsl:template match="/*">
  <xsl:variable name="vrtfTokenized">
   <urls>
    <xsl:apply-templates/>
   </urls>
  </xsl:variable>

  <xsl:apply-templates mode="group"
   select="ext:node-set($vrtfTokenized)/*"/>
 </xsl:template>

 <!-- Pass 2, top level: emits the heading and the outer list, visiting
      only the first loc of every distinct word[1] value. -->
 <xsl:template match="/*" mode="group">
  <h1>Sitemap</h1>

  <ul>
   <xsl:apply-templates mode="group" select=
    "loc[generate-id()
        =
         generate-id(key('kLocByDomain', word[1])[1])
        ]"/>
  </ul>
 </xsl:template>

 <!-- One list item per distinct word[1]. Inside it, visits the first loc
      of every distinct (word[1], word[2]) pair that shares this word[1]. -->
 <xsl:template match="loc" mode="group">
  <li><xsl:value-of select="word[1]"/>
    <ul>
     <xsl:apply-templates mode="group2" select=
      "key('kLocByDomain', word[1])
             [generate-id()
             =
              generate-id(key('kLocByDomainAndCat',
                              concat(current()/word[1], '+', word[2])
                             )[1]
                          )
              ]"/>
    </ul>
  </li>
 </xsl:template>

 <!-- One list item per distinct word[2] within the current word[1].
      Only loc elements that have a second word match here; a loc without
      one falls through to the built-in rules, and the empty text()
      template for this mode keeps those rules from printing anything. -->
 <xsl:template match="loc[word[2]]" mode="group2">
  <li><xsl:value-of select="word[2]"/>
    <ul>
     <xsl:apply-templates mode="group3" select=
      "key('kLocByDomainAndCat', concat(word[1], '+', word[2]))
             [generate-id()
             =
              generate-id(key('kLocByDomainCatProduct',
                              concat(current()/word[1], 
                                     '+', current()/word[2], 
                                     '+', word[3])
                             )[1]
                          )
              ]"/>
    </ul>
  </li>
 </xsl:template>

 <!-- Leaf level: prints word[3] for loc elements that have a third word. -->
 <xsl:template match="loc[word[3]]" mode="group3">
  <li><xsl:value-of select="word[3]"/></li>
 </xsl:template>

 <!-- Pass 1: replaces a source loc with a loc made of word elements.
      Only the text after 'http://' is tokenized; substring-after() returns
      an empty string when 'http://' is absent, so such a loc gets no
      word children. -->
 <xsl:template match="loc">
  <loc>
   <xsl:call-template name="tokenize">
    <xsl:with-param name="pText"
         select="substring-after(.,'http://')"/>
   </xsl:call-template>
  </loc>
 </xsl:template>

 <!-- Recursive tokenizer: emits the text up to the next '/' as a word
      element, then calls itself with whatever follows that '/'.
      pText: the text still to be split. Recursion stops when it is empty. -->
 <xsl:template name="tokenize">
  <xsl:param name="pText"/>

  <xsl:if test="string-length($pText)>0">
   <word>
    <!-- Appending '/' guarantees substring-before() finds a delimiter,
         so the last part is returned as well. -->
    <xsl:value-of select=
     "substring-before(concat($pText,'/'), '/')"/>
   </word>
   <xsl:call-template name="tokenize">
    <xsl:with-param name="pText" select=
        "substring-after($pText,'/')"/>
   </xsl:call-template>
  </xsl:if>
 </xsl:template>

 <!-- Suppress the built-in copying of text nodes in the default, group2
      and group3 modes. -->
 <xsl:template match="text()"/>
 <xsl:template match="text()" mode="group2"/>
 <xsl:template match="text()" mode="group3"/>
</xsl:stylesheet>

when applied on the following XML document (based on the provided fragment):

<site>
    <url>
        <loc>http://mydomain.com/</loc>
        <changefreq>weekly</changefreq>
        <priority>1.00</priority>
    </url>
    <url>
        <loc>http://mydomain.com/category</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
    <url>
        <loc>http://mydomain.com/category/product1</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
    <url>
        <loc>http://mydomain.com/category/product2</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
    <url>
        <loc>http://mydomain.com/other-category/product1</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
    <url>
        <loc>http://mydomain.com/other-category/product2</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
    <url>
        <loc>http://mydomain.com/other-category/product3</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
    <url>
        <loc>http://mydomain2.com/other-category/product3</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url> ...
</site>

produces the wanted, correct result:

    <h1>Sitemap</h1>
    <ul>
        <li>mydomain.com
            <ul>
                <li>category
                    <ul>
                        <li>product1</li>
                        <li>product2</li>
                    </ul></li>
                <li>other-category
                    <ul>
                        <li>product1</li>
                        <li>product2</li>
                        <li>product3</li>
                    </ul></li>
            </ul></li>
        <li>mydomain2.com
            <ul>
                <li>other-category
                    <ul>
                        <li>product3</li>
                    </ul></li>
            </ul></li>
    </ul>

--
and it looks like this in the browser:

Sitemap

  • mydomain.com
    • category
      • product1
      • product2
    • other-category
      • product1
      • product2
      • product3
  • mydomain2.com
    • other-category
      • product3

Explanation:

  1. The site map groups URLs by domain, then by category within each domain, then by product within each category.

  2. This is a two-pass solution.

  3. The first pass tokenizes each url. The tokens are represented by word elements.

  4. The second pass applies Muenchian grouping to the results of the first pass, using keys that contain one, then two, then three parts.

II. XSLT 2.0 solution

<xsl:stylesheet version="2.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <!--
   XSLT 2.0 transform that renders url/loc values as a three-level nested
   HTML list using xsl:for-each-group.

   Every level groups on one item of tokenize(., '/'). For a value such as
   http://host/a/b the tokens are 'http:', '', 'host', 'a', 'b', so item 3
   is the host, item 4 the first path part and item 5 the second.
 -->
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Document element: emits the heading and the outer list, one group
      per distinct third token. Inside xsl:for-each-group the context item
      is the first loc of the group, which is what select="." passes on. -->
 <xsl:template match="/*">
  <h1>Sitemap</h1>
  <ul>
   <xsl:for-each-group select="url/loc"
    group-by="tokenize(., '/')[3]"
   >
    <xsl:apply-templates select="."/>
   </xsl:for-each-group>
  </ul>
 </xsl:template>

 <!-- Third-token level: prints the token, then regroups the members of
      the current group that have a non-empty fourth token. -->
 <xsl:template match="loc">
  <li><xsl:sequence select="tokenize(., '/')[3]"/>
   <ul>
    <xsl:for-each-group select=
     "current-group()[tokenize(., '/')[4]]"
     group-by="tokenize(., '/')[4]"
    >
     <xsl:apply-templates select="." mode="cat"/>
    </xsl:for-each-group>
   </ul>
  </li>
 </xsl:template>

 <!-- Fourth-token level: prints the token, then regroups the members of
      the current group that have a non-empty fifth token. -->
 <xsl:template match="loc" mode="cat">
  <li><xsl:sequence select="tokenize(., '/')[4]"/>
   <ul>
    <xsl:for-each-group select=
     "current-group()[tokenize(., '/')[5]]"
     group-by="tokenize(., '/')[5]"
    >
     <xsl:apply-templates select="." mode="prod"/>
    </xsl:for-each-group>
   </ul>
  </li>
 </xsl:template>

 <!-- Leaf level: prints the fifth token as a list item. -->
 <xsl:template match="loc" mode="prod">
  <li><xsl:sequence select="tokenize(., '/')[5]"/></li>
 </xsl:template>
</xsl:stylesheet>

Explanation:

We use a number of XSLT 2.0 features that facilitate grouping:

  1. <xsl:for-each-group>

  2. current-group()


In this 1.0 transform categories are gathered just by using substring-before and substring-after.

To apply this transform to your case, you simply need to set your real domain in the xsl:param and within the xsl:key.

Note that your input XML is a fragment, and it is not clear how namespace prefixes should be handled. Therefore I've tested the transform on a sample XML document without namespaces. If your source XML uses namespaces, the transform will need to be adjusted accordingly.


XSLT 1.0 tested under Saxon 6.5.5

<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!--
      Renders a flat list of url/loc entries as a two-level HTML list:
      the site root, then one item per category (first path segment) that
      lists every URL located below that category.
    -->
    <xsl:output method="xml" indent="yes"/>

    <!-- Site root, trailing slash included. XSLT 1.0 forbids variable
         references inside xsl:key, so the same literal is repeated in the
         key below; change both together. -->
    <xsl:param name="mydomain" select="'http://mydomain.com/'"/>

    <!-- Indexes every url by its category, i.e. the text between the site
         root and the next '/'. URLs with no further '/' after the root
         (the root itself and the category pages) get the empty string. -->
    <xsl:key name="urlbyloc" match="url" use="substring-before(substring-after(loc,'http://mydomain.com/'),'/')"/>

    <!-- Document element: prints the root item and visits the first url
         of every category (Muenchian grouping). Groups are filtered on a
         non-empty category instead of on document position, so the result
         no longer depends on the root url being the first entry. -->
    <xsl:template match="/*">
        <ul>
            <li><xsl:value-of select="$mydomain"/>
                <ul>
                    <xsl:apply-templates select="url[substring-before(substring-after(loc,$mydomain),'/') != ''
                                                     and generate-id()=generate-id(key('urlbyloc', substring-before(substring-after(loc,$mydomain),'/'))[1])]"/>
                </ul>
            </li>
        </ul>
    </xsl:template>

    <!-- One category: prints the category page URL as the heading, then
         every URL filed under that category. -->
    <xsl:template match="url">
        <xsl:variable name="category" select="substring-before(substring-after(loc,$mydomain),'/')"/>
        <li>
            <!-- Exact comparison: a substring test would let category
                 "cat" pick up the page of category "category". -->
            <xsl:value-of select="key('urlbyloc', '')/loc[normalize-space(.) = concat($mydomain, $category)]"/>
            <ul>
                <xsl:apply-templates select="key('urlbyloc', $category)/loc"/>
            </ul>
        </li>
    </xsl:template>

    <!-- Leaf: one list item holding the full URL. -->
    <xsl:template match="loc">
        <li><xsl:value-of select="."/></li>
    </xsl:template>

    <!-- Sitemap metadata is not part of the rendered list. -->
    <xsl:template match="changefreq|priority"/>
</xsl:stylesheet>

This transform applied on the following input:

<url-set>
    <url>
        <loc>http://mydomain.com</loc>
        <changefreq>weekly</changefreq>
        <priority>1.00</priority>
    </url>
    <url>
        <loc>http://mydomain.com/category</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url>
    <url>
        <loc>http://mydomain.com/category/prod1</loc>
        <changefreq>weekly</changefreq>
        <priority>1.00</priority>
    </url>
    <url>
        <loc>http://mydomain.com/category/prod2</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url>
    <url>
        <loc>http://mydomain.com/othercat</loc>
        <changefreq>weekly</changefreq>
        <priority>1.00</priority>
    </url>
    <url>
        <loc>http://mydomain.com/othercat/prod1</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url>
    <url>
        <loc>http://mydomain.com/othercat/prod2</loc>
        <changefreq>weekly</changefreq>
        <priority>0.80</priority>
    </url>
</url-set>

Produces:

<ul>
   <li>http://mydomain.com/<ul>
         <li>http://mydomain.com/category<ul>
               <li>http://mydomain.com/category/prod1</li>
               <li>http://mydomain.com/category/prod2</li>
            </ul>
         </li>
         <li>http://mydomain.com/othercat<ul>
               <li>http://mydomain.com/othercat/prod1</li>
               <li>http://mydomain.com/othercat/prod2</li>
            </ul>
         </li>
      </ul>
   </li>
</ul>

Using XSLT 2.0 user-defined functions, we can make the transform yet more readable. Moreover, you will need to indicate your domain just in the initial parameters, as 2.0 xsl:key supports variable references.

XSLT 2.0 tested under Saxon-B 9.0.0.4J

<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:empo="http://stackoverflow.com/users/253811/empo"
    exclude-result-prefixes="empo">
    <!--
      XSLT 2.0 variant of the category grouping transform: renders the
      url/loc entries as the site root, then one item per category (first
      path segment) listing every URL located below that category.
    -->
    <xsl:output method="xml" indent="yes" omit-xml-declaration="yes"/>

    <!-- Site root, trailing slash included. This is the only place where
         the domain has to be set. -->
    <xsl:param name="mydomain" select="'http://mydomain.com/'"/>

    <!-- Returns the category of a URL: the text between $mydomain and the
         next '/'. The result is empty when no '/' follows the root.
         loc:      the URL to inspect
         mydomain: the site root, trailing slash included -->
    <xsl:function name="empo:get-category">
        <xsl:param name="loc"/>
        <xsl:param name="mydomain"/>
    <xsl:value-of select="substring-before(substring-after($loc,$mydomain),'/')"/>
  </xsl:function>

    <!-- Indexes every url by its category. -->
    <xsl:key name="urlbyloc" match="url" use="empo:get-category(loc,$mydomain)"/>

    <!-- Document element: prints the root item and visits the first url
         of every category. Groups are filtered on a non-empty category
         instead of on document position, so the result no longer depends
         on the root url being the first entry. -->
    <xsl:template match="/*">
        <ul>
            <li><xsl:value-of select="$mydomain"/>
                <ul>
                    <xsl:apply-templates select="url[empo:get-category(loc,$mydomain) != ''
                                                     and generate-id()=generate-id(key('urlbyloc', empo:get-category(loc,$mydomain))[1])]"/>
                </ul>
            </li>
        </ul>
    </xsl:template>

    <!-- One category: prints the category page URL as the heading, then
         every URL filed under that category. -->
    <xsl:template match="url">
        <xsl:variable name="category" select="string(empo:get-category(loc,$mydomain))"/>
        <li>
            <!-- Exact comparison: with a substring test, xsl:value-of in
                 XSLT 2.0 would print every matching page, e.g. both "cat"
                 and "category" for category "cat". -->
            <xsl:value-of select="key('urlbyloc', '')/loc[normalize-space(.) = concat($mydomain, $category)]"/>
            <ul>
                <xsl:apply-templates select="key('urlbyloc', $category)/loc"/>
            </ul>
        </li>
    </xsl:template>

    <!-- Leaf: one list item holding the full URL. -->
    <xsl:template match="loc">
        <li><xsl:value-of select="."/></li>
    </xsl:template>

    <!-- Sitemap metadata is not part of the rendered list. -->
    <xsl:template match="changefreq|priority"/>

</xsl:stylesheet>


Here's a simple template that should do what you need:

  <!-- Entry point: starts the recursion at the first url of its parent,
       which is assumed to hold the site root. -->
  <xsl:template match="/">
    <xsl:apply-templates select="//url[1]" />
  </xsl:template>

  <!-- Prints one url as a list and recurses into its direct children.
       prefix: the leading part of loc to strip from the printed label;
               empty for the starting url, so its full loc is printed.
       A direct child is a sibling url whose loc is this loc, followed by
       '/', followed by one non-empty segment without a further '/'.
       The '/' boundary is required so that "/cat" does not claim
       "/category" as a child.
       NOTE(review): each child list is emitted as a sibling of the li,
       directly inside the parent ul. -->
  <xsl:template match="url">
    <xsl:param name="prefix" />
    <xsl:variable name="childPrefix" select="concat(loc,'/')" />
    <ul>
      <li><xsl:value-of select="substring-after(loc,$prefix)" /></li>
      <xsl:apply-templates select="../url[starts-with(loc,$childPrefix) and substring-after(loc,$childPrefix) != '' and not(contains(substring-after(loc,$childPrefix),'/'))]">
        <xsl:with-param name="prefix" select="$childPrefix" />
      </xsl:apply-templates>
    </ul>
  </xsl:template>

The first template just picks your starting point; in this case assuming the first url in the document contains your root. Use whatever xpath you need here; //url[loc='http://mydomain.com'] would also do.

The second template does the work, simply outputting the current loc field, stripping off what came before it using substring-after. It then applies itself to any other url nodes whose loc field starts with the text in the current one, but does not have any further / characters.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜