开发者

Transforming XML so that all its elements and attributes effectively become "minOccurs 1"

I would like to transform a complex XML element so that its structure becomes "regular" - all subelements and attributes present in this element anywhere will be present in 100% of its nodes.

It's probably easier to show what I mean...

Sample input:

<product>

    <size name="S"/>            

    <size>
        <stock>10</stock>
    </size>

</product>

Desired output:

<product>

    <size name="S">    
        <stock/>
    </size>

    <size name="">
        <stock>10</stock>
    </size>

</product>

What happened:

The first size element was supplied with an empty subelement stock (because the second size element had one).

Attribute /size@name with an empty value was added to the second size subelement (because the first size element had one).

Preconditions:

  • Processed XML is unlikely to be big (no problem with using LINQ, caching all of it in memory etc.)

  • I don't know its XML schema in advance.


The code below should match your expectations.

Using this test.xml file as input

 <product xmlns="http://www.example.com/schemas/v0.1">

    <size name="S"/>

    <size>
      <stock>
        <a.el a.att="a.value"/>
      </stock>
    </size>

    <size>
      <stock>
        10
        <b.el b.att="b.value"/>
      </stock>
    </size>

    <size size.att="size.value" name="e">
      <stock>
        12
        <b.el b.att2="b.value2"/>
      </stock>
    </size>

  </product>

This generates the following valid and normalized output

<product xmlns="http://www.example.com/schemas/v0.1">
  <size name="S" size.att="">
    <stock>
      <a.el a.att=""></a.el>
      <b.el b.att="" b.att2=""></b.el>
    </stock>
  </size>
  <size name="" size.att="">
    <stock>
      <a.el a.att="a.value" />
      <b.el b.att="" b.att2=""></b.el>
    </stock>
  </size>
  <size name="" size.att="">
    <stock>
        10
        <b.el b.att="b.value" b.att2="" /><a.el a.att=""></a.el></stock>
  </size>
  <size size.att="size.value" name="e">
    <stock>
        12
        <b.el b.att2="b.value2" b.att="" /><a.el a.att=""></a.el></stock>
  </size>
</product>

using System;
using System.Collections.Generic;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console entry point: loads <c>test.xml</c>, normalizes every
    /// <c>/product/size</c> element against the structure inferred from all of
    /// them, and prints the resulting document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the inference and normalization passes over <c>test.xml</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            XDocument document = XDocument.Load("test.xml");

            // The XPath below needs a prefix for the document's default namespace,
            // so bind "ns" to whatever namespace the root element lives in.
            var namespaceManager = new XmlNamespaceManager(new NameTable());
            namespaceManager.AddNamespace("ns", document.Root.Name.Namespace.ToString());

            IEnumerable<XElement> sizeElements =
                document.XPathSelectElements("/ns:product/ns:size", namespaceManager);

            // First pass collects the union of attributes/sub-elements,
            // second pass fills in whatever each element is missing.
            Descriptor descriptor = Descriptor.InferFrom(sizeElements);
            descriptor.Normalize(sizeElements);

            Console.Write(document.ToString());
        }
    }

    /// <summary>
    /// Describes the "shape" of an XML element: the union of attribute names and
    /// (recursively) child-element descriptors observed across a set of elements
    /// that share the same name. A descriptor can then be used to normalize
    /// elements so that each one carries every attribute and child element that
    /// was seen on any of them.
    /// </summary>
    public class Descriptor
    {
        // A list (not a set) so that missing attributes are appended in first-seen order.
        private readonly IList<XName> _attributeNames = new List<XName>();
        private readonly IDictionary<XName, Descriptor> _elementDescriptors = new Dictionary<XName, Descriptor>();

        /// <summary>Name of the described element; <c>null</c> until the first element is inferred.</summary>
        public XName Name { get; private set; }

        /// <summary>Attribute names seen on any inferred element, in first-seen order.</summary>
        public IEnumerable<XName> AttributeNames { get { return _attributeNames; } }

        /// <summary>Descriptors of the child elements seen on any inferred element, keyed by child name.</summary>
        public IEnumerable<KeyValuePair<XName, Descriptor>> ElementDescriptors { get { return _elementDescriptors; } }

        /// <summary>
        /// Builds a descriptor from a set of same-named elements.
        /// </summary>
        /// <param name="elements">Elements to inspect; all must have the same name.</param>
        /// <returns>A descriptor holding the union of attributes and child elements found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="elements"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">The elements do not all share one name.</exception>
        public static Descriptor InferFrom(IEnumerable<XElement> elements)
        {
            if (elements == null)
            {
                throw new ArgumentNullException("elements");
            }

            var desc = new Descriptor();

            foreach (var element in elements)
            {
                InferFromInternal(element, desc);
            }

            return desc;
        }

        /// <summary>
        /// Adds every missing attribute (with an empty value) and every missing child
        /// element (empty) to each of the given elements, recursively.
        /// </summary>
        /// <param name="elements">Elements to normalize in place; each must be named <see cref="Name"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="elements"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">An element's name differs from <see cref="Name"/>.</exception>
        public void Normalize(IEnumerable<XElement> elements)
        {
            if (elements == null)
            {
                throw new ArgumentNullException("elements");
            }

            foreach (var element in elements)
            {
                NormalizeInternal(element);
            }
        }

        // Records the element's name on first use; afterwards insists on the same name.
        private void UpdateNameFrom(XElement element)
        {
            if (Name == null)
            {
                Name = element.Name;
                return;
            }

            if (element.Name == Name)
            {
                return;
            }

            throw new InvalidOperationException(
                "Element '" + element.Name + "' does not match the descriptor name '" + Name + "'.");
        }

        // Remembers an attribute name if it has not been seen before.
        private void Add(XAttribute att)
        {
            XName name = att.Name;
            if (_attributeNames.Contains(name))
            {
                return;
            }

            _attributeNames.Add(name);
        }

        // Merges a child element into the descriptor registered for its name,
        // creating that descriptor on first sight.
        private void Add(XElement subElement)
        {
            Descriptor desc;
            if (!_elementDescriptors.TryGetValue(subElement.Name, out desc))
            {
                desc = new Descriptor();
                _elementDescriptors.Add(subElement.Name, desc);
            }

            InferFromInternal(subElement, desc);
        }

        // Merges one element's attributes and children (recursively) into desc.
        private static void InferFromInternal(XElement element, Descriptor desc)
        {
            desc.UpdateNameFrom(element);

            foreach (var att in element.Attributes())
            {
                desc.Add(att);
            }

            foreach (var subElement in element.Elements())
            {
                desc.Add(subElement);
            }
        }

        // Brings a single element (and its descendants) in line with this descriptor.
        private void NormalizeInternal(XElement element)
        {
            if (element.Name != Name)
            {
                throw new InvalidOperationException(
                    "Element '" + element.Name + "' does not match the descriptor name '" + Name + "'.");
            }

            foreach (var attributeName in _attributeNames)
            {
                if (element.Attribute(attributeName) == null)
                {
                    element.Add(new XAttribute(attributeName, string.Empty));
                }
            }

            foreach (var childDescriptor in _elementDescriptors)
            {
                // Inference looked at EVERY child with this name, so normalization must
                // as well; XContainer.Element(name) would only return the first one and
                // leave repeated siblings untouched. Snapshot the children so the tree
                // can be modified safely while we iterate.
                var children = new List<XElement>(element.Elements(childDescriptor.Key));

                if (children.Count == 0)
                {
                    var created = new XElement(childDescriptor.Key, string.Empty);
                    element.Add(created);
                    children.Add(created);
                }

                foreach (var child in children)
                {
                    childDescriptor.Value.NormalizeInternal(child);
                }
            }
        }
    }
}
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜