Need help with a Linq XML conditional grouping query
I have the following xml fragment:
<BANNER ID="Banner 2" ROW_WIDTH="200">
<BANNER_TEXTS ID="BANNER_TEXTS">
<BANNER_TEXT UNDERLINE="false" SPAN_COL="1" WIDTHT="78px"></BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="3" WIDTHT="234px">Years In Practice</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="3" WIDTHT="234px">Internet Usage</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="4" WIDTHT="312px">Sales Reps Seen / Week</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="3" WIDTHT="234px">Prescription Volume</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="3" WIDTHT="222px">Patient Load</BANNER_TEXT>
</BANNER_TEXTS>
<BANNER_TEXTS ID="COLUMN_TEXTS">
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">Total</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">&lt; 11 years</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">11-20 years</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">21-30 years</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">Light 1-5 hrs</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">Medium 6-10 hrs</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">Heavy >10 hrs</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">0</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">1-2</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">3-5</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">>5</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">1-100</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">101-150</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="78px">>150</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="74px">1-100</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="74px">101-200</BANNER_TEXT>
<BANNER_TEXT UNDERLINE="true" SPAN_COL="1" WIDTHT="74px">>200</BANNER_TEXT>
</BANNER_TEXTS>
<BANNER_TEXTS ID="COLUMN_TEXTS">
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(A)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(B)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(C)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(D)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(E)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(F)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(G)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(H)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(I)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(J)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(K)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(L)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(M)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(N)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(O)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(P)</COLUMN_TEXT>
<COLUMN_TEXT UNDERLINE="false" SPAN_COL="1">(Q)</COLUMN_TEXT>
</BANNER_TEXTS>
</BANNER>
I would like to group all the 'BANNER_TEXT' elements in the second sequence, using the 'BANNER_TEXT' elements of the first sequence as the keys (only including keys whose text is not null or empty). The SPAN_COL attribute of each 'BANNER_TEXT' in the first sequence indicates how many elements of the second sequence, by position, belong to it.
An example: 'Years In Practice' would be the first key, and its SPAN_COL="3" attribute indicates that its group would contain '< 11 years', '11-20 years' and '21-30 years' (the first header is an empty string that spans 'Total', so that grouping would be skipped).
I've been able to come up with this:
// Header row: every BANNER_TEXT element under the first BANNER_TEXTS element of `child`.
IEnumerable<XElement> groupCats = child.Descendants("BANNER_TEXTS").ElementAt(0).Descendants("BANNER_TEXT");
var totals =
from s in groupCats
// Parsed SPAN_COL of this header; note it is not referenced again below.
let span = int.Parse(s.Attribute("SPAN_COL").Value)
// `into grouped` starts a query continuation, so `s` and `span` are out of scope from here on.
group s by s.Value into grouped
select new
{
GroupCategory = grouped.Key,
// Fixed paging: always skips one element and takes one from the second BANNER_TEXTS element,
// regardless of the header's span (NOTE(review): presumably a placeholder for skip-by-sum / take-by-span).
Categories = child.Descendants("BANNER_TEXTS").ElementAt(1).Descendants("BANNER_TEXT").Skip(1).Take(1)
};
I want to skip by the sum of the spans so far and take by the current span, but I can't get the 'span' variable into the select clause of the query as it is written now.
Consider calculating a relatable value (position) as in the example below.
/// <summary>
/// A header entry carrying its text, its span, and a position that is assigned by the caller.
/// </summary>
public class TopTitle
{
    /// <summary>Span of this title, as read from the data.</summary>
    public int Span { get; set; }

    /// <summary>Text of this title.</summary>
    public string Value { get; set; }

    /// <summary>Position of this title; not computed by this class.</summary>
    public int Position { get; set; }
}
/// <summary>
/// A sub-header entry carrying its text, its span, and a position that is assigned by the caller.
/// </summary>
public class SubTitle
{
    /// <summary>Span of this sub-title, as read from the data.</summary>
    public int Span { get; set; }

    /// <summary>Text of this sub-title.</summary>
    public string Value { get; set; }

    /// <summary>Position of this sub-title; not computed by this class.</summary>
    public int Position { get; set; }
}
//
// GetTitles() / GetSubTitles() are not shown here; they are assumed to return the two
// header rows in document order with Span and Value already populated.
List<TopTitle> Titles = GetTitles();
List<SubTitle> SubTitles = GetSubTitles();

// Give every title the running sum of the spans that precede it, so that titles and
// sub-titles can be compared on a common column position.
int i = 0;
foreach (TopTitle t in Titles)
{
    t.Position = i;
    i += t.Span;
}

// Same running-sum numbering for the sub-titles.
i = 0;
foreach (SubTitle st in SubTitles)
{
    st.Position = i;
    i += st.Span;
}

// A sub-title belongs to a title when its position falls inside the half-open range
// [title.Position, title.Position + title.Span).
var query =
    from t in Titles
    let sts =
        from st in SubTitles
        where t.Position <= st.Position
           && st.Position < (t.Position + t.Span)
        select st
    select new { Title = t, SubTitles = sts.ToList() };
Maybe you could utilise the following by expanding on the criteria selection:
/// <summary>
/// Pairs a title with the criteria text associated with it.
/// </summary>
public class Grouping
{
    /// <summary>Title text.</summary>
    public string Title { get; set; }

    /// <summary>Criteria text stored for the title.</summary>
    public string Criteria { get; set; }
}
// Load the document once and materialize both rows; the original loaded the file twice
// and declared an XmlDocument that was never used.
XElement banner = XElement.Load("Banners.xml");
List<XElement> headers = banner.Elements("BANNER_TEXTS").First().Elements("BANNER_TEXT").ToList();
List<XElement> columns = banner.Elements("BANNER_TEXTS").Skip(1).First().Elements("BANNER_TEXT").ToList();

List<Grouping> groups = new List<Grouping>();

// Number of second-row columns already consumed by the preceding headers.
int offset = 0;
foreach (XElement header in headers)
{
    int span = (int)header.Attribute("SPAN_COL");

    // Headers with no text (e.g. the one above "Total") are not emitted as groups,
    // but their span must still advance the offset so later headers stay aligned.
    if (!string.IsNullOrEmpty(header.Value))
    {
        string[] criteria = columns.Skip(offset).Take(span).Select(c => c.Value).ToArray();
        groups.Add(new Grouping { Title = header.Value, Criteria = string.Join(", ", criteria) });
    }

    offset += span;
}
This should iterate each sequence (the header sequence and the values sequence) once:
/// <summary>
/// Splits <paramref name="toChunk"/> into consecutive groups, one per element of
/// <paramref name="source"/>, where each group's size is given by
/// <paramref name="chunkSizeSelector"/>. Each input sequence is enumerated once.
/// </summary>
/// <param name="source">Keys, in order; one group is produced per key.</param>
/// <param name="toChunk">Items to distribute among the keys, in order.</param>
/// <param name="chunkSizeSelector">Returns how many items the given key consumes.</param>
/// <returns>A lazily evaluated sequence of groupings keyed by the elements of <paramref name="source"/>.</returns>
/// <exception cref="ArgumentNullException">Any argument is null (thrown at call time).</exception>
/// <exception cref="ArgumentException">
/// <paramref name="toChunk"/> runs out of items before every key is satisfied (thrown during enumeration).
/// </exception>
static IEnumerable<IGrouping<TFirst, TSecond>> Chunk<TFirst, TSecond>(
    IEnumerable<TFirst> source,
    IEnumerable<TSecond> toChunk,
    Func<TFirst, int> chunkSizeSelector)
{
    // Validate here rather than inside the iterator so bad arguments fail at the call
    // site instead of on first enumeration.
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    if (toChunk == null)
    {
        throw new ArgumentNullException("toChunk");
    }

    if (chunkSizeSelector == null)
    {
        throw new ArgumentNullException("chunkSizeSelector");
    }

    return ChunkIterator(source, toChunk, chunkSizeSelector);
}

// Deferred part of Chunk: walks both sequences in lock-step, yielding one grouping per key.
private static IEnumerable<IGrouping<TFirst, TSecond>> ChunkIterator<TFirst, TSecond>(
    IEnumerable<TFirst> source,
    IEnumerable<TSecond> toChunk,
    Func<TFirst, int> chunkSizeSelector)
{
    using (var chunkItems = toChunk.GetEnumerator())
    {
        foreach (var key in source)
        {
            List<TSecond> items = new List<TSecond>();

            // A size of zero or less yields an empty group for this key.
            for (int itemsRemaining = chunkSizeSelector(key); itemsRemaining > 0; itemsRemaining--)
            {
                if (!chunkItems.MoveNext())
                {
                    throw new ArgumentException("There are not enough items in toChunk to satisfy source.");
                }

                items.Add(chunkItems.Current);
            }

            yield return new ChunkGrouping<TFirst, TSecond>(key, items);
        }
    }
}
/// <summary>
/// Minimal <see cref="IGrouping{TKey, TElement}"/> implementation that wraps a key and an
/// existing sequence of elements without copying the sequence.
/// </summary>
internal class ChunkGrouping<TKey, TElement> : IGrouping<TKey, TElement>
{
    private readonly TKey _groupKey;
    private readonly IEnumerable<TElement> _items;

    /// <summary>Creates a grouping for <paramref name="key"/> over <paramref name="elements"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="elements"/> is null.</exception>
    public ChunkGrouping(TKey key, IEnumerable<TElement> elements)
    {
        if (elements == null)
        {
            throw new ArgumentNullException("elements");
        }

        _groupKey = key;
        _items = elements;
    }

    /// <summary>The key this grouping was created with.</summary>
    public TKey Key
    {
        get { return _groupKey; }
    }

    IEnumerator<TElement> IEnumerable<TElement>.GetEnumerator()
    {
        return _items.GetEnumerator();
    }

    // The non-generic enumerator is the same enumerator as the generic one.
    IEnumerator IEnumerable.GetEnumerator()
    {
        return ((IEnumerable<TElement>)this).GetEnumerator();
    }
}
Then you can use it as:
// First BANNER_TEXTS row supplies the keys; second row supplies the items to be chunked.
var bannerRows = child.Elements("BANNER_TEXTS");
var headerCells = bannerRows.ElementAt(0).Elements();
var columnCells = bannerRows.ElementAt(1).Elements();

foreach (var chunk in Chunk(headerCells, columnCells, cell => (int)cell.Attribute("SPAN_COL")))
{
    // chunk.Key is the header element; enumerating chunk gives the elements assigned to it.
}
I ended up using this:
foreach (XElement child in e.Elements("BANNER"))
{
    // Header row: BANNER_TEXT descendants of the first BANNER_TEXTS element.
    XElement headerRow = child.Descendants("BANNER_TEXTS").ElementAt(0);
    IEnumerable<XElement> headers = headerRow.Descendants("BANNER_TEXT");

    // One entry per distinct header text. The query is deferred: nothing in this
    // snippet enumerates groupCats, so GetCategories is not called here.
    var groupCats = headers
        .GroupBy(header => header.Value)
        .Select(byText => new
        {
            GroupCategory = byText.Key,
            Categories = GetCategories(byText.Key, child)
        });
}
/// <summary>
/// Returns the text of the second-row BANNER_TEXT elements that fall under the header
/// whose text equals <paramref name="key"/>.
/// </summary>
/// <param name="key">Header text to look up in the first BANNER_TEXTS row.</param>
/// <param name="parent">Element whose descendants contain the BANNER_TEXTS rows.</param>
/// <returns>A deferred sequence of the matching second-row values.</returns>
private IEnumerable<string> GetCategories(string key, XElement parent)
{
    IEnumerable<XElement> headers = parent.Descendants("BANNER_TEXTS").ElementAt(0).Descendants("BANNER_TEXT");

    // SPAN_COL of the first header matching the key; 0 when no header matches.
    int span = headers
        .Where(header => header.Value == key)
        .Select(header => int.Parse(header.Attribute("SPAN_COL").Value))
        .FirstOrDefault();

    // Total span of every header that precedes the key.
    int sum = Series(key, headers).Sum();

    IEnumerable<XElement> columns = parent.Descendants("BANNER_TEXTS").ElementAt(1).Descendants("BANNER_TEXT");
    return columns.Skip(sum).Take(span).Select(column => column.Value);
}
/// <summary>
/// Yields the SPAN_COL value of each element, in order, stopping before the first
/// element whose text equals <paramref name="key"/>.
/// </summary>
/// <param name="key">Element text that terminates the series.</param>
/// <param name="elements">Elements to scan; each yielded element must carry a SPAN_COL attribute.</param>
/// <returns>A lazily produced sequence of parsed SPAN_COL values.</returns>
private static IEnumerable<int> Series(string key, IEnumerable<XElement> elements)
{
    foreach (XElement header in elements)
    {
        // Reaching the key ends the series; the key's own span is not included.
        if (header.Value == key)
        {
            yield break;
        }

        int width = int.Parse(header.Attribute("SPAN_COL").Value);
        yield return width;
    }
}
精彩评论