Creating a YQL Opentable to combine RSS data with Placemaker Map info
I am trying to retrieve data from an RSS feed (this part works) and then use the description, title and caption of each item to get geo information (latitude, longitude, woeId) from the YQL Placemaker open data table.
All of this then needs to be output as JSON.
The permalink to test it in the YQL console is here.
Any ideas what's wrong with my XML code, or what I should try instead?
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  YQL Open Data Table: reads a Yahoo! News RSS section and runs the text of
  every story (image caption + title + summary) through geo.placemaker.

  Response shape:
    <stories>
      <c>   the requested section          </c>
      <d>   the RSS query results          </d>
      <e>   one Placemaker <results> element per story, in feed order </e>
    </stories>
-->
<table xmlns="http://query.yahooapis.com/v1/schema/table.xsd">
  <meta>
    <author>Yahoo! Inc.</author>
    <documentationURL>http://i-magine.mobi/</documentationURL>
    <sampleQuery>select * from {table} where section="topstories" and description matches ".*jpg.*"</sampleQuery>
    <description>Searches Yahoo.news RSS</description>
  </meta>
  <bindings>
    <select itemPath="" produces="XML">
      <urls>
        <!-- Same feed address the script queries: .../rss/<section> -->
        <url>http://rss.news.yahoo.com/rss/{section}</url>
      </urls>
      <inputs>
        <key id='section' type="xs:string" paramType="path" required="true" />
      </inputs>
      <execute><![CDATA[
        // No "default xml namespace" statement here: it would make unqualified
        // E4X lookups such as feed.item search that namespace, and RSS items
        // are not in it, so the lookups would come back empty.

        // `section` is the <key> declared in <inputs>.
        var feedUrl = 'http://rss.news.yahoo.com/rss/' + section;

        // Query parameters (@url, @text) keep quotes inside the values from
        // breaking the YQL statement.
        var feed = y.query(
          'select * from rss where url=@url and description matches ".*jpg.*" limit 30',
          {url: feedUrl}
        ).results;

        // Returns capture group 1 of `pattern` in `text`, or '' when there is
        // no match (instead of the string "null").
        function firstGroup(text, pattern) {
          var found = pattern.exec(text);
          return found ? found[1] : '';
        }

        var placeQuery = 'select * from geo.placemaker where documentContent=@text and documentType="text/plain"';

        // Start one Placemaker query per story. The results are read in a
        // second pass so the queries are not forced to run one after another.
        var pending = [];
        var items = feed.item;
        // XMLList exposes its size through the length() method; a bare
        // `.length` would be treated as a lookup of child elements named "length".
        for (var i = 0; i < items.length(); i++) {
          var story = items[i];
          var title = story.title.toString().replace(/\n/g, '');
          var description = story.description.toString();
          // Caption is the alt text of the image embedded in the description.
          var caption = firstGroup(description, /alt="([^"]*)"/);
          // Summary is the description with all markup stripped.
          var summary = description.replace(/<[^>]*>/g, '');
          pending.push(y.query(placeQuery, {text: caption + ' ' + title + ' ' + summary}));
        }

        // Keep every story's Placemaker result, not only the last one.
        var places = new XMLList();
        for (var j = 0; j < pending.length; j++) {
          places += pending[j].results;
        }

        response.object = <stories>
          <c>{section}</c>
          <d>{feed}</d>
          <e>{places}</e>
        </stories>;
      ]]></execute>
    </select>
  </bindings>
</table>
A tidied up (and "working" in the sense that it brings back RSS+Placemaker results) version of your table looks like:
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  YQL Open Data Table: fetches a Yahoo! News RSS section and returns one
  <story> per feed item, each holding the original RSS item followed by the
  Placemaker <place> elements found in its caption, title and summary.
-->
<table xmlns="http://query.yahooapis.com/v1/schema/table.xsd">
  <meta>
    <author>Peter Cowburn</author>
    <documentationURL>http://stackoverflow.com/questions/6168564/creating-a-yql-opentable-to-combine-rss-data-with-placemaker-map-info</documentationURL>
    <sampleQuery>select * from {table} where section='topstories'</sampleQuery>
    <description>Searches Yahoo! News RSS and injects Placemaker Places</description>
  </meta>
  <bindings>
    <select itemPath="stories.story" produces="XML">
      <urls>
        <url>
          http://rss.news.yahoo.com/rss/{section}
        </url>
      </urls>
      <inputs>
        <key id="section" type="xs:string" paramType="path" required="true" />
      </inputs>
      <execute><![CDATA[
        // Fetch up to 30 feed items whose description contains a jpg image
        var feed = y.query(
          'select * from rss where url=@url and description matches ".*jpg.*" limit 30',
          {url: request.url}
        ).results;

        // Start one Placemaker query per feed item. The results are only read
        // in the next loop, so the queries are all issued before any is awaited.
        var placeQuery = 'select * from geo.placemaker where documentContent=@text and documentType="text/plain"';
        var placeQueries = [];
        var title, description, caption, summary, content;
        for each (var result in feed.item) {
          title = result.title.text().toString();
          // The description is HTML; tidy it into XML so E4X can navigate it
          description = y.tidy(result.description.toString()).body.p;
          caption = description.a.img.@alt.toString();
          summary = description..*.text().toString();
          content = caption + " " + title + " " + summary;
          placeQueries.push({
            query: y.query(placeQuery, {text: content}),
            item: result,
            results: null
          });
        }

        // Collect the <place> elements from every query's results
        var where = new Namespace('http://wherein.yahooapis.com/v1/schema');
        var matches;
        for (var i = 0; i < placeQueries.length; i++) {
          matches = placeQueries[i].query.results.matches.match;
          placeQueries[i].results = matches..where::place;
        }

        // Build response object: <stories><story>item + places</story>...</stories>
        var stories = <stories/>;
        for each (var entry in placeQueries) {
          stories.appendChild(<story>
            {entry.item}
            {entry.results}
          </story>);
        }
        response.object = stories;
      ]]></execute>
    </select>
  </bindings>
</table>
You can use it by pointing to the table online (it may not be around forever!) in a query like:
use 'https://raw.github.com/salathe/yql-tables/so-6168564/yahoo/newswithplaces.xml' as rssplaces;
select *
from rssplaces
where section='topstories';
(Try this in the YQL console)
The table uses some of the features available in <execute> blocks, such as E4X, query parameters and parallel queries, which all make life easier but may look slightly foreign at first glance.
P.S. The above is offered as-is, I'm not going to be bending over backwards to field "support" questions on this. It is primarily intended as something to get you moving, an introduction to an approach which might work for you.
精彩评论