import the webfwd buzz feed generator

This commit is contained in:
Lloyd Hilaiel 2011-10-05 10:10:51 -06:00
Parent ab123d5dd8
Commit dd0772fb6c
3 changed files: 147 additions and 0 deletions

feed/README.md (Normal file, 20 lines)

@@ -0,0 +1,20 @@
This directory contains a small script for updating the
"buzz feed", which pulls from three sources:

1. A local file for *events* (`./events.json`)
2. tumblr for *posts*
3. A twitter search for `#webfwd` for *tweets*

`update_feed.js` is a small node.js script that does all
of the work; view the source for more details.
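
For reference, here's a sketch of the aggregated JSON the script prints
(the shape follows the code in `update_feed.js`; every value below is a
placeholder):

```json
{
  "events": [
    { "title": "Example Event (Somewhere)",
      "when": "October 3rd - 7th",
      "link": "http://example.com/" }
  ],
  "tweets": [
    { "who": "someuser",
      "title": "an example #webfwd tweet",
      "posted": "Wed, 05 Oct 2011 16:00:00 +0000",
      "link": "http://twitter.com/#!/someuser/status/123" }
  ],
  "posts": [
    { "title": "An Example Blog Post",
      "posted": "Wed, 05 Oct 2011 16:00:00 +0000",
      "link": "http://blog.webfwd.org/post/123" }
  ]
}
```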
## Deployment

1. Install node.js.
2. **Use cron** to run `update_feed.js > latest.json` at a regular interval
   (see the example below).
3. If you have a [site update system](http://trickyco.de/simple-site-publishing-with-git),
   make sure you run `update_feed.js` immediately after updating your site code.
4. Configure your web server to set suitably short-lived cache headers
   on `latest.json`.

All done!
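
As an example of step 2, a hypothetical crontab entry (the install path and
schedule are placeholders, and this assumes `node` is on cron's `PATH`):

```
# placeholder path: regenerate the feed every 15 minutes
*/15 * * * * cd /var/www/site/feed && node update_feed.js > latest.json
```

For step 4, a response header along the lines of `Cache-Control: max-age=300`
on `latest.json` keeps clients no more than five minutes stale without
defeating caching entirely.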

feed/events.json (Normal file, 7 lines)

@@ -0,0 +1,7 @@
[
{
"title": "Startup Week (Vienna, Austria)",
"when": "October, 3rd - 7th",
"link": "http://www.startupweek2011.com/"
}
]

feed/update_feed.js (Executable file, 120 lines)

@@ -0,0 +1,120 @@
#!/usr/bin/env node

const fs = require('fs'),
      http = require("http"),
      querystring = require("querystring"),
      xml2js = require("xml2js"),
      path = require("path");

// print a fatal error message and exit with a non-zero status
function die(msg) {
    process.stderr.write("fatal error: " + msg + "\n");
    process.exit(1);
}
// Aggregate feeds from three sources:
//   1. on-disk events .json blob (./events.json)
//   2. twitter (search for #webfwd)
//   3. http://webfwd.tumblr.com/rss
var feed = {};

// #1: read and parse the local events file
fs.readFile(path.join(__dirname, "events.json"), function(err, data) {
    if (err) die("reading ./events.json: " + err);
    try {
        feed.events = JSON.parse(data);
    } catch (e) {
        die("parse error in events.json");
    }
    allDone();
});
// #2: search twitter for recent #webfwd tweets
http.get({
    host: "search.twitter.com",
    path: "/search.json?" + querystring.stringify({ q: "#webfwd" })
}, function(res) {
    var body = "";
    res.on('data', function(chunk) { body += chunk; })
       .on('end', function() {
           try {
               var rs = JSON.parse(body).results;
               feed.tweets = [];
               // keep at most four tweets
               for (var i = 0; i < rs.length; i++) {
                   if (feed.tweets.length > 3) break;
                   feed.tweets.push({
                       who: rs[i].from_user,
                       title: rs[i].text,
                       posted: rs[i].created_at,
                       link: "http://twitter.com/#!/" + rs[i].from_user + "/status/" + rs[i].id_str
                   });
               }
               allDone();
           } catch (e) {
               die("error parsing twitter search results: " + e);
           }
       });
}).on('error', function(e) {
    die("error performing twitter search: " + e);
});
// #3: fetch the tumblr RSS feed.
// tumblr has some odd redirection behavior that seems to be
// anti-robot oriented.  From time to time we must follow redirects
// for the feed to continue working.
var feedFetchTries = 0;
function tryFeedFetch(p) {
    http.get({
        host: "blog.webfwd.org",
        path: p
    }, function(res) {
        if (res.statusCode === 302) {
            if (++feedFetchTries >= 3) die("too many redirects on tumblr");
            tryFeedFetch(res.headers['location']);
            return;
        }
        var body = "";
        res.on('data', function(chunk) { body += chunk; })
           .on('end', function() {
               var parser = new xml2js.Parser();
               parser.addListener('end', function(result) {
                   var items = result.channel.item;
                   // handle the case where the feed contains only one item
                   if (!Array.isArray(items)) items = [ items ];
                   feed.posts = [];
                   // keep at most four posts
                   for (var i = 0; i < items.length; i++) {
                       if (feed.posts.length > 3) break;
                       feed.posts.push({
                           title: items[i].title,
                           posted: items[i].pubDate,
                           link: items[i].link
                       });
                   }
                   allDone();
               });
               parser.parseString(body);
           });
    }).on('error', function(e) {
        die("error fetching blog posts: " + e);
    });
}
tryFeedFetch('/rss');
// each source calls allDone() on success; once all three have
// reported in, print the aggregated feed to stdout
var numDone = 0;
function allDone() {
    if (++numDone >= 3) {
        console.log(JSON.stringify(feed, null, 4));
    }
}
// print an error at exit time if we failed to read one of the three "feeds"
process.on('exit', function() {
    if (numDone < 3) {
        process.stderr.write("not all feeds read! only " + numDone + "/3\n");
        numDone = 3; // prevent re-entering this branch when exit() fires 'exit' again
        process.exit(1);
    }
});