From 975cb3cf1620127530635ef2c7128db8e40f1ae0 Mon Sep 17 00:00:00 2001
From: Davor Spasovski
Date: Tue, 22 Apr 2014 12:35:42 -0400
Subject: [PATCH] add fetchdb task ported for commonplace

---
 .gitignore                                    |   4 +
 Gruntfile.js                                  |  46 +++++
 hearth/db/archives/.git-keep                  |   0
 hearth/downloads/icons/.git-keep              |   0
 hearth/downloads/screenshots-thumbs/.git-keep |   0
 lib/db-transformer.js                         | 207 ++++++++++++++++++++++
 lib/db.js                                     | 162 +++++++++++++++++
 lib/utils.js                                  |  46 +++++
 package.json                                  |   9 +-
 settings.js                                   |  20 ++
 10 files changed, 493 insertions(+), 1 deletion(-)
 create mode 100644 Gruntfile.js
 create mode 100644 hearth/db/archives/.git-keep
 create mode 100644 hearth/downloads/icons/.git-keep
 create mode 100644 hearth/downloads/screenshots-thumbs/.git-keep
 create mode 100644 lib/db-transformer.js
 create mode 100644 lib/db.js
 create mode 100644 lib/utils.js
 create mode 100644 settings.js

diff --git a/.gitignore b/.gitignore
index 1d04e83f..fac62e39 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,3 +40,7 @@ hearth/media/include.css
 hearth/media/include.js
 hearth/media/js/include.js
 hearth/templates.js
+hearth/db/**/*.json
+hearth/downloads/**/*.gif
+hearth/downloads/**/*.jp*
+hearth/downloads/**/*.png
diff --git a/Gruntfile.js b/Gruntfile.js
new file mode 100644
index 00000000..1ebe1d47
--- /dev/null
+++ b/Gruntfile.js
@@ -0,0 +1,46 @@
+var db = require('./lib/db');
+var utils = require('./lib/utils');
+var settings = require('./settings');
+
+/**
+ * Grunt entry point: configures and registers the `fetchdb` task, which is
+ * also the default task.
+ *
+ * @param {Object} grunt - Grunt instance supplied by the Grunt runner.
+ */
+module.exports = function(grunt) {
+    grunt.initConfig({
+        fetchdb: {
+            options: {
+                data_dest: settings.db_dir + '/latest.json'
+            }
+        }
+    });
+
+    // Always show stack traces when Grunt prints out an uncaught exception.
+    grunt.option('stack', true);
+
+    grunt.registerTask('fetchdb', 'Fetches JSON from API, downloads ' +
+                       'icons/screenshots, and transforms data to ' +
+                       'static JSON file to disk', function() {
+        var done = this.async();
+        var options = this.options();
+        db.fetchLatest(options.data_dest).then(function() {
+            grunt.log.writeln(
+                'File ' + utils.color('cyan', options.data_dest) + ' created.');
+            done();
+        }, function(err) {
+            // Passing `false` marks the task as failed, so Grunt exits
+            // non-zero instead of reporting success.
+            grunt.log.writeln(utils.color('red',
+                'File ' + options.data_dest + ' failed to be created: ' + err));
+            done(false);
+        }).catch(function(err) {
+            // Only reached when one of the two handlers above throws.
+            grunt.log.writeln(utils.color('red', 'lib/db failed: ' + err));
+            done(false);
+        });
+    });
+
+    grunt.registerTask('default', ['fetchdb']);
+};
diff --git a/hearth/db/archives/.git-keep b/hearth/db/archives/.git-keep
new file mode 100644
index 00000000..e69de29b
diff --git a/hearth/downloads/icons/.git-keep b/hearth/downloads/icons/.git-keep
new file mode 100644
index 00000000..e69de29b
diff --git a/hearth/downloads/screenshots-thumbs/.git-keep b/hearth/downloads/screenshots-thumbs/.git-keep
new file mode 100644
index 00000000..e69de29b
diff --git a/lib/db-transformer.js b/lib/db-transformer.js
new file mode 100644
index 00000000..bdf3fffd
--- /dev/null
+++ b/lib/db-transformer.js
@@ -0,0 +1,207 @@
+var fs = require('fs');
+var path = require('path');
+
+var _ = require('lodash');
+var request = require('request');
+var Promise = require('es6-promise').Promise;
+
+var utils = require('../lib/utils');
+
+
+// Transform from Zamboni categories to Tarako categories.
+// TODO: figure out a way to use transformMap from src/media/js/categories.js
+// so we don't need to dupe this constant.
+var categoryMap = {
+    'games': 'games',
+    'utilities': 'tools',
+    'reference': 'tools',
+    'productivity': 'tools',
+    'education': 'tools',
+    'business': 'tools',
+    'travel': 'lifestyle',
+    'sports': 'lifestyle',
+    'social': 'lifestyle',
+    'shopping': 'lifestyle',
+    'photo-video': 'lifestyle',
+    'news-weather': 'lifestyle',
+    'music': 'lifestyle',
+    'maps-navigation': 'lifestyle',
+    'lifestyle': 'lifestyle',
+    'health-fitness': 'lifestyle',
+    'entertainment': 'lifestyle',
+    'books': 'lifestyle'
+};
+
+// App fields that are kept in the transformed database.
+var appFields = [
+    '_id',
+    'author',
+    'categories',
+    'content_ratings',
+    'description',
+    'description_search',
+    'icon',
+    'is_packaged',
+    'homepage',
+    'manifest_url',
+    'name',
+    'name_search',
+    'previews',
+    'privacy_policy',
+    'ratings',
+    'slug',
+    'status',
+    'support_email',
+    'support_url'
+];
+
+
+function generateFilename(url) {
+    // Generates a pretty filename from a remote URL, turning
+    // `97310.png?modified=1366438278` to `97310.png`.
+    return path.basename(url).split('?')[0];
+}
+
+function flattenLocalised(localised) {
+    // Flattens an object of localised strings (locale -> text) into one
+    // space-separated string, with parentheses stripped, for easy searching.
+    return Object.keys(localised || {}).map(function(locale) {
+        return localised[locale];
+    }).join(' ').replace(/\(|\)/g, '');
+}
+
+function transformCategories(zcategories) {
+    // Maps Zamboni categories to a de-duplicated list of Tarako categories.
+    // Categories missing from `categoryMap` are skipped rather than emitted
+    // as `undefined` (which would serialise to `null`).
+    var categories = [];
+    _.forEach(zcategories, function(zcat) {
+        var dcat = categoryMap[zcat];
+        if (typeof dcat === 'string' && categories.indexOf(dcat) === -1) {
+            categories.push(dcat);
+        }
+    });
+    return categories;
+}
+
+function downloadImage(url, dest) {
+    // Streams `url` to the file `dest`. Resolves with a short hash of the
+    // downloaded bytes; rejects on a network, HTTP-status or file error.
+    return new Promise(function(resolve, reject) {
+        var chunks = [];
+        var download = request(url);
+        var file = fs.createWriteStream(dest);
+
+        download.on('response', function(res) {
+            if (res.statusCode !== 200) {
+                reject(new Error('HTTP ' + res.statusCode + ' for ' + url));
+            }
+        });
+        // `data` is emitted by the request, not by the write stream that
+        // `pipe()` returns, so the bytes have to be collected here.
+        download.on('data', function(chunk) {
+            chunks.push(chunk);
+        });
+        download.on('error', reject);
+
+        file.on('error', reject);
+        file.on('finish', function() {
+            // Hash the raw bytes; concatenating Buffers into a string would
+            // corrupt binary image data.
+            resolve(utils.computeHash(Buffer.concat(chunks)));
+        });
+
+        download.pipe(file);
+    });
+}
+
+function writeAppcacheMedia(fn, paths) {
+    // Writes the sorted list of local image paths to `fn` as JSON.
+    return new Promise(function(resolve, reject) {
+        var body = JSON.stringify(paths.sort(), null, 2);
+        fs.writeFile(fn, body, function(err) {
+            if (err) {
+                reject(err);
+                return;
+            }
+            resolve();
+        });
+    });
+}
+
+/**
+ * Transforms the raw API response into the list of app documents stored in
+ * the static database, and downloads every referenced icon and first
+ * screenshot thumbnail into `settings.downloads_dir`.
+ *
+ * @param {Object} settings - Reads `downloads_dir`, `frontend_dir` and,
+ *     optionally, `appcache_media` (file that receives the image list).
+ * @param {Object} data - API response; `data.apps` is the array of apps.
+ * @returns {Promise} Resolves with the array of transformed apps; rejects
+ *     when the response is malformed or an image cannot be saved.
+ */
+module.exports = function(settings, data) {
+    return new Promise(function(resolveDB, rejectDB) {
+        // Collect a list of image URLs (to later download to disk).
+        // key = URL, value = directory name; once a download has finished
+        // the value is replaced by the cachebusted local path.
+        var images = {};
+
+        var apps = data.apps.map(function(app) {
+            app._id = app.id;
+            app.icon = app.icons['64'];
+
+            images[app.icon] = 'icons';
+            if (app.previews.length > 0) {
+                images[app.previews[0].thumbnail_url] = 'screenshots-thumbs';
+            }
+
+            app.name_search = flattenLocalised(app.name);
+            app.description_search = flattenLocalised(app.description);
+            app.categories = transformCategories(app.categories);
+
+            return _.pick(app, appFields);
+        });
+
+        console.log('Transformed data');
+
+        console.log('Fetching images to save to disk');
+
+        var downloads = Object.keys(images).map(function(url) {
+            console.log('Fetching', url);
+            var fn = path.join(settings.downloads_dir, images[url],
+                               generateFilename(url));
+            return downloadImage(url, fn).then(function(hash) {
+                // Embed the content hash in the local path so that a changed
+                // image gets a new URL.
+                // NOTE(review): the file on disk keeps its plain name, so
+                // presumably the server strips `.hash_*` -- confirm.
+                images[url] = utils.cachebust(
+                    path.relative(settings.frontend_dir, fn), hash);
+            });
+        });
+
+        Promise.all(downloads).then(function() {
+            apps.forEach(function(app) {
+                app.icon = path.join('/', images[app.icon]);
+                if (app.previews.length > 0) {
+                    app.previews[0].thumbnail_url = path.join(
+                        '/', images[app.previews[0].thumbnail_url]);
+                }
+            });
+
+            // `appcache_media` is optional: skip the list when it is not
+            // configured instead of failing on an undefined filename.
+            if (settings.appcache_media) {
+                return writeAppcacheMedia(settings.appcache_media,
+                                          _.values(images));
+            }
+        }).then(function() {
+            console.log('Successfully saved all images to disk');
+            resolveDB(apps);
+        }, function(err) {
+            console.error('Failed to save images to disk:', err);
+            rejectDB(err);
+        });
+    });
+};
diff --git a/lib/db.js b/lib/db.js
new file mode 100644
index 00000000..a5fc74f6
--- /dev/null
+++ b/lib/db.js
@@ -0,0 +1,162 @@
+var fs = require('fs');
+var path = require('path');
+
+var Promise = require('es6-promise').Promise;
+var request = require('request');
+
+var settings = require('../settings');
+var utils = require('../lib/utils');
+
+
+var db_dir = path.join(__dirname, '..', settings.db_dir);
+
+
+function readFile(fn) {
+    // Promise wrapper around `fs.readFile`; resolves with the file contents
+    // as a string.
+    return new Promise(function (resolve, reject) {
+        fs.readFile(fn, function (err, contents) {
+            if (err) {
+                reject(err);
+                return;
+            }
+            resolve(String(contents));
+        });
+    });
+}
+
+function writeFile(fn, contents, message) {
+    // Promise wrapper around `fs.writeFile`. Logs `message` (when given)
+    // followed by the filename on success; rejects on a write error.
+    return new Promise(function (resolve, reject) {
+        fs.writeFile(fn, contents, function (err) {
+            if (err) {
+                reject(err);
+                return;
+            }
+            if (message) {
+                console.log(message, fn);
+            }
+            resolve();
+        });
+    });
+}
+
+function fetchBody(url) {
+    // Downloads `url` and resolves with the response body.
+    return new Promise(function (resolve, reject) {
+        request(url, function (err, res, body) {
+            if (err) {
+                reject(err);
+                return;
+            }
+            if (res.statusCode !== 200) {
+                reject(new Error('HTTP ' + res.statusCode + ' for ' + url));
+                return;
+            }
+            resolve(body);
+        });
+    });
+}
+
+/**
+ * Fetches the database from `settings.db_url`, transforms it with
+ * `settings.db_transformer` and writes the raw and transformed copies to disk.
+ *
+ * @param {string} dest - File that receives the transformed database.
+ * @param {boolean} [preloaded] - When truthy, also records the hash of the
+ *     transformed database in `preloaded-hash.json`.
+ * @returns {Promise} Resolves with the transformed documents; rejects when
+ *     the download, the transformation or any write fails.
+ */
+function fetch(dest, preloaded) {
+    var now = Date.now();
+
+    var fnOriginal = path.join(db_dir, 'original.json');
+    var fnArchivedOriginal = path.join(db_dir, 'archives', now + '-original.json');
+    var fnPreloadedHash = path.join(db_dir, 'preloaded-hash.json');
+
+    return fetchBody(settings.db_url).then(function (body) {
+        // A malformed body makes `JSON.parse` throw, which rejects the chain.
+        var bodyJSON = JSON.parse(body);
+
+        // Start writing the untouched response right away; these writes are
+        // awaited together with the transformed files below.
+        var writes = [
+            writeFile(fnOriginal, body),
+            writeFile(fnArchivedOriginal, body)
+        ];
+
+        return settings.db_transformer(bodyJSON).then(function (data) {
+            var bodyTransformed = JSON.stringify(data);
+            var hash = utils.computeHash(bodyTransformed);
+            var fnArchivedTransformed = path.join(db_dir, 'archives',
+                                                  hash + '.json');
+
+            writes.push(writeFile(dest, bodyTransformed,
+                'Successfully wrote database to disk'));
+            writes.push(writeFile(fnArchivedTransformed, bodyTransformed,
+                'Successfully wrote archived database to disk'));
+            if (preloaded) {
+                writes.push(writeFile(fnPreloadedHash, hash,
+                    'Successfully wrote database hash to disk'));
+            }
+
+            return Promise.all(writes).then(function () {
+                console.log('Successfully wrote all database files to disk');
+                return data;
+            });
+        });
+    }).catch(function (err) {
+        console.error('Failed to fetch database:', err);
+        throw err;
+    });
+}
+
+module.exports.fetch = fetch;
+
+module.exports.fetchPreloaded = function fetchPreloaded(dest) {
+    // Fetches the database to `dest` (default: `preloaded.json`) and records
+    // its hash in `preloaded-hash.json`.
+    return fetch(dest || path.join(db_dir, 'preloaded.json'), true);
+};
+
+/**
+ * Fetches the latest database and writes `latest-since-HASH.json`, holding
+ * only the apps that are absent from the last preloaded database.
+ *
+ * @param {string} [dest] - File for the full transformed database; defaults
+ *     to `latest.json` in the database directory.
+ * @returns {Promise} Resolves with the array of new documents; rejects when
+ *     fetching fails or `preloaded-hash.json` / its archive cannot be read.
+ */
+module.exports.fetchLatest = function fetchLatest(dest) {
+    var latestDocs;
+    var hash;
+
+    return fetch(dest || path.join(db_dir, 'latest.json')).then(function (docs) {
+        latestDocs = docs;
+        // (1) Look up the hash of last DB file that was included in the appcache.
+        return readFile(path.join(db_dir, 'preloaded-hash.json'));
+    }).then(function (contents) {
+        hash = contents.trim();
+        // (2) Find the archived DB file that matches that hash.
+        return readFile(path.join(db_dir, 'archives', hash + '.json'));
+    }).then(function (previousDB) {
+        // (3) Read the file and remove the apps that were already present.
+        var previousIds = Object.create(null);
+        JSON.parse(previousDB).forEach(function (doc) {
+            previousIds[String(doc._id)] = true;
+        });
+        var newDocs = latestDocs.filter(function (doc) {
+            return !previousIds[String(doc._id || '')];
+        });
+
+        // (4) Write that file to disk: `latest-since-HASH.json`.
+        // (5) The front end uses this static JSON file to fetch.
+        var fnLatestSince = path.join(db_dir, 'latest-since-' + hash + '.json');
+        return writeFile(fnLatestSince, JSON.stringify(newDocs)).then(function () {
+            return newDocs;
+        });
+    });
+};
diff --git a/lib/utils.js b/lib/utils.js
new file mode 100644
index 00000000..76f3efff
--- /dev/null
+++ b/lib/utils.js
@@ -0,0 +1,46 @@
+var crypto = require('crypto');
+var path = require('path');
+
+
+var colors = {
+    cyan: '\x1B[36m',
+    red: '\x1B[31m'
+};
+
+module.exports.color = function color(whichColor, text) {
+    // Wraps `text` in the ANSI escape codes for `whichColor` (`cyan` or
+    // `red`). An unknown color returns `text` unchanged.
+    if (!colors.hasOwnProperty(whichColor)) {
+        return String(text);
+    }
+    return colors[whichColor] + text + '\x1B[39m';
+};
+
+module.exports.cachebust = function cachebust(fn, hash) {
+    // Inserts `.hash_HASH` in front of the file extension, turning
+    // `icons/1.png` into `icons/1.hash_abc1234.png`. A filename without an
+    // extension gets the marker appended.
+    var ext = path.extname(fn);
+    return fn.substr(0, fn.length - ext.length) + '.hash_' + hash + ext;
+};
+
+module.exports.computeHash = function computeHash(contents) {
+    // Returns the first 7 hex digits of the SHA-256 digest of `contents`
+    // (a string, read as `binary`, or a Buffer).
+    var hasher = crypto.createHash('sha256');
+    hasher.update(contents, 'binary');
+    return hasher.digest('hex').substr(0, 7);
+};
+
+module.exports.baseurl = function baseurl(url) {
+    // Strips the query string from `url`.
+    return url.split('?')[0];
+};
+
+module.exports.urlparams = function urlparams(url, qs) {
+    // Appends the query string `qs` to `url`, using `?` or `&` as needed.
+    if (url.indexOf('?') === -1) {
+        return url + '?' + qs;
+    }
+    return url + '&' + qs;
+};
diff --git a/package.json b/package.json
index 1793cd12..15c4d4ab 100644
--- a/package.json
+++ b/package.json
@@ -13,5 +13,12 @@
     "node": ">= 0.10.x",
     "npm": ">= 1.1.x"
   },
-  "dependencies": {}
+  "dependencies": {
+    "request": "~2.34.0",
+    "es6-promise": "~0.1.1",
+    "lodash": "~2.4.1"
+  },
+  "devDependencies": {
+    "grunt": "~0.4.4"
+  }
 }
diff --git a/settings.js b/settings.js
new file mode 100644
index 00000000..c3873920
--- /dev/null
+++ b/settings.js
@@ -0,0 +1,20 @@
+var dbTransformer = require('./lib/db-transformer');
+
+var settings = {
+    debug: true,
+    db_url: 'https://marketplace-dev.allizom.org/api/v1/fireplace/collection/curated/?region=restofworld',
+    frontend_dir: 'hearth',
+    use_data_uris: true
+};
+
+// Both directories are built from `frontend_dir`.
+settings.db_dir = settings.frontend_dir + '/db';
+settings.downloads_dir = settings.frontend_dir + '/downloads';
+
+// Binds the transformer to these settings; returns whatever
+// `lib/db-transformer` returns for `data`.
+settings.db_transformer = function(data) {
+    return dbTransformer(settings, data);
+};
+
+module.exports = settings;