Remove LICA worker and tests to get original set of tests passing

This commit is contained in:
Maxim Zhilyaev 2014-11-14 14:32:36 -08:00
Родитель 9179dfc815
Коммит 153a42dc9c
4 изменённых файлов: 0 добавлений и 278 удалений

Просмотреть файл

@ -1,46 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
const { getBaseDomain } = require('./utils/tld');
/**
 * Split a URI string into its structural chunks.
 *
 * The registrable domain comes from getBaseDomain(); every other chunk is
 * obtained by slicing the URI around the first occurrence of that domain
 * after the "://" separator.
 * NOTE(review): getBaseDomain is assumed to accept the full URI and to return
 * a string that occurs verbatim in it (e.g. "amazon.co.uk") - confirm against
 * ./utils/tld.
 *
 * @param {string} uri - URI to split, e.g. "http://www.example.com/a/b.html?q=1"
 * @returns {Object} an object with the keys
 *   "protocol"    - text before "://" ("" when the URI has no "://"),
 *   "subdomains"  - array of host labels preceding the base domain,
 *   "domain_name" - the value returned by getBaseDomain(uri),
 *   "path"        - array of non-empty path segments before the last one,
 *   "filename"    - the last path segment as a string ("" if the path is
 *                   empty or ends with "/"),
 *   "variables"   - map of query-string names to their raw (undecoded) values,
 *   "shebang"     - everything after the first "#!" ("" when absent).
 * @throws {TypeError} when the base domain cannot be located inside the URI.
 */
function parseURI(uri) {
  let chunks = {
    "protocol": "",
    "subdomains": [],
    "domain_name": "",
    "path": [],
    "filename": "",
    "variables": {},
    "shebang": ""
  };
  let isNonEmpty = function(piece) {
    return piece !== "";
  };

  // Protocol: everything in front of the first "://".
  let afterScheme = uri;
  let schemeEnd = uri.indexOf("://");
  if (schemeEnd !== -1) {
    chunks["protocol"] = uri.slice(0, schemeEnd);
    afterScheme = uri.slice(schemeEnd + 3);
  }

  // Locate the base domain; the text in front of it holds the subdomains.
  let domainName = getBaseDomain(uri);
  let domainStart = -1;
  if (typeof domainName === "string" && domainName !== "") {
    domainStart = afterScheme.indexOf(domainName);
  }
  if (domainStart === -1) {
    throw new TypeError("parseURI: base domain not found in URI: " + uri);
  }
  chunks["domain_name"] = domainName;
  // "www." splits into ["www", ""], so the empty labels are dropped.
  chunks["subdomains"] = afterScheme.slice(0, domainStart).split(".").filter(isNonEmpty);

  let rest = afterScheme.slice(domainStart + domainName.length);

  // The shebang is cut off first so that "?" or "/" inside it is not
  // mistaken for the query string or the path.
  let shebangStart = rest.indexOf("#!");
  if (shebangStart !== -1) {
    chunks["shebang"] = rest.slice(shebangStart + 2);
    rest = rest.slice(0, shebangStart);
  }

  // Query string: everything after the first "?".
  let query = "";
  let queryStart = rest.indexOf("?");
  if (queryStart !== -1) {
    query = rest.slice(queryStart + 1);
    rest = rest.slice(0, queryStart);
  }

  // The last "/"-separated segment is the filename; the leading "/" produces
  // an empty first segment, which is filtered out of the path.
  let segments = rest.split("/");
  chunks["filename"] = segments.pop();
  chunks["path"] = segments.filter(isNonEmpty);

  for (let pair of query.split("&")) {
    if (pair === "") {
      continue;
    }
    // Split on the first "=" only so values containing "=" stay intact.
    let equalsAt = pair.indexOf("=");
    if (equalsAt === -1) {
      chunks["variables"][pair] = "";
    }
    else {
      chunks["variables"][pair.slice(0, equalsAt)] = pair.slice(equalsAt + 1);
    }
  }
  return chunks;
}

Просмотреть файл

@ -1,26 +0,0 @@
//description method
//URL for examples: https://www.www.e4hats.com/hats/ribbon-band-wool-hat-red.html?q=fedora
//Mozilla URL description language
// DEFINITIONS
// //ftp (the protocol)
// .www (the subdomain)
// /hats (in the path)
// -ribbon (in the filename)
// ?q (a variable name)
// =fedora (a variable value)
// ?q=fedora (specifically q equal to fedora)
// *wool (wool is anywhere in the URL)
// NGRAMS
// /real,estate (real followed by estate exists in the path)
// MULTIPLES
// .www{2} (www appears twice)
// .www{3,} (www appears three or more times)
// NEGATIVES
// !.www (www is not in the subdomain)

Просмотреть файл

@ -1,141 +0,0 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
const {testUtils} = require("./helpers");
const {Cc, Ci, Cu, ChromeWorker} = require("chrome");
const OldPromise = require("sdk/core/promise");
Cu.import("resource://gre/modules/Services.jsm");
Cu.import("resource://gre/modules/NetUtil.jsm");
Cu.import("resource://gre/modules/Task.jsm");
// Keyword fixture: under the single "__ANY" bucket, each keyword (or
// comma-joined keyword sequence) maps to interest categories with a weight.
// NOTE(review): this object is not referenced by the test visible in this
// file (the worker is created with textModel: null) - confirm whether it is
// still needed.
let testKeywords = {
  __ANY: {
    mozilla: {
      "technology & computing": 0.75,
      "computer programming": 0.5
    },
    baseball: { baseball: 0.95 },
    golf: { golf: 0.9 },
    "open source": { "computer programming": 0.7 },
    "volkswagen,golf": { automotive: 1 },
    "volkswagen,golf,parts": { "auto parts": 1 },
    // e.g. http://www.e4hats.com/ribbon-band-wool-hat-red.html
    "red,hat": {
      unix: 0.5,
      clothing: 0.5
    },
    // e.g. http://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives
    "red,hat,enterprise": {
      unix: 1
    }
  }
};
// Domain rule fixture handed to the worker as `domainRules`.
// For each base domain: "__ANY" holds interests applied to the whole domain,
// "__HOST" maps host keywords and "__PATH" maps path keywords to interests.
// The previous literal mixed "[" / "]" with key: value pairs and did not
// parse; every level is now a plain object.
// NOTE(review): the exact schema the worker expects is not visible here -
// confirm that "__HOST" / "__PATH" are objects rather than arrays.
let testDomainRules = {
  "mozilla.org": {
    "__ANY": {
      "technology & computing": 0.8
    },
    "__HOST": {
      "phonebook": {"ignore": null}
    },
    "__PATH": {
      "developer": {"computer programming": 0.9}
    }
  },
};
// The test array: one entry per document sent to the worker.
//   info             - label for the test case
//   url / title      - the document handed to the worker
//   expectedDecision - the classification the worker is expected to return
let matchTests = [
{
info: "LICA Test 1: mozilla.org",
url: "http://www.mozilla.org/en-US",
// The em dash is written as an escape so it survives re-encoding of this
// file (it had previously been corrupted into a Latin-1 character).
title: "Home of the Mozilla Project \u2014 Mozilla",
expectedDecision: ["technology & computing", "general"],
},
// The remaining cases are disabled.
//{
// info: "LICA Test 2: Red Hat Linux",
// url: "http://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives",
// title: "Red Hat Enterprise Linux derivatives - Wikipedia, the free encyclopedia",
// expectedDecision: ["technology & computing", "unix"]
//},
//{
// info: "LICA Test 3: Golf, the sport",
// url: "http://www.golfdigest.com/golf-courses/2013-02/100-greatest-public-courses",
// title: "2013-14 Ranking: America's 100 Greatest Public Golf Courses : Golf Digest",
// expectedDecision: ["sports", "golf"]
//},
//{
// info: "LICA Test 4: Volkswagen Golf, the car",
// url: "http://www.vw.com/models/golf/",
// title: "2015 VW Golf - The Versatile Compact Car | Volkswagen",
// expectedDecision: ["automotive", "general"]
//},
];
/**
 * Sends every entry of matchTests to the worker, one at a time, and checks
 * that the "InterestsForDocument" reply matches the entry's expectedDecision.
 *
 * @param assert - SDK test assertion object
 * @param done   - callback invoked once all entries were processed (or the
 *                 run failed)
 */
exports["test default matcher"] = function test_default_matcher(assert, done) {
  // Resolved by the listener when the reply for the current entry arrives.
  let deferred;
  // The matchTests entry whose reply is currently awaited.
  let currentTest;

  let workerTester = {
    handleEvent: function(aEvent) {
      if (aEvent.type != "message") {
        throw new Error("ERROR_UNEXPECTED_MSG_TYPE: " + aEvent.type);
      }
      let msgData = aEvent.data;
      if (msgData.message == "InterestsForDocument") {
        // make sure that categorization is correct
        // NOTE(review): the reply field is read as `results` (lower case),
        // matching the logging below - confirm against the worker.
        console.log("msgData=> " + JSON.stringify(msgData.results));
        console.log("expectedDecision=> " + JSON.stringify(currentTest.expectedDecision));
        // Arrays must be compared structurally; `==` only compares identity.
        assert.deepEqual(msgData.results, currentTest.expectedDecision, currentTest.info);
        deferred.resolve();
      }
      else if (!(msgData.message in testUtils.kValidMessages)) {
        // unexpected message
        throw new Error("ERROR_UNEXPECTED_MSG: " + msgData.message);
      }
    } // end of handleEvent
  };

  let worker = testUtils.getWorker({
    namespace: "test-Matching",
    listener: workerTester,
    domainRules: testDomainRules,
    textModel: null,
    urlStopWords: ['php', 'html']
  });

  Task.spawn(function*() {
    for (let test of matchTests) {
      deferred = OldPromise.defer();
      currentTest = test;
      let uri = NetUtil.newURI(test.url);
      let host = uri.host;
      worker.postMessage({
        command: "getInterestsForDocument",
        payload: {
          host: host,
          path: uri.path,
          title: test.title,
          url: test.url,
          baseDomain: Services.eTLD.getBaseDomainFromHost(host)
        }
      });
      // Wait for the listener to see the reply before sending the next one.
      yield deferred.promise;
    }
  }).then(done, function(error) {
    // Report the failure and still finish, so the run does not hang.
    assert.ok(false, "test default matcher failed: " + error);
    done();
  });
};
require("sdk/test").run(exports);

Просмотреть файл

@ -1,65 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
const {testUtils} = require("./data/licaWorker.js");
const {Cc, Ci, Cu, ChromeWorker} = require("chrome");
// Fixtures for the URI chunking test. Each entry carries
//   reason - why the case is interesting (used as the assertion message),
//   url    - the input handed to parseURI,
//   chunks - the object parseURI is expected to produce for that input.
let test_uris_and_attributes = [
  {
    reason: "A very rich URL with path and several variables",
    url: "http://www.amazon.co.uk/Augmenting-Dirichlet-Allocation-Threshold-Ontologies/dp/1502959488/ref=sr_1_1?ie=UTF8&qid=1415840085&sr=8-1&keywords=latent+dirichlet+allocation",
    chunks: {
      protocol: "http",
      subdomains: ["www"],
      domain_name: "amazon.co.uk",
      path: [
        "Augmenting-Dirichlet-Allocation-Threshold-Ontologies",
        "dp",
        "1502959488"
      ],
      filename: "ref=sr_1_1",
      variables: {
        ie: "UTF8",
        qid: "1415840085",
        sr: "8-1",
        keywords: "latent+dirichlet+allocation"
      },
      shebang: ""
    }
  },
  {
    reason: "A secure url with a very long path",
    url: "https://www.facebook.com/search/str/bob/users-named/93693583250/students/14696440021/employees/present/males/me/friends/intersect",
    chunks: {
      protocol: "https",
      subdomains: ["www"],
      domain_name: "facebook.com",
      path: [
        "search", "str", "bob", "users-named", "93693583250", "students",
        "14696440021", "employees", "present", "males", "me", "friends"
      ],
      filename: "intersect",
      variables: {},
      shebang: ""
    }
  },
  {
    reason: "A url with a shebang",
    url: "http://www.facebook.com/example.profile#!/pages/Another-Page/123456789012345",
    chunks: {
      protocol: "http",
      subdomains: ["www"],
      domain_name: "facebook.com",
      path: [],
      filename: "example.profile",
      variables: {},
      shebang: "/pages/Another-Page/123456789012345"
    }
  }
];
/**
 * Checks that parseURI splits every fixture URL into the expected chunks.
 *
 * NOTE(review): parseURI is not brought into scope by the require() lines
 * visible at the top of this file (only testUtils is destructured) - it has
 * to be imported before this test can run.
 *
 * @param assert - SDK test assertion object
 */
exports["test URI parsing and chunking functionality"] = function (assert) {
  for (let test of test_uris_and_attributes) {
    // The chunks are freshly built objects, so they must be compared
    // structurally; assert.equal would only compare object identity.
    assert.deepEqual(parseURI(test['url']), test['chunks'], test['reason']);
  }
};
require('sdk/test').run(exports);