From 153a42dc9c399dac061d7757da053ef449916ab5 Mon Sep 17 00:00:00 2001 From: Maxim Zhilyaev Date: Fri, 14 Nov 2014 14:32:36 -0800 Subject: [PATCH] Remove LICA worker and tests to get original set of tests passing --- data/licaWorker.js | 46 ------------ test/LICA-rules-readme.js | 26 ------- test/test-LICA-rules-en.js | 141 ------------------------------------- test/test-licaWorker.js | 65 ----------------- 4 files changed, 278 deletions(-) delete mode 100644 data/licaWorker.js delete mode 100644 test/LICA-rules-readme.js delete mode 100644 test/test-LICA-rules-en.js delete mode 100644 test/test-licaWorker.js diff --git a/data/licaWorker.js b/data/licaWorker.js deleted file mode 100644 index fcb4662..0000000 --- a/data/licaWorker.js +++ /dev/null @@ -1,46 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -"use strict"; - -const { getBaseDomain } = require('./utils/tld'); - -function parseURI(uri){ - //Accepts: a URI string - //Returns: different parts of the URI - - //extract different types of chunks - chunks = { - "protocol": "", - "subdomains": [], - "domain_name": "", - "path": [], - "filename": "", - "variables": {}, - "shebang": "" - } - - //this could be made more efficient with a letter-by-letter while loop - chunks['protocol'] = uri.split("://")[0] - chunks['domain_name'] = getBaseDomain(uri) - chunks['subdomains'] = uri.split(chunks['domain_name'])[0].split(chunks['protocol'])[1].split(".") - - path_and_rest = uri.split(chunks['domain_name'])[1].split("?") - path_items = path_and_rest[0].split("/") - chunks['path'] = path_items.slice(0,-1) - chunks['filename'] = path_items.slice(-1) - - if (path_and_rest.length > 1) { - variables = path_and_rest[1].split("&") - for (let vq of variables) { - vq = vq.split("=") - variables[vq[0]] = "" - if (vq.length > 1) { - variables[vq[0]] = vq[1] - } - } - } - - return chunks -} diff --git a/test/LICA-rules-readme.js b/test/LICA-rules-readme.js deleted file mode 100644 index 1a31f87..0000000 --- a/test/LICA-rules-readme.js +++ /dev/null @@ -1,26 +0,0 @@ - -//description method -//URL for examples: https://www.www.e4hats.com/hats/ribbon-band-wool-hat-red.html?q=fedora - -//Mozilla URL description language - -// DEFINITIONS -// //ftp (the protocol) -// .www (the subdomain) -// /hats (in the path) -// -ribbon (in the filename) -// ?q (a variable name) -// =fedora (a variable value) -// ?q=fedora (specifically q equal to fedora) -// *wool (wool is anywhere in the URL) - -// NGRAMS -// /real,estate (real followed by estate exists in the path) - -// MULTIPLES -// .www{2} (www appears twice) -// .www{3,} (www appears more than three times) - -// NEGATIVES -// !.www (www is not in the subdomain) - diff --git a/test/test-LICA-rules-en.js b/test/test-LICA-rules-en.js deleted file mode 100644 index 14a3f3c..0000000 --- a/test/test-LICA-rules-en.js +++ /dev/null @@ -1,141 +0,0 @@ -/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim:set ts=2 sw=2 sts=2 et: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -"use strict"; - -const {testUtils} = require("./helpers"); -const {Cc, Ci, Cu, ChromeWorker} = require("chrome"); -const OldPromise = require("sdk/core/promise"); -Cu.import("resource://gre/modules/Services.jsm"); -Cu.import("resource://gre/modules/NetUtil.jsm"); -Cu.import("resource://gre/modules/Task.jsm"); - -let testKeywords = { - "__ANY": { - "mozilla": { - "technology & computing": 0.75, - "computer programming": 0.5 - }, - "baseball": {"baseball": 0.95}, - "golf": {"golf": 0.9}, - "open source": {"computer programming": 0.7}, - "volkswagen,golf": {"automotive": 1}, - "volkswagen,golf,parts": {"auto parts": 1}, - "red,hat": { //http://www.e4hats.com/ribbon-band-wool-hat-red.html - "unix": 0.5, - "clothing": 0.5, - }, - "red,hat,enterprise": { //http://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives - "unix": 1, - }, - } -} - -let testDomainRules = { - "mozilla.org" : { - "__ANY" : { - "technology & computing": 0.8 - } - ], - "__HOST": [ - "phonebook": {"ignore": null} - ], - "__PATH": [ - "developer": {"computer programming": 0.9} - ] - }, -} - -// the test array -let matchTests = [ -{ - info: "LICA Test 1: mozilla.org", - url: "http://www.mozilla.org/en-US", - title: "Home of the Mozilla Project Ñ Mozilla", - expectedDecision: ["technology & computing", "general"], -}, -//{ -// info: "LICA Test 2: Red Hat Linux", -// url: "http://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives", -// title: "Red Hat Enterprise Linux derivatives - Wikipedia, the free encyclopedia", -// expectedDecision: ["technology & computing", "unix"] -//}, -//{ -// info: "LICA Test 3: Golf, the sport", -// url: "http://www.golfdigest.com/golf-courses/2013-02/100-greatest-public-courses", -// title: "2013-14 Ranking: America's 100 Greatest Public Golf Courses : Golf Digest", -// expectedDecision: ["sports", "golf"] -//}, -//{ -// info: "LICA Test 4: Volkswagen Golf, the car", -// url: "http://www.vw.com/models/golf/", -// title: "2015 VW Golf - The Versatile Compact Car | Volkswagen", -// expectedDecision: ["automotive", "general"] -//}, -] - -exports["test default matcher"] = function test_default_matcher(assert, done) { - let deferred; - let expectedInterests; - - let workerTester = { - handleEvent: function(aEvent) { - if (aEvent.type == "message") { - let msgData = aEvent.data; - if (msgData.message == "InterestsForDocument") { - // make sure that categorization is correct - let host = msgData.host; - console.log("msgData=> " + JSON.stringify(msgData.results)) - console.log("expectedDecision=> " + JSON.stringify(expectedDecision)) - assert.ok(msgData.Results == expectedDecision, "passed test 1"); - deferred.resolve(); - } - else if (!(msgData.message in testUtils.kValidMessages)) { - // unexpected message - throw "ERROR_UNEXPECTED_MSG: " + msgData.message; - } - } - else { - throw "ERROR_UNEXPECTED_MSG_TYPE" + aEvent.type; - } - } // end of handleEvent - }; - - let worker = testUtils.getWorker({ - namespace: "test-Matching", - listener: workerTester, - domainRules: testDomainRules, - textModel: null, - urlStopWords: ['php', 'html'] - }); - - Task.spawn(function() { - for (let test of matchTests) { - deferred = OldPromise.defer(); - - let uri = NetUtil.newURI(test.url); - let title = test.title; - let host = uri.host; - let path = uri.path; - let baseDomain = Services.eTLD.getBaseDomainFromHost(host); - - expectedInterests = test.expectedInterests; - worker.postMessage({ - command: "getInterestsForDocument", - payload: { - host: host, - path: path, - title: title, - url: test.url, - baseDomain: baseDomain - } - }); - yield deferred.promise; - } - }).then(done); -} - -require("sdk/test").run(exports); diff --git a/test/test-licaWorker.js b/test/test-licaWorker.js deleted file mode 100644 index 089a8fe..0000000 --- a/test/test-licaWorker.js +++ /dev/null @@ -1,65 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - - -"use strict"; - -const {testUtils} = require("./data/licaWorker.js"); -const {Cc, Ci, Cu, ChromeWorker} = require("chrome"); - -let test_uris_and_attributes = [ - { - "reason": "A very rich URL with path and several variables", - "url": "http://www.amazon.co.uk/Augmenting-Dirichlet-Allocation-Threshold-Ontologies/dp/1502959488/ref=sr_1_1?ie=UTF8&qid=1415840085&sr=8-1&keywords=latent+dirichlet+allocation", - "chunks": { - "protocol": "http", - "subdomains": ['www'], - "domain_name": "amazon.co.uk", - "path": ["Augmenting-Dirichlet-Allocation-Threshold-Ontologies", "dp", "1502959488"], - "filename": "ref=sr_1_1", - "variables": { - "ie": "UTF8", - "qid": "1415840085", - "sr": "8-1", - "keywords": "latent+dirichlet+allocation", - }, - "shebang": "" - } - }, - { - "reason": "A secure url with a very long path", - "url": "https://www.facebook.com/search/str/bob/users-named/93693583250/students/14696440021/employees/present/males/me/friends/intersect", - "chunks": { - "protocol": "https", - "subdomains": ['www'], - "domain_name": "facebook.com", - "path": ["search", "str", "bob", "users-named", "93693583250", "students", "14696440021", "employees", "present", "males", "me", "friends"], - "filename": "intersect", - "variables": {}, - "shebang": "", - } - }, - { - "reason": "A url with a shebang", - "url": "http://www.facebook.com/example.profile#!/pages/Another-Page/123456789012345", - "chunks": { - "protocol": "http", - "subdomains": ['www'], - "domain_name": "facebook.com", - "path": [], - "filename": "example.profile", - "variables": {}, - "shebang": "/pages/Another-Page/123456789012345" - } - } -] - - -exports["test URI parsing and chunking functionality"] = function (assert) { - for (let test of test_uris_and_attributes) { - assert.equal(parseURI(test['url']), test['chunks'], test['reason']); - } -}; - -require('sdk/test').run(exports); \ No newline at end of file