Bug 356355: numeric domain normalization only happens on enchash table values

patch: canonicalize urls before lookup in -url and -domain tables
r=mmchew
This commit is contained in:
tony%ponderer.org 2006-10-12 23:57:51 +00:00
Родитель 66f5675c48
Коммит 611dd559c7
3 изменённых файлов: 75 добавлений и 22 удалений

Просмотреть файл

@ -166,16 +166,41 @@ PROT_EnchashDecrypter.prototype.parseRegExps = function(data) {
return res;
}
PROT_EnchashDecrypter.prototype.getCanonicalHost = function(str) {
var ioservice = Cc["@mozilla.org/network/io-service;1"]
/**
 * Build the canonical form of a URL for lookup in a table of type -url.
 *
 * The URL is first run through PROT_URLCanonicalizer.canonicalizeURL_ and
 * the result is handed to getCanonicalHost.  When a host comes back, the
 * canonicalized URL is parsed into a URI object, its host is replaced with
 * the normalized one, and the URI's asciiSpec is returned.
 *
 * @param url String to canonicalize
 *
 * @returns String containing the canonicalized url with its hostname
 *          normalized, or just the output of canonicalizeURL_ when no
 *          host could be extracted (probably an invalid url)
 */
PROT_EnchashDecrypter.prototype.getCanonicalUrl = function(url) {
  var decodedUrl = PROT_URLCanonicalizer.canonicalizeURL_(url);
  var normalizedHost = this.getCanonicalHost(decodedUrl);

  if (normalizedHost) {
    // Re-parse the canonicalized url so the normalized host can be swapped
    // in for whatever host form the caller supplied.
    var uri = Cc["@mozilla.org/network/io-service;1"]
                .getService(Ci.nsIIOService)
                .newURI(decodedUrl, null, null);
    uri.host = normalizedHost;
    return uri.asciiSpec;
  }

  // No usable host: hand back what we have so far.
  return decodedUrl;
};
PROT_EnchashDecrypter.prototype.getCanonicalHost = function(str) {
var ioService = Cc["@mozilla.org/network/io-service;1"]
.getService(Ci.nsIIOService);
var urlObj = ioservice.newURI(str, null, null);
var asciiHost = '';
try {
asciiHost = urlObj.asciiHost;
var urlObj = ioService.newURI(str, null, null);
var asciiHost = urlObj.asciiHost;
} catch (e) {
return asciiHost;
G_Debug(this, "Unable to get hostname: " + str);
return "";
}
var unescaped = this.hexDecode_(asciiHost);
@ -275,9 +300,11 @@ PROT_EnchashDecrypter.prototype.canonicalNum_ = function(num, bytes, octal) {
if (temp_num == -1)
return "";
// Since we mod the number, we're removing the least significant bits. We
// want to push them onto the front of the array to preserve the order.
var parts = [];
while (bytes--) {
parts.push("" + (temp_num % 256));
parts.unshift("" + (temp_num % 256));
temp_num -= temp_num % 256;
temp_num /= 256;
}
@ -459,27 +486,31 @@ function TEST_PROT_EnchashDecrypter() {
"", "0x45", -1, true,
"45", "45", 1, true,
"16", "0x10", 1, true,
"111.1", "367", 2, true,
"229.20.0", "012345", 3, true,
"1.111", "367", 2, true,
"0.20.229", "012345", 3, true,
"123", "0173", 1, true,
"9", "09", 1, false,
"", "0x120x34", 2, true,
"252.18", "0x12fc", 2, true];
"18.252", "0x12fc", 2, true];
for (var i = 0; i < tests.length; i+= 4)
G_Assert(z, tests[i] === l.canonicalNum_(tests[i + 1],
tests[i + 2],
tests[i + 3]),
"canonicalNum broken on: " + tests[i + 1]);
// Test parseIPAddress
// Test parseIPAddress (these are all verifiable using ping)
var testing = {};
testing["123.123.0.0.1"] = "";
testing["255.0.0.1"] = "255.0.0.1";
testing["12.0x12.01234"] = "12.18.156.2";
testing["276.2.3"] = "20.2.3.0";
testing["12.0x12.01234"] = "12.18.2.156";
testing["012.034.01.055"] = "10.28.1.45";
testing["0x12.0x43.0x44.0x01"] = "18.67.68.1";
testing["0x12434401"] = "18.67.68.1";
testing["413960661"] = "24.172.137.213";
testing["03053104725"] = "24.172.137.213";
testing["030.0254.0x89d5"] = "24.172.137.213";
testing["1.234.4.0377"] = "1.234.4.255";
for (var key in testing)
G_Assert(z, l.parseIPAddress_(key) === testing[key],
"parseIPAddress broken on " + key + "(got: " +
@ -487,18 +518,38 @@ function TEST_PROT_EnchashDecrypter() {
// Test getCanonicalHost
var testing = {};
testing["completely.bogus.url.with.a.whole.lot.of.dots"] =
testing["http://completely.bogus.url.with.a.whole.lot.of.dots"] =
"with.a.whole.lot.of.dots";
testing["http://poseidon.marinet.gr/~elani"] = "poseidon.marinet.gr";
testing["http://www.google.com.."] = "www.google.com";
testing["https://www.yaho%6F.com"] = "www.yahoo.com";
testing["http://012.034.01.0xa"] = "10.28.1.10";
testing["ftp://wierd..chars...%0f,%fa"] = "wierd.chars.,";
testing["http://0x18ac89d5/http.www.paypal.com/"] = "24.172.137.213";
testing["http://413960661/http.www.paypal.com/"] = "24.172.137.213";
testing["http://03053104725/http.www.paypal.com/"] = "24.172.137.213";
for (var key in testing)
G_Assert(z, l.getCanonicalHost(key) == testing[key],
"getCanonicalHost broken on: " + key +
"(got: " + l.getCanonicalHost(key) + ")");
// Test getCanonicalUrl
testing = {};
testing["http://0x18.0xac.0x89.0xd5/http.www.paypal.com/"] =
"http://24.172.137.213/http.www.paypal.com/";
testing["http://0x18ac89d5/http.www.paypal.com/"] =
"http://24.172.137.213/http.www.paypal.com/";
testing["http://413960661/http.www.paypal.com/"] =
"http://24.172.137.213/http.www.paypal.com/";
testing["http://03053104725/http.www.paypal.com/"] =
"http://24.172.137.213/http.www.paypal.com/";
testing["http://03053104725/%68t%74p.www.paypal.c%6fm/"] =
"http://24.172.137.213/http.www.paypal.com/";
for (var key in testing)
G_Assert(z, l.getCanonicalUrl(key) == testing[key],
"getCanonicalUrl broken on: " + key +
"(got: " + l.getCanonicalUrl(key) + ")");
// Test getlookupkey
var testing = {};
testing["www.google.com"] = "AF5638A09FDDDAFF5B7A6013B1BE69A9";
@ -546,7 +597,6 @@ function TEST_PROT_EnchashDecrypter() {
", expected: " + tests[i + 2] + ")");
}
G_Debug(z, "PASSED");
}
}

Просмотреть файл

@ -46,6 +46,7 @@ function UrlClassifierTable() {
this.debugZone = "urlclassifier-table";
this.name = '';
this.needsUpdate = false;
this.enchashDecrypter_ = new PROT_EnchashDecrypter();
}
UrlClassifierTable.prototype.QueryInterface = function(iid) {
@ -74,7 +75,7 @@ UrlClassifierTableUrl.inherits(UrlClassifierTable);
* Look up a URL in a URL table
*/
UrlClassifierTableUrl.prototype.exists = function(url, callback) {
var canonicalized = PROT_URLCanonicalizer.canonicalizeURL_(url);
var canonicalized = this.enchashDecrypter_.getCanonicalUrl(url);
G_Debug(this, "Looking up: " + url + " (" + canonicalized + ")");
var dbservice_ = Cc["@mozilla.org/url-classifier/dbservice;1"]
@ -124,7 +125,8 @@ UrlClassifierTableDomain.inherits(UrlClassifierTable);
* @returns Boolean true if the url domain is in the table
*/
UrlClassifierTableDomain.prototype.exists = function(url, callback) {
var urlObj = this.ioService_.newURI(url, null, null);
var canonicalized = this.enchashDecrypter_.getCanonicalUrl(url);
var urlObj = this.ioService_.newURI(canonicalized, null, null);
var host = '';
try {
host = urlObj.host;
@ -167,7 +169,6 @@ UrlClassifierTableDomain.prototype.exists = function(url, callback) {
function UrlClassifierTableEnchash() {
// Run the UrlClassifierTable constructor against this object first, then
// replace the debug zone name with the enchash-specific one.
UrlClassifierTable.call(this);
this.debugZone = "urlclassifier-table-enchash";
// Decrypter instance stored on the table.
// NOTE(review): the UrlClassifierTable constructor shown in this change also
// assigns enchashDecrypter_, so this assignment may be redundant -- confirm.
this.enchashDecrypter_ = new PROT_EnchashDecrypter();
}
UrlClassifierTableEnchash.inherits(UrlClassifierTable);

Просмотреть файл

@ -90,8 +90,10 @@ PROT_URLCanonicalizer.toHex_ = function(val) {
}
/**
* Get the canonical version of the given URL for lookup in a table of
* type -url.
* Canonicalize a URL. DON'T USE THIS DIRECTLY. Use
* PROT_EnchashDecrypter.prototype.getCanonicalUrl instead. This method
* url-decodes a string, but it doesn't normalize the hostname. The method
* in EnchashDecrypter first calls this method, then normalizes the hostname.
*
* @param url String to canonicalize
*