зеркало из https://github.com/mozilla/pjs.git
Add tests for URLCanonicalizer and EnchashDecryptor. Small change in
trtable.js to expose JS component in debug builds to allow testing.
This commit is contained in:
@ -85,6 +85,7 @@ RunSet.runall = function() {
@ -0,0 +1,381 @@
<html xmlns="http://www.w3.org/1999/xhtml">
This is a port of all the existing URLCanonicalizer and EnchashDecrypter
unit tests to the mochitest framework.
<title>Test for Bug 356355</title>
<script type="text/javascript" src="/MochiKit/packed.js"></script>
<script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=356355">Mozilla Bug 356355</a>
<p id="display"></p>
<div id="content" style="display: none">
<pre id="test">
<script class="testbody" type="text/javascript">
/** Test for Bug 356355 **/
// Short aliases for the Components registries. Ci is not referenced in the
// part of this test that is visible here.
var Cc = Components.classes;
var Ci = Components.interfaces;
// Instantiate the url-classifier table component by its contract ID.
var table = Cc["@mozilla.org/url-classifier/table;1?type=url"].createInstance();
// UrlClassifierTable only sets wrappedJSObject inside an #ifdef DEBUG section,
// so this lookup works in debug builds only.
// NOTE(review): __parent__ presumably yields the scope the component's script
// was loaded into, which is how PROT_URLCanonicalizer and
// PROT_EnchashDecrypter are reached below -- confirm.
var componentScope = table.wrappedJSObject.__parent__;
// The message text describes the failure case, not the passing one.
ok(!!componentScope, "unable to get wrapped js object");
////// Test PROT_URLCanonicalizer methods //////
var PROT_URLCanonicalizer = componentScope.PROT_URLCanonicalizer;
// Test hex gotcha
var hexify = PROT_URLCanonicalizer.toHex_;
var shouldHaveLeadingZeros = hexify(0) + hexify(1);
ok(shouldHaveLeadingZeros == "0001",
"Need to append leading zeros to hex rep value <= 15 !");
// Test url decoding
var dec = PROT_URLCanonicalizer.fullyDecodeURLAsString_;
// Test empty string
ok(dec("") == "", "decoding empty string");
// Test decoding of all characters
var allCharsEncoded = "";
var allCharsEncodedLowercase = "";
var allCharsAsString = "";
// Special case null
allCharsEncoded += "%01";
allCharsEncodedLowercase += "%01";
allCharsAsString += String.fromCharCode(1);
for (var i = 1; i < 256; i++) {
allCharsEncoded += "%" + PROT_URLCanonicalizer.toHex_(i);
allCharsEncodedLowercase += "%" +
allCharsAsString += String.fromCharCode(i);
ok(dec(allCharsEncoded) == allCharsAsString, "decoding escaped");
ok(dec(allCharsEncodedLowercase) == allCharsAsString, "decoding lowercase");
// Test %-related edge cases
ok(dec("%") == "%", "1 percent");
ok(dec("%xx") == "%xx", "1 percent, two non-hex");
ok(dec("%%") == "%%", "2 percent");
ok(dec("%%%") == "%%%", "3 percent");
ok(dec("%%%%") == "%%%%", "4 percent");
ok(dec("%1") == "%1", "1 percent, one nonhex");
ok(dec("%1z") == "%1z", "1 percent, two nonhex");
ok(dec("a%1z") == "a%1z", "nonhex, 1 percent, two nonhex");
ok(dec("abc%d%e%fg%hij%klmno%") == "abc%d%e%fg%hij%klmno%",
"lots of percents, no hex");
// Test repeated %-decoding. Note: %25 --> %, %32 --> 2, %35 --> 5
ok(dec("%25") == "%", "single-encoded %");
ok(dec("%25%32%35") == "%", "double-encoded %");
ok(dec("asdf%25%32%35asd") == "asdf%asd", "double-encoded % 2");
ok(dec("%%%25%32%35asd%%") == "%%%asd%%", "double-encoded % 3");
ok(dec("%25%32%35%25%32%35%25%32%35") == "%%%",
"sequenctial double-encoded %");
ok(dec("%2525252525252525") == "%", "many-encoded %");
== "~a!b@c#d$e%f^00&11*22(33)44_55+", "4x-encoded string");
// Test encoding methods
var enc = PROT_URLCanonicalizer.specialEncodeURL_;
// Test empty string
ok(enc([]) == "", "encoding empty array");
// Test that all characters we shouldn't encode ([33-36],[38,126]) are not.
var no = [];
var noAsString = "";
for (var i = 33; i < 127; i++)
if (i != 37) { // skip %
noAsString += String.fromCharCode(i);
ok(enc(no) == noAsString, "chars to not encode");
// Test that all the chars that we should encode [0,32],37,[127,255] are
var yes = [];
var yesAsString = "";
var yesExpectedString = "";
// Special case 0
yesAsString += String.fromCharCode(1);
yesExpectedString += "%01";
for (var i = 1; i < 256; i++)
if (i < 33 || i == 37 || i > 126) {
yesAsString += String.fromCharCode(i);
var hex = i.toString(16).toUpperCase();
yesExpectedString += "%" + ((i < 16) ? "0" : "") + hex;
ok(enc(yes) == yesExpectedString, "chars to encode");
// Can not use decodeURIComponent or encodeURIComponent to test b/c UTF-8
// Test composition
var c = PROT_URLCanonicalizer.canonicalizeURL_;
ok(c("http://www.google.com") == "http://www.google.com",
== "", "fully encoded ebay");
== "",
"long url with spaces that stays same");
////// Test PROT_EnchashDecrypter methods //////
var PROT_EnchashDecrypter = componentScope.PROT_EnchashDecrypter;
var l = new PROT_EnchashDecrypter();
// Test our regular expressions. Make sure they are handled the same as on
// the server that handles remote look ups.
// Yes this defies our naming convention, but we copy verbatim from
// the C++ unittest, so lets just keep things clear.
var no_dots = "abcd123;[]";
var one_dot = "abc.123";
var two_dots = "two..dots";
var lots_o_dots = "I have a lovely .... bunch of dots";
var multi_dots = "dots ... and ... more .... dots";
var leading_dot = ".leading";
var trailing_dot = "trailing.";
var trailing_dots = "I love trailing dots....";
var end_dots = ".dots.";
var decimal = "1234567890";
var hex = "0x123452FAf";
var bad_hex = "0xFF0xGG";
var octal = "012034056";
var bad_octal = "012034089";
var garbage = "lk,.:asdfa-=";
var mixed = "1230x78034";
var spaces = "123 0xFA 045";
var r = PROT_EnchashDecrypter.REs;
// Test regular expressions
/**
 * Run a regular expression against a list of sample inputs and report one
 * ok() result per sample.
 *
 * @param re RegExp to exercise
 * @param inputValPairs Flat array of alternating entries: an input string
 *                      followed by the boolean re.test() is expected to
 *                      return for it
 */
function testRE(re, inputValPairs) {
  for (var i = 0; i < inputValPairs.length; i += 2) {
    var input = inputValPairs[i];
    var expected = inputValPairs[i + 1];
    ok(re.test(input) == expected,
       "RegExp broken: " + re + " (input: " + input + ")");
  }
}
var tests =
["", false,
"normal chars;!@#$%^&*&(", false,
"MORE NORMAL ,./<>?;':{}", false,
"Slightly less\2 normal", true,
"\245 stuff \45", true,
"\31", true];
testRE(r.FIND_DODGY_CHARS, tests);
tests =
[no_dots, false,
one_dot, false,
leading_dot, true,
trailing_dots, true,
end_dots, true];
testRE(r.FIND_END_DOTS, tests);
tests =
[no_dots, false,
one_dot, false,
two_dots, true,
lots_o_dots, true,
multi_dots, true];
testRE(r.FIND_MULTIPLE_DOTS, tests);
tests =
[no_dots, false,
one_dot, false,
trailing_dot, true,
trailing_dots, true];
testRE(r.FIND_TRAILING_DOTS, tests);
tests =
["random junk", false,
"123.45.6-7.89", false,
"012.12.123", true,
"0x12.0xff.123", true,
"", true];
testRE(r.POSSIBLE_IP, tests);
tests =
[decimal, false,
hex, false,
octal, false,
bad_octal, true];
testRE(r.FIND_BAD_OCTAL, tests);
tests =
[decimal, false,
hex, false,
bad_octal, false,
garbage, false,
mixed, false,
spaces, false,
octal, true];
testRE(r.IS_OCTAL, tests);
tests =
[hex, false,
garbage, false,
mixed, false,
spaces, false,
octal, true,
bad_octal, true,
decimal, true];
testRE(r.IS_DECIMAL, tests);
tests =
[decimal, false,
octal, false,
bad_octal, false,
garbage, false,
mixed, false,
spaces, false,
bad_hex, false,
hex, true];
testRE(r.IS_HEX, tests);
// Test find last N
var longstr = "";
for(var k = 0; k < 100; k++) {
longstr += "a";
var shortstr = "short";
var val = l.lastNChars_(longstr, 8);
ok(val.length == 8, "find last eight broken on long str");
val = l.lastNChars_(shortstr, 8);
ok(val.length == 5, "find last eight broken on short str");
// Test canonical num
var tests =
["", "", 1, true,
"", "10", 0, true,
"", "0x45", -1, true,
"45", "45", 1, true,
"16", "0x10", 1, true,
"1.111", "367", 2, true,
"0.20.229", "012345", 3, true,
"123", "0173", 1, true,
"9", "09", 1, false,
"", "0x120x34", 2, true,
"18.252", "0x12fc", 2, true];
for (var i = 0; i < tests.length; i+= 4) {
ok(tests[i] === l.canonicalNum_(tests[i + 1], tests[i + 2], tests[i + 3]),
"canonicalNum broken on: " + tests[i + 1]);
// Test parseIPAddress (these are all verifiable using ping)
var testing = {};
testing[""] = "";
testing[""] = "";
testing["12.0x12.01234"] = "";
testing[""] = "";
testing["0x12.0x43.0x44.0x01"] = "";
testing["0x12434401"] = "";
testing["413960661"] = "";
testing["03053104725"] = "";
testing["030.0254.0x89d5"] = "";
testing[""] = "";
// Check every sample address. The parsed value is computed once so that the
// value reported on failure is exactly the value that was compared, and the
// "(got: ...)" part of the message is properly closed.
for (var key in testing) {
  var parsedAddress = l.parseIPAddress_(key);
  ok(parsedAddress === testing[key],
     "parseIPAddress broken on " + key + "(got: " + parsedAddress + ")");
}
// Test getCanonicalHost
var testing = {};
testing["http://completely.bogus.url.with.a.whole.lot.of.dots"] =
testing["http://poseidon.marinet.gr/~elani"] = "poseidon.marinet.gr";
testing["http://www.google.com.."] = "www.google.com";
testing["https://www.yaho%6F.com"] = "www.yahoo.com";
testing[""] = "";
testing["ftp://wierd..chars...%0f,%fa"] = "wierd.chars.,";
testing["http://0x18ac89d5/http.www.paypal.com/"] = "";
testing["http://413960661/http.www.paypal.com/"] = "";
testing["http://03053104725/http.www.paypal.com/"] = "";
+ "detailsconfirm"] = "co.uk.brccontrol.assruspede.org.bz";
// Check every sample URL. The host is computed once, with the same MAX_DOTS
// argument the assertion uses, so the value shown in a failure message is the
// value that was actually compared (the message used to call
// getCanonicalHost(key) without MAX_DOTS).
for (var key in testing) {
  var canonicalHost = l.getCanonicalHost(key, PROT_EnchashDecrypter.MAX_DOTS);
  ok(canonicalHost == testing[key],
     "getCanonicalHost broken on: " + key + "(got: " + canonicalHost + ")");
}
// Test getCanonicalUrl
testing = {};
testing["http://0x18.0xac.0x89.0xd5/http.www.paypal.com/"] =
testing["http://0x18ac89d5/http.www.paypal.com/"] =
testing["http://413960661/http.www.paypal.com/"] =
testing["http://03053104725/http.www.paypal.com/"] =
testing["http://03053104725/%68t%74p.www.paypal.c%6fm/"] =
testing["http://www.barclays.co.uk.brccontrol.assruspede.org.bz/detailsconfirm"] =
for (var key in testing)
ok(l.getCanonicalUrl(key) == testing[key],
"getCanonicalUrl broken on: " + key + "(got: " + l.getCanonicalUrl(key) + ")");
// Test getlookupkey
var testing = {};
testing["www.google.com"] = "AF5638A09FDDDAFF5B7A6013B1BE69A9";
testing["poseidon.marinet.gr"] = "01844755C8143C4579BB28DD59C23747";
testing[""] = "B775DDC22DEBF8BEBFEAC24CE40A1FBF";
for (var key in testing)
ok(l.getLookupKey(key) === testing[key],
"getlookupkey broken on " + key + " (got: " +
l.getLookupKey(key) + ", expected: " +
testing[key] + ")");
// Test decryptdata
var tests =
[ "bGtEQWJuMl/z2ZxSBB2hsuWI8geMAwfSh3YBfYPejQ1O+wyRAJeJ1UW3V56zm" +
"EpUvnaEiECN1pndxW5rEMNzE+gppPeel7PvH+OuabL3NXlspcP0xnpK8rzNgB1" +
"^(?i)http\\:\\/\\/80\\.53\\.164\\.26(?:\\:80)?\\/\\.PayPal" +
"ZTMzZjVnb3WW1Yc2ABorgQGAwYfcaCb/BG3sMFLTMDvOQxH8LkdGGWqp2tI5SK" +
"uNrXIHNf2cyzcVocTqUIUkt1Ud1GKieINcp4tWcU53I0VZ0ZZHCjGObDCbv9Wb" +
"^(?i)http\\:\\/\\/80\\.53\\.164\\.26(?:\\:80)?\\/\\.PayPal\\.com" +
"ZTMzZjVnb3WVb6VqoJ44hVo4V77XjDRcXTxOc2Zpn4yIHcpS0AQ0nn1TVlX4MY" +
"^(?i)http\\:\\/\\/poseidon\\.marinet\\.gr(?:\\:80)?\\/\\~eleni" +
"bGtEQWJuMl9FA3Kl5RiXMpgFU8nDJl9J0hXjUck9+mMUQwAN6llf0gJeY5DIPP" +
"^(?i)http\\:\\/\\/poseidon\\.marinet\\.gr(?:\\:80)?\\/\\~eleni" +
for (var i = 0; i < tests.length; i += 3) {
var dec = l.decryptData(tests[i], tests[i + 1]);
ok(dec === tests[i + 2],
"decryptdata broken on " + tests[i] + " (got: " + dec + ", expected: "
+ tests[i + 2] + ")");
@ -351,264 +351,3 @@ PROT_EnchashDecrypter.prototype.decryptData = function(data, host) {
return this.streamCipher_.finish(false /* no base64 */);
#ifdef DEBUG
* Lame unittesting function
function TEST_PROT_EnchashDecrypter() {
if (G_GDEBUG) {
var z = "enchash UNITTEST";
G_Debug(z, "Starting");
// Yes this defies our naming convention, but we copy verbatim from
// the C++ unittest, so lets just keep things clear.
var no_dots = "abcd123;[]";
var one_dot = "abc.123";
var two_dots = "two..dots";
var lots_o_dots = "I have a lovely .... bunch of dots";
var multi_dots = "dots ... and ... more .... dots";
var leading_dot = ".leading";
var trailing_dot = "trailing.";
var trailing_dots = "I love trailing dots....";
var end_dots = ".dots.";
var decimal = "1234567890";
var hex = "0x123452FAf";
var bad_hex = "0xFF0xGG";
var octal = "012034056";
var bad_octal = "012034089";
var garbage = "lk,.:asdfa-=";
var mixed = "1230x78034";
var spaces = "123 0xFA 045";
var longstr = "";
for(var k = 0; k < 100; k++) {
longstr += "a";
var shortstr = "short";
var r = PROT_EnchashDecrypter.REs;
var l = new PROT_EnchashDecrypter();
// Test regular expressions
/**
 * Run a regular expression against a list of sample inputs and report one
 * G_Assert() result per sample in the enclosing debug zone z.
 *
 * @param re RegExp to exercise
 * @param inputValPairs Flat array of alternating entries: an input string
 *                      followed by the boolean re.test() is expected to
 *                      return for it
 */
function testRE(re, inputValPairs) {
  for (var i = 0; i < inputValPairs.length; i += 2) {
    var input = inputValPairs[i];
    var expected = inputValPairs[i + 1];
    G_Assert(z, re.test(input) == expected,
             "RegExp broken: " + re + " (input: " + input + ")");
  }
}
var tests =
["", false,
"normal chars;!@#$%^&*&(", false,
"MORE NORMAL ,./<>?;':{}", false,
"Slightly less\2 normal", true,
"\245 stuff \45", true,
"\31", true];
testRE(r.FIND_DODGY_CHARS, tests);
tests =
[no_dots, false,
one_dot, false,
leading_dot, true,
trailing_dots, true,
end_dots, true];
testRE(r.FIND_END_DOTS, tests);
tests =
[no_dots, false,
one_dot, false,
two_dots, true,
lots_o_dots, true,
multi_dots, true];
testRE(r.FIND_MULTIPLE_DOTS, tests);
tests =
[no_dots, false,
one_dot, false,
trailing_dot, true,
trailing_dots, true];
testRE(r.FIND_TRAILING_DOTS, tests);
tests =
["random junk", false,
"123.45.6-7.89", false,
"012.12.123", true,
"0x12.0xff.123", true,
"", true];
testRE(r.POSSIBLE_IP, tests);
tests =
[decimal, false,
hex, false,
octal, false,
bad_octal, true];
testRE(r.FIND_BAD_OCTAL, tests);
tests =
[decimal, false,
hex, false,
bad_octal, false,
garbage, false,
mixed, false,
spaces, false,
octal, true];
testRE(r.IS_OCTAL, tests);
tests =
[hex, false,
garbage, false,
mixed, false,
spaces, false,
octal, true,
bad_octal, true,
decimal, true];
testRE(r.IS_DECIMAL, tests);
tests =
[decimal, false,
octal, false,
bad_octal, false,
garbage, false,
mixed, false,
spaces, false,
bad_hex, false,
hex, true];
testRE(r.IS_HEX, tests);
// Test find last N
var val = l.lastNChars_(longstr, 8);
G_Assert(z, val.length == 8, "find last eight broken on long str");
val = l.lastNChars_(shortstr, 8);
G_Assert(z, val.length == 5, "find last 11 broken on short str");
// Test canonical num
tests =
["", "", 1, true,
"", "10", 0, true,
"", "0x45", -1, true,
"45", "45", 1, true,
"16", "0x10", 1, true,
"1.111", "367", 2, true,
"0.20.229", "012345", 3, true,
"123", "0173", 1, true,
"9", "09", 1, false,
"", "0x120x34", 2, true,
"18.252", "0x12fc", 2, true];
for (var i = 0; i < tests.length; i+= 4)
G_Assert(z, tests[i] === l.canonicalNum_(tests[i + 1],
tests[i + 2],
tests[i + 3]),
"canonicalNum broken on: " + tests[i + 1]);
// Test parseIPAddress (these are all verifiable using ping)
var testing = {};
testing[""] = "";
testing[""] = "";
testing["12.0x12.01234"] = "";
testing[""] = "";
testing["0x12.0x43.0x44.0x01"] = "";
testing["0x12434401"] = "";
testing["413960661"] = "";
testing["03053104725"] = "";
testing["030.0254.0x89d5"] = "";
testing[""] = "";
for (var key in testing)
G_Assert(z, l.parseIPAddress_(key) === testing[key],
"parseIPAddress broken on " + key + "(got: " +
// Test getCanonicalHost
var testing = {};
testing["http://completely.bogus.url.with.a.whole.lot.of.dots"] =
testing["http://poseidon.marinet.gr/~elani"] = "poseidon.marinet.gr";
testing["http://www.google.com.."] = "www.google.com";
testing["https://www.yaho%6F.com"] = "www.yahoo.com";
testing[""] = "";
testing["ftp://wierd..chars...%0f,%fa"] = "wierd.chars.,";
testing["http://0x18ac89d5/http.www.paypal.com/"] = "";
testing["http://413960661/http.www.paypal.com/"] = "";
testing["http://03053104725/http.www.paypal.com/"] = "";
+ "detailsconfirm"] = "co.uk.brccontrol.assruspede.org.bz";
for (var key in testing)
G_Assert(z, l.getCanonicalHost(key, PROT_EnchashDecrypter.MAX_DOTS) ==
"getCanonicalHost broken on: " + key +
"(got: " + l.getCanonicalHost(key) + ")");
// Test getCanonicalUrl
testing = {};
testing["http://0x18.0xac.0x89.0xd5/http.www.paypal.com/"] =
testing["http://0x18ac89d5/http.www.paypal.com/"] =
testing["http://413960661/http.www.paypal.com/"] =
testing["http://03053104725/http.www.paypal.com/"] =
testing["http://03053104725/%68t%74p.www.paypal.c%6fm/"] =
testing["http://www.barclays.co.uk.brccontrol.assruspede.org.bz/detailsconfirm"] =
for (var key in testing)
G_Assert(z, l.getCanonicalUrl(key) == testing[key],
"getCanonicalUrl broken on: " + key +
"(got: " + l.getCanonicalUrl(key) + ")");
// Test getlookupkey
var testing = {};
testing["www.google.com"] = "AF5638A09FDDDAFF5B7A6013B1BE69A9";
testing["poseidon.marinet.gr"] = "01844755C8143C4579BB28DD59C23747";
testing[""] = "B775DDC22DEBF8BEBFEAC24CE40A1FBF";
for (var key in testing)
G_Assert(z, l.getLookupKey(key) === testing[key],
"getlookupkey broken on " + key + " (got: " +
l.getLookupKey(key) + ", expected: " +
testing[key] + ")");
// Test decryptdata
var tests =
[ "bGtEQWJuMl/z2ZxSBB2hsuWI8geMAwfSh3YBfYPejQ1O+wyRAJeJ1UW3V56zm" +
"EpUvnaEiECN1pndxW5rEMNzE+gppPeel7PvH+OuabL3NXlspcP0xnpK8rzNgB1" +
"^(?i)http\\:\\/\\/80\\.53\\.164\\.26(?:\\:80)?\\/\\.PayPal" +
"ZTMzZjVnb3WW1Yc2ABorgQGAwYfcaCb/BG3sMFLTMDvOQxH8LkdGGWqp2tI5SK" +
"uNrXIHNf2cyzcVocTqUIUkt1Ud1GKieINcp4tWcU53I0VZ0ZZHCjGObDCbv9Wb" +
"^(?i)http\\:\\/\\/80\\.53\\.164\\.26(?:\\:80)?\\/\\.PayPal\\.com" +
"ZTMzZjVnb3WVb6VqoJ44hVo4V77XjDRcXTxOc2Zpn4yIHcpS0AQ0nn1TVlX4MY" +
"^(?i)http\\:\\/\\/poseidon\\.marinet\\.gr(?:\\:80)?\\/\\~eleni" +
"bGtEQWJuMl9FA3Kl5RiXMpgFU8nDJl9J0hXjUck9+mMUQwAN6llf0gJeY5DIPP" +
"^(?i)http\\:\\/\\/poseidon\\.marinet\\.gr(?:\\:80)?\\/\\~eleni" +
for (var i = 0; i < tests.length; i += 3) {
var dec = l.decryptData(tests[i], tests[i + 1]);
G_Assert(z, dec === tests[i + 2],
"decryptdata broken on " + tests[i] + " (got: " + dec +
", expected: " + tests[i + 2] + ")");
G_Debug(z, "PASSED");
@ -47,6 +47,9 @@ function UrlClassifierTable() {
this.name = '';
this.needsUpdate = false;
this.enchashDecrypter_ = new PROT_EnchashDecrypter();
#ifdef DEBUG
this.wrappedJSObject = this;
UrlClassifierTable.prototype.QueryInterface = function(iid) {
@ -1,354 +0,0 @@
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
# The Original Code is Google Safe Browsing.
# The Initial Developer of the Original Code is Google Inc.
# Portions created by the Initial Developer are Copyright (C) 2006
# the Initial Developer. All Rights Reserved.
# Contributor(s):
# Fritz Schneider <fritz@google.com> (original author)
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
# ***** END LICENSE BLOCK *****
// This is the class we use to canonicalize URLs for TRTables of type
// url. We maximally URL-decode the URL, treating +'s as if they're
// not special. We then specially URL-encode it (we encode ASCII
// values [0, 32] (whitespace or unprintable), 37 (%), [127, 255]
// (unprintable)).
// This mapping is not one-to-one. That is, multiple URLs can map to
// the same canonical representation. However this is OK because
// collisions happen only when there are weird characters (e.g.,
// nonprintables), and the canonical representation makes us robust
// to some weird kinds of encoding we could see.
// All members are static at this point -- this is basically a namespace.
/**
 * Create a new URLCanonicalizer. Useless because members are static: this
 * function only serves as a namespace, and calling it always throws.
 *
 * @constructor
 * @throws Error on every call
 */
function PROT_URLCanonicalizer() {
  throw new Error("No need to instantiate a canonicalizer at this point.");
}

// NOTE(review): presumably the zone name used to tag debug output -- confirm.
PROT_URLCanonicalizer.debugZone = "urlcanonicalizer";

// Digit table indexed by nibble value; read by toHex_.
PROT_URLCanonicalizer.hexChars_ = "0123456789ABCDEF";
/**
 * Helper function to (maybe) convert a two-character hex string into its
 * decimal numerical equivalent.
 *
 * Only strings made up entirely of hex digits are accepted. Relying on
 * Number("0x" + hh) alone is not enough: Number() ignores trailing
 * whitespace, so a pair such as "1 " would be reported as the valid value 1
 * and the decoder would swallow the space.
 *
 * @param hh String of length two that might be a valid hex sequence
 * @returns Number: NaN if hh wasn't valid hex, else the appropriate decimal
 *          value
 */
PROT_URLCanonicalizer.hexPairToInt_ = function(hh) {
  if (!/^[0-9A-Fa-f]+$/.test(hh))
    return NaN;
  return parseInt(hh, 16);
};
/**
 * Helper function to hex-encode a number. The result always has exactly two
 * digits, so values below 16 keep their leading zero.
 *
 * @param val Number in range [0, 255]
 * @returns String containing the hex representation of that number (sans 0x)
 */
PROT_URLCanonicalizer.toHex_ = function(val) {
  var digits = PROT_URLCanonicalizer.hexChars_;
  var highNibble = (val >> 4) & 15;
  var lowNibble = val & 15;
  return digits.charAt(highNibble) + digits.charAt(lowNibble);
};
/**
 * Canonicalize a URL. DON'T USE THIS DIRECTLY. Use
 * PROT_EnchashDecrypter.prototype.getCanonicalUrl instead. This method
 * url-decodes a string, but it doesn't normalize the hostname. The method
 * in EnchashDecrypter first calls this method, then normalizes the hostname.
 *
 * @param url String to canonicalize
 * @returns String containing the canonicalized url (maximally url-decoded,
 *          then specially url-encoded)
 */
PROT_URLCanonicalizer.canonicalizeURL_ = function(url) {
  var decodedCharCodes = PROT_URLCanonicalizer.fullyDecodeURL_(url);
  return PROT_URLCanonicalizer.specialEncodeURL_(decodedCharCodes);
};
/**
 * Maximally URL-decode a URL. This breaks the semantics of the URL, but
 * we don't care because we're using it for lookup, not for navigation.
 * We break multi-byte UTF-8 escape sequences as well, but we don't care
 * so long as they canonicalize the same way consistently (they do).
 *
 * @param url String containing the URL to maximally decode. Should ONLY
 *            contain characters with UCS codepoints U+0001 to U+00FF
 *            (the ASCII set minus null).
 * @returns Array of ASCII values corresponding to the decoded sequence of
 *          characters in the url
 */
PROT_URLCanonicalizer.fullyDecodeURL_ = function(url) {

  // The goals here are: simplicity, correctness, and most of all
  // portability; we want the same implementation of canonicalization
  // wherever we use it so as to minimize the chances of
  // inconsistency. For example, we have to do this canonicalization
  // on URLs we get from third parties, and at the lookup server when
  // we get a request.
  //
  // The following implementation should translate easily to any
  // language that supports arrays and pointers or references. Note
  // that arrays are pointer types in JavaScript, so foo = [some,
  // array] points foo at the array; it doesn't copy it. The
  // implementation is efficient (linear) so long as most %'s in the
  // url belong to valid escape sequences and there aren't too many
  // doubly-escaped values.
  //
  // The basic idea is to copy current input to output, decoding escape
  // sequences as we see them, until we decode a %. At that point we start
  // copying into the "next iteration buffer" instead of the output buffer;
  // we do this so we can accommodate multiply-escaped strings. When we hit
  // the end of the input, we take the "next iteration buffer" as our input,
  // and start over.

  var nextIteration = url.split("");
  var output = [];

  while (nextIteration.length) {
    var decodedAPercent = false;
    var thisIteration = nextIteration;
    nextIteration = [];

    for (var i = 0; i < thisIteration.length; i++) {
      var c = thisIteration[i];

      // A candidate escape needs two more characters after the %.
      if (c == "%" && i + 2 < thisIteration.length) {
        // Peek ahead to see if we have a valid HH sequence
        var asciiVal =
          PROT_URLCanonicalizer.hexPairToInt_(thisIteration[i + 1] +
                                              thisIteration[i + 2]);
        if (!isNaN(asciiVal)) {
          i += 2;                 // Valid HH sequence; consume it

          if (asciiVal == 0)      // We special case nulls
            asciiVal = 1;

          c = String.fromCharCode(asciiVal);
          if (c == "%")
            decodedAPercent = true;
        }
      }

      // Once a % has been produced by decoding, everything from that point
      // on has to be rescanned, so it is deferred to the next pass instead
      // of being emitted.
      if (decodedAPercent)
        nextIteration[nextIteration.length] = c;
      else
        output[output.length] = c.charCodeAt(0);
    }
  }

  return output;
};
/**
 * Maximally URL-decode a URL (same as fullyDecodeURL_ except that it
 * returns a string). Useful for making unittests more readable.
 *
 * @param url String containing the URL to maximally decode. Should ONLY
 *            contain characters with UCS codepoints U+0001 to U+00FF
 *            (the ASCII set minus null).
 * @returns String containing the decoded URL
 */
PROT_URLCanonicalizer.fullyDecodeURLAsString_ = function(url) {
  var charCodes = PROT_URLCanonicalizer.fullyDecodeURL_(url);
  var pieces = [];
  for (var idx = 0; idx < charCodes.length; idx++)
    pieces.push(String.fromCharCode(charCodes[idx]));
  return pieces.join("");
};
/**
 * Specially URL-encode the given array of ASCII values. We want to encode
 * the characters: [0, 32], 37, [127, 255]. Every other value is emitted as
 * the character itself.
 *
 * @param arrayOfASCIIValues Array of ascii values (numbers) to encode
 * @returns String corresponding to the escaped URL
 */
PROT_URLCanonicalizer.specialEncodeURL_ = function(arrayOfASCIIValues) {
  var output = [];
  for (var i = 0; i < arrayOfASCIIValues.length; i++) {
    var n = arrayOfASCIIValues[i];
    if (n <= 32 || n == 37 || n >= 127) {
      // A null is written as %01, matching how the decoder treats %00.
      output.push("%" + ((!n) ? "01" : PROT_URLCanonicalizer.toHex_(n)));
    } else {
      output.push(String.fromCharCode(n));
    }
  }
  return output.join("");
};
#ifdef DEBUG
* Lame unittesting function
function TEST_PROT_URLCanonicalizer() {
if (G_GDEBUG) {
var z = "urlcanonicalizer UNITTEST";
G_Debug(z, "Starting");
// ------ TEST HEX GOTCHA ------
var hexify = PROT_URLCanonicalizer.toHex_;
var shouldHaveLeadingZeros = hexify(0) + hexify(1);
G_Assert(z, shouldHaveLeadingZeros == "0001",
"Need to append leading zeros to hex rep value <= 15 !")
// ------ TEST DECODING ------
// For convenience, shorten the function name
var dec = PROT_URLCanonicalizer.fullyDecodeURLAsString_;
// Test empty string
G_Assert(z, dec("") == "", "decoding empty string");
// Test decoding of all characters
var allCharsEncoded = "";
var allCharsEncodedLowercase = "";
var allCharsAsString = "";
// Special case null
allCharsEncoded += "%01";
allCharsEncodedLowercase += "%01";
allCharsAsString += String.fromCharCode(1);
for (var i = 1; i < 256; i++) {
allCharsEncoded += "%" + PROT_URLCanonicalizer.toHex_(i);
allCharsEncodedLowercase += "%" +
allCharsAsString += String.fromCharCode(i);
G_Assert(z, dec(allCharsEncoded) == allCharsAsString, "decoding escaped");
G_Assert(z, dec(allCharsEncodedLowercase) == allCharsAsString,
"decoding lowercase");
// Test %-related edge cases
G_Assert(z, dec("%") == "%", "1 percent");
G_Assert(z, dec("%xx") == "%xx", "1 percent, two non-hex");
G_Assert(z, dec("%%") == "%%", "2 percent");
G_Assert(z, dec("%%%") == "%%%", "3 percent");
G_Assert(z, dec("%%%%") == "%%%%", "4 percent");
G_Assert(z, dec("%1") == "%1", "1 percent, one nonhex");
G_Assert(z, dec("%1z") == "%1z", "1 percent, two nonhex");
G_Assert(z, dec("a%1z") == "a%1z", "nonhex, 1 percent, two nonhex");
G_Assert(z, dec("abc%d%e%fg%hij%klmno%") == "abc%d%e%fg%hij%klmno%",
"lots of percents, no hex");
// Test repeated %-decoding. Note: %25 --> %, %32 --> 2, %35 --> 5
G_Assert(z, dec("%25") == "%", "single-encoded %");
G_Assert(z, dec("%25%32%35") == "%", "double-encoded %");
G_Assert(z, dec("asdf%25%32%35asd") == "asdf%asd", "double-encoded % 2");
G_Assert(z, dec("%%%25%32%35asd%%") == "%%%asd%%", "double-encoded % 3");
G_Assert(z, dec("%25%32%35%25%32%35%25%32%35") == "%%%",
"sequenctial double-encoded %");
G_Assert(z, dec("%2525252525252525") == "%", "many-encoded %");
G_Assert(z, dec("%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B") == "~a!b@c#d$e%f^00&11*22(33)44_55+",
"4x-encoded string");
// ------ TEST ENCODING ------
// For convenience, shorten the function name
var enc = PROT_URLCanonicalizer.specialEncodeURL_;
// Test empty string
G_Assert(z, enc([]) == "", "encoding empty array");
// Test that all characters we shouldn't encode ([33-36],[38,126]) are not.
var no = [];
var noAsString = "";
for (var i = 33; i < 127; i++)
if (i != 37) { // skip %
noAsString += String.fromCharCode(i);
G_Assert(z, enc(no) == noAsString, "chars to not encode");
// Test that all the chars that we should encode [0,32],37,[127,255] are
var yes = [];
var yesAsString = "";
var yesExpectedString = "";
// Special case 0
yesAsString += String.fromCharCode(1);
yesExpectedString += "%01";
for (var i = 1; i < 256; i++)
if (i < 33 || i == 37 || i > 126) {
yesAsString += String.fromCharCode(i);
var hex = i.toString(16).toUpperCase();
yesExpectedString += "%" + ((i < 16) ? "0" : "") + hex;
G_Assert(z, enc(yes) == yesExpectedString, "chars to encode");
// Can't use decodeURIComponent or encodeURIComponent to test b/c UTF-8
// ------ TEST COMPOSITION ------
// For convenience, shorten function name:
var c = PROT_URLCanonicalizer.canonicalizeURL_;
G_Assert(z, c("http://www.google.com") == "http://www.google.com",
G_Assert(z, c("http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/") == "",
"fully encoded ebay");
G_Assert(z, c("") == "",
"long url with spaces that stays same");
G_Debug(z, "PASSED");
Ссылка в новой задаче