Merged encoding fixes into codebase
This commit is contained in:
Коммит
6e2e1fd6b4
Двоичные данные
tests/resources/bug_621360.js
Двоичные данные
tests/resources/bug_621360.js
Двоичный файл не отображается.
Двоичный файл не отображается.
|
@ -0,0 +1 @@
|
|||
function test() {}
|
Двоичный файл не отображается.
Двоичный файл не отображается.
|
@ -0,0 +1 @@
|
|||
function täst() {}
|
Двоичный файл не отображается.
|
@ -0,0 +1 @@
|
|||
täst
|
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
|
@ -0,0 +1 @@
|
|||
täst
|
|
@ -0,0 +1 @@
|
|||
täst
|
|
@ -1,20 +0,0 @@
|
|||
import os
|
||||
import validator.testcases.scripting
|
||||
|
||||
def _do_test(path):
|
||||
"Performs a test on a JS file"
|
||||
script = open(path).read()
|
||||
|
||||
err = validator.testcases.scripting.traverser.MockBundler()
|
||||
validator.testcases.scripting.test_js_file(err, path, script)
|
||||
|
||||
return err
|
||||
|
||||
def test_control_chars():
|
||||
"Tests that control characters throw a single error"
|
||||
|
||||
err = _do_test("tests/resources/bug_621360.js")
|
||||
# There should be a single error.
|
||||
print err.message_count
|
||||
assert err.message_count == 1
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
import os
|
||||
|
||||
import validator.unicodehelper
|
||||
import validator.testcases.scripting
|
||||
|
||||
# Originated from bug 626496
|
||||
|
||||
def _do_test(path):
    """Run the JS tests against the file at `path` and return the bundler.

    The file is read as raw bytes and decoded with
    `validator.unicodehelper.decode`, then handed to
    `validator.testcases.scripting.test_js_file` together with a fresh
    `MockBundler`. The decoded script and the collected error ids are
    printed as debugging output.
    """
    # Use a context manager so the file handle is closed as soon as the
    # bytes have been read, rather than whenever it is garbage collected.
    with open(path, "rb") as js_file:
        script = validator.unicodehelper.decode(js_file.read())
    # Encode to ASCII (replacing anything else) before printing the script.
    print(script.encode("ascii", "replace"))

    err = validator.testcases.scripting.traverser.MockBundler()
    validator.testcases.scripting.test_js_file(err, path, script)

    print(err.ids)

    return err
|
||||
|
||||
def test_controlchars_ascii_ok():
    """Checks that the `controlchars_ascii_ok.js` fixture produces no
    error ids."""
    fixture = "tests/resources/controlchars/controlchars_ascii_ok.js"
    bundle = _do_test(fixture)
    assert len(bundle.ids) == 0
|
||||
|
||||
def test_controlchars_ascii_warn():
    """Checks that the `controlchars_ascii_warn.js` fixture yields exactly
    one error id whose third component is "syntax_error"."""
    fixture = "tests/resources/controlchars/controlchars_ascii_warn.js"
    bundle = _do_test(fixture)
    assert len(bundle.ids) == 1
    first_id = bundle.ids[0]
    assert first_id[2] == "syntax_error"
|
||||
|
||||
def test_controlchars_utf8_ok():
    """Checks that the `controlchars_utf-8_ok.js` fixture produces no
    error ids."""
    fixture = "tests/resources/controlchars/controlchars_utf-8_ok.js"
    bundle = _do_test(fixture)
    assert len(bundle.ids) == 0
|
||||
|
||||
def test_controlchars_utf8_warn():
    """Checks that the `controlchars_utf-8_warn.js` fixture yields exactly
    one error id whose third component is "syntax_error"."""
    fixture = "tests/resources/controlchars/controlchars_utf-8_warn.js"
    bundle = _do_test(fixture)
    assert len(bundle.ids) == 1
    first_id = bundle.ids[0]
    assert first_id[2] == "syntax_error"
|
||||
|
|
@ -1,24 +1,26 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import validator.testcases.markup.markuptester as markuptester
|
||||
from validator.errorbundler import ErrorBundle
|
||||
from validator.constants import *
|
||||
|
||||
def _do_test(path, should_fail=False, type_=None):
|
||||
return _do_test_raw(open(path).read(),
|
||||
path,
|
||||
should_fail,
|
||||
type_)
|
||||
|
||||
markup_file = open(path)
|
||||
data = markup_file.read()
|
||||
markup_file.close()
|
||||
|
||||
def _do_test_raw(data, path, should_fail=False, type_=None):
|
||||
filename = path.split("/")[-1]
|
||||
extension = filename.split(".")[-1]
|
||||
|
||||
err = ErrorBundle(None, True)
|
||||
err = ErrorBundle()
|
||||
if type_:
|
||||
err.set_type(type_)
|
||||
|
||||
parser = markuptester.MarkupParser(err, debug=True)
|
||||
parser.process(filename, data, extension)
|
||||
|
||||
err.print_summary(True)
|
||||
print err.print_summary(verbose=True)
|
||||
|
||||
if should_fail:
|
||||
assert err.failed()
|
||||
|
@ -31,7 +33,7 @@ def _do_test(path, should_fail=False, type_=None):
|
|||
def test_local_url_detector():
|
||||
"Tests that local URLs can be detected."
|
||||
|
||||
err = ErrorBundle(None, True)
|
||||
err = ErrorBundle()
|
||||
mp = markuptester.MarkupParser(err)
|
||||
tester = mp._is_url_local
|
||||
|
||||
|
@ -135,3 +137,18 @@ def test_invalid_markup():
|
|||
result = _do_test("tests/resources/markup/markuptester/bad_script.xml",
|
||||
False)
|
||||
assert result.notices
|
||||
|
||||
|
||||
def test_self_closing_scripts():
|
||||
"""Tests that self-closing script tags are not deleterious to parsing"""
|
||||
|
||||
_do_test_raw("""
|
||||
<foo>
|
||||
<script type="text/javascript"/>
|
||||
<list_item undecodable=" _ " />
|
||||
<list_item />
|
||||
<list_item />
|
||||
</foo>
|
||||
""", "foo.js")
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import nose
|
||||
import os
|
||||
import validator.unicodehelper as unicodehelper
|
||||
|
||||
COMPARISON = "täst".decode("utf-8")
|
||||
|
||||
def _do_test(path):
    """Decode the file at `path` and check the result equals COMPARISON.

    The raw contents are passed through `unicodehelper.decode`; the decoded
    text is printed for debugging and then compared against the module-level
    COMPARISON value with `nose.tools.eq_`.
    """
    # Read raw bytes: callers pass fixtures named after different encodings
    # (utf-16, utf-32, ...), so text-mode newline translation must not touch
    # the data. The context manager also closes the handle promptly.
    with open(path, "rb") as handle:
        text = handle.read()
    utext = unicodehelper.decode(text)

    # Backslash-escape anything outside ASCII before printing.
    print(utext.encode("ascii", "backslashreplace"))
    nose.tools.eq_(utext, COMPARISON)
|
||||
|
||||
def test_latin1():
    """Tests latin-1 encoding is properly decoded"""
    # The previous docstring said "utf-8", but the fixture passed to
    # _do_test is the latin_1 one.
    _do_test("tests/resources/unicodehelper/latin_1.txt")
|
||||
|
||||
def test_utf8():
    """Runs the decode check on the utf-8 fixture that has no BOM."""
    fixture = "tests/resources/unicodehelper/utf-8.txt"
    _do_test(fixture)
|
||||
|
||||
def test_utf8_bom():
    """Tests utf-8 with BOM encoding is properly decoded"""
    # This function was previously also named `test_utf8`, which rebound the
    # module-level name and hid the earlier no-BOM test from the test runner.
    _do_test("tests/resources/unicodehelper/utf-8-bom.txt")
|
||||
|
||||
def test_utf16le():
    """Runs the decode check on the utf-16 little-endian fixture."""
    fixture = "tests/resources/unicodehelper/utf-16le.txt"
    _do_test(fixture)
|
||||
|
||||
def test_utf16be():
    """Runs the decode check on the utf-16 big-endian fixture."""
    fixture = "tests/resources/unicodehelper/utf-16be.txt"
    _do_test(fixture)
|
||||
|
||||
def test_utf32le():
    """Runs the decode check on the utf-32 little-endian fixture."""
    fixture = "tests/resources/unicodehelper/utf-32le.txt"
    _do_test(fixture)
|
||||
|
||||
def test_utf32be():
    """Runs the decode check on the utf-32 big-endian fixture."""
    fixture = "tests/resources/unicodehelper/utf-32be.txt"
    _do_test(fixture)
|
|
@ -1,6 +1,5 @@
|
|||
from StringIO import StringIO
|
||||
|
||||
import textfilter
|
||||
import unicodehelper
|
||||
|
||||
|
||||
class ContextGenerator:
|
||||
|
@ -83,6 +82,6 @@ class ContextGenerator:
|
|||
data = "%s ..." % data[:140]
|
||||
|
||||
data = "%s%s" % (raw_data[0:with_ws - line_length], data)
|
||||
data = textfilter.filter_ascii(data)
|
||||
data = unicodehelper.decode(data)
|
||||
return data
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ import uuid
|
|||
from StringIO import StringIO
|
||||
|
||||
from outputhandlers.shellcolors import OutputHandler
|
||||
from textfilter import filter_ascii
|
||||
import unicodehelper
|
||||
|
||||
|
||||
class ErrorBundle(object):
|
||||
|
@ -110,8 +110,8 @@ class ErrorBundle(object):
|
|||
else:
|
||||
message["context"] = None
|
||||
|
||||
message["message"] = filter_ascii(message["message"])
|
||||
message["description"] = filter_ascii(message["description"])
|
||||
message["message"] = unicodehelper.decode(message["message"])
|
||||
message["description"] = unicodehelper.decode(message["description"])
|
||||
|
||||
stack.append(message)
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ def main():
|
|||
# Print the output of the tests based on the requested format.
|
||||
if args.output == "text":
|
||||
print error_bundle.print_summary(verbose=args.verbose,
|
||||
no_color=args.boring)
|
||||
no_color=args.boring).encode("utf-8")
|
||||
elif args.output == "json":
|
||||
sys.stdout.write(error_bundle.render_json())
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ from StringIO import StringIO
|
|||
from validator.contextgenerator import ContextGenerator
|
||||
from validator import decorator
|
||||
from validator import submain as testendpoint_validator
|
||||
from validator import unicodehelper
|
||||
import validator.testcases.markup.markuptester as testendpoint_markup
|
||||
import validator.testcases.markup.csstester as testendpoint_css
|
||||
import validator.testcases.scripting as testendpoint_js
|
||||
|
@ -165,9 +166,8 @@ def test_packed_packages(err, package_contents=None, xpi_package=None):
|
|||
if not file_data:
|
||||
continue
|
||||
|
||||
# Skip BOMs and the like
|
||||
while not is_standard_ascii(file_data[0]):
|
||||
file_data = file_data[1:]
|
||||
# Convert the file data to unicode
|
||||
file_data = unicodehelper.decode(file_data)
|
||||
|
||||
if data["extension"] == "css":
|
||||
testendpoint_css.test_css_file(err,
|
||||
|
|
|
@ -291,12 +291,14 @@ def _call_expression(traverser, node):
|
|||
result = dangerous(a=args, t=t)
|
||||
if result:
|
||||
# Generate a string representation of the params
|
||||
params = ", ".join([str(t(p).get_literal_value()) for p in args])
|
||||
params = u", ".join([unicode(t(p).get_literal_value()) for
|
||||
p in args])
|
||||
traverser.err.warning(("testcases_javascript_actions",
|
||||
"_call_expression",
|
||||
"called_dangerous_global"),
|
||||
"Global called in dangerous manner",
|
||||
result if isinstance(result, str) else
|
||||
result if isinstance(result,
|
||||
types.StringTypes) else
|
||||
"A global function was called using a set "
|
||||
"of dangerous parameters. These parameters "
|
||||
"have been disallowed.",
|
||||
|
@ -418,10 +420,10 @@ def _expr_assignment(traverser, node):
|
|||
if lit_right is None:
|
||||
lit_right = 0
|
||||
|
||||
if isinstance(lit_left, (str, unicode)) or \
|
||||
isinstance(lit_right, (str, unicode)):
|
||||
lit_left = str(lit_left)
|
||||
lit_right = str(lit_right)
|
||||
if isinstance(lit_left, types.StringTypes) or \
|
||||
isinstance(lit_right, types.StringTypes):
|
||||
lit_left = unicode(lit_left)
|
||||
lit_right = unicode(lit_right)
|
||||
|
||||
gleft = _get_as_num(left)
|
||||
gright = _get_as_num(right)
|
||||
|
@ -446,8 +448,8 @@ def _expr_assignment(traverser, node):
|
|||
traverser.debug_level -= 1
|
||||
return left
|
||||
|
||||
traverser._debug("ASSIGNMENT::LEFT>>%s" % str(left.is_global))
|
||||
traverser._debug("ASSIGNMENT::RIGHT>>%s" % str(operators[token]()))
|
||||
traverser._debug("ASSIGNMENT::LEFT>>%s" % unicode(left.is_global))
|
||||
traverser._debug("ASSIGNMENT::RIGHT>>%s" % unicode(operators[token]()))
|
||||
left.set_value(operators[token](), traverser=traverser)
|
||||
traverser.debug_level -= 1
|
||||
return left
|
||||
|
@ -469,7 +471,7 @@ def _expr_binary(traverser, node):
|
|||
left = traverser._traverse_node(node["left"])
|
||||
if not isinstance(left, JSWrapper):
|
||||
left = JSWrapper(left, traverser=traverser)
|
||||
traverser._debug(str(left.dirty))
|
||||
traverser._debug(unicode(left.dirty))
|
||||
|
||||
traverser.debug_level -= 1
|
||||
|
||||
|
@ -479,7 +481,7 @@ def _expr_binary(traverser, node):
|
|||
right = traverser._traverse_node(node["right"])
|
||||
if not isinstance(right, JSWrapper):
|
||||
right = JSWrapper(right, traverser=traverser)
|
||||
traverser._debug(str(right.dirty))
|
||||
traverser._debug(unicode(right.dirty))
|
||||
|
||||
if left.dirty:
|
||||
return left
|
||||
|
@ -589,7 +591,7 @@ def _get_as_num(value):
|
|||
return False
|
||||
|
||||
try:
|
||||
if isinstance(value, str):
|
||||
if isinstance(value, types.StringTypes):
|
||||
return float(value)
|
||||
elif isinstance(value, int) or isinstance(value, float):
|
||||
return value
|
||||
|
|
|
@ -9,7 +9,7 @@ traverser
|
|||
node
|
||||
the current node being evaluated
|
||||
"""
|
||||
|
||||
import types
|
||||
from jstypes import *
|
||||
|
||||
|
||||
|
@ -21,10 +21,11 @@ def createElement(args, traverser, node):
|
|||
|
||||
simple_args = [traverser._traverse_node(a) for a in args]
|
||||
|
||||
if str(simple_args[0].get_literal_value()).lower() == "script":
|
||||
if unicode(simple_args[0].get_literal_value()).lower() == u"script":
|
||||
_create_script_tag(traverser)
|
||||
elif not (simple_args[0].is_literal() or
|
||||
isinstance(simple_args[0].get_literal_value(), str)):
|
||||
isinstance(simple_args[0].get_literal_value(),
|
||||
types.StringTypes)):
|
||||
_create_variable_element(traverser)
|
||||
|
||||
|
||||
|
@ -36,10 +37,11 @@ def createElementNS(args, traverser, node):
|
|||
|
||||
simple_args = [traverser._traverse_node(a) for a in args]
|
||||
|
||||
if "script" in str(simple_args[1].get_literal_value()).lower():
|
||||
if "script" in unicode(simple_args[1].get_literal_value()).lower():
|
||||
_create_script_tag(traverser)
|
||||
elif not (simple_args[1].is_literal() or
|
||||
isinstance(simple_args[1].get_literal_value(), str)):
|
||||
isinstance(simple_args[1].get_literal_value(),
|
||||
types.StringTypes)):
|
||||
_create_variable_element(traverser)
|
||||
|
||||
|
||||
|
@ -115,7 +117,7 @@ def setAttribute(args, traverser, node):
|
|||
|
||||
simple_args = [traverser._traverse_node(a) for a in args]
|
||||
|
||||
if str(simple_args[0].get_literal_value()).lower().startswith("on"):
|
||||
if unicode(simple_args[0].get_literal_value()).lower().startswith("on"):
|
||||
traverser.err.notice(
|
||||
err_id=("testcases_javascript_instanceactions", "setAttribute",
|
||||
"setting_on*"),
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
import re
|
||||
import types
|
||||
|
||||
import jstypes
|
||||
|
||||
def set_innerHTML(new_value, traverser):
|
||||
"Tests that values being assigned to innerHTML are not dangerous"
|
||||
|
||||
if not isinstance(new_value, jstypes.JSWrapper):
|
||||
new_value = jstypes.JSWrapper(new_value, traverser=traverser)
|
||||
literal_value = new_value.get_literal_value()
|
||||
if isinstance(literal_value, types.StringTypes):
|
||||
# Static string assignments
|
||||
|
@ -16,9 +19,11 @@ def set_innerHTML(new_value, traverser):
|
|||
err_id=("testcases_javascript_instancetypes", "set_innerHTML",
|
||||
"event_assignment"),
|
||||
warning="Event handler assignment via innerHTML",
|
||||
description="When assigning event handlers, innerHTML "
|
||||
"should never be used. Rather, use a "
|
||||
"proper technique, like addEventListener.",
|
||||
description=["When assigning event handlers, innerHTML "
|
||||
"should never be used. Rather, use a "
|
||||
"proper technique, like addEventListener.",
|
||||
"Event handler code: %s" %
|
||||
literal_value.encode("ascii", "replace")],
|
||||
filename=traverser.filename,
|
||||
line=traverser.line,
|
||||
column=traverser.position,
|
||||
|
|
|
@ -19,7 +19,7 @@ class JSObject(object):
|
|||
|
||||
def get(self, name):
|
||||
"Returns the value associated with a property name"
|
||||
name = str(name)
|
||||
name = unicode(name)
|
||||
return self.data[name] if name in self.data else None
|
||||
|
||||
def get_literal_value(self):
|
||||
|
@ -36,11 +36,11 @@ class JSObject(object):
|
|||
self.data[name] = value
|
||||
|
||||
def has_var(self, name):
|
||||
name = str(name)
|
||||
name = unicode(name)
|
||||
return name in self.data
|
||||
|
||||
def output(self):
|
||||
return str(self.data)
|
||||
return unicode(self.data)
|
||||
|
||||
|
||||
class JSContext(JSObject):
|
||||
|
@ -56,7 +56,7 @@ class JSContext(JSObject):
|
|||
def output(self):
|
||||
output = {}
|
||||
for (name, item) in self.data.items():
|
||||
output[name] = str(item)
|
||||
output[name] = unicode(item)
|
||||
return json.dumps(output)
|
||||
|
||||
|
||||
|
@ -273,7 +273,7 @@ class JSWrapper(object):
|
|||
|
||||
def __str__(self):
|
||||
"""Returns a textual version of the object."""
|
||||
return str(self.get_literal_value())
|
||||
return unicode(self.get_literal_value())
|
||||
|
||||
|
||||
class JSLiteral(JSObject):
|
||||
|
@ -309,7 +309,7 @@ class JSPrototype(JSObject):
|
|||
|
||||
def get(self, name):
|
||||
"Enables static analysis of `with` statements"
|
||||
name = str(name)
|
||||
name = unicode(name)
|
||||
output = None
|
||||
if name in self.data:
|
||||
output = self.data[name]
|
||||
|
@ -351,7 +351,7 @@ class JSArray(JSObject):
|
|||
# Interestingly enough, this allows for things like:
|
||||
# x = [4]
|
||||
# y = x * 3 // y = 12 since x equals "4"
|
||||
return ",".join([str(w.get_literal_value()) for w in self.elements])
|
||||
return u",".join([unicode(w.get_literal_value()) for w in self.elements])
|
||||
|
||||
def set(self, index, value, traverser=None):
|
||||
"""Follow the rules of JS for creating an array"""
|
||||
|
|
|
@ -15,9 +15,9 @@ BANNED_IDENTIFIERS = ("newThread", )
|
|||
# "True", except the string will be outputted when the error is thrown.
|
||||
|
||||
INTERFACES = {
|
||||
"nsICategoryManager":
|
||||
u"nsICategoryManager":
|
||||
{"value":
|
||||
{"addCategoryEntry":
|
||||
{u"addCategoryEntry":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
|
@ -28,33 +28,33 @@ INTERFACES = {
|
|||
"Authors of bootstrapped add-ons must take care "
|
||||
"to cleanup any added category entries "
|
||||
"at shutdown")}}},
|
||||
"nsIComponentRegistrar":
|
||||
u"nsIComponentRegistrar":
|
||||
{"value":
|
||||
{"autoRegister":
|
||||
{u"autoRegister":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
"Bootstrapped add-ons may not register "
|
||||
"chrome manifest files"},
|
||||
"registerFactory":
|
||||
u"registerFactory":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
"Authors of bootstrapped add-ons must take care "
|
||||
"to cleanup any component registrations "
|
||||
"at shutdown"}}},
|
||||
"nsIObserverService":
|
||||
u"nsIObserverService":
|
||||
{"value":
|
||||
{"addObserver":
|
||||
{u"addObserver":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
"Authors of bootstrapped add-ons must take care "
|
||||
"to remove any added observers "
|
||||
"at shutdown"}}},
|
||||
"nsIResProtocolHandler":
|
||||
u"nsIResProtocolHandler":
|
||||
{"value":
|
||||
{"setSubstitution":
|
||||
{u"setSubstitution":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
|
@ -64,30 +64,30 @@ INTERFACES = {
|
|||
"Authors of bootstrapped add-ons must take care "
|
||||
"to cleanup any added resource substitutions "
|
||||
"at shutdown"}}},
|
||||
"nsIStringBundleService":
|
||||
u"nsIStringBundleService":
|
||||
{"value":
|
||||
{"createStringBundle":
|
||||
{u"createStringBundle":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
"Authors of bootstrapped add-ons must take care "
|
||||
"to flush the string bundle cache at shutdown"},
|
||||
"createExtensibleBundle":
|
||||
u"createExtensibleBundle":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
"Authors of bootstrapped add-ons must take care "
|
||||
"to flush the string bundle cache at shutdown"}}},
|
||||
"nsIStyleSheetService":
|
||||
u"nsIStyleSheetService":
|
||||
{"value":
|
||||
{"loadAndRegisterSheet":
|
||||
{u"loadAndRegisterSheet":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
"Authors of bootstrapped add-ons must take care "
|
||||
"to unregister any registered stylesheets "
|
||||
"at shutdown"}}},
|
||||
"nsIWindowMediator":
|
||||
u"nsIWindowMediator":
|
||||
{"value":
|
||||
{"registerNotification":
|
||||
{"dangerous":
|
||||
|
@ -96,9 +96,9 @@ INTERFACES = {
|
|||
"Authors of bootstrapped add-ons must take care "
|
||||
"to remove any added observers "
|
||||
"at shutdown"}}},
|
||||
"nsIWindowWatcher":
|
||||
u"nsIWindowWatcher":
|
||||
{"value":
|
||||
{"addListener":
|
||||
{u"addListener":
|
||||
{"dangerous":
|
||||
lambda a, t, e:
|
||||
e.get_resource("em:bootstrap") and \
|
||||
|
@ -109,126 +109,126 @@ INTERFACES = {
|
|||
|
||||
# GLOBAL_ENTITIES is also representative of the `window` object.
|
||||
GLOBAL_ENTITIES = {
|
||||
"window": {"value": lambda: GLOBAL_ENTITIES},
|
||||
"document":
|
||||
{"value": {"createElement":
|
||||
u"window": {"value": lambda: GLOBAL_ENTITIES},
|
||||
u"document":
|
||||
{"value": {u"createElement":
|
||||
{"dangerous":
|
||||
lambda a, t: t(a[0]).get_literal_value()
|
||||
.lower() == "script"},
|
||||
"createElementNS":
|
||||
u"createElementNS":
|
||||
{"dangerous":
|
||||
lambda a, t: t(a[0]).get_literal_value()
|
||||
.lower() == "script"}}},
|
||||
|
||||
# The nefarious timeout brothers!
|
||||
"setTimeout": {"dangerous": actions._call_settimeout},
|
||||
"setInterval": {"dangerous": actions._call_settimeout},
|
||||
u"setTimeout": {"dangerous": actions._call_settimeout},
|
||||
u"setInterval": {"dangerous": actions._call_settimeout},
|
||||
|
||||
"encodeURI": {"readonly": True},
|
||||
"decodeURI": {"readonly": True},
|
||||
"encodeURIComponent": {"readonly": True},
|
||||
"decodeURIComponent": {"readonly": True},
|
||||
"escape": {"readonly": True},
|
||||
"unescape": {"readonly": True},
|
||||
"isFinite": {"readonly": True},
|
||||
"isNaN": {"readonly": True},
|
||||
"parseFloat": {"readonly": True},
|
||||
"parseInt": {"readonly": True},
|
||||
u"encodeURI": {"readonly": True},
|
||||
u"decodeURI": {"readonly": True},
|
||||
u"encodeURIComponent": {"readonly": True},
|
||||
u"decodeURIComponent": {"readonly": True},
|
||||
u"escape": {"readonly": True},
|
||||
u"unescape": {"readonly": True},
|
||||
u"isFinite": {"readonly": True},
|
||||
u"isNaN": {"readonly": True},
|
||||
u"parseFloat": {"readonly": True},
|
||||
u"parseInt": {"readonly": True},
|
||||
|
||||
"eval": {"dangerous": True},
|
||||
"Function": {"dangerous": True},
|
||||
"Object": {"value": {"prototype": {"dangerous": True},
|
||||
"constructor": # Just an experiment for now
|
||||
{"value": lambda: GLOBAL_ENTITIES["Function"]}}},
|
||||
"String": {"value": {"prototype": {"dangerous": True}}},
|
||||
"Array": {"value": {"prototype": {"dangerous": True}}},
|
||||
"Number": {"value": {"prototype": {"dangerous": True}}},
|
||||
"Boolean": {"value": {"prototype": {"dangerous": True}}},
|
||||
"RegExp": {"value": {"prototype": {"dangerous": True}}},
|
||||
"Date": {"value": {"prototype": {"dangerous": True}}},
|
||||
u"eval": {"dangerous": True},
|
||||
u"Function": {"dangerous": True},
|
||||
u"Object": {"value": {u"prototype": {"dangerous": True},
|
||||
u"constructor": # Just an experiment for now
|
||||
{"value": lambda: GLOBAL_ENTITIES["Function"]}}},
|
||||
u"String": {"value": {u"prototype": {"dangerous": True}}},
|
||||
u"Array": {"value": {u"prototype": {"dangerous": True}}},
|
||||
u"Number": {"value": {u"prototype": {"dangerous": True}}},
|
||||
u"Boolean": {"value": {u"prototype": {"dangerous": True}}},
|
||||
u"RegExp": {"value": {u"prototype": {"dangerous": True}}},
|
||||
u"Date": {"value": {u"prototype": {"dangerous": True}}},
|
||||
|
||||
"Math": {"readonly": True},
|
||||
u"Math": {"readonly": True},
|
||||
|
||||
"netscape":
|
||||
{"value": {"security":
|
||||
{"value": {"PrivilegeManager":
|
||||
{"value": {"enablePrivilege":
|
||||
u"netscape":
|
||||
{"value": {u"security":
|
||||
{"value": {u"PrivilegeManager":
|
||||
{"value": {u"enablePrivilege":
|
||||
{"dangerous": True}}}}}}},
|
||||
|
||||
"navigator":
|
||||
{"value": {"wifi": {"dangerous": True},
|
||||
"geolocation": {"dangerous": True}}},
|
||||
u"navigator":
|
||||
{"value": {u"wifi": {"dangerous": True},
|
||||
u"geolocation": {"dangerous": True}}},
|
||||
|
||||
"Components":
|
||||
u"Components":
|
||||
{"readonly": True,
|
||||
"value":
|
||||
{"classes":
|
||||
{u"classes":
|
||||
{"xpcom_wildcard": True,
|
||||
"value":
|
||||
{"createInstance":
|
||||
{u"createInstance":
|
||||
{"return": call_definitions.xpcom_constructor("createInstance")},
|
||||
"getService":
|
||||
u"getService":
|
||||
{"return": call_definitions.xpcom_constructor("getService")}}},
|
||||
"utils":
|
||||
{"value": {"evalInSandbox":
|
||||
{"value": {u"evalInSandbox":
|
||||
{"dangerous": True},
|
||||
"import":
|
||||
u"import":
|
||||
{"dangerous":
|
||||
lambda a, t:
|
||||
a and \
|
||||
str(t(a[0]).get_literal_value())
|
||||
.count("ctypes.jsm")}}},
|
||||
"interfaces":
|
||||
{"value": {"nsIXMLHttpRequest":
|
||||
unicode(t(a[0]).get_literal_value())
|
||||
.count("ctypes.jsm")}}},
|
||||
u"interfaces":
|
||||
{"value": {u"nsIXMLHttpRequest":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
GLOBAL_ENTITIES["XMLHttpRequest"]},
|
||||
"nsICategoryManager":
|
||||
u"nsICategoryManager":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsICategoryManager"]},
|
||||
"nsIComponentRegistrar":
|
||||
u"nsIComponentRegistrar":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIComponentRegistrar"]},
|
||||
"nsIObserverService":
|
||||
u"nsIObserverService":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIObserverService"]},
|
||||
"nsIResProtocolHandler":
|
||||
u"nsIResProtocolHandler":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIResProtocolHandler"]},
|
||||
"nsIStyleSheetService":
|
||||
u"nsIStyleSheetService":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIStyleSheetService"]},
|
||||
"nsIStringBundleService":
|
||||
u"nsIStringBundleService":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIStringBundleService"]},
|
||||
"nsIWindowMediator":
|
||||
u"nsIWindowMediator":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIWindowMediator"]},
|
||||
"nsIWindowWatcher":
|
||||
u"nsIWindowWatcher":
|
||||
{"xpcom_map":
|
||||
lambda:
|
||||
INTERFACES["nsIWindowWatcher"]},
|
||||
"nsIProcess":
|
||||
u"nsIProcess":
|
||||
{"dangerous": True},
|
||||
"nsIDOMGeoGeolocation":
|
||||
u"nsIDOMGeoGeolocation":
|
||||
{"dangerous": True},
|
||||
"nsIX509CertDB":
|
||||
u"nsIX509CertDB":
|
||||
{"dangerous": True},
|
||||
"mozIJSSubScriptLoader":
|
||||
u"mozIJSSubScriptLoader":
|
||||
{"dangerous": True}}}}},
|
||||
"extensions": {"dangerous": True},
|
||||
"xpcnativewrappers": {"dangerous": True},
|
||||
u"extensions": {"dangerous": True},
|
||||
u"xpcnativewrappers": {"dangerous": True},
|
||||
|
||||
"XMLHttpRequest":
|
||||
u"XMLHttpRequest":
|
||||
{"value":
|
||||
{"open": {"dangerous":
|
||||
{u"open": {"dangerous":
|
||||
# Ban synchronous XHR by making sure the third arg
|
||||
# is absent and false.
|
||||
lambda a, t:
|
||||
|
@ -241,7 +241,7 @@ GLOBAL_ENTITIES = {
|
|||
"connections."}}},
|
||||
|
||||
# Global properties are inherently read-only, though this formalizes it.
|
||||
"Infinity": {"readonly": True},
|
||||
"NaN": {"readonly": True},
|
||||
"undefined": {"readonly": True},
|
||||
u"Infinity": {"readonly": True},
|
||||
u"NaN": {"readonly": True},
|
||||
u"undefined": {"readonly": True},
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import codecs
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
@ -7,7 +8,7 @@ from cStringIO import StringIO
|
|||
|
||||
from validator.constants import SPIDERMONKEY_INSTALLATION
|
||||
from validator.contextgenerator import ContextGenerator
|
||||
from validator.textfilter import *
|
||||
import validator.unicodehelper as unicodehelper
|
||||
|
||||
JS_ESCAPE = re.compile("\\\\+[ux]", re.I)
|
||||
|
||||
|
@ -81,59 +82,21 @@ def prepare_code(code, err, filename):
|
|||
# slash: a character is necessary to prevent bad identifier errors
|
||||
code = JS_ESCAPE.sub("u", code)
|
||||
|
||||
encoding = None
|
||||
try:
|
||||
code = unicode(code) # Make sure we can get a Unicode representation
|
||||
code = strip_weird_chars(code, err=err, name=filename)
|
||||
except UnicodeDecodeError:
|
||||
# If it's not an easily decodeable encoding, detect it and decode that
|
||||
code = filter_ascii(code)
|
||||
|
||||
code = unicodehelper.decode(code)
|
||||
return code
|
||||
|
||||
|
||||
def strip_weird_chars(chardata, err=None, name=""):
|
||||
line_num = 1
|
||||
out_code = StringIO()
|
||||
has_warned_ctrlchar = False
|
||||
|
||||
for line in chardata.split("\n"):
|
||||
|
||||
charpos = 0
|
||||
for char in line:
|
||||
if is_standard_ascii(char):
|
||||
out_code.write(char)
|
||||
else:
|
||||
if not has_warned_ctrlchar and err is not None:
|
||||
err.warning(("testcases_scripting",
|
||||
"_get_tree",
|
||||
"control_char_filter"),
|
||||
"Invalid control character in JS file",
|
||||
"An invalid character (ASCII 0-31, except CR "
|
||||
"and LF) has been found in a JS file. These "
|
||||
"are considered unsafe and should be removed.",
|
||||
filename=name,
|
||||
line=line_num,
|
||||
column=charpos,
|
||||
context=ContextGenerator(chardata))
|
||||
has_warned_ctrlchar = True
|
||||
|
||||
charpos += 1
|
||||
|
||||
out_code.write("\n")
|
||||
line_num += 1
|
||||
|
||||
return out_code.getvalue()
|
||||
|
||||
|
||||
def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
|
||||
"Returns an AST tree of the JS passed in `code`."
|
||||
|
||||
if not code:
|
||||
return None
|
||||
|
||||
temp = tempfile.NamedTemporaryFile(mode="w+", delete=False)
|
||||
temp.write(code)
|
||||
code = unicodehelper.decode(code)
|
||||
|
||||
temp = tempfile.NamedTemporaryFile(mode="w+b", delete=False)
|
||||
#temp.write(codecs.BOM_UTF8)
|
||||
temp.write(code.encode("utf_8"))
|
||||
temp.flush()
|
||||
|
||||
data = """try{
|
||||
|
@ -147,7 +110,7 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
|
|||
}""" % json.dumps(temp.name)
|
||||
|
||||
try:
|
||||
cmd = [shell, "-e", data]
|
||||
cmd = [shell, "-e", data, "-U"]
|
||||
try:
|
||||
shell_obj = subprocess.Popen(cmd,
|
||||
shell=False,
|
||||
|
@ -171,11 +134,7 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
|
|||
if not data:
|
||||
raise JSReflectException("Reflection failed")
|
||||
|
||||
try:
|
||||
data = unicode(data)
|
||||
except UnicodeDecodeError:
|
||||
data = unicode(filter_ascii(data))
|
||||
|
||||
data = unicodehelper.decode(data)
|
||||
parsed = json.loads(data, strict=False)
|
||||
|
||||
if "error" in parsed and parsed["error"]:
|
||||
|
|
|
@ -40,12 +40,14 @@ class MockBundler:
|
|||
# Increment the message counter
|
||||
self.message_count += 1
|
||||
|
||||
self.ids.append(id)
|
||||
self.ids.append(err_id)
|
||||
|
||||
error = unicode(error)
|
||||
|
||||
print "-" * 30
|
||||
print error
|
||||
print error.encode("ascii", "replace")
|
||||
print "~" * len(error)
|
||||
if isinstance(description, str):
|
||||
if isinstance(description, types.StringTypes):
|
||||
print description
|
||||
else:
|
||||
# Errors can have multiple lines
|
||||
|
@ -105,12 +107,14 @@ class Traverser:
|
|||
output = data
|
||||
if isinstance(data, JSObject) or isinstance(data, JSContext):
|
||||
output = data.output()
|
||||
print ". " * self.debug_level + output
|
||||
|
||||
output = unicode(output)
|
||||
print ". " * self.debug_level + output.encode("ascii", "replace")
|
||||
|
||||
def run(self, data):
|
||||
if DEBUG:
|
||||
x = open("/tmp/output.js", "w")
|
||||
x.write(str(data))
|
||||
x.write(unicode(data))
|
||||
x.close()
|
||||
|
||||
if "type" not in data or not self._can_handle_node(data["type"]):
|
||||
|
@ -189,7 +193,7 @@ class Traverser:
|
|||
if action is not None:
|
||||
action_result = action(self, node)
|
||||
self._debug("ACTION>>%s (%s)" %
|
||||
("halt>>%s" % str(action_result) if
|
||||
("halt>>%s" % unicode(action_result) if
|
||||
action_result else
|
||||
"continue",
|
||||
node["type"]))
|
||||
|
@ -350,7 +354,8 @@ class Traverser:
|
|||
"_build_global",
|
||||
"dangerous_global"),
|
||||
"Dangerous Global Object",
|
||||
[dang if isinstance(dang, str) else
|
||||
[dang if
|
||||
isinstance(dang, types.StringTypes) else
|
||||
"A dangerous or banned global object was "
|
||||
"accessed by some JavaScript code.",
|
||||
"Accessed object: %s" % name],
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
import re
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
|
@ -6,6 +5,7 @@ except ImportError: # pragma: no cover
|
|||
from html.parser import HTMLParser
|
||||
|
||||
import validator.testcases.scripting as scripting
|
||||
import validator.unicodehelper as unicodehelper
|
||||
from validator.testcases.markup import csstester
|
||||
from validator.contextgenerator import ContextGenerator
|
||||
from validator.constants import *
|
||||
|
@ -51,7 +51,7 @@ class MarkupParser(HTMLParser):
|
|||
self.xml_state = []
|
||||
self.xml_buffer = []
|
||||
|
||||
self.reported = {}
|
||||
self.reported = set()
|
||||
|
||||
def process(self, filename, data, extension="xul"):
|
||||
"""Processes data by splitting it into individual lines, then
|
||||
|
@ -61,7 +61,7 @@ class MarkupParser(HTMLParser):
|
|||
self.filename = filename
|
||||
self.extension = extension
|
||||
|
||||
self.reported = {}
|
||||
self.reported = set()
|
||||
|
||||
self.context = ContextGenerator(data)
|
||||
|
||||
|
@ -100,6 +100,8 @@ class MarkupParser(HTMLParser):
|
|||
|
||||
try:
|
||||
self.feed(line + "\n")
|
||||
except UnicodeDecodeError:
|
||||
raise
|
||||
except Exception as inst:
|
||||
if DEBUG: # pragma: no cover
|
||||
print self.xml_state, inst
|
||||
|
@ -107,8 +109,8 @@ class MarkupParser(HTMLParser):
|
|||
if "markup" in self.reported:
|
||||
return
|
||||
|
||||
if "script" in self.xml_state or (
|
||||
self.debug and "testscript" in self.xml_state):
|
||||
if ("script" in self.xml_state or
|
||||
self.debug and "testscript" in self.xml_state):
|
||||
if "script_comments" in self.reported or not self.strict:
|
||||
return
|
||||
self.err.notice(("testcases_markup_markuptester",
|
||||
|
@ -122,7 +124,7 @@ class MarkupParser(HTMLParser):
|
|||
self.filename,
|
||||
line=self.line,
|
||||
context=self.context)
|
||||
self.reported["script_comments"] = True
|
||||
self.reported.add("script_comments")
|
||||
return
|
||||
|
||||
if self.strict:
|
||||
|
@ -136,7 +138,7 @@ class MarkupParser(HTMLParser):
|
|||
self.filename,
|
||||
line=self.line,
|
||||
context=self.context)
|
||||
self.reported["markup"] = True
|
||||
self.reported.add("markup")
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
# Self closing tags don't have an end tag, so we want to
|
||||
|
@ -154,7 +156,7 @@ class MarkupParser(HTMLParser):
|
|||
self_closing = tag in SELF_CLOSING_TAGS
|
||||
|
||||
if DEBUG: # pragma: no cover
|
||||
print self.xml_state, tag, self_closing
|
||||
print "S: ", self.xml_state, tag, self_closing
|
||||
|
||||
# A fictional tag for testing purposes.
|
||||
if tag == "xbannedxtestx":
|
||||
|
@ -286,17 +288,19 @@ class MarkupParser(HTMLParser):
|
|||
return
|
||||
|
||||
self.xml_state.append(tag)
|
||||
self.xml_buffer.append("")
|
||||
self.xml_buffer.append(unicode(""))
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
|
||||
tag = tag.lower()
|
||||
|
||||
if DEBUG: # pragma: no cover
|
||||
print tag, self.xml_state
|
||||
print "E: ", tag, self.xml_state
|
||||
|
||||
if not self.xml_state:
|
||||
if "closing_tags" in self.reported or not self.strict:
|
||||
if DEBUG:
|
||||
print "Unstrict; extra closing tags ------"
|
||||
return
|
||||
self.err.warning(("testcases_markup_markuptester",
|
||||
"handle_endtag",
|
||||
|
@ -307,16 +311,18 @@ class MarkupParser(HTMLParser):
|
|||
self.filename,
|
||||
line=self.line,
|
||||
context=self.context)
|
||||
self.reported["closing_tags"] = True
|
||||
self.reported.add("closing_tags")
|
||||
if DEBUG: # pragma: no cover
|
||||
print "Too many closing tags ------"
|
||||
return
|
||||
|
||||
elif "script" in self.xml_state:
|
||||
elif "script" in self.xml_state[:-1]:
|
||||
# If we're in a script tag, nothing else matters. Just rush
|
||||
# everything possible into the xml buffer.
|
||||
|
||||
self._save_to_buffer("</" + tag + ">")
|
||||
if DEBUG:
|
||||
print "Markup as text in script ------"
|
||||
return
|
||||
|
||||
elif tag not in self.xml_state:
|
||||
|
@ -344,6 +350,8 @@ class MarkupParser(HTMLParser):
|
|||
# classifies as a self-closing tag, we just recursively close
|
||||
# down to the level of the tag we're actually closing.
|
||||
if old_state != tag and old_state in SELF_CLOSING_TAGS:
|
||||
if DEBUG:
|
||||
print "Self closing tag cascading down ------"
|
||||
return self.handle_endtag(tag)
|
||||
|
||||
# If this is an XML-derived language, everything must nest
|
||||
|
@ -365,17 +373,20 @@ class MarkupParser(HTMLParser):
|
|||
if DEBUG: # pragma: no cover
|
||||
print "Invalid markup nesting ------"
|
||||
|
||||
data_buffer = data_buffer.strip()
|
||||
|
||||
# Perform analysis on collected data.
|
||||
if tag == "script":
|
||||
scripting.test_js_snippet(self.err,
|
||||
data_buffer,
|
||||
self.filename,
|
||||
self.line)
|
||||
elif tag == "style":
|
||||
csstester.test_css_file(self.err,
|
||||
self.filename,
|
||||
data_buffer,
|
||||
self.line)
|
||||
if data_buffer:
|
||||
if tag == "script":
|
||||
scripting.test_js_snippet(self.err,
|
||||
data_buffer,
|
||||
self.filename,
|
||||
self.line)
|
||||
elif tag == "style":
|
||||
csstester.test_css_file(self.err,
|
||||
self.filename,
|
||||
data_buffer,
|
||||
self.line)
|
||||
|
||||
def handle_data(self, data):
|
||||
self._save_to_buffer(data)
|
||||
|
@ -413,6 +424,8 @@ class MarkupParser(HTMLParser):
|
|||
if not self.xml_buffer:
|
||||
return
|
||||
|
||||
data = unicodehelper.decode(data)
|
||||
|
||||
self.xml_buffer[-1] += data
|
||||
|
||||
def _format_args(self, args):
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
import codecs
|
||||
import textfilter
|
||||
|
||||
# Many thanks to nmaier for inspiration and code in this module
|
||||
|
||||
# Byte-order marks paired with the codec used once the BOM has been stripped.
# Order matters: BOM_UTF32_LE starts with the same two bytes as BOM_UTF16_LE,
# so the UTF-32 entries have to be tested before the UTF-16 ones.
UNICODES = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
]

# Fallback codecs, tried in this order when no BOM is present and the data
# is not valid UTF-8.
COMMON_ENCODINGS = ("utf-16", "latin_1", "ascii")


def decode(data):
    """
    Decode data employing some charset detection and including unicode BOM
    stripping.

    `data` is expected to be a byte string (`str`). Anything that is not a
    `str` (for instance an already-decoded `unicode` object) is returned
    unchanged.

    Detection order:
      1. A known BOM from UNICODES: the BOM is stripped and the rest is
         decoded with the matching codec, ignoring undecodable bytes.
      2. Strict UTF-8.
      3. latin_1.
      4. Each codec in COMMON_ENCODINGS.
      5. ASCII-filtered text decoded with errors="replace".

    Returns the decoded `unicode` object.
    """

    # Don't make more work than we have to.
    if not isinstance(data, str):
        return data

    # Detect standard unicodes.
    for bom, encoding in UNICODES:
        if data.startswith(bom):
            return unicode(data[len(bom):], encoding, "ignore")

    # Try straight UTF-8. Only a decoding failure means "try the next
    # guess"; anything else (KeyboardInterrupt, MemoryError, ...) must
    # propagate instead of being silently swallowed.
    try:
        return unicode(data, "utf-8")
    except UnicodeDecodeError:
        pass

    # Test for latin_1, because it can be matched as UTF-16
    # Somewhat of a hack, but it works and is about a thousand times faster
    # than using chardet.
    # NOTE(review): for a byte string every ord(c) is below 256, so this
    # guard looks like it is always true and latin_1 accepts any byte; the
    # fallbacks below then appear to be unreachable -- confirm.
    if all(ord(c) < 256 for c in data):
        try:
            return unicode(data, "latin_1")
        except UnicodeDecodeError:
            pass

    # Test for various common encodings.
    for encoding in COMMON_ENCODINGS:
        try:
            return unicode(data, encoding)
        except UnicodeDecodeError:
            pass

    # Anything else gets filtered.
    return unicode(textfilter.filter_ascii(data), errors="replace")
|
||||
|
Загрузка…
Ссылка в новой задаче