Merged encoding fixes into codebase

This commit is contained in:
Matt Basta 2011-04-21 17:11:17 +00:00
Родитель 4b6ff350b0 378c28d910
Коммит 6e2e1fd6b4
31 изменённых файлов: 357 добавлений и 225 удалений

Двоичные данные
tests/resources/bug_621360.js

Двоичный файл не отображается.

Двоичные данные
tests/resources/controlchars.js Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
function test() {}

Двоичные данные
tests/resources/controlchars/controlchars_ascii_warn.js Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/controlchars/controlchars_bad.js Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
function täst() {}

Двоичные данные
tests/resources/controlchars/controlchars_utf-8_warn.js Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
täst

Двоичные данные
tests/resources/unicodehelper/utf-16be.txt Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/unicodehelper/utf-16le.txt Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/unicodehelper/utf-32be.txt Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/unicodehelper/utf-32le.txt Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
täst

Просмотреть файл

@ -0,0 +1 @@
täst

Просмотреть файл

@ -1,20 +0,0 @@
import os
import validator.testcases.scripting
def _do_test(path):
    """Run the JS scripting tests against the file at `path`.

    The file contents are handed to
    `validator.testcases.scripting.test_js_file` together with a fresh
    `MockBundler`; that bundler is returned so the caller can inspect it.
    """
    # Close the handle deterministically instead of leaving it to the GC.
    with open(path) as script_file:
        script = script_file.read()

    err = validator.testcases.scripting.traverser.MockBundler()
    validator.testcases.scripting.test_js_file(err, path, script)
    return err
def test_control_chars():
    """Tests that control characters throw a single error"""
    bundle = _do_test("tests/resources/bug_621360.js")
    message_total = bundle.message_count

    # Echo the count so a failing run shows what was actually reported;
    # exactly one message is expected.
    print(message_total)
    assert message_total == 1

Просмотреть файл

@ -0,0 +1,47 @@
import os
import validator.unicodehelper
import validator.testcases.scripting
# Originated from bug 626496
def _do_test(path):
    """Decode the JS file at `path` and run the scripting tests on it.

    The raw bytes are converted with `validator.unicodehelper.decode`
    before being passed to `test_js_file` along with a fresh
    `MockBundler`. The bundler is returned so callers can check `ids`.
    """
    # Read as bytes and close the handle deterministically rather than
    # leaking it until garbage collection.
    with open(path, "rb") as script_file:
        raw = script_file.read()

    script = validator.unicodehelper.decode(raw)
    # Printed in an ASCII-safe form so the output never fails to encode.
    print(script.encode("ascii", "replace"))

    err = validator.testcases.scripting.traverser.MockBundler()
    validator.testcases.scripting.test_js_file(err, path, script)
    print(err.ids)
    return err
def test_controlchars_ascii_ok():
    """Checks that controlchars_ascii_ok.js is processed without any
    message being recorded."""
    errs = _do_test("tests/resources/controlchars/controlchars_ascii_ok.js")
    recorded = len(errs.ids)
    assert recorded == 0
def test_controlchars_ascii_warn():
    """Checks that controlchars_ascii_warn.js records exactly one message
    and that its id ends in "syntax_error"."""
    errs = _do_test("tests/resources/controlchars/controlchars_ascii_warn.js")
    reported = errs.ids
    assert len(reported) == 1
    # The third component of the id tuple names the specific failure.
    assert reported[0][2] == "syntax_error"
def test_controlchars_utf8_ok():
    """Checks that controlchars_utf-8_ok.js is processed without any
    message being recorded."""
    errs = _do_test("tests/resources/controlchars/controlchars_utf-8_ok.js")
    recorded = len(errs.ids)
    assert recorded == 0
def test_controlchars_utf8_warn():
    """Checks that controlchars_utf-8_warn.js records exactly one message
    and that its id ends in "syntax_error"."""
    errs = _do_test("tests/resources/controlchars/controlchars_utf-8_warn.js")
    reported = errs.ids
    assert len(reported) == 1
    # The third component of the id tuple names the specific failure.
    assert reported[0][2] == "syntax_error"

Просмотреть файл

@ -1,24 +1,26 @@
# -*- coding: utf-8 -*-
import validator.testcases.markup.markuptester as markuptester import validator.testcases.markup.markuptester as markuptester
from validator.errorbundler import ErrorBundle from validator.errorbundler import ErrorBundle
from validator.constants import * from validator.constants import *
def _do_test(path, should_fail=False, type_=None): def _do_test(path, should_fail=False, type_=None):
return _do_test_raw(open(path).read(),
path,
should_fail,
type_)
markup_file = open(path) def _do_test_raw(data, path, should_fail=False, type_=None):
data = markup_file.read()
markup_file.close()
filename = path.split("/")[-1] filename = path.split("/")[-1]
extension = filename.split(".")[-1] extension = filename.split(".")[-1]
err = ErrorBundle(None, True) err = ErrorBundle()
if type_: if type_:
err.set_type(type_) err.set_type(type_)
parser = markuptester.MarkupParser(err, debug=True) parser = markuptester.MarkupParser(err, debug=True)
parser.process(filename, data, extension) parser.process(filename, data, extension)
err.print_summary(True) print err.print_summary(verbose=True)
if should_fail: if should_fail:
assert err.failed() assert err.failed()
@ -31,7 +33,7 @@ def _do_test(path, should_fail=False, type_=None):
def test_local_url_detector(): def test_local_url_detector():
"Tests that local URLs can be detected." "Tests that local URLs can be detected."
err = ErrorBundle(None, True) err = ErrorBundle()
mp = markuptester.MarkupParser(err) mp = markuptester.MarkupParser(err)
tester = mp._is_url_local tester = mp._is_url_local
@ -135,3 +137,18 @@ def test_invalid_markup():
result = _do_test("tests/resources/markup/markuptester/bad_script.xml", result = _do_test("tests/resources/markup/markuptester/bad_script.xml",
False) False)
assert result.notices assert result.notices
def test_self_closing_scripts():
"""Tests that self-closing script tags are not deleterious to parsing"""
# The markup is supplied inline through _do_test_raw, with "foo.js" as the
# path argument.
# NOTE(review): the `undecodable` attribute value presumably held a
# non-decodable byte in the original file - confirm against the repository.
_do_test_raw("""
<foo>
<script type="text/javascript"/>
<list_item undecodable=" _ " />
<list_item />
<list_item />
</foo>
""", "foo.js")

Просмотреть файл

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
import nose
import os
import validator.unicodehelper as unicodehelper
# Expected unicode text; _do_test compares each decoded fixture against it.
# The byte-string literal is decoded as UTF-8, matching the coding line above.
COMPARISON = "täst".decode("utf-8")
def _do_test(path):
    """Decode the fixture at `path` and check it equals COMPARISON.

    The file is read as raw bytes, passed through `unicodehelper.decode`,
    and the result is compared with `nose.tools.eq_`.
    """
    # Binary mode: the fixtures include UTF-16/UTF-32 data, which must not
    # go through text-mode newline translation. The `with` block also
    # closes the handle instead of leaking it.
    with open(path, "rb") as fixture:
        text = fixture.read()

    utext = unicodehelper.decode(text)
    # Escaped output keeps the print safe on non-unicode terminals.
    print(utext.encode("ascii", "backslashreplace"))
    nose.tools.eq_(utext, COMPARISON)
def test_latin1():
    "Tests latin-1 encoding is properly decoded"
    # The docstring previously said "utf-8", which mislabelled this test in
    # runner output; the fixture exercised here is latin_1.txt.
    _do_test("tests/resources/unicodehelper/latin_1.txt")
def test_utf8():
    """Tests utf-8 w/o BOM encoding is properly decoded"""
    fixture_path = "tests/resources/unicodehelper/utf-8.txt"
    _do_test(fixture_path)
def test_utf8_bom():
    "Tests utf-8 with BOM encoding is properly decoded"
    # Previously also named `test_utf8`, which rebound the earlier
    # definition so the BOM-less UTF-8 test never ran. A distinct name lets
    # both tests be collected.
    _do_test("tests/resources/unicodehelper/utf-8-bom.txt")
def test_utf16le():
    """Tests utf-16 Little Endian encoding is properly decoded"""
    fixture_path = "tests/resources/unicodehelper/utf-16le.txt"
    _do_test(fixture_path)
def test_utf16be():
    """Tests utf-16 Big Endian encoding is properly decoded"""
    fixture_path = "tests/resources/unicodehelper/utf-16be.txt"
    _do_test(fixture_path)
def test_utf32le():
    """Tests utf-32 Little Endian encoding is properly decoded"""
    fixture_path = "tests/resources/unicodehelper/utf-32le.txt"
    _do_test(fixture_path)
def test_utf32be():
    """Tests utf-32 Big Endian encoding is properly decoded"""
    fixture_path = "tests/resources/unicodehelper/utf-32be.txt"
    _do_test(fixture_path)

Просмотреть файл

@ -1,6 +1,5 @@
from StringIO import StringIO from StringIO import StringIO
import unicodehelper
import textfilter
class ContextGenerator: class ContextGenerator:
@ -83,6 +82,6 @@ class ContextGenerator:
data = "%s ..." % data[:140] data = "%s ..." % data[:140]
data = "%s%s" % (raw_data[0:with_ws - line_length], data) data = "%s%s" % (raw_data[0:with_ws - line_length], data)
data = textfilter.filter_ascii(data) data = unicodehelper.decode(data)
return data return data

Просмотреть файл

@ -3,7 +3,7 @@ import uuid
from StringIO import StringIO from StringIO import StringIO
from outputhandlers.shellcolors import OutputHandler from outputhandlers.shellcolors import OutputHandler
from textfilter import filter_ascii import unicodehelper
class ErrorBundle(object): class ErrorBundle(object):
@ -110,8 +110,8 @@ class ErrorBundle(object):
else: else:
message["context"] = None message["context"] = None
message["message"] = filter_ascii(message["message"]) message["message"] = unicodehelper.decode(message["message"])
message["description"] = filter_ascii(message["description"]) message["description"] = unicodehelper.decode(message["description"])
stack.append(message) stack.append(message)

Просмотреть файл

@ -85,7 +85,7 @@ def main():
# Print the output of the tests based on the requested format. # Print the output of the tests based on the requested format.
if args.output == "text": if args.output == "text":
print error_bundle.print_summary(verbose=args.verbose, print error_bundle.print_summary(verbose=args.verbose,
no_color=args.boring) no_color=args.boring).encode("utf-8")
elif args.output == "json": elif args.output == "json":
sys.stdout.write(error_bundle.render_json()) sys.stdout.write(error_bundle.render_json())

Просмотреть файл

@ -6,6 +6,7 @@ from StringIO import StringIO
from validator.contextgenerator import ContextGenerator from validator.contextgenerator import ContextGenerator
from validator import decorator from validator import decorator
from validator import submain as testendpoint_validator from validator import submain as testendpoint_validator
from validator import unicodehelper
import validator.testcases.markup.markuptester as testendpoint_markup import validator.testcases.markup.markuptester as testendpoint_markup
import validator.testcases.markup.csstester as testendpoint_css import validator.testcases.markup.csstester as testendpoint_css
import validator.testcases.scripting as testendpoint_js import validator.testcases.scripting as testendpoint_js
@ -165,9 +166,8 @@ def test_packed_packages(err, package_contents=None, xpi_package=None):
if not file_data: if not file_data:
continue continue
# Skip BOMs and the like # Convert the file data to unicode
while not is_standard_ascii(file_data[0]): file_data = unicodehelper.decode(file_data)
file_data = file_data[1:]
if data["extension"] == "css": if data["extension"] == "css":
testendpoint_css.test_css_file(err, testendpoint_css.test_css_file(err,

Просмотреть файл

@ -291,12 +291,14 @@ def _call_expression(traverser, node):
result = dangerous(a=args, t=t) result = dangerous(a=args, t=t)
if result: if result:
# Generate a string representation of the params # Generate a string representation of the params
params = ", ".join([str(t(p).get_literal_value()) for p in args]) params = u", ".join([unicode(t(p).get_literal_value()) for
p in args])
traverser.err.warning(("testcases_javascript_actions", traverser.err.warning(("testcases_javascript_actions",
"_call_expression", "_call_expression",
"called_dangerous_global"), "called_dangerous_global"),
"Global called in dangerous manner", "Global called in dangerous manner",
result if isinstance(result, str) else result if isinstance(result,
types.StringTypes) else
"A global function was called using a set " "A global function was called using a set "
"of dangerous parameters. These parameters " "of dangerous parameters. These parameters "
"have been disallowed.", "have been disallowed.",
@ -418,10 +420,10 @@ def _expr_assignment(traverser, node):
if lit_right is None: if lit_right is None:
lit_right = 0 lit_right = 0
if isinstance(lit_left, (str, unicode)) or \ if isinstance(lit_left, types.StringTypes) or \
isinstance(lit_right, (str, unicode)): isinstance(lit_right, types.StringTypes):
lit_left = str(lit_left) lit_left = unicode(lit_left)
lit_right = str(lit_right) lit_right = unicode(lit_right)
gleft = _get_as_num(left) gleft = _get_as_num(left)
gright = _get_as_num(right) gright = _get_as_num(right)
@ -446,8 +448,8 @@ def _expr_assignment(traverser, node):
traverser.debug_level -= 1 traverser.debug_level -= 1
return left return left
traverser._debug("ASSIGNMENT::LEFT>>%s" % str(left.is_global)) traverser._debug("ASSIGNMENT::LEFT>>%s" % unicode(left.is_global))
traverser._debug("ASSIGNMENT::RIGHT>>%s" % str(operators[token]())) traverser._debug("ASSIGNMENT::RIGHT>>%s" % unicode(operators[token]()))
left.set_value(operators[token](), traverser=traverser) left.set_value(operators[token](), traverser=traverser)
traverser.debug_level -= 1 traverser.debug_level -= 1
return left return left
@ -469,7 +471,7 @@ def _expr_binary(traverser, node):
left = traverser._traverse_node(node["left"]) left = traverser._traverse_node(node["left"])
if not isinstance(left, JSWrapper): if not isinstance(left, JSWrapper):
left = JSWrapper(left, traverser=traverser) left = JSWrapper(left, traverser=traverser)
traverser._debug(str(left.dirty)) traverser._debug(unicode(left.dirty))
traverser.debug_level -= 1 traverser.debug_level -= 1
@ -479,7 +481,7 @@ def _expr_binary(traverser, node):
right = traverser._traverse_node(node["right"]) right = traverser._traverse_node(node["right"])
if not isinstance(right, JSWrapper): if not isinstance(right, JSWrapper):
right = JSWrapper(right, traverser=traverser) right = JSWrapper(right, traverser=traverser)
traverser._debug(str(right.dirty)) traverser._debug(unicode(right.dirty))
if left.dirty: if left.dirty:
return left return left
@ -589,7 +591,7 @@ def _get_as_num(value):
return False return False
try: try:
if isinstance(value, str): if isinstance(value, types.StringTypes):
return float(value) return float(value)
elif isinstance(value, int) or isinstance(value, float): elif isinstance(value, int) or isinstance(value, float):
return value return value

Просмотреть файл

@ -9,7 +9,7 @@ traverser
node node
the current node being evaluated the current node being evaluated
""" """
import types
from jstypes import * from jstypes import *
@ -21,10 +21,11 @@ def createElement(args, traverser, node):
simple_args = [traverser._traverse_node(a) for a in args] simple_args = [traverser._traverse_node(a) for a in args]
if str(simple_args[0].get_literal_value()).lower() == "script": if unicode(simple_args[0].get_literal_value()).lower() == u"script":
_create_script_tag(traverser) _create_script_tag(traverser)
elif not (simple_args[0].is_literal() or elif not (simple_args[0].is_literal() or
isinstance(simple_args[0].get_literal_value(), str)): isinstance(simple_args[0].get_literal_value(),
types.StringTypes)):
_create_variable_element(traverser) _create_variable_element(traverser)
@ -36,10 +37,11 @@ def createElementNS(args, traverser, node):
simple_args = [traverser._traverse_node(a) for a in args] simple_args = [traverser._traverse_node(a) for a in args]
if "script" in str(simple_args[1].get_literal_value()).lower(): if "script" in unicode(simple_args[1].get_literal_value()).lower():
_create_script_tag(traverser) _create_script_tag(traverser)
elif not (simple_args[1].is_literal() or elif not (simple_args[1].is_literal() or
isinstance(simple_args[1].get_literal_value(), str)): isinstance(simple_args[1].get_literal_value(),
types.StringTypes)):
_create_variable_element(traverser) _create_variable_element(traverser)
@ -115,7 +117,7 @@ def setAttribute(args, traverser, node):
simple_args = [traverser._traverse_node(a) for a in args] simple_args = [traverser._traverse_node(a) for a in args]
if str(simple_args[0].get_literal_value()).lower().startswith("on"): if unicode(simple_args[0].get_literal_value()).lower().startswith("on"):
traverser.err.notice( traverser.err.notice(
err_id=("testcases_javascript_instanceactions", "setAttribute", err_id=("testcases_javascript_instanceactions", "setAttribute",
"setting_on*"), "setting_on*"),

Просмотреть файл

@ -1,10 +1,13 @@
import re import re
import types import types
import jstypes
def set_innerHTML(new_value, traverser): def set_innerHTML(new_value, traverser):
"Tests that values being assigned to innerHTML are not dangerous" "Tests that values being assigned to innerHTML are not dangerous"
if not isinstance(new_value, jstypes.JSWrapper):
new_value = jstypes.JSWrapper(new_value, traverser=traverser)
literal_value = new_value.get_literal_value() literal_value = new_value.get_literal_value()
if isinstance(literal_value, types.StringTypes): if isinstance(literal_value, types.StringTypes):
# Static string assignments # Static string assignments
@ -16,9 +19,11 @@ def set_innerHTML(new_value, traverser):
err_id=("testcases_javascript_instancetypes", "set_innerHTML", err_id=("testcases_javascript_instancetypes", "set_innerHTML",
"event_assignment"), "event_assignment"),
warning="Event handler assignment via innerHTML", warning="Event handler assignment via innerHTML",
description="When assigning event handlers, innerHTML " description=["When assigning event handlers, innerHTML "
"should never be used. Rather, use a " "should never be used. Rather, use a "
"proper technique, like addEventListener.", "proper technique, like addEventListener.",
"Event handler code: %s" %
literal_value.encode("ascii", "replace")],
filename=traverser.filename, filename=traverser.filename,
line=traverser.line, line=traverser.line,
column=traverser.position, column=traverser.position,

Просмотреть файл

@ -19,7 +19,7 @@ class JSObject(object):
def get(self, name): def get(self, name):
"Returns the value associated with a property name" "Returns the value associated with a property name"
name = str(name) name = unicode(name)
return self.data[name] if name in self.data else None return self.data[name] if name in self.data else None
def get_literal_value(self): def get_literal_value(self):
@ -36,11 +36,11 @@ class JSObject(object):
self.data[name] = value self.data[name] = value
def has_var(self, name): def has_var(self, name):
name = str(name) name = unicode(name)
return name in self.data return name in self.data
def output(self): def output(self):
return str(self.data) return unicode(self.data)
class JSContext(JSObject): class JSContext(JSObject):
@ -56,7 +56,7 @@ class JSContext(JSObject):
def output(self): def output(self):
output = {} output = {}
for (name, item) in self.data.items(): for (name, item) in self.data.items():
output[name] = str(item) output[name] = unicode(item)
return json.dumps(output) return json.dumps(output)
@ -273,7 +273,7 @@ class JSWrapper(object):
def __str__(self): def __str__(self):
"""Returns a textual version of the object.""" """Returns a textual version of the object."""
return str(self.get_literal_value()) return unicode(self.get_literal_value())
class JSLiteral(JSObject): class JSLiteral(JSObject):
@ -309,7 +309,7 @@ class JSPrototype(JSObject):
def get(self, name): def get(self, name):
"Enables static analysis of `with` statements" "Enables static analysis of `with` statements"
name = str(name) name = unicode(name)
output = None output = None
if name in self.data: if name in self.data:
output = self.data[name] output = self.data[name]
@ -351,7 +351,7 @@ class JSArray(JSObject):
# Interestingly enough, this allows for things like: # Interestingly enough, this allows for things like:
# x = [4] # x = [4]
# y = x * 3 // y = 12 since x equals "4" # y = x * 3 // y = 12 since x equals "4"
return ",".join([str(w.get_literal_value()) for w in self.elements]) return u",".join([unicode(w.get_literal_value()) for w in self.elements])
def set(self, index, value, traverser=None): def set(self, index, value, traverser=None):
"""Follow the rules of JS for creating an array""" """Follow the rules of JS for creating an array"""

Просмотреть файл

@ -15,9 +15,9 @@ BANNED_IDENTIFIERS = ("newThread", )
# "True", except the string will be outputted when the error is thrown. # "True", except the string will be outputted when the error is thrown.
INTERFACES = { INTERFACES = {
"nsICategoryManager": u"nsICategoryManager":
{"value": {"value":
{"addCategoryEntry": {u"addCategoryEntry":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
@ -28,33 +28,33 @@ INTERFACES = {
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to cleanup any added category entries " "to cleanup any added category entries "
"at shutdown")}}}, "at shutdown")}}},
"nsIComponentRegistrar": u"nsIComponentRegistrar":
{"value": {"value":
{"autoRegister": {u"autoRegister":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
"Bootstrapped add-ons may not register " "Bootstrapped add-ons may not register "
"chrome manifest files"}, "chrome manifest files"},
"registerFactory": u"registerFactory":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to cleanup any component registrations " "to cleanup any component registrations "
"at shutdown"}}}, "at shutdown"}}},
"nsIObserverService": u"nsIObserverService":
{"value": {"value":
{"addObserver": {u"addObserver":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to remove any added observers " "to remove any added observers "
"at shutdown"}}}, "at shutdown"}}},
"nsIResProtocolHandler": u"nsIResProtocolHandler":
{"value": {"value":
{"setSubstitution": {u"setSubstitution":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
@ -64,30 +64,30 @@ INTERFACES = {
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to cleanup any added resource substitutions " "to cleanup any added resource substitutions "
"at shutdown"}}}, "at shutdown"}}},
"nsIStringBundleService": u"nsIStringBundleService":
{"value": {"value":
{"createStringBundle": {u"createStringBundle":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to flush the string bundle cache at shutdown"}, "to flush the string bundle cache at shutdown"},
"createExtensibleBundle": u"createExtensibleBundle":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to flush the string bundle cache at shutdown"}}}, "to flush the string bundle cache at shutdown"}}},
"nsIStyleSheetService": u"nsIStyleSheetService":
{"value": {"value":
{"loadAndRegisterSheet": {u"loadAndRegisterSheet":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to unregister any registered stylesheets " "to unregister any registered stylesheets "
"at shutdown"}}}, "at shutdown"}}},
"nsIWindowMediator": u"nsIWindowMediator":
{"value": {"value":
{"registerNotification": {"registerNotification":
{"dangerous": {"dangerous":
@ -96,9 +96,9 @@ INTERFACES = {
"Authors of bootstrapped add-ons must take care " "Authors of bootstrapped add-ons must take care "
"to remove any added observers " "to remove any added observers "
"at shutdown"}}}, "at shutdown"}}},
"nsIWindowWatcher": u"nsIWindowWatcher":
{"value": {"value":
{"addListener": {u"addListener":
{"dangerous": {"dangerous":
lambda a, t, e: lambda a, t, e:
e.get_resource("em:bootstrap") and \ e.get_resource("em:bootstrap") and \
@ -109,126 +109,126 @@ INTERFACES = {
# GLOBAL_ENTITIES is also representative of the `window` object. # GLOBAL_ENTITIES is also representative of the `window` object.
GLOBAL_ENTITIES = { GLOBAL_ENTITIES = {
"window": {"value": lambda: GLOBAL_ENTITIES}, u"window": {"value": lambda: GLOBAL_ENTITIES},
"document": u"document":
{"value": {"createElement": {"value": {u"createElement":
{"dangerous": {"dangerous":
lambda a, t: t(a[0]).get_literal_value() lambda a, t: t(a[0]).get_literal_value()
.lower() == "script"}, .lower() == "script"},
"createElementNS": u"createElementNS":
{"dangerous": {"dangerous":
lambda a, t: t(a[0]).get_literal_value() lambda a, t: t(a[0]).get_literal_value()
.lower() == "script"}}}, .lower() == "script"}}},
# The nefarious timeout brothers! # The nefarious timeout brothers!
"setTimeout": {"dangerous": actions._call_settimeout}, u"setTimeout": {"dangerous": actions._call_settimeout},
"setInterval": {"dangerous": actions._call_settimeout}, u"setInterval": {"dangerous": actions._call_settimeout},
"encodeURI": {"readonly": True}, u"encodeURI": {"readonly": True},
"decodeURI": {"readonly": True}, u"decodeURI": {"readonly": True},
"encodeURIComponent": {"readonly": True}, u"encodeURIComponent": {"readonly": True},
"decodeURIComponent": {"readonly": True}, u"decodeURIComponent": {"readonly": True},
"escape": {"readonly": True}, u"escape": {"readonly": True},
"unescape": {"readonly": True}, u"unescape": {"readonly": True},
"isFinite": {"readonly": True}, u"isFinite": {"readonly": True},
"isNaN": {"readonly": True}, u"isNaN": {"readonly": True},
"parseFloat": {"readonly": True}, u"parseFloat": {"readonly": True},
"parseInt": {"readonly": True}, u"parseInt": {"readonly": True},
"eval": {"dangerous": True}, u"eval": {"dangerous": True},
"Function": {"dangerous": True}, u"Function": {"dangerous": True},
"Object": {"value": {"prototype": {"dangerous": True}, u"Object": {"value": {u"prototype": {"dangerous": True},
"constructor": # Just an experiment for now u"constructor": # Just an experiment for now
{"value": lambda: GLOBAL_ENTITIES["Function"]}}}, {"value": lambda: GLOBAL_ENTITIES["Function"]}}},
"String": {"value": {"prototype": {"dangerous": True}}}, u"String": {"value": {u"prototype": {"dangerous": True}}},
"Array": {"value": {"prototype": {"dangerous": True}}}, u"Array": {"value": {u"prototype": {"dangerous": True}}},
"Number": {"value": {"prototype": {"dangerous": True}}}, u"Number": {"value": {u"prototype": {"dangerous": True}}},
"Boolean": {"value": {"prototype": {"dangerous": True}}}, u"Boolean": {"value": {u"prototype": {"dangerous": True}}},
"RegExp": {"value": {"prototype": {"dangerous": True}}}, u"RegExp": {"value": {u"prototype": {"dangerous": True}}},
"Date": {"value": {"prototype": {"dangerous": True}}}, u"Date": {"value": {u"prototype": {"dangerous": True}}},
"Math": {"readonly": True}, u"Math": {"readonly": True},
"netscape": u"netscape":
{"value": {"security": {"value": {u"security":
{"value": {"PrivilegeManager": {"value": {u"PrivilegeManager":
{"value": {"enablePrivilege": {"value": {u"enablePrivilege":
{"dangerous": True}}}}}}}, {"dangerous": True}}}}}}},
"navigator": u"navigator":
{"value": {"wifi": {"dangerous": True}, {"value": {u"wifi": {"dangerous": True},
"geolocation": {"dangerous": True}}}, u"geolocation": {"dangerous": True}}},
"Components": u"Components":
{"readonly": True, {"readonly": True,
"value": "value":
{"classes": {u"classes":
{"xpcom_wildcard": True, {"xpcom_wildcard": True,
"value": "value":
{"createInstance": {u"createInstance":
{"return": call_definitions.xpcom_constructor("createInstance")}, {"return": call_definitions.xpcom_constructor("createInstance")},
"getService": u"getService":
{"return": call_definitions.xpcom_constructor("getService")}}}, {"return": call_definitions.xpcom_constructor("getService")}}},
"utils": "utils":
{"value": {"evalInSandbox": {"value": {u"evalInSandbox":
{"dangerous": True}, {"dangerous": True},
"import": u"import":
{"dangerous": {"dangerous":
lambda a, t: lambda a, t:
a and \ a and \
str(t(a[0]).get_literal_value()) unicode(t(a[0]).get_literal_value())
.count("ctypes.jsm")}}}, .count("ctypes.jsm")}}},
"interfaces": u"interfaces":
{"value": {"nsIXMLHttpRequest": {"value": {u"nsIXMLHttpRequest":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
GLOBAL_ENTITIES["XMLHttpRequest"]}, GLOBAL_ENTITIES["XMLHttpRequest"]},
"nsICategoryManager": u"nsICategoryManager":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsICategoryManager"]}, INTERFACES["nsICategoryManager"]},
"nsIComponentRegistrar": u"nsIComponentRegistrar":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIComponentRegistrar"]}, INTERFACES["nsIComponentRegistrar"]},
"nsIObserverService": u"nsIObserverService":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIObserverService"]}, INTERFACES["nsIObserverService"]},
"nsIResProtocolHandler": u"nsIResProtocolHandler":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIResProtocolHandler"]}, INTERFACES["nsIResProtocolHandler"]},
"nsIStyleSheetService": u"nsIStyleSheetService":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIStyleSheetService"]}, INTERFACES["nsIStyleSheetService"]},
"nsIStringBundleService": u"nsIStringBundleService":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIStringBundleService"]}, INTERFACES["nsIStringBundleService"]},
"nsIWindowMediator": u"nsIWindowMediator":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIWindowMediator"]}, INTERFACES["nsIWindowMediator"]},
"nsIWindowWatcher": u"nsIWindowWatcher":
{"xpcom_map": {"xpcom_map":
lambda: lambda:
INTERFACES["nsIWindowWatcher"]}, INTERFACES["nsIWindowWatcher"]},
"nsIProcess": u"nsIProcess":
{"dangerous": True}, {"dangerous": True},
"nsIDOMGeoGeolocation": u"nsIDOMGeoGeolocation":
{"dangerous": True}, {"dangerous": True},
"nsIX509CertDB": u"nsIX509CertDB":
{"dangerous": True}, {"dangerous": True},
"mozIJSSubScriptLoader": u"mozIJSSubScriptLoader":
{"dangerous": True}}}}}, {"dangerous": True}}}}},
"extensions": {"dangerous": True}, u"extensions": {"dangerous": True},
"xpcnativewrappers": {"dangerous": True}, u"xpcnativewrappers": {"dangerous": True},
"XMLHttpRequest": u"XMLHttpRequest":
{"value": {"value":
{"open": {"dangerous": {u"open": {"dangerous":
# Ban synchronous XHR by making sure the third arg # Ban synchronous XHR by making sure the third arg
# is absent and false. # is absent and false.
lambda a, t: lambda a, t:
@ -241,7 +241,7 @@ GLOBAL_ENTITIES = {
"connections."}}}, "connections."}}},
# Global properties are inherently read-only, though this formalizes it. # Global properties are inherently read-only, though this formalizes it.
"Infinity": {"readonly": True}, u"Infinity": {"readonly": True},
"NaN": {"readonly": True}, u"NaN": {"readonly": True},
"undefined": {"readonly": True}, u"undefined": {"readonly": True},
} }

Просмотреть файл

@ -1,3 +1,4 @@
import codecs
import json import json
import os import os
import re import re
@ -7,7 +8,7 @@ from cStringIO import StringIO
from validator.constants import SPIDERMONKEY_INSTALLATION from validator.constants import SPIDERMONKEY_INSTALLATION
from validator.contextgenerator import ContextGenerator from validator.contextgenerator import ContextGenerator
from validator.textfilter import * import validator.unicodehelper as unicodehelper
JS_ESCAPE = re.compile("\\\\+[ux]", re.I) JS_ESCAPE = re.compile("\\\\+[ux]", re.I)
@ -81,59 +82,21 @@ def prepare_code(code, err, filename):
# slash: a character is necessary to prevent bad identifier errors # slash: a character is necessary to prevent bad identifier errors
code = JS_ESCAPE.sub("u", code) code = JS_ESCAPE.sub("u", code)
encoding = None code = unicodehelper.decode(code)
try:
code = unicode(code) # Make sure we can get a Unicode representation
code = strip_weird_chars(code, err=err, name=filename)
except UnicodeDecodeError:
# If it's not an easily decodeable encoding, detect it and decode that
code = filter_ascii(code)
return code return code
def strip_weird_chars(chardata, err=None, name=""):
line_num = 1
out_code = StringIO()
has_warned_ctrlchar = False
for line in chardata.split("\n"):
charpos = 0
for char in line:
if is_standard_ascii(char):
out_code.write(char)
else:
if not has_warned_ctrlchar and err is not None:
err.warning(("testcases_scripting",
"_get_tree",
"control_char_filter"),
"Invalid control character in JS file",
"An invalid character (ASCII 0-31, except CR "
"and LF) has been found in a JS file. These "
"are considered unsafe and should be removed.",
filename=name,
line=line_num,
column=charpos,
context=ContextGenerator(chardata))
has_warned_ctrlchar = True
charpos += 1
out_code.write("\n")
line_num += 1
return out_code.getvalue()
def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION): def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
"Returns an AST tree of the JS passed in `code`." "Returns an AST tree of the JS passed in `code`."
if not code: if not code:
return None return None
temp = tempfile.NamedTemporaryFile(mode="w+", delete=False) code = unicodehelper.decode(code)
temp.write(code)
temp = tempfile.NamedTemporaryFile(mode="w+b", delete=False)
#temp.write(codecs.BOM_UTF8)
temp.write(code.encode("utf_8"))
temp.flush() temp.flush()
data = """try{ data = """try{
@ -147,7 +110,7 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
}""" % json.dumps(temp.name) }""" % json.dumps(temp.name)
try: try:
cmd = [shell, "-e", data] cmd = [shell, "-e", data, "-U"]
try: try:
shell_obj = subprocess.Popen(cmd, shell_obj = subprocess.Popen(cmd,
shell=False, shell=False,
@ -171,11 +134,7 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
if not data: if not data:
raise JSReflectException("Reflection failed") raise JSReflectException("Reflection failed")
try: data = unicodehelper.decode(data)
data = unicode(data)
except UnicodeDecodeError:
data = unicode(filter_ascii(data))
parsed = json.loads(data, strict=False) parsed = json.loads(data, strict=False)
if "error" in parsed and parsed["error"]: if "error" in parsed and parsed["error"]:

Просмотреть файл

@ -40,12 +40,14 @@ class MockBundler:
# Increment the message counter # Increment the message counter
self.message_count += 1 self.message_count += 1
self.ids.append(id) self.ids.append(err_id)
error = unicode(error)
print "-" * 30 print "-" * 30
print error print error.encode("ascii", "replace")
print "~" * len(error) print "~" * len(error)
if isinstance(description, str): if isinstance(description, types.StringTypes):
print description print description
else: else:
# Errors can have multiple lines # Errors can have multiple lines
@ -105,12 +107,14 @@ class Traverser:
output = data output = data
if isinstance(data, JSObject) or isinstance(data, JSContext): if isinstance(data, JSObject) or isinstance(data, JSContext):
output = data.output() output = data.output()
print ". " * self.debug_level + output
output = unicode(output)
print ". " * self.debug_level + output.encode("ascii", "replace")
def run(self, data): def run(self, data):
if DEBUG: if DEBUG:
x = open("/tmp/output.js", "w") x = open("/tmp/output.js", "w")
x.write(str(data)) x.write(unicode(data))
x.close() x.close()
if "type" not in data or not self._can_handle_node(data["type"]): if "type" not in data or not self._can_handle_node(data["type"]):
@ -189,7 +193,7 @@ class Traverser:
if action is not None: if action is not None:
action_result = action(self, node) action_result = action(self, node)
self._debug("ACTION>>%s (%s)" % self._debug("ACTION>>%s (%s)" %
("halt>>%s" % str(action_result) if ("halt>>%s" % unicode(action_result) if
action_result else action_result else
"continue", "continue",
node["type"])) node["type"]))
@ -350,7 +354,8 @@ class Traverser:
"_build_global", "_build_global",
"dangerous_global"), "dangerous_global"),
"Dangerous Global Object", "Dangerous Global Object",
[dang if isinstance(dang, str) else [dang if
isinstance(dang, types.StringTypes) else
"A dangerous or banned global object was " "A dangerous or banned global object was "
"accessed by some JavaScript code.", "accessed by some JavaScript code.",
"Accessed object: %s" % name], "Accessed object: %s" % name],

Просмотреть файл

@ -1,4 +1,3 @@
import re import re
try: try:
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
@ -6,6 +5,7 @@ except ImportError: # pragma: no cover
from html.parser import HTMLParser from html.parser import HTMLParser
import validator.testcases.scripting as scripting import validator.testcases.scripting as scripting
import validator.unicodehelper as unicodehelper
from validator.testcases.markup import csstester from validator.testcases.markup import csstester
from validator.contextgenerator import ContextGenerator from validator.contextgenerator import ContextGenerator
from validator.constants import * from validator.constants import *
@ -51,7 +51,7 @@ class MarkupParser(HTMLParser):
self.xml_state = [] self.xml_state = []
self.xml_buffer = [] self.xml_buffer = []
self.reported = {} self.reported = set()
def process(self, filename, data, extension="xul"): def process(self, filename, data, extension="xul"):
"""Processes data by splitting it into individual lines, then """Processes data by splitting it into individual lines, then
@ -61,7 +61,7 @@ class MarkupParser(HTMLParser):
self.filename = filename self.filename = filename
self.extension = extension self.extension = extension
self.reported = {} self.reported = set()
self.context = ContextGenerator(data) self.context = ContextGenerator(data)
@ -100,6 +100,8 @@ class MarkupParser(HTMLParser):
try: try:
self.feed(line + "\n") self.feed(line + "\n")
except UnicodeDecodeError:
raise
except Exception as inst: except Exception as inst:
if DEBUG: # pragma: no cover if DEBUG: # pragma: no cover
print self.xml_state, inst print self.xml_state, inst
@ -107,7 +109,7 @@ class MarkupParser(HTMLParser):
if "markup" in self.reported: if "markup" in self.reported:
return return
if "script" in self.xml_state or ( if ("script" in self.xml_state or
self.debug and "testscript" in self.xml_state): self.debug and "testscript" in self.xml_state):
if "script_comments" in self.reported or not self.strict: if "script_comments" in self.reported or not self.strict:
return return
@ -122,7 +124,7 @@ class MarkupParser(HTMLParser):
self.filename, self.filename,
line=self.line, line=self.line,
context=self.context) context=self.context)
self.reported["script_comments"] = True self.reported.add("script_comments")
return return
if self.strict: if self.strict:
@ -136,7 +138,7 @@ class MarkupParser(HTMLParser):
self.filename, self.filename,
line=self.line, line=self.line,
context=self.context) context=self.context)
self.reported["markup"] = True self.reported.add("markup")
def handle_startendtag(self, tag, attrs): def handle_startendtag(self, tag, attrs):
# Self closing tags don't have an end tag, so we want to # Self closing tags don't have an end tag, so we want to
@ -154,7 +156,7 @@ class MarkupParser(HTMLParser):
self_closing = tag in SELF_CLOSING_TAGS self_closing = tag in SELF_CLOSING_TAGS
if DEBUG: # pragma: no cover if DEBUG: # pragma: no cover
print self.xml_state, tag, self_closing print "S: ", self.xml_state, tag, self_closing
# A fictional tag for testing purposes. # A fictional tag for testing purposes.
if tag == "xbannedxtestx": if tag == "xbannedxtestx":
@ -286,17 +288,19 @@ class MarkupParser(HTMLParser):
return return
self.xml_state.append(tag) self.xml_state.append(tag)
self.xml_buffer.append("") self.xml_buffer.append(unicode(""))
def handle_endtag(self, tag): def handle_endtag(self, tag):
tag = tag.lower() tag = tag.lower()
if DEBUG: # pragma: no cover if DEBUG: # pragma: no cover
print tag, self.xml_state print "E: ", tag, self.xml_state
if not self.xml_state: if not self.xml_state:
if "closing_tags" in self.reported or not self.strict: if "closing_tags" in self.reported or not self.strict:
if DEBUG:
print "Unstrict; extra closing tags ------"
return return
self.err.warning(("testcases_markup_markuptester", self.err.warning(("testcases_markup_markuptester",
"handle_endtag", "handle_endtag",
@ -307,16 +311,18 @@ class MarkupParser(HTMLParser):
self.filename, self.filename,
line=self.line, line=self.line,
context=self.context) context=self.context)
self.reported["closing_tags"] = True self.reported.add("closing_tags")
if DEBUG: # pragma: no cover if DEBUG: # pragma: no cover
print "Too many closing tags ------" print "Too many closing tags ------"
return return
elif "script" in self.xml_state: elif "script" in self.xml_state[:-1]:
# If we're in a script tag, nothing else matters. Just rush # If we're in a script tag, nothing else matters. Just rush
# everything possible into the xml buffer. # everything possible into the xml buffer.
self._save_to_buffer("</" + tag + ">") self._save_to_buffer("</" + tag + ">")
if DEBUG:
print "Markup as text in script ------"
return return
elif tag not in self.xml_state: elif tag not in self.xml_state:
@ -344,6 +350,8 @@ class MarkupParser(HTMLParser):
# classifies as a self-closing tag, we just recursively close # classifies as a self-closing tag, we just recursively close
# down to the level of the tag we're actually closing. # down to the level of the tag we're actually closing.
if old_state != tag and old_state in SELF_CLOSING_TAGS: if old_state != tag and old_state in SELF_CLOSING_TAGS:
if DEBUG:
print "Self closing tag cascading down ------"
return self.handle_endtag(tag) return self.handle_endtag(tag)
# If this is an XML-derived language, everything must nest # If this is an XML-derived language, everything must nest
@ -365,7 +373,10 @@ class MarkupParser(HTMLParser):
if DEBUG: # pragma: no cover if DEBUG: # pragma: no cover
print "Invalid markup nesting ------" print "Invalid markup nesting ------"
data_buffer = data_buffer.strip()
# Perform analysis on collected data. # Perform analysis on collected data.
if data_buffer:
if tag == "script": if tag == "script":
scripting.test_js_snippet(self.err, scripting.test_js_snippet(self.err,
data_buffer, data_buffer,
@ -413,6 +424,8 @@ class MarkupParser(HTMLParser):
if not self.xml_buffer: if not self.xml_buffer:
return return
data = unicodehelper.decode(data)
self.xml_buffer[-1] += data self.xml_buffer[-1] += data
def _format_args(self, args): def _format_args(self, args):

Просмотреть файл

@ -0,0 +1,55 @@
import codecs
import textfilter
# Many thanks to nmaier for inspiration and code in this module
# Byte-order marks paired with the codec each one announces. decode() tests
# them in list order using startswith(), so the UTF-32 marks have to come
# before the UTF-16 ones: BOM_UTF32_LE begins with the same two bytes as
# BOM_UTF16_LE and would otherwise be mistaken for it.
UNICODES = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
]

# Codecs attempted, in this order, as a late fallback when the data carries
# no BOM.
COMMON_ENCODINGS = ("utf-16", "latin_1", "ascii")
def decode(data):
    """
    Decode a byte string to unicode, employing some charset detection and
    stripping a leading unicode BOM.

    This is Python 2 code: it relies on the `unicode` builtin and treats
    `str` as the byte-string type.

    `data` that is not a `str` (for example a value that is already
    `unicode`, or `None`) is returned unchanged. Otherwise these strategies
    are tried in order and the first one that succeeds wins:

    1. A BOM listed in `UNICODES`: the BOM is removed and the remainder is
       decoded with the matching codec, silently dropping undecodable bytes.
    2. Strict UTF-8.
    3. latin_1.
    4. Every codec in `COMMON_ENCODINGS`.
    5. `textfilter.filter_ascii()` followed by a decode that replaces
       anything still undecodable.

    Only `UnicodeDecodeError` is treated as "try the next strategy"; any
    other exception (including KeyboardInterrupt) propagates to the caller.
    """
    # Don't make more work than we have to.
    if not isinstance(data, str):
        return data

    # Detect standard unicodes.
    for bom, encoding in UNICODES:
        if data.startswith(bom):
            return unicode(data[len(bom):], encoding, "ignore")

    # Try straight UTF-8.
    try:
        return unicode(data, "utf-8")
    except UnicodeDecodeError:
        pass

    # Test for latin_1, because it can be matched as UTF-16.
    # Somewhat of a hack, but it works and is about a thousand times faster
    # than using chardet.
    # NOTE(review): every character of a Python 2 byte string has an ordinal
    # below 256 and latin_1 maps every byte value, so this branch is expected
    # to succeed for any input that reaches it; the fallbacks below are kept
    # as a safety net.
    if all(ord(c) < 256 for c in data):
        try:
            return unicode(data, "latin_1")
        except UnicodeDecodeError:
            pass

    # Test for various common encodings.
    for encoding in COMMON_ENCODINGS:
        try:
            return unicode(data, encoding)
        except UnicodeDecodeError:
            pass

    # Anything else gets filtered.
    return unicode(textfilter.filter_ascii(data), errors="replace")