Merged encoding fixes into codebase

This commit is contained in:
Matt Basta 2011-04-21 17:11:17 +00:00
Родитель 4b6ff350b0 378c28d910
Коммит 6e2e1fd6b4
31 изменённых файлов: 357 добавлений и 225 удалений

Двоичные данные
tests/resources/bug_621360.js

Двоичный файл не отображается.

Двоичные данные
tests/resources/controlchars.js Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
function test() {}

Двоичные данные
tests/resources/controlchars/controlchars_ascii_warn.js Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/controlchars/controlchars_bad.js Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
function täst() {}

Двоичные данные
tests/resources/controlchars/controlchars_utf-8_warn.js Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
täst

Двоичные данные
tests/resources/unicodehelper/utf-16be.txt Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/unicodehelper/utf-16le.txt Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/unicodehelper/utf-32be.txt Normal file

Двоичный файл не отображается.

Двоичные данные
tests/resources/unicodehelper/utf-32le.txt Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1 @@
täst

Просмотреть файл

@ -0,0 +1 @@
täst

Просмотреть файл

@ -1,20 +0,0 @@
import os
import validator.testcases.scripting
def _do_test(path):
"Performs a test on a JS file"
script = open(path).read()
err = validator.testcases.scripting.traverser.MockBundler()
validator.testcases.scripting.test_js_file(err, path, script)
return err
def test_control_chars():
"Tests that control characters throw a single error"
err = _do_test("tests/resources/bug_621360.js")
# There should be a single error.
print err.message_count
assert err.message_count == 1

Просмотреть файл

@ -0,0 +1,47 @@
import os
import validator.unicodehelper
import validator.testcases.scripting
# Originated from bug 626496
def _do_test(path):
    """Run the JS test case against the file at `path`.

    The file is read as raw bytes, converted with
    `validator.unicodehelper.decode`, and handed to
    `validator.testcases.scripting.test_js_file` together with a fresh
    `MockBundler`.

    Returns the `MockBundler` that collected the messages.
    """
    # Use a context manager so the handle is closed even if reading fails;
    # the original left the file object open.
    with open(path, "rb") as js_file:
        script = validator.unicodehelper.decode(js_file.read())

    # Printed only as a debugging aid; non-ASCII characters become "?".
    print(script.encode("ascii", "replace"))

    err = validator.testcases.scripting.traverser.MockBundler()
    validator.testcases.scripting.test_js_file(err, path, script)
    print(err.ids)
    return err
def test_controlchars_ascii_ok():
    """Checks that the `controlchars_ascii_ok.js` fixture yields no messages."""
    bundle = _do_test("tests/resources/controlchars/controlchars_ascii_ok.js")
    # An empty id list means nothing was reported.
    assert not bundle.ids
def test_controlchars_ascii_warn():
    """Checks that the `controlchars_ascii_warn.js` fixture yields exactly
    one message and that the third component of its id is "syntax_error"."""
    reported = _do_test(
        "tests/resources/controlchars/controlchars_ascii_warn.js").ids
    assert len(reported) == 1
    first_id = reported[0]
    assert first_id[2] == "syntax_error"
def test_controlchars_utf8_ok():
    """Checks that the `controlchars_utf-8_ok.js` fixture yields no messages."""
    bundle = _do_test("tests/resources/controlchars/controlchars_utf-8_ok.js")
    # An empty id list means nothing was reported.
    assert not bundle.ids
def test_controlchars_utf8_warn():
    """Checks that the `controlchars_utf-8_warn.js` fixture yields exactly
    one message and that the third component of its id is "syntax_error"."""
    reported = _do_test(
        "tests/resources/controlchars/controlchars_utf-8_warn.js").ids
    assert len(reported) == 1
    first_id = reported[0]
    assert first_id[2] == "syntax_error"

Просмотреть файл

@ -1,24 +1,26 @@
# -*- coding: utf-8 -*-
import validator.testcases.markup.markuptester as markuptester
from validator.errorbundler import ErrorBundle
from validator.constants import *
def _do_test(path, should_fail=False, type_=None):
return _do_test_raw(open(path).read(),
path,
should_fail,
type_)
markup_file = open(path)
data = markup_file.read()
markup_file.close()
def _do_test_raw(data, path, should_fail=False, type_=None):
filename = path.split("/")[-1]
extension = filename.split(".")[-1]
err = ErrorBundle(None, True)
err = ErrorBundle()
if type_:
err.set_type(type_)
parser = markuptester.MarkupParser(err, debug=True)
parser.process(filename, data, extension)
err.print_summary(True)
print err.print_summary(verbose=True)
if should_fail:
assert err.failed()
@ -31,7 +33,7 @@ def _do_test(path, should_fail=False, type_=None):
def test_local_url_detector():
"Tests that local URLs can be detected."
err = ErrorBundle(None, True)
err = ErrorBundle()
mp = markuptester.MarkupParser(err)
tester = mp._is_url_local
@ -135,3 +137,18 @@ def test_invalid_markup():
result = _do_test("tests/resources/markup/markuptester/bad_script.xml",
False)
assert result.notices
def test_self_closing_scripts():
"""Tests that self-closing script tags are not deletrious to parsing"""
_do_test_raw("""
<foo>
<script type="text/javascript"/>
<list_item undecodable=" _ " />
<list_item />
<list_item />
</foo>
""", "foo.js")

Просмотреть файл

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
import nose
import os
import validator.unicodehelper as unicodehelper
# Expected result of decoding every fixture: the literal below is a byte
# string in this file's declared source encoding (utf-8), decoded to unicode.
COMPARISON = "täst".decode("utf-8")
def _do_test(path):
    """Decode the fixture at `path` and check it equals COMPARISON.

    The file content is passed through `unicodehelper.decode` and compared
    with `nose.tools.eq_`, which raises an AssertionError on mismatch.
    """
    # Read raw bytes: the fixtures include UTF-16/UTF-32 data, which text
    # mode may alter on platforms that translate line endings. The context
    # manager also closes the handle, which the original never did.
    with open(path, "rb") as fixture:
        text = fixture.read()

    utext = unicodehelper.decode(text)
    # Debugging aid; non-ASCII characters are shown as backslash escapes.
    print(utext.encode("ascii", "backslashreplace"))
    nose.tools.eq_(utext, COMPARISON)
def test_latin1():
    "Tests latin-1 encoding is properly decoded"
    fixture = "tests/resources/unicodehelper/latin_1.txt"
    _do_test(fixture)
def test_utf8():
    "Tests utf-8 encoding without a BOM is properly decoded"
    fixture = "tests/resources/unicodehelper/utf-8.txt"
    _do_test(fixture)
def test_utf8_bom():
    "Tests utf-8 with BOM encoding is properly decoded"
    # This function was previously also named `test_utf8`; that second
    # definition replaced the earlier one in the module namespace, so the
    # test for the BOM-less fixture was never collected. The distinct name
    # lets both tests run.
    _do_test("tests/resources/unicodehelper/utf-8-bom.txt")
def test_utf16le():
    "Tests utf-16 little-endian encoding is properly decoded"
    fixture = "tests/resources/unicodehelper/utf-16le.txt"
    _do_test(fixture)
def test_utf16be():
    "Tests utf-16 big-endian encoding is properly decoded"
    fixture = "tests/resources/unicodehelper/utf-16be.txt"
    _do_test(fixture)
def test_utf32le():
    "Tests utf-32 little-endian encoding is properly decoded"
    fixture = "tests/resources/unicodehelper/utf-32le.txt"
    _do_test(fixture)
def test_utf32be():
    "Tests utf-32 big-endian encoding is properly decoded"
    fixture = "tests/resources/unicodehelper/utf-32be.txt"
    _do_test(fixture)

Просмотреть файл

@ -1,6 +1,5 @@
from StringIO import StringIO
import textfilter
import unicodehelper
class ContextGenerator:
@ -83,6 +82,6 @@ class ContextGenerator:
data = "%s ..." % data[:140]
data = "%s%s" % (raw_data[0:with_ws - line_length], data)
data = textfilter.filter_ascii(data)
data = unicodehelper.decode(data)
return data

Просмотреть файл

@ -3,7 +3,7 @@ import uuid
from StringIO import StringIO
from outputhandlers.shellcolors import OutputHandler
from textfilter import filter_ascii
import unicodehelper
class ErrorBundle(object):
@ -110,8 +110,8 @@ class ErrorBundle(object):
else:
message["context"] = None
message["message"] = filter_ascii(message["message"])
message["description"] = filter_ascii(message["description"])
message["message"] = unicodehelper.decode(message["message"])
message["description"] = unicodehelper.decode(message["description"])
stack.append(message)

Просмотреть файл

@ -85,7 +85,7 @@ def main():
# Print the output of the tests based on the requested format.
if args.output == "text":
print error_bundle.print_summary(verbose=args.verbose,
no_color=args.boring)
no_color=args.boring).encode("utf-8")
elif args.output == "json":
sys.stdout.write(error_bundle.render_json())

Просмотреть файл

@ -6,6 +6,7 @@ from StringIO import StringIO
from validator.contextgenerator import ContextGenerator
from validator import decorator
from validator import submain as testendpoint_validator
from validator import unicodehelper
import validator.testcases.markup.markuptester as testendpoint_markup
import validator.testcases.markup.csstester as testendpoint_css
import validator.testcases.scripting as testendpoint_js
@ -165,9 +166,8 @@ def test_packed_packages(err, package_contents=None, xpi_package=None):
if not file_data:
continue
# Skip BOMs and the like
while not is_standard_ascii(file_data[0]):
file_data = file_data[1:]
# Convert the file data to unicode
file_data = unicodehelper.decode(file_data)
if data["extension"] == "css":
testendpoint_css.test_css_file(err,

Просмотреть файл

@ -291,12 +291,14 @@ def _call_expression(traverser, node):
result = dangerous(a=args, t=t)
if result:
# Generate a string representation of the params
params = ", ".join([str(t(p).get_literal_value()) for p in args])
params = u", ".join([unicode(t(p).get_literal_value()) for
p in args])
traverser.err.warning(("testcases_javascript_actions",
"_call_expression",
"called_dangerous_global"),
"Global called in dangerous manner",
result if isinstance(result, str) else
result if isinstance(result,
types.StringTypes) else
"A global function was called using a set "
"of dangerous parameters. These parameters "
"have been disallowed.",
@ -418,10 +420,10 @@ def _expr_assignment(traverser, node):
if lit_right is None:
lit_right = 0
if isinstance(lit_left, (str, unicode)) or \
isinstance(lit_right, (str, unicode)):
lit_left = str(lit_left)
lit_right = str(lit_right)
if isinstance(lit_left, types.StringTypes) or \
isinstance(lit_right, types.StringTypes):
lit_left = unicode(lit_left)
lit_right = unicode(lit_right)
gleft = _get_as_num(left)
gright = _get_as_num(right)
@ -446,8 +448,8 @@ def _expr_assignment(traverser, node):
traverser.debug_level -= 1
return left
traverser._debug("ASSIGNMENT::LEFT>>%s" % str(left.is_global))
traverser._debug("ASSIGNMENT::RIGHT>>%s" % str(operators[token]()))
traverser._debug("ASSIGNMENT::LEFT>>%s" % unicode(left.is_global))
traverser._debug("ASSIGNMENT::RIGHT>>%s" % unicode(operators[token]()))
left.set_value(operators[token](), traverser=traverser)
traverser.debug_level -= 1
return left
@ -469,7 +471,7 @@ def _expr_binary(traverser, node):
left = traverser._traverse_node(node["left"])
if not isinstance(left, JSWrapper):
left = JSWrapper(left, traverser=traverser)
traverser._debug(str(left.dirty))
traverser._debug(unicode(left.dirty))
traverser.debug_level -= 1
@ -479,7 +481,7 @@ def _expr_binary(traverser, node):
right = traverser._traverse_node(node["right"])
if not isinstance(right, JSWrapper):
right = JSWrapper(right, traverser=traverser)
traverser._debug(str(right.dirty))
traverser._debug(unicode(right.dirty))
if left.dirty:
return left
@ -589,7 +591,7 @@ def _get_as_num(value):
return False
try:
if isinstance(value, str):
if isinstance(value, types.StringTypes):
return float(value)
elif isinstance(value, int) or isinstance(value, float):
return value

Просмотреть файл

@ -9,7 +9,7 @@ traverser
node
the current node being evaluated
"""
import types
from jstypes import *
@ -21,10 +21,11 @@ def createElement(args, traverser, node):
simple_args = [traverser._traverse_node(a) for a in args]
if str(simple_args[0].get_literal_value()).lower() == "script":
if unicode(simple_args[0].get_literal_value()).lower() == u"script":
_create_script_tag(traverser)
elif not (simple_args[0].is_literal() or
isinstance(simple_args[0].get_literal_value(), str)):
isinstance(simple_args[0].get_literal_value(),
types.StringTypes)):
_create_variable_element(traverser)
@ -36,10 +37,11 @@ def createElementNS(args, traverser, node):
simple_args = [traverser._traverse_node(a) for a in args]
if "script" in str(simple_args[1].get_literal_value()).lower():
if "script" in unicode(simple_args[1].get_literal_value()).lower():
_create_script_tag(traverser)
elif not (simple_args[1].is_literal() or
isinstance(simple_args[1].get_literal_value(), str)):
isinstance(simple_args[1].get_literal_value(),
types.StringTypes)):
_create_variable_element(traverser)
@ -115,7 +117,7 @@ def setAttribute(args, traverser, node):
simple_args = [traverser._traverse_node(a) for a in args]
if str(simple_args[0].get_literal_value()).lower().startswith("on"):
if unicode(simple_args[0].get_literal_value()).lower().startswith("on"):
traverser.err.notice(
err_id=("testcases_javascript_instanceactions", "setAttribute",
"setting_on*"),

Просмотреть файл

@ -1,10 +1,13 @@
import re
import types
import jstypes
def set_innerHTML(new_value, traverser):
"Tests that values being assigned to innerHTML are not dangerous"
if not isinstance(new_value, jstypes.JSWrapper):
new_value = jstypes.JSWrapper(new_value, traverser=traverser)
literal_value = new_value.get_literal_value()
if isinstance(literal_value, types.StringTypes):
# Static string assignments
@ -16,9 +19,11 @@ def set_innerHTML(new_value, traverser):
err_id=("testcases_javascript_instancetypes", "set_innerHTML",
"event_assignment"),
warning="Event handler assignment via innerHTML",
description="When assigning event handlers, innerHTML "
"should never be used. Rather, use a "
"proper technique, like addEventListener.",
description=["When assigning event handlers, innerHTML "
"should never be used. Rather, use a "
"proper technique, like addEventListener.",
"Event handler code: %s" %
literal_value.encode("ascii", "replace")],
filename=traverser.filename,
line=traverser.line,
column=traverser.position,

Просмотреть файл

@ -19,7 +19,7 @@ class JSObject(object):
def get(self, name):
"Returns the value associated with a property name"
name = str(name)
name = unicode(name)
return self.data[name] if name in self.data else None
def get_literal_value(self):
@ -36,11 +36,11 @@ class JSObject(object):
self.data[name] = value
def has_var(self, name):
name = str(name)
name = unicode(name)
return name in self.data
def output(self):
return str(self.data)
return unicode(self.data)
class JSContext(JSObject):
@ -56,7 +56,7 @@ class JSContext(JSObject):
def output(self):
output = {}
for (name, item) in self.data.items():
output[name] = str(item)
output[name] = unicode(item)
return json.dumps(output)
@ -273,7 +273,7 @@ class JSWrapper(object):
def __str__(self):
"""Returns a textual version of the object."""
return str(self.get_literal_value())
return unicode(self.get_literal_value())
class JSLiteral(JSObject):
@ -309,7 +309,7 @@ class JSPrototype(JSObject):
def get(self, name):
"Enables static analysis of `with` statements"
name = str(name)
name = unicode(name)
output = None
if name in self.data:
output = self.data[name]
@ -351,7 +351,7 @@ class JSArray(JSObject):
# Interestingly enough, this allows for things like:
# x = [4]
# y = x * 3 // y = 12 since x equals "4"
return ",".join([str(w.get_literal_value()) for w in self.elements])
return u",".join([unicode(w.get_literal_value()) for w in self.elements])
def set(self, index, value, traverser=None):
"""Follow the rules of JS for creating an array"""

Просмотреть файл

@ -15,9 +15,9 @@ BANNED_IDENTIFIERS = ("newThread", )
# "True", except the string will be outputted when the error is thrown.
INTERFACES = {
"nsICategoryManager":
u"nsICategoryManager":
{"value":
{"addCategoryEntry":
{u"addCategoryEntry":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
@ -28,33 +28,33 @@ INTERFACES = {
"Authors of bootstrapped add-ons must take care "
"to cleanup any added category entries "
"at shutdown")}}},
"nsIComponentRegistrar":
u"nsIComponentRegistrar":
{"value":
{"autoRegister":
{u"autoRegister":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
"Bootstrapped add-ons may not register "
"chrome manifest files"},
"registerFactory":
u"registerFactory":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care "
"to cleanup any component registrations "
"at shutdown"}}},
"nsIObserverService":
u"nsIObserverService":
{"value":
{"addObserver":
{u"addObserver":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care "
"to remove any added observers "
"at shutdown"}}},
"nsIResProtocolHandler":
u"nsIResProtocolHandler":
{"value":
{"setSubstitution":
{u"setSubstitution":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
@ -64,30 +64,30 @@ INTERFACES = {
"Authors of bootstrapped add-ons must take care "
"to cleanup any added resource substitutions "
"at shutdown"}}},
"nsIStringBundleService":
u"nsIStringBundleService":
{"value":
{"createStringBundle":
{u"createStringBundle":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care "
"to flush the string bundle cache at shutdown"},
"createExtensibleBundle":
u"createExtensibleBundle":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care "
"to flush the string bundle cache at shutdown"}}},
"nsIStyleSheetService":
u"nsIStyleSheetService":
{"value":
{"loadAndRegisterSheet":
{u"loadAndRegisterSheet":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
"Authors of bootstrapped add-ons must take care "
"to unregister any registered stylesheets "
"at shutdown"}}},
"nsIWindowMediator":
u"nsIWindowMediator":
{"value":
{"registerNotification":
{"dangerous":
@ -96,9 +96,9 @@ INTERFACES = {
"Authors of bootstrapped add-ons must take care "
"to remove any added observers "
"at shutdown"}}},
"nsIWindowWatcher":
u"nsIWindowWatcher":
{"value":
{"addListener":
{u"addListener":
{"dangerous":
lambda a, t, e:
e.get_resource("em:bootstrap") and \
@ -109,126 +109,126 @@ INTERFACES = {
# GLOBAL_ENTITIES is also representative of the `window` object.
GLOBAL_ENTITIES = {
"window": {"value": lambda: GLOBAL_ENTITIES},
"document":
{"value": {"createElement":
u"window": {"value": lambda: GLOBAL_ENTITIES},
u"document":
{"value": {u"createElement":
{"dangerous":
lambda a, t: t(a[0]).get_literal_value()
.lower() == "script"},
"createElementNS":
u"createElementNS":
{"dangerous":
lambda a, t: t(a[0]).get_literal_value()
.lower() == "script"}}},
# The nefariuos timeout brothers!
"setTimeout": {"dangerous": actions._call_settimeout},
"setInterval": {"dangerous": actions._call_settimeout},
u"setTimeout": {"dangerous": actions._call_settimeout},
u"setInterval": {"dangerous": actions._call_settimeout},
"encodeURI": {"readonly": True},
"decodeURI": {"readonly": True},
"encodeURIComponent": {"readonly": True},
"decodeURIComponent": {"readonly": True},
"escape": {"readonly": True},
"unescape": {"readonly": True},
"isFinite": {"readonly": True},
"isNaN": {"readonly": True},
"parseFloat": {"readonly": True},
"parseInt": {"readonly": True},
u"encodeURI": {"readonly": True},
u"decodeURI": {"readonly": True},
u"encodeURIComponent": {"readonly": True},
u"decodeURIComponent": {"readonly": True},
u"escape": {"readonly": True},
u"unescape": {"readonly": True},
u"isFinite": {"readonly": True},
u"isNaN": {"readonly": True},
u"parseFloat": {"readonly": True},
u"parseInt": {"readonly": True},
"eval": {"dangerous": True},
"Function": {"dangerous": True},
"Object": {"value": {"prototype": {"dangerous": True},
"constructor": # Just an experiment for now
{"value": lambda: GLOBAL_ENTITIES["Function"]}}},
"String": {"value": {"prototype": {"dangerous": True}}},
"Array": {"value": {"prototype": {"dangerous": True}}},
"Number": {"value": {"prototype": {"dangerous": True}}},
"Boolean": {"value": {"prototype": {"dangerous": True}}},
"RegExp": {"value": {"prototype": {"dangerous": True}}},
"Date": {"value": {"prototype": {"dangerous": True}}},
u"eval": {"dangerous": True},
u"Function": {"dangerous": True},
u"Object": {"value": {u"prototype": {"dangerous": True},
u"constructor": # Just an experiment for now
{"value": lambda: GLOBAL_ENTITIES["Function"]}}},
u"String": {"value": {u"prototype": {"dangerous": True}}},
u"Array": {"value": {u"prototype": {"dangerous": True}}},
u"Number": {"value": {u"prototype": {"dangerous": True}}},
u"Boolean": {"value": {u"prototype": {"dangerous": True}}},
u"RegExp": {"value": {u"prototype": {"dangerous": True}}},
u"Date": {"value": {u"prototype": {"dangerous": True}}},
"Math": {"readonly": True},
u"Math": {"readonly": True},
"netscape":
{"value": {"security":
{"value": {"PrivilegeManager":
{"value": {"enablePrivilege":
u"netscape":
{"value": {u"security":
{"value": {u"PrivilegeManager":
{"value": {u"enablePrivilege":
{"dangerous": True}}}}}}},
"navigator":
{"value": {"wifi": {"dangerous": True},
"geolocation": {"dangerous": True}}},
u"navigator":
{"value": {u"wifi": {"dangerous": True},
u"geolocation": {"dangerous": True}}},
"Components":
u"Components":
{"readonly": True,
"value":
{"classes":
{u"classes":
{"xpcom_wildcard": True,
"value":
{"createInstance":
{u"createInstance":
{"return": call_definitions.xpcom_constructor("createInstance")},
"getService":
u"getService":
{"return": call_definitions.xpcom_constructor("getService")}}},
"utils":
{"value": {"evalInSandbox":
{"value": {u"evalInSandbox":
{"dangerous": True},
"import":
u"import":
{"dangerous":
lambda a, t:
a and \
str(t(a[0]).get_literal_value())
.count("ctypes.jsm")}}},
"interfaces":
{"value": {"nsIXMLHttpRequest":
unicode(t(a[0]).get_literal_value())
.count("ctypes.jsm")}}},
u"interfaces":
{"value": {u"nsIXMLHttpRequest":
{"xpcom_map":
lambda:
GLOBAL_ENTITIES["XMLHttpRequest"]},
"nsICategoryManager":
u"nsICategoryManager":
{"xpcom_map":
lambda:
INTERFACES["nsICategoryManager"]},
"nsIComponentRegistrar":
u"nsIComponentRegistrar":
{"xpcom_map":
lambda:
INTERFACES["nsIComponentRegistrar"]},
"nsIObserverService":
u"nsIObserverService":
{"xpcom_map":
lambda:
INTERFACES["nsIObserverService"]},
"nsIResProtocolHandler":
u"nsIResProtocolHandler":
{"xpcom_map":
lambda:
INTERFACES["nsIResProtocolHandler"]},
"nsIStyleSheetService":
u"nsIStyleSheetService":
{"xpcom_map":
lambda:
INTERFACES["nsIStyleSheetService"]},
"nsIStringBundleService":
u"nsIStringBundleService":
{"xpcom_map":
lambda:
INTERFACES["nsIStringBundleService"]},
"nsIWindowMediator":
u"nsIWindowMediator":
{"xpcom_map":
lambda:
INTERFACES["nsIWindowMediator"]},
"nsIWindowWatcher":
u"nsIWindowWatcher":
{"xpcom_map":
lambda:
INTERFACES["nsIWindowWatcher"]},
"nsIProcess":
u"nsIProcess":
{"dangerous": True},
"nsIDOMGeoGeolocation":
u"nsIDOMGeoGeolocation":
{"dangerous": True},
"nsIX509CertDB":
u"nsIX509CertDB":
{"dangerous": True},
"mozIJSSubScriptLoader":
u"mozIJSSubScriptLoader":
{"dangerous": True}}}}},
"extensions": {"dangerous": True},
"xpcnativewrappers": {"dangerous": True},
u"extensions": {"dangerous": True},
u"xpcnativewrappers": {"dangerous": True},
"XMLHttpRequest":
u"XMLHttpRequest":
{"value":
{"open": {"dangerous":
{u"open": {"dangerous":
# Ban syncrhonous XHR by making sure the third arg
# is absent and false.
lambda a, t:
@ -241,7 +241,7 @@ GLOBAL_ENTITIES = {
"connections."}}},
# Global properties are inherently read-only, though this formalizes it.
"Infinity": {"readonly": True},
"NaN": {"readonly": True},
"undefined": {"readonly": True},
u"Infinity": {"readonly": True},
u"NaN": {"readonly": True},
u"undefined": {"readonly": True},
}

Просмотреть файл

@ -1,3 +1,4 @@
import codecs
import json
import os
import re
@ -7,7 +8,7 @@ from cStringIO import StringIO
from validator.constants import SPIDERMONKEY_INSTALLATION
from validator.contextgenerator import ContextGenerator
from validator.textfilter import *
import validator.unicodehelper as unicodehelper
JS_ESCAPE = re.compile("\\\\+[ux]", re.I)
@ -81,59 +82,21 @@ def prepare_code(code, err, filename):
# slash: a character is necessary to prevent bad identifier errors
code = JS_ESCAPE.sub("u", code)
encoding = None
try:
code = unicode(code) # Make sure we can get a Unicode representation
code = strip_weird_chars(code, err=err, name=filename)
except UnicodeDecodeError:
# If it's not an easily decodeable encoding, detect it and decode that
code = filter_ascii(code)
code = unicodehelper.decode(code)
return code
def strip_weird_chars(chardata, err=None, name=""):
line_num = 1
out_code = StringIO()
has_warned_ctrlchar = False
for line in chardata.split("\n"):
charpos = 0
for char in line:
if is_standard_ascii(char):
out_code.write(char)
else:
if not has_warned_ctrlchar and err is not None:
err.warning(("testcases_scripting",
"_get_tree",
"control_char_filter"),
"Invalid control character in JS file",
"An invalid character (ASCII 0-31, except CR "
"and LF) has been found in a JS file. These "
"are considered unsafe and should be removed.",
filename=name,
line=line_num,
column=charpos,
context=ContextGenerator(chardata))
has_warned_ctrlchar = True
charpos += 1
out_code.write("\n")
line_num += 1
return out_code.getvalue()
def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
"Returns an AST tree of the JS passed in `code`."
if not code:
return None
temp = tempfile.NamedTemporaryFile(mode="w+", delete=False)
temp.write(code)
code = unicodehelper.decode(code)
temp = tempfile.NamedTemporaryFile(mode="w+b", delete=False)
#temp.write(codecs.BOM_UTF8)
temp.write(code.encode("utf_8"))
temp.flush()
data = """try{
@ -147,7 +110,7 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
}""" % json.dumps(temp.name)
try:
cmd = [shell, "-e", data]
cmd = [shell, "-e", data, "-U"]
try:
shell_obj = subprocess.Popen(cmd,
shell=False,
@ -171,11 +134,7 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
if not data:
raise JSReflectException("Reflection failed")
try:
data = unicode(data)
except UnicodeDecodeError:
data = unicode(filter_ascii(data))
data = unicodehelper.decode(data)
parsed = json.loads(data, strict=False)
if "error" in parsed and parsed["error"]:

Просмотреть файл

@ -40,12 +40,14 @@ class MockBundler:
# Increment the message counter
self.message_count += 1
self.ids.append(id)
self.ids.append(err_id)
error = unicode(error)
print "-" * 30
print error
print error.encode("ascii", "replace")
print "~" * len(error)
if isinstance(description, str):
if isinstance(description, types.StringTypes):
print description
else:
# Errors can have multiple lines
@ -105,12 +107,14 @@ class Traverser:
output = data
if isinstance(data, JSObject) or isinstance(data, JSContext):
output = data.output()
print ". " * self.debug_level + output
output = unicode(output)
print ". " * self.debug_level + output.encode("ascii", "replace")
def run(self, data):
if DEBUG:
x = open("/tmp/output.js", "w")
x.write(str(data))
x.write(unicode(data))
x.close()
if "type" not in data or not self._can_handle_node(data["type"]):
@ -189,7 +193,7 @@ class Traverser:
if action is not None:
action_result = action(self, node)
self._debug("ACTION>>%s (%s)" %
("halt>>%s" % str(action_result) if
("halt>>%s" % unicode(action_result) if
action_result else
"continue",
node["type"]))
@ -350,7 +354,8 @@ class Traverser:
"_build_global",
"dangerous_global"),
"Dangerous Global Object",
[dang if isinstance(dang, str) else
[dang if
isinstance(dang, types.StringTypes) else
"A dangerous or banned global object was "
"accessed by some JavaScript code.",
"Accessed object: %s" % name],

Просмотреть файл

@ -1,4 +1,3 @@
import re
try:
from HTMLParser import HTMLParser
@ -6,6 +5,7 @@ except ImportError: # pragma: no cover
from html.parser import HTMLParser
import validator.testcases.scripting as scripting
import validator.unicodehelper as unicodehelper
from validator.testcases.markup import csstester
from validator.contextgenerator import ContextGenerator
from validator.constants import *
@ -51,7 +51,7 @@ class MarkupParser(HTMLParser):
self.xml_state = []
self.xml_buffer = []
self.reported = {}
self.reported = set()
def process(self, filename, data, extension="xul"):
"""Processes data by splitting it into individual lines, then
@ -61,7 +61,7 @@ class MarkupParser(HTMLParser):
self.filename = filename
self.extension = extension
self.reported = {}
self.reported = set()
self.context = ContextGenerator(data)
@ -100,6 +100,8 @@ class MarkupParser(HTMLParser):
try:
self.feed(line + "\n")
except UnicodeDecodeError:
raise
except Exception as inst:
if DEBUG: # pragma: no cover
print self.xml_state, inst
@ -107,8 +109,8 @@ class MarkupParser(HTMLParser):
if "markup" in self.reported:
return
if "script" in self.xml_state or (
self.debug and "testscript" in self.xml_state):
if ("script" in self.xml_state or
self.debug and "testscript" in self.xml_state):
if "script_comments" in self.reported or not self.strict:
return
self.err.notice(("testcases_markup_markuptester",
@ -122,7 +124,7 @@ class MarkupParser(HTMLParser):
self.filename,
line=self.line,
context=self.context)
self.reported["script_comments"] = True
self.reported.add("script_comments")
return
if self.strict:
@ -136,7 +138,7 @@ class MarkupParser(HTMLParser):
self.filename,
line=self.line,
context=self.context)
self.reported["markup"] = True
self.reported.add("markup")
def handle_startendtag(self, tag, attrs):
# Self closing tags don't have an end tag, so we want to
@ -154,7 +156,7 @@ class MarkupParser(HTMLParser):
self_closing = tag in SELF_CLOSING_TAGS
if DEBUG: # pragma: no cover
print self.xml_state, tag, self_closing
print "S: ", self.xml_state, tag, self_closing
# A fictional tag for testing purposes.
if tag == "xbannedxtestx":
@ -286,17 +288,19 @@ class MarkupParser(HTMLParser):
return
self.xml_state.append(tag)
self.xml_buffer.append("")
self.xml_buffer.append(unicode(""))
def handle_endtag(self, tag):
tag = tag.lower()
if DEBUG: # pragma: no cover
print tag, self.xml_state
print "E: ", tag, self.xml_state
if not self.xml_state:
if "closing_tags" in self.reported or not self.strict:
if DEBUG:
print "Unstrict; extra closing tags ------"
return
self.err.warning(("testcases_markup_markuptester",
"handle_endtag",
@ -307,16 +311,18 @@ class MarkupParser(HTMLParser):
self.filename,
line=self.line,
context=self.context)
self.reported["closing_tags"] = True
self.reported.add("closing_tags")
if DEBUG: # pragma: no cover
print "Too many closing tags ------"
return
elif "script" in self.xml_state:
elif "script" in self.xml_state[:-1]:
# If we're in a script tag, nothing else matters. Just rush
# everything possible into the xml buffer.
self._save_to_buffer("</" + tag + ">")
if DEBUG:
print "Markup as text in script ------"
return
elif tag not in self.xml_state:
@ -344,6 +350,8 @@ class MarkupParser(HTMLParser):
# classifies as a self-closing tag, we just recursively close
# down to the level of the tag we're actualy closing.
if old_state != tag and old_state in SELF_CLOSING_TAGS:
if DEBUG:
print "Self closing tag cascading down ------"
return self.handle_endtag(tag)
# If this is an XML-derived language, everything must nest
@ -365,17 +373,20 @@ class MarkupParser(HTMLParser):
if DEBUG: # pragma: no cover
print "Invalid markup nesting ------"
data_buffer = data_buffer.strip()
# Perform analysis on collected data.
if tag == "script":
scripting.test_js_snippet(self.err,
data_buffer,
self.filename,
self.line)
elif tag == "style":
csstester.test_css_file(self.err,
self.filename,
data_buffer,
self.line)
if data_buffer:
if tag == "script":
scripting.test_js_snippet(self.err,
data_buffer,
self.filename,
self.line)
elif tag == "style":
csstester.test_css_file(self.err,
self.filename,
data_buffer,
self.line)
def handle_data(self, data):
self._save_to_buffer(data)
@ -413,6 +424,8 @@ class MarkupParser(HTMLParser):
if not self.xml_buffer:
return
data = unicodehelper.decode(data)
self.xml_buffer[-1] += data
def _format_args(self, args):

Просмотреть файл

@ -0,0 +1,55 @@
import codecs
import textfilter
# Many thanks to nmaier for inspiration and code in this module
# Byte-order marks paired with the codec used for the bytes that follow them.
# Order matters: the UTF-32 LE BOM begins with the UTF-16 LE BOM, so the
# UTF-32 entries have to be tested before the UTF-16 ones.
UNICODES = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
]

# No longer consulted by decode() (see the note in its body); the name is
# kept so that any code referring to it keeps working.
COMMON_ENCODINGS = ("utf-16", "latin_1", "ascii")


def decode(data):
    """
    Decode data employing some charset detection and including unicode BOM
    stripping.

    `data` is returned unchanged when it is not a byte string. Otherwise the
    steps are, in order:

    1. If the data starts with one of the BOMs in UNICODES, the BOM is
       stripped and the remainder is decoded with the matching codec;
       undecodable sequences are dropped ("ignore").
    2. The data is decoded as strict UTF-8.
    3. The data is decoded as latin_1.

    Returns a unicode string for byte-string input.
    """
    # Don't make more work than we have to. `bytes` is the byte-string type
    # (an alias of `str` on Python 2.6+).
    if not isinstance(data, bytes):
        return data

    # Detect standard unicodes.
    for bom, encoding in UNICODES:
        if data.startswith(bom):
            return data[len(bom):].decode(encoding, "ignore")

    # Try straight UTF-8. Only a decoding failure means "try the next
    # encoding"; anything else should propagate rather than be swallowed.
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        pass

    # latin_1 assigns a character to every possible byte value, so this
    # always succeeds. The earlier `ord(c) < 256` guard was therefore always
    # true, and the COMMON_ENCODINGS loop and ASCII-filter fallback that
    # followed it could never be reached; they have been dropped.
    return data.decode("latin_1")