Python 2.7.3 and JS 1.8.5 updates
This commit is contained in:
Родитель
e698aedda2
Коммит
63e375e3cd
|
@ -0,0 +1,472 @@
|
|||
"""A parser for HTML and XHTML."""
|
||||
|
||||
# This file is based on sgmllib.py, but the API is slightly different.
|
||||
|
||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
||||
# character data -- the normal case), RCDATA (replaceable character
|
||||
# data -- only char and entity references and end tags are special)
|
||||
# and CDATA (character data -- only end tags are special).
|
||||
|
||||
|
||||
import markupbase
|
||||
import re
|
||||
|
||||
# Regular expressions used for parsing
|
||||
|
||||
interesting_normal = re.compile('[&<]')
|
||||
incomplete = re.compile('&[a-zA-Z#]')
|
||||
|
||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
|
||||
|
||||
starttagopen = re.compile('<[a-zA-Z]')
|
||||
piclose = re.compile('>')
|
||||
commentclose = re.compile(r'--\s*>')
|
||||
tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
|
||||
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
|
||||
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
|
||||
tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
|
||||
|
||||
attrfind = re.compile(
|
||||
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
|
||||
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:[\s/]* # optional whitespace before attribute name
|
||||
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
|
||||
(?:\s*=+\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|"[^"]*" # LIT-enclosed value
|
||||
|(?!['"])[^>\s]* # bare value
|
||||
)
|
||||
)?(?:\s|/(?!>))*
|
||||
)*
|
||||
)?
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
endendtag = re.compile('>')
|
||||
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
|
||||
# </ and the tag name, so maybe this should be fixed
|
||||
endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
|
||||
|
||||
|
||||
class HTMLParseError(Exception):
|
||||
"""Exception raised for all parse errors."""
|
||||
|
||||
def __init__(self, msg, position=(None, None)):
|
||||
assert msg
|
||||
self.msg = msg
|
||||
self.lineno = position[0]
|
||||
self.offset = position[1]
|
||||
|
||||
def __str__(self):
|
||||
result = self.msg
|
||||
if self.lineno is not None:
|
||||
result = result + ", at line %d" % self.lineno
|
||||
if self.offset is not None:
|
||||
result = result + ", column %d" % (self.offset + 1)
|
||||
return result
|
||||
|
||||
|
||||
class HTMLParser(markupbase.ParserBase):
|
||||
"""Find tags and other markup and call handler functions.
|
||||
|
||||
Usage:
|
||||
p = HTMLParser()
|
||||
p.feed(data)
|
||||
...
|
||||
p.close()
|
||||
|
||||
Start tags are handled by calling self.handle_starttag() or
|
||||
self.handle_startendtag(); end tags by self.handle_endtag(). The
|
||||
data between tags is passed from the parser to the derived class
|
||||
by calling self.handle_data() with the data as argument (the data
|
||||
may be split up in arbitrary chunks). Entity references are
|
||||
passed by calling self.handle_entityref() with the entity
|
||||
reference as the argument. Numeric character references are
|
||||
passed to self.handle_charref() with the string containing the
|
||||
reference as the argument.
|
||||
"""
|
||||
|
||||
CDATA_CONTENT_ELEMENTS = ("script", "style")
|
||||
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize and reset this instance."""
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""Reset this instance. Loses all unprocessed data."""
|
||||
self.rawdata = ''
|
||||
self.lasttag = '???'
|
||||
self.interesting = interesting_normal
|
||||
self.cdata_elem = None
|
||||
markupbase.ParserBase.reset(self)
|
||||
|
||||
def feed(self, data):
|
||||
r"""Feed data to the parser.
|
||||
|
||||
Call this as often as you want, with as little or as much text
|
||||
as you want (may include '\n').
|
||||
"""
|
||||
self.rawdata = self.rawdata + data
|
||||
self.goahead(0)
|
||||
|
||||
def close(self):
|
||||
"""Handle any buffered data."""
|
||||
self.goahead(1)
|
||||
|
||||
def error(self, message):
|
||||
raise HTMLParseError(message, self.getpos())
|
||||
|
||||
__starttag_text = None
|
||||
|
||||
def get_starttag_text(self):
|
||||
"""Return full source of start tag: '<...>'."""
|
||||
return self.__starttag_text
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
def clear_cdata_mode(self):
|
||||
self.interesting = interesting_normal
|
||||
self.cdata_elem = None
|
||||
|
||||
# Internal -- handle data as far as reasonable. May leave state
|
||||
# and data to be processed by a subsequent call. If 'end' is
|
||||
# true, force handling all data as if followed by EOF marker.
|
||||
def goahead(self, end):
|
||||
rawdata = self.rawdata
|
||||
i = 0
|
||||
n = len(rawdata)
|
||||
while i < n:
|
||||
match = self.interesting.search(rawdata, i) # < or &
|
||||
if match:
|
||||
j = match.start()
|
||||
else:
|
||||
if self.cdata_elem:
|
||||
break
|
||||
j = n
|
||||
if i < j: self.handle_data(rawdata[i:j])
|
||||
i = self.updatepos(i, j)
|
||||
if i == n: break
|
||||
startswith = rawdata.startswith
|
||||
if startswith('<', i):
|
||||
if starttagopen.match(rawdata, i): # < + letter
|
||||
k = self.parse_starttag(i)
|
||||
elif startswith("</", i):
|
||||
k = self.parse_endtag(i)
|
||||
elif startswith("<!--", i):
|
||||
k = self.parse_comment(i)
|
||||
elif startswith("<?", i):
|
||||
k = self.parse_pi(i)
|
||||
elif startswith("<!", i):
|
||||
k = self.parse_html_declaration(i)
|
||||
elif (i + 1) < n:
|
||||
self.handle_data("<")
|
||||
k = i + 1
|
||||
else:
|
||||
break
|
||||
if k < 0:
|
||||
if not end:
|
||||
break
|
||||
k = rawdata.find('>', i + 1)
|
||||
if k < 0:
|
||||
k = rawdata.find('<', i + 1)
|
||||
if k < 0:
|
||||
k = i + 1
|
||||
else:
|
||||
k += 1
|
||||
self.handle_data(rawdata[i:k])
|
||||
i = self.updatepos(i, k)
|
||||
elif startswith("&#", i):
|
||||
match = charref.match(rawdata, i)
|
||||
if match:
|
||||
name = match.group()[2:-1]
|
||||
self.handle_charref(name)
|
||||
k = match.end()
|
||||
if not startswith(';', k-1):
|
||||
k = k - 1
|
||||
i = self.updatepos(i, k)
|
||||
continue
|
||||
else:
|
||||
if ";" in rawdata[i:]: #bail by consuming &#
|
||||
self.handle_data(rawdata[0:2])
|
||||
i = self.updatepos(i, 2)
|
||||
break
|
||||
elif startswith('&', i):
|
||||
match = entityref.match(rawdata, i)
|
||||
if match:
|
||||
name = match.group(1)
|
||||
self.handle_entityref(name)
|
||||
k = match.end()
|
||||
if not startswith(';', k-1):
|
||||
k = k - 1
|
||||
i = self.updatepos(i, k)
|
||||
continue
|
||||
match = incomplete.match(rawdata, i)
|
||||
if match:
|
||||
# match.group() will contain at least 2 chars
|
||||
if end and match.group() == rawdata[i:]:
|
||||
self.error("EOF in middle of entity or char ref")
|
||||
# incomplete
|
||||
break
|
||||
elif (i + 1) < n:
|
||||
# not the end of the buffer, and can't be confused
|
||||
# with some other construct
|
||||
self.handle_data("&")
|
||||
i = self.updatepos(i, i + 1)
|
||||
else:
|
||||
break
|
||||
else:
|
||||
assert 0, "interesting.search() lied"
|
||||
# end while
|
||||
if end and i < n and not self.cdata_elem:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = self.updatepos(i, n)
|
||||
self.rawdata = rawdata[i:]
|
||||
|
||||
# Internal -- parse html declarations, return length or -1 if not terminated
|
||||
# See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
|
||||
# See also parse_declaration in _markupbase
|
||||
def parse_html_declaration(self, i):
|
||||
rawdata = self.rawdata
|
||||
if rawdata[i:i+2] != '<!':
|
||||
self.error('unexpected call to parse_html_declaration()')
|
||||
if rawdata[i:i+4] == '<!--':
|
||||
# this case is actually already handled in goahead()
|
||||
return self.parse_comment(i)
|
||||
elif rawdata[i:i+3] == '<![':
|
||||
return self.parse_marked_section(i)
|
||||
elif rawdata[i:i+9].lower() == '<!doctype':
|
||||
# find the closing >
|
||||
gtpos = rawdata.find('>', i+9)
|
||||
if gtpos == -1:
|
||||
return -1
|
||||
self.handle_decl(rawdata[i+2:gtpos])
|
||||
return gtpos+1
|
||||
else:
|
||||
return self.parse_bogus_comment(i)
|
||||
|
||||
# Internal -- parse bogus comment, return length or -1 if not terminated
|
||||
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
|
||||
def parse_bogus_comment(self, i, report=1):
|
||||
rawdata = self.rawdata
|
||||
if rawdata[i:i+2] not in ('<!', '</'):
|
||||
self.error('unexpected call to parse_comment()')
|
||||
pos = rawdata.find('>', i+2)
|
||||
if pos == -1:
|
||||
return -1
|
||||
if report:
|
||||
self.handle_comment(rawdata[i+2:pos])
|
||||
return pos + 1
|
||||
|
||||
# Internal -- parse processing instr, return end or -1 if not terminated
|
||||
def parse_pi(self, i):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
|
||||
match = piclose.search(rawdata, i+2) # >
|
||||
if not match:
|
||||
return -1
|
||||
j = match.start()
|
||||
self.handle_pi(rawdata[i+2: j])
|
||||
j = match.end()
|
||||
return j
|
||||
|
||||
# Internal -- handle starttag, return end or -1 if not terminated
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = match.group(1).lower()
|
||||
|
||||
while k < endpos:
|
||||
m = attrfind.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
if attrvalue:
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
self.handle_data(rawdata[i:endpos])
|
||||
return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
# Internal -- check to see if we have a complete starttag; return end
|
||||
# or -1 if incomplete.
|
||||
def check_for_whole_start_tag(self, i):
|
||||
rawdata = self.rawdata
|
||||
m = locatestarttagend.match(rawdata, i)
|
||||
if m:
|
||||
j = m.end()
|
||||
next = rawdata[j:j+1]
|
||||
if next == ">":
|
||||
return j + 1
|
||||
if next == "/":
|
||||
if rawdata.startswith("/>", j):
|
||||
return j + 2
|
||||
if rawdata.startswith("/", j):
|
||||
# buffer boundary
|
||||
return -1
|
||||
# else bogus input
|
||||
self.updatepos(i, j + 1)
|
||||
self.error("malformed empty start tag")
|
||||
if next == "":
|
||||
# end of input
|
||||
return -1
|
||||
if next in ("abcdefghijklmnopqrstuvwxyz=/"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
|
||||
# end of input in or before attribute value, or we have the
|
||||
# '/' from a '/>' ending
|
||||
return -1
|
||||
if j > i:
|
||||
return j
|
||||
else:
|
||||
return i + 1
|
||||
raise AssertionError("we should not get here!")
|
||||
|
||||
# Internal -- parse endtag, return end or -1 if incomplete
|
||||
def parse_endtag(self, i):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
|
||||
match = endendtag.search(rawdata, i+1) # >
|
||||
if not match:
|
||||
return -1
|
||||
gtpos = match.end()
|
||||
match = endtagfind.match(rawdata, i) # </ + tag + >
|
||||
if not match:
|
||||
if self.cdata_elem is not None:
|
||||
self.handle_data(rawdata[i:gtpos])
|
||||
return gtpos
|
||||
# find the name: w3.org/TR/html5/tokenization.html#tag-name-state
|
||||
namematch = tagfind_tolerant.match(rawdata, i+2)
|
||||
if not namematch:
|
||||
# w3.org/TR/html5/tokenization.html#end-tag-open-state
|
||||
if rawdata[i:i+3] == '</>':
|
||||
return i+3
|
||||
else:
|
||||
return self.parse_bogus_comment(i)
|
||||
tagname = namematch.group().lower()
|
||||
# consume and ignore other stuff between the name and the >
|
||||
# Note: this is not 100% correct, since we might have things like
|
||||
# </tag attr=">">, but looking for > after tha name should cover
|
||||
# most of the cases and is much simpler
|
||||
gtpos = rawdata.find('>', namematch.end())
|
||||
self.handle_endtag(tagname)
|
||||
return gtpos+1
|
||||
|
||||
elem = match.group(1).lower() # script or style
|
||||
if self.cdata_elem is not None:
|
||||
if elem != self.cdata_elem:
|
||||
self.handle_data(rawdata[i:gtpos])
|
||||
return gtpos
|
||||
|
||||
self.handle_endtag(elem)
|
||||
self.clear_cdata_mode()
|
||||
return gtpos
|
||||
|
||||
# Overridable -- finish processing of start+end tag: <tag.../>
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
self.handle_starttag(tag, attrs)
|
||||
self.handle_endtag(tag)
|
||||
|
||||
# Overridable -- handle start tag
|
||||
def handle_starttag(self, tag, attrs):
|
||||
pass
|
||||
|
||||
# Overridable -- handle end tag
|
||||
def handle_endtag(self, tag):
|
||||
pass
|
||||
|
||||
# Overridable -- handle character reference
|
||||
def handle_charref(self, name):
|
||||
pass
|
||||
|
||||
# Overridable -- handle entity reference
|
||||
def handle_entityref(self, name):
|
||||
pass
|
||||
|
||||
# Overridable -- handle data
|
||||
def handle_data(self, data):
|
||||
pass
|
||||
|
||||
# Overridable -- handle comment
|
||||
def handle_comment(self, data):
|
||||
pass
|
||||
|
||||
# Overridable -- handle declaration
|
||||
def handle_decl(self, decl):
|
||||
pass
|
||||
|
||||
# Overridable -- handle processing instruction
|
||||
def handle_pi(self, data):
|
||||
pass
|
||||
|
||||
def unknown_decl(self, data):
|
||||
pass
|
||||
|
||||
# Internal -- helper to remove special character quoting
|
||||
entitydefs = None
|
||||
def unescape(self, s):
|
||||
if '&' not in s:
|
||||
return s
|
||||
def replaceEntities(s):
|
||||
s = s.groups()[0]
|
||||
try:
|
||||
if s[0] == "#":
|
||||
s = s[1:]
|
||||
if s[0] in ['x','X']:
|
||||
c = int(s[1:], 16)
|
||||
else:
|
||||
c = int(s)
|
||||
return unichr(c)
|
||||
except ValueError:
|
||||
return '&#'+s+';'
|
||||
else:
|
||||
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
||||
# which is not part of HTML 4
|
||||
import htmlentitydefs
|
||||
if HTMLParser.entitydefs is None:
|
||||
entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
|
||||
for k, v in htmlentitydefs.name2codepoint.iteritems():
|
||||
entitydefs[k] = unichr(v)
|
||||
try:
|
||||
return self.entitydefs[s]
|
||||
except KeyError:
|
||||
return '&'+s+';'
|
||||
|
||||
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
|
|
@ -0,0 +1,6 @@
|
|||
Notice
|
||||
======
|
||||
|
||||
The files in this directory are extracted from the CPython standard library.
|
||||
They are included solely for compatibility purposes, and their respective
|
||||
copyrights and licenses are held and controlled by the original developer(s).
|
|
@ -0,0 +1,433 @@
|
|||
"""Generic (shallow and deep) copying operations.
|
||||
|
||||
Interface summary:
|
||||
|
||||
import copy
|
||||
|
||||
x = copy.copy(y) # make a shallow copy of y
|
||||
x = copy.deepcopy(y) # make a deep copy of y
|
||||
|
||||
For module specific errors, copy.Error is raised.
|
||||
|
||||
The difference between shallow and deep copying is only relevant for
|
||||
compound objects (objects that contain other objects, like lists or
|
||||
class instances).
|
||||
|
||||
- A shallow copy constructs a new compound object and then (to the
|
||||
extent possible) inserts *the same objects* into it that the
|
||||
original contains.
|
||||
|
||||
- A deep copy constructs a new compound object and then, recursively,
|
||||
inserts *copies* into it of the objects found in the original.
|
||||
|
||||
Two problems often exist with deep copy operations that don't exist
|
||||
with shallow copy operations:
|
||||
|
||||
a) recursive objects (compound objects that, directly or indirectly,
|
||||
contain a reference to themselves) may cause a recursive loop
|
||||
|
||||
b) because deep copy copies *everything* it may copy too much, e.g.
|
||||
administrative data structures that should be shared even between
|
||||
copies
|
||||
|
||||
Python's deep copy operation avoids these problems by:
|
||||
|
||||
a) keeping a table of objects already copied during the current
|
||||
copying pass
|
||||
|
||||
b) letting user-defined classes override the copying operation or the
|
||||
set of components copied
|
||||
|
||||
This version does not copy types like module, class, function, method,
|
||||
nor stack trace, stack frame, nor file, socket, window, nor array, nor
|
||||
any similar types.
|
||||
|
||||
Classes can use the same interfaces to control copying that they use
|
||||
to control pickling: they can define methods called __getinitargs__(),
|
||||
__getstate__() and __setstate__(). See the documentation for module
|
||||
"pickle" for information on these methods.
|
||||
"""
|
||||
|
||||
import types
|
||||
import weakref
|
||||
from copy_reg import dispatch_table
|
||||
|
||||
class Error(Exception):
|
||||
pass
|
||||
error = Error # backward compatibility
|
||||
|
||||
try:
|
||||
from org.python.core import PyStringMap
|
||||
except ImportError:
|
||||
PyStringMap = None
|
||||
|
||||
__all__ = ["Error", "copy", "deepcopy"]
|
||||
|
||||
def copy(x):
|
||||
"""Shallow copy operation on arbitrary Python objects.
|
||||
|
||||
See the module's __doc__ string for more info.
|
||||
"""
|
||||
|
||||
cls = type(x)
|
||||
|
||||
copier = _copy_dispatch.get(cls)
|
||||
if copier:
|
||||
return copier(x)
|
||||
|
||||
copier = getattr(cls, "__copy__", None)
|
||||
if copier:
|
||||
return copier(x)
|
||||
|
||||
reductor = dispatch_table.get(cls)
|
||||
if reductor:
|
||||
rv = reductor(x)
|
||||
else:
|
||||
reductor = getattr(x, "__reduce_ex__", None)
|
||||
if reductor:
|
||||
rv = reductor(2)
|
||||
else:
|
||||
reductor = getattr(x, "__reduce__", None)
|
||||
if reductor:
|
||||
rv = reductor()
|
||||
else:
|
||||
raise Error("un(shallow)copyable object of type %s" % cls)
|
||||
|
||||
return _reconstruct(x, rv, 0)
|
||||
|
||||
|
||||
_copy_dispatch = d = {}
|
||||
|
||||
def _copy_immutable(x):
|
||||
return x
|
||||
for t in (type(None), int, long, float, bool, str, tuple,
|
||||
frozenset, type, xrange, types.ClassType,
|
||||
types.BuiltinFunctionType, type(Ellipsis),
|
||||
types.FunctionType, weakref.ref):
|
||||
d[t] = _copy_immutable
|
||||
for name in ("ComplexType", "UnicodeType", "CodeType"):
|
||||
t = getattr(types, name, None)
|
||||
if t is not None:
|
||||
d[t] = _copy_immutable
|
||||
|
||||
def _copy_with_constructor(x):
|
||||
return type(x)(x)
|
||||
for t in (list, dict, set):
|
||||
d[t] = _copy_with_constructor
|
||||
|
||||
def _copy_with_copy_method(x):
|
||||
return x.copy()
|
||||
if PyStringMap is not None:
|
||||
d[PyStringMap] = _copy_with_copy_method
|
||||
|
||||
def _copy_inst(x):
|
||||
if hasattr(x, '__copy__'):
|
||||
return x.__copy__()
|
||||
if hasattr(x, '__getinitargs__'):
|
||||
args = x.__getinitargs__()
|
||||
y = x.__class__(*args)
|
||||
else:
|
||||
y = _EmptyClass()
|
||||
y.__class__ = x.__class__
|
||||
if hasattr(x, '__getstate__'):
|
||||
state = x.__getstate__()
|
||||
else:
|
||||
state = x.__dict__
|
||||
if hasattr(y, '__setstate__'):
|
||||
y.__setstate__(state)
|
||||
else:
|
||||
y.__dict__.update(state)
|
||||
return y
|
||||
d[types.InstanceType] = _copy_inst
|
||||
|
||||
del d
|
||||
|
||||
def deepcopy(x, memo=None, _nil=[]):
|
||||
"""Deep copy operation on arbitrary Python objects.
|
||||
|
||||
See the module's __doc__ string for more info.
|
||||
"""
|
||||
|
||||
if memo is None:
|
||||
memo = {}
|
||||
|
||||
d = id(x)
|
||||
y = memo.get(d, _nil)
|
||||
if y is not _nil:
|
||||
return y
|
||||
|
||||
cls = type(x)
|
||||
|
||||
copier = _deepcopy_dispatch.get(cls)
|
||||
if copier:
|
||||
y = copier(x, memo)
|
||||
else:
|
||||
try:
|
||||
issc = issubclass(cls, type)
|
||||
except TypeError: # cls is not a class (old Boost; see SF #502085)
|
||||
issc = 0
|
||||
if issc:
|
||||
y = _deepcopy_atomic(x, memo)
|
||||
else:
|
||||
copier = getattr(x, "__deepcopy__", None)
|
||||
if copier:
|
||||
y = copier(memo)
|
||||
else:
|
||||
reductor = dispatch_table.get(cls)
|
||||
if reductor:
|
||||
rv = reductor(x)
|
||||
else:
|
||||
reductor = getattr(x, "__reduce_ex__", None)
|
||||
if reductor:
|
||||
rv = reductor(2)
|
||||
else:
|
||||
reductor = getattr(x, "__reduce__", None)
|
||||
if reductor:
|
||||
rv = reductor()
|
||||
else:
|
||||
raise Error(
|
||||
"un(deep)copyable object of type %s" % cls)
|
||||
y = _reconstruct(x, rv, 1, memo)
|
||||
|
||||
memo[d] = y
|
||||
_keep_alive(x, memo) # Make sure x lives at least as long as d
|
||||
return y
|
||||
|
||||
_deepcopy_dispatch = d = {}
|
||||
|
||||
def _deepcopy_atomic(x, memo):
|
||||
return x
|
||||
d[type(None)] = _deepcopy_atomic
|
||||
d[type(Ellipsis)] = _deepcopy_atomic
|
||||
d[int] = _deepcopy_atomic
|
||||
d[long] = _deepcopy_atomic
|
||||
d[float] = _deepcopy_atomic
|
||||
d[bool] = _deepcopy_atomic
|
||||
try:
|
||||
d[complex] = _deepcopy_atomic
|
||||
except NameError:
|
||||
pass
|
||||
d[str] = _deepcopy_atomic
|
||||
try:
|
||||
d[unicode] = _deepcopy_atomic
|
||||
except NameError:
|
||||
pass
|
||||
try:
|
||||
d[types.CodeType] = _deepcopy_atomic
|
||||
except AttributeError:
|
||||
pass
|
||||
d[type] = _deepcopy_atomic
|
||||
d[xrange] = _deepcopy_atomic
|
||||
d[types.ClassType] = _deepcopy_atomic
|
||||
d[types.BuiltinFunctionType] = _deepcopy_atomic
|
||||
d[types.FunctionType] = _deepcopy_atomic
|
||||
d[weakref.ref] = _deepcopy_atomic
|
||||
|
||||
def _deepcopy_list(x, memo):
|
||||
y = []
|
||||
memo[id(x)] = y
|
||||
for a in x:
|
||||
y.append(deepcopy(a, memo))
|
||||
return y
|
||||
d[list] = _deepcopy_list
|
||||
|
||||
def _deepcopy_tuple(x, memo):
|
||||
y = []
|
||||
for a in x:
|
||||
y.append(deepcopy(a, memo))
|
||||
d = id(x)
|
||||
try:
|
||||
return memo[d]
|
||||
except KeyError:
|
||||
pass
|
||||
for i in range(len(x)):
|
||||
if x[i] is not y[i]:
|
||||
y = tuple(y)
|
||||
break
|
||||
else:
|
||||
y = x
|
||||
memo[d] = y
|
||||
return y
|
||||
d[tuple] = _deepcopy_tuple
|
||||
|
||||
def _deepcopy_dict(x, memo):
|
||||
y = {}
|
||||
memo[id(x)] = y
|
||||
for key, value in x.iteritems():
|
||||
y[deepcopy(key, memo)] = deepcopy(value, memo)
|
||||
return y
|
||||
d[dict] = _deepcopy_dict
|
||||
if PyStringMap is not None:
|
||||
d[PyStringMap] = _deepcopy_dict
|
||||
|
||||
def _deepcopy_method(x, memo): # Copy instance methods
|
||||
return type(x)(x.im_func, deepcopy(x.im_self, memo), x.im_class)
|
||||
_deepcopy_dispatch[types.MethodType] = _deepcopy_method
|
||||
|
||||
def _keep_alive(x, memo):
|
||||
"""Keeps a reference to the object x in the memo.
|
||||
|
||||
Because we remember objects by their id, we have
|
||||
to assure that possibly temporary objects are kept
|
||||
alive by referencing them.
|
||||
We store a reference at the id of the memo, which should
|
||||
normally not be used unless someone tries to deepcopy
|
||||
the memo itself...
|
||||
"""
|
||||
try:
|
||||
memo[id(memo)].append(x)
|
||||
except KeyError:
|
||||
# aha, this is the first one :-)
|
||||
memo[id(memo)]=[x]
|
||||
|
||||
def _deepcopy_inst(x, memo):
|
||||
if hasattr(x, '__deepcopy__'):
|
||||
return x.__deepcopy__(memo)
|
||||
if hasattr(x, '__getinitargs__'):
|
||||
args = x.__getinitargs__()
|
||||
args = deepcopy(args, memo)
|
||||
y = x.__class__(*args)
|
||||
else:
|
||||
y = _EmptyClass()
|
||||
y.__class__ = x.__class__
|
||||
memo[id(x)] = y
|
||||
if hasattr(x, '__getstate__'):
|
||||
state = x.__getstate__()
|
||||
else:
|
||||
state = x.__dict__
|
||||
state = deepcopy(state, memo)
|
||||
if hasattr(y, '__setstate__'):
|
||||
y.__setstate__(state)
|
||||
else:
|
||||
y.__dict__.update(state)
|
||||
return y
|
||||
d[types.InstanceType] = _deepcopy_inst
|
||||
|
||||
def _reconstruct(x, info, deep, memo=None):
|
||||
if isinstance(info, str):
|
||||
return x
|
||||
assert isinstance(info, tuple)
|
||||
if memo is None:
|
||||
memo = {}
|
||||
n = len(info)
|
||||
assert n in (2, 3, 4, 5)
|
||||
callable, args = info[:2]
|
||||
if n > 2:
|
||||
state = info[2]
|
||||
else:
|
||||
state = {}
|
||||
if n > 3:
|
||||
listiter = info[3]
|
||||
else:
|
||||
listiter = None
|
||||
if n > 4:
|
||||
dictiter = info[4]
|
||||
else:
|
||||
dictiter = None
|
||||
if deep:
|
||||
args = deepcopy(args, memo)
|
||||
y = callable(*args)
|
||||
memo[id(x)] = y
|
||||
|
||||
if state:
|
||||
if deep:
|
||||
state = deepcopy(state, memo)
|
||||
if hasattr(y, '__setstate__'):
|
||||
y.__setstate__(state)
|
||||
else:
|
||||
if isinstance(state, tuple) and len(state) == 2:
|
||||
state, slotstate = state
|
||||
else:
|
||||
slotstate = None
|
||||
if state is not None:
|
||||
y.__dict__.update(state)
|
||||
if slotstate is not None:
|
||||
for key, value in slotstate.iteritems():
|
||||
setattr(y, key, value)
|
||||
|
||||
if listiter is not None:
|
||||
for item in listiter:
|
||||
if deep:
|
||||
item = deepcopy(item, memo)
|
||||
y.append(item)
|
||||
if dictiter is not None:
|
||||
for key, value in dictiter:
|
||||
if deep:
|
||||
key = deepcopy(key, memo)
|
||||
value = deepcopy(value, memo)
|
||||
y[key] = value
|
||||
return y
|
||||
|
||||
del d
|
||||
|
||||
del types
|
||||
|
||||
# Helper for instance creation without calling __init__
|
||||
class _EmptyClass:
|
||||
pass
|
||||
|
||||
def _test():
|
||||
l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'],
|
||||
{'abc': 'ABC'}, (), [], {}]
|
||||
l1 = copy(l)
|
||||
print l1==l
|
||||
l1 = map(copy, l)
|
||||
print l1==l
|
||||
l1 = deepcopy(l)
|
||||
print l1==l
|
||||
class C:
|
||||
def __init__(self, arg=None):
|
||||
self.a = 1
|
||||
self.arg = arg
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
file = sys.argv[0]
|
||||
else:
|
||||
file = __file__
|
||||
self.fp = open(file)
|
||||
self.fp.close()
|
||||
def __getstate__(self):
|
||||
return {'a': self.a, 'arg': self.arg}
|
||||
def __setstate__(self, state):
|
||||
for key, value in state.iteritems():
|
||||
setattr(self, key, value)
|
||||
def __deepcopy__(self, memo=None):
|
||||
new = self.__class__(deepcopy(self.arg, memo))
|
||||
new.a = self.a
|
||||
return new
|
||||
c = C('argument sketch')
|
||||
l.append(c)
|
||||
l2 = copy(l)
|
||||
print l == l2
|
||||
print l
|
||||
print l2
|
||||
l2 = deepcopy(l)
|
||||
print l == l2
|
||||
print l
|
||||
print l2
|
||||
l.append({l[1]: l, 'xyz': l[2]})
|
||||
l3 = copy(l)
|
||||
import repr
|
||||
print map(repr.repr, l)
|
||||
print map(repr.repr, l1)
|
||||
print map(repr.repr, l2)
|
||||
print map(repr.repr, l3)
|
||||
l3 = deepcopy(l)
|
||||
import repr
|
||||
print map(repr.repr, l)
|
||||
print map(repr.repr, l1)
|
||||
print map(repr.repr, l2)
|
||||
print map(repr.repr, l3)
|
||||
class odict(dict):
|
||||
def __init__(self, d = {}):
|
||||
self.a = 99
|
||||
dict.__init__(self, d)
|
||||
def __setitem__(self, k, i):
|
||||
dict.__setitem__(self, k, i)
|
||||
self.a
|
||||
o = odict({"A" : "B"})
|
||||
x = deepcopy(o)
|
||||
print(o, x)
|
||||
|
||||
if __name__ == '__main__':
|
||||
_test()
|
|
@ -0,0 +1,201 @@
|
|||
"""Helper to provide extensibility for pickle/cPickle.
|
||||
|
||||
This is only useful to add pickle support for extension types defined in
|
||||
C, not for instances of user-defined classes.
|
||||
"""
|
||||
|
||||
from types import ClassType as _ClassType
|
||||
|
||||
__all__ = ["pickle", "constructor",
|
||||
"add_extension", "remove_extension", "clear_extension_cache"]
|
||||
|
||||
dispatch_table = {}
|
||||
|
||||
def pickle(ob_type, pickle_function, constructor_ob=None):
|
||||
if type(ob_type) is _ClassType:
|
||||
raise TypeError("copy_reg is not intended for use with classes")
|
||||
|
||||
if not hasattr(pickle_function, '__call__'):
|
||||
raise TypeError("reduction functions must be callable")
|
||||
dispatch_table[ob_type] = pickle_function
|
||||
|
||||
# The constructor_ob function is a vestige of safe for unpickling.
|
||||
# There is no reason for the caller to pass it anymore.
|
||||
if constructor_ob is not None:
|
||||
constructor(constructor_ob)
|
||||
|
||||
def constructor(object):
|
||||
if not hasattr(object, '__call__'):
|
||||
raise TypeError("constructors must be callable")
|
||||
|
||||
# Example: provide pickling support for complex numbers.
|
||||
|
||||
try:
|
||||
complex
|
||||
except NameError:
|
||||
pass
|
||||
else:
|
||||
|
||||
def pickle_complex(c):
|
||||
return complex, (c.real, c.imag)
|
||||
|
||||
pickle(complex, pickle_complex, complex)
|
||||
|
||||
# Support for pickling new-style objects
|
||||
|
||||
def _reconstructor(cls, base, state):
|
||||
if base is object:
|
||||
obj = object.__new__(cls)
|
||||
else:
|
||||
obj = base.__new__(cls, state)
|
||||
if base.__init__ != object.__init__:
|
||||
base.__init__(obj, state)
|
||||
return obj
|
||||
|
||||
_HEAPTYPE = 1<<9
|
||||
|
||||
# Python code for object.__reduce_ex__ for protocols 0 and 1
|
||||
|
||||
def _reduce_ex(self, proto):
|
||||
assert proto < 2
|
||||
for base in self.__class__.__mro__:
|
||||
if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE:
|
||||
break
|
||||
else:
|
||||
base = object # not really reachable
|
||||
if base is object:
|
||||
state = None
|
||||
else:
|
||||
if base is self.__class__:
|
||||
raise TypeError, "can't pickle %s objects" % base.__name__
|
||||
state = base(self)
|
||||
args = (self.__class__, base, state)
|
||||
try:
|
||||
getstate = self.__getstate__
|
||||
except AttributeError:
|
||||
if getattr(self, "__slots__", None):
|
||||
raise TypeError("a class that defines __slots__ without "
|
||||
"defining __getstate__ cannot be pickled")
|
||||
try:
|
||||
dict = self.__dict__
|
||||
except AttributeError:
|
||||
dict = None
|
||||
else:
|
||||
dict = getstate()
|
||||
if dict:
|
||||
return _reconstructor, args, dict
|
||||
else:
|
||||
return _reconstructor, args
|
||||
|
||||
# Helper for __reduce_ex__ protocol 2
|
||||
|
||||
def __newobj__(cls, *args):
|
||||
return cls.__new__(cls, *args)
|
||||
|
||||
def _slotnames(cls):
|
||||
"""Return a list of slot names for a given class.
|
||||
|
||||
This needs to find slots defined by the class and its bases, so we
|
||||
can't simply return the __slots__ attribute. We must walk down
|
||||
the Method Resolution Order and concatenate the __slots__ of each
|
||||
class found there. (This assumes classes don't modify their
|
||||
__slots__ attribute to misrepresent their slots after the class is
|
||||
defined.)
|
||||
"""
|
||||
|
||||
# Get the value from a cache in the class if possible
|
||||
names = cls.__dict__.get("__slotnames__")
|
||||
if names is not None:
|
||||
return names
|
||||
|
||||
# Not cached -- calculate the value
|
||||
names = []
|
||||
if not hasattr(cls, "__slots__"):
|
||||
# This class has no slots
|
||||
pass
|
||||
else:
|
||||
# Slots found -- gather slot names from all base classes
|
||||
for c in cls.__mro__:
|
||||
if "__slots__" in c.__dict__:
|
||||
slots = c.__dict__['__slots__']
|
||||
# if class has a single slot, it can be given as a string
|
||||
if isinstance(slots, basestring):
|
||||
slots = (slots,)
|
||||
for name in slots:
|
||||
# special descriptors
|
||||
if name in ("__dict__", "__weakref__"):
|
||||
continue
|
||||
# mangled names
|
||||
elif name.startswith('__') and not name.endswith('__'):
|
||||
names.append('_%s%s' % (c.__name__, name))
|
||||
else:
|
||||
names.append(name)
|
||||
|
||||
# Cache the outcome in the class if at all possible
|
||||
try:
|
||||
cls.__slotnames__ = names
|
||||
except:
|
||||
pass # But don't die if we can't
|
||||
|
||||
return names
|
||||
|
||||
# A registry of extension codes. This is an ad-hoc compression
|
||||
# mechanism. Whenever a global reference to <module>, <name> is about
|
||||
# to be pickled, the (<module>, <name>) tuple is looked up here to see
|
||||
# if it is a registered extension code for it. Extension codes are
|
||||
# universal, so that the meaning of a pickle does not depend on
|
||||
# context. (There are also some codes reserved for local use that
|
||||
# don't have this restriction.) Codes are positive ints; 0 is
|
||||
# reserved.
|
||||
|
||||
_extension_registry = {} # key -> code
|
||||
_inverted_registry = {} # code -> key
|
||||
_extension_cache = {} # code -> object
|
||||
# Don't ever rebind those names: cPickle grabs a reference to them when
|
||||
# it's initialized, and won't see a rebinding.
|
||||
|
||||
def add_extension(module, name, code):
|
||||
"""Register an extension code."""
|
||||
code = int(code)
|
||||
if not 1 <= code <= 0x7fffffff:
|
||||
raise ValueError, "code out of range"
|
||||
key = (module, name)
|
||||
if (_extension_registry.get(key) == code and
|
||||
_inverted_registry.get(code) == key):
|
||||
return # Redundant registrations are benign
|
||||
if key in _extension_registry:
|
||||
raise ValueError("key %s is already registered with code %s" %
|
||||
(key, _extension_registry[key]))
|
||||
if code in _inverted_registry:
|
||||
raise ValueError("code %s is already in use for key %s" %
|
||||
(code, _inverted_registry[code]))
|
||||
_extension_registry[key] = code
|
||||
_inverted_registry[code] = key
|
||||
|
||||
def remove_extension(module, name, code):
|
||||
"""Unregister an extension code. For testing only."""
|
||||
key = (module, name)
|
||||
if (_extension_registry.get(key) != code or
|
||||
_inverted_registry.get(code) != key):
|
||||
raise ValueError("key %s is not registered with code %s" %
|
||||
(key, code))
|
||||
del _extension_registry[key]
|
||||
del _inverted_registry[code]
|
||||
if code in _extension_cache:
|
||||
del _extension_cache[code]
|
||||
|
||||
def clear_extension_cache():
|
||||
_extension_cache.clear()
|
||||
|
||||
# Standard extension code assignments
|
||||
|
||||
# Reserved ranges
|
||||
|
||||
# First Last Count Purpose
|
||||
# 1 127 127 Reserved for Python standard library
|
||||
# 128 191 64 Reserved for Zope
|
||||
# 192 239 48 Reserved for 3rd parties
|
||||
# 240 255 16 Reserved for private use (will never be assigned)
|
||||
# 256 Inf Inf Reserved for future assignment
|
||||
|
||||
# Extension codes are assigned by the Python Software Foundation.
|
|
@ -1,8 +1,9 @@
|
|||
import copy
|
||||
import simplejson as json
|
||||
import types
|
||||
import urlparse
|
||||
|
||||
import appvalidator.python.copy as copy
|
||||
|
||||
from ..constants import DESCRIPTION_TYPES
|
||||
from ..specprocessor import Spec, LITERAL_TYPE
|
||||
|
||||
|
|
|
@ -67,6 +67,14 @@ def prepare_package(err, path, timeout=None):
|
|||
return err
|
||||
|
||||
|
||||
def write_zip_error(err):
|
||||
return err.error(
|
||||
err_id=("submain", "badzipfile"),
|
||||
error="Corrupt ZIP file",
|
||||
description="We were unable to decompress all or part of the zip "
|
||||
"file.")
|
||||
|
||||
|
||||
def test_package(err, file_, name):
|
||||
"""Begins tests for the package."""
|
||||
|
||||
|
@ -80,10 +88,7 @@ def test_package(err, file_, name):
|
|||
error="The package could not be opened.")
|
||||
except (BadZipfile, zlib_error):
|
||||
# Die if the zip file is corrupt.
|
||||
return err.error(
|
||||
err_id=("submain", "_load_install_rdf", "badzipfile"),
|
||||
error="Corrupt ZIP file",
|
||||
description="We were unable to decompress the zip file.")
|
||||
return write_zip_error(err)
|
||||
|
||||
try:
|
||||
output = test_inner_package(err, package)
|
||||
|
@ -109,8 +114,11 @@ def test_inner_package(err, package):
|
|||
err.set_tier(tier)
|
||||
|
||||
# Iterate through each test of our detected type.
|
||||
for test in testcases._get_tests(tier):
|
||||
test(err, package)
|
||||
try:
|
||||
for test in testcases._get_tests(tier):
|
||||
test(err, package)
|
||||
except (BadZipfile, zlib_error):
|
||||
write_zip_error(err)
|
||||
|
||||
# Return any errors at the end of the tier if undetermined.
|
||||
if err.failed(fail_on_warnings=False) and not err.determined:
|
||||
|
|
|
@ -105,11 +105,19 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
|
|||
try:
|
||||
cmd = [shell, "-e", data, "-U"]
|
||||
shell_obj = subprocess.Popen(
|
||||
cmd, shell=False,
|
||||
stderr=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE)
|
||||
cmd, shell=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
|
||||
data, stderr = shell_obj.communicate()
|
||||
# Spidermonkey dropped the -U flag on 29 Oct 2012
|
||||
if stderr and ("Invalid short option: -U" in stderr or
|
||||
"usage: js [options] [scriptfile]" in stderr):
|
||||
cmd.remove("-U")
|
||||
shell_obj = subprocess.Popen(
|
||||
cmd, shell=False,
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
|
||||
data, stderr = shell_obj.communicate()
|
||||
|
||||
if stderr:
|
||||
raise RuntimeError('Error calling %r: %s' % (cmd, stderr))
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ from . import csstester
|
|||
from appvalidator.contextgenerator import ContextGenerator
|
||||
from appvalidator.constants import *
|
||||
from appvalidator.csp import warn as message_csp
|
||||
from patchedhtmlparser import htmlparser, PatchedHTMLParser
|
||||
from appvalidator.python.HTMLParser import HTMLParser
|
||||
|
||||
|
||||
DEBUG = False
|
||||
|
@ -25,11 +25,11 @@ DOM_MUTATION_HANDLERS = set([
|
|||
"ondomnoderemovedfromdocument", "ondomsubtreemodified", ])
|
||||
|
||||
|
||||
class MarkupParser(PatchedHTMLParser):
|
||||
class MarkupParser(HTMLParser):
|
||||
"""Parse and analyze the versious components of markup files."""
|
||||
|
||||
def __init__(self, err, strict=True, debug=False):
|
||||
PatchedHTMLParser.__init__(self)
|
||||
HTMLParser.__init__(self)
|
||||
self.err = err
|
||||
self.is_jetpack = "is_jetpack" in err.metadata # Cache this value.
|
||||
self.line = 0
|
||||
|
|
|
@ -1,97 +0,0 @@
|
|||
import re
|
||||
|
||||
try:
|
||||
import HTMLParser as htmlparser
|
||||
except ImportError: # pragma: no cover
|
||||
import html.parser as htmlparser
|
||||
|
||||
interesting_cdata = re.compile(r'<(/|\Z)')
|
||||
|
||||
|
||||
class PatchedHTMLParser(htmlparser.HTMLParser):
|
||||
"""
|
||||
A version of the Python HTML parser that includes the fixes bundled with
|
||||
the latest versions of Python.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
htmlparser.HTMLParser.__init__(self, *args, **kwargs)
|
||||
# Added as a patch for various Python HTMLParser issues.
|
||||
self.cdata_tag = None
|
||||
|
||||
# Code to fix for Python issue 670664
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = htmlparser.tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = rawdata[i+1:k].lower()
|
||||
|
||||
while k < endpos:
|
||||
m = htmlparser.attrfind.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
self.error("junk characters in start tag: %r"
|
||||
% (rawdata[k:endpos][:20],))
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
def parse_endtag(self, i):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
|
||||
match = htmlparser.endendtag.search(rawdata, i+1) # >
|
||||
if not match:
|
||||
return -1
|
||||
j = match.end()
|
||||
match = htmlparser.endtagfind.match(rawdata, i) # </ + tag + >
|
||||
if not match:
|
||||
if self.cdata_tag is not None:
|
||||
self.handle_data(rawdata[i:j])
|
||||
return j
|
||||
self.error("bad end tag: %r" % (rawdata[i:j],))
|
||||
tag = match.group(1).strip()
|
||||
|
||||
if self.cdata_tag is not None and tag.lower() != self.cdata_tag:
|
||||
self.handle_data(rawdata[i:j])
|
||||
return j
|
||||
|
||||
self.handle_endtag(tag.lower())
|
||||
self.clear_cdata_mode()
|
||||
return j
|
||||
|
||||
def set_cdata_mode(self, tag):
|
||||
self.interesting = interesting_cdata
|
||||
self.cdata_tag = None
|
|
@ -1 +0,0 @@
|
|||
function täst() {}
|
Двоичные данные
tests/resources/controlchars/controlchars_utf-8_warn.js
Двоичные данные
tests/resources/controlchars/controlchars_utf-8_warn.js
Двоичный файл не отображается.
Двоичные данные
tests/resources/corrupt.xpi
Двоичные данные
tests/resources/corrupt.xpi
Двоичный файл не отображается.
|
@ -1,4 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<xml>
|
||||
<thisisa<broken<>>>
|
||||
</xml>
|
|
@ -1,7 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<xml>
|
||||
<!-- There should be comments in the script tag. -->
|
||||
<testscript>
|
||||
<whatever
|
||||
</testscript>
|
||||
</xml>
|
|
@ -35,22 +35,6 @@ class TestControlChars(TestCase):
|
|||
self.assert_failed(with_warnings=True)
|
||||
eq_(self.err.warnings[0]["id"][2], "syntax_error")
|
||||
|
||||
def test_controlchars_utf8_ok(self):
|
||||
"""Test that multi-byte characters are decoded properly (utf-8)."""
|
||||
|
||||
self.run_test("tests/resources/controlchars/controlchars_utf-8_ok.js")
|
||||
self.assert_silent()
|
||||
|
||||
def test_controlchars_utf8_warn(self):
|
||||
"""
|
||||
Tests that multi-byte characters are decoded properly (utf-8) but remaining
|
||||
non-ASCII characters raise warnings.
|
||||
"""
|
||||
|
||||
self.run_test("tests/resources/controlchars/controlchars_utf-8_warn.js")
|
||||
self.assert_failed(with_warnings=True)
|
||||
eq_(self.err.warnings[0]["id"][2], "syntax_error")
|
||||
|
||||
@raises(JSONDecodeError)
|
||||
def test_controlchar_in_webapp(self):
|
||||
"""
|
||||
|
|
|
@ -156,16 +156,6 @@ def test_html_ignore_comment():
|
|||
_test_xul("tests/resources/markup/markuptester/ignore_comments.html")
|
||||
|
||||
|
||||
def test_invalid_markup():
|
||||
"Tests an markup file that is simply broken."
|
||||
|
||||
result = _test_xul("tests/resources/markup/markuptester/bad.xml", True)
|
||||
assert result.warnings
|
||||
result = _test_xul("tests/resources/markup/markuptester/bad_script.xml",
|
||||
False)
|
||||
assert result.notices
|
||||
|
||||
|
||||
def test_bad_encoding():
|
||||
"""Test that bad encodings don't cause the parser to fail."""
|
||||
_test_xul("tests/resources/markup/encoding.txt")
|
||||
|
|
|
@ -32,14 +32,4 @@ class TestSubmainPackage(TestCase):
|
|||
with open(name) as pack:
|
||||
result = submain.test_package(self.err, pack, name)
|
||||
|
||||
self.assert_failed()
|
||||
|
||||
def test_package_corrupt(self):
|
||||
"Tests the test_package function fails with a corrupt file"
|
||||
|
||||
self.setup_err()
|
||||
|
||||
name = "tests/resources/corrupt.xpi"
|
||||
result = submain.test_package(self.err, name, name)
|
||||
|
||||
self.assert_failed(with_errors=True, with_warnings=True)
|
||||
assert self.err.errors
|
||||
|
|
|
@ -72,15 +72,3 @@ class TestBadZipFile(TestCase):
|
|||
def test_missing_file(self):
|
||||
"""Tests that the XPI manager correctly reports a missing XPI file."""
|
||||
ZipPackage("foo.bar")
|
||||
|
||||
def test_corrupt_zip(self):
|
||||
"""Tests that the XPI manager correctly reports a missing XPI file."""
|
||||
x = ZipPackage(get_path("corrupt.xpi"))
|
||||
try:
|
||||
x.read("install.rdf")
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
raise "Exception should have been raised on corrupt file access."
|
||||
|
||||
assert "install.rdf" in x.broken_files
|
||||
|
|
Загрузка…
Ссылка в новой задаче