diff --git a/appvalidator/python/HTMLParser.py b/appvalidator/python/HTMLParser.py new file mode 100644 index 0000000..b336a4c --- /dev/null +++ b/appvalidator/python/HTMLParser.py @@ -0,0 +1,472 @@ +"""A parser for HTML and XHTML.""" + +# This file is based on sgmllib.py, but the API is slightly different. + +# XXX There should be a way to distinguish between PCDATA (parsed +# character data -- the normal case), RCDATA (replaceable character +# data -- only char and entity references and end tags are special) +# and CDATA (character data -- only end tags are special). + + +import markupbase +import re + +# Regular expressions used for parsing + +interesting_normal = re.compile('[&<]') +incomplete = re.compile('&[a-zA-Z#]') + +entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') +charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') + +starttagopen = re.compile('<[a-zA-Z]') +piclose = re.compile('>') +commentclose = re.compile(r'--\s*>') +tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*') +# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state +# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state +tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*') + +attrfind = re.compile( + r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' + r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') + +locatestarttagend = re.compile(r""" + <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name + (?:[\s/]* # optional whitespace before attribute name + (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name + (?:\s*=+\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\s]* # bare value + ) + )?(?:\s|/(?!>))* + )* + )? + \s* # trailing whitespace +""", re.VERBOSE) +endendtag = re.compile('>') +# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between +# ') + + +class HTMLParseError(Exception): + """Exception raised for all parse errors.""" + + def __init__(self, msg, position=(None, None)): + assert msg + self.msg = msg + self.lineno = position[0] + self.offset = position[1] + + def __str__(self): + result = self.msg + if self.lineno is not None: + result = result + ", at line %d" % self.lineno + if self.offset is not None: + result = result + ", column %d" % (self.offset + 1) + return result + + +class HTMLParser(markupbase.ParserBase): + """Find tags and other markup and call handler functions. + + Usage: + p = HTMLParser() + p.feed(data) + ... + p.close() + + Start tags are handled by calling self.handle_starttag() or + self.handle_startendtag(); end tags by self.handle_endtag(). The + data between tags is passed from the parser to the derived class + by calling self.handle_data() with the data as argument (the data + may be split up in arbitrary chunks). Entity references are + passed by calling self.handle_entityref() with the entity + reference as the argument. Numeric character references are + passed to self.handle_charref() with the string containing the + reference as the argument. + """ + + CDATA_CONTENT_ELEMENTS = ("script", "style") + + + def __init__(self): + """Initialize and reset this instance.""" + self.reset() + + def reset(self): + """Reset this instance. Loses all unprocessed data.""" + self.rawdata = '' + self.lasttag = '???' + self.interesting = interesting_normal + self.cdata_elem = None + markupbase.ParserBase.reset(self) + + def feed(self, data): + r"""Feed data to the parser. + + Call this as often as you want, with as little or as much text + as you want (may include '\n'). + """ + self.rawdata = self.rawdata + data + self.goahead(0) + + def close(self): + """Handle any buffered data.""" + self.goahead(1) + + def error(self, message): + raise HTMLParseError(message, self.getpos()) + + __starttag_text = None + + def get_starttag_text(self): + """Return full source of start tag: '<...>'.""" + return self.__starttag_text + + def set_cdata_mode(self, elem): + self.cdata_elem = elem.lower() + self.interesting = re.compile(r'' % self.cdata_elem, re.I) + + def clear_cdata_mode(self): + self.interesting = interesting_normal + self.cdata_elem = None + + # Internal -- handle data as far as reasonable. May leave state + # and data to be processed by a subsequent call. If 'end' is + # true, force handling all data as if followed by EOF marker. + def goahead(self, end): + rawdata = self.rawdata + i = 0 + n = len(rawdata) + while i < n: + match = self.interesting.search(rawdata, i) # < or & + if match: + j = match.start() + else: + if self.cdata_elem: + break + j = n + if i < j: self.handle_data(rawdata[i:j]) + i = self.updatepos(i, j) + if i == n: break + startswith = rawdata.startswith + if startswith('<', i): + if starttagopen.match(rawdata, i): # < + letter + k = self.parse_starttag(i) + elif startswith("', i + 1) + if k < 0: + k = rawdata.find('<', i + 1) + if k < 0: + k = i + 1 + else: + k += 1 + self.handle_data(rawdata[i:k]) + i = self.updatepos(i, k) + elif startswith("&#", i): + match = charref.match(rawdata, i) + if match: + name = match.group()[2:-1] + self.handle_charref(name) + k = match.end() + if not startswith(';', k-1): + k = k - 1 + i = self.updatepos(i, k) + continue + else: + if ";" in rawdata[i:]: #bail by consuming &# + self.handle_data(rawdata[0:2]) + i = self.updatepos(i, 2) + break + elif startswith('&', i): + match = entityref.match(rawdata, i) + if match: + name = match.group(1) + self.handle_entityref(name) + k = match.end() + if not startswith(';', k-1): + k = k - 1 + i = self.updatepos(i, k) + continue + match = incomplete.match(rawdata, i) + if match: + # match.group() will contain at least 2 chars + if end and match.group() == rawdata[i:]: + self.error("EOF in middle of entity or char ref") + # incomplete + break + elif (i + 1) < n: + # not the end of the buffer, and can't be confused + # with some other construct + self.handle_data("&") + i = self.updatepos(i, i + 1) + else: + break + else: + assert 0, "interesting.search() lied" + # end while + if end and i < n and not self.cdata_elem: + self.handle_data(rawdata[i:n]) + i = self.updatepos(i, n) + self.rawdata = rawdata[i:] + + # Internal -- parse html declarations, return length or -1 if not terminated + # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state + # See also parse_declaration in _markupbase + def parse_html_declaration(self, i): + rawdata = self.rawdata + if rawdata[i:i+2] != ' + gtpos = rawdata.find('>', i+9) + if gtpos == -1: + return -1 + self.handle_decl(rawdata[i+2:gtpos]) + return gtpos+1 + else: + return self.parse_bogus_comment(i) + + # Internal -- parse bogus comment, return length or -1 if not terminated + # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state + def parse_bogus_comment(self, i, report=1): + rawdata = self.rawdata + if rawdata[i:i+2] not in ('', i+2) + if pos == -1: + return -1 + if report: + self.handle_comment(rawdata[i+2:pos]) + return pos + 1 + + # Internal -- parse processing instr, return end or -1 if not terminated + def parse_pi(self, i): + rawdata = self.rawdata + assert rawdata[i:i+2] == ' + if not match: + return -1 + j = match.start() + self.handle_pi(rawdata[i+2: j]) + j = match.end() + return j + + # Internal -- handle starttag, return end or -1 if not terminated + def parse_starttag(self, i): + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = tagfind.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = match.group(1).lower() + + while k < endpos: + m = attrfind.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = self.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") + else: + offset = offset + len(self.__starttag_text) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: + self.handle_startendtag(tag, attrs) + else: + self.handle_starttag(tag, attrs) + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + return endpos + + # Internal -- check to see if we have a complete starttag; return end + # or -1 if incomplete. + def check_for_whole_start_tag(self, i): + rawdata = self.rawdata + m = locatestarttagend.match(rawdata, i) + if m: + j = m.end() + next = rawdata[j:j+1] + if next == ">": + return j + 1 + if next == "/": + if rawdata.startswith("/>", j): + return j + 2 + if rawdata.startswith("/", j): + # buffer boundary + return -1 + # else bogus input + self.updatepos(i, j + 1) + self.error("malformed empty start tag") + if next == "": + # end of input + return -1 + if next in ("abcdefghijklmnopqrstuvwxyz=/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): + # end of input in or before attribute value, or we have the + # '/' from a '/>' ending + return -1 + if j > i: + return j + else: + return i + 1 + raise AssertionError("we should not get here!") + + # Internal -- parse endtag, return end or -1 if incomplete + def parse_endtag(self, i): + rawdata = self.rawdata + assert rawdata[i:i+2] == " + if not match: + return -1 + gtpos = match.end() + match = endtagfind.match(rawdata, i) # + if not match: + if self.cdata_elem is not None: + self.handle_data(rawdata[i:gtpos]) + return gtpos + # find the name: w3.org/TR/html5/tokenization.html#tag-name-state + namematch = tagfind_tolerant.match(rawdata, i+2) + if not namematch: + # w3.org/TR/html5/tokenization.html#end-tag-open-state + if rawdata[i:i+3] == '': + return i+3 + else: + return self.parse_bogus_comment(i) + tagname = namematch.group().lower() + # consume and ignore other stuff between the name and the > + # Note: this is not 100% correct, since we might have things like + # , but looking for > after tha name should cover + # most of the cases and is much simpler + gtpos = rawdata.find('>', namematch.end()) + self.handle_endtag(tagname) + return gtpos+1 + + elem = match.group(1).lower() # script or style + if self.cdata_elem is not None: + if elem != self.cdata_elem: + self.handle_data(rawdata[i:gtpos]) + return gtpos + + self.handle_endtag(elem) + self.clear_cdata_mode() + return gtpos + + # Overridable -- finish processing of start+end tag: + def handle_startendtag(self, tag, attrs): + self.handle_starttag(tag, attrs) + self.handle_endtag(tag) + + # Overridable -- handle start tag + def handle_starttag(self, tag, attrs): + pass + + # Overridable -- handle end tag + def handle_endtag(self, tag): + pass + + # Overridable -- handle character reference + def handle_charref(self, name): + pass + + # Overridable -- handle entity reference + def handle_entityref(self, name): + pass + + # Overridable -- handle data + def handle_data(self, data): + pass + + # Overridable -- handle comment + def handle_comment(self, data): + pass + + # Overridable -- handle declaration + def handle_decl(self, decl): + pass + + # Overridable -- handle processing instruction + def handle_pi(self, data): + pass + + def unknown_decl(self, data): + pass + + # Internal -- helper to remove special character quoting + entitydefs = None + def unescape(self, s): + if '&' not in s: + return s + def replaceEntities(s): + s = s.groups()[0] + try: + if s[0] == "#": + s = s[1:] + if s[0] in ['x','X']: + c = int(s[1:], 16) + else: + c = int(s) + return unichr(c) + except ValueError: + return '&#'+s+';' + else: + # Cannot use name2codepoint directly, because HTMLParser supports apos, + # which is not part of HTML 4 + import htmlentitydefs + if HTMLParser.entitydefs is None: + entitydefs = HTMLParser.entitydefs = {'apos':u"'"} + for k, v in htmlentitydefs.name2codepoint.iteritems(): + entitydefs[k] = unichr(v) + try: + return self.entitydefs[s] + except KeyError: + return '&'+s+';' + + return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s) diff --git a/appvalidator/python/NOTICE.md b/appvalidator/python/NOTICE.md new file mode 100644 index 0000000..e6bedad --- /dev/null +++ b/appvalidator/python/NOTICE.md @@ -0,0 +1,6 @@ +Notice +====== + +The files in this directory are extracted from the CPython standard library. +They are included solely for compatibility purposes, and their respective +copyrights and licenses are held and controlled by the original developer(s). diff --git a/appvalidator/python/__init__.py b/appvalidator/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/appvalidator/python/copy.py b/appvalidator/python/copy.py new file mode 100644 index 0000000..c227a2e --- /dev/null +++ b/appvalidator/python/copy.py @@ -0,0 +1,433 @@ +"""Generic (shallow and deep) copying operations. + +Interface summary: + + import copy + + x = copy.copy(y) # make a shallow copy of y + x = copy.deepcopy(y) # make a deep copy of y + +For module specific errors, copy.Error is raised. + +The difference between shallow and deep copying is only relevant for +compound objects (objects that contain other objects, like lists or +class instances). + +- A shallow copy constructs a new compound object and then (to the + extent possible) inserts *the same objects* into it that the + original contains. + +- A deep copy constructs a new compound object and then, recursively, + inserts *copies* into it of the objects found in the original. + +Two problems often exist with deep copy operations that don't exist +with shallow copy operations: + + a) recursive objects (compound objects that, directly or indirectly, + contain a reference to themselves) may cause a recursive loop + + b) because deep copy copies *everything* it may copy too much, e.g. + administrative data structures that should be shared even between + copies + +Python's deep copy operation avoids these problems by: + + a) keeping a table of objects already copied during the current + copying pass + + b) letting user-defined classes override the copying operation or the + set of components copied + +This version does not copy types like module, class, function, method, +nor stack trace, stack frame, nor file, socket, window, nor array, nor +any similar types. + +Classes can use the same interfaces to control copying that they use +to control pickling: they can define methods called __getinitargs__(), +__getstate__() and __setstate__(). See the documentation for module +"pickle" for information on these methods. +""" + +import types +import weakref +from copy_reg import dispatch_table + +class Error(Exception): + pass +error = Error # backward compatibility + +try: + from org.python.core import PyStringMap +except ImportError: + PyStringMap = None + +__all__ = ["Error", "copy", "deepcopy"] + +def copy(x): + """Shallow copy operation on arbitrary Python objects. + + See the module's __doc__ string for more info. + """ + + cls = type(x) + + copier = _copy_dispatch.get(cls) + if copier: + return copier(x) + + copier = getattr(cls, "__copy__", None) + if copier: + return copier(x) + + reductor = dispatch_table.get(cls) + if reductor: + rv = reductor(x) + else: + reductor = getattr(x, "__reduce_ex__", None) + if reductor: + rv = reductor(2) + else: + reductor = getattr(x, "__reduce__", None) + if reductor: + rv = reductor() + else: + raise Error("un(shallow)copyable object of type %s" % cls) + + return _reconstruct(x, rv, 0) + + +_copy_dispatch = d = {} + +def _copy_immutable(x): + return x +for t in (type(None), int, long, float, bool, str, tuple, + frozenset, type, xrange, types.ClassType, + types.BuiltinFunctionType, type(Ellipsis), + types.FunctionType, weakref.ref): + d[t] = _copy_immutable +for name in ("ComplexType", "UnicodeType", "CodeType"): + t = getattr(types, name, None) + if t is not None: + d[t] = _copy_immutable + +def _copy_with_constructor(x): + return type(x)(x) +for t in (list, dict, set): + d[t] = _copy_with_constructor + +def _copy_with_copy_method(x): + return x.copy() +if PyStringMap is not None: + d[PyStringMap] = _copy_with_copy_method + +def _copy_inst(x): + if hasattr(x, '__copy__'): + return x.__copy__() + if hasattr(x, '__getinitargs__'): + args = x.__getinitargs__() + y = x.__class__(*args) + else: + y = _EmptyClass() + y.__class__ = x.__class__ + if hasattr(x, '__getstate__'): + state = x.__getstate__() + else: + state = x.__dict__ + if hasattr(y, '__setstate__'): + y.__setstate__(state) + else: + y.__dict__.update(state) + return y +d[types.InstanceType] = _copy_inst + +del d + +def deepcopy(x, memo=None, _nil=[]): + """Deep copy operation on arbitrary Python objects. + + See the module's __doc__ string for more info. + """ + + if memo is None: + memo = {} + + d = id(x) + y = memo.get(d, _nil) + if y is not _nil: + return y + + cls = type(x) + + copier = _deepcopy_dispatch.get(cls) + if copier: + y = copier(x, memo) + else: + try: + issc = issubclass(cls, type) + except TypeError: # cls is not a class (old Boost; see SF #502085) + issc = 0 + if issc: + y = _deepcopy_atomic(x, memo) + else: + copier = getattr(x, "__deepcopy__", None) + if copier: + y = copier(memo) + else: + reductor = dispatch_table.get(cls) + if reductor: + rv = reductor(x) + else: + reductor = getattr(x, "__reduce_ex__", None) + if reductor: + rv = reductor(2) + else: + reductor = getattr(x, "__reduce__", None) + if reductor: + rv = reductor() + else: + raise Error( + "un(deep)copyable object of type %s" % cls) + y = _reconstruct(x, rv, 1, memo) + + memo[d] = y + _keep_alive(x, memo) # Make sure x lives at least as long as d + return y + +_deepcopy_dispatch = d = {} + +def _deepcopy_atomic(x, memo): + return x +d[type(None)] = _deepcopy_atomic +d[type(Ellipsis)] = _deepcopy_atomic +d[int] = _deepcopy_atomic +d[long] = _deepcopy_atomic +d[float] = _deepcopy_atomic +d[bool] = _deepcopy_atomic +try: + d[complex] = _deepcopy_atomic +except NameError: + pass +d[str] = _deepcopy_atomic +try: + d[unicode] = _deepcopy_atomic +except NameError: + pass +try: + d[types.CodeType] = _deepcopy_atomic +except AttributeError: + pass +d[type] = _deepcopy_atomic +d[xrange] = _deepcopy_atomic +d[types.ClassType] = _deepcopy_atomic +d[types.BuiltinFunctionType] = _deepcopy_atomic +d[types.FunctionType] = _deepcopy_atomic +d[weakref.ref] = _deepcopy_atomic + +def _deepcopy_list(x, memo): + y = [] + memo[id(x)] = y + for a in x: + y.append(deepcopy(a, memo)) + return y +d[list] = _deepcopy_list + +def _deepcopy_tuple(x, memo): + y = [] + for a in x: + y.append(deepcopy(a, memo)) + d = id(x) + try: + return memo[d] + except KeyError: + pass + for i in range(len(x)): + if x[i] is not y[i]: + y = tuple(y) + break + else: + y = x + memo[d] = y + return y +d[tuple] = _deepcopy_tuple + +def _deepcopy_dict(x, memo): + y = {} + memo[id(x)] = y + for key, value in x.iteritems(): + y[deepcopy(key, memo)] = deepcopy(value, memo) + return y +d[dict] = _deepcopy_dict +if PyStringMap is not None: + d[PyStringMap] = _deepcopy_dict + +def _deepcopy_method(x, memo): # Copy instance methods + return type(x)(x.im_func, deepcopy(x.im_self, memo), x.im_class) +_deepcopy_dispatch[types.MethodType] = _deepcopy_method + +def _keep_alive(x, memo): + """Keeps a reference to the object x in the memo. + + Because we remember objects by their id, we have + to assure that possibly temporary objects are kept + alive by referencing them. + We store a reference at the id of the memo, which should + normally not be used unless someone tries to deepcopy + the memo itself... + """ + try: + memo[id(memo)].append(x) + except KeyError: + # aha, this is the first one :-) + memo[id(memo)]=[x] + +def _deepcopy_inst(x, memo): + if hasattr(x, '__deepcopy__'): + return x.__deepcopy__(memo) + if hasattr(x, '__getinitargs__'): + args = x.__getinitargs__() + args = deepcopy(args, memo) + y = x.__class__(*args) + else: + y = _EmptyClass() + y.__class__ = x.__class__ + memo[id(x)] = y + if hasattr(x, '__getstate__'): + state = x.__getstate__() + else: + state = x.__dict__ + state = deepcopy(state, memo) + if hasattr(y, '__setstate__'): + y.__setstate__(state) + else: + y.__dict__.update(state) + return y +d[types.InstanceType] = _deepcopy_inst + +def _reconstruct(x, info, deep, memo=None): + if isinstance(info, str): + return x + assert isinstance(info, tuple) + if memo is None: + memo = {} + n = len(info) + assert n in (2, 3, 4, 5) + callable, args = info[:2] + if n > 2: + state = info[2] + else: + state = {} + if n > 3: + listiter = info[3] + else: + listiter = None + if n > 4: + dictiter = info[4] + else: + dictiter = None + if deep: + args = deepcopy(args, memo) + y = callable(*args) + memo[id(x)] = y + + if state: + if deep: + state = deepcopy(state, memo) + if hasattr(y, '__setstate__'): + y.__setstate__(state) + else: + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + else: + slotstate = None + if state is not None: + y.__dict__.update(state) + if slotstate is not None: + for key, value in slotstate.iteritems(): + setattr(y, key, value) + + if listiter is not None: + for item in listiter: + if deep: + item = deepcopy(item, memo) + y.append(item) + if dictiter is not None: + for key, value in dictiter: + if deep: + key = deepcopy(key, memo) + value = deepcopy(value, memo) + y[key] = value + return y + +del d + +del types + +# Helper for instance creation without calling __init__ +class _EmptyClass: + pass + +def _test(): + l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'], + {'abc': 'ABC'}, (), [], {}] + l1 = copy(l) + print l1==l + l1 = map(copy, l) + print l1==l + l1 = deepcopy(l) + print l1==l + class C: + def __init__(self, arg=None): + self.a = 1 + self.arg = arg + if __name__ == '__main__': + import sys + file = sys.argv[0] + else: + file = __file__ + self.fp = open(file) + self.fp.close() + def __getstate__(self): + return {'a': self.a, 'arg': self.arg} + def __setstate__(self, state): + for key, value in state.iteritems(): + setattr(self, key, value) + def __deepcopy__(self, memo=None): + new = self.__class__(deepcopy(self.arg, memo)) + new.a = self.a + return new + c = C('argument sketch') + l.append(c) + l2 = copy(l) + print l == l2 + print l + print l2 + l2 = deepcopy(l) + print l == l2 + print l + print l2 + l.append({l[1]: l, 'xyz': l[2]}) + l3 = copy(l) + import repr + print map(repr.repr, l) + print map(repr.repr, l1) + print map(repr.repr, l2) + print map(repr.repr, l3) + l3 = deepcopy(l) + import repr + print map(repr.repr, l) + print map(repr.repr, l1) + print map(repr.repr, l2) + print map(repr.repr, l3) + class odict(dict): + def __init__(self, d = {}): + self.a = 99 + dict.__init__(self, d) + def __setitem__(self, k, i): + dict.__setitem__(self, k, i) + self.a + o = odict({"A" : "B"}) + x = deepcopy(o) + print(o, x) + +if __name__ == '__main__': + _test() diff --git a/appvalidator/python/copy_reg.py b/appvalidator/python/copy_reg.py new file mode 100644 index 0000000..db17150 --- /dev/null +++ b/appvalidator/python/copy_reg.py @@ -0,0 +1,201 @@ +"""Helper to provide extensibility for pickle/cPickle. + +This is only useful to add pickle support for extension types defined in +C, not for instances of user-defined classes. +""" + +from types import ClassType as _ClassType + +__all__ = ["pickle", "constructor", + "add_extension", "remove_extension", "clear_extension_cache"] + +dispatch_table = {} + +def pickle(ob_type, pickle_function, constructor_ob=None): + if type(ob_type) is _ClassType: + raise TypeError("copy_reg is not intended for use with classes") + + if not hasattr(pickle_function, '__call__'): + raise TypeError("reduction functions must be callable") + dispatch_table[ob_type] = pickle_function + + # The constructor_ob function is a vestige of safe for unpickling. + # There is no reason for the caller to pass it anymore. + if constructor_ob is not None: + constructor(constructor_ob) + +def constructor(object): + if not hasattr(object, '__call__'): + raise TypeError("constructors must be callable") + +# Example: provide pickling support for complex numbers. + +try: + complex +except NameError: + pass +else: + + def pickle_complex(c): + return complex, (c.real, c.imag) + + pickle(complex, pickle_complex, complex) + +# Support for pickling new-style objects + +def _reconstructor(cls, base, state): + if base is object: + obj = object.__new__(cls) + else: + obj = base.__new__(cls, state) + if base.__init__ != object.__init__: + base.__init__(obj, state) + return obj + +_HEAPTYPE = 1<<9 + +# Python code for object.__reduce_ex__ for protocols 0 and 1 + +def _reduce_ex(self, proto): + assert proto < 2 + for base in self.__class__.__mro__: + if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE: + break + else: + base = object # not really reachable + if base is object: + state = None + else: + if base is self.__class__: + raise TypeError, "can't pickle %s objects" % base.__name__ + state = base(self) + args = (self.__class__, base, state) + try: + getstate = self.__getstate__ + except AttributeError: + if getattr(self, "__slots__", None): + raise TypeError("a class that defines __slots__ without " + "defining __getstate__ cannot be pickled") + try: + dict = self.__dict__ + except AttributeError: + dict = None + else: + dict = getstate() + if dict: + return _reconstructor, args, dict + else: + return _reconstructor, args + +# Helper for __reduce_ex__ protocol 2 + +def __newobj__(cls, *args): + return cls.__new__(cls, *args) + +def _slotnames(cls): + """Return a list of slot names for a given class. + + This needs to find slots defined by the class and its bases, so we + can't simply return the __slots__ attribute. We must walk down + the Method Resolution Order and concatenate the __slots__ of each + class found there. (This assumes classes don't modify their + __slots__ attribute to misrepresent their slots after the class is + defined.) + """ + + # Get the value from a cache in the class if possible + names = cls.__dict__.get("__slotnames__") + if names is not None: + return names + + # Not cached -- calculate the value + names = [] + if not hasattr(cls, "__slots__"): + # This class has no slots + pass + else: + # Slots found -- gather slot names from all base classes + for c in cls.__mro__: + if "__slots__" in c.__dict__: + slots = c.__dict__['__slots__'] + # if class has a single slot, it can be given as a string + if isinstance(slots, basestring): + slots = (slots,) + for name in slots: + # special descriptors + if name in ("__dict__", "__weakref__"): + continue + # mangled names + elif name.startswith('__') and not name.endswith('__'): + names.append('_%s%s' % (c.__name__, name)) + else: + names.append(name) + + # Cache the outcome in the class if at all possible + try: + cls.__slotnames__ = names + except: + pass # But don't die if we can't + + return names + +# A registry of extension codes. This is an ad-hoc compression +# mechanism. Whenever a global reference to , is about +# to be pickled, the (, ) tuple is looked up here to see +# if it is a registered extension code for it. Extension codes are +# universal, so that the meaning of a pickle does not depend on +# context. (There are also some codes reserved for local use that +# don't have this restriction.) Codes are positive ints; 0 is +# reserved. + +_extension_registry = {} # key -> code +_inverted_registry = {} # code -> key +_extension_cache = {} # code -> object +# Don't ever rebind those names: cPickle grabs a reference to them when +# it's initialized, and won't see a rebinding. + +def add_extension(module, name, code): + """Register an extension code.""" + code = int(code) + if not 1 <= code <= 0x7fffffff: + raise ValueError, "code out of range" + key = (module, name) + if (_extension_registry.get(key) == code and + _inverted_registry.get(code) == key): + return # Redundant registrations are benign + if key in _extension_registry: + raise ValueError("key %s is already registered with code %s" % + (key, _extension_registry[key])) + if code in _inverted_registry: + raise ValueError("code %s is already in use for key %s" % + (code, _inverted_registry[code])) + _extension_registry[key] = code + _inverted_registry[code] = key + +def remove_extension(module, name, code): + """Unregister an extension code. For testing only.""" + key = (module, name) + if (_extension_registry.get(key) != code or + _inverted_registry.get(code) != key): + raise ValueError("key %s is not registered with code %s" % + (key, code)) + del _extension_registry[key] + del _inverted_registry[code] + if code in _extension_cache: + del _extension_cache[code] + +def clear_extension_cache(): + _extension_cache.clear() + +# Standard extension code assignments + +# Reserved ranges + +# First Last Count Purpose +# 1 127 127 Reserved for Python standard library +# 128 191 64 Reserved for Zope +# 192 239 48 Reserved for 3rd parties +# 240 255 16 Reserved for private use (will never be assigned) +# 256 Inf Inf Reserved for future assignment + +# Extension codes are assigned by the Python Software Foundation. diff --git a/appvalidator/specs/webapps.py b/appvalidator/specs/webapps.py index e91485c..e695ea3 100644 --- a/appvalidator/specs/webapps.py +++ b/appvalidator/specs/webapps.py @@ -1,8 +1,9 @@ -import copy import simplejson as json import types import urlparse +import appvalidator.python.copy as copy + from ..constants import DESCRIPTION_TYPES from ..specprocessor import Spec, LITERAL_TYPE diff --git a/appvalidator/submain.py b/appvalidator/submain.py index 75bed7f..48938ea 100644 --- a/appvalidator/submain.py +++ b/appvalidator/submain.py @@ -67,6 +67,14 @@ def prepare_package(err, path, timeout=None): return err +def write_zip_error(err): + return err.error( + err_id=("submain", "badzipfile"), + error="Corrupt ZIP file", + description="We were unable to decompress all or part of the zip " + "file.") + + def test_package(err, file_, name): """Begins tests for the package.""" @@ -80,10 +88,7 @@ def test_package(err, file_, name): error="The package could not be opened.") except (BadZipfile, zlib_error): # Die if the zip file is corrupt. - return err.error( - err_id=("submain", "_load_install_rdf", "badzipfile"), - error="Corrupt ZIP file", - description="We were unable to decompress the zip file.") + return write_zip_error(err) try: output = test_inner_package(err, package) @@ -109,8 +114,11 @@ def test_inner_package(err, package): err.set_tier(tier) # Iterate through each test of our detected type. - for test in testcases._get_tests(tier): - test(err, package) + try: + for test in testcases._get_tests(tier): + test(err, package) + except (BadZipfile, zlib_error): + write_zip_error(err) # Return any errors at the end of the tier if undetermined. if err.failed(fail_on_warnings=False) and not err.determined: diff --git a/appvalidator/testcases/javascript/spidermonkey.py b/appvalidator/testcases/javascript/spidermonkey.py index 5241703..ea61ba7 100644 --- a/appvalidator/testcases/javascript/spidermonkey.py +++ b/appvalidator/testcases/javascript/spidermonkey.py @@ -105,11 +105,19 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION): try: cmd = [shell, "-e", data, "-U"] shell_obj = subprocess.Popen( - cmd, shell=False, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE) + cmd, shell=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE) data, stderr = shell_obj.communicate() + # Spidermonkey dropped the -U flag on 29 Oct 2012 + if stderr and ("Invalid short option: -U" in stderr or + "usage: js [options] [scriptfile]" in stderr): + cmd.remove("-U") + shell_obj = subprocess.Popen( + cmd, shell=False, + stderr=subprocess.PIPE, stdout=subprocess.PIPE) + + data, stderr = shell_obj.communicate() + if stderr: raise RuntimeError('Error calling %r: %s' % (cmd, stderr)) diff --git a/appvalidator/testcases/markup/markuptester.py b/appvalidator/testcases/markup/markuptester.py index d18278e..bf7e9b3 100644 --- a/appvalidator/testcases/markup/markuptester.py +++ b/appvalidator/testcases/markup/markuptester.py @@ -7,7 +7,7 @@ from . import csstester from appvalidator.contextgenerator import ContextGenerator from appvalidator.constants import * from appvalidator.csp import warn as message_csp -from patchedhtmlparser import htmlparser, PatchedHTMLParser +from appvalidator.python.HTMLParser import HTMLParser DEBUG = False @@ -25,11 +25,11 @@ DOM_MUTATION_HANDLERS = set([ "ondomnoderemovedfromdocument", "ondomsubtreemodified", ]) -class MarkupParser(PatchedHTMLParser): +class MarkupParser(HTMLParser): """Parse and analyze the versious components of markup files.""" def __init__(self, err, strict=True, debug=False): - PatchedHTMLParser.__init__(self) + HTMLParser.__init__(self) self.err = err self.is_jetpack = "is_jetpack" in err.metadata # Cache this value. self.line = 0 diff --git a/appvalidator/testcases/markup/patchedhtmlparser.py b/appvalidator/testcases/markup/patchedhtmlparser.py deleted file mode 100644 index be9075b..0000000 --- a/appvalidator/testcases/markup/patchedhtmlparser.py +++ /dev/null @@ -1,97 +0,0 @@ -import re - -try: - import HTMLParser as htmlparser -except ImportError: # pragma: no cover - import html.parser as htmlparser - -interesting_cdata = re.compile(r'<(/|\Z)') - - -class PatchedHTMLParser(htmlparser.HTMLParser): - """ - A version of the Python HTML parser that includes the fixes bundled with - the latest versions of Python. - """ - - def __init__(self, *args, **kwargs): - htmlparser.HTMLParser.__init__(self, *args, **kwargs) - # Added as a patch for various Python HTMLParser issues. - self.cdata_tag = None - - # Code to fix for Python issue 670664 - def parse_starttag(self, i): - self.__starttag_text = None - endpos = self.check_for_whole_start_tag(i) - if endpos < 0: - return endpos - rawdata = self.rawdata - self.__starttag_text = rawdata[i:endpos] - - # Now parse the data between i+1 and j into a tag and attrs - attrs = [] - match = htmlparser.tagfind.match(rawdata, i+1) - assert match, 'unexpected call to parse_starttag()' - k = match.end() - self.lasttag = tag = rawdata[i+1:k].lower() - - while k < endpos: - m = htmlparser.attrfind.match(rawdata, k) - if not m: - break - attrname, rest, attrvalue = m.group(1, 2, 3) - if not rest: - attrvalue = None - elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: - attrvalue = attrvalue[1:-1] - attrvalue = self.unescape(attrvalue) - attrs.append((attrname.lower(), attrvalue)) - k = m.end() - - end = rawdata[k:endpos].strip() - if end not in (">", "/>"): - lineno, offset = self.getpos() - if "\n" in self.__starttag_text: - lineno = lineno + self.__starttag_text.count("\n") - offset = len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") - else: - offset = offset + len(self.__starttag_text) - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) - if end.endswith('/>'): - # XHTML-style empty tag: - self.handle_startendtag(tag, attrs) - else: - self.handle_starttag(tag, attrs) - if tag in self.CDATA_CONTENT_ELEMENTS: - self.set_cdata_mode(tag) - return endpos - - def parse_endtag(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == " - if not match: - return -1 - j = match.end() - match = htmlparser.endtagfind.match(rawdata, i) # - if not match: - if self.cdata_tag is not None: - self.handle_data(rawdata[i:j]) - return j - self.error("bad end tag: %r" % (rawdata[i:j],)) - tag = match.group(1).strip() - - if self.cdata_tag is not None and tag.lower() != self.cdata_tag: - self.handle_data(rawdata[i:j]) - return j - - self.handle_endtag(tag.lower()) - self.clear_cdata_mode() - return j - - def set_cdata_mode(self, tag): - self.interesting = interesting_cdata - self.cdata_tag = None diff --git a/tests/resources/controlchars/controlchars_utf-8_ok.js b/tests/resources/controlchars/controlchars_utf-8_ok.js deleted file mode 100644 index b832fba..0000000 --- a/tests/resources/controlchars/controlchars_utf-8_ok.js +++ /dev/null @@ -1 +0,0 @@ -function täst() {} diff --git a/tests/resources/controlchars/controlchars_utf-8_warn.js b/tests/resources/controlchars/controlchars_utf-8_warn.js deleted file mode 100644 index 8c3c6d7..0000000 Binary files a/tests/resources/controlchars/controlchars_utf-8_warn.js and /dev/null differ diff --git a/tests/resources/corrupt.xpi b/tests/resources/corrupt.xpi index be8e900..67ed879 100644 Binary files a/tests/resources/corrupt.xpi and b/tests/resources/corrupt.xpi differ diff --git a/tests/resources/markup/markuptester/bad.xml b/tests/resources/markup/markuptester/bad.xml deleted file mode 100644 index cfc14dd..0000000 --- a/tests/resources/markup/markuptester/bad.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - >> - \ No newline at end of file diff --git a/tests/resources/markup/markuptester/bad_script.xml b/tests/resources/markup/markuptester/bad_script.xml deleted file mode 100644 index 0b3c12b..0000000 --- a/tests/resources/markup/markuptester/bad_script.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/tests/test_controlchars.py b/tests/test_controlchars.py index 23fc6d2..95da743 100644 --- a/tests/test_controlchars.py +++ b/tests/test_controlchars.py @@ -35,22 +35,6 @@ class TestControlChars(TestCase): self.assert_failed(with_warnings=True) eq_(self.err.warnings[0]["id"][2], "syntax_error") - def test_controlchars_utf8_ok(self): - """Test that multi-byte characters are decoded properly (utf-8).""" - - self.run_test("tests/resources/controlchars/controlchars_utf-8_ok.js") - self.assert_silent() - - def test_controlchars_utf8_warn(self): - """ - Tests that multi-byte characters are decoded properly (utf-8) but remaining - non-ASCII characters raise warnings. - """ - - self.run_test("tests/resources/controlchars/controlchars_utf-8_warn.js") - self.assert_failed(with_warnings=True) - eq_(self.err.warnings[0]["id"][2], "syntax_error") - @raises(JSONDecodeError) def test_controlchar_in_webapp(self): """ diff --git a/tests/test_markup_markuptester.py b/tests/test_markup_markuptester.py index 667a617..06aec46 100644 --- a/tests/test_markup_markuptester.py +++ b/tests/test_markup_markuptester.py @@ -156,16 +156,6 @@ def test_html_ignore_comment(): _test_xul("tests/resources/markup/markuptester/ignore_comments.html") -def test_invalid_markup(): - "Tests an markup file that is simply broken." - - result = _test_xul("tests/resources/markup/markuptester/bad.xml", True) - assert result.warnings - result = _test_xul("tests/resources/markup/markuptester/bad_script.xml", - False) - assert result.notices - - def test_bad_encoding(): """Test that bad encodings don't cause the parser to fail.""" _test_xul("tests/resources/markup/encoding.txt") diff --git a/tests/test_submain_package.py b/tests/test_submain_package.py index 2fa0960..ffcf5e4 100644 --- a/tests/test_submain_package.py +++ b/tests/test_submain_package.py @@ -32,14 +32,4 @@ class TestSubmainPackage(TestCase): with open(name) as pack: result = submain.test_package(self.err, pack, name) - self.assert_failed() - - def test_package_corrupt(self): - "Tests the test_package function fails with a corrupt file" - - self.setup_err() - - name = "tests/resources/corrupt.xpi" - result = submain.test_package(self.err, name, name) - - self.assert_failed(with_errors=True, with_warnings=True) + assert self.err.errors diff --git a/tests/test_xpimanager.py b/tests/test_xpimanager.py index 662d812..fd6ee57 100644 --- a/tests/test_xpimanager.py +++ b/tests/test_xpimanager.py @@ -72,15 +72,3 @@ class TestBadZipFile(TestCase): def test_missing_file(self): """Tests that the XPI manager correctly reports a missing XPI file.""" ZipPackage("foo.bar") - - def test_corrupt_zip(self): - """Tests that the XPI manager correctly reports a missing XPI file.""" - x = ZipPackage(get_path("corrupt.xpi")) - try: - x.read("install.rdf") - except Exception: - pass - else: - raise "Exception should have been raised on corrupt file access." - - assert "install.rdf" in x.broken_files