Python 2.7.3 and JS 1.8.5 updates

2013-05-23 14:58:02 -07:00 · 2013-05-23 14:58:02 -07:00 · 63e375e3cd
--- a/appvalidator/python/HTMLParser.py
+++ b/appvalidator/python/HTMLParser.py
@ -0,0 +1,472 @@
+"""A parser for HTML and XHTML."""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import markupbase
+import re
+
+# Regular expressions used for parsing
+
+# In normal (non-CDATA) mode only '&' and '<' can start something special.
+interesting_normal = re.compile('[&<]')
+# '&' followed by a letter or '#': the start of a reference that may still
+# be incomplete in the buffered data.
+incomplete = re.compile('&[a-zA-Z#]')
+
+# Both reference patterns also consume the one character that terminates
+# the reference; goahead() gives it back unless it is ';'.
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
+charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
+
+# '<' immediately followed by a letter opens a start tag.
+starttagopen = re.compile('<[a-zA-Z]')
+piclose = re.compile('>')
+commentclose = re.compile(r'--\s*>')
+# Tag name (group 1) plus any whitespace or stray '/' that follows it.
+tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
+# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
+# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
+tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
+
+# One attribute: group 1 is the name, group 2 the optional '=value' part
+# and group 3 the value itself (still quoted if it was quoted).
+attrfind = re.compile(
+    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
+
+# Matches a start tag up to, but not including, its closing '>' or '/>'.
+locatestarttagend = re.compile(r"""
+  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
+  (?:[\s/]*                          # optional whitespace before attribute name
+    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
+      (?:\s*=+\s*                    # value indicator
+        (?:'[^']*'                   # LITA-enclosed value
+          |"[^"]*"                   # LIT-enclosed value
+          |(?!['"])[^>\s]*           # bare value
+         )
+       )?(?:\s|/(?!>))*
+     )*
+   )?
+  \s*                                # trailing whitespace
+""", re.VERBOSE)
+endendtag = re.compile('>')
+# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
+# </ and the tag name, so maybe this should be fixed
+endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+
+
+class HTMLParseError(Exception):
+    """Exception raised for all parse errors."""
+
+    def __init__(self, msg, position=(None, None)):
+        assert msg
+        self.msg = msg
+        self.lineno = position[0]
+        self.offset = position[1]
+
+    def __str__(self):
+        result = self.msg
+        if self.lineno is not None:
+            result = result + ", at line %d" % self.lineno
+        if self.offset is not None:
+            result = result + ", column %d" % (self.offset + 1)
+        return result
+
+
+class HTMLParser(markupbase.ParserBase):
+    """Find tags and other markup and call handler functions.
+
+    Usage:
+        p = HTMLParser()
+        p.feed(data)
+        ...
+        p.close()
+
+    Start tags are handled by calling self.handle_starttag() or
+    self.handle_startendtag(); end tags by self.handle_endtag().  The
+    data between tags is passed from the parser to the derived class
+    by calling self.handle_data() with the data as argument (the data
+    may be split up in arbitrary chunks).  Entity references are
+    passed by calling self.handle_entityref() with the entity
+    reference as the argument.  Numeric character references are
+    passed to self.handle_charref() with the string containing the
+    reference as the argument.
+    """
+
+    CDATA_CONTENT_ELEMENTS = ("script", "style")
+
+
+    def __init__(self):
+        """Initialize and reset this instance."""
+        # All parser state is set up by reset(), so construction is a reset.
+        self.reset()
+
+    def reset(self):
+        """Reset this instance.  Loses all unprocessed data."""
+        self.rawdata = ''  # buffered input that has not been consumed yet
+        self.lasttag = '???'  # placeholder until parse_starttag() sets it
+        self.interesting = interesting_normal  # scan for '&' and '<'
+        self.cdata_elem = None  # not inside a CDATA content element
+        markupbase.ParserBase.reset(self)
+
+    def feed(self, data):
+        r"""Feed data to the parser.
+
+        Call this as often as you want, with as little or as much text
+        as you want (may include '\n').
+        """
+        self.rawdata = self.rawdata + data
+        self.goahead(0)
+
+    def close(self):
+        """Handle any buffered data."""
+        # A true 'end' argument makes goahead() treat the buffer as final.
+        self.goahead(1)
+
+    def error(self, message):
+        """Raise HTMLParseError for message, tagged with the current position."""
+        raise HTMLParseError(message, self.getpos())
+
+    __starttag_text = None
+
+    def get_starttag_text(self):
+        """Return full source of start tag: '<...>'."""
+        # Set by parse_starttag(); None until a complete start tag was seen.
+        return self.__starttag_text
+
+    def set_cdata_mode(self, elem):
+        self.cdata_elem = elem.lower()
+        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+    def clear_cdata_mode(self):
+        self.interesting = interesting_normal
+        self.cdata_elem = None
+
+    # Internal -- handle data as far as reasonable.  May leave state
+    # and data to be processed by a subsequent call.  If 'end' is
+    # true, force handling all data as if followed by EOF marker.
+    def goahead(self, end):
+        rawdata = self.rawdata
+        i = 0
+        n = len(rawdata)
+        while i < n:
+            match = self.interesting.search(rawdata, i) # < or &
+            if match:
+                j = match.start()
+            else:
+                if self.cdata_elem:
+                    break
+                j = n
+            if i < j: self.handle_data(rawdata[i:j])
+            i = self.updatepos(i, j)
+            if i == n: break
+            startswith = rawdata.startswith
+            if startswith('<', i):
+                if starttagopen.match(rawdata, i): # < + letter
+                    k = self.parse_starttag(i)
+                elif startswith("</", i):
+                    k = self.parse_endtag(i)
+                elif startswith("<!--", i):
+                    k = self.parse_comment(i)
+                elif startswith("<?", i):
+                    k = self.parse_pi(i)
+                elif startswith("<!", i):
+                    k = self.parse_html_declaration(i)
+                elif (i + 1) < n:
+                    self.handle_data("<")
+                    k = i + 1
+                else:
+                    break
+                if k < 0:
+                    if not end:
+                        break
+                    k = rawdata.find('>', i + 1)
+                    if k < 0:
+                        k = rawdata.find('<', i + 1)
+                        if k < 0:
+                            k = i + 1
+                    else:
+                        k += 1
+                    self.handle_data(rawdata[i:k])
+                i = self.updatepos(i, k)
+            elif startswith("&#", i):
+                match = charref.match(rawdata, i)
+                if match:
+                    name = match.group()[2:-1]
+                    self.handle_charref(name)
+                    k = match.end()
+                    if not startswith(';', k-1):
+                        k = k - 1
+                    i = self.updatepos(i, k)
+                    continue
+                else:
+                    if ";" in rawdata[i:]: #bail by consuming &#
+                        self.handle_data(rawdata[0:2])
+                        i = self.updatepos(i, 2)
+                    break
+            elif startswith('&', i):
+                match = entityref.match(rawdata, i)
+                if match:
+                    name = match.group(1)
+                    self.handle_entityref(name)
+                    k = match.end()
+                    if not startswith(';', k-1):
+                        k = k - 1
+                    i = self.updatepos(i, k)
+                    continue
+                match = incomplete.match(rawdata, i)
+                if match:
+                    # match.group() will contain at least 2 chars
+                    if end and match.group() == rawdata[i:]:
+                        self.error("EOF in middle of entity or char ref")
+                    # incomplete
+                    break
+                elif (i + 1) < n:
+                    # not the end of the buffer, and can't be confused
+                    # with some other construct
+                    self.handle_data("&")
+                    i = self.updatepos(i, i + 1)
+                else:
+                    break
+            else:
+                assert 0, "interesting.search() lied"
+        # end while
+        if end and i < n and not self.cdata_elem:
+            self.handle_data(rawdata[i:n])
+            i = self.updatepos(i, n)
+        self.rawdata = rawdata[i:]
+
+    # Internal -- parse html declarations, return length or -1 if not terminated
+    # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
+    # See also parse_declaration in _markupbase
+    def parse_html_declaration(self, i):
+        rawdata = self.rawdata
+        if rawdata[i:i+2] != '<!':
+            self.error('unexpected call to parse_html_declaration()')
+        if rawdata[i:i+4] == '<!--':
+            # this case is actually already handled in goahead()
+            return self.parse_comment(i)
+        elif rawdata[i:i+3] == '<![':
+            return self.parse_marked_section(i)
+        elif rawdata[i:i+9].lower() == '<!doctype':
+            # find the closing >
+            gtpos = rawdata.find('>', i+9)
+            if gtpos == -1:
+                return -1
+            self.handle_decl(rawdata[i+2:gtpos])
+            return gtpos+1
+        else:
+            return self.parse_bogus_comment(i)
+
+    # Internal -- parse bogus comment, return length or -1 if not terminated
+    # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
+    def parse_bogus_comment(self, i, report=1):
+        rawdata = self.rawdata
+        if rawdata[i:i+2] not in ('<!', '</'):
+            self.error('unexpected call to parse_comment()')
+        pos = rawdata.find('>', i+2)
+        if pos == -1:
+            return -1
+        if report:
+            self.handle_comment(rawdata[i+2:pos])
+        return pos + 1
+
+    # Internal -- parse processing instr, return end or -1 if not terminated
+    def parse_pi(self, i):
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
+        match = piclose.search(rawdata, i+2) # >
+        if not match:
+            return -1
+        j = match.start()
+        self.handle_pi(rawdata[i+2: j])
+        j = match.end()
+        return j
+
+    # Internal -- handle starttag, return end or -1 if not terminated
+    def parse_starttag(self, i):
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and j into a tag and attrs
+        attrs = []
+        match = tagfind.match(rawdata, i+1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = match.group(1).lower()
+
+        while k < endpos:
+            m = attrfind.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                attrvalue = attrvalue[1:-1]
+            if attrvalue:
+                attrvalue = self.unescape(attrvalue)
+            attrs.append((attrname.lower(), attrvalue))
+            k = m.end()
+
+        end = rawdata[k:endpos].strip()
+        if end not in (">", "/>"):
+            lineno, offset = self.getpos()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + self.__starttag_text.count("\n")
+                offset = len(self.__starttag_text) \
+                         - self.__starttag_text.rfind("\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            self.handle_data(rawdata[i:endpos])
+            return endpos
+        if end.endswith('/>'):
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            self.handle_starttag(tag, attrs)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)
+        return endpos
+
+    # Internal -- check to see if we have a complete starttag; return end
+    # or -1 if incomplete.
+    def check_for_whole_start_tag(self, i):
+        rawdata = self.rawdata
+        m = locatestarttagend.match(rawdata, i)
+        if m:
+            j = m.end()
+            next = rawdata[j:j+1]
+            if next == ">":
+                return j + 1
+            if next == "/":
+                if rawdata.startswith("/>", j):
+                    return j + 2
+                if rawdata.startswith("/", j):
+                    # buffer boundary
+                    return -1
+                # else bogus input
+                self.updatepos(i, j + 1)
+                self.error("malformed empty start tag")
+            if next == "":
+                # end of input
+                return -1
+            if next in ("abcdefghijklmnopqrstuvwxyz=/"
+                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+                # end of input in or before attribute value, or we have the
+                # '/' from a '/>' ending
+                return -1
+            if j > i:
+                return j
+            else:
+                return i + 1
+        raise AssertionError("we should not get here!")
+
+    # Internal -- parse endtag, return end or -1 if incomplete
+    def parse_endtag(self, i):
+        """Parse the end tag starting with '</' at index i.
+
+        Returns the index just past the tag, or -1 if no '>' has been
+        buffered yet.  While in CDATA mode, any end tag other than the one
+        closing the current CDATA element is passed on as plain data.
+        """
+        rawdata = self.rawdata
+        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
+        match = endendtag.search(rawdata, i+1) # >
+        if not match:
+            return -1
+        gtpos = match.end()
+        match = endtagfind.match(rawdata, i) # </ + tag + >
+        if not match:
+            # not a well-formed '</name>'
+            if self.cdata_elem is not None:
+                self.handle_data(rawdata[i:gtpos])
+                return gtpos
+            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
+            namematch = tagfind_tolerant.match(rawdata, i+2)
+            if not namematch:
+                # w3.org/TR/html5/tokenization.html#end-tag-open-state
+                if rawdata[i:i+3] == '</>':
+                    return i+3
+                else:
+                    return self.parse_bogus_comment(i)
+            tagname = namematch.group().lower()
+            # consume and ignore other stuff between the name and the >
+            # Note: this is not 100% correct, since we might have things like
+            # </tag attr=">">, but looking for > after the name should cover
+            # most of the cases and is much simpler
+            gtpos = rawdata.find('>', namematch.end())
+            self.handle_endtag(tagname)
+            return gtpos+1
+
+        elem = match.group(1).lower() # script or style
+        if self.cdata_elem is not None:
+            if elem != self.cdata_elem:
+                # some other end tag inside CDATA content: just text
+                self.handle_data(rawdata[i:gtpos])
+                return gtpos
+
+        self.handle_endtag(elem)
+        self.clear_cdata_mode()
+        return gtpos
+
+    # Overridable -- finish processing of start+end tag: <tag.../>
+    def handle_startendtag(self, tag, attrs):
+        """Report an empty-element tag as a start tag followed by an end tag."""
+        self.handle_starttag(tag, attrs)
+        self.handle_endtag(tag)
+
+    # Overridable -- handle start tag
+    def handle_starttag(self, tag, attrs):
+        """Called with the lower-cased tag name and a list of (name, value) pairs."""
+        pass
+
+    # Overridable -- handle end tag
+    def handle_endtag(self, tag):
+        """Called with the lower-cased name of an end tag."""
+        pass
+
+    # Overridable -- handle character reference
+    def handle_charref(self, name):
+        """Called with the text between '&#' and the terminating character."""
+        pass
+
+    # Overridable -- handle entity reference
+    def handle_entityref(self, name):
+        """Called with the entity name, without the '&' and the terminator."""
+        pass
+
+    # Overridable -- handle data
+    def handle_data(self, data):
+        """Called with a chunk of text found outside of markup."""
+        pass
+
+    # Overridable -- handle comment
+    def handle_comment(self, data):
+        """Called with the body of a comment."""
+        pass
+
+    # Overridable -- handle declaration
+    def handle_decl(self, decl):
+        """Called with the text of a doctype between '<!' and '>'."""
+        pass
+
+    # Overridable -- handle processing instruction
+    def handle_pi(self, data):
+        """Called with the text between '<?' and the closing '>'."""
+        pass
+
+    # Not called anywhere in this file; presumably a hook used by
+    # markupbase.ParserBase -- TODO confirm against that module.
+    def unknown_decl(self, data):
+        pass
+
+    # Internal -- helper to remove special character quoting
+    entitydefs = None
+    def unescape(self, s):
+        if '&' not in s:
+            return s
+        def replaceEntities(s):
+            s = s.groups()[0]
+            try:
+                if s[0] == "#":
+                    s = s[1:]
+                    if s[0] in ['x','X']:
+                        c = int(s[1:], 16)
+                    else:
+                        c = int(s)
+                    return unichr(c)
+            except ValueError:
+                return '&#'+s+';'
+            else:
+                # Cannot use name2codepoint directly, because HTMLParser supports apos,
+                # which is not part of HTML 4
+                import htmlentitydefs
+                if HTMLParser.entitydefs is None:
+                    entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
+                    for k, v in htmlentitydefs.name2codepoint.iteritems():
+                        entitydefs[k] = unichr(v)
+                try:
+                    return self.entitydefs[s]
+                except KeyError:
+                    return '&'+s+';'
+
+        return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
--- a/appvalidator/python/NOTICE.md
+++ b/appvalidator/python/NOTICE.md
@ -0,0 +1,6 @@
+Notice
+======
+
+The files in this directory are extracted from the CPython standard library.
+They are included solely for compatibility purposes, and their respective
+copyrights and licenses are held and controlled by the original developer(s).
--- a/appvalidator/python/__init__.py
+++ b/appvalidator/python/__init__.py
--- a/appvalidator/python/copy.py
+++ b/appvalidator/python/copy.py
@ -0,0 +1,433 @@
+"""Generic (shallow and deep) copying operations.
+
+Interface summary:
+
+        import copy
+
+        x = copy.copy(y)        # make a shallow copy of y
+        x = copy.deepcopy(y)    # make a deep copy of y
+
+For module specific errors, copy.Error is raised.
+
+The difference between shallow and deep copying is only relevant for
+compound objects (objects that contain other objects, like lists or
+class instances).
+
+- A shallow copy constructs a new compound object and then (to the
+  extent possible) inserts *the same objects* into it that the
+  original contains.
+
+- A deep copy constructs a new compound object and then, recursively,
+  inserts *copies* into it of the objects found in the original.
+
+Two problems often exist with deep copy operations that don't exist
+with shallow copy operations:
+
+ a) recursive objects (compound objects that, directly or indirectly,
+    contain a reference to themselves) may cause a recursive loop
+
+ b) because deep copy copies *everything* it may copy too much, e.g.
+    administrative data structures that should be shared even between
+    copies
+
+Python's deep copy operation avoids these problems by:
+
+ a) keeping a table of objects already copied during the current
+    copying pass
+
+ b) letting user-defined classes override the copying operation or the
+    set of components copied
+
+This version does not copy types like module, class, function, method,
+nor stack trace, stack frame, nor file, socket, window, nor array, nor
+any similar types.
+
+Classes can use the same interfaces to control copying that they use
+to control pickling: they can define methods called __getinitargs__(),
+__getstate__() and __setstate__().  See the documentation for module
+"pickle" for information on these methods.
+"""
+
+import types
+import weakref
+from copy_reg import dispatch_table
+
+class Error(Exception):
+    """Raised by copy() and deepcopy() for objects that cannot be copied."""
+    pass
+error = Error   # backward compatibility
+
+try:
+    from org.python.core import PyStringMap
+except ImportError:
+    PyStringMap = None
+
+__all__ = ["Error", "copy", "deepcopy"]
+
+def copy(x):
+    """Shallow copy operation on arbitrary Python objects.
+
+    See the module's __doc__ string for more info.
+    """
+
+    cls = type(x)
+
+    copier = _copy_dispatch.get(cls)
+    if copier:
+        return copier(x)
+
+    copier = getattr(cls, "__copy__", None)
+    if copier:
+        return copier(x)
+
+    reductor = dispatch_table.get(cls)
+    if reductor:
+        rv = reductor(x)
+    else:
+        reductor = getattr(x, "__reduce_ex__", None)
+        if reductor:
+            rv = reductor(2)
+        else:
+            reductor = getattr(x, "__reduce__", None)
+            if reductor:
+                rv = reductor()
+            else:
+                raise Error("un(shallow)copyable object of type %s" % cls)
+
+    return _reconstruct(x, rv, 0)
+
+
+_copy_dispatch = d = {}
+
+def _copy_immutable(x):
+    """Shallow "copy" for types registered as immutable: x itself."""
+    return x
+for t in (type(None), int, long, float, bool, str, tuple,
+          frozenset, type, xrange, types.ClassType,
+          types.BuiltinFunctionType, type(Ellipsis),
+          types.FunctionType, weakref.ref):
+    d[t] = _copy_immutable
+for name in ("ComplexType", "UnicodeType", "CodeType"):
+    t = getattr(types, name, None)
+    if t is not None:
+        d[t] = _copy_immutable
+
+def _copy_with_constructor(x):
+    return type(x)(x)
+for t in (list, dict, set):
+    d[t] = _copy_with_constructor
+
+def _copy_with_copy_method(x):
+    """Shallow-copy x through its own copy() method."""
+    return x.copy()
+if PyStringMap is not None:
+    d[PyStringMap] = _copy_with_copy_method
+
+def _copy_inst(x):
+    if hasattr(x, '__copy__'):
+        return x.__copy__()
+    if hasattr(x, '__getinitargs__'):
+        args = x.__getinitargs__()
+        y = x.__class__(*args)
+    else:
+        y = _EmptyClass()
+        y.__class__ = x.__class__
+    if hasattr(x, '__getstate__'):
+        state = x.__getstate__()
+    else:
+        state = x.__dict__
+    if hasattr(y, '__setstate__'):
+        y.__setstate__(state)
+    else:
+        y.__dict__.update(state)
+    return y
+d[types.InstanceType] = _copy_inst
+
+del d
+
+def deepcopy(x, memo=None, _nil=[]):
+    """Deep copy operation on arbitrary Python objects.
+
+    memo maps id(original) to the copy already made for it, so shared
+    and recursive references are copied only once.  _nil is a private
+    sentinel (never mutated) that marks "not in memo".
+
+    See the module's __doc__ string for more info.
+    """
+
+    if memo is None:
+        memo = {}
+
+    d = id(x)
+    y = memo.get(d, _nil)
+    if y is not _nil:
+        # already copied during this pass
+        return y
+
+    cls = type(x)
+
+    copier = _deepcopy_dispatch.get(cls)
+    if copier:
+        y = copier(x, memo)
+    else:
+        try:
+            issc = issubclass(cls, type)
+        except TypeError: # cls is not a class (old Boost; see SF #502085)
+            issc = 0
+        if issc:
+            # x is itself a class: treat it as atomic
+            y = _deepcopy_atomic(x, memo)
+        else:
+            copier = getattr(x, "__deepcopy__", None)
+            if copier:
+                y = copier(memo)
+            else:
+                # fall back to the reduction protocol
+                reductor = dispatch_table.get(cls)
+                if reductor:
+                    rv = reductor(x)
+                else:
+                    reductor = getattr(x, "__reduce_ex__", None)
+                    if reductor:
+                        rv = reductor(2)
+                    else:
+                        reductor = getattr(x, "__reduce__", None)
+                        if reductor:
+                            rv = reductor()
+                        else:
+                            raise Error(
+                                "un(deep)copyable object of type %s" % cls)
+                y = _reconstruct(x, rv, 1, memo)
+
+    memo[d] = y
+    _keep_alive(x, memo) # Make sure x lives at least as long as d
+    return y
+
+_deepcopy_dispatch = d = {}
+
+def _deepcopy_atomic(x, memo):
+    """Deep "copy" for types registered as atomic: x itself; memo is unused."""
+    return x
+d[type(None)] = _deepcopy_atomic
+d[type(Ellipsis)] = _deepcopy_atomic
+d[int] = _deepcopy_atomic
+d[long] = _deepcopy_atomic
+d[float] = _deepcopy_atomic
+d[bool] = _deepcopy_atomic
+try:
+    d[complex] = _deepcopy_atomic
+except NameError:
+    pass
+d[str] = _deepcopy_atomic
+try:
+    d[unicode] = _deepcopy_atomic
+except NameError:
+    pass
+try:
+    d[types.CodeType] = _deepcopy_atomic
+except AttributeError:
+    pass
+d[type] = _deepcopy_atomic
+d[xrange] = _deepcopy_atomic
+d[types.ClassType] = _deepcopy_atomic
+d[types.BuiltinFunctionType] = _deepcopy_atomic
+d[types.FunctionType] = _deepcopy_atomic
+d[weakref.ref] = _deepcopy_atomic
+
+def _deepcopy_list(x, memo):
+    y = []
+    memo[id(x)] = y
+    for a in x:
+        y.append(deepcopy(a, memo))
+    return y
+d[list] = _deepcopy_list
+
+def _deepcopy_tuple(x, memo):
+    y = []
+    for a in x:
+        y.append(deepcopy(a, memo))
+    d = id(x)
+    try:
+        return memo[d]
+    except KeyError:
+        pass
+    for i in range(len(x)):
+        if x[i] is not y[i]:
+            y = tuple(y)
+            break
+    else:
+        y = x
+    memo[d] = y
+    return y
+d[tuple] = _deepcopy_tuple
+
+def _deepcopy_dict(x, memo):
+    y = {}
+    memo[id(x)] = y
+    for key, value in x.iteritems():
+        y[deepcopy(key, memo)] = deepcopy(value, memo)
+    return y
+d[dict] = _deepcopy_dict
+if PyStringMap is not None:
+    d[PyStringMap] = _deepcopy_dict
+
+def _deepcopy_method(x, memo): # Copy instance methods
+    """Rebuild the method x around a deep copy of the object it is bound to."""
+    # im_func and im_class are reused as they are; only im_self is copied.
+    return type(x)(x.im_func, deepcopy(x.im_self, memo), x.im_class)
+_deepcopy_dispatch[types.MethodType] = _deepcopy_method
+
+def _keep_alive(x, memo):
+    """Keeps a reference to the object x in the memo.
+
+    Because we remember objects by their id, we have
+    to assure that possibly temporary objects are kept
+    alive by referencing them.
+    We store a reference at the id of the memo, which should
+    normally not be used unless someone tries to deepcopy
+    the memo itself...
+    """
+    try:
+        memo[id(memo)].append(x)
+    except KeyError:
+        # aha, this is the first one :-)
+        memo[id(memo)]=[x]
+
+def _deepcopy_inst(x, memo):
+    if hasattr(x, '__deepcopy__'):
+        return x.__deepcopy__(memo)
+    if hasattr(x, '__getinitargs__'):
+        args = x.__getinitargs__()
+        args = deepcopy(args, memo)
+        y = x.__class__(*args)
+    else:
+        y = _EmptyClass()
+        y.__class__ = x.__class__
+    memo[id(x)] = y
+    if hasattr(x, '__getstate__'):
+        state = x.__getstate__()
+    else:
+        state = x.__dict__
+    state = deepcopy(state, memo)
+    if hasattr(y, '__setstate__'):
+        y.__setstate__(state)
+    else:
+        y.__dict__.update(state)
+    return y
+d[types.InstanceType] = _deepcopy_inst
+
+def _reconstruct(x, info, deep, memo=None):
+    """Build a copy of x from info, the value returned by a reduction.
+
+    info is either a string (x is then returned unchanged) or a tuple of
+    2 to 5 items: callable, args, and optionally state, an iterator of
+    list items and an iterator of (key, value) pairs.  When deep is
+    true, args, state and the iterated items are deep-copied with memo.
+    """
+    if isinstance(info, str):
+        return x
+    assert isinstance(info, tuple)
+    if memo is None:
+        memo = {}
+    n = len(info)
+    assert n in (2, 3, 4, 5)
+    callable, args = info[:2]
+    if n > 2:
+        state = info[2]
+    else:
+        state = {}
+    if n > 3:
+        listiter = info[3]
+    else:
+        listiter = None
+    if n > 4:
+        dictiter = info[4]
+    else:
+        dictiter = None
+    if deep:
+        args = deepcopy(args, memo)
+    y = callable(*args)
+    # register the new object before copying its state so cycles resolve
+    memo[id(x)] = y
+
+    if state:
+        if deep:
+            state = deepcopy(state, memo)
+        if hasattr(y, '__setstate__'):
+            y.__setstate__(state)
+        else:
+            # a 2-tuple state is (instance dict state, slot state)
+            if isinstance(state, tuple) and len(state) == 2:
+                state, slotstate = state
+            else:
+                slotstate = None
+            if state is not None:
+                y.__dict__.update(state)
+            if slotstate is not None:
+                for key, value in slotstate.iteritems():
+                    setattr(y, key, value)
+
+    if listiter is not None:
+        for item in listiter:
+            if deep:
+                item = deepcopy(item, memo)
+            y.append(item)
+    if dictiter is not None:
+        for key, value in dictiter:
+            if deep:
+                key = deepcopy(key, memo)
+                value = deepcopy(value, memo)
+            y[key] = value
+    return y
+
+del d
+
+del types
+
+# Helper for instance creation without calling __init__
+class _EmptyClass:
+    # Instantiated by _copy_inst() and _deepcopy_inst(), which then
+    # reassign the instance's __class__ to the class being copied.
+    pass
+
+def _test():
+    """Ad-hoc demonstration run when the module is executed as a script.
+
+    Prints the results of copying a few sample objects; nothing is
+    asserted, the output is meant to be inspected by eye.
+    """
+    l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'],
+         {'abc': 'ABC'}, (), [], {}]
+    l1 = copy(l)
+    print l1==l
+    l1 = map(copy, l)
+    print l1==l
+    l1 = deepcopy(l)
+    print l1==l
+    # A class that defines its own pickling-style state and __deepcopy__.
+    class C:
+        def __init__(self, arg=None):
+            self.a = 1
+            self.arg = arg
+            if __name__ == '__main__':
+                import sys
+                file = sys.argv[0]
+            else:
+                file = __file__
+            self.fp = open(file)
+            self.fp.close()
+        def __getstate__(self):
+            return {'a': self.a, 'arg': self.arg}
+        def __setstate__(self, state):
+            for key, value in state.iteritems():
+                setattr(self, key, value)
+        def __deepcopy__(self, memo=None):
+            new = self.__class__(deepcopy(self.arg, memo))
+            new.a = self.a
+            return new
+    c = C('argument sketch')
+    l.append(c)
+    l2 = copy(l)
+    print l == l2
+    print l
+    print l2
+    l2 = deepcopy(l)
+    print l == l2
+    print l
+    print l2
+    # make the list recursive: it now contains a dict that refers back to it
+    l.append({l[1]: l, 'xyz': l[2]})
+    l3 = copy(l)
+    import repr
+    print map(repr.repr, l)
+    print map(repr.repr, l1)
+    print map(repr.repr, l2)
+    print map(repr.repr, l3)
+    l3 = deepcopy(l)
+    import repr
+    print map(repr.repr, l)
+    print map(repr.repr, l1)
+    print map(repr.repr, l2)
+    print map(repr.repr, l3)
+    # A dict subclass whose __setitem__ reads an attribute set in __init__.
+    class odict(dict):
+        def __init__(self, d = {}):
+            self.a = 99
+            dict.__init__(self, d)
+        def __setitem__(self, k, i):
+            dict.__setitem__(self, k, i)
+            self.a
+    o = odict({"A" : "B"})
+    x = deepcopy(o)
+    print(o, x)
+
+if __name__ == '__main__':
+    _test()
--- a/appvalidator/python/copy_reg.py
+++ b/appvalidator/python/copy_reg.py
@ -0,0 +1,201 @@
+"""Helper to provide extensibility for pickle/cPickle.
+
+This is only useful to add pickle support for extension types defined in
+C, not for instances of user-defined classes.
+"""
+
+from types import ClassType as _ClassType
+
+__all__ = ["pickle", "constructor",
+           "add_extension", "remove_extension", "clear_extension_cache"]
+
+dispatch_table = {}
+
+def pickle(ob_type, pickle_function, constructor_ob=None):
+    """Register pickle_function as the reduction function for ob_type.
+
+    Raises TypeError if ob_type is a classic class or if pickle_function
+    is not callable.  constructor_ob, when given, is only validated via
+    constructor().
+    """
+    is_classic_class = type(ob_type) is _ClassType
+    if is_classic_class:
+        raise TypeError("copy_reg is not intended for use with classes")
+
+    reducer_is_callable = hasattr(pickle_function, '__call__')
+    if not reducer_is_callable:
+        raise TypeError("reduction functions must be callable")
+    dispatch_table[ob_type] = pickle_function
+
+    # The constructor_ob argument is a leftover from the old
+    # "safe for unpickling" mechanism; callers no longer need to pass it.
+    if constructor_ob is None:
+        return
+    constructor(constructor_ob)
+
+def constructor(object):
+    """Validate that object is callable; raise TypeError otherwise."""
+    if hasattr(object, '__call__'):
+        return
+    raise TypeError("constructors must be callable")
+
+# Example: provide pickling support for complex numbers.
+
+# Probe for the complex builtin; when the name is missing the
+# registration below is skipped entirely.
+try:
+    complex
+except NameError:
+    pass
+else:
+
+    def pickle_complex(c):
+        """Reduce the complex number c to (complex, (real, imag))."""
+        return complex, (c.real, c.imag)
+
+    # Register the reducer; complex itself is passed as the constructor
+    # argument.
+    pickle(complex, pickle_complex, complex)
+
+# Support for pickling new-style objects
+
+def _reconstructor(cls, base, state):
+    """Create a bare instance of cls from the (base, state) pair.
+
+    When base is object the instance comes straight from
+    object.__new__.  Otherwise it is built with base.__new__(cls, state)
+    and, if base has its own __init__, initialised from state as well.
+    """
+    if base is object:
+        return object.__new__(cls)
+
+    instance = base.__new__(cls, state)
+    base_has_own_init = base.__init__ != object.__init__
+    if base_has_own_init:
+        base.__init__(instance, state)
+    return instance
+
+_HEAPTYPE = 1<<9
+
+# Python code for object.__reduce_ex__ for protocols 0 and 1
+
+def _reduce_ex(self, proto):
+    """Python implementation of object.__reduce_ex__ for protocols 0 and 1.
+
+    Returns (_reconstructor, args) or (_reconstructor, args, state), where
+    args is (self.__class__, base, base_state).  The extra state comes
+    from __getstate__() when defined, otherwise from the instance
+    __dict__, and is only included when it is truthy.
+
+    Raises TypeError when the chosen base is the object's own class, or
+    when the object has non-empty __slots__ but no __getstate__.
+    """
+    assert proto < 2
+    # Pick the first class in the MRO whose __flags__ has the _HEAPTYPE
+    # bit clear (presumably a type implemented in C -- confirm against
+    # Py_TPFLAGS_HEAPTYPE).
+    for base in self.__class__.__mro__:
+        if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE:
+            break
+    else:
+        base = object # not really reachable
+    if base is object:
+        state = None
+    else:
+        if base is self.__class__:
+            raise TypeError("can't pickle %s objects" % base.__name__)
+        # Snapshot the base-type part of the value, e.g. list(self).
+        state = base(self)
+    args = (self.__class__, base, state)
+    try:
+        getstate = self.__getstate__
+    except AttributeError:
+        if getattr(self, "__slots__", None):
+            raise TypeError("a class that defines __slots__ without "
+                            "defining __getstate__ cannot be pickled")
+        try:
+            instance_state = self.__dict__
+        except AttributeError:
+            instance_state = None
+    else:
+        instance_state = getstate()
+    if instance_state:
+        return _reconstructor, args, instance_state
+    return _reconstructor, args
+
+# Helper for __reduce_ex__ protocol 2
+
+def __newobj__(cls, *args):
+    """Create an instance via cls.__new__(cls, *args)."""
+    allocate = cls.__new__
+    return allocate(cls, *args)
+
+def _slotnames(cls):
+    """Return a list of slot names for a given class.
+
+    This needs to find slots defined by the class and its bases, so we
+    can't simply return the __slots__ attribute.  We must walk down
+    the Method Resolution Order and concatenate the __slots__ of each
+    class found there.  (This assumes classes don't modify their
+    __slots__ attribute to misrepresent their slots after the class is
+    defined.)
+
+    Private slot names (two leading underscores, no trailing double
+    underscore) are returned in their mangled form.  As with the
+    language's own name mangling, leading underscores are stripped from
+    the class name first, and a class name made up only of underscores
+    leaves the slot name unmangled.
+
+    The result is cached on the class as __slotnames__ when possible.
+    """
+
+    # Get the value from a cache in the class if possible
+    names = cls.__dict__.get("__slotnames__")
+    if names is not None:
+        return names
+
+    # Not cached -- calculate the value
+    names = []
+    if hasattr(cls, "__slots__"):
+        # Slots found -- gather slot names from all base classes
+        for c in cls.__mro__:
+            if "__slots__" not in c.__dict__:
+                continue
+            slots = c.__dict__['__slots__']
+            # if class has a single slot, it can be given as a string
+            if isinstance(slots, basestring):
+                slots = (slots,)
+            for name in slots:
+                # special descriptors
+                if name in ("__dict__", "__weakref__"):
+                    continue
+                # mangled names
+                elif name.startswith('__') and not name.endswith('__'):
+                    stripped = c.__name__.lstrip('_')
+                    if stripped:
+                        names.append('_%s%s' % (stripped, name))
+                    else:
+                        names.append(name)
+                else:
+                    names.append(name)
+
+    # Cache the outcome in the class if at all possible
+    try:
+        cls.__slotnames__ = names
+    except Exception:
+        pass # But don't die if we can't (e.g. built-in types)
+
+    return names
+
+# A registry of extension codes.  This is an ad-hoc compression
+# mechanism.  Whenever a global reference to <module>, <name> is about
+# to be pickled, the (<module>, <name>) tuple is looked up here to see
+# if there is a registered extension code for it.  Extension codes are
+# universal, so that the meaning of a pickle does not depend on
+# context.  (There are also some codes reserved for local use that
+# don't have this restriction.)  Codes are positive ints; 0 is
+# reserved.
+
+_extension_registry = {}                # key -> code
+_inverted_registry = {}                 # code -> key
+_extension_cache = {}                   # code -> object
+# Don't ever rebind those names:  cPickle grabs a reference to them when
+# it's initialized, and won't see a rebinding.
+
+def add_extension(module, name, code):
+    """Register an extension code.
+
+    code is converted with int() and must lie in 1..0x7fffffff.
+    Registering the same (module, name) -> code pair again is a no-op.
+
+    Raises ValueError if the code is out of range, if the key is already
+    registered under a different code, or if the code is already in use
+    for a different key.
+    """
+    code = int(code)
+    if not 1 <= code <= 0x7fffffff:
+        raise ValueError("code out of range")
+    key = (module, name)
+    if (_extension_registry.get(key) == code and
+        _inverted_registry.get(code) == key):
+        return # Redundant registrations are benign
+    if key in _extension_registry:
+        raise ValueError("key %s is already registered with code %s" %
+                         (key, _extension_registry[key]))
+    if code in _inverted_registry:
+        raise ValueError("code %s is already in use for key %s" %
+                         (code, _inverted_registry[code]))
+    _extension_registry[key] = code
+    _inverted_registry[code] = key
+
+def remove_extension(module, name, code):
+    """Unregister an extension code.  For testing only.
+
+    Raises ValueError unless (module, name) is currently registered
+    with exactly this code.
+    """
+    key = (module, name)
+    registered = (_extension_registry.get(key) == code and
+                  _inverted_registry.get(code) == key)
+    if not registered:
+        raise ValueError("key %s is not registered with code %s" %
+                         (key, code))
+    del _extension_registry[key]
+    del _inverted_registry[code]
+    # Drop any cached object for this code as well.
+    _extension_cache.pop(code, None)
+
+def clear_extension_cache():
+    """Empty the extension cache (code -> object) in place."""
+    _extension_cache.clear()
+
+# Standard extension code assignments
+
+# Reserved ranges
+
+# First  Last Count  Purpose
+#     1   127   127  Reserved for Python standard library
+#   128   191    64  Reserved for Zope
+#   192   239    48  Reserved for 3rd parties
+#   240   255    16  Reserved for private use (will never be assigned)
+#   256   Inf   Inf  Reserved for future assignment
+
+# Extension codes are assigned by the Python Software Foundation.
--- a/appvalidator/specs/webapps.py
+++ b/appvalidator/specs/webapps.py
@ -1,8 +1,9 @@
-import copy
 import simplejson as json
 import types
 import urlparse

+import appvalidator.python.copy as copy
+
 from ..constants import DESCRIPTION_TYPES
 from ..specprocessor import Spec, LITERAL_TYPE

--- a/appvalidator/submain.py
+++ b/appvalidator/submain.py
@ -67,6 +67,14 @@ def prepare_package(err, path, timeout=None):
    return err


+def write_zip_error(err):
+    """Record a "Corrupt ZIP file" error on err.
+
+    Returns whatever err.error() returns.
+    """
+    message = ("We were unable to decompress all or part of the zip "
+               "file.")
+    return err.error(err_id=("submain", "badzipfile"),
+                     error="Corrupt ZIP file",
+                     description=message)
+
+
 def test_package(err, file_, name):
    """Begins tests for the package."""

@ -80,10 +88,7 @@ def test_package(err, file_, name):
            error="The package could not be opened.")
    except (BadZipfile, zlib_error):
        # Die if the zip file is corrupt.
-        return err.error(
-            err_id=("submain", "_load_install_rdf", "badzipfile"),
-            error="Corrupt ZIP file",
-            description="We were unable to decompress the zip file.")
+        return write_zip_error(err)

    try:
        output = test_inner_package(err, package)
@ -109,8 +114,11 @@ def test_inner_package(err, package):
        err.set_tier(tier)

        # Iterate through each test of our detected type.
-        for test in testcases._get_tests(tier):
-            test(err, package)
+        try:
+            for test in testcases._get_tests(tier):
+                test(err, package)
+        except (BadZipfile, zlib_error):
+            write_zip_error(err)

        # Return any errors at the end of the tier if undetermined.
        if err.failed(fail_on_warnings=False) and not err.determined:
--- a/appvalidator/testcases/javascript/spidermonkey.py
+++ b/appvalidator/testcases/javascript/spidermonkey.py
@ -105,11 +105,19 @@ def _get_tree(code, shell=SPIDERMONKEY_INSTALLATION):
    try:
        cmd = [shell, "-e", data, "-U"]
        shell_obj = subprocess.Popen(
-            cmd, shell=False,
-            stderr=subprocess.PIPE,
-            stdout=subprocess.PIPE)
+            cmd, shell=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE)

        data, stderr = shell_obj.communicate()
+        # Spidermonkey dropped the -U flag on 29 Oct 2012
+        if stderr and ("Invalid short option: -U" in stderr or
+                       "usage: js [options] [scriptfile]" in stderr):
+            cmd.remove("-U")
+            shell_obj = subprocess.Popen(
+                cmd, shell=False,
+                stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+
+            data, stderr = shell_obj.communicate()
+
        if stderr:
            raise RuntimeError('Error calling %r: %s' % (cmd, stderr))

--- a/appvalidator/testcases/markup/markuptester.py
+++ b/appvalidator/testcases/markup/markuptester.py
@ -7,7 +7,7 @@ from . import csstester
 from appvalidator.contextgenerator import ContextGenerator
 from appvalidator.constants import *
 from appvalidator.csp import warn as message_csp
-from patchedhtmlparser import htmlparser, PatchedHTMLParser
+from appvalidator.python.HTMLParser import HTMLParser


 DEBUG = False
@ -25,11 +25,11 @@ DOM_MUTATION_HANDLERS = set([
        "ondomnoderemovedfromdocument", "ondomsubtreemodified", ])


-class MarkupParser(PatchedHTMLParser):
+class MarkupParser(HTMLParser):
    """Parse and analyze the versious components of markup files."""

    def __init__(self, err, strict=True, debug=False):
-        PatchedHTMLParser.__init__(self)
+        HTMLParser.__init__(self)
        self.err = err
        self.is_jetpack = "is_jetpack" in err.metadata  # Cache this value.
        self.line = 0
--- a/appvalidator/testcases/markup/patchedhtmlparser.py
+++ b/appvalidator/testcases/markup/patchedhtmlparser.py
@ -1,97 +0,0 @@
-import re
-
-try:
-    import HTMLParser as htmlparser
-except ImportError:  # pragma: no cover
-    import html.parser as htmlparser
-
-interesting_cdata = re.compile(r'<(/|\Z)')
-
-
-class PatchedHTMLParser(htmlparser.HTMLParser):
-    """
-    A version of the Python HTML parser that includes the fixes bundled with
-    the latest versions of Python.
-    """
-
-    def __init__(self, *args, **kwargs):
-        htmlparser.HTMLParser.__init__(self, *args, **kwargs)
-        # Added as a patch for various Python HTMLParser issues.
-        self.cdata_tag = None
-
-    # Code to fix for Python issue 670664
-    def parse_starttag(self, i):
-        self.__starttag_text = None
-        endpos = self.check_for_whole_start_tag(i)
-        if endpos < 0:
-            return endpos
-        rawdata = self.rawdata
-        self.__starttag_text = rawdata[i:endpos]
-
-        # Now parse the data between i+1 and j into a tag and attrs
-        attrs = []
-        match = htmlparser.tagfind.match(rawdata, i+1)
-        assert match, 'unexpected call to parse_starttag()'
-        k = match.end()
-        self.lasttag = tag = rawdata[i+1:k].lower()
-
-        while k < endpos:
-            m = htmlparser.attrfind.match(rawdata, k)
-            if not m:
-                break
-            attrname, rest, attrvalue = m.group(1, 2, 3)
-            if not rest:
-                attrvalue = None
-            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
-                 attrvalue[:1] == '"' == attrvalue[-1:]:
-                attrvalue = attrvalue[1:-1]
-                attrvalue = self.unescape(attrvalue)
-            attrs.append((attrname.lower(), attrvalue))
-            k = m.end()
-
-        end = rawdata[k:endpos].strip()
-        if end not in (">", "/>"):
-            lineno, offset = self.getpos()
-            if "\n" in self.__starttag_text:
-                lineno = lineno + self.__starttag_text.count("\n")
-                offset = len(self.__starttag_text) \
-                         - self.__starttag_text.rfind("\n")
-            else:
-                offset = offset + len(self.__starttag_text)
-            self.error("junk characters in start tag: %r"
-                       % (rawdata[k:endpos][:20],))
-        if end.endswith('/>'):
-            # XHTML-style empty tag: <span attr="value" />
-            self.handle_startendtag(tag, attrs)
-        else:
-            self.handle_starttag(tag, attrs)
-            if tag in self.CDATA_CONTENT_ELEMENTS:
-                self.set_cdata_mode(tag)
-        return endpos
-
-    def parse_endtag(self, i):
-        rawdata = self.rawdata
-        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
-        match = htmlparser.endendtag.search(rawdata, i+1) # >
-        if not match:
-            return -1
-        j = match.end()
-        match = htmlparser.endtagfind.match(rawdata, i) # </ + tag + >
-        if not match:
-            if self.cdata_tag is not None:
-                self.handle_data(rawdata[i:j])
-                return j
-            self.error("bad end tag: %r" % (rawdata[i:j],))
-        tag = match.group(1).strip()
-
-        if self.cdata_tag is not None and tag.lower() != self.cdata_tag:
-            self.handle_data(rawdata[i:j])
-            return j
-
-        self.handle_endtag(tag.lower())
-        self.clear_cdata_mode()
-        return j
-
-    def set_cdata_mode(self, tag):
-        self.interesting = interesting_cdata
-        self.cdata_tag = None
--- a/tests/resources/controlchars/controlchars_utf-8_ok.js
+++ b/tests/resources/controlchars/controlchars_utf-8_ok.js
@ -1 +0,0 @@
-function täst() {}
--- a/tests/resources/controlchars/controlchars_utf-8_warn.js
+++ b/tests/resources/controlchars/controlchars_utf-8_warn.js
--- a/tests/resources/corrupt.xpi
+++ b/tests/resources/corrupt.xpi
--- a/tests/resources/markup/markuptester/bad.xml
+++ b/tests/resources/markup/markuptester/bad.xml
@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<xml>
-	<thisisa<broken<>>>
-</xml>
--- a/tests/resources/markup/markuptester/bad_script.xml
+++ b/tests/resources/markup/markuptester/bad_script.xml
@ -1,7 +0,0 @@
-<?xml version="1.0"?>
-<xml>
-	<!-- There should be comments in the script tag. -->
-	<testscript>
-		<whatever
-	</testscript>
-</xml>
--- a/tests/test_controlchars.py
+++ b/tests/test_controlchars.py
@ -35,22 +35,6 @@ class TestControlChars(TestCase):
        self.assert_failed(with_warnings=True)
        eq_(self.err.warnings[0]["id"][2], "syntax_error")

-    def test_controlchars_utf8_ok(self):
-        """Test that multi-byte characters are decoded properly (utf-8)."""
-
-        self.run_test("tests/resources/controlchars/controlchars_utf-8_ok.js")
-        self.assert_silent()
-
-    def test_controlchars_utf8_warn(self):
-        """
-        Tests that multi-byte characters are decoded properly (utf-8) but remaining
-        non-ASCII characters raise warnings.
-        """
-
-        self.run_test("tests/resources/controlchars/controlchars_utf-8_warn.js")
-        self.assert_failed(with_warnings=True)
-        eq_(self.err.warnings[0]["id"][2], "syntax_error")
-
    @raises(JSONDecodeError)
    def test_controlchar_in_webapp(self):
        """
--- a/tests/test_markup_markuptester.py
+++ b/tests/test_markup_markuptester.py
@ -156,16 +156,6 @@ def test_html_ignore_comment():
    _test_xul("tests/resources/markup/markuptester/ignore_comments.html")


-def test_invalid_markup():
-    "Tests an markup file that is simply broken."
-
-    result = _test_xul("tests/resources/markup/markuptester/bad.xml", True)
-    assert result.warnings
-    result = _test_xul("tests/resources/markup/markuptester/bad_script.xml",
-                       False)
-    assert result.notices
-
-
 def test_bad_encoding():
    """Test that bad encodings don't cause the parser to fail."""
    _test_xul("tests/resources/markup/encoding.txt")
--- a/tests/test_submain_package.py
+++ b/tests/test_submain_package.py
@ -32,14 +32,4 @@ class TestSubmainPackage(TestCase):
        with open(name) as pack:
            result = submain.test_package(self.err, pack, name)

-        self.assert_failed()
-
-    def test_package_corrupt(self):
-        "Tests the test_package function fails with a corrupt file"
-
-        self.setup_err()
-
-        name = "tests/resources/corrupt.xpi"
-        result = submain.test_package(self.err, name, name)
-
-        self.assert_failed(with_errors=True, with_warnings=True)
+        assert self.err.errors
--- a/tests/test_xpimanager.py
+++ b/tests/test_xpimanager.py
@ -72,15 +72,3 @@ class TestBadZipFile(TestCase):
    def test_missing_file(self):
        """Tests that the XPI manager correctly reports a missing XPI file."""
        ZipPackage("foo.bar")
-
-    def test_corrupt_zip(self):
-        """Tests that the XPI manager correctly reports a missing XPI file."""
-        x = ZipPackage(get_path("corrupt.xpi"))
-        try:
-            x.read("install.rdf")
-        except Exception:
-            pass
-        else:
-            raise "Exception should have been raised on corrupt file access."
-
-        assert "install.rdf" in x.broken_files