add pyquery and lxml dependency
This commit is contained in:
Родитель
b2ffa2aec1
Коммит
77b286a3b0
|
@ -5,3 +5,5 @@ Jinja2==2.5.5
|
|||
hmac==20101005
|
||||
hashlib==20081119
|
||||
py-bcrypt==0.2
|
||||
|
||||
lxml==3.2.1
|
||||
|
|
|
@ -11,6 +11,7 @@ Sphinx==1.0.7
|
|||
nose==1.0.0
|
||||
-e git://github.com/jbalogh/django-nose.git#egg=django_nose
|
||||
-e git://github.com/jbalogh/test-utils.git#egg=test-utils
|
||||
pyquery==1.0
|
||||
|
||||
# L10n
|
||||
translate-toolkit==1.8.0
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
Metadata-Version: 1.0
|
||||
Name: pyquery
|
||||
Version: 1.0
|
||||
Summary: A jquery-like library for python
|
||||
Home-page: http://www.bitbucket.org/olauzanne/pyquery/
|
||||
Author: Olivier Lauzanne
|
||||
Author-email: olauzanne@gmail.com
|
||||
License: BSD
|
||||
Description:
|
||||
pyquery: a jquery-like library for python
|
||||
=========================================
|
||||
|
||||
pyquery allows you to make jquery queries on xml documents.
|
||||
The API is as similar as possible to jquery. pyquery uses lxml for fast
|
||||
xml and html manipulation.
|
||||
|
||||
This is not (or at least not yet) a library to produce or interact with
|
||||
javascript code. I just liked the jquery API and I missed it in python so I
|
||||
told myself "Hey let's make jquery in python". This is the result.
|
||||
|
||||
It can be used for many purposes, one idea that I might try in the future is to
|
||||
use it for templating with pure http templates that you modify using pyquery.
|
||||
It can also be used for web scraping or for theming applications with
|
||||
`Deliverance`_.
|
||||
|
||||
The `project`_ is being actively developed on a mercurial repository on
|
||||
Bitbucket. I have the policy of giving push access to anyone who wants it
|
||||
and then to review what he does. So if you want to contribute just email me.
|
||||
|
||||
Please report bugs on the `bitbucket
|
||||
<http://bitbucket.org/olauzanne/pyquery/issues?status=new&status=open>`_ issue
|
||||
tracker.
|
||||
|
||||
.. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance
|
||||
.. _project: http://www.bitbucket.org/olauzanne/pyquery/
|
||||
|
||||
Quickstart
|
||||
==========
|
||||
|
||||
You can use the PyQuery class to load an xml document from a string, a lxml
|
||||
document, from a file or from an url::
|
||||
|
||||
>>> from pyquery import PyQuery as pq
|
||||
>>> from lxml import etree
|
||||
>>> import urllib
|
||||
>>> d = pq("<html></html>")
|
||||
>>> d = pq(etree.fromstring("<html></html>"))
|
||||
>>> d = pq(url='http://google.com/')
|
||||
>>> # d = pq(url='http://google.com/', opener=lambda url: urllib.urlopen(url).read())
|
||||
>>> d = pq(filename=path_to_html_file)
|
||||
|
||||
Now d is like the $ in jquery::
|
||||
|
||||
>>> d("#hello")
|
||||
[<p#hello.hello>]
|
||||
>>> p = d("#hello")
|
||||
>>> print(p.html())
|
||||
Hello world !
|
||||
>>> p.html("you know <a href='http://python.org/'>Python</a> rocks")
|
||||
[<p#hello.hello>]
|
||||
>>> print(p.html())
|
||||
you know <a href="http://python.org/">Python</a> rocks
|
||||
>>> print(p.text())
|
||||
you know Python rocks
|
||||
|
||||
You can use some of the pseudo classes that are available in jQuery but that
|
||||
are not standard in css such as :first :last :even :odd :eq :lt :gt :checked
|
||||
:selected :file::
|
||||
|
||||
>>> d('p:first')
|
||||
[<p#hello.hello>]
|
||||
|
||||
|
||||
|
||||
See http://packages.python.org/pyquery/ for the full documentation
|
||||
|
||||
News
|
||||
====
|
||||
|
||||
1.0
|
||||
---
|
||||
fix issues 24
|
||||
|
||||
0.7
|
||||
---
|
||||
|
||||
Python 3 compatible
|
||||
|
||||
Add __unicode__ method
|
||||
|
||||
Add root and encoding attribute
|
||||
|
||||
fix issues 19, 20, 22, 23
|
||||
|
||||
0.6.1
|
||||
------
|
||||
|
||||
Move README.txt at package root
|
||||
|
||||
Add CHANGES.txt and add it to long_description
|
||||
|
||||
0.6
|
||||
----
|
||||
|
||||
Added PyQuery.outerHtml
|
||||
|
||||
Added PyQuery.fn
|
||||
|
||||
Added PyQuery.map
|
||||
|
||||
Change PyQuery.each behavior to reflect jQuery api
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Keywords: jquery html xml
|
||||
Platform: UNKNOWN
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Programming Language :: Python :: 2
|
||||
Classifier: Programming Language :: Python :: 3
|
|
@ -0,0 +1,19 @@
|
|||
CHANGES.txt
|
||||
MANIFEST.in
|
||||
README.txt
|
||||
setup.cfg
|
||||
setup.py
|
||||
pyquery/__init__.py
|
||||
pyquery/ajax.py
|
||||
pyquery/cssselectpatch.py
|
||||
pyquery/pyquery.py
|
||||
pyquery/rules.py
|
||||
pyquery/test.py
|
||||
pyquery/tests.txt
|
||||
pyquery.egg-info/PKG-INFO
|
||||
pyquery.egg-info/SOURCES.txt
|
||||
pyquery.egg-info/dependency_links.txt
|
||||
pyquery.egg-info/entry_points.txt
|
||||
pyquery.egg-info/not-zip-safe
|
||||
pyquery.egg-info/requires.txt
|
||||
pyquery.egg-info/top_level.txt
|
|
@ -0,0 +1 @@
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
# -*- Entry points: -*-
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
../pyquery/__init__.py
|
||||
../pyquery/ajax.py
|
||||
../pyquery/cssselectpatch.py
|
||||
../pyquery/pyquery.py
|
||||
../pyquery/rules.py
|
||||
../pyquery/test.py
|
||||
../pyquery/tests.txt
|
||||
../pyquery/__init__.pyc
|
||||
../pyquery/ajax.pyc
|
||||
../pyquery/cssselectpatch.pyc
|
||||
../pyquery/pyquery.pyc
|
||||
../pyquery/rules.pyc
|
||||
../pyquery/test.pyc
|
||||
./
|
||||
dependency_links.txt
|
||||
entry_points.txt
|
||||
not-zip-safe
|
||||
PKG-INFO
|
||||
requires.txt
|
||||
SOURCES.txt
|
||||
top_level.txt
|
|
@ -0,0 +1 @@
|
|||
|
|
@ -0,0 +1 @@
|
|||
lxml>=2.1
|
|
@ -0,0 +1 @@
|
|||
pyquery
|
|
@ -0,0 +1,15 @@
|
|||
#-*- coding:utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
|
||||
#
|
||||
# Distributed under the BSD license, see LICENSE.txt
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
import webob
|
||||
except ImportError:
|
||||
from .pyquery import PyQuery
|
||||
else:
|
||||
from .ajax import PyQuery
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
from .pyquery import PyQuery as Base
|
||||
from .pyquery import no_default
|
||||
|
||||
if sys.version_info < (3,):
|
||||
from webob import Request, Response
|
||||
|
||||
try:
|
||||
from paste.proxy import Proxy
|
||||
except ImportError:
|
||||
Proxy = no_default
|
||||
|
||||
class PyQuery(Base):
    """PyQuery variant that can load documents through a WSGI application.

    On top of what ``Base`` provides, an instance keeps the response that
    produced it in ``self.response`` and an optional WSGI application in
    ``self.app``; :meth:`get` and :meth:`post` send requests to that
    application (or to a ``paste.proxy.Proxy`` for non-local paths).
    """

    def __init__(self, *args, **kwargs):
        """Build the query object.

        Two keyword arguments are consumed here and are not forwarded to
        ``Base.__init__``:

        ``response``
            Object stored as ``self.response``.  A new ``Response()`` is
            created when it is omitted.
        ``app``
            WSGI application stored as ``self.app``.  When it is given
            without any positional argument, an empty list is passed to
            ``Base.__init__`` as the initial selection.
        """
        if 'response' in kwargs:
            self.response = kwargs.pop('response')
        else:
            self.response = Response()
        if 'app' in kwargs:
            self.app = kwargs.pop('app')
            if len(args) == 0:
                args = [[]]
        else:
            self.app = no_default
        Base.__init__(self, *args, **kwargs)
        # A sub-query uses the application of the query it derives from.
        if self._parent is not no_default:
            self.app = self._parent.app

    def _wsgi_get(self, path_info, **kwargs):
        """Send a request and wrap the answer in a new instance.

        ``path_info`` starting with ``/`` is sent to the ``app`` keyword
        argument if present, else to ``self.app``.  Any other value is handed
        to ``paste.proxy.Proxy`` and the request path becomes ``/``.

        Keyword arguments: ``environ`` (a dict that is copied and used as the
        base WSGI environ) and ``app``; every remaining keyword is merged
        into the environ as is.

        Returns an instance of the same class holding the response body when
        the response is a non 4xx/5xx ``text/html`` one, and an empty
        selection otherwise.  The response itself is available as
        ``.response`` on the result.

        Raises ``ValueError`` when a local path is requested and no
        application is available, and ``ImportError`` when a proxy is needed
        but Paste could not be imported.
        """
        if path_info.startswith('/'):
            if 'app' in kwargs:
                app = kwargs.pop('app')
            elif self.app is not no_default:
                app = self.app
            else:
                raise ValueError('There is no app available')
        else:
            if Proxy is not no_default:
                app = Proxy(path_info)
                path_info = '/'
            else:
                raise ImportError('Paste is not installed')

        if 'environ' in kwargs:
            environ = kwargs.pop('environ').copy()
        else:
            environ = {}
        if path_info:
            kwargs['PATH_INFO'] = path_info
        environ.update(kwargs)

        # unsupported (came from Deliverance)
        for key in ['HTTP_ACCEPT_ENCODING', 'HTTP_IF_MATCH', 'HTTP_IF_UNMODIFIED_SINCE',
                    'HTTP_RANGE', 'HTTP_IF_RANGE']:
            if key in environ:
                del environ[key]

        req = Request(environ)
        resp = req.get_response(app)
        # Assumes a status line such as "404 Not Found"; only the numeric
        # code (first token) matters here.
        status_code = resp.status.split()[0]
        # NOTE(review): guards against a response without a content type,
        # where ``content_type`` is presumably None -- confirm against webob.
        ctype = (resp.content_type or '').split(';')[0]
        # Look at the first digit of the code.  The previous test,
        # ``status[0] not in '45'``, was a substring check on the whole code
        # ('404' is not a substring of '45'), so error pages were parsed too.
        is_error = status_code[:1] in ('4', '5')
        if not is_error and ctype == 'text/html':
            body = resp.body
        else:
            body = []
        result = self.__class__(body,
                                parent=self._parent,
                                app=self.app,  # always return self.app
                                response=resp)
        return result

    def get(self, path_info, **kwargs):
        """GET a path from wsgi app or url
        """
        kwargs['REQUEST_METHOD'] = 'GET'
        return self._wsgi_get(path_info, **kwargs)

    def post(self, path_info, **kwargs):
        """POST a path from wsgi app or url
        """
        kwargs['REQUEST_METHOD'] = 'POST'
        return self._wsgi_get(path_info, **kwargs)
|
|
@ -0,0 +1,244 @@
|
|||
#-*- coding:utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
|
||||
#
|
||||
# Distributed under the BSD license, see LICENSE.txt
|
||||
from lxml.cssselect import Pseudo, XPathExpr, XPathExprOr, Function, css_to_xpath, Element
|
||||
from lxml import cssselect
|
||||
|
||||
class JQueryPseudo(Pseudo):
    """Pseudo classes that exist in the jQuery API but not in standard CSS.

    Every ``_xpath_<name>`` method below receives the XPath expression built
    so far, adds the condition for the ``:<name>`` pseudo class to it and
    returns that same expression object.
    """

    def _xpath_first(self, xpath):
        """``:first`` -- keep only the first selected element."""
        condition = 'position() = 1'
        xpath.add_post_condition(condition)
        return xpath

    def _xpath_last(self, xpath):
        """``:last`` -- keep only the last selected element."""
        condition = 'position() = last()'
        xpath.add_post_condition(condition)
        return xpath

    def _xpath_even(self, xpath):
        """``:even`` -- keep elements whose zero-based index is even."""
        # XPath positions start at 1 while python/js indexes start at 0, so
        # an even index corresponds to an odd XPath position.
        condition = 'position() mod 2 = 1'
        xpath.add_post_condition(condition)
        return xpath

    def _xpath_odd(self, xpath):
        """``:odd`` -- keep elements whose zero-based index is odd."""
        condition = 'position() mod 2 = 0'
        xpath.add_post_condition(condition)
        return xpath

    def _xpath_checked(self, xpath):
        """``:checked`` -- ``input`` elements with a ``checked`` attribute."""
        condition = "@checked and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_selected(self, xpath):
        """``:selected`` -- ``option`` elements with a ``selected`` attribute."""
        condition = "@selected and name(.) = 'option'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_disabled(self, xpath):
        """``:disabled`` -- any element with a ``disabled`` attribute."""
        condition = "@disabled"
        xpath.add_condition(condition)
        return xpath

    def _xpath_enabled(self, xpath):
        """``:enabled`` -- ``input`` elements without a ``disabled`` attribute."""
        condition = "not(@disabled) and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_file(self, xpath):
        """``:file`` -- ``input`` elements of type ``file``."""
        condition = "@type = 'file' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_input(self, xpath):
        """``:input`` -- ``input``, ``select``, ``textarea`` and ``button``."""
        condition = ("(name(.) = 'input' or name(.) = 'select') "
                     "or (name(.) = 'textarea' or name(.) = 'button')")
        xpath.add_condition(condition)
        return xpath

    def _xpath_button(self, xpath):
        """``:button`` -- ``button`` elements and ``input`` of type ``button``."""
        condition = ("(@type = 'button' and name(.) = 'input') "
                     "or name(.) = 'button'")
        xpath.add_condition(condition)
        return xpath

    def _xpath_radio(self, xpath):
        """``:radio`` -- ``input`` elements of type ``radio``."""
        condition = "@type = 'radio' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_text(self, xpath):
        """``:text`` -- ``input`` elements of type ``text``."""
        condition = "@type = 'text' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_checkbox(self, xpath):
        """``:checkbox`` -- ``input`` elements of type ``checkbox``."""
        condition = "@type = 'checkbox' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_password(self, xpath):
        """``:password`` -- ``input`` elements of type ``password``."""
        condition = "@type = 'password' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_submit(self, xpath):
        """``:submit`` -- ``input`` elements of type ``submit``."""
        condition = "@type = 'submit' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_image(self, xpath):
        """``:image`` -- ``input`` elements of type ``image``."""
        condition = "@type = 'image' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_reset(self, xpath):
        """``:reset`` -- ``input`` elements of type ``reset``."""
        condition = "@type = 'reset' and name(.) = 'input'"
        xpath.add_condition(condition)
        return xpath

    def _xpath_header(self, xpath):
        """``:header`` -- the heading elements ``h1`` to ``h6``."""
        # Brute-force enumeration of the six names, as in the original.
        condition = ("(name(.) = 'h1' or name(.) = 'h2' or name (.) = 'h3') "
                     "or (name(.) = 'h4' or name (.) = 'h5' or name(.) = 'h6')")
        xpath.add_condition(condition)
        return xpath

    def _xpath_parent(self, xpath):
        """``:parent`` -- elements that have at least one child element."""
        condition = "count(child::*) > 0"
        xpath.add_condition(condition)
        return xpath

    def _xpath_empty(self, xpath):
        """``:empty`` -- elements that have no child element."""
        condition = "count(child::*) = 0"
        xpath.add_condition(condition)
        return xpath
|
||||
|
||||
cssselect.Pseudo = JQueryPseudo
|
||||
|
||||
class JQueryFunction(Function):
    """Functional selectors (``selector:name(expr)``) offered by jQuery that
    are not part of the css standard.

    Each method adds a post condition to the XPath expression it receives
    and returns that same expression.
    """

    def _xpath_eq(self, xpath, expr):
        """``:eq(n)`` -- the element at zero-based index ``n``."""
        # +1 converts the zero-based index to a one-based XPath position.
        position = int(expr + 1)
        xpath.add_post_condition('position() = %s' % position)
        return xpath

    def _xpath_gt(self, xpath, expr):
        """``:gt(n)`` -- elements whose zero-based index is above ``n``."""
        position = int(expr + 1)
        xpath.add_post_condition('position() > %s' % position)
        return xpath

    def _xpath_lt(self, xpath, expr):
        """``:lt(n)`` -- elements whose zero-based index is below ``n``."""
        position = int(expr + 1)
        xpath.add_post_condition('position() < %s' % position)
        return xpath

    def _xpath_contains(self, xpath, expr):
        """``:contains(text)`` -- elements whose text contains ``text``."""
        # The text is interpolated as is; quotes inside it are not escaped.
        needle = str(expr)
        xpath.add_post_condition("contains(text(), '%s')" % needle)
        return xpath
|
||||
|
||||
cssselect.Function = JQueryFunction
|
||||
|
||||
class AdvancedXPathExpr(XPathExpr):
    """XPath expression with an extra ``post_condition``.

    The post condition is a predicate applied to the parenthesised
    expression as a whole (see ``__str__``).
    """

    def __init__(self, prefix=None, path=None, element='*', condition=None,
                 post_condition=None, star_prefix=False):
        """Store every argument on the instance under the same name."""
        self.prefix, self.path, self.element = prefix, path, element
        self.condition, self.post_condition = condition, post_condition
        self.star_prefix = star_prefix

    def add_post_condition(self, post_condition):
        """Set the post condition, or ``and`` it with the existing one."""
        if not self.post_condition:
            self.post_condition = post_condition
            return
        self.post_condition = '%s and (%s)' % (self.post_condition,
                                               post_condition)

    def __str__(self):
        """Render the expression, wrapped as ``(expr)[post_condition]`` when
        a post condition is set."""
        rendered = XPathExpr.__str__(self)
        if not self.post_condition:
            return rendered
        return '(%s)[%s]' % (rendered, self.post_condition)

    def join(self, combiner, other):
        """Join with ``other`` and take over its post condition."""
        XPathExpr.join(self, combiner, other)
        self.post_condition = other.post_condition
|
||||
|
||||
cssselect.XPathExpr = AdvancedXPathExpr
|
||||
|
||||
class AdvancedXPathExprOr(XPathExprOr):
    """Union (``|``) of several XPath expressions sharing one prefix."""

    def __init__(self, items, prefix=None):
        """Keep ``items`` and ``prefix`` (``''`` when no prefix is given)."""
        prefix = prefix or ''
        self.prefix = prefix
        self.items = items
        self.prefix_prepended = False

    def __str__(self):
        """Render the items joined with `` | ``.

        The shared prefix is pushed onto every item on the first call only:
        it may legally be modified after construction, so it cannot be
        applied in ``__init__``, and ``__str__`` may run several times, so
        it must not be applied twice.
        """
        if not self.prefix_prepended:
            shared = self.prefix or ''
            for expression in self.items:
                expression.prefix = shared + (expression.prefix or '')
            self.prefix_prepended = True
        return ' | '.join(str(expression) for expression in self.items)
|
||||
|
||||
cssselect.XPathExprOr = AdvancedXPathExprOr
|
||||
|
||||
class JQueryElement(Element):
    """
    Represents namespace|element
    """

    def xpath(self):
        """Return an ``AdvancedXPathExpr`` for this element.

        The element name is used alone for the ``*`` namespace and as
        ``namespace:element`` otherwise.
        """
        if self.namespace == '*':
            return AdvancedXPathExpr(element=self.element)
        # FIXME: Should we lowercase here?
        qualified = '%s:%s' % (self.namespace, self.element)
        return AdvancedXPathExpr(element=qualified)
|
||||
|
||||
cssselect.Element = JQueryElement
|
||||
|
||||
def selector_to_xpath(selector, prefix='descendant-or-self::'):
    """Translate a jQuery selector into an XPath expression.

    The ``[@attr]`` attribute syntax is rewritten to ``[attr]`` before the
    selector is handed to ``css_to_xpath`` together with ``prefix``.
    """
    css = selector.replace('[@', '[')
    return css_to_xpath(css, prefix)
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,31 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
try:
    from deliverance.pyref import PyReference
    from deliverance import rules
    from ajax import PyQuery as pq
except ImportError:
    # One of the imports above is unavailable: the action is not defined.
    pass
else:
    class PyQuery(rules.AbstractAction):
        """Deliverance action calling a Python function on pyquery objects."""

        name = 'py'

        def __init__(self, source_location, pyref):
            """Remember where the rule comes from and what it calls."""
            self.pyref = pyref
            self.source_location = source_location

        def apply(self, content_doc, theme_doc, resource_fetcher, log):
            """Call the reference with both documents wrapped in pyquery."""
            content = pq([content_doc])
            theme = pq([theme_doc])
            self.pyref(content, theme, resource_fetcher, log)

        @classmethod
        def from_xml(cls, el, source_location):
            """Parses and instantiates the class from an element"""
            reference = PyReference.parse_xml(
                el,
                source_location=source_location,
                default_function='transform',
            )
            return cls(source_location, reference)

    # Register the action under the ``pyquery`` name.
    rules._actions['pyquery'] = PyQuery
|
||||
|
||||
def deliverance_proxy():
|
||||
import deliverance.proxycommand
|
||||
deliverance.proxycommand.main()
|
|
@ -0,0 +1,454 @@
|
|||
#-*- coding:utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
|
||||
#
|
||||
# Distributed under the BSD license, see LICENSE.txt
|
||||
from lxml import etree
|
||||
import unittest
|
||||
import doctest
|
||||
import socket
|
||||
import sys
|
||||
import os
|
||||
|
||||
PY3k = sys.version_info >= (3,)
|
||||
|
||||
if PY3k:
|
||||
from io import StringIO
|
||||
import pyquery
|
||||
from pyquery.pyquery import PyQuery as pq
|
||||
from http.client import HTTPConnection
|
||||
pqa = pq
|
||||
else:
|
||||
from cStringIO import StringIO
|
||||
import pyquery
|
||||
from httplib import HTTPConnection
|
||||
from webob import Request, Response, exc
|
||||
from pyquery import PyQuery as pq
|
||||
from ajax import PyQuery as pqa
|
||||
|
||||
socket.setdefaulttimeout(1)
|
||||
|
||||
try:
|
||||
conn = HTTPConnection("pyquery.org:80")
|
||||
conn.request("GET", "/")
|
||||
response = conn.getresponse()
|
||||
except (socket.timeout, socket.error):
|
||||
GOT_NET=False
|
||||
else:
|
||||
GOT_NET=True
|
||||
|
||||
|
||||
def with_net(func):
    """Decorator: keep ``func`` only when ``GOT_NET`` is true.

    Otherwise ``None`` is returned, so the decorated name is bound to
    ``None`` instead of a function.
    """
    return func if GOT_NET else None
|
||||
|
||||
def not_py3k(func):
    """Decorator: keep ``func`` only when ``PY3k`` is false.

    Otherwise ``None`` is returned, so the decorated name is bound to
    ``None`` instead of a function.
    """
    return None if PY3k else func
|
||||
|
||||
dirname = os.path.dirname(os.path.abspath(pyquery.__file__))
|
||||
docs = os.path.join(os.path.dirname(dirname), 'docs')
|
||||
path_to_html_file = os.path.join(dirname, 'test.html')
|
||||
|
||||
def input_app(environ, start_response):
    """WSGI application serving a small form fragment for two paths.

    ``/`` answers with a text input, ``/submit`` with a submit button and
    any other path with an empty body.
    """
    pages = {
        '/': '<input name="youyou" type="text" value="" />',
        '/submit': '<input type="submit" value="OK" />',
    }
    req = Request(environ)
    resp = Response()
    resp.body = pages.get(req.path_info, '')
    return resp(environ, start_response)
|
||||
|
||||
class TestReadme(doctest.DocFileCase):
    """Doctest case built from the text file at ``path``.

    Subclasses only need to override ``path`` to run another file.
    """
    path = os.path.join(dirname, '..', 'README.txt')

    def __init__(self, *args, **kwargs):
        """Parse the file at ``self.path`` into a doctest.

        The positional and keyword arguments are accepted but ignored; the
        doctest is always built from ``self.path``.
        """
        parser = doctest.DocTestParser()
        # Close the file explicitly instead of relying on garbage
        # collection; try/finally keeps this valid on every Python version
        # the module targets.
        doc_file = open(self.path)
        try:
            doc = doc_file.read()
        finally:
            doc_file.close()
        test = parser.get_doctest(doc, globals(), '', self.path, 0)
        doctest.DocFileCase.__init__(self, test, optionflags=doctest.ELLIPSIS)

    def setUp(self):
        """Expose this module's globals to the doctest before it runs."""
        test = self._dt_test
        test.globs.update(globals())
|
||||
|
||||
# Create one TestReadme subclass per ``.txt`` file found in the docs
# directory, named ``Test<Title-cased file name>``.
for filename in os.listdir(docs):
    if filename.endswith('.txt'):
        # These documents are skipped when no network was detected.
        if not GOT_NET and filename in ('ajax.txt', 'tips.txt'):
            continue
        # ajax.txt is skipped on Python 3.
        if PY3k and filename in ('ajax.txt',):
            continue
        klass_name = 'Test%s' % filename.replace('.txt', '').title()
        path = os.path.join(docs, filename)
        # Bind the class in the module namespace directly rather than
        # building source text for exec(), which breaks on file names that
        # are not valid identifiers.
        globals()[klass_name] = type(klass_name, (TestReadme,), dict(path=path))
|
||||
|
||||
class TestTests(doctest.DocFileCase):
    """Doctest case built from ``tests.txt`` next to the package."""
    path = os.path.join(dirname, 'tests.txt')

    def __init__(self, *args, **kwargs):
        """Parse the file at ``self.path`` into a doctest.

        The positional and keyword arguments are accepted but ignored.
        """
        parser = doctest.DocTestParser()
        # Close the file explicitly instead of relying on garbage
        # collection; try/finally keeps this valid on every Python version
        # the module targets.
        doc_file = open(self.path)
        try:
            doc = doc_file.read()
        finally:
            doc_file.close()
        test = parser.get_doctest(doc, globals(), '', self.path, 0)
        doctest.DocFileCase.__init__(self, test, optionflags=doctest.ELLIPSIS)
|
||||
|
||||
class TestUnicode(unittest.TestCase):
|
||||
|
||||
@not_py3k
|
||||
def test_unicode(self):
|
||||
xml = pq(unicode("<p>é</p>", 'utf-8'))
|
||||
self.assertEqual(unicode(xml), unicode("<p>é</p>", 'utf-8'))
|
||||
self.assertEqual(type(xml.html()), unicode)
|
||||
self.assertEqual(str(xml), '<p>é</p>')
|
||||
|
||||
|
||||
class TestSelector(unittest.TestCase):
|
||||
klass = pq
|
||||
html = """
|
||||
<html>
|
||||
<body>
|
||||
<div>node1</div>
|
||||
<div id="node2">node2</div>
|
||||
<div class="node3">node3</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
html2 = """
|
||||
<html>
|
||||
<body>
|
||||
<div>node1</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
html3 = """
|
||||
<html>
|
||||
<body>
|
||||
<div>node1</div>
|
||||
<div id="node2">node2</div>
|
||||
<div class="node3">node3</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
html4 = """
|
||||
<html>
|
||||
<body>
|
||||
<form action="/">
|
||||
<input name="enabled" type="text" value="test"/>
|
||||
<input name="disabled" type="text" value="disabled" disabled="disabled"/>
|
||||
<input name="file" type="file" />
|
||||
<select name="select">
|
||||
<option value="">Choose something</option>
|
||||
<option value="one">One</option>
|
||||
<option value="two" selected="selected">Two</option>
|
||||
<option value="three">Three</option>
|
||||
</select>
|
||||
<input name="radio" type="radio" value="one"/>
|
||||
<input name="radio" type="radio" value="two" checked="checked"/>
|
||||
<input name="radio" type="radio" value="three"/>
|
||||
<input name="checkbox" type="checkbox" value="a"/>
|
||||
<input name="checkbox" type="checkbox" value="b" checked="checked"/>
|
||||
<input name="checkbox" type="checkbox" value="c"/>
|
||||
<input name="button" type="button" value="button" />
|
||||
<button>button</button>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
html5 = """
|
||||
<html>
|
||||
<body>
|
||||
<h1>Heading 1</h1>
|
||||
<h2>Heading 2</h2>
|
||||
<h3>Heading 3</h3>
|
||||
<h4>Heading 4</h4>
|
||||
<h5>Heading 5</h5>
|
||||
<h6>Heading 6</h6>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
@not_py3k
|
||||
def test_get_root(self):
|
||||
doc = pq('<?xml version="1.0" encoding="UTF-8"?><root><p/></root>')
|
||||
self.assertEqual(isinstance(doc.root, etree._ElementTree), True)
|
||||
self.assertEqual(doc.encoding, 'UTF-8')
|
||||
|
||||
def test_selector_from_doc(self):
|
||||
doc = etree.fromstring(self.html)
|
||||
assert len(self.klass(doc)) == 1
|
||||
assert len(self.klass('div', doc)) == 3
|
||||
assert len(self.klass('div#node2', doc)) == 1
|
||||
|
||||
def test_selector_from_html(self):
|
||||
assert len(self.klass(self.html)) == 1
|
||||
assert len(self.klass('div', self.html)) == 3
|
||||
assert len(self.klass('div#node2', self.html)) == 1
|
||||
|
||||
def test_selector_from_obj(self):
|
||||
e = self.klass(self.html)
|
||||
assert len(e('div')) == 3
|
||||
assert len(e('div#node2')) == 1
|
||||
|
||||
def test_selector_from_html_from_obj(self):
|
||||
e = self.klass(self.html)
|
||||
assert len(e('div', self.html2)) == 1
|
||||
assert len(e('div#node2', self.html2)) == 0
|
||||
|
||||
def test_class(self):
|
||||
e = self.klass(self.html)
|
||||
assert isinstance(e, self.klass)
|
||||
n = e('div', self.html2)
|
||||
assert isinstance(n, self.klass)
|
||||
assert n._parent is e
|
||||
|
||||
def test_pseudo_classes(self):
|
||||
e = self.klass(self.html)
|
||||
self.assertEqual(e('div:first').text(), 'node1')
|
||||
self.assertEqual(e('div:last').text(), 'node3')
|
||||
self.assertEqual(e('div:even').text(), 'node1 node3')
|
||||
self.assertEqual(e('div div:even').text(), None)
|
||||
self.assertEqual(e('body div:even').text(), 'node1 node3')
|
||||
self.assertEqual(e('div:gt(0)').text(), 'node2 node3')
|
||||
self.assertEqual(e('div:lt(1)').text(), 'node1')
|
||||
self.assertEqual(e('div:eq(2)').text(), 'node3')
|
||||
|
||||
#test on the form
|
||||
e = self.klass(self.html4)
|
||||
assert len(e(':disabled')) == 1
|
||||
assert len(e('input:enabled')) == 9
|
||||
assert len(e(':selected')) == 1
|
||||
assert len(e(':checked')) == 2
|
||||
assert len(e(':file')) == 1
|
||||
assert len(e(':input')) == 12
|
||||
assert len(e(':button')) == 2
|
||||
assert len(e(':radio')) == 3
|
||||
assert len(e(':checkbox')) == 3
|
||||
|
||||
#test on other elements
|
||||
e = self.klass(self.html5)
|
||||
assert len(e(":header")) == 6
|
||||
assert len(e(":parent")) == 2
|
||||
assert len(e(":empty")) == 6
|
||||
assert len(e(":contains('Heading')")) == 6
|
||||
|
||||
def test_on_the_fly_dom_creation(self):
|
||||
e = self.klass(self.html)
|
||||
assert e('<p>Hello world</p>').text() == 'Hello world'
|
||||
assert e('').text() == None
|
||||
|
||||
class TestTraversal(unittest.TestCase):
|
||||
klass = pq
|
||||
html = """
|
||||
<html>
|
||||
<body>
|
||||
<div id="node1"><span>node1</span></div>
|
||||
<div id="node2" class="node3"><span>node2</span><span> booyah</span></div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
def test_filter(self):
|
||||
assert len(self.klass('div', self.html).filter('.node3')) == 1
|
||||
assert len(self.klass('div', self.html).filter('#node2')) == 1
|
||||
assert len(self.klass('div', self.html).filter(lambda i: i == 0)) == 1
|
||||
|
||||
d = pq('<p>Hello <b>warming</b> world</p>')
|
||||
self.assertEqual(d('strong').filter(lambda el: True), [])
|
||||
|
||||
def test_not(self):
|
||||
assert len(self.klass('div', self.html).not_('.node3')) == 1
|
||||
|
||||
def test_is(self):
|
||||
assert self.klass('div', self.html).is_('.node3')
|
||||
assert not self.klass('div', self.html).is_('.foobazbar')
|
||||
|
||||
def test_find(self):
|
||||
assert len(self.klass('#node1', self.html).find('span')) == 1
|
||||
assert len(self.klass('#node2', self.html).find('span')) == 2
|
||||
assert len(self.klass('div', self.html).find('span')) == 3
|
||||
|
||||
def test_each(self):
|
||||
doc = self.klass(self.html)
|
||||
doc('span').each(lambda: doc(this).wrap("<em></em>"))
|
||||
assert len(doc('em')) == 3
|
||||
|
||||
def test_map(self):
|
||||
def ids_minus_one(i, elem):
|
||||
return int(self.klass(elem).attr('id')[-1]) - 1
|
||||
assert self.klass('div', self.html).map(ids_minus_one) == [0, 1]
|
||||
|
||||
d = pq('<p>Hello <b>warming</b> world</p>')
|
||||
self.assertEqual(d('strong').map(lambda i,el: pq(this).text()), [])
|
||||
|
||||
def test_end(self):
|
||||
assert len(self.klass('div', self.html).find('span').end()) == 2
|
||||
assert len(self.klass('#node2', self.html).find('span').end()) == 1
|
||||
|
||||
def test_closest(self):
|
||||
assert len(self.klass('#node1 span', self.html).closest('body')) == 1
|
||||
assert self.klass('#node2', self.html).closest('.node3').attr('id') == 'node2'
|
||||
assert self.klass('.node3', self.html).closest('form') == []
|
||||
|
||||
class TestOpener(unittest.TestCase):

    def test_custom_opener(self):
        # The opener replaces the real fetch and returns fixed markup.
        fake_opener = lambda url: '<html><body><div class="node"></div>'

        doc = pq(url='http://example.com', opener=fake_opener)
        matches = doc('.node')
        assert len(matches) == 1, doc
|
||||
|
||||
class TestCallback(unittest.TestCase):
|
||||
html = """
|
||||
<ol>
|
||||
<li>Coffee</li>
|
||||
<li>Tea</li>
|
||||
<li>Milk</li>
|
||||
</ol>
|
||||
"""
|
||||
|
||||
def test_S_this_inside_callback(self):
|
||||
S = pq(self.html)
|
||||
self.assertEqual(S('li').map(lambda i, el: S(this).html()), ['Coffee', 'Tea', 'Milk'])
|
||||
|
||||
def test_parameterless_callback(self):
|
||||
S = pq(self.html)
|
||||
self.assertEqual(S('li').map(lambda: S(this).html()), ['Coffee', 'Tea', 'Milk'])
|
||||
|
||||
def application(environ, start_response):
    """WSGI application answering ``<pre>`` markup to GET requests and a
    link to every other method."""
    req = Request(environ)
    response = Response()
    is_get = req.method == 'GET'
    response.body = '<pre>Yeah !</pre>' if is_get else '<a href="/plop">Yeah !</a>'
    return response(environ, start_response)
|
||||
|
||||
def secure_application(environ, start_response):
    """Serve ``application`` only when ``REMOTE_USER`` is in the environ;
    answer with ``HTTPUnauthorized`` otherwise."""
    if 'REMOTE_USER' in environ:
        return application(environ, start_response)
    denied = exc.HTTPUnauthorized('vomis')
    return denied(environ, start_response)
|
||||
|
||||
class TestAjaxSelector(TestSelector):
    # Re-run the selector tests against the ajax-enabled class and add the
    # WSGI specific ones below.
    klass = pqa

    @not_py3k
    @with_net
    def test_proxy(self):
        # A full URL goes through the proxy instead of a local app.
        client = self.klass([])
        page = client.get('http://pyquery.org/')
        assert len(page('body')) == 1, (str(page.response), page)

    @not_py3k
    def test_get(self):
        client = self.klass(app=application)
        page = client.get('/')
        assert len(page('pre')) == 1, page

    @not_py3k
    def test_secure_get(self):
        client = self.klass(app=secure_application)
        # The user can be passed inside an explicit environ ...
        page = client.get('/', environ={'REMOTE_USER': 'gawii'})
        assert len(page('pre')) == 1, page
        # ... or directly as a keyword argument.
        page = client.get('/', REMOTE_USER='gawii')
        assert len(page('pre')) == 1, page

    @not_py3k
    def test_secure_get_not_authorized(self):
        client = self.klass(app=secure_application)
        page = client.get('/')
        assert len(page('pre')) == 0, page

    @not_py3k
    def test_post(self):
        client = self.klass(app=application)
        page = client.post('/')
        assert len(page('a')) == 1, page

    @not_py3k
    def test_subquery(self):
        # A sub-query must still be able to reach the parent's app.
        client = self.klass(app=application)
        subquery = client('div')
        page = subquery.post('/')
        assert len(page('a')) == 1, page
|
||||
|
||||
class TestManipulating(unittest.TestCase):
|
||||
html = '''
|
||||
<div class="portlet">
|
||||
<a href="/toto">Test<img src ="myimage" />My link text</a>
|
||||
<a href="/toto2"><img src ="myimage2" />My link text 2</a>
|
||||
</div>
|
||||
'''
|
||||
|
||||
def test_remove(self):
|
||||
d = pq(self.html)
|
||||
d('img').remove()
|
||||
val = d('a:first').html()
|
||||
assert val == 'Test My link text', repr(val)
|
||||
val = d('a:last').html()
|
||||
assert val == ' My link text 2', repr(val)
|
||||
|
||||
class TestHTMLParser(unittest.TestCase):
|
||||
xml = "<div>I'm valid XML</div>"
|
||||
html = '''
|
||||
<div class="portlet">
|
||||
<a href="/toto">TestimageMy link text</a>
|
||||
<a href="/toto2">imageMy link text 2</a>
|
||||
Behind you, a three-headed HTML‐Entity!
|
||||
</div>
|
||||
'''
|
||||
def test_parser_persistance(self):
|
||||
d = pq(self.xml, parser='xml')
|
||||
self.assertRaises(etree.XMLSyntaxError, lambda: d.after(self.html))
|
||||
d = pq(self.xml, parser='html')
|
||||
d.after(self.html) # this should not fail
|
||||
|
||||
|
||||
@not_py3k
|
||||
def test_soup_parser(self):
|
||||
d = pq('<meta><head><title>Hello</head><body onload=crash()>Hi all<p>', parser='soup')
|
||||
self.assertEqual(str(d), '<html><meta/><head><title>Hello</title></head><body onload="crash()">Hi all<p/></body></html>')
|
||||
|
||||
def test_replaceWith(self):
|
||||
expected = '''<div class="portlet">
|
||||
<a href="/toto">TestimageMy link text</a>
|
||||
<a href="/toto2">imageMy link text 2</a>
|
||||
Behind you, a three-headed HTML&dash;Entity!
|
||||
</div>'''
|
||||
d = pq(self.html)
|
||||
d('img').replaceWith('image')
|
||||
val = d.__html__()
|
||||
assert val == expected, (repr(val), repr(expected))
|
||||
|
||||
def test_replaceWith_with_function(self):
|
||||
expected = '''<div class="portlet">
|
||||
TestimageMy link text
|
||||
imageMy link text 2
|
||||
Behind you, a three-headed HTML&dash;Entity!
|
||||
</div>'''
|
||||
d = pq(self.html)
|
||||
d('a').replaceWith(lambda i, e: pq(e).html())
|
||||
val = d.__html__()
|
||||
assert val == expected, (repr(val), repr(expected))
|
||||
|
||||
class TestWebScrapping(unittest.TestCase):

    @with_net
    def test_get(self):
        query = {'q': 'inconsistency'}
        d = pq('http://www.theonion.com/search/', query, method='get')
        search_box_value = d('input[name=q]:last').val()
        self.assertEqual(search_box_value, 'inconsistency')
        headline = d('.news-in-brief h3').text()
        self.assertEqual(headline, 'Slight Inconsistency Found In Bible')

    @with_net
    def test_post(self):
        query = {'q': 'inconsistency'}
        d = pq('http://www.theonion.com/search/', query, method='post')
        # the onion does not search on post
        search_box_value = d('input[name=q]:last').val()
        self.assertEqual(search_box_value, '')
|
||||
|
||||
if __name__ == '__main__':
    # unittest.main() runs the tests, reports the outcome itself and exits
    # the interpreter with the matching status.  It does not return a
    # (fails, total) pair, so the former unpacking and the 'OK' print after
    # it could never run.
    unittest.main()
|
|
@ -0,0 +1,32 @@
|
|||
|
||||
Assume spaces normalization::
|
||||
|
||||
>>> pq('<ul> <li> </li> </ul>').text()
|
||||
''
|
||||
|
||||
>>> print(pq('<ul> <li> toto </li> <li> tata </li> </ul>').text())
|
||||
toto tata
|
||||
|
||||
Complex wrapping::
|
||||
|
||||
>>> d = pq('<div id="bouh"><span>youhou</span></div>')
|
||||
>>> s = d('span')
|
||||
>>> s is d
|
||||
False
|
||||
>>> s.wrap('<div><div id="wrapper"></div></div>')
|
||||
[<div>]
|
||||
|
||||
We get the original doc with new node::
|
||||
|
||||
>>> print(d)
|
||||
<div id="bouh"><div><div id="wrapper"><span>youhou</span></div></div></div>
|
||||
|
||||
Complex wrapAll::
|
||||
|
||||
>>> doc = pq('<div><span>Hey</span><span>you !</span></div>')
|
||||
>>> s = doc('span')
|
||||
>>> s.wrapAll('<div id="wrapper"></div>')
|
||||
[<div#wrapper>]
|
||||
|
||||
>>> print(doc)
|
||||
<div><div id="wrapper"><span>Hey</span><span>you !</span></div></div>
|
Загрузка…
Ссылка в новой задаче