add pyquery and lxml dependency

This commit is contained in:
James Long 2011-10-31 16:14:48 -04:00
Родитель b2ffa2aec1
Коммит 77b286a3b0
17 изменённых файлов: 2237 добавлений и 0 удалений

Просмотреть файл

@ -5,3 +5,5 @@ Jinja2==2.5.5
hmac==20101005
hashlib==20081119
py-bcrypt==0.2
lxml==3.2.1

Просмотреть файл

@ -11,6 +11,7 @@ Sphinx==1.0.7
nose==1.0.0
-e git://github.com/jbalogh/django-nose.git#egg=django_nose
-e git://github.com/jbalogh/test-utils.git#egg=test-utils
pyquery==1.0
# L10n
translate-toolkit==1.8.0

Просмотреть файл

@ -0,0 +1,122 @@
Metadata-Version: 1.0
Name: pyquery
Version: 1.0
Summary: A jquery-like library for python
Home-page: http://www.bitbucket.org/olauzanne/pyquery/
Author: Olivier Lauzanne
Author-email: olauzanne@gmail.com
License: BSD
Description:
pyquery: a jquery-like library for python
=========================================
pyquery allows you to make jquery queries on xml documents.
The API is as similar to jquery as possible. pyquery uses lxml for fast
xml and html manipulation.
This is not (or at least not yet) a library to produce or interact with
javascript code. I just liked the jquery API and I missed it in python so I
told myself "Hey let's make jquery in python". This is the result.
It can be used for many purposes, one idea that I might try in the future is to
use it for templating with pure http templates that you modify using pyquery.
It can also be used for web scraping or for theming applications with
`Deliverance`_.
The `project`_ is being actively developed on a mercurial repository on
Bitbucket. I have the policy of giving push access to anyone who wants it
and then to review what he does. So if you want to contribute just email me.
Please report bugs on the `bitbucket
<http://bitbucket.org/olauzanne/pyquery/issues?status=new&status=open>`_ issue
tracker.
.. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance
.. _project: http://www.bitbucket.org/olauzanne/pyquery/
Quickstart
==========
You can use the PyQuery class to load an xml document from a string, a lxml
document, from a file or from an url::
>>> from pyquery import PyQuery as pq
>>> from lxml import etree
>>> import urllib
>>> d = pq("<html></html>")
>>> d = pq(etree.fromstring("<html></html>"))
>>> d = pq(url='http://google.com/')
>>> # d = pq(url='http://google.com/', opener=lambda url: urllib.urlopen(url).read())
>>> d = pq(filename=path_to_html_file)
Now d is like the $ in jquery::
>>> d("#hello")
[<p#hello.hello>]
>>> p = d("#hello")
>>> print(p.html())
Hello world !
>>> p.html("you know <a href='http://python.org/'>Python</a> rocks")
[<p#hello.hello>]
>>> print(p.html())
you know <a href="http://python.org/">Python</a> rocks
>>> print(p.text())
you know Python rocks
You can use some of the pseudo classes that are available in jQuery but that
are not standard in css such as :first :last :even :odd :eq :lt :gt :checked
:selected :file::
>>> d('p:first')
[<p#hello.hello>]
See http://packages.python.org/pyquery/ for the full documentation
News
====
1.0
---
fix issues 24
0.7
---
Python 3 compatible
Add __unicode__ method
Add root and encoding attribute
fix issues 19, 20, 22, 23
0.6.1
------
Move README.txt at package root
Add CHANGES.txt and add it to long_description
0.6
----
Added PyQuery.outerHtml
Added PyQuery.fn
Added PyQuery.map
Change PyQuery.each behavior to reflect jQuery api
Keywords: jquery html xml
Platform: UNKNOWN
Classifier: Intended Audience :: Developers
Classifier: Development Status :: 5 - Production/Stable
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 3

Просмотреть файл

@ -0,0 +1,19 @@
CHANGES.txt
MANIFEST.in
README.txt
setup.cfg
setup.py
pyquery/__init__.py
pyquery/ajax.py
pyquery/cssselectpatch.py
pyquery/pyquery.py
pyquery/rules.py
pyquery/test.py
pyquery/tests.txt
pyquery.egg-info/PKG-INFO
pyquery.egg-info/SOURCES.txt
pyquery.egg-info/dependency_links.txt
pyquery.egg-info/entry_points.txt
pyquery.egg-info/not-zip-safe
pyquery.egg-info/requires.txt
pyquery.egg-info/top_level.txt

Просмотреть файл

@ -0,0 +1 @@

Просмотреть файл

@ -0,0 +1,3 @@
# -*- Entry points: -*-

Просмотреть файл

@ -0,0 +1,21 @@
../pyquery/__init__.py
../pyquery/ajax.py
../pyquery/cssselectpatch.py
../pyquery/pyquery.py
../pyquery/rules.py
../pyquery/test.py
../pyquery/tests.txt
../pyquery/__init__.pyc
../pyquery/ajax.pyc
../pyquery/cssselectpatch.pyc
../pyquery/pyquery.pyc
../pyquery/rules.pyc
../pyquery/test.pyc
./
dependency_links.txt
entry_points.txt
not-zip-safe
PKG-INFO
requires.txt
SOURCES.txt
top_level.txt

Просмотреть файл

@ -0,0 +1 @@

Просмотреть файл

@ -0,0 +1 @@
lxml>=2.1

Просмотреть файл

@ -0,0 +1 @@
pyquery

Просмотреть файл

@ -0,0 +1,15 @@
#-*- coding:utf-8 -*-
#
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
#
# Distributed under the BSD license, see LICENSE.txt
import sys
try:
import webob
except ImportError:
from .pyquery import PyQuery
else:
from .ajax import PyQuery

Просмотреть файл

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
import sys
from .pyquery import PyQuery as Base
from .pyquery import no_default
if sys.version_info < (3,):
from webob import Request, Response
try:
from paste.proxy import Proxy
except ImportError:
Proxy = no_default
class PyQuery(Base):
    """PyQuery variant whose ``get``/``post`` fetch a new document.

    Paths starting with ``/`` are sent to a WSGI application; anything else
    is handed to ``paste.proxy.Proxy`` (when Paste could be imported).

    Extra keyword arguments consumed by the constructor:

    ``response``
        Object stored on ``self.response``; a fresh ``Response()`` is
        created when it is omitted.
    ``app``
        WSGI application stored on ``self.app`` (``no_default`` when
        omitted). When the instance has a parent, the parent's ``app``
        replaces it after the base initialisation.
    """

    def __init__(self, *args, **kwargs):
        if 'response' in kwargs:
            self.response = kwargs.pop('response')
        else:
            self.response = Response()
        if 'app' in kwargs:
            self.app = kwargs.pop('app')
            if len(args) == 0:
                # Only an app was given: start from an empty selection.
                args = [[]]
        else:
            self.app = no_default
        Base.__init__(self, *args, **kwargs)
        if self._parent is not no_default:
            self.app = self._parent.app

    def _wsgi_get(self, path_info, **kwargs):
        """Issue a request and wrap the response body in a new instance.

        ``path_info`` starting with ``/`` is dispatched to the ``app``
        keyword argument if present, else to ``self.app``; any other value
        is passed to ``Proxy`` and the request path becomes ``/``.

        An optional ``environ`` keyword argument is copied and used as the
        base WSGI environ; all remaining keyword arguments are merged into
        it.

        Returns a new ``self.__class__`` built from the response body when
        the status is not 4xx/5xx and the content type is ``text/html``,
        otherwise built from an empty list. The response object is attached
        through the ``response`` keyword.

        Raises ``ValueError`` when no app is available for an absolute path
        and ``ImportError`` when a URL is requested but Paste is missing.
        """
        if path_info.startswith('/'):
            if 'app' in kwargs:
                app = kwargs.pop('app')
            elif self.app is not no_default:
                app = self.app
            else:
                raise ValueError('There is no app available')
        else:
            if Proxy is not no_default:
                app = Proxy(path_info)
                path_info = '/'
            else:
                raise ImportError('Paste is not installed')

        if 'environ' in kwargs:
            environ = kwargs.pop('environ').copy()
        else:
            environ = {}
        if path_info:
            kwargs['PATH_INFO'] = path_info
        environ.update(kwargs)

        # unsupported (came from Deliverance)
        for key in ['HTTP_ACCEPT_ENCODING', 'HTTP_IF_MATCH',
                    'HTTP_IF_UNMODIFIED_SINCE', 'HTTP_RANGE',
                    'HTTP_IF_RANGE']:
            if key in environ:
                del environ[key]

        req = Request(environ)
        resp = req.get_response(app)
        status_code = resp.status.split()[0]
        # content_type may be missing on some responses; treat it as empty
        # instead of failing on ``None.split``.
        ctype = (resp.content_type or '').split(';')[0]
        # ``status_code`` is a whole code such as '404'. Testing it with
        # ``not in '45'`` is a substring check that is always true for a
        # three-digit code, so look at the leading digit explicitly.
        is_error = status_code[:1] in ('4', '5')
        if not is_error and ctype == 'text/html':
            body = resp.body
        else:
            body = []
        result = self.__class__(body,
                                parent=self._parent,
                                app=self.app,  # always return self.app
                                response=resp)
        return result

    def get(self, path_info, **kwargs):
        """GET a path from wsgi app or url
        """
        kwargs['REQUEST_METHOD'] = 'GET'
        return self._wsgi_get(path_info, **kwargs)

    def post(self, path_info, **kwargs):
        """POST a path from wsgi app or url
        """
        kwargs['REQUEST_METHOD'] = 'POST'
        return self._wsgi_get(path_info, **kwargs)

Просмотреть файл

@ -0,0 +1,244 @@
#-*- coding:utf-8 -*-
#
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
#
# Distributed under the BSD license, see LICENSE.txt
from lxml.cssselect import Pseudo, XPathExpr, XPathExprOr, Function, css_to_xpath, Element
from lxml import cssselect
def _pseudo_condition(xpath, condition):
    """Add *condition* to *xpath* and hand the same object back."""
    xpath.add_condition(condition)
    return xpath


def _pseudo_post_condition(xpath, condition):
    """Add *condition* as a post condition of *xpath* and return *xpath*."""
    xpath.add_post_condition(condition)
    return xpath


def _pseudo_input_type(xpath, type_name):
    """Restrict *xpath* to ``input`` elements whose ``type`` is *type_name*."""
    return _pseudo_condition(
        xpath, "@type = '%s' and name(.) = 'input'" % type_name)


class JQueryPseudo(Pseudo):
    """jQuery-only pseudo classes (``:first``, ``:last``, ...).

    These selectors are not part of the css standard but exist in the
    jquery API. Each ``_xpath_<name>`` method receives the xpath expression
    built so far, adds the matching condition and returns it.
    """

    def _xpath_first(self, xpath):
        """Matches the first selected element."""
        return _pseudo_post_condition(xpath, 'position() = 1')

    def _xpath_last(self, xpath):
        """Matches the last selected element."""
        return _pseudo_post_condition(xpath, 'position() = last()')

    def _xpath_even(self, xpath):
        """Matches even elements, zero-indexed."""
        # xpath positions start at 1 while python and js start at 0, so the
        # zero-indexed even elements sit at odd xpath positions.
        return _pseudo_post_condition(xpath, 'position() mod 2 = 1')

    def _xpath_odd(self, xpath):
        """Matches odd elements, zero-indexed."""
        return _pseudo_post_condition(xpath, 'position() mod 2 = 0')

    def _xpath_checked(self, xpath):
        """Matches input elements carrying a ``checked`` attribute."""
        return _pseudo_condition(xpath, "@checked and name(.) = 'input'")

    def _xpath_selected(self, xpath):
        """Matches option elements carrying a ``selected`` attribute."""
        return _pseudo_condition(xpath, "@selected and name(.) = 'option'")

    def _xpath_disabled(self, xpath):
        """Matches all elements carrying a ``disabled`` attribute."""
        return _pseudo_condition(xpath, "@disabled")

    def _xpath_enabled(self, xpath):
        """Matches input elements without a ``disabled`` attribute."""
        return _pseudo_condition(xpath, "not(@disabled) and name(.) = 'input'")

    def _xpath_file(self, xpath):
        """Matches all input elements of type file."""
        return _pseudo_input_type(xpath, 'file')

    def _xpath_input(self, xpath):
        """Matches input, select, textarea and button elements."""
        return _pseudo_condition(
            xpath,
            "(name(.) = 'input' or name(.) = 'select') "
            "or (name(.) = 'textarea' or name(.) = 'button')")

    def _xpath_button(self, xpath):
        """Matches all button input elements and the button element."""
        return _pseudo_condition(
            xpath,
            "(@type = 'button' and name(.) = 'input') "
            "or name(.) = 'button'")

    def _xpath_radio(self, xpath):
        """Matches all radio input elements."""
        return _pseudo_input_type(xpath, 'radio')

    def _xpath_text(self, xpath):
        """Matches all text input elements."""
        return _pseudo_input_type(xpath, 'text')

    def _xpath_checkbox(self, xpath):
        """Matches all checkbox input elements."""
        return _pseudo_input_type(xpath, 'checkbox')

    def _xpath_password(self, xpath):
        """Matches all password input elements."""
        return _pseudo_input_type(xpath, 'password')

    def _xpath_submit(self, xpath):
        """Matches all submit input elements."""
        return _pseudo_input_type(xpath, 'submit')

    def _xpath_image(self, xpath):
        """Matches all image input elements."""
        return _pseudo_input_type(xpath, 'image')

    def _xpath_reset(self, xpath):
        """Matches all reset input elements."""
        return _pseudo_input_type(xpath, 'reset')

    def _xpath_header(self, xpath):
        """Matches all header elements (h1, ..., h6)."""
        # this seems kind of brute-force, is there a better way?
        return _pseudo_condition(
            xpath,
            "(name(.) = 'h1' or name(.) = 'h2' or name (.) = 'h3') "
            "or (name(.) = 'h4' or name (.) = 'h5' or name(.) = 'h6')")

    def _xpath_parent(self, xpath):
        """Match all elements that contain other elements."""
        return _pseudo_condition(xpath, "count(child::*) > 0")

    def _xpath_empty(self, xpath):
        """Match all elements that do not contain other elements."""
        return _pseudo_condition(xpath, "count(child::*) = 0")


# Replace lxml's Pseudo class with the jquery-aware subclass.
cssselect.Pseudo = JQueryPseudo
def _xpath_string_literal(text):
    """Render *text* as an XPath 1.0 string literal.

    XPath 1.0 has no escape character inside literals, so a value holding
    both quote kinds is assembled with ``concat()``.
    """
    if "'" not in text:
        return "'%s'" % text
    if '"' not in text:
        return '"%s"' % text
    pieces = ["'%s'" % piece for piece in text.split("'")]
    return "concat(%s)" % ", \"'\", ".join(pieces)


class JQueryFunction(Function):
    """Represents selector:name(expr) that are present in JQuery but not in the
    css standard.

    Each ``_xpath_<name>`` method receives the xpath expression built so far
    and the function argument, adds a post condition and returns the
    expression.
    """

    def _xpath_eq(self, xpath, expr):
        """Matches a single element by its (zero-based) index.
        """
        # Convert before adding one: ``expr + 1`` fails when the argument
        # arrives as a numeric string. xpath positions are one-based.
        xpath.add_post_condition('position() = %s' % (int(expr) + 1))
        return xpath

    def _xpath_gt(self, xpath, expr):
        """Matches all elements with an index over the given one.
        """
        xpath.add_post_condition('position() > %s' % (int(expr) + 1))
        return xpath

    def _xpath_lt(self, xpath, expr):
        """Matches all elements with an index below the given one.
        """
        xpath.add_post_condition('position() < %s' % (int(expr) + 1))
        return xpath

    def _xpath_contains(self, xpath, expr):
        """Matches all elements that contain the given text
        """
        # Quote the text as a proper XPath literal so a quote character in
        # the searched text cannot terminate the literal early.
        xpath.add_post_condition(
            "contains(text(), %s)" % _xpath_string_literal(str(expr)))
        return xpath


# Replace lxml's Function class with the jquery-aware subclass.
cssselect.Function = JQueryFunction
class AdvancedXPathExpr(XPathExpr):
    """XPath expression that also carries a *post condition*.

    When rendered, the post condition is applied to the whole parenthesised
    path: ``(path)[post_condition]``.
    """

    def __init__(self, prefix=None, path=None, element='*', condition=None,
                 post_condition=None, star_prefix=False):
        self.prefix, self.path, self.element = prefix, path, element
        self.condition, self.post_condition = condition, post_condition
        self.star_prefix = star_prefix

    def add_post_condition(self, post_condition):
        """And *post_condition* onto the existing one, or set it if unset."""
        if not self.post_condition:
            self.post_condition = post_condition
            return
        self.post_condition = '%s and (%s)' % (self.post_condition,
                                               post_condition)

    def __str__(self):
        rendered = XPathExpr.__str__(self)
        if not self.post_condition:
            return rendered
        return '(%s)[%s]' % (rendered, self.post_condition)

    def join(self, combiner, other):
        """Join with *other*, then take over *other*'s post condition."""
        XPathExpr.join(self, combiner, other)
        self.post_condition = other.post_condition


# Replace lxml's XPathExpr class with the post-condition-aware subclass.
cssselect.XPathExpr = AdvancedXPathExpr
class AdvancedXPathExprOr(XPathExprOr):
    """Union of xpath expressions sharing a prefix applied on first render."""

    def __init__(self, items, prefix=None):
        self.items = items
        self.prefix = prefix or ''
        self.prefix_prepended = False

    def __str__(self):
        # The prefix cannot be prepended in __init__ because it may legally
        # be modified after construction, and __str__ may run several times,
        # so a flag guards against prepending it twice.
        if not self.prefix_prepended:
            shared = self.prefix or ''
            for entry in self.items:
                entry.prefix = shared + (entry.prefix or '')
            self.prefix_prepended = True
        return ' | '.join(map(str, self.items))


# Replace lxml's XPathExprOr class with the subclass above.
cssselect.XPathExprOr = AdvancedXPathExprOr
class JQueryElement(Element):
    """
    Represents namespace|element, rendered as an ``AdvancedXPathExpr``.
    """

    def xpath(self):
        """Return the expression for this element, qualified unless the
        namespace is the ``*`` wildcard."""
        if self.namespace != '*':
            # FIXME: Should we lowercase here?
            name = '%s:%s' % (self.namespace, self.element)
        else:
            name = self.element
        return AdvancedXPathExpr(element=name)


# Replace lxml's Element class with the subclass above.
cssselect.Element = JQueryElement
def selector_to_xpath(selector, prefix='descendant-or-self::'):
    """Translate a JQuery selector into an xpath expression.

    Every ``[@`` in the selector is rewritten to ``[`` before the selector
    is handed to ``css_to_xpath`` together with *prefix*.
    """
    normalized = selector.replace('[@', '[')
    return css_to_xpath(normalized, prefix)

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
try:
    from deliverance.pyref import PyReference
    from deliverance import rules
    # Explicit relative import, matching the other modules of the package;
    # the implicit form ``from ajax import ...`` does not resolve on
    # Python 3 and the failure would be hidden by the handler below.
    from .ajax import PyQuery as pq
except ImportError:
    # Deliverance support is optional: without these imports the action is
    # simply not defined or registered.
    pass
else:
    class PyQuery(rules.AbstractAction):
        """Python function"""
        name = 'py'

        def __init__(self, source_location, pyref):
            self.source_location = source_location
            self.pyref = pyref

        def apply(self, content_doc, theme_doc, resource_fetcher, log):
            """Call the referenced function with both documents wrapped
            in PyQuery objects, plus the fetcher and the log."""
            self.pyref(pq([content_doc]), pq([theme_doc]),
                       resource_fetcher, log)

        @classmethod
        def from_xml(cls, el, source_location):
            """Parses and instantiates the class from an element"""
            pyref = PyReference.parse_xml(
                el, source_location=source_location,
                default_function='transform')
            return cls(source_location, pyref)

    # Register the action under the name 'pyquery'.
    rules._actions['pyquery'] = PyQuery
def deliverance_proxy():
    """Run ``deliverance.proxycommand.main()``.

    The import happens at call time, so loading this module does not
    require ``deliverance.proxycommand``.
    """
    from deliverance import proxycommand
    proxycommand.main()

Просмотреть файл

@ -0,0 +1,454 @@
#-*- coding:utf-8 -*-
#
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
#
# Distributed under the BSD license, see LICENSE.txt
from lxml import etree
import unittest
import doctest
import socket
import sys
import os
PY3k = sys.version_info >= (3,)
if PY3k:
from io import StringIO
import pyquery
from pyquery.pyquery import PyQuery as pq
from http.client import HTTPConnection
pqa = pq
else:
from cStringIO import StringIO
import pyquery
from httplib import HTTPConnection
from webob import Request, Response, exc
from pyquery import PyQuery as pq
from ajax import PyQuery as pqa
socket.setdefaulttimeout(1)
# Probe the network once at import time; GOT_NET records whether the
# request succeeded so network-dependent tests can be dropped otherwise.
conn = HTTPConnection("pyquery.org:80")
try:
    conn.request("GET", "/")
    response = conn.getresponse()
except (socket.timeout, socket.error):
    GOT_NET = False
else:
    GOT_NET = True
finally:
    # Always release the probe connection instead of leaving the socket
    # open for the rest of the test run.
    conn.close()
def with_net(func):
    """Return *func* when ``GOT_NET`` is true, otherwise ``None``.

    Used as a decorator: without network the decorated name is bound to
    ``None`` instead of a test function.
    """
    return func if GOT_NET else None
def not_py3k(func):
    """Return *func* unless ``PY3k`` is true, in which case return ``None``.

    Used as a decorator: on Python 3 the decorated name is bound to
    ``None`` instead of a test function.
    """
    return None if PY3k else func
# Directory containing the imported ``pyquery`` package.
dirname = os.path.dirname(os.path.abspath(pyquery.__file__))
# ``docs`` directory located next to the package directory.
docs = os.path.join(os.path.split(dirname)[0], 'docs')
# HTML fixture inside the package directory.
path_to_html_file = os.path.join(dirname, 'test.html')
def input_app(environ, start_response):
    """WSGI test app returning a small form fragment chosen by path.

    ``/`` yields a text input, ``/submit`` a submit input and any other
    path an empty body.
    """
    resp = Response()
    req = Request(environ)
    bodies = {
        '/': '<input name="youyou" type="text" value="" />',
        '/submit': '<input type="submit" value="OK" />',
    }
    resp.body = bodies.get(req.path_info, '')
    return resp(environ, start_response)
class TestReadme(doctest.DocFileCase):
    """Run the doctests found in the file at ``path`` as one test case.

    Subclasses only need to override ``path``.
    """
    path = os.path.join(dirname, '..', 'README.txt')

    def __init__(self, *args, **kwargs):
        parser = doctest.DocTestParser()
        # Close the file explicitly instead of relying on garbage
        # collection to release the handle.
        handle = open(self.path)
        try:
            doc = handle.read()
        finally:
            handle.close()
        test = parser.get_doctest(doc, globals(), '', self.path, 0)
        doctest.DocFileCase.__init__(self, test, optionflags=doctest.ELLIPSIS)

    def setUp(self):
        # Expose this module's globals to the doctest before each run.
        test = self._dt_test
        test.globs.update(globals())
# Create one TestReadme subclass per ``.txt`` file of the docs directory,
# named ``Test<Title>`` and bound as a module global.
# A missing docs directory yields no extra tests instead of an import-time
# OSError from os.listdir.
if os.path.isdir(docs):
    for filename in os.listdir(docs):
        if not filename.endswith('.txt'):
            continue
        if not GOT_NET and filename in ('ajax.txt', 'tips.txt'):
            continue
        if PY3k and filename in ('ajax.txt',):
            continue
        klass_name = 'Test%s' % filename.replace('.txt', '').title()
        path = os.path.join(docs, filename)
        # Bind the class directly rather than exec-ing a generated
        # assignment statement built from the file name.
        globals()[klass_name] = type(klass_name, (TestReadme,),
                                     dict(path=path))
class TestTests(doctest.DocFileCase):
    """Run the doctests of the package's ``tests.txt`` as one test case."""
    path = os.path.join(dirname, 'tests.txt')

    def __init__(self, *args, **kwargs):
        parser = doctest.DocTestParser()
        # Close the file explicitly instead of relying on garbage
        # collection to release the handle.
        handle = open(self.path)
        try:
            doc = handle.read()
        finally:
            handle.close()
        test = parser.get_doctest(doc, globals(), '', self.path, 0)
        doctest.DocFileCase.__init__(self, test, optionflags=doctest.ELLIPSIS)
class TestUnicode(unittest.TestCase):
    """Unicode round-trip checks (Python 2 only)."""

    @not_py3k
    def test_unicode(self):
        source = unicode("<p>é</p>", 'utf-8')
        xml = pq(source)
        self.assertEqual(unicode(xml), source)
        html_type = type(xml.html())
        self.assertEqual(html_type, unicode)
        # str() renders the non-ascii character as a numeric reference.
        self.assertEqual(str(xml), '<p>&#233;</p>')
class TestSelector(unittest.TestCase):
    """Selector tests, run against the class referenced by ``klass``."""
    klass = pq
    html = """
<html>
<body>
<div>node1</div>
<div id="node2">node2</div>
<div class="node3">node3</div>
</body>
</html>
"""
    html2 = """
<html>
<body>
<div>node1</div>
</body>
</html>
"""
    html3 = """
<html>
<body>
<div>node1</div>
<div id="node2">node2</div>
<div class="node3">node3</div>
</body>
</html>
"""
    html4 = """
<html>
<body>
<form action="/">
<input name="enabled" type="text" value="test"/>
<input name="disabled" type="text" value="disabled" disabled="disabled"/>
<input name="file" type="file" />
<select name="select">
<option value="">Choose something</option>
<option value="one">One</option>
<option value="two" selected="selected">Two</option>
<option value="three">Three</option>
</select>
<input name="radio" type="radio" value="one"/>
<input name="radio" type="radio" value="two" checked="checked"/>
<input name="radio" type="radio" value="three"/>
<input name="checkbox" type="checkbox" value="a"/>
<input name="checkbox" type="checkbox" value="b" checked="checked"/>
<input name="checkbox" type="checkbox" value="c"/>
<input name="button" type="button" value="button" />
<button>button</button>
</form>
</body>
</html>
"""
    html5 = """
<html>
<body>
<h1>Heading 1</h1>
<h2>Heading 2</h2>
<h3>Heading 3</h3>
<h4>Heading 4</h4>
<h5>Heading 5</h5>
<h6>Heading 6</h6>
</body>
</html>
"""

    @not_py3k
    def test_get_root(self):
        doc = pq('<?xml version="1.0" encoding="UTF-8"?><root><p/></root>')
        root_is_tree = isinstance(doc.root, etree._ElementTree)
        self.assertEqual(root_is_tree, True)
        self.assertEqual(doc.encoding, 'UTF-8')

    def test_selector_from_doc(self):
        # Select from an already parsed lxml document.
        doc = etree.fromstring(self.html)
        assert len(self.klass(doc)) == 1
        for selector, count in (('div', 3), ('div#node2', 1)):
            assert len(self.klass(selector, doc)) == count

    def test_selector_from_html(self):
        # Select from a raw html string.
        assert len(self.klass(self.html)) == 1
        for selector, count in (('div', 3), ('div#node2', 1)):
            assert len(self.klass(selector, self.html)) == count

    def test_selector_from_obj(self):
        e = self.klass(self.html)
        for selector, count in (('div', 3), ('div#node2', 1)):
            assert len(e(selector)) == count

    def test_selector_from_html_from_obj(self):
        # A context passed at call time is used for the selection.
        e = self.klass(self.html)
        for selector, count in (('div', 1), ('div#node2', 0)):
            assert len(e(selector, self.html2)) == count

    def test_class(self):
        e = self.klass(self.html)
        assert isinstance(e, self.klass)
        n = e('div', self.html2)
        assert isinstance(n, self.klass)
        assert n._parent is e

    def test_pseudo_classes(self):
        e = self.klass(self.html)
        text_cases = (
            ('div:first', 'node1'),
            ('div:last', 'node3'),
            ('div:even', 'node1 node3'),
            ('div div:even', None),
            ('body div:even', 'node1 node3'),
            ('div:gt(0)', 'node2 node3'),
            ('div:lt(1)', 'node1'),
            ('div:eq(2)', 'node3'),
        )
        for selector, expected in text_cases:
            self.assertEqual(e(selector).text(), expected)

        # test on the form
        e = self.klass(self.html4)
        form_counts = (
            (':disabled', 1),
            ('input:enabled', 9),
            (':selected', 1),
            (':checked', 2),
            (':file', 1),
            (':input', 12),
            (':button', 2),
            (':radio', 3),
            (':checkbox', 3),
        )
        for selector, count in form_counts:
            assert len(e(selector)) == count

        # test on other elements
        e = self.klass(self.html5)
        other_counts = (
            (":header", 6),
            (":parent", 2),
            (":empty", 6),
            (":contains('Heading')", 6),
        )
        for selector, count in other_counts:
            assert len(e(selector)) == count

    def test_on_the_fly_dom_creation(self):
        e = self.klass(self.html)
        created = e('<p>Hello world</p>')
        assert created.text() == 'Hello world'
        nothing = e('')
        assert nothing.text() == None
class TestTraversal(unittest.TestCase):
    """Traversal method tests (filter, not_, is_, find, each, map, ...)."""
    klass = pq
    html = """
<html>
<body>
<div id="node1"><span>node1</span></div>
<div id="node2" class="node3"><span>node2</span><span> booyah</span></div>
</body>
</html>
"""

    def test_filter(self):
        # Selector strings and a callable are all accepted as criteria.
        for criterion in ('.node3', '#node2', lambda i: i == 0):
            divs = self.klass('div', self.html)
            assert len(divs.filter(criterion)) == 1
        d = pq('<p>Hello <b>warming</b> world</p>')
        self.assertEqual(d('strong').filter(lambda el: True), [])

    def test_not(self):
        divs = self.klass('div', self.html)
        assert len(divs.not_('.node3')) == 1

    def test_is(self):
        assert self.klass('div', self.html).is_('.node3')
        assert not self.klass('div', self.html).is_('.foobazbar')

    def test_find(self):
        for selector, count in (('#node1', 1), ('#node2', 2), ('div', 3)):
            assert len(self.klass(selector, self.html).find('span')) == count

    def test_each(self):
        doc = self.klass(self.html)
        doc('span').each(lambda: doc(this).wrap("<em></em>"))
        assert len(doc('em')) == 3

    def test_map(self):
        def ids_minus_one(i, elem):
            return int(self.klass(elem).attr('id')[-1]) - 1
        assert self.klass('div', self.html).map(ids_minus_one) == [0, 1]
        d = pq('<p>Hello <b>warming</b> world</p>')
        self.assertEqual(d('strong').map(lambda i,el: pq(this).text()), [])

    def test_end(self):
        for selector, count in (('div', 2), ('#node2', 1)):
            found = self.klass(selector, self.html).find('span')
            assert len(found.end()) == count

    def test_closest(self):
        assert len(self.klass('#node1 span', self.html).closest('body')) == 1
        nearest = self.klass('#node2', self.html).closest('.node3')
        assert nearest.attr('id') == 'node2'
        assert self.klass('.node3', self.html).closest('form') == []
class TestOpener(unittest.TestCase):
    """Check that a caller-supplied ``opener`` provides the document."""

    def test_custom_opener(self):
        def fake_opener(url):
            return '<html><body><div class="node"></div>'
        doc = pq(url='http://example.com', opener=fake_opener)
        matches = doc('.node')
        assert len(matches) == 1, doc
class TestCallback(unittest.TestCase):
    """Callbacks passed to ``map`` can refer to ``this``."""
    html = """
<ol>
<li>Coffee</li>
<li>Tea</li>
<li>Milk</li>
</ol>
"""

    def test_S_this_inside_callback(self):
        S = pq(self.html)
        drinks = S('li').map(lambda i, el: S(this).html())
        self.assertEqual(drinks, ['Coffee', 'Tea', 'Milk'])

    def test_parameterless_callback(self):
        S = pq(self.html)
        drinks = S('li').map(lambda: S(this).html())
        self.assertEqual(drinks, ['Coffee', 'Tea', 'Milk'])
def application(environ, start_response):
    """WSGI test app: a ``<pre>`` body for GET, an ``<a>`` body otherwise."""
    req = Request(environ)
    response = Response()
    is_get = req.method == 'GET'
    response.body = ('<pre>Yeah !</pre>' if is_get
                     else '<a href="/plop">Yeah !</a>')
    return response(environ, start_response)
def secure_application(environ, start_response):
    """Serve ``application`` only when ``REMOTE_USER`` is in the environ;
    otherwise answer with ``exc.HTTPUnauthorized``."""
    if 'REMOTE_USER' in environ:
        return application(environ, start_response)
    return exc.HTTPUnauthorized('vomis')(environ, start_response)
class TestAjaxSelector(TestSelector):
    """Re-run the selector tests with ``pqa`` and add get/post tests."""
    klass = pqa

    @not_py3k
    @with_net
    def test_proxy(self):
        query = self.klass([])
        val = query.get('http://pyquery.org/')
        assert len(val('body')) == 1, (str(val.response), val)

    @not_py3k
    def test_get(self):
        query = self.klass(app=application)
        val = query.get('/')
        assert len(val('pre')) == 1, val

    @not_py3k
    def test_secure_get(self):
        query = self.klass(app=secure_application)
        # The user may be given through an environ dict or as a keyword.
        for extra in ({'environ': dict(REMOTE_USER='gawii')},
                      {'REMOTE_USER': 'gawii'}):
            val = query.get('/', **extra)
            assert len(val('pre')) == 1, val

    @not_py3k
    def test_secure_get_not_authorized(self):
        query = self.klass(app=secure_application)
        val = query.get('/')
        assert len(val('pre')) == 0, val

    @not_py3k
    def test_post(self):
        query = self.klass(app=application)
        val = query.post('/')
        assert len(val('a')) == 1, val

    @not_py3k
    def test_subquery(self):
        # A selection made from the query can issue requests as well.
        query = self.klass(app=application)
        selection = query('div')
        val = selection.post('/')
        assert len(val('a')) == 1, val
class TestManipulating(unittest.TestCase):
    """Document manipulation tests."""
    html = '''
<div class="portlet">
<a href="/toto">Test<img src ="myimage" />My link text</a>
<a href="/toto2"><img src ="myimage2" />My link text 2</a>
</div>
'''

    def test_remove(self):
        d = pq(self.html)
        d('img').remove()
        first_link = d('a:first').html()
        assert first_link == 'Test My link text', repr(first_link)
        last_link = d('a:last').html()
        assert last_link == ' My link text 2', repr(last_link)
class TestHTMLParser(unittest.TestCase):
    """Parser selection and ``replaceWith`` tests."""
    xml = "<div>I'm valid XML</div>"
    html = '''
<div class="portlet">
<a href="/toto">TestimageMy link text</a>
<a href="/toto2">imageMy link text 2</a>
Behind you, a three-headed HTML&dash;Entity!
</div>
'''

    def test_parser_persistance(self):
        strict = pq(self.xml, parser='xml')
        self.assertRaises(etree.XMLSyntaxError, strict.after, self.html)
        lenient = pq(self.xml, parser='html')
        lenient.after(self.html) # this should not fail

    @not_py3k
    def test_soup_parser(self):
        d = pq('<meta><head><title>Hello</head><body onload=crash()>Hi all<p>', parser='soup')
        rendered = str(d)
        self.assertEqual(rendered, '<html><meta/><head><title>Hello</title></head><body onload="crash()">Hi all<p/></body></html>')

    def test_replaceWith(self):
        d = pq(self.html)
        d('img').replaceWith('image')
        val = d.__html__()
        expected = '''<div class="portlet">
<a href="/toto">TestimageMy link text</a>
<a href="/toto2">imageMy link text 2</a>
Behind you, a three-headed HTML&amp;dash;Entity!
</div>'''
        assert val == expected, (repr(val), repr(expected))

    def test_replaceWith_with_function(self):
        d = pq(self.html)
        d('a').replaceWith(lambda i, e: pq(e).html())
        val = d.__html__()
        expected = '''<div class="portlet">
TestimageMy link text
imageMy link text 2
Behind you, a three-headed HTML&amp;dash;Entity!
</div>'''
        assert val == expected, (repr(val), repr(expected))
class TestWebScrapping(unittest.TestCase):
    """Live-site requests; only defined as tests when the network is up."""

    @with_net
    def test_get(self):
        params = {'q': 'inconsistency'}
        d = pq('http://www.theonion.com/search/', params, method='get')
        query_value = d('input[name=q]:last').val()
        self.assertEqual(query_value, 'inconsistency')
        headline = d('.news-in-brief h3').text()
        self.assertEqual(headline, 'Slight Inconsistency Found In Bible')

    @with_net
    def test_post(self):
        params = {'q': 'inconsistency'}
        d = pq('http://www.theonion.com/search/', params, method='post')
        query_value = d('input[name=q]:last').val()
        self.assertEqual(query_value, '') # the onion does not search on post
if __name__ == '__main__':
    # unittest.main() runs the tests, reports the outcome and exits the
    # interpreter with the matching status. It never returned a
    # ``(fails, total)`` pair, so nothing after the call could run.
    unittest.main()

Просмотреть файл

@ -0,0 +1,32 @@
Assume spaces normalization::
>>> pq('<ul> <li> </li> </ul>').text()
''
>>> print(pq('<ul> <li> toto </li> <li> tata </li> </ul>').text())
toto tata
Complex wrapping::
>>> d = pq('<div id="bouh"><span>youhou</span></div>')
>>> s = d('span')
>>> s is d
False
>>> s.wrap('<div><div id="wrapper"></div></div>')
[<div>]
We get the original doc with new node::
>>> print(d)
<div id="bouh"><div><div id="wrapper"><span>youhou</span></div></div></div>
Complex wrapAll::
>>> doc = pq('<div><span>Hey</span><span>you !</span></div>')
>>> s = doc('span')
>>> s.wrapAll('<div id="wrapper"></div>')
[<div#wrapper>]
>>> print(doc)
<div><div id="wrapper"><span>Hey</span><span>you !</span></div></div>