======
PyHive
======

PyHive is a collection of Python `DB-API <http://www.python.org/dev/peps/pep-0249/>`_ and
`SQLAlchemy <http://www.sqlalchemy.org/>`_ interfaces for `Presto <http://prestodb.io/>`_ and
`Hive <http://hive.apache.org/>`_.

Usage
=====

DB-API
------
.. code-block:: python

    from pyhive import presto

    cursor = presto.connect('localhost').cursor()
    cursor.execute('SELECT * FROM my_awesome_data LIMIT 10')
    print cursor.fetchone()
    print cursor.fetchall()
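
The Hive interface follows the same DB-API pattern; a minimal synchronous sketch, assuming a
HiveServer2 instance is reachable on ``localhost`` (the connection call mirrors the asynchronous
example below):

.. code-block:: python

    from pyhive import hive

    # Same DB-API flow as above, but against HiveServer2 instead of Presto.
    cursor = hive.connect('localhost').cursor()
    cursor.execute('SELECT * FROM my_awesome_data LIMIT 10')
    print cursor.fetchall()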

DB-API (asynchronous)
---------------------
.. code-block:: python

    from pyhive import hive
    from TCLIService.ttypes import TOperationState

    cursor = hive.connect('localhost').cursor()
    cursor.execute('SELECT * FROM my_awesome_data LIMIT 10', async=True)

    # Poll until the query leaves the INITIALIZED/RUNNING states, printing any
    # server-side logs produced along the way.
    status = cursor.poll().operationState
    while status in (TOperationState.INITIALIZED_STATE, TOperationState.RUNNING_STATE):
        logs = cursor.fetch_logs()
        for message in logs:
            print message

        # If needed, an asynchronous query can be cancelled at any time with:
        # cursor.cancel()

        status = cursor.poll().operationState

    print cursor.fetchall()

SQLAlchemy
----------
First install this package so that its dialects get registered with SQLAlchemy (see the entry
points in ``setup.py``).

.. code-block:: python

    from sqlalchemy import *
    from sqlalchemy.engine import create_engine
    from sqlalchemy.schema import *

    engine = create_engine('presto://localhost:8080/hive/default')
    logs = Table('my_awesome_data', MetaData(bind=engine), autoload=True)
    print select([func.count('*')], from_obj=logs).scalar()

Note: query generation functionality is not exhaustive or fully tested, but there should be no
problem with raw SQL.
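
For raw SQL, a query string can be handed to the engine directly; a minimal sketch reusing the
``engine`` from the example above and the same hypothetical table (``engine.execute`` here is
plain SQLAlchemy, not anything PyHive-specific):

.. code-block:: python

    # Issue raw SQL through the engine and iterate over the resulting rows.
    result = engine.execute('SELECT * FROM my_awesome_data LIMIT 10')
    for row in result:
        print row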

Passing session configuration
-----------------------------

.. code-block:: python

    # DB-API
    hive.connect('localhost', configuration={'hive.exec.reducers.max': '123'})
    presto.connect('localhost', session_props={'query_max_run_time': '1234m'})

    # SQLAlchemy
    create_engine(
        'hive://user@host:10000/database',
        connect_args={'configuration': {'hive.exec.reducers.max': '123'}},
    )
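
Since ``connect_args`` is simply forwarded to the DB-API ``connect()`` call, the Presto dialect can
presumably be configured the same way; a sketch, not verified here, reusing the URL and the session
property shown above:

.. code-block:: python

    # connect_args is passed straight through to presto.connect().
    create_engine(
        'presto://localhost:8080/hive/default',
        connect_args={'session_props': {'query_max_run_time': '1234m'}},
    )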

Requirements
============

Install using:

- ``pip install pyhive[hive]`` for the Hive interface and
- ``pip install pyhive[presto]`` for the Presto interface.

`PyHive` works with

- Python 2.7
- For Presto: a running Presto installation
- For Hive: a running `HiveServer2 <https://cwiki.apache.org/confluence/display/Hive/Setting+up+HiveServer2>`_ daemon

There's also a `third-party Conda package <https://binstar.org/blaze/pyhive>`_.

Changelog
=========
See https://github.com/dropbox/PyHive/releases.

Testing
=======
.. image:: https://travis-ci.org/dropbox/PyHive.svg
    :target: https://travis-ci.org/dropbox/PyHive
.. image:: http://codecov.io/github/dropbox/PyHive/coverage.svg?branch=master
    :target: http://codecov.io/github/dropbox/PyHive?branch=master

Run the following in an environment with Hive/Presto::

    ./scripts/make_test_tables.sh
    virtualenv --no-site-packages env
    source env/bin/activate
    pip install -e .
    pip install -r dev_requirements.txt
    py.test

WARNING: This drops/creates tables named ``one_row``, ``one_row_complex``, and ``many_rows``, plus a
database called ``pyhive_test_database``.