- add a build that dynamically pulls latest packages
- pin CDH in all other builds
- move more of setup to script so it's easier to test outside Travis
- move Hive warehouse to a persistent directory
- switch hive-cdh5 with hive-hadoop2, since the former is deprecated
- sleep after metastore installation to fix CDH 5.11 build
- fix compatibility with latest Presto
This commit is contained in:
Jing Wang 2017-04-22 13:24:17 -07:00
Родитель e3ecad1e41
Коммит c0e266b083
10 изменённых файлов: 76 добавлений и 47 удалений

Просмотреть файл

@ -3,26 +3,23 @@ language: python
matrix:
include:
# https://docs.python.org/devguide/#status-of-python-branches
# newest dependencies + a few python versions
# One build pulls latest versions dynamically
- python: 3.6
env: CDH=cdh5 PRESTO=0.147 SQLALCHEMY=1.0.12
env: CDH=cdh5 CDH_VERSION=5 PRESTO=RELEASE SQLALCHEMY=sqlalchemy
# Others use pinned versions.
- python: 3.5
env: CDH=cdh5 PRESTO=0.147 SQLALCHEMY=1.0.12
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy==1.0.12
- python: 3.4
env: CDH=cdh5 PRESTO=0.147 SQLALCHEMY=1.0.12
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy==1.0.12
- python: 2.7
env: CDH=cdh5 PRESTO=0.147 SQLALCHEMY=1.0.12
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy==1.0.12
# stale stuff we're still using / supporting
- python: 2.7
env: CDH=cdh5 PRESTO=0.147 SQLALCHEMY=0.5.8
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy==0.5.8
# exclude: python 3 against old libraries
before_install:
install:
- ./scripts/travis-install.sh
- pip install codecov
install:
- pip install -e .
- pip install sqlalchemy==$SQLALCHEMY
- pip install -r dev_requirements.txt
# sleep so Presto has time to start up. Otherwise we might get 'No nodes available to run query'
script: sleep 10 && py.test -v
after_success: codecov

Просмотреть файл

@ -14,6 +14,7 @@ from sqlalchemy.sql import compiler
from sqlalchemy import exc
from sqlalchemy import types
from sqlalchemy import util
# TODO shouldn't use mysql type
from sqlalchemy.databases import mysql
from sqlalchemy.engine import default
import decimal

Просмотреть файл

@ -13,6 +13,8 @@ from pyhive.common import UniversalSet
from sqlalchemy import exc
from sqlalchemy import types
from sqlalchemy import util
# TODO shouldn't use mysql type
from sqlalchemy.databases import mysql
from sqlalchemy.engine import default
from sqlalchemy.sql import compiler
import re
@ -34,9 +36,12 @@ try:
except ImportError:
from sqlalchemy.databases.mysql import MSBigInteger as BigInteger
_type_map = {
'bigint': BigInteger,
'integer': types.Integer,
'boolean': types.Boolean,
'tinyint': mysql.MSTinyInteger,
'smallint': types.SmallInteger,
'integer': types.Integer,
'bigint': BigInteger,
'real': types.Float,
'double': types.Float,
'varchar': types.String,
'timestamp': types.TIMESTAMP,
@ -150,8 +155,16 @@ class PrestoDialect(default.DefaultDialect):
col_names = []
for row in rows:
part_key = 'Partition Key'
# Newer Presto moved this information from a column to the comment
if (part_key in row and row[part_key]) or row['Comment'].startswith(part_key):
# Presto puts this information in one of 3 places depending on version
# - a boolean column named "Partition Key"
# - a string in the "Comment" column
# - a string in the "Extra" column
is_partition_key = (
(part_key in row and row[part_key])
or row['Comment'].startswith(part_key)
or ('Extra' in row and 'partition key' in row['Extra'])
)
if is_partition_key:
col_names.append(row['Column'])
if col_names:
return [{'name': 'partition', 'column_names': col_names, 'unique': False}]

Просмотреть файл

@ -8,6 +8,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
import contextlib
import os
from pyhive import exc
from pyhive import presto
@ -44,13 +45,22 @@ class TestPresto(unittest.TestCase, DBAPITestCase):
for row in cursor.description:
description.append((row[0], row[1].replace('<', '(').replace('>', ')')) + row[2:])
# TODO Presto drops the union field
if os.environ.get('PRESTO') == '0.147':
tinyint_type = 'integer'
smallint_type = 'integer'
float_type = 'double'
else:
# some later version made these map to more specific types
tinyint_type = 'tinyint'
smallint_type = 'smallint'
float_type = 'real'
self.assertEqual(description, [
('boolean', 'boolean', None, None, None, None, True),
('tinyint', 'integer', None, None, None, None, True),
('smallint', 'integer', None, None, None, None, True),
('tinyint', tinyint_type, None, None, None, None, True),
('smallint', smallint_type, None, None, None, None, True),
('int', 'integer', None, None, None, None, True),
('bigint', 'bigint', None, None, None, None, True),
('float', 'double', None, None, None, None, True),
('float', float_type, None, None, None, None, True),
('double', 'double', None, None, None, None, True),
('string', 'varchar', None, None, None, None, True),
('timestamp', 'timestamp', None, None, None, None, True),
@ -90,7 +100,7 @@ class TestPresto(unittest.TestCase, DBAPITestCase):
"FROM many_rows a "
"CROSS JOIN many_rows b "
)
self.assertIn(cursor.poll()['stats']['state'], ('PLANNING', 'RUNNING'))
self.assertIn(cursor.poll()['stats']['state'], ('STARTING', 'PLANNING', 'RUNNING'))
cursor.cancel()
self.assertIsNone(cursor.poll())

Просмотреть файл

@ -15,7 +15,6 @@ from sqlalchemy.schema import Table
import contextlib
import datetime
import decimal
import os
import sqlalchemy.types
import unittest
@ -170,7 +169,7 @@ class TestSqlAlchemyHive(unittest.TestCase, SqlAlchemyTestCase):
expected = [(1,)]
self.assertEqual(result, expected)
@unittest.skipIf(os.environ.get('SQLALCHEMY') == '0.5.8', "not supported on old sqlalchemy")
@unittest.skipIf(sqlalchemy.__version__ == '0.5.8', "not supported on old sqlalchemy")
@with_engine_connection
def test_insert_values(self, engine, connection):
table = Table('insert_test', MetaData(bind=engine),

Просмотреть файл

@ -13,9 +13,14 @@
<name>fs.defaultFS</name>
<value>file:///</value>
</property>
<!--
TODO tests rely on having result set column names unprefixed
This could be improved by having an option to strip out prefixes when it would not result in
ambiguity.
-->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/tmp/hive/warehouse</value>
<name>hive.resultset.use.unique.column.names</name>
<value>false</value>
</property>
<property>
<name>hive.server2.authentication</name>
@ -33,8 +38,4 @@
<name>hive.server2.authentication.ldap.guidKey</name>
<value>cn</value>
</property>
<property>
<name>hive.resultset.use.unique.column.names</name>
<value>false</value>
</property>
</configuration>

Просмотреть файл

@ -13,10 +13,6 @@
<name>fs.defaultFS</name>
<value>file:///</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/tmp/hive/warehouse</value>
</property>
<!--
TODO tests rely on having result set column names unprefixed
This could be improved by having an option to strip out prefixes when it would not result in

Просмотреть файл

@ -1,2 +1,2 @@
connector.name=hive-%CDH%
connector.name=hive-hadoop2
hive.metastore.uri=thrift://localhost:9083

Просмотреть файл

@ -1,17 +1,17 @@
#!/bin/bash -eux
# FIXME cdh 5.11 broken for some reason
echo 'deb [arch=amd64] https://archive.cloudera.com/cdh5/ubuntu/precise/amd64/cdh precise-cdh5.10.1 contrib
deb-src https://archive.cloudera.com/cdh5/ubuntu/precise/amd64/cdh precise-cdh5.10.1 contrib' | sudo tee /etc/apt/sources.list.d/cloudera.list
# sudo wget "http://archive.cloudera.com/$CDH/ubuntu/precise/amd64/cdh/cloudera.list" \
# -O /etc/apt/sources.list.d/cloudera.list
echo "deb [arch=amd64] https://archive.cloudera.com/${CDH}/ubuntu/precise/amd64/cdh precise-cdh${CDH_VERSION} contrib
deb-src https://archive.cloudera.com/${CDH}/ubuntu/precise/amd64/cdh precise-cdh${CDH_VERSION} contrib" | sudo tee /etc/apt/sources.list.d/cloudera.list
sudo apt-get update
sudo apt-get install -y oracle-java8-installer python-dev g++ libsasl2-dev maven
sudo update-java-alternatives -s java-8-oracle
#
# LDAP
#
sudo apt-get -y --no-install-suggests --no-install-recommends --force-yes install ldap-utils slapd
sudo mkdir /tmp/slapd
sudo mkdir -p /tmp/slapd
sudo slapd -f $(dirname $0)/ldap_config/slapd.conf -h ldap://localhost:3389 &
sleep 10
sudo ldapadd -h localhost:3389 -D cn=admin,dc=example,dc=com -w test -f $(dirname $0)/../pyhive/tests/ldif_data/base.ldif
@ -22,10 +22,13 @@ sudo ldapadd -h localhost:3389 -D cn=admin,dc=example,dc=com -w test -f $(dirnam
#
sudo apt-get install -y --force-yes hive
sudo mkdir -p /user/hive
sudo chown hive:hive /user/hive
sudo cp $(dirname $0)/travis-conf/hive/hive-site.xml /etc/hive/conf/hive-site.xml
sudo -u hive mkdir /tmp/hive && sudo chmod 777 /tmp/hive
sudo apt-get install -y --force-yes hive-metastore hive-server2
sleep 5
sudo -Eu hive $(dirname $0)/make_test_tables.sh
#
@ -33,13 +36,22 @@ sudo -Eu hive $(dirname $0)/make_test_tables.sh
#
sudo apt-get install -y python # Use python2 for presto server
sudo apt-get install -y oracle-java8-installer
sudo update-java-alternatives -s java-8-oracle
curl https://repo1.maven.org/maven2/com/facebook/presto/presto-server/$PRESTO/presto-server-$PRESTO.tar.gz \
| tar zxf -
mvn org.apache.maven.plugins:maven-dependency-plugin:3.0.0:copy \
-Dartifact=com.facebook.presto:presto-server:${PRESTO}:tar.gz \
-DoutputDirectory=.
tar -x -v -z -f presto-server-*.tar.gz
rm -rf presto-server
mv presto-server-*/ presto-server
cp -r $(dirname $0)/travis-conf/presto presto-server-$PRESTO/etc
sed -i s/%CDH%/$CDH/g presto-server-$PRESTO/etc/catalog/hive.properties
cp -r $(dirname $0)/travis-conf/presto presto-server/etc
/usr/bin/python2.7 presto-server-$PRESTO/bin/launcher.py start
/usr/bin/python2.7 presto-server/bin/launcher.py start
#
# Python
#
pip install $SQLALCHEMY
pip install -e .
pip install -r dev_requirements.txt

Просмотреть файл

@ -11,6 +11,6 @@ flake8-max-line-length = 100
flake8-ignore =
TCLIService/*.py ALL
pyhive/sqlalchemy_backports.py ALL
presto-server-*/** ALL
presto-server/** ALL
pyhive/hive.py F405
pyhive/presto.py F405