[AIRFLOW-2412] Fix HiveCliHook.load_file to address HIVE-10541

HiveCliHook.load_file doesn't actually execute
the LOAD DATA statement when it is run via the beeline
bundled with Hive versions below 2.0, due to HIVE-10541.
This PR works around the problem by appending a newline
character to the end of the generated HQL.

Closes #3327 from sekikn/AIRFLOW-2412
This commit is contained in:
Kengo Seki 2018-05-08 11:51:18 +02:00 коммит произвёл Fokko Driesprong
Родитель 088900ffb7
Коммит baf15e11a5
2 изменённых файлов: 30 добавлений и 1 удалений

Просмотреть файл

@ -420,6 +420,11 @@ class HiveCliHook(BaseHook):
pvals = ", ".join(
["{0}='{1}'".format(k, v) for k, v in partition.items()])
hql += "PARTITION ({pvals});"
# As a workaround for HIVE-10541, add a newline character
# at the end of hql (AIRFLOW-2412).
hql += '\n'
hql = hql.format(**locals())
self.log.info(hql)
self.run_cli(hql)

Просмотреть файл

@ -20,12 +20,14 @@
import datetime
import random
import mock
import unittest
from hmsclient import HMSClient
from airflow.exceptions import AirflowException
from airflow.hooks.hive_hooks import HiveMetastoreHook
from airflow.hooks.hive_hooks import HiveCliHook, HiveMetastoreHook
from airflow import DAG, configuration, operators
from airflow.utils import timezone
@ -82,6 +84,28 @@ class HiveEnvironmentTest(unittest.TestCase):
metastore.drop_table(self.database, self.table, deleteData=True)
class TestHiveCliHook(unittest.TestCase):
    """Tests for ``HiveCliHook``."""

    def test_run_cli(self):
        """Run a simple statement through a default-constructed hook."""
        HiveCliHook().run_cli("SHOW DATABASES")

    @mock.patch('airflow.hooks.hive_hooks.HiveCliHook.run_cli')
    def test_load_file(self, mock_run_cli):
        """Check the statement that ``load_file`` passes to ``run_cli``.

        ``run_cli`` is patched out, so only the generated HQL text is
        compared; nothing is executed.
        """
        source_path = "/path/to/input/file"
        target_table = "output_table"

        HiveCliHook().load_file(
            filepath=source_path, table=target_table, create=False)

        # The expected statement ends with a newline character.
        template = (
            "LOAD DATA LOCAL INPATH '{filepath}' "
            "OVERWRITE INTO TABLE {table} \n"
        )
        expected_hql = template.format(
            filepath=source_path, table=target_table)
        mock_run_cli.assert_called_with(expected_hql)
class TestHiveMetastoreHook(HiveEnvironmentTest):
VALID_FILTER_MAP = {'key2': 'value2'}