# Patch summary (commentary placed ahead of the first `diff --git` header).
#
# airflow/hooks/hdfs_hook.py, hunk inside class HDFSHook:
#   * Reads the 'hdfs_namenode_principal' key from connections[0].extra_dejson
#     (None when the key is absent, since .get() is called without a default).
#   * Passes that value as the hdfs_namenode_principal= keyword to Client(...)
#     in the non-autoconfig branch and to HAClient(...) in the
#     len(connections) > 1 branch.
#   * The AutoConfigClient(...) call is an unchanged context line; it does not
#     receive the principal.
#
# setup.py:
#   * hdfs requirement list: 'snakebite>=2.4.13' -> 'snakebite>=2.7.8'.
#   * kerberos requirement list: becomes multi-line and gains
#     'snakebite[kerberos]>=2.7.8' next to 'pykerberos>=1.1.8'.
#
# NOTE(review): in the multi-connection branch the principal is taken from
#   connections[0] only -- presumably every namenode shares one principal;
#   confirm against the deployment.
# NOTE(review): the hunk headers declare multi-line hunks (e.g. -47,11 +47,15)
#   but the patch text below sits on a single line, so its original line breaks
#   and indentation appear to have been lost -- restore them before applying.
diff --git a/airflow/hooks/hdfs_hook.py b/airflow/hooks/hdfs_hook.py index f4f244631a..f02cc7cffc 100644 --- a/airflow/hooks/hdfs_hook.py +++ b/airflow/hooks/hdfs_hook.py @@ -47,11 +47,15 @@ class HDFSHook(BaseHook): if autoconfig: client = AutoConfigClient(effective_user=effective_user, use_sasl=use_sasl) else: + hdfs_namenode_principal = connections[0].extra_dejson.get('hdfs_namenode_principal') client = Client(connections[0].host, connections[0].port, - effective_user=effective_user, use_sasl=use_sasl) + effective_user=effective_user, use_sasl=use_sasl, + hdfs_namenode_principal=hdfs_namenode_principal) elif len(connections) > 1: + hdfs_namenode_principal = connections[0].extra_dejson.get('hdfs_namenode_principal') nn = [Namenode(conn.host, conn.port) for conn in connections] - client = HAClient(nn, effective_user=effective_user, use_sasl=use_sasl) + client = HAClient(nn, effective_user=effective_user, use_sasl=use_sasl, + hdfs_namenode_principal=hdfs_namenode_principal) else: raise HDFSHookException("conn_id doesn't exist in the repository") diff --git a/setup.py b/setup.py index 2b1b85f002..ca67bf5f5f 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ gcp_api = [ 'google-api-python-client<=1.4.2', 'oauth2client>=1.5.2, <2.0.0', ] -hdfs = ['snakebite>=2.4.13'] +hdfs = ['snakebite>=2.7.8'] webhdfs = ['hdfs[dataframe,avro,kerberos]>=2.0.4'] hive = [ 'hive-thrift-py>=0.0.1', @@ -83,7 +83,10 @@ slack = ['slackclient>=1.0.0'] statsd = ['statsd>=3.0.1, <4.0'] vertica = ['vertica-python>=0.5.1'] ldap = ['ldap3>=0.9.9.1'] -kerberos = ['pykerberos>=1.1.8'] +kerberos = [ + 'pykerberos>=1.1.8', + 'snakebite[kerberos]>=2.7.8' +] password = [ 'bcrypt>=2.0.0', 'flask-bcrypt>=0.7.1',