From c699187835aed9e924024edfd3d1099bbe00979d Mon Sep 17 00:00:00 2001
From: Arthur Wiedmer
Date: Tue, 2 Jun 2015 14:14:11 -0700
Subject: [PATCH] Docs and docstrings edits

---
 airflow/hooks/presto_hook.py       | 30 ++++++++++++++++++++----------
 airflow/operators/hive_to_mysql.py |  2 +-
 docs/code.rst                      |  4 +++-
 docs/start.rst                     | 20 ++++++++++----------
 4 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/airflow/hooks/presto_hook.py b/airflow/hooks/presto_hook.py
index b122776625..b1b40b0711 100644
--- a/airflow/hooks/presto_hook.py
+++ b/airflow/hooks/presto_hook.py
@@ -48,9 +48,11 @@ class PrestoHook(BaseHook):
         session.close()  # currently only a pass in pyhive
 
     def get_cursor(self):
-        '''
+        """
+
         Returns a cursor.
-        '''
+        """
+
         return self.cursor
 
     @staticmethod
@@ -58,9 +60,11 @@ class PrestoHook(BaseHook):
         return sql.strip().rstrip(';')
 
     def get_records(self, hql, parameters=None):
-        '''
+        """
+
         Get a set of records from Presto
-        '''
+        """
+
         try:
             self.cursor.execute(self._strip_sql(hql), parameters)
             records = self.cursor.fetchall()
@@ -70,10 +74,12 @@ class PrestoHook(BaseHook):
         return records
 
     def get_first(self, hql, parameters=None):
-        '''
+        """
+
         Returns only the first row, regardless of how many rows the
         query returns.
-        '''
+        """
+
         try:
             self.cursor.execute(self._strip_sql(hql), parameters)
             record = self.cursor.fetchone()
@@ -83,9 +89,11 @@ class PrestoHook(BaseHook):
         return record
 
     def get_pandas_df(self, hql, parameters=None):
-        '''
+        """
+
         Get a pandas dataframe from a sql query.
-        '''
+        """
+
         import pandas
         cursor = self.get_cursor()
         cursor.execute(self._strip_sql(hql), parameters)
@@ -103,7 +111,9 @@ class PrestoHook(BaseHook):
         return df
 
     def run(self, hql, parameters=None):
-        '''
+        """
+
         Execute the statement against Presto. Can be used to create views.
-        '''
+        """
+
         self.cursor.execute(self._strip_sql(hql), parameters)
diff --git a/airflow/operators/hive_to_mysql.py b/airflow/operators/hive_to_mysql.py
index 54aa814fa9..775a1509b7 100644
--- a/airflow/operators/hive_to_mysql.py
+++ b/airflow/operators/hive_to_mysql.py
@@ -18,7 +18,7 @@ class HiveToMySqlTransfer(BaseOperator):
     :type mysql_table: str
     :param mysql_conn_id: source mysql connection
     :type mysql_conn_id: str
-    :param hive_conn_id: desctination hive connection
+    :param hive_conn_id: destination hive connection
     :type hive_conn_id: str
     :param mysql_preoperator: sql statement to run against mysql
         prior to import, typically use to truncate of delete in place of the data
diff --git a/docs/code.rst b/docs/code.rst
index f39e2cf1b2..11af35e741 100644
--- a/docs/code.rst
+++ b/docs/code.rst
@@ -30,6 +30,7 @@ There are 3 main types of operators:
         Hive2SambaOperator,
         HiveOperator,
         HivePartitionSensor,
+        HiveToMySqlTransfer,
         MySqlOperator,
         MySqlToHiveTransfer,
         PostgresOperator,
@@ -37,6 +38,7 @@ There are 3 main types of operators:
         PrestoIntervalCheckOperator,
         PrestoValueCheckOperator,
         PythonOperator,
+        S3KeySensor,
         S3ToHiveTransfer,
         SqlSensor,
         SubDagOperator,
@@ -108,7 +110,7 @@ Hooks
 -----
 .. automodule:: airflow.hooks
     :show-inheritance:
-    :members: MySqlHook, PostgresHook, PrestoHook, HiveCliHook, HiveServer2Hook, HiveMetastoreHook, S3Hook
+    :members: MySqlHook, PostgresHook, PrestoHook, HiveCliHook, HiveServer2Hook, HiveMetastoreHook, S3Hook, SqliteHook
 
 Executors
 ---------
diff --git a/docs/start.rst b/docs/start.rst
index b23cb9cf3d..4d0f30084a 100644
--- a/docs/start.rst
+++ b/docs/start.rst
@@ -1,10 +1,10 @@
 Quick Start
 '''''''''''
-The installation is quick and straightforward. 
+The installation is quick and straightforward.
 
 .. code-block:: bash
 
-    # airflow needs a home, ~/airflow is the default, 
+    # airflow needs a home, ~/airflow is the default,
     # but you can lay foundation somewhere else if you prefer
     # (optional)
     export AIRFLOW_HOME=~/airflow
@@ -18,28 +18,28 @@ The installation is quick and straightforward.
     # start the web server, default port is 8080
     airflow webserver -p 8080
 
-Upon running these commands, airflow will create the ``$AIRFLOW_HOME`` folder 
+Upon running these commands, airflow will create the ``$AIRFLOW_HOME`` folder
 and lay a "airflow.cfg" files with defaults that get you going fast. You can
-inspect the file either in ``$AIRFLOW_HOME/airflow.cfg``, or through the UI in 
+inspect the file either in ``$AIRFLOW_HOME/airflow.cfg``, or through the UI in
 the ``Admin->Configuration`` menu.
 
-Out of the box, airflow uses a sqlite database, which you should outgrow 
+Out of the box, airflow uses a sqlite database, which you should outgrow
 fairly quickly since no parallelization is possible using this database
-backend. It works in conjunction with the ``SequentialExecutor`` which will 
+backend. It works in conjunction with the ``SequentialExecutor`` which will
 only run task instances sequentially. While this is very limiting, it allows
-you to get up and running quickly and take a tour of the UI and the 
+you to get up and running quickly and take a tour of the UI and the
 command line utilities.
 
 Here are a few commands that will trigger a few task instances. You should
-be able to see the status of the jobs change in the ``example1`` DAG as you 
+be able to see the status of the jobs change in the ``example1`` DAG as you
 run the commands below.
 
 .. code-block:: bash
 
     # run your first task instance
-    airflow run example1 runme_0 2015-01-01
+    airflow run example_bash_operator runme_0 2015-01-01
     # run a backfill over 2 days
-    airflow backfill example1 -s 2015-01-01 -e 2015-01-02
+    airflow backfill example_bash_operator -s 2015-01-01 -e 2015-01-02
 
 From this point, you can move on to the :doc:`tutorial` section, and come back
 if/when you are ready to make your Airflow sandbox more of a serious
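
For orientation, here is a minimal usage sketch of the ``PrestoHook`` methods whose docstrings this patch edits. The import path follows the module touched in the diff (``airflow/hooks/presto_hook.py``); the ``presto_conn_id`` keyword, the ``presto_default`` connection name, and the schema/table names are illustrative assumptions rather than anything taken from the patch.

.. code-block:: python

    # Usage sketch for the PrestoHook methods documented above.
    # Assumptions, not taken from the patch: a Presto connection named
    # 'presto_default' exists and the hook accepts a presto_conn_id keyword;
    # the schema and table names below are placeholders.
    from airflow.hooks.presto_hook import PrestoHook

    hook = PrestoHook(presto_conn_id='presto_default')

    # get_records(): fetch every row the query returns
    records = hook.get_records(
        "SELECT c1, count(1) FROM some_schema.some_table GROUP BY c1")

    # get_first(): fetch only the first row, regardless of how many rows match
    first_row = hook.get_first("SELECT count(1) FROM some_schema.some_table")

    # get_pandas_df(): load the result set into a pandas DataFrame
    df = hook.get_pandas_df("SELECT * FROM some_schema.some_table LIMIT 100")

    # run(): execute a statement without fetching results, e.g. to create a view
    hook.run(
        "CREATE VIEW some_schema.some_view AS "
        "SELECT c1 FROM some_schema.some_table")

``get_records`` and ``get_first`` return plain rows, ``get_pandas_df`` needs pandas installed (the diff shows it imported inside the method), and ``run`` fetches nothing, which is why its docstring notes it can be used to create views.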