[AIRFLOW-2063] Add missing docs for GCP
- Add missing operator in `code.rst` and `integration.rst`
- Fix documentation in DataProc operator
- Minor doc fix in GCS operators
- Fixed codeblocks & links in docstrings for BigQuery, DataProc, DataFlow, MLEngine, GCS hooks & operators

Closes #3003 from kaxil/doc_update
Parent: 49ac26dad3
Commit: f4e3e352e1

@@ -66,7 +66,9 @@ class DatastoreHook(GoogleCloudBaseHook):
     def commit(self, body):
         """
         Commit a transaction, optionally creating, deleting or modifying some entities.
-        see https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit
+
+        .. seealso::
+            https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit

         :param body: the body of the commit request
         :return: the response body of the commit request
@@ -77,7 +79,10 @@ class DatastoreHook(GoogleCloudBaseHook):
     def lookup(self, keys, read_consistency=None, transaction=None):
         """
         Lookup some entities by key
-        see https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup
+
+        .. seealso::
+            https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup
+
         :param keys: the keys to lookup
         :param read_consistency: the read consistency to use. default, strong or eventual.
                 Cannot be used with a transaction.
@@ -94,7 +99,10 @@ class DatastoreHook(GoogleCloudBaseHook):
     def rollback(self, transaction):
         """
         Roll back a transaction
-        see https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback
+
+        .. seealso::
+            https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback
+
         :param transaction: the transaction to roll back
         """
         self.connection.projects().rollback(projectId=self.project_id, body={'transaction': transaction})\
@@ -103,7 +111,10 @@ class DatastoreHook(GoogleCloudBaseHook):
     def run_query(self, body):
         """
         Run a query for entities.
-        see https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery
+
+        .. seealso::
+            https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery
+
         :param body: the body of the query request
         :return: the batch of query results.
         """

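For orientation, a minimal usage sketch of the DatastoreHook methods touched above; the hook construction, entity keys and query payload are illustrative placeholders, not part of this change: ::

    from airflow.contrib.hooks.datastore_hook import DatastoreHook

    hook = DatastoreHook()  # assumes the default Datastore connection is configured
    # Run a query; `body` follows the projects.runQuery request shape referenced above.
    batch = hook.run_query(body={'gqlQuery': {'queryString': 'SELECT * FROM Task'}})
    # Look up entities by key; read_consistency cannot be combined with a transaction.
    entities = hook.lookup(keys=[{'path': [{'kind': 'Task', 'name': 'sample'}]}],
                           read_consistency='EVENTUAL')
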
@@ -67,14 +67,14 @@ class MLEngineHook(GoogleCloudBaseHook):
        :type project_id: string

        :param job: MLEngine Job object that should be provided to the MLEngine
-            API, such as:
-            {
-              'jobId': 'my_job_id',
-              'trainingInput': {
-                'scaleTier': 'STANDARD_1',
-                ...
-              }
-            }
+            API, such as: ::
+                {
+                  'jobId': 'my_job_id',
+                  'trainingInput': {
+                    'scaleTier': 'STANDARD_1',
+                    ...
+                  }
+                }
        :type job: dict

        :param use_existing_job_fn: In case that a MLEngine job with the same

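The job dict documented above can be submitted through the hook roughly as follows; the project id, package URI and module name are placeholder assumptions: ::

    from airflow.contrib.hooks.gcp_mlengine_hook import MLEngineHook

    hook = MLEngineHook()  # assumes the default GCP connection
    training_job = {
        'jobId': 'my_job_id',
        'trainingInput': {
            'scaleTier': 'STANDARD_1',
            'packageUris': ['gs://my-bucket/trainer-0.1.tar.gz'],  # placeholder package
            'pythonModule': 'trainer.task',                        # placeholder module
            'region': 'us-central1',
        },
    }
    # Submit the job dict shown in the docstring above; project id is a placeholder.
    finished_job = hook.create_job(project_id='my-gcp-project', job=training_job)
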
@@ -231,7 +231,7 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
        :param prefix: prefix string which filters objects whose name begin with this prefix
        :type prefix: string
        :param delimiter: filters objects based on the delimiter (for e.g '.csv')
-        :type delimiter:string
+        :type delimiter: string
        :return: a stream of object names matching the filtering criteria
        """
        service = self.get_conn()
@@ -273,11 +273,12 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
    def get_size(self, bucket, object):
        """
        Gets the size of a file in Google Cloud Storage.

        :param bucket: The Google cloud storage bucket where the object is.
        :type bucket: string
-        :param object: The name of the object to check in the Google cloud
-            storage bucket.
+        :param object: The name of the object to check in the Google cloud storage bucket.
        :type object: string
+
        """
+        self.log.info('Checking the file size of object: %s in bucket: %s', object, bucket)
        service = self.get_conn()
@@ -290,7 +291,7 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
        if 'name' in response and response['name'][-1] != '/':
            # Remove Directories & Just check size of files
            size = response['size']
-            self.log.info('The file size of %s is %s', object, size)
+            self.log.info('The file size of %s is %s bytes.', object, size)
            return size
        else:
            raise ValueError('Object is not a file')
@@ -301,6 +302,7 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
    def get_crc32c(self, bucket, object):
        """
        Gets the CRC32c checksum of an object in Google Cloud Storage.
+
        :param bucket: The Google cloud storage bucket where the object is.
        :type bucket: string
        :param object: The name of the object to check in the Google cloud
@@ -327,6 +329,7 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
    def get_md5hash(self, bucket, object):
        """
        Gets the MD5 hash of an object in Google Cloud Storage.
+
        :param bucket: The Google cloud storage bucket where the object is.
        :type bucket: string
        :param object: The name of the object to check in the Google cloud

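Taken together, the GoogleCloudStorageHook methods documented above can be combined like this; bucket name, prefix and connection setup are illustrative: ::

    from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

    hook = GoogleCloudStorageHook()  # assumes the default GCS connection
    # List object names filtered by prefix and delimiter, as documented above.
    for name in hook.list(bucket='data', prefix='sales/sales-2017/', delimiter='.csv'):
        print(hook.get_size(bucket='data', object=name))     # size in bytes
        print(hook.get_crc32c(bucket='data', object=name))   # CRC32c checksum
        print(hook.get_md5hash(bucket='data', object=name))  # MD5 hash
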
@@ -93,10 +93,10 @@ class BigQueryIntervalCheckOperator(IntervalCheckOperator):
    Checks that the values of metrics given as SQL expressions are within
    a certain tolerance of the ones from days_back before.

-    This method constructs a query like so:
+    This method constructs a query like so ::

-    SELECT {metrics_threshold_dict_key} FROM {table}
-    WHERE {date_filter_column}=<date>
+        SELECT {metrics_threshold_dict_key} FROM {table}
+        WHERE {date_filter_column}=<date>

    :param table: the table name
    :type table: str

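A hedged sketch of wiring this check into a DAG; the table, thresholds and the surrounding `dag` object are assumptions, and the remaining parameter names follow the base IntervalCheckOperator: ::

    from airflow.contrib.operators.bigquery_check_operator import BigQueryIntervalCheckOperator

    check_sales = BigQueryIntervalCheckOperator(
        task_id='check_sales_metrics',
        table='my_dataset.sales',                                 # placeholder table
        metrics_thresholds={'COUNT(*)': 1.5, 'SUM(amount)': 2.0},  # allowed ratio vs. days_back
        date_filter_column='ds',
        days_back=-7,
        dag=dag)
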
@@ -26,25 +26,26 @@ class BigQueryGetDataOperator(BaseOperator):
    be equal to the number of rows fetched. Each element in the list will again be a list
    where element would represent the columns values for that row.

-    Example Result: [['Tony', '10'], ['Mike', '20'], ['Steve', '15']]
+    **Example Result**: ``[['Tony', '10'], ['Mike', '20'], ['Steve', '15']]``

-    Note: If you pass fields to `selected_fields` which are in different order than the
-    order of columns already in
-    BQ table, the data will still be in the order of BQ table.
-    For example if the BQ table has 3 columns as
-    [A,B,C] and you pass 'B,A' in the `selected_fields`
-    the data would still be of the form 'A,B'.
+    .. note::
+        If you pass fields to ``selected_fields`` which are in different order than the
+        order of columns already in
+        BQ table, the data will still be in the order of BQ table.
+        For example if the BQ table has 3 columns as
+        ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields``
+        the data would still be of the form ``'A,B'``.

-    Example:
+    **Example**: ::

-    get_data = BigQueryGetDataOperator(
-        task_id='get_data_from_bq',
-        dataset_id='test_dataset',
-        table_id='Transaction_partitions',
-        max_results='100',
-        # selected_fields='DATE',
-        bigquery_conn_id='airflow-service-account'
-    )
+        get_data = BigQueryGetDataOperator(
+            task_id='get_data_from_bq',
+            dataset_id='test_dataset',
+            table_id='Transaction_partitions',
+            max_results='100',
+            selected_fields='DATE',
+            bigquery_conn_id='airflow-service-account'
+        )

    :param dataset_id: The dataset ID of the requested table.
    :type destination_dataset_table: string

@@ -19,11 +19,11 @@ from airflow.utils.decorators import apply_defaults

class BigQueryToBigQueryOperator(BaseOperator):
    """
-    Copies data from one BigQuery table to another. See here:
+    Copies data from one BigQuery table to another.

-    https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy
-
-    For more details about these parameters.
+    .. seealso::
+        For more details about these parameters:
+        https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy

    :param source_project_dataset_tables: One or more
        dotted (project:|project.)<dataset>.<table> BigQuery tables to use as the

@@ -21,11 +21,9 @@ class BigQueryToCloudStorageOperator(BaseOperator):
    """
    Transfers a BigQuery table to a Google Cloud Storage bucket.

-    See here:
-
-    https://cloud.google.com/bigquery/docs/reference/v2/jobs
-
-    For more details about these parameters.
+    .. seealso::
+        For more details about these parameters:
+        https://cloud.google.com/bigquery/docs/reference/v2/jobs

    :param source_project_dataset_table: The dotted
        (<project>.|<project>:)<dataset>.<table> BigQuery table to use as the source

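A short, illustrative use of the export operator described above; project, dataset and bucket names are placeholders and the `dag` object is assumed: ::

    from airflow.contrib.operators.bigquery_to_gcs import BigQueryToCloudStorageOperator

    export_sales = BigQueryToCloudStorageOperator(
        task_id='export_sales_to_gcs',
        source_project_dataset_table='my-project.my_dataset.sales',         # placeholder
        destination_cloud_storage_uris=['gs://my-bucket/exports/sales-*.csv'],
        bigquery_conn_id='bigquery_default',
        dag=dag)
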
@@ -31,34 +31,33 @@ class DataFlowJavaOperator(BaseOperator):
    It's a good practice to define dataflow_* parameters in the default_args of the dag
    like the project, zone and staging location.

-    ```
-    default_args = {
-        'dataflow_default_options': {
-            'project': 'my-gcp-project',
-            'zone': 'europe-west1-d',
-            'stagingLocation': 'gs://my-staging-bucket/staging/'
+    .. code-block:: python
+
+       default_args = {
+           'dataflow_default_options': {
+               'project': 'my-gcp-project',
+               'zone': 'europe-west1-d',
+               'stagingLocation': 'gs://my-staging-bucket/staging/'
            }
        }
    }
-    ```

    You need to pass the path to your dataflow as a file reference with the ``jar``
    parameter, the jar needs to be a self executing jar. Use ``options`` to pass on
    options to your job.

-    ```
-    t1 = DataFlowOperation(
-        task_id='datapflow_example',
-        jar='{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar',
-        options={
-            'autoscalingAlgorithm': 'BASIC',
-            'maxNumWorkers': '50',
-            'start': '{{ds}}',
-            'partitionType': 'DAY',
-            'labels': {'foo' : 'bar'}
-        },
-        gcp_conn_id='gcp-airflow-service-account',
-        dag=my-dag)
-    ```
+    .. code-block:: python
+        t1 = DataFlowOperation(
+            task_id='datapflow_example',
+            jar='{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar',
+            options={
+                'autoscalingAlgorithm': 'BASIC',
+                'maxNumWorkers': '50',
+                'start': '{{ds}}',
+                'partitionType': 'DAY',
+                'labels': {'foo' : 'bar'}
+            },
+            gcp_conn_id='gcp-airflow-service-account',
+            dag=my-dag)

    Both ``jar`` and ``options`` are templated so you can use variables in them.
    """
@@ -84,9 +83,10 @@ class DataFlowJavaOperator(BaseOperator):
        high-level options, for instances, project and zone information, which
        apply to all dataflow operators in the DAG.

-        For more detail on job submission have a look at the reference:
-
-        https://cloud.google.com/dataflow/pipelines/specifying-exec-params
+        .. seealso::
+            For more detail on job submission have a look at the reference:
+            https://cloud.google.com/dataflow/pipelines/specifying-exec-params

        :param jar: The reference to a self executing DataFlow jar.
        :type jar: string
@@ -144,33 +144,37 @@ class DataflowTemplateOperator(BaseOperator):
    will be passed to the job.
    It's a good practice to define dataflow_* parameters in the default_args of the dag
    like the project, zone and staging location.
-    https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters
-    https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
-    ```
-    default_args = {
-        'dataflow_default_options': {
-            'project': 'my-gcp-project'
-            'zone': 'europe-west1-d',
-            'tempLocation': 'gs://my-staging-bucket/staging/'
+
+    .. seealso::
+        https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters
+        https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
+
+    .. code-block:: python
+       default_args = {
+           'dataflow_default_options': {
+               'project': 'my-gcp-project'
+               'zone': 'europe-west1-d',
+               'tempLocation': 'gs://my-staging-bucket/staging/'
                }
            }
        }
    }
-    ```

    You need to pass the path to your dataflow template as a file reference with the
    ``template`` parameter. Use ``parameters`` to pass on parameters to your job.
    Use ``environment`` to pass on runtime environment variables to your job.
-    ```
-    t1 = DataflowTemplateOperator(
-        task_id='datapflow_example',
-        template='{{var.value.gcp_dataflow_base}}',
-        parameters={
-            'inputFile': "gs://bucket/input/my_input.txt",
-            'outputFile': "gs://bucket/output/my_output.txt"
-        },
-        gcp_conn_id='gcp-airflow-service-account',
-        dag=my-dag)
-    ```
-    ``template`` ``dataflow_default_options`` and ``parameters`` are templated so you can
+    .. code-block:: python
+        t1 = DataflowTemplateOperator(
+            task_id='datapflow_example',
+            template='{{var.value.gcp_dataflow_base}}',
+            parameters={
+                'inputFile': "gs://bucket/input/my_input.txt",
+                'outputFile': "gs://bucket/output/my_output.txt"
+            },
+            gcp_conn_id='gcp-airflow-service-account',
+            dag=my-dag)
+
+    ``template``, ``dataflow_default_options`` and ``parameters`` are templated so you can
    use variables in them.
    """
    template_fields = ['parameters', 'dataflow_default_options', 'template']
@@ -191,11 +195,14 @@ class DataflowTemplateOperator(BaseOperator):
        Create a new DataflowTemplateOperator. Note that
        dataflow_default_options is expected to save high-level options
        for project information, which apply to all dataflow operators in the DAG.
-        https://cloud.google.com/dataflow/docs/reference/rest/v1b3
-        /LaunchTemplateParameters
-        https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
-        For more detail on job template execution have a look at the reference:
-        https://cloud.google.com/dataflow/docs/templates/executing-templates
+
+        .. seealso::
+            https://cloud.google.com/dataflow/docs/reference/rest/v1b3
+            /LaunchTemplateParameters
+            https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
+            For more detail on job template execution have a look at the reference:
+            https://cloud.google.com/dataflow/docs/templates/executing-templates

        :param template: The reference to the DataFlow template.
        :type template: string
        :param dataflow_default_options: Map of default job environment options.
@@ -258,9 +265,9 @@ class DataFlowPythonOperator(BaseOperator):
        high-level options, for instances, project and zone information, which
        apply to all dataflow operators in the DAG.

-        For more detail on job submission have a look at the reference:
-
-        https://cloud.google.com/dataflow/pipelines/specifying-exec-params
+        .. seealso::
+            For more detail on job submission have a look at the reference:
+            https://cloud.google.com/dataflow/pipelines/specifying-exec-params

        :param py_file: Reference to the python dataflow pipleline file.py, e.g.,
            /some/local/file/path/to/your/python/pipeline/file.

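For completeness, a sketch of the Python counterpart (DataFlowPythonOperator) touched above; the pipeline path, the option key naming and the `dag` object are illustrative assumptions: ::

    from airflow.contrib.operators.dataflow_operator import DataFlowPythonOperator

    dataflow_python = DataFlowPythonOperator(
        task_id='dataflow_python_example',
        py_file='/some/local/file/path/to/your/python/pipeline/file.py',  # placeholder path
        options={
            'autoscalingAlgorithm': 'BASIC',
            'maxNumWorkers': '50',
        },
        dataflow_default_options={
            'project': 'my-gcp-project',
            'staging_location': 'gs://my-staging-bucket/staging/',        # key naming assumed
        },
        dag=dag)
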
@@ -63,7 +63,7 @@ class DataprocClusterCreateOperator(BaseOperator):
    :param master_machine_type: Compute engine machine type to use for the master node
    :type master_machine_type: string
    :param master_disk_size: Disk size for the master node
-    :type int
+    :type master_disk_size: int
    :param worker_machine_type:Compute engine machine type to use for the worker nodes
    :type worker_machine_type: string
    :param worker_disk_size: Disk size for the worker nodes
@@ -398,31 +398,31 @@ class DataProcPigOperator(BaseOperator):
    It's a good practice to define dataproc_* parameters in the default_args of the dag
    like the cluster name and UDFs.

-    ```
-    default_args = {
-        'cluster_name': 'cluster-1',
-        'dataproc_pig_jars': [
-            'gs://example/udf/jar/datafu/1.2.0/datafu.jar',
-            'gs://example/udf/jar/gpig/1.2/gpig.jar'
-        ]
-    }
-    ```
+    .. code-block:: python
+
+        default_args = {
+            'cluster_name': 'cluster-1',
+            'dataproc_pig_jars': [
+                'gs://example/udf/jar/datafu/1.2.0/datafu.jar',
+                'gs://example/udf/jar/gpig/1.2/gpig.jar'
+            ]
+        }

    You can pass a pig script as string or file reference. Use variables to pass on
    variables for the pig script to be resolved on the cluster or use the parameters to
    be resolved in the script as template parameters.

-    ```
-    t1 = DataProcPigOperator(
-        task_id='dataproc_pig',
-        query='a_pig_script.pig',
-        variables={'out': 'gs://example/output/{{ds}}'},
-        dag=dag)
-    ```
-
-    For more detail on about job submission have a look at the reference:
-
-    https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs
+    **Example**: ::
+
+        t1 = DataProcPigOperator(
+            task_id='dataproc_pig',
+            query='a_pig_script.pig',
+            variables={'out': 'gs://example/output/{{ds}}'},
+            dag=dag)
+
+    .. seealso::
+        For more detail on about job submission have a look at the reference:
+        https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs

    :param query: The query or reference to the query file (pg or pig extension).
    :type query: string
@@ -967,8 +967,9 @@ class DataprocWorkflowTemplateInstantiateOperator(DataprocWorkflowTemplateBaseOp
    Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait
    until the WorkflowTemplate is finished executing.

-    Please refer to:
-    https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate
+    .. seealso::
+        Please refer to:
+        https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate

    :param template_id: The id of the template.
    :type template_id: string
@@ -1008,8 +1009,9 @@ class DataprocWorkflowTemplateInstantiateInlineOperator(
    Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will
    wait until the WorkflowTemplate is finished executing.

-    Please refer to:
-    https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline
+    .. seealso::
+        Please refer to:
+        https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline

    :param template: The template contents.
    :type template: map

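A minimal, illustrative DataprocClusterCreateOperator call using the disk-size parameters documented above; machine types, zone, project id and the `dag` object are placeholders: ::

    from airflow.contrib.operators.dataproc_operator import DataprocClusterCreateOperator

    create_cluster = DataprocClusterCreateOperator(
        task_id='create_dataproc_cluster',
        cluster_name='cluster-1',
        project_id='my-gcp-project',          # placeholder project
        num_workers=2,
        zone='europe-west1-d',
        master_machine_type='n1-standard-4',
        master_disk_size=500,
        worker_machine_type='n1-standard-4',
        worker_disk_size=500,
        dag=dag)
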
@@ -42,18 +42,19 @@ class GoogleCloudStorageCopyOperator(BaseOperator):
        For this to work, the service account making the request must have domain-wide delegation enabled.
    :type delegate_to: string

-    Example: The following Operator would move all the CSV files from `sales/sales-2017` folder in `data` bucket to
-    `sales` folder in `archive` bucket.
+    **Example**:
+    The following Operator would move all the CSV files from `sales/sales-2017` folder in
+    `data` bucket to `sales` folder in `archive` bucket. ::

-    move_file = GoogleCloudStorageCopyOperator(
-        task_id='move_file',
-        source_bucket='data',
-        source_object='sales/sales-2017/',
-        source_files_delimiter='.csv'
-        destination_bucket='archive',
-        destination_directory='sales',
-        google_cloud_storage_conn_id='airflow-service-account'
-    )
+        move_file = GoogleCloudStorageCopyOperator(
+            task_id='move_file',
+            source_bucket='data',
+            source_object='sales/sales-2017/',
+            source_files_delimiter='.csv'
+            destination_bucket='archive',
+            destination_directory='sales',
+            google_cloud_storage_conn_id='airflow-service-account'
+        )
    """
    template_fields = ('source_bucket', 'source_object', 'source_files_delimiter',
                       'destination_bucket', 'destination_directory')
@@ -40,16 +40,17 @@ class GoogleCloudStorageListOperator(BaseOperator):
        domain-wide delegation enabled.
    :type delegate_to: string

-    Example: The following Operator would list all the Avro files from `sales/sales-2017`
-    folder in `data` bucket.
+    **Example**:
+    The following Operator would list all the Avro files from `sales/sales-2017`
+    folder in `data` bucket. ::

-    GCS_Files = GoogleCloudStorageListOperator(
-        task_id='GCS_Files',
-        bucket='data',
-        prefix='sales/sales-2017/',
-        delimiter='.avro',
-        google_cloud_storage_conn_id=google_cloud_conn_id
-    )
+        GCS_Files = GoogleCloudStorageListOperator(
+            task_id='GCS_Files',
+            bucket='data',
+            prefix='sales/sales-2017/',
+            delimiter='.avro',
+            google_cloud_storage_conn_id=google_cloud_conn_id
+        )
    """
    template_fields = ('bucket', 'prefix', 'delimiter')
    ui_color = '#f0eee4'

@@ -120,11 +120,15 @@ class MLEngineBatchPredictionOperator(BaseOperator):

    In options 2 and 3, both model and version name should contain the
    minimal identifier. For instance, call
+
+    ::
+
        MLEngineBatchPredictionOperator(
            ...,
            model_name='my_model',
            version_name='my_version',
            ...)
+
    if the desired model version is
    "projects/my_project/models/my_model/versions/my_version".

@@ -189,7 +193,7 @@ class MLEngineBatchPredictionOperator(BaseOperator):
    :type delegate_to: string

    Raises:
-        ValueError: if a unique model/version origin cannot be determined.
+        ``ValueError``: if a unique model/version origin cannot be determined.
    """

    template_fields = [
@@ -346,23 +350,23 @@ class MLEngineVersionOperator(BaseOperator):
    :type version: dict

    :param operation: The operation to perform. Available operations are:
-        'create': Creates a new version in the model specified by `model_name`,
+        * ``create``: Creates a new version in the model specified by `model_name`,
            in which case the `version` parameter should contain all the
            information to create that version
            (e.g. `name`, `deploymentUrl`).
-        'get': Gets full information of a particular version in the model
+        * ``get``: Gets full information of a particular version in the model
            specified by `model_name`.
            The name of the version should be specified in the `version`
            parameter.

-        'list': Lists all available versions of the model specified
+        * ``list``: Lists all available versions of the model specified
            by `model_name`.

-        'delete': Deletes the version specified in `version` parameter from the
+        * ``delete``: Deletes the version specified in `version` parameter from the
            model specified by `model_name`).
            The name of the version should be specified in the `version`
            parameter.
-        :type operation: string
+    :type operation: string

    :param gcp_conn_id: The connection ID to use when fetching connection info.
    :type gcp_conn_id: string

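A hedged example of the ``create`` operation described above; the version payload fields, project id and the `dag` object are illustrative assumptions: ::

    from airflow.contrib.operators.mlengine_operator import MLEngineVersionOperator

    create_version = MLEngineVersionOperator(
        task_id='create_model_version',
        project_id='my-gcp-project',                           # placeholder project
        model_name='my_model',
        version={
            'name': 'my_version',
            'deploymentUri': 'gs://my-bucket/saved_model/',    # placeholder export path
            'runtimeVersion': '1.6',
        },
        operation='create',
        dag=dag)
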
@@ -94,14 +94,36 @@ Community-contributed Operators
.. deprecated:: 1.8
    Use :code:`from airflow.operators.bash_operator import BashOperator` instead.

.. autoclass:: airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator
.. autoclass:: airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator
.. autoclass:: airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator
.. autoclass:: airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator
.. autoclass:: airflow.contrib.operators.bigquery_operator.BigQueryOperator
.. autoclass:: airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator
.. autoclass:: airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator
.. autoclass:: airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator
.. autoclass:: airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator
.. autoclass:: airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator
.. autoclass:: airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator
.. autoclass:: airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcPigOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcHiveOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcSparkOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator
.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator
.. autoclass:: airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator
.. autoclass:: airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator
.. autoclass:: airflow.contrib.operators.ecs_operator.ECSOperator
.. autoclass:: airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator
.. autoclass:: airflow.contrib.operators.file_to_wasb.FileToWasbOperator
.. autoclass:: airflow.contrib.operators.gcs_copy_operator.GoogleCloudStorageCopyOperator
.. autoclass:: airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator
.. autoclass:: airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator
.. autoclass:: airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator
.. autoclass:: airflow.contrib.operators.pubsub_operator.PubSubTopicCreateOperator
.. autoclass:: airflow.contrib.operators.pubsub_operator.PubSubTopicDeleteOperator

@@ -62,8 +62,8 @@ Your reverse proxy (ex: nginx) should be configured as follow:
        proxy_set_header Connection "upgrade";
    }
  }


.. _Azure:

Azure: Microsoft Azure
@@ -374,6 +374,13 @@ BigQueryIntervalCheckOperator

.. autoclass:: airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator

.. _BigQueryGetDataOperator:

BigQueryGetDataOperator
^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator

.. _BigQueryOperator:

BigQueryOperator
@@ -381,6 +388,13 @@ BigQueryOperator

.. autoclass:: airflow.contrib.operators.bigquery_operator.BigQueryOperator

.. _BigQueryTableDeleteOperator:

BigQueryTableDeleteOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator

.. _BigQueryToBigQueryOperator:

BigQueryToBigQueryOperator
@@ -410,6 +424,7 @@ DataFlow Operators
""""""""""""""""""

- :ref:`DataFlowJavaOperator` : launching Cloud Dataflow jobs written in Java.
- :ref:`DataflowTemplateOperator` : launching a templated Cloud DataFlow batch job.
- :ref:`DataFlowPythonOperator` : launching Cloud Dataflow jobs written in python.

.. _DataFlowJavaOperator:
@@ -453,6 +468,13 @@ DataFlowJavaOperator
        },
        dag=dag)

.. _DataflowTemplateOperator:

DataflowTemplateOperator
^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator

.. _DataFlowPythonOperator:

DataFlowPythonOperator
@@ -475,12 +497,30 @@ Cloud DataProc
DataProc Operators
""""""""""""""""""

- :ref:`DataprocClusterCreateOperator` : Create a new cluster on Google Cloud Dataproc.
- :ref:`DataprocClusterDeleteOperator` : Delete a cluster on Google Cloud Dataproc.
- :ref:`DataProcPigOperator` : Start a Pig query Job on a Cloud DataProc cluster.
- :ref:`DataProcHiveOperator` : Start a Hive query Job on a Cloud DataProc cluster.
- :ref:`DataProcSparkSqlOperator` : Start a Spark SQL query Job on a Cloud DataProc cluster.
- :ref:`DataProcSparkOperator` : Start a Spark Job on a Cloud DataProc cluster.
- :ref:`DataProcHadoopOperator` : Start a Hadoop Job on a Cloud DataProc cluster.
- :ref:`DataProcPySparkOperator` : Start a PySpark Job on a Cloud DataProc cluster.
- :ref:`DataprocWorkflowTemplateInstantiateOperator` : Instantiate a WorkflowTemplate on Google Cloud Dataproc.
- :ref:`DataprocWorkflowTemplateInstantiateInlineOperator` : Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.

.. _DataprocClusterCreateOperator:

DataprocClusterCreateOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator

.. _DataprocClusterDeleteOperator:

DataprocClusterDeleteOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator

.. _DataProcPigOperator:

@@ -523,12 +563,44 @@ DataProcPySparkOperator
^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator
    :members:

.. _DataprocWorkflowTemplateInstantiateOperator:

DataprocWorkflowTemplateInstantiateOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator

.. _DataprocWorkflowTemplateInstantiateInlineOperator:

DataprocWorkflowTemplateInstantiateInlineOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator

Cloud Datastore
'''''''''''''''

Datastore Operators
"""""""""""""""""""

- :ref:`DatastoreExportOperator` : Export entities from Google Cloud Datastore to Cloud Storage.
- :ref:`DatastoreImportOperator` : Import entities from Cloud Storage to Google Cloud Datastore.

.. _DatastoreExportOperator:

DatastoreExportOperator
^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator

.. _DatastoreImportOperator:

DatastoreImportOperator
^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator

DatastoreHook
"""""""""""""

@@ -597,8 +669,26 @@ Cloud Storage
Storage Operators
"""""""""""""""""

- :ref:`FileToGoogleCloudStorageOperator` : Uploads a file to Google Cloud Storage.
- :ref:`GoogleCloudStorageCopyOperator` : Copies objects (optionally from a directory) filtered by 'delimiter' (file extension for e.g .json) from a bucket to another bucket in a different directory, if required.
- :ref:`GoogleCloudStorageListOperator` : List all objects from the bucket with the give string prefix and delimiter in name.
- :ref:`GoogleCloudStorageDownloadOperator` : Downloads a file from Google Cloud Storage.
- :ref:`GoogleCloudStorageToBigQueryOperator` : Loads files from Google cloud storage into BigQuery.
- :ref:`GoogleCloudStorageToGoogleCloudStorageOperator` : Copies a single object from a bucket to another, with renaming if requested.

.. _FileToGoogleCloudStorageOperator:

FileToGoogleCloudStorageOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator

.. _GoogleCloudStorageCopyOperator:

GoogleCloudStorageCopyOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.gcs_copy_operator.GoogleCloudStorageCopyOperator

.. _GoogleCloudStorageDownloadOperator:

@@ -606,7 +696,13 @@ GoogleCloudStorageDownloadOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator
    :members:

.. _GoogleCloudStorageListOperator:

GoogleCloudStorageListOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator

.. _GoogleCloudStorageToBigQueryOperator:

@@ -614,8 +710,13 @@ GoogleCloudStorageToBigQueryOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator
    :members:

.. _GoogleCloudStorageToGoogleCloudStorageOperator:

GoogleCloudStorageToGoogleCloudStorageOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator

GoogleCloudStorageHook
""""""""""""""""""""""