update airflow config for 2.3.3
Parent: e80472ab9a
Commit: d19cc711aa

airflow.cfg | 169
@@ -1,4 +1,14 @@
[core]
# Hostname by providing a path to a callable, which will resolve the hostname.
# The format is "package.function".
#
# For example, default value "socket.getfqdn" means that result from getfqdn() of "socket"
# package will be used as hostname.
#
# No argument should be required in the function specified.
# If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address``
# hostname_callable = socket.getfqdn

default_timezone = utc

hide_sensitive_var_conn_fields = True
@@ -34,12 +44,6 @@ max_active_runs_per_dag = 5
# environment
load_examples = False
-
-# Whether to load the default connections that ship with Airflow. It's good to
-# get started, but you probably want to set this to ``False`` in a production
-# environment
-# We have configured google_cloud_default, so hopefully this won't remove it.
-load_default_connections = False

# Where your Airflow plugins are stored
plugins_folder = $AIRFLOW_HOME/plugins

@@ -88,6 +92,10 @@ unit_test_mode = False
# RCE exploits).
enable_xcom_pickling = False

# When a task is killed forcefully, this is the amount of time in seconds that
# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
killed_task_cleanup_time = 60

# Whether to override params with dag_run.conf. If you pass some key-value pairs
# through ``airflow dags backfill -c`` or
# ``airflow dags trigger -c``, the key-value pairs will override the existing ones in params.
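Note: the comment above describes Airflow's dag_run.conf / params override behaviour. A minimal sketch of a DAG that relies on it; the dag_id, task_id, and param values below are illustrative only and not part of this commit.

# Hypothetical illustration of dag_run_conf_overrides_params = True:
# a matching key passed via
#   airflow dags trigger -c '{"target_date": "2022-07-01"}' example_params_dag
# replaces the default declared in params when templates are rendered.
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

with DAG(
    dag_id="example_params_dag",            # illustrative name
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    params={"target_date": "2022-01-01"},   # default, overridable from dag_run.conf
) as dag:
    BashOperator(task_id="echo_date", bash_command="echo {{ params.target_date }}")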
@@ -96,15 +104,31 @@ dag_run_conf_overrides_params = True
# When discovering DAGs, ignore any files that don't contain the strings ``DAG`` and ``airflow``.
dag_discovery_safe_mode = False

# The pattern syntax used in the ".airflowignore" files in the DAG directories. Valid values are
# ``regexp`` or ``glob``.
dag_ignore_file_syntax = regexp

# The number of retries each task is going to have by default. Can be overridden at dag or task level.
default_task_retries = 0

# The weighting method used for the effective total priority weight of the task
default_task_weight_rule = downstream

# The default task execution_timeout value for the operators. Expected an integer value to
# be passed into timedelta as seconds. If not specified, then the value is considered as None,
# meaning that the operators are never timed out by default.
default_task_execution_timeout =

# We will override the next 2 intervals in prod via env vars.
# Updating serialized DAG can not be faster than a minimum interval to reduce database write rate.
# This flag sets the minimum interval (in seconds) after which the serialized DAGs in the DB should be updated.
# This helps in reducing database write rate.
min_serialized_dag_update_interval = 10

# If True, serialized DAGs are compressed before writing to DB.
# Note: this will disable the DAG dependencies view
compress_serialized_dags = False

# Fetching serialized DAG can not be faster than a minimum interval to reduce database
# read rate. This config controls when your DAGs are updated in the Webserver
min_serialized_dag_fetch_interval = 5
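Note: the "override the next 2 intervals in prod via env vars" comment above refers to Airflow's standard AIRFLOW__<SECTION>__<KEY> environment-variable override scheme. A minimal sketch of what such an override could look like; the 60/30 values are illustrative, not the actual production settings.

# Illustrative only: the real production values are not part of this commit.
# Env vars of the form AIRFLOW__<SECTION>__<KEY> take precedence over airflow.cfg,
# so these would override the 10s / 5s values configured above.
import os

os.environ["AIRFLOW__CORE__MIN_SERIALIZED_DAG_UPDATE_INTERVAL"] = "60"
os.environ["AIRFLOW__CORE__MIN_SERIALIZED_DAG_FETCH_INTERVAL"] = "30"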
@@ -139,8 +163,18 @@ lazy_load_plugins = True
# loaded from module.
lazy_discover_providers = True

# The maximum list/dict length an XCom can push to trigger task mapping. If the pushed list/dict has a
# length exceeding this value, the task pushing the XCom will be failed automatically to prevent the
# mapped tasks from clogging the scheduler.
max_map_length = 1024

[database]
# Whether to load the default connections that ship with Airflow. It's good to
# get started, but you probably want to set this to ``False`` in a production
# environment
# We have configured google_cloud_default, so hopefully this won't remove it.
load_default_connections = False

# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines; more information on
# their website
@@ -160,6 +194,14 @@ sql_alchemy_pool_recycle = 3600
# Currently it is only used in ``DagFileProcessor.process_file`` to retry ``dagbag.sync_to_db``.
max_db_retries = 3

# Collation for ``dag_id``, ``task_id``, ``key`` columns in case they have different encoding.
# By default this collation is the same as the database collation, however for ``mysql`` and ``mariadb``
# the default is ``utf8mb3_general_ci`` so that the index sizes of our index keys will not exceed
# the maximum size of allowed index when collation is set to ``utf8mb4`` variant
# (see https://github.com/apache/airflow/pull/17603#issuecomment-901121618).
# and https://github.com/apache/airflow/pull/17729/
# sql_engine_collation_for_ids =


[logging]
# The folder where airflow should store its log files. This location
@@ -170,10 +212,22 @@ base_log_folder = $AIRFLOW_HOME/logs
# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``.
logging_level = INFO

# Logging level for celery. If not set, it uses the value of logging_level
#
# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``.
# celery_logging_level =

# Logging level for Flask-appbuilder UI.
#
# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``.
-fab_logging_level = WARN
+fab_logging_level = WARNING

# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the workers local log files to the airflow main
# web server, who then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused, and open
# visible from the main web server to connect into the workers.
worker_log_server_port = 8793

# Logging class
# Specify the class that will specify the logging configuration
@@ -204,7 +258,7 @@ log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ execution_date.strft
log_processor_filename_template = {{ filename }}.log

# full path of dag_processor_manager logfile
-dag_processor_manager_log_location = {AIRFLOW_HOME}/logs/dag_processor_manager/dag_processor_manager.log
+dag_processor_manager_log_location = ${AIRFLOW_HOME}/logs/dag_processor_manager/dag_processor_manager.log

# Name of handler to read task instance logs.
# Defaults to use ``task`` handler.
@@ -212,8 +266,8 @@ task_log_reader = task

# A comma-separated list of third-party logger names that will be configured to print messages to
# consoles.
-# Example: extra_loggers = connexion,sqlalchemy
-# extra_loggers =
+# Example: extra_logger_names = connexion,sqlalchemy
+# extra_logger_names =


[webserver]
@@ -255,10 +309,29 @@ auth_backend = $AIRFLOW_AUTH_BACKEND
# provided SSL will be enabled. This does not change the web server port.
# web_server_ssl_key =

# The type of backend used to store web session data, can be 'database' or 'securecookie'
# Example: session_backend = securecookie
# session_backend = database

# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
web_server_master_timeout = 300

# Number of seconds the gunicorn webserver waits before timing out on a worker
web_server_worker_timeout = 300

# Number of workers to refresh at a time. When set to 0, worker refresh is
# disabled. When nonzero, airflow periodically refreshes webserver workers by
# bringing up new ones and killing old ones.
worker_refresh_batch_size = 1

# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 6000

# If set to True, Airflow will track files in plugins_folder directory. When it detects changes,
# then reload the gunicorn.
# You can toggle this for Development when iterating on plugins
-reload_on_plugin_change = False
+# We set this to True for local development, and override it with ENV var in prod
+# False in prod so that changes pushed to plugins folder do not kill currently running backfills
+reload_on_plugin_change = True

# Log files for the gunicorn webserver. '-' means log to stderr.
access_logfile = -
@@ -365,6 +438,26 @@ session_lifetime_minutes = 43200
# Sets a custom page title for the DAGs overview page and site title for all pages
# instance_name =

# Whether the custom page title for the DAGs overview page contains any Markup language
instance_name_has_markup = False

# How frequently, in seconds, the DAG data will auto-refresh in graph or grid view
# when auto-refresh is turned on
auto_refresh_interval = 3

# Boolean for displaying warning for publicly viewable deployment
warn_deployment_exposure = True

# Comma separated string of view events to exclude from dag audit view.
# All other events will be added minus the ones passed here.
# The audit logs in the db will not be affected by this parameter.
audit_view_excluded_events = gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data

# Comma separated string of view events to include in dag audit view.
# If passed, only these events will populate the dag audit view.
# The audit logs in the db will not be affected by this parameter.
# Example: audit_view_included_events = dagrun_cleared,failed
# audit_view_included_events =

[email]
email_backend = $AIRFLOW_EMAIL_BACKEND
@@ -457,18 +550,16 @@ worker_concurrency = 32
# Example: worker_prefetch_multiplier = 1
# worker_prefetch_multiplier =

# Specify if remote control of the workers is enabled.
# When using Amazon SQS as the broker, Celery creates lots of ``.*reply-celery-pidbox`` queues. You can
# prevent this by setting this to false. However, with this disabled Flower won't work.
# worker_enable_remote_control = true

# Umask that will be used when starting workers with the ``airflow celery worker``
# in daemon mode. This controls the file-creation mode mask which determines the initial
# value of file permission bits for newly created files.
# worker_umask = 0o077
-
-# When you start an airflow worker, airflow starts a tiny web server
-# subprocess to serve the workers local log files to the airflow main
-# web server, who then builds pages and sends them to users. This defines
-# the port on which the logs are served. It needs to be unused, and open
-# visible from the main web server to connect into the workers.
-worker_log_server_port = 8793

# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
@@ -583,10 +674,6 @@ allow_illegal_arguments = False
# listen (in seconds).
job_heartbeat_sec = 5

-# How often (in seconds) to check and tidy up 'running' TaskInstances
-# that no longer have a matching DagRun
-clean_tis_without_dagrun_interval = 15.0
-
# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
@@ -605,6 +692,10 @@ scheduler_idle_sleep_time = 1
# this interval. Keeping this number low will increase CPU usage.
min_file_process_interval = 60

# How often (in seconds) to check for stale DAGs (DAGs which are no longer present in
# the expected files) which should be deactivated.
deactivate_stale_dags_interval = 120

# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
# This is set via env var to 300 in prod, but 30 for local testing
dag_dir_list_interval = 30
@@ -629,6 +720,9 @@ child_process_log_directory = ${AIRFLOW_HOME}/logs/scheduler
# associated task instance as failed and will re-schedule the task.
scheduler_zombie_task_threshold = 300

# How often (in seconds) should the scheduler check for zombie tasks.
zombie_detection_interval = 60.0

# Turn off scheduler catchup by setting this to False.
# Default behavior is unchanged and
# Command Line Backfills still work, but the scheduler
@@ -637,6 +731,13 @@ scheduler_zombie_task_threshold = 300
# DAG definition (catchup)
catchup_by_default = False

# Setting this to True will make first task instance of a task
# ignore depends_on_past setting. A task instance will be considered
# as the first task instance of a task when there is no task instance
# in the DB with an execution_date earlier than it, i.e. no manual marking
# of success will be needed for a newly added task to be scheduled.
ignore_first_depends_on_past_by_default = True

# This changes the batch size of queries in the scheduling main loop.
# If this is too high, SQL query performance may be impacted by one
# or more of the following:
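Note: since catchup_by_default is turned off above, any individual DAG that does need historical runs has to opt back in per DAG. A minimal sketch; the dag_id, task_id, and schedule below are made up for illustration.

# Hypothetical DAG opting back in to catchup; catchup_by_default = False only
# changes the default, so catchup=True on a specific DAG still backfills it.
from datetime import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator

with DAG(
    dag_id="example_catchup_dag",      # illustrative name, not from this repo
    start_date=datetime(2022, 1, 1),
    schedule_interval="@daily",
    catchup=True,                      # override the global default
) as dag:
    EmptyOperator(task_id="noop")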
@@ -685,6 +786,14 @@ parsing_processes = 2
# * ``alphabetical``: Sort by filename
file_parsing_sort_mode = modified_time

# Whether the dag processor is running as a standalone process or it is a subprocess of a scheduler
# job.
standalone_dag_processor = False

# Only applicable if `[scheduler]standalone_dag_processor` is true and callbacks are stored
# in database. Contains maximum number of callbacks that are fetched during a single loop.
max_callbacks_per_loop = 20

# Turn off scheduler use of cron intervals by setting this to False.
# DAGs submitted manually in the web UI or with trigger_dag will still run.
use_job_schedule = True
@@ -696,6 +805,12 @@ allow_trigger_in_future = False
# DAG dependency detector class to use
dependency_detector = airflow.serialization.serialized_objects.DependencyDetector

# How often to check for expired trigger requests that have not run yet.
trigger_timeout_check_interval = 15

[triggerer]
# How many triggers a single Triggerer will run at once, by default.
default_capacity = 1000

[metrics]
# Statsd (https://github.com/etsy/statsd) integration settings
@@ -790,6 +905,9 @@ fallback_page_limit = 100
# Indicates whether the response can be shared with requesting code from the given origin.
# access_control_allow_origin =

[lineage]
# what lineage backend to use
# backend =

[mesos]
# Mesos master address which MesosExecutor will connect to.
@@ -857,3 +975,8 @@ authenticate = False

# [github_enterprise]
# api_rev = v3

[sensors]
# A sensor will immediately fail without retrying if timeout is reached
# Set to 3 days, default is 7 days or 604800
default_timeout = 259200
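Note: a quick sanity check of the two values mentioned in the comment above; both are plain seconds arithmetic.

# 259200 is 3 days and 604800 is the stock 7-day default, both in seconds.
assert 3 * 24 * 60 * 60 == 259200
assert 7 * 24 * 60 * 60 == 604800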
@@ -28,7 +28,7 @@ from flask_appbuilder.security.manager import AUTH_OAUTH
basedir = os.path.abspath(os.path.dirname(__file__))

# The SQLAlchemy connection string.
-SQLALCHEMY_DATABASE_URI = conf.conf.get('core', 'SQL_ALCHEMY_CONN')
+SQLALCHEMY_DATABASE_URI = conf.conf.get('database', 'SQL_ALCHEMY_CONN')

# Flask-WTF flag for CSRF
CSRF_ENABLED = True
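Note: the one-line change above tracks Airflow 2.3 moving the metadata DB settings out of [core] into the new [database] section, which is also why this commit adds [database] to airflow.cfg. A minimal sketch of the same lookup using the standard airflow.configuration API; the variable name is illustrative.

# Minimal sketch, assuming the standard Airflow configuration API.
# In Airflow 2.3+, sql_alchemy_conn lives under [database]; reading it from
# [core] still resolves through a deprecation shim but logs a warning.
from airflow.configuration import conf

metadata_db_uri = conf.get("database", "sql_alchemy_conn")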