By default PIP will install all packages in .local folder (#14125)
In order to optimize the Docker image, we use the ~/.local folder copied from the build image (this gives a huge reduction in Docker image size). So far we instructed users to add the --user flag manually when installing any packages while extending the images; however, this has proven problematic, as users rarely read the whole documentation and simply try what they know. This PR attempts to fix that. The `PIP_USER` variable is set to `true` in the final image, which means that installations will target the ~/.local folder by default. This can be disabled by unsetting the variable or setting it to `false`. Also, since pylint 2.7.0 has been released, this PR fixes a few new pylint warnings so that we can update to the latest constraints.
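For illustration, extending the image no longer requires remembering the `--user` flag: with `PIP_USER=true` inherited from the base image, pip behaves as if `--user` had been passed and installs into ~/.local. A minimal sketch of a derived Dockerfile (the dependency name is a placeholder, not something this PR adds):

    # hypothetical derived image; "my-extra-dependency" is a placeholder package name
    FROM apache/airflow:2.0.1
    # PIP_USER=true is inherited from the base image, so this installs into ~/.local
    RUN pip install --no-cache-dir my-extra-dependency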
Parent: beed53064c
Commit: ca35bd7f7f
@@ -523,6 +523,9 @@ LABEL org.apache.airflow.distro="debian" \
     org.opencontainers.image.title="Production Airflow Image" \
     org.opencontainers.image.description="Installed Apache Airflow"
 
+# By default PIP will install everything in ~/.local
+ARG PIP_USER="true"
+ENV PIP_USER=${PIP_USER}
 
 ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
 CMD ["--help"]
@@ -68,6 +68,7 @@ class RefreshKubeConfigLoader(KubeConfigLoader):
             return True
         except Exception as e:  # pylint: disable=W0703
             logging.error(str(e))
             return None
 
     def refresh_api_key(self, client_configuration):
         """Refresh API key if expired"""
@@ -330,7 +330,7 @@ class DagBag(LoggingMixin):
                 if not might_contain_dag(zip_info.filename, safe_mode, current_zip_file):
                     # todo: create ignore list
                     # Don't want to spam user with skip messages
-                    if not self.has_logged or True:
+                    if not self.has_logged:
                         self.has_logged = True
                         self.log.info(
                             "File %s:%s assumed to contain no DAGs. Skipping.", filepath, zip_info.filename
@@ -56,13 +56,15 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
             from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
 
             return AwsLogsHook(aws_conn_id=remote_conn_id, region_name=self.region_name)
-        except Exception:  # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             self.log.error(
                 'Could not create an AwsLogsHook with connection id "%s". '
                 'Please make sure that airflow[aws] is installed and '
-                'the Cloudwatch logs connection exists.',
+                'the Cloudwatch logs connection exists. Exception: "%s"',
                 remote_conn_id,
+                e,
             )
             return None
 
     def _render_filename(self, ti, try_number):
         # Replace unsupported log group name characters
@@ -47,13 +47,15 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin):
             from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 
             return S3Hook(remote_conn_id)
-        except Exception:  # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             self.log.exception(
                 'Could not create an S3Hook with connection id "%s". '
                 'Please make sure that airflow[aws] is installed and '
-                'the S3 connection exists.',
+                'the S3 connection exists. Exception : "%s"',
                 remote_conn_id,
+                e,
             )
             return None
 
     def set_context(self, ti):
         super().set_context(ti)
@@ -397,6 +397,7 @@ class CloudFunctionDeleteFunctionOperator(BaseOperator):
             status = e.resp.status
             if status == 404:
                 self.log.info('The function does not exist in this project')
                 return None
             else:
                 self.log.error('An error occurred. Exiting.')
                 raise e
@@ -59,13 +59,15 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
             from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
 
             return WasbHook(remote_conn_id)
-        except AzureHttpError:
+        except AzureHttpError as e:
             self.log.error(
                 'Could not create an WasbHook with connection id "%s". '
                 'Please make sure that airflow[azure] is installed and '
-                'the Wasb connection exists.',
+                'the Wasb connection exists. Exception "%s"',
                 remote_conn_id,
+                e,
             )
             return None
 
     def set_context(self, ti) -> None:
         super().set_context(ti)
@@ -136,8 +138,9 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
         """
         try:
             return self.hook.check_for_blob(self.wasb_container, remote_log_location)
-        except Exception:  # pylint: disable=broad-except
-            pass
+        # pylint: disable=broad-except
+        except Exception as e:
+            self.log.debug('Exception when trying to check remote location: "%s"', e)
         return False
 
     def wasb_read(self, remote_log_location: str, return_error: bool = False):
@@ -153,12 +156,13 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
         """
         try:
             return self.hook.read_file(self.wasb_container, remote_log_location)
-        except AzureHttpError:
+        except AzureHttpError as e:
             msg = f'Could not read logs from {remote_log_location}'
-            self.log.exception(msg)
+            self.log.exception("Message: '%s', exception '%s'", msg, e)
             # return error if needed
             if return_error:
                 return msg
         return ''
 
     def wasb_write(self, log: str, remote_log_location: str, append: bool = True) -> None:
         """
@@ -290,6 +290,7 @@ class BaseSerialization:
         elif type_ == DAT.SET:
             return {cls._deserialize(v) for v in var}
         elif type_ == DAT.TUPLE:
+            # pylint: disable=consider-using-generator
             return tuple([cls._deserialize(v) for v in var])
         else:
             raise TypeError(f'Invalid type {type_!s} in deserialization.')
@@ -243,6 +243,7 @@ class AllowListValidator:
 
     def __init__(self, allow_list=None):
         if allow_list:
+            # pylint: disable=consider-using-generator
            self.allow_list = tuple([item.strip().lower() for item in allow_list.split(',')])
        else:
            self.allow_list = None
@@ -3648,8 +3648,9 @@ class TaskInstanceModelView(AirflowModelView):
             flash(f"{len(task_instances)} task instances have been cleared")
             self.update_redirect()
             return redirect(self.get_redirect())
-        except Exception:  # noqa pylint: disable=broad-except
-            flash('Failed to clear task instances', 'error')
+        except Exception as e:  # noqa pylint: disable=broad-except
+            flash(f'Failed to clear task instances: "{e}"', 'error')
             return None
 
     @provide_session
     def set_task_instance_state(self, tis, target_state, session=None):
@@ -197,6 +197,9 @@ You should be aware, about a few things:
        FROM apache/airflow:2.0.1
        RUN pip install --no-cache-dir --user my-awesome-pip-dependency-to-add
 
+* As of 2.0.1 image the ``--user`` flag is turned on by default by setting ``PIP_USER`` environment variable
+  to ``true``. This can be disabled by un-setting the variable or by setting it to ``false``.
+
 * If your apt, or PyPI dependencies require some of the build-essentials, then your best choice is
   to follow the "Customize the image" route. However it requires to checkout sources of Apache Airflow,
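A minimal sketch of opting out of the new default in a derived image, as described in the documentation change above (the package name is a placeholder, and the root/airflow user switch is an assumption about how the base image is set up, not part of this commit):

    # hypothetical derived image that opts out of the new --user default
    FROM apache/airflow:2.0.1
    # installing into system site-packages needs root, assuming the base image runs as a non-root user
    USER root
    # unset or override PIP_USER to restore pip's normal (non --user) behaviour
    ENV PIP_USER=false
    RUN pip install --no-cache-dir my-extra-dependency
    # assumed runtime user of the official image
    USER airflow

Because the Dockerfile also declares ``ARG PIP_USER="true"``, the same switch can presumably be made when building the Airflow image itself by passing a build argument (for example ``--build-arg PIP_USER=false``).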