By default PIP will install all packages in .local folder (#14125)

In order to optimize the Docker image, we use the ~/.local
folder copied from the build image (this gives huge optimizations
regarding the docker image size). So far we instructed the users
to add --user flag manually when installing any packages when they
extend the images, however this has proven to be problematic as
users rarely read the whole documentation and simply try what they
know.

This PR attempts to fix it. `PIP_USER` variable is set to `true`
in the final image, which means that the installation by default
will use ~/.local folder as target. This can be disabled by
unsetting the variable or setting it to `false`.

Also, since pylint 2.7.0 has been released, this change fixes
a few pylint issues so that we can update to the latest constraints.
This commit is contained in:
Jarek Potiuk 2021-02-21 21:09:13 +01:00 коммит произвёл GitHub
Родитель beed53064c
Коммит ca35bd7f7f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 32 добавлений и 13 удалений

Просмотреть файл

@ -523,6 +523,9 @@ LABEL org.apache.airflow.distro="debian" \
org.opencontainers.image.title="Production Airflow Image" \
org.opencontainers.image.description="Installed Apache Airflow"
# By default PIP will install everything in ~/.local
ARG PIP_USER="true"
ENV PIP_USER=${PIP_USER}
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD ["--help"]

Просмотреть файл

@ -68,6 +68,7 @@ class RefreshKubeConfigLoader(KubeConfigLoader):
return True
except Exception as e: # pylint: disable=W0703
logging.error(str(e))
return None
def refresh_api_key(self, client_configuration):
"""Refresh API key if expired"""

Просмотреть файл

@ -330,7 +330,7 @@ class DagBag(LoggingMixin):
if not might_contain_dag(zip_info.filename, safe_mode, current_zip_file):
# todo: create ignore list
# Don't want to spam user with skip messages
if not self.has_logged or True:
if not self.has_logged:
self.has_logged = True
self.log.info(
"File %s:%s assumed to contain no DAGs. Skipping.", filepath, zip_info.filename

Просмотреть файл

@ -56,13 +56,15 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
return AwsLogsHook(aws_conn_id=remote_conn_id, region_name=self.region_name)
except Exception: # pylint: disable=broad-except
except Exception as e: # pylint: disable=broad-except
self.log.error(
'Could not create an AwsLogsHook with connection id "%s". '
'Please make sure that airflow[aws] is installed and '
'the Cloudwatch logs connection exists.',
'the Cloudwatch logs connection exists. Exception: "%s"',
remote_conn_id,
e,
)
return None
def _render_filename(self, ti, try_number):
# Replace unsupported log group name characters

Просмотреть файл

@ -47,13 +47,15 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin):
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
return S3Hook(remote_conn_id)
except Exception: # pylint: disable=broad-except
except Exception as e: # pylint: disable=broad-except
self.log.exception(
'Could not create an S3Hook with connection id "%s". '
'Please make sure that airflow[aws] is installed and '
'the S3 connection exists.',
'the S3 connection exists. Exception : "%s"',
remote_conn_id,
e,
)
return None
def set_context(self, ti):
super().set_context(ti)

Просмотреть файл

@ -397,6 +397,7 @@ class CloudFunctionDeleteFunctionOperator(BaseOperator):
status = e.resp.status
if status == 404:
self.log.info('The function does not exist in this project')
return None
else:
self.log.error('An error occurred. Exiting.')
raise e

Просмотреть файл

@ -59,13 +59,15 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
return WasbHook(remote_conn_id)
except AzureHttpError:
except AzureHttpError as e:
self.log.error(
'Could not create an WasbHook with connection id "%s". '
'Please make sure that airflow[azure] is installed and '
'the Wasb connection exists.',
'the Wasb connection exists. Exception "%s"',
remote_conn_id,
e,
)
return None
def set_context(self, ti) -> None:
super().set_context(ti)
@ -136,8 +138,9 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
"""
try:
return self.hook.check_for_blob(self.wasb_container, remote_log_location)
except Exception: # pylint: disable=broad-except
pass
# pylint: disable=broad-except
except Exception as e:
self.log.debug('Exception when trying to check remote location: "%s"', e)
return False
def wasb_read(self, remote_log_location: str, return_error: bool = False):
@ -153,12 +156,13 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
"""
try:
return self.hook.read_file(self.wasb_container, remote_log_location)
except AzureHttpError:
except AzureHttpError as e:
msg = f'Could not read logs from {remote_log_location}'
self.log.exception(msg)
self.log.exception("Message: '%s', exception '%s'", msg, e)
# return error if needed
if return_error:
return msg
return ''
def wasb_write(self, log: str, remote_log_location: str, append: bool = True) -> None:
"""

Просмотреть файл

@ -290,6 +290,7 @@ class BaseSerialization:
elif type_ == DAT.SET:
return {cls._deserialize(v) for v in var}
elif type_ == DAT.TUPLE:
# pylint: disable=consider-using-generator
return tuple([cls._deserialize(v) for v in var])
else:
raise TypeError(f'Invalid type {type_!s} in deserialization.')

Просмотреть файл

@ -243,6 +243,7 @@ class AllowListValidator:
def __init__(self, allow_list=None):
if allow_list:
# pylint: disable=consider-using-generator
self.allow_list = tuple([item.strip().lower() for item in allow_list.split(',')])
else:
self.allow_list = None

Просмотреть файл

@ -3648,8 +3648,9 @@ class TaskInstanceModelView(AirflowModelView):
flash(f"{len(task_instances)} task instances have been cleared")
self.update_redirect()
return redirect(self.get_redirect())
except Exception: # noqa pylint: disable=broad-except
flash('Failed to clear task instances', 'error')
except Exception as e: # noqa pylint: disable=broad-except
flash(f'Failed to clear task instances: "{e}"', 'error')
return None
@provide_session
def set_task_instance_state(self, tis, target_state, session=None):

Просмотреть файл

@ -197,6 +197,9 @@ You should be aware, about a few things:
FROM apache/airflow:2.0.1
RUN pip install --no-cache-dir --user my-awesome-pip-dependency-to-add
* As of 2.0.1 image the ``--user`` flag is turned on by default by setting ``PIP_USER`` environment variable
to ``true``. This can be disabled by un-setting the variable or by setting it to ``false``.
* If your apt, or PyPI dependencies require some of the build-essentials, then your best choice is
to follow the "Customize the image" route. However it requires to checkout sources of Apache Airflow,