Removes pip download when installing from local packages (#13422)

This PR improves building the production image from local packages,
in preparation for moving provider requirements out of setup.cfg.

Previously, a `pip download` step was executed in the CI scripts
in order to download all the packages that were needed. However,
this had two problems:

1) The `pip download` step was executed outside of the Dockerfile, in the
   CI scripts, which means that any change to the requirements there could
   not take effect in the 'workflow_run' event, because the main-branch
   version of the CI scripts is used there. We want to add extra
   requirements when installing airflow, so in order to be able to change
   them, those requirements should be added in the Dockerfile. This will
   be done in the follow-up PR #13409.

2) Packages downloaded with `pip download` get a "file" version rather
   than a regular `==` version when you run `pip freeze`/`pip check`.
   This looks odd, and while you can figure out the version from the file
   name, packages installed with `pip install` look much more normal. The
   airflow package and the provider packages will still get the "file"
   form, but this is OK because we are building those packages from
   sources and they are not yet available on PyPI.

Example:

  adal==1.2.5
  aiohttp==3.7.3
  alembic==1.4.3
  amqp==2.6.1
  apache-airflow @ file:///docker-context-files/apache_airflow-2.1.0.dev0-py3-none-any.whl
  apache-airflow-providers-amazon @ file:///docker-context-files/apache_airflow_providers_amazon-1.0.0-py3-none-any.whl
  apache-airflow-providers-celery @ file:///docker-context-files/apache_airflow_providers_celery-1.0.0-py3-none-any.whl
  ...

With this PR, we no longer `pip download` all packages; instead we
prepare the airflow + provider packages as .whl files and install
them from there (all the dependencies are installed from PyPI).
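
In practice the new flow looks roughly like this (a simplified sketch;
the real commands live in the CI scripts changed below, and the image
tag is a placeholder):

  # 1. Build airflow (and similarly the provider packages) as wheels into dist/
  #    (shown here with plain setuptools for illustration)
  python setup.py bdist_wheel

  # 2. Put the freshly built packages where the Dockerfile expects them
  mkdir -p docker-context-files
  mv dist/*.whl docker-context-files/

  # 3. Build the production image: airflow + providers come from the local
  #    wheels, everything else is resolved from PyPI under constraints
  docker build . \
    --build-arg INSTALL_FROM_PYPI="false" \
    --build-arg INSTALL_FROM_DOCKER_CONTEXT_FILES="true" \
    --build-arg AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" \
    --tag my-airflow-prod-image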

(cherry picked from commit e436883583)
Jarek Potiuk 2021-01-02 11:16:51 +01:00, committed by Kaxil Naik
Parent: 754f14651e
Commit: 9bfc783449
8 changed files: 127 additions and 55 deletions


@@ -247,31 +247,54 @@ ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
WORKDIR /opt/airflow
# remove mysql from extras if client is not installed
# hadolint ignore=SC2086, SC2010
RUN if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then \
# Remove mysql from extras if client is not installed \
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}; \
fi; \
if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
--upgrade --upgrade-strategy eager; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
pip install --upgrade --upgrade-strategy only-if-needed \
--user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi; \
if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
# We want to install apache airflow packages with constraints \
if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --force-reinstall --upgrade --upgrade-strategy eager \
--user ${reinstalling_apache_airflow_packages}; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
--user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi ; \
# All the others we want to reinstall as-is, without dependencies \
reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
grep -v apache_airflow | grep -v apache-airflow || true); \
if [[ "${reinstalling_other_packages}" != "" ]]; then \
pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
fi; \
fi; \
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy eager; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --user ${ADDITIONAL_PYTHON_DEPS} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy only-if-needed \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi; \
if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
pip install --user --no-deps /docker-context-files/*.{whl,tar.gz}; \
fi ; \
fi; \
find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r || true
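
For readers skimming the diff, the per-package logic of the non-eager branch
boils down to roughly the following (a simplified sketch with shortened
variable names; the real step also handles the eager-upgrade mode and
re-pins pip afterwards):

  # apache?airflow packages: reinstall together with their dependencies,
  # kept within the airflow constraints
  airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true)
  if [[ -n "${airflow_packages}" ]]; then
    pip install --force-reinstall --user \
      --upgrade --upgrade-strategy only-if-needed \
      ${airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"
  fi
  # everything else: reinstall exactly as provided, without dependencies
  other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
    grep -v apache_airflow | grep -v apache-airflow || true)
  if [[ -n "${other_packages}" ]]; then
    pip install --force-reinstall --user --no-deps ${other_packages}
  fi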


@@ -281,8 +281,8 @@ RUN pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
pip install \
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \
&& pip uninstall --yes apache-airflow; \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip uninstall --yes apache-airflow; \
fi
# Generate random hex dump file so that we can determine whether it's faster to rebuild the image
@@ -325,7 +325,8 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy eager; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed; \
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi
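
For context, the `--constraint` file used throughout these steps only pins
the versions of packages that are actually being installed; it never adds
packages on its own. A minimal, hypothetical illustration of the non-eager
branch above (the extra name is chosen only for the example):

  # constraints file excerpt (versions illustrative):
  #   alembic==1.4.3
  #   amqp==2.6.1
  pip install -e ".[postgres]" --upgrade --upgrade-strategy only-if-needed \
    --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"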
@@ -334,11 +335,28 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
# they are also installed additionally to whatever is installed from Airflow.
COPY docker-context-files/ /docker-context-files/
RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} != "true" ]]; then \
if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
pip install --no-deps /docker-context-files/*.{whl,tar.gz}; \
# hadolint ignore=SC2086, SC2010
RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
# We want to install apache airflow packages with constraints \
if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
pip install --force-reinstall --upgrade --upgrade-strategy eager \
--user ${reinstalling_apache_airflow_packages}; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
else \
pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
--user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
fi; \
fi ; \
fi
# All the others we want to reinstall as-is, without dependencies \
reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
grep -v apache_airflow | grep -v apache-airflow || true); \
if [[ "${reinstalling_other_packages}" != "" ]]; then \
pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
fi; \
fi;
# Copy all the www/ files we need to compile assets. Done as two separate COPY
# commands so as otherwise it copies the _contents_ of static/ in to www/


@@ -449,10 +449,27 @@ The following build arguments (``--build-arg`` in docker build command) can be u
| | | package. It has no effect when |
| | | installing from PyPI or GitHub repo. |
+------------------------------------------+------------------------------------------+------------------------------------------+
| ``INSTALL_FROM_DOCKER_CONTEXT_FILES`` | ``false`` | If set to true, Airflow and it's |
| | | dependencies are installed from locally |
| | | downloaded .whl files placed in the |
| | | ``docker-context-files``. |
| ``INSTALL_FROM_DOCKER_CONTEXT_FILES`` | ``false`` | If set to true, Airflow, providers and |
| | | all dependencies are installed from |
| | | locally built/downloaded |
| | | .whl and .tar.gz files placed in the |
| | | ``docker-context-files``. In certain |
| | | corporate environments, this is required |
| | | to install airflow from such pre-vetted |
| | | packages rather than from PyPI. For this |
| | | to work, also set ``INSTALL_FROM_PYPI``. |
| | | Note that packages starting with |
| | | ``apache?airflow`` glob are treated |
| | | differently than other packages. All |
| | | ``apache?airflow`` packages are |
| | | installed with dependencies limited by |
| | | airflow constraints. All other packages |
| | | are installed without dependencies |
| | | 'as-is'. If you wish to install airflow |
| | | via 'pip download' with all dependencies |
| | | downloaded, you have to rename the |
| | | apache airflow and provider packages to |
| | | not start with ``apache?airflow`` glob. |
+------------------------------------------+------------------------------------------+------------------------------------------+
| ``AIRFLOW_EXTRAS`` | ``all`` | extras to install |
+------------------------------------------+------------------------------------------+------------------------------------------+
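
Note that ``apache?airflow`` is a shell glob rather than a typo: the ``?``
matches a single character, so the pattern catches both the underscore used
in wheel file names and the dash used in source distribution names, for
example (the .tar.gz name is illustrative):

  ls /docker-context-files/apache?airflow*.{whl,tar.gz}
  # apache_airflow-2.1.0.dev0-py3-none-any.whl    <- matched (underscore)
  # apache-airflow-2.1.0.dev0.tar.gz              <- matched (dash, illustrative)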


@@ -317,8 +317,19 @@ Preparing the constraint files and wheel files:
--constraint docker-context-files/constraints-2-0.txt \
apache-airflow[async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,ssh,statsd,virtualenv]==2.0.0
Since apache-airflow .whl packages are treated differently by the docker image, you need to rename the
downloaded apache-airflow* files, for example:
Building the image (after copying the files downloaded to the "docker-context-files" directory):
.. code-block:: bash
pushd docker-context-files
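# prefix the files with '_' so they no longer match the apache?airflow glob
# and are installed as-is (with --no-deps), like the rest of the downloaded packages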
for file in apache?airflow*
do
mv ${file} _${file}
done
popd
Building the image:
.. code-block:: bash
@@ -539,7 +550,19 @@ The following build arguments (``--build-arg`` in docker build command) can be u
| | | corporate environments, this is required |
| | | to install airflow from such pre-vetted |
| | | packages rather than from PyPI. For this |
| | | to work, also set ``INSTALL_FROM_PYPI`` |
| | | to work, also set ``INSTALL_FROM_PYPI``. |
| | | Note that packages starting with |
| | | ``apache?airflow`` glob are treated |
| | | differently than other packages. All |
| | | ``apache?airflow`` packages are |
| | | installed with dependencies limited by |
| | | airflow constraints. All other packages |
| | | are installed without dependencies |
| | | 'as-is'. If you wish to install airflow |
| | | via 'pip download' with all dependencies |
| | | downloaded, you have to rename the |
| | | apache airflow and provider packages to |
| | | not start with ``apache?airflow`` glob. |
+------------------------------------------+------------------------------------------+------------------------------------------+
| ``UPGRADE_TO_NEWER_DEPENDENCIES`` | ``false`` | If set to true, the dependencies are |
| | | upgraded to newer versions matching |


@@ -68,8 +68,8 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
# we need to run those in sub-processes
(
export INSTALL_FROM_PYPI="true"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
export INSTALL_PROVIDERS_FROM_SOURCES="true"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
export DOCKER_CACHE="pulled"
# shellcheck source=scripts/ci/libraries/_script_init.sh
@@ -86,7 +86,6 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
(
export INSTALL_FROM_PYPI="false"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
export INSTALL_PROVIDERS_FROM_SOURCES="false"
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
export DOCKER_CACHE="pulled"
# shellcheck source=scripts/ci/libraries/_script_init.sh
@@ -97,7 +96,7 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
rm -rf "${BUILD_CACHE_DIR}"
rm -rf "${AIRFLOW_SOURCES}/docker-context-files/*"
build_images::prepare_prod_build
build_images::build_prod_images_from_packages
build_images::build_prod_images_from_locally_built_airflow_packages
push_pull_remove_images::push_prod_images
)
else


@@ -18,9 +18,8 @@
export INSTALL_FROM_PYPI="false"
export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
export INSTALL_PROVIDERS_FROM_SOURCES="false"
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
export DOCKER_CACHE="local"
export DOCKER_CACHE="pulled"
export VERBOSE="true"
@@ -41,7 +40,7 @@ function build_prod_images_on_ci() {
":${GITHUB_REGISTRY_PULL_IMAGE_TAG}" "${AIRFLOW_PROD_IMAGE}"
else
build_images::build_prod_images_from_packages
build_images::build_prod_images_from_locally_built_airflow_packages
fi


@@ -913,31 +913,40 @@ function build_images::determine_docker_cache_strategy() {
}
function build_images::build_prod_images_from_packages() {
function build_image::assert_variable() {
local variable_name="${1}"
local expected_value="${2}"
local variable_value=${!variable_name}
if [[ ${variable_value} != "${expected_value}" ]]; then
echo
echo "${COLOR_RED_ERROR}: Variable ${variable_name}: expected_value: '${expected_value}' but was '${variable_value}'!${COLOR_RESET}"
echo
exit 1
fi
}
function build_images::build_prod_images_from_locally_built_airflow_packages() {
# We do not install from PyPI
build_image::assert_variable INSTALL_FROM_PYPI "false"
# But then we reinstall airflow and providers from prepared packages in the docker context files
build_image::assert_variable INSTALL_FROM_DOCKER_CONTEXT_FILES "true"
# But we install everything from scratch to make a "clean" installation in case any dependencies got removed
build_image::assert_variable AIRFLOW_PRE_CACHED_PIP_PACKAGES "false"
# Cleanup dist and docker-context-files folders
mkdir -pv "${AIRFLOW_SOURCES}/dist"
mkdir -pv "${AIRFLOW_SOURCES}/docker-context-files"
rm -f "${AIRFLOW_SOURCES}/dist/"*.{whl,tar.gz}
rm -f "${AIRFLOW_SOURCES}/docker-context-files/"*.{whl,tar.gz}
runs::run_pip_download
# Remove all downloaded apache airflow packages
rm -f "${AIRFLOW_SOURCES}/dist/"apache_airflow*.whl
rm -f "${AIRFLOW_SOURCES}/dist/"apache-airflow*.tar.gz
# Remove all downloaded apache airflow packages
mv -f "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
# Build necessary provider packages
runs::run_prepare_provider_packages "${INSTALLED_PROVIDERS[@]}"
mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
# Build apache airflow packages
build_airflow_packages::build_airflow_packages
mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
build_images::build_prod_images_with_group
}
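
For illustration, this is roughly how the new guard behaves when one of the
required flags has the wrong value (a hypothetical standalone invocation; in
CI the variables are exported by the calling scripts):

  export INSTALL_FROM_PYPI="true"   # wrong value for this code path
  build_image::assert_variable INSTALL_FROM_PYPI "false"
  # prints an error like:
  #   ERROR: Variable INSTALL_FROM_PYPI: expected_value: 'false' but was 'true'!
  # and exits with status 1, aborting the image build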


@@ -27,22 +27,6 @@ function runs::run_docs() {
start_end::group_end
}
# Downloads packages from PIP
function runs::run_pip_download() {
start_end::group_start "PIP download"
if [[ ${UPGRADE_TO_NEWER_DEPENDENCIES} ]]; then
pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]'"
else
pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]' --constraint
'https://raw.githubusercontent.com/apache/airflow/${DEFAULT_CONSTRAINTS_BRANCH}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt'"
fi
# Download all dependencies needed
docker run --rm --entrypoint /bin/bash \
"${EXTRA_DOCKER_FLAGS[@]}" \
"${AIRFLOW_CI_IMAGE}" -c "${pip_download_command}"
start_end::group_end
}
# Docker command to generate constraint files.
function runs::run_generate_constraints() {
start_end::group_start "Run generate constraints"