Removes pip download when installing from local packages (#13422)
This PR improves building production image from local packages,
in preparation for moving provider requirements out of setup.cfg.
Previously `pip download` step was executed in the CI scripts
in order to download all the packages that were needed. However
this had two problems:
1) PIP download was executed outside of Dockerfile in CI scripts
which means that any change to requirements there could not
be executed in 'workflow_run' event - because main branch version
of CI scripts is used there. We want to add extra requirements
when installing airflow, so in order to be able to change
them, those requirements should be added in the Dockerfile.
This will be done in the follow-up #13409 PR.
2) Packages downloaded with PIP download have a "file" version
rather than regular == version when you run pip freeze/check.
This looks weird and while you can figure out the version
from file name, when you `pip install` them, they look
much more normal. The airflow package and provider package
will still get the "file" form but this is ok because we are
building those packages from sources and they are not yet
available in PyPI.
Example:
adal==1.2.5
aiohttp==3.7.3
alembic==1.4.3
amqp==2.6.1
apache-airflow @ file:///docker-context-files/apache_airflow-2.1.0.dev0-py3-none-any.whl
apache-airflow-providers-amazon @ file:///docker-context-files/apache_airflow_providers_amazon-1.0.0-py3-none-any.whl
apache-airflow-providers-celery @ file:///docker-context-files/apache_airflow_providers_celery-1.0.0-py3-none-any.whl
...
With this PR, we do not `pip download` all packages, but instead
we prepare airflow + providers packages as .whl files and
install them from there (all the dependencies are installed
from PyPI).
(cherry picked from commit e436883583)
This commit is contained in:
Родитель
754f14651e
Коммит
9bfc783449
39
Dockerfile
39
Dockerfile
|
@ -247,31 +247,54 @@ ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
|
|||
|
||||
WORKDIR /opt/airflow
|
||||
|
||||
# remove mysql from extras if client is not installed
|
||||
# hadolint ignore=SC2086, SC2010
|
||||
RUN if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then \
|
||||
# Remove mysql from extras if client is not installed \
|
||||
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}; \
|
||||
fi; \
|
||||
if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
|
||||
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
|
||||
pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
|
||||
--upgrade --upgrade-strategy eager; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
else \
|
||||
pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
|
||||
pip install --upgrade --upgrade-strategy only-if-needed \
|
||||
--user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
|
||||
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
fi; \
|
||||
fi; \
|
||||
if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
|
||||
reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
|
||||
# We want to install apache airflow packages with constraints \
|
||||
if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
|
||||
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
|
||||
pip install --force-reinstall --upgrade --upgrade-strategy eager \
|
||||
--user ${reinstalling_apache_airflow_packages}; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
else \
|
||||
pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
|
||||
--user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
fi; \
|
||||
fi ; \
|
||||
# All the others we want to reinstall as-is, without dependencies \
|
||||
reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
|
||||
grep -v apache_airflow | grep -v apache-airflow || true); \
|
||||
if [[ "${reinstalling_other_packages}" != "" ]]; then \
|
||||
pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
|
||||
fi; \
|
||||
fi; \
|
||||
if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
|
||||
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
|
||||
pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy eager; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
else \
|
||||
pip install --user ${ADDITIONAL_PYTHON_DEPS} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy only-if-needed \
|
||||
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
fi; \
|
||||
fi; \
|
||||
if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
|
||||
if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
|
||||
pip install --user --no-deps /docker-context-files/*.{whl,tar.gz}; \
|
||||
fi ; \
|
||||
fi; \
|
||||
find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
|
||||
find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r || true
|
||||
|
||||
|
|
|
@ -281,8 +281,8 @@ RUN pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
|
|||
RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
|
||||
pip install \
|
||||
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
|
||||
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \
|
||||
&& pip uninstall --yes apache-airflow; \
|
||||
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip uninstall --yes apache-airflow; \
|
||||
fi
|
||||
|
||||
# Generate random hex dump file so that we can determine whether it's faster to rebuild the image
|
||||
|
@ -325,7 +325,8 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
|
|||
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy eager; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
else \
|
||||
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed; \
|
||||
pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed\
|
||||
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
fi; \
|
||||
fi
|
||||
|
@ -334,11 +335,28 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
|
|||
# they are also installed additionally to whatever is installed from Airflow.
|
||||
COPY docker-context-files/ /docker-context-files/
|
||||
|
||||
RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} != "true" ]]; then \
|
||||
if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
|
||||
pip install --no-deps /docker-context-files/*.{whl,tar.gz}; \
|
||||
# hadolint ignore=SC2086, SC2010
|
||||
RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
|
||||
reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
|
||||
# We want to install apache airflow packages with constraints \
|
||||
if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
|
||||
if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
|
||||
pip install --force-reinstall --upgrade --upgrade-strategy eager \
|
||||
--user ${reinstalling_apache_airflow_packages}; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
else \
|
||||
pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
|
||||
--user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
|
||||
pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
|
||||
fi; \
|
||||
fi ; \
|
||||
fi
|
||||
# All the others we want to reinstall as-is, without dependencies \
|
||||
reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
|
||||
grep -v apache_airflow | grep -v apache-airflow || true); \
|
||||
if [[ "${reinstalling_other_packages}" != "" ]]; then \
|
||||
pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
|
||||
fi; \
|
||||
fi;
|
||||
|
||||
# Copy all the www/ files we need to compile assets. Done as two separate COPY
|
||||
# commands so as otherwise it copies the _contents_ of static/ in to www/
|
||||
|
|
25
IMAGES.rst
25
IMAGES.rst
|
@ -449,10 +449,27 @@ The following build arguments (``--build-arg`` in docker build command) can be u
|
|||
| | | package. It has no effect when |
|
||||
| | | installing from PyPI or GitHub repo. |
|
||||
+------------------------------------------+------------------------------------------+------------------------------------------+
|
||||
| ``INSTALL_FROM_DOCKER_CONTEXT_FILES`` | ``false`` | If set to true, Airflow and it's |
|
||||
| | | dependencies are installed from locally |
|
||||
| | | downloaded .whl files placed in the |
|
||||
| | | ``docker-context-files``. |
|
||||
| ``INSTALL_FROM_DOCKER_CONTEXT_FILES`` | ``false`` | If set to true, Airflow, providers and |
|
||||
| | | all dependencies are installed from |
|
||||
| | | from locally built/downloaded |
|
||||
| | | .whl and .tar.gz files placed in the |
|
||||
| | | ``docker-context-files``. In certain |
|
||||
| | | corporate environments, this is required |
|
||||
| | | to install airflow from such pre-vetted |
|
||||
| | | packages rather than from PyPI. For this |
|
||||
| | | to work, also set ``INSTALL_FROM_PYPI``. |
|
||||
| | | Note that packages starting with |
|
||||
| | | ``apache?airflow`` glob are treated |
|
||||
| | | differently than other packages. All |
|
||||
| | | ``apache?airflow`` packages are |
|
||||
| | | installed with dependencies limited by |
|
||||
| | | airflow constraints. All other packages |
|
||||
| | | are installed without dependencies |
|
||||
| | | 'as-is'. If you wish to install airflow |
|
||||
| | | via 'pip download' with all dependencies |
|
||||
| | | downloaded, you have to rename the |
|
||||
| | | apache airflow and provider packages to |
|
||||
| | | not start with ``apache?airflow`` glob. |
|
||||
+------------------------------------------+------------------------------------------+------------------------------------------+
|
||||
| ``AIRFLOW_EXTRAS`` | ``all`` | extras to install |
|
||||
+------------------------------------------+------------------------------------------+------------------------------------------+
|
||||
|
|
|
@ -317,8 +317,19 @@ Preparing the constraint files and wheel files:
|
|||
--constraint docker-context-files/constraints-2-0.txt \
|
||||
apache-airflow[async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,ssh,statsd,virtualenv]==2.0.0
|
||||
|
||||
Since apache-airflow .whl packages are treated differently by the docker image, you need to rename the
|
||||
downloaded apache-airflow* files, for example:
|
||||
|
||||
Building the image (after copying the files downloaded to the "docker-context-files" directory):
|
||||
.. code-block:: bash
|
||||
|
||||
pushd docker-context-files
|
||||
for file in apache?airflow*
|
||||
do
|
||||
mv ${file} _${file}
|
||||
done
|
||||
popd
|
||||
|
||||
Building the image:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
@ -539,7 +550,19 @@ The following build arguments (``--build-arg`` in docker build command) can be u
|
|||
| | | corporate environments, this is required |
|
||||
| | | to install airflow from such pre-vetted |
|
||||
| | | packages rather than from PyPI. For this |
|
||||
| | | to work, also set ``INSTALL_FROM_PYPI`` |
|
||||
| | | to work, also set ``INSTALL_FROM_PYPI``. |
|
||||
| | | Note that packages starting with |
|
||||
| | | ``apache?airflow`` glob are treated |
|
||||
| | | differently than other packages. All |
|
||||
| | | ``apache?airflow`` packages are |
|
||||
| | | installed with dependencies limited by |
|
||||
| | | airflow constraints. All other packages |
|
||||
| | | are installed without dependencies |
|
||||
| | | 'as-is'. If you wish to install airflow |
|
||||
| | | via 'pip download' with all dependencies |
|
||||
| | | downloaded, you have to rename the |
|
||||
| | | apache airflow and provider packages to |
|
||||
| | | not start with ``apache?airflow`` glob. |
|
||||
+------------------------------------------+------------------------------------------+------------------------------------------+
|
||||
| ``UPGRADE_TO_NEWER_DEPENDENCIES`` | ``false`` | If set to true, the dependencies are |
|
||||
| | | upgraded to newer versions matching |
|
||||
|
|
|
@ -68,8 +68,8 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
|
|||
# we need to run those in sub-processes
|
||||
(
|
||||
export INSTALL_FROM_PYPI="true"
|
||||
export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
|
||||
export INSTALL_PROVIDERS_FROM_SOURCES="true"
|
||||
export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
|
||||
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
|
||||
export DOCKER_CACHE="pulled"
|
||||
# shellcheck source=scripts/ci/libraries/_script_init.sh
|
||||
|
@ -86,7 +86,6 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
|
|||
(
|
||||
export INSTALL_FROM_PYPI="false"
|
||||
export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
|
||||
export INSTALL_PROVIDERS_FROM_SOURCES="false"
|
||||
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
|
||||
export DOCKER_CACHE="pulled"
|
||||
# shellcheck source=scripts/ci/libraries/_script_init.sh
|
||||
|
@ -97,7 +96,7 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
|
|||
rm -rf "${BUILD_CACHE_DIR}"
|
||||
rm -rf "${AIRFLOW_SOURCES}/docker-context-files/*"
|
||||
build_images::prepare_prod_build
|
||||
build_images::build_prod_images_from_packages
|
||||
build_images::build_prod_images_from_locally_built_airflow_packages
|
||||
push_pull_remove_images::push_prod_images
|
||||
)
|
||||
else
|
||||
|
|
|
@ -18,9 +18,8 @@
|
|||
|
||||
export INSTALL_FROM_PYPI="false"
|
||||
export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
|
||||
export INSTALL_PROVIDERS_FROM_SOURCES="false"
|
||||
export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
|
||||
export DOCKER_CACHE="local"
|
||||
export DOCKER_CACHE="pulled"
|
||||
export VERBOSE="true"
|
||||
|
||||
|
||||
|
@ -41,7 +40,7 @@ function build_prod_images_on_ci() {
|
|||
":${GITHUB_REGISTRY_PULL_IMAGE_TAG}" "${AIRFLOW_PROD_IMAGE}"
|
||||
|
||||
else
|
||||
build_images::build_prod_images_from_packages
|
||||
build_images::build_prod_images_from_locally_built_airflow_packages
|
||||
fi
|
||||
|
||||
|
||||
|
|
|
@ -913,31 +913,40 @@ function build_images::determine_docker_cache_strategy() {
|
|||
}
|
||||
|
||||
|
||||
function build_images::build_prod_images_from_packages() {
|
||||
function build_image::assert_variable() {
|
||||
local variable_name="${1}"
|
||||
local expected_value="${2}"
|
||||
local variable_value=${!variable_name}
|
||||
if [[ ${variable_value} != "${expected_value}" ]]; then
|
||||
echo
|
||||
echo "${COLOR_RED_ERROR}: Variable ${variable_name}: expected_value: '${expected_value}' but was '${variable_value}'!${COLOR_RESET}"
|
||||
echo
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
function build_images::build_prod_images_from_locally_built_airflow_packages() {
|
||||
# We do not install from PyPI
|
||||
build_image::assert_variable INSTALL_FROM_PYPI "false"
|
||||
# But then we reinstall airflow and providers from prepared packages in the docker context files
|
||||
build_image::assert_variable INSTALL_FROM_DOCKER_CONTEXT_FILES "true"
|
||||
# But we install everything from scratch to make a "clean" installation in case any dependencies got removed
|
||||
build_image::assert_variable AIRFLOW_PRE_CACHED_PIP_PACKAGES "false"
|
||||
|
||||
# Cleanup dist and docker-context-files folders
|
||||
mkdir -pv "${AIRFLOW_SOURCES}/dist"
|
||||
mkdir -pv "${AIRFLOW_SOURCES}/docker-context-files"
|
||||
rm -f "${AIRFLOW_SOURCES}/dist/"*.{whl,tar.gz}
|
||||
rm -f "${AIRFLOW_SOURCES}/docker-context-files/"*.{whl,tar.gz}
|
||||
|
||||
runs::run_pip_download
|
||||
|
||||
# Remove all downloaded apache airflow packages
|
||||
rm -f "${AIRFLOW_SOURCES}/dist/"apache_airflow*.whl
|
||||
rm -f "${AIRFLOW_SOURCES}/dist/"apache-airflow*.tar.gz
|
||||
|
||||
# Remove all downloaded apache airflow packages
|
||||
mv -f "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
|
||||
|
||||
# Build necessary provider packages
|
||||
runs::run_prepare_provider_packages "${INSTALLED_PROVIDERS[@]}"
|
||||
|
||||
mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
|
||||
|
||||
# Build apache airflow packages
|
||||
build_airflow_packages::build_airflow_packages
|
||||
|
||||
mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
|
||||
|
||||
build_images::build_prod_images_with_group
|
||||
}
|
||||
|
||||
|
|
|
@ -27,22 +27,6 @@ function runs::run_docs() {
|
|||
start_end::group_end
|
||||
}
|
||||
|
||||
# Downloads packages from PIP
|
||||
function runs::run_pip_download() {
|
||||
start_end::group_start "PIP download"
|
||||
if [[ ${UPGRADE_TO_NEWER_DEPENDENCIES} ]]; then
|
||||
pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]'"
|
||||
else
|
||||
pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]' --constraint
|
||||
'https://raw.githubusercontent.com/apache/airflow/${DEFAULT_CONSTRAINTS_BRANCH}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt'"
|
||||
fi
|
||||
# Download all dependencies needed
|
||||
docker run --rm --entrypoint /bin/bash \
|
||||
"${EXTRA_DOCKER_FLAGS[@]}" \
|
||||
"${AIRFLOW_CI_IMAGE}" -c "${pip_download_command}"
|
||||
start_end::group_end
|
||||
}
|
||||
|
||||
# Docker command to generate constraint files.
|
||||
function runs::run_generate_constraints() {
|
||||
start_end::group_start "Run generate constraints"
|
||||
|
|
Загрузка…
Ссылка в новой задаче