Fix docker image layer caching to avoid redundant docker building and transient connection exceptions. (#21612)

### Description
Improve docker commands to make docker image layer caching works.
It can make docker building faster and more stable.
So far, A100 pool's system disk is too small to use docker cache.
We won't use pipeline cache for docker image and remove some legacy
code.

### Motivation and Context
There are often an exception of
```
64.58 + curl https://nodejs.org/dist/v18.17.1/node-v18.17.1-linux-x64.tar.gz -sSL --retry 5 --retry-delay 30 --create-dirs -o /tmp/src/node-v18.17.1-linux-x64.tar.gz --fail
286.4 curl: (92) HTTP/2 stream 0 was not closed cleanly: INTERNAL_ERROR (err 2)
```
Because Onnxruntime pipeline have been sending too many requests to
download Nodejs in docker building.
Which is the major reason of pipeline failing now

In fact, docker image layer caching never works.
We can always see the scrips are still running
```
#9 [3/5] RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts
#9 0.234 /bin/sh: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8)
#9 0.235 /bin/sh: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8)
#9 0.235 /tmp/scripts/install_centos.sh: line 1: !/bin/bash: No such file or directory
#9 0.235 ++ '[' '!' -f /etc/yum.repos.d/microsoft-prod.repo ']'
#9 0.236 +++ tr -dc 0-9.
#9 0.236 +++ cut -d . -f1
#9 0.238 ++ os_major_version=8
....
#9 60.41 + curl https://nodejs.org/dist/v18.17.1/node-v18.17.1-linux-x64.tar.gz -sSL --retry 5 --retry-delay 30 --create-dirs -o /tmp/src/node-v18.17.1-linux-x64.tar.gz --fail
#9 60.59 + return 0
...
```

This PR is improving the docker command to make image layer caching
work.
Thus, CI won't send so many redundant request of downloading NodeJS.
```
#9 [2/5] ADD scripts /tmp/scripts
#9 CACHED

#10 [3/5] RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts
#10 CACHED

#11 [4/5] RUN adduser --uid 1000 onnxruntimedev
#11 CACHED

#12 [5/5] WORKDIR /home/onnxruntimedev
#12 CACHED
```

###Reference
https://docs.docker.com/build/drivers/

---------

Co-authored-by: Yi Zhang <your@email.com>
This commit is contained in:
Yi Zhang 2024-08-06 21:37:09 +08:00 коммит произвёл GitHub
Родитель f6f9657fb6
Коммит 0d1da41ca8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
6 изменённых файлов: 32 добавлений и 69 удалений

Просмотреть файл

@ -98,17 +98,19 @@ def main():
)
if use_container_registry:
run(args.docker_path, "buildx", "create", "--driver=docker-container", "--name=container_builder")
run(
args.docker_path,
"--log-level",
"error",
"buildx",
"build",
"--push",
"--load",
"--tag",
full_image_name,
"--cache-from",
full_image_name,
"--cache-from=type=registry,ref=" + full_image_name,
"--builder",
"container_builder",
"--build-arg",
"BUILDKIT_INLINE_CACHE=1",
*shlex.split(args.docker_build_args),
@ -116,24 +118,10 @@ def main():
args.dockerfile,
args.context,
)
elif args.use_imagecache:
log.info("Building image with pipeline cache...")
run(
args.docker_path,
"--log-level",
"error",
"buildx",
"build",
"--tag",
"push",
full_image_name,
"--cache-from",
full_image_name,
"--build-arg",
"BUILDKIT_INLINE_CACHE=1",
*shlex.split(args.docker_build_args),
"-f",
args.dockerfile,
args.context,
)
else:
log.info("Building image...")

Просмотреть файл

@ -321,6 +321,7 @@ stages:
--build-arg TRT_VERSION=${{ variables.linux_trt_version }}
"
Repository: onnxruntimeubi8packagestest_torch
UseImageCacheContainerRegistry: false
UpdateDepsTxt: false
- task: DownloadPackage@1

Просмотреть файл

@ -51,15 +51,15 @@ jobs:
Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile
Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=${{parameters.BaseImage}}"
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging
- ${{ if eq(parameters.OnnxruntimeArch, 'aarch64') }}:
- template: get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile
Context: tools/ci_build/github/linux/docker/inference/aarch64/default/cpu
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=${{parameters.BaseImage}}"
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging
UpdateDepsTxt: false
- task: CmdLine@2
@ -67,7 +67,7 @@ jobs:
script: |
mkdir -p $HOME/.onnx
docker run --rm --volume /data/onnx:/data/onnx:ro --volume $(Build.SourcesDirectory):/onnxruntime_src --volume $(Build.BinariesDirectory):/build \
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}} /bin/bash -c "python3.9 \
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging /bin/bash -c "python3.9 \
/onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release \
--skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib ${{ parameters.AdditionalBuildFlags }} && cd /build/Release && make install DESTDIR=/build/installed"
workingDirectory: $(Build.SourcesDirectory)

Просмотреть файл

@ -53,6 +53,7 @@ steps:
displayName: patch manylinux
- script: |
docker version
docker image ls
docker system df
displayName: Check Docker Images
@ -71,52 +72,25 @@ steps:
displayName: "Get ${{ parameters.Repository }} image for ${{ parameters.Dockerfile }}"
ContainerRegistry: onnxruntimebuildcache
- ${{ if eq(parameters.UseImageCacheContainerRegistry, false) }}:
- task: Cache@2
displayName: Cache Docker Image Task
inputs:
key: ' "${{ parameters.Repository }}" | "$(Build.SourceVersion)" '
path: ${{ parameters.IMAGE_CACHE_DIR }}
restoreKeys: |
"${{ parameters.Repository }}" | "$(Build.SourceVersion)"
"${{ parameters.Repository }}"
cacheHitVar: CACHE_RESTORED
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
# the difference is no --container-registry
- template: with-container-registry-steps.yml
parameters:
Steps:
- script: |
${{ parameters.ScriptName }} \
--dockerfile "${{ parameters.Dockerfile }}" \
--context "${{ parameters.Context }}" \
--docker-build-args "${{ parameters.DockerBuildArgs }}" \
--repository "${{ parameters.Repository }}"
displayName: "Get ${{ parameters.Repository }} image for ${{ parameters.Dockerfile }}"
ContainerRegistry: onnxruntimebuildcache
- script: |
test -f ${{ parameters.IMAGE_CACHE_DIR }}/cache.tar && docker load -i ${{ parameters.IMAGE_CACHE_DIR }}/cache.tar
docker image ls
displayName: Docker restore
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
- script: |
if [ ${{ parameters.UsePipelineCache}} ]
then
use_imagecache="--use_imagecache"
else
use_imagecache=""
fi
${{ parameters.ScriptName }} \
--dockerfile "${{ parameters.Dockerfile }}" \
--context "${{ parameters.Context }}" \
--docker-build-args "${{ parameters.DockerBuildArgs }}" \
--repository "${{ parameters.Repository }}" \
$use_imagecache
displayName: "Get ${{ parameters.Repository }} image for ${{ parameters.Dockerfile }}"
- script: |
set -ex
mkdir -p "${{ parameters.IMAGE_CACHE_DIR }}"
docker save -o "${{ parameters.IMAGE_CACHE_DIR }}/cache.tar" ${{ parameters.Repository }}
docker image ls
docker system df
displayName: Docker save
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
- script: |
echo ${{ parameters.IMAGE_CACHE_DIR }}
ls -lah ${{ parameters.IMAGE_CACHE_DIR }}
displayName: Display docker dir
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
- script: |
docker version
docker image ls
docker system df
df -h
displayName: Check Docker Images
- ${{ if and(eq(parameters.UpdateDepsTxt, true), or(eq(variables['System.CollectionId'], 'f3ad12f2-e480-4533-baf2-635c95467d29'),eq(variables['System.CollectionId'], 'bc038106-a83b-4dab-9dd3-5a41bc58f34c'))) }}:
- task: PythonScript@0

Просмотреть файл

@ -5,7 +5,7 @@
ARG BASEIMAGE=arm64v8/almalinux:8
FROM $BASEIMAGE
ENV PATH /opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV PATH=/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

Просмотреть файл

@ -5,7 +5,7 @@
ARG BASEIMAGE=amd64/almalinux:8
FROM $BASEIMAGE
ENV PATH /usr/lib/jvm/msopenjdk-11/bin:/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV PATH=/usr/lib/jvm/msopenjdk-11/bin:/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-11