Fix docker image layer caching to avoid redundant docker building and transient connection exceptions. (#21612)
### Description Improve the docker commands so that docker image layer caching works. This makes docker builds faster and more stable. For now, the A100 pool's system disk is too small to use the docker cache. We no longer use the pipeline cache for docker images, and some legacy code is removed. ### Motivation and Context Builds often fail with an exception such as ``` 64.58 + curl https://nodejs.org/dist/v18.17.1/node-v18.17.1-linux-x64.tar.gz -sSL --retry 5 --retry-delay 30 --create-dirs -o /tmp/src/node-v18.17.1-linux-x64.tar.gz --fail 286.4 curl: (92) HTTP/2 stream 0 was not closed cleanly: INTERNAL_ERROR (err 2) ``` This happens because the ONNX Runtime pipelines send too many requests to download Node.js during docker builds, which is currently the major cause of pipeline failures. In fact, docker image layer caching has never worked: the build logs always show the scripts still running ``` #9 [3/5] RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts #9 0.234 /bin/sh: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8) #9 0.235 /bin/sh: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8) #9 0.235 /tmp/scripts/install_centos.sh: line 1: !/bin/bash: No such file or directory #9 0.235 ++ '[' '!' -f /etc/yum.repos.d/microsoft-prod.repo ']' #9 0.236 +++ tr -dc 0-9. #9 0.236 +++ cut -d . -f1 #9 0.238 ++ os_major_version=8 .... #9 60.41 + curl https://nodejs.org/dist/v18.17.1/node-v18.17.1-linux-x64.tar.gz -sSL --retry 5 --retry-delay 30 --create-dirs -o /tmp/src/node-v18.17.1-linux-x64.tar.gz --fail #9 60.59 + return 0 ... ``` This PR improves the docker commands so that image layer caching works. As a result, CI no longer sends so many redundant requests to download Node.js. 
Example build log with the layers served from cache: ``` #9 [2/5] ADD scripts /tmp/scripts #9 CACHED #10 [3/5] RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts #10 CACHED #11 [4/5] RUN adduser --uid 1000 onnxruntimedev #11 CACHED #12 [5/5] WORKDIR /home/onnxruntimedev #12 CACHED ``` ### Reference https://docs.docker.com/build/drivers/ --------- Co-authored-by: Yi Zhang <your@email.com>
This commit is contained in:
Родитель
f6f9657fb6
Коммит
0d1da41ca8
|
@ -98,17 +98,19 @@ def main():
|
|||
)
|
||||
|
||||
if use_container_registry:
|
||||
run(args.docker_path, "buildx", "create", "--driver=docker-container", "--name=container_builder")
|
||||
run(
|
||||
args.docker_path,
|
||||
"--log-level",
|
||||
"error",
|
||||
"buildx",
|
||||
"build",
|
||||
"--push",
|
||||
"--load",
|
||||
"--tag",
|
||||
full_image_name,
|
||||
"--cache-from",
|
||||
full_image_name,
|
||||
"--cache-from=type=registry,ref=" + full_image_name,
|
||||
"--builder",
|
||||
"container_builder",
|
||||
"--build-arg",
|
||||
"BUILDKIT_INLINE_CACHE=1",
|
||||
*shlex.split(args.docker_build_args),
|
||||
|
@ -116,24 +118,10 @@ def main():
|
|||
args.dockerfile,
|
||||
args.context,
|
||||
)
|
||||
elif args.use_imagecache:
|
||||
log.info("Building image with pipeline cache...")
|
||||
run(
|
||||
args.docker_path,
|
||||
"--log-level",
|
||||
"error",
|
||||
"buildx",
|
||||
"build",
|
||||
"--tag",
|
||||
"push",
|
||||
full_image_name,
|
||||
"--cache-from",
|
||||
full_image_name,
|
||||
"--build-arg",
|
||||
"BUILDKIT_INLINE_CACHE=1",
|
||||
*shlex.split(args.docker_build_args),
|
||||
"-f",
|
||||
args.dockerfile,
|
||||
args.context,
|
||||
)
|
||||
else:
|
||||
log.info("Building image...")
|
||||
|
|
|
@ -321,6 +321,7 @@ stages:
|
|||
--build-arg TRT_VERSION=${{ variables.linux_trt_version }}
|
||||
"
|
||||
Repository: onnxruntimeubi8packagestest_torch
|
||||
UseImageCacheContainerRegistry: false
|
||||
UpdateDepsTxt: false
|
||||
|
||||
- task: DownloadPackage@1
|
||||
|
|
|
@ -51,15 +51,15 @@ jobs:
|
|||
Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile
|
||||
Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu
|
||||
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=${{parameters.BaseImage}}"
|
||||
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}
|
||||
|
||||
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging
|
||||
|
||||
- ${{ if eq(parameters.OnnxruntimeArch, 'aarch64') }}:
|
||||
- template: get-docker-image-steps.yml
|
||||
parameters:
|
||||
Dockerfile: tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile
|
||||
Context: tools/ci_build/github/linux/docker/inference/aarch64/default/cpu
|
||||
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=${{parameters.BaseImage}}"
|
||||
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}
|
||||
Repository: onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging
|
||||
UpdateDepsTxt: false
|
||||
|
||||
- task: CmdLine@2
|
||||
|
@ -67,7 +67,7 @@ jobs:
|
|||
script: |
|
||||
mkdir -p $HOME/.onnx
|
||||
docker run --rm --volume /data/onnx:/data/onnx:ro --volume $(Build.SourcesDirectory):/onnxruntime_src --volume $(Build.BinariesDirectory):/build \
|
||||
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}} /bin/bash -c "python3.9 \
|
||||
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecpubuildcentos8${{parameters.OnnxruntimeArch}}_packaging /bin/bash -c "python3.9 \
|
||||
/onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release \
|
||||
--skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib ${{ parameters.AdditionalBuildFlags }} && cd /build/Release && make install DESTDIR=/build/installed"
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
|
|
|
@ -53,6 +53,7 @@ steps:
|
|||
displayName: patch manylinux
|
||||
|
||||
- script: |
|
||||
docker version
|
||||
docker image ls
|
||||
docker system df
|
||||
displayName: Check Docker Images
|
||||
|
@ -71,52 +72,25 @@ steps:
|
|||
displayName: "Get ${{ parameters.Repository }} image for ${{ parameters.Dockerfile }}"
|
||||
ContainerRegistry: onnxruntimebuildcache
|
||||
- ${{ if eq(parameters.UseImageCacheContainerRegistry, false) }}:
|
||||
- task: Cache@2
|
||||
displayName: Cache Docker Image Task
|
||||
inputs:
|
||||
key: ' "${{ parameters.Repository }}" | "$(Build.SourceVersion)" '
|
||||
path: ${{ parameters.IMAGE_CACHE_DIR }}
|
||||
restoreKeys: |
|
||||
"${{ parameters.Repository }}" | "$(Build.SourceVersion)"
|
||||
"${{ parameters.Repository }}"
|
||||
cacheHitVar: CACHE_RESTORED
|
||||
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
|
||||
# the difference is no --container-registry
|
||||
- template: with-container-registry-steps.yml
|
||||
parameters:
|
||||
Steps:
|
||||
- script: |
|
||||
${{ parameters.ScriptName }} \
|
||||
--dockerfile "${{ parameters.Dockerfile }}" \
|
||||
--context "${{ parameters.Context }}" \
|
||||
--docker-build-args "${{ parameters.DockerBuildArgs }}" \
|
||||
--repository "${{ parameters.Repository }}"
|
||||
displayName: "Get ${{ parameters.Repository }} image for ${{ parameters.Dockerfile }}"
|
||||
ContainerRegistry: onnxruntimebuildcache
|
||||
|
||||
- script: |
|
||||
test -f ${{ parameters.IMAGE_CACHE_DIR }}/cache.tar && docker load -i ${{ parameters.IMAGE_CACHE_DIR }}/cache.tar
|
||||
docker image ls
|
||||
displayName: Docker restore
|
||||
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
|
||||
|
||||
- script: |
|
||||
if [ ${{ parameters.UsePipelineCache}} ]
|
||||
then
|
||||
use_imagecache="--use_imagecache"
|
||||
else
|
||||
use_imagecache=""
|
||||
fi
|
||||
${{ parameters.ScriptName }} \
|
||||
--dockerfile "${{ parameters.Dockerfile }}" \
|
||||
--context "${{ parameters.Context }}" \
|
||||
--docker-build-args "${{ parameters.DockerBuildArgs }}" \
|
||||
--repository "${{ parameters.Repository }}" \
|
||||
$use_imagecache
|
||||
displayName: "Get ${{ parameters.Repository }} image for ${{ parameters.Dockerfile }}"
|
||||
|
||||
- script: |
|
||||
set -ex
|
||||
mkdir -p "${{ parameters.IMAGE_CACHE_DIR }}"
|
||||
docker save -o "${{ parameters.IMAGE_CACHE_DIR }}/cache.tar" ${{ parameters.Repository }}
|
||||
docker image ls
|
||||
docker system df
|
||||
displayName: Docker save
|
||||
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
|
||||
|
||||
- script: |
|
||||
echo ${{ parameters.IMAGE_CACHE_DIR }}
|
||||
ls -lah ${{ parameters.IMAGE_CACHE_DIR }}
|
||||
displayName: Display docker dir
|
||||
condition: eq('${{ parameters.UsePipelineCache }}', 'true')
|
||||
- script: |
|
||||
docker version
|
||||
docker image ls
|
||||
docker system df
|
||||
df -h
|
||||
displayName: Check Docker Images
|
||||
|
||||
- ${{ if and(eq(parameters.UpdateDepsTxt, true), or(eq(variables['System.CollectionId'], 'f3ad12f2-e480-4533-baf2-635c95467d29'),eq(variables['System.CollectionId'], 'bc038106-a83b-4dab-9dd3-5a41bc58f34c'))) }}:
|
||||
- task: PythonScript@0
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
ARG BASEIMAGE=arm64v8/almalinux:8
|
||||
FROM $BASEIMAGE
|
||||
|
||||
ENV PATH /opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV PATH=/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LANG=en_US.UTF-8
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
ARG BASEIMAGE=amd64/almalinux:8
|
||||
FROM $BASEIMAGE
|
||||
|
||||
ENV PATH /usr/lib/jvm/msopenjdk-11/bin:/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV PATH=/usr/lib/jvm/msopenjdk-11/bin:/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LANG=en_US.UTF-8
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-11
|
||||
|
|
Загрузка…
Ссылка в новой задаче