From 54453e4f45c5e1773a3ffd834124b48dd56a6772 Mon Sep 17 00:00:00 2001 From: YundongYe <8675883+ydye@users.noreply.github.com> Date: Fri, 24 Apr 2020 13:08:01 +0800 Subject: [PATCH] [build] Disable yarn option in pai_build.py. (#4443) * Disable Yarn build option and remove hadoop binary building source code * Update doc --- build/pai_build.py | 18 +-- docs/pai-build/pai-build.md | 6 +- src/hadoop-ai/build/YARN-8896-2.9.0.patch | 17 --- src/hadoop-ai/build/build-pre.sh | 54 -------- src/hadoop-ai/build/build.sh | 49 ------- src/hadoop-ai/build/docker-executor.patch | 123 ------------------ src/hadoop-ai/build/hadoop-2.9.0-fix.patch | 38 ------ src/hadoop-ai/build/hadoop-ai | 82 ------------ src/hadoop-ai/build/hadoop-ai-fix.patch | 66 ---------- .../build/hadoop-ai-port-conflict.patch | 32 ----- src/hadoop-run/build/build-pre.sh | 32 ----- src/hadoop-run/build/component.dep | 1 - .../build/hadoop-run.yarn.dockerfile | 63 --------- src/hadoop-run/build/start.sh | 35 ----- 14 files changed, 4 insertions(+), 612 deletions(-) delete mode 100644 src/hadoop-ai/build/YARN-8896-2.9.0.patch delete mode 100755 src/hadoop-ai/build/build-pre.sh delete mode 100644 src/hadoop-ai/build/build.sh delete mode 100644 src/hadoop-ai/build/docker-executor.patch delete mode 100644 src/hadoop-ai/build/hadoop-2.9.0-fix.patch delete mode 100644 src/hadoop-ai/build/hadoop-ai delete mode 100644 src/hadoop-ai/build/hadoop-ai-fix.patch delete mode 100644 src/hadoop-ai/build/hadoop-ai-port-conflict.patch delete mode 100755 src/hadoop-run/build/build-pre.sh delete mode 100644 src/hadoop-run/build/component.dep delete mode 100644 src/hadoop-run/build/hadoop-run.yarn.dockerfile delete mode 100644 src/hadoop-run/build/start.sh diff --git a/build/pai_build.py b/build/pai_build.py index 9368bd012..3d554a0b6 100755 --- a/build/pai_build.py +++ b/build/pai_build.py @@ -39,11 +39,11 @@ def load_build_config(config_dir): return configModel def build_service(args,config_model): - pai_build = build_center.BuildCenter(config_model, args.service, args.mode) + pai_build = build_center.BuildCenter(config_model, args.service, 'k8s') pai_build.build_center() def push_image(args,config_model): - pai_push = build_center.BuildCenter(config_model, args.imagelist, args.mode) + pai_push = build_center.BuildCenter(config_model, args.imagelist, 'k8s') pai_push.push_center() def main(): @@ -72,13 +72,6 @@ def main(): nargs='+', help="The service list you want to build" ) - build_parser.add_argument( - '-m', '--mode', - type=bytes, - default='all', - choices=['all', 'yarn', 'k8s'], - help='Choose image type to build. Available Option: all, yarn, k8s' - ) build_parser.set_defaults(func = build_service) # Push commands @@ -95,13 +88,6 @@ def main(): nargs='+', help="The image list you want to push" ) - push_parser.add_argument( - '-m', '--mode', - type=bytes, - default='all', - choices=['all', 'yarn', 'k8s'], - help='Choose image type to push. Available Option: all, yarn, k8s' - ) push_parser.set_defaults(func = push_image) args = parser.parse_args() diff --git a/docs/pai-build/pai-build.md b/docs/pai-build/pai-build.md index a89ab4a6b..331028147 100644 --- a/docs/pai-build/pai-build.md +++ b/docs/pai-build/pai-build.md @@ -27,22 +27,20 @@ Build image by using ```pai_build.py``` which put under ``build/``. for the conf ### Build infrastructure services ``` -./pai_build.py build -c /path/to/configuration-dir/ [ -s component-list ] [-m all|k8s|yarn] +./pai_build.py build -c /path/to/configuration-dir/ [ -s component-list ] ``` - Build the corresponding component. - If the option `-s` is added, only the specified component will be built. By default will build all components under ``src/`` -- Default value of `-m` is `all`, and with it all image will be built. When the value is `k8s`, only the images of k8s-type service will be built, and the same as the value of `yarn`. ### Push infrastructure image(s) ``` -./pai_build.py push -c /path/to/configuration-dir/ [ -i image-list ] [-m all|k8s|yarn] +./pai_build.py push -c /path/to/configuration-dir/ [ -i image-list ] ``` - tag and push image to the docker registry which is configured in the ```cluster-configuration```. - If the option `-i` is added, only the specified image will be pushed. By default will push all images which ``.dockerfile`` can be found under ``{src/${componentName}/build/`` -- Default value of `-m` is `all`, and with it all image will be pushed. When the value is `k8s`, only the images of k8s-type service will be pushed, and the same as the value of `yarn`. # Current pai build process diff --git a/src/hadoop-ai/build/YARN-8896-2.9.0.patch b/src/hadoop-ai/build/YARN-8896-2.9.0.patch deleted file mode 100644 index 4797c1905..000000000 --- a/src/hadoop-ai/build/YARN-8896-2.9.0.patch +++ /dev/null @@ -1,17 +0,0 @@ -diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java -index 3a80856dfa5..11ee8e68d28 100644 ---- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java -+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java -@@ -318,8 +318,11 @@ - public static final String MAX_ASSIGN_PER_HEARTBEAT = PREFIX - + "per-node-heartbeat.maximum-container-assignments"; - -+ /** -+ * Avoid potential risk that greedy assign multiple may involve -+ * */ - @Private -- public static final int DEFAULT_MAX_ASSIGN_PER_HEARTBEAT = -1; -+ public static final int DEFAULT_MAX_ASSIGN_PER_HEARTBEAT = 100; - - AppPriorityACLConfigurationParser priorityACLConfig = new AppPriorityACLConfigurationParser(); - diff --git a/src/hadoop-ai/build/build-pre.sh b/src/hadoop-ai/build/build-pre.sh deleted file mode 100755 index 35387075f..000000000 --- a/src/hadoop-ai/build/build-pre.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -pushd $(dirname "$0") > /dev/null - -hadoopBinaryDir="/hadoop-binary" - -# When changing the patch id, please update it. -patchId="12940533-12933562-docker_executor-12944563-fix1-20190819" - -hadoopBinaryPath="${hadoopBinaryDir}/hadoop-2.9.0.tar.gz" -cacheVersion="${hadoopBinaryDir}/${patchId}-done" - - -echo "Hadoop binary path: ${hadoopBinaryDir}" - -[[ -f ${cacheVersion} ]] && [[ -f ${hadoopBinaryPath} ]] && [[ ${cacheVersion} -ot ${hadoopBinaryPath} ]] && -{ - echo "Hadoop ai with patch ${patchId} has been built" - echo "Skip this build precess" - exit 0 -} - -[[ ! -f "${hadoopBinaryPath}" ]] || -{ - - rm -rf ${hadoopBinaryPath} - -} - -rm ${hadoopBinaryDir}/*-done -touch ${cacheVersion} - -docker build -t hadoop-build -f hadoop-ai . - -docker run --rm --name=hadoop-build --volume=${hadoopBinaryDir}:/hadoop-binary hadoop-build - -popd > /dev/null diff --git a/src/hadoop-ai/build/build.sh b/src/hadoop-ai/build/build.sh deleted file mode 100644 index 87cc1ebb9..000000000 --- a/src/hadoop-ai/build/build.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -cd / - -wget https://issues.apache.org/jira/secure/attachment/12940533/hadoop-2.9.0.gpu-port.20180920.patch -O hadoop-2.9.0.gpu-port.patch -# patch for webhdfs upload issue when using nginx as a reverse proxy -wget https://issues.apache.org/jira/secure/attachment/12933562/HDFS-13773.patch - -git clone https://github.com/apache/hadoop.git - -cd hadoop - -git checkout branch-2.9.0 - -git apply /hadoop-2.9.0.gpu-port.patch -git apply /HDFS-13773.patch -git apply /docker-executor.patch -# to avoid potential endless loop, refer to https://issues.apache.org/jira/browse/YARN-8513?page=com.atlassian.jira.plugin.system.issuetabpanels%3Aall-tabpanel -git apply /YARN-8896-2.9.0.patch -git apply /hadoop-ai-fix.patch -git apply /hadoop-2.9.0-fix.patch -git apply /hadoop-ai-port-conflict.patch - -mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar - -cp /hadoop/hadoop-dist/target/hadoop-2.9.0.tar.gz /hadoop-binary - -echo "Successfully build hadoop 2.9.0 AI" - - - - diff --git a/src/hadoop-ai/build/docker-executor.patch b/src/hadoop-ai/build/docker-executor.patch deleted file mode 100644 index 6996c03f9..000000000 --- a/src/hadoop-ai/build/docker-executor.patch +++ /dev/null @@ -1,123 +0,0 @@ -diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java -index 96f6c57..1b89e90 100644 ---- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java -+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java -@@ -1544,6 +1544,14 @@ public static boolean isAclEnabled(Configuration conf) { - public static final String NM_DOCKER_CONTAINER_EXECUTOR_IMAGE_NAME = - NM_PREFIX + "docker-container-executor.image-name"; - -+ /** The Docker run option(For DockerContainerExecutor).*/ -+ public static final String NM_DOCKER_CONTAINER_EXECUTOR_EXEC_OPTION = -+ NM_PREFIX + "docker-container-executor.exec-option"; -+ -+ /** The command before launch script(For DockerContainerExecutor).*/ -+ public static final String NM_DOCKER_CONTAINER_EXECUTOR_SCRIPT_COMMAND = -+ NM_PREFIX + "docker-container-executor.script-command"; -+ - /** The name of the docker executor (For DockerContainerExecutor).*/ - public static final String NM_DOCKER_CONTAINER_EXECUTOR_EXEC_NAME = - NM_PREFIX + "docker-container-executor.exec-name"; -diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java -index a044cb6..819c496 100644 ---- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java -+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java -@@ -98,7 +98,7 @@ - //containername:0.1 or - //containername - public static final String DOCKER_IMAGE_PATTERN = -- "^(([\\w\\.-]+)(:\\d+)*\\/)?[\\w\\.:-]+$"; -+ "^(([a-zA-Z0-9.-]+)(:\\d+)?/)?([a-z0-9_./-]+)(:[\\w.-]+)?$"; - - private final FileContext lfs; - private final Pattern dockerImagePattern; -@@ -127,7 +127,12 @@ public void init() throws IOException { - String dockerExecutor = getConf().get( - YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_EXEC_NAME, - YarnConfiguration.NM_DEFAULT_DOCKER_CONTAINER_EXECUTOR_EXEC_NAME); -- if (!new File(dockerExecutor).exists()) { -+ // /use/bin/docker -H=tcp://0.0.0.0:xx is also a valid docker executor -+ String[] arr = dockerExecutor.split("\\s"); -+ if (LOG.isDebugEnabled()) { -+ LOG.debug("dockerExecutor: " + dockerExecutor); -+ } -+ if (!new File(arr[0]).exists()) { - throw new IllegalStateException( - "Invalid docker exec path: " + dockerExecutor); - } -@@ -181,8 +186,11 @@ public int launchContainer(ContainerStartContext ctx) throws IOException { - - //Variables for the launch environment can be injected from the command-line - //while submitting the application -- String containerImageName = container.getLaunchContext().getEnvironment() -- .get(YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_IMAGE_NAME); -+ //modify get image from configuration rather than env -+ String containerImageName = getConf().get( -+ YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_IMAGE_NAME); -+ -+ // - if (LOG.isDebugEnabled()) { - LOG.debug("containerImageName from launchContext: " + containerImageName); - } -@@ -240,19 +248,27 @@ public int launchContainer(ContainerStartContext ctx) throws IOException { - //--net=host allows the container to take on the host's network stack - //--name sets the Docker Container name to the YARN containerId string - //-v is used to bind mount volumes for local, log and work dirs. -+ //-w sets the work dir inside the container -+ //add docker option -+ String dockerOption = getConf().get( -+ YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_EXEC_OPTION); - String commandStr = commands.append(dockerExecutor) - .append(" ") - .append("run") - .append(" ") -- .append("--rm --net=host") -+ .append("--rm --net=host --pid=host --privileged=true") -+ .append(" ") -+ .append("-w " + containerWorkDir.toUri().getPath().toString()) -+ .append(" ") -+ .append(dockerOption) - .append(" ") - .append(" --name " + containerIdStr) -- .append(localDirMount) -- .append(logDirMount) -- .append(containerWorkDirMount) - .append(" ") - .append(containerImageName) - .toString(); -+ if (LOG.isDebugEnabled()) { -+ LOG.debug("Docker run command: " + commandStr); -+ } - //Get the pid of the process which has been launched as a docker container - //using docker inspect - String dockerPidScript = "`" + dockerExecutor + -@@ -597,13 +613,28 @@ private void writeSessionScript(Path launchDst, Path pidFile) - // We need to do a move as writing to a file is not atomic - // Process reading a file being written to may get garbled data - // hence write pid to tmp file first followed by a mv -+ // Move dockerpid command to backend, avoid blocking docker run command -+ // need to improve it with publisher mode -+ // Ref: https://issues.apache.org/jira/browse/YARN-3080 - pout.println("#!/usr/bin/env bash"); - pout.println(); -+ pout.println("{"); -+ pout.println("n=10"); -+ pout.println("while [ $n -gt 0 ]; do"); -+ pout.println("let n=$n-1"); -+ pout.println("sleep 5"); - pout.println("echo "+ dockerPidScript +" > " + pidFile.toString() - + ".tmp"); -+ pout.println("[ -n \"$(cat \"" + pidFile.toString() -+ + ".tmp\")\" ] && break"); -+ pout.println("done"); - pout.println("/bin/mv -f " + pidFile.toString() + ".tmp " + pidFile); -- pout.println(dockerCommand + " bash \"" + -- launchDst.toUri().getPath().toString() + "\""); -+ pout.println("} &"); -+ //Add exec command before launch_script. -+ String scriptCommand = getConf().get( -+ YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_SCRIPT_COMMAND); -+ pout.println(dockerCommand + " bash -c '" + scriptCommand + " && bash \"" + -+ launchDst.toUri().getPath().toString() + "\"'"); - } finally { - IOUtils.cleanupWithLogger(LOG, pout, out); - } diff --git a/src/hadoop-ai/build/hadoop-2.9.0-fix.patch b/src/hadoop-ai/build/hadoop-2.9.0-fix.patch deleted file mode 100644 index e5ed19950..000000000 --- a/src/hadoop-ai/build/hadoop-2.9.0-fix.patch +++ /dev/null @@ -1,38 +0,0 @@ -diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java -index 0cf6b55..164fcf6 100644 ---- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java -+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java -@@ -1525,9 +1525,15 @@ public void transition(ContainerImpl container, ContainerEvent event) { - static class ExitedWithFailureTransition extends ContainerTransition { - - boolean clCleanupRequired; -+ boolean diagnosticsRequired; - - public ExitedWithFailureTransition(boolean clCleanupRequired) { -+ this(clCleanupRequired, true); -+ } -+ -+ public ExitedWithFailureTransition(boolean clCleanupRequired, boolean diagnosticsRequired) { - this.clCleanupRequired = clCleanupRequired; -+ this.diagnosticsRequired = diagnosticsRequired; - } - - @Override -@@ -1535,7 +1541,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { - container.setIsReInitializing(false); - ContainerExitEvent exitEvent = (ContainerExitEvent) event; - container.exitCode = exitEvent.getExitCode(); -- if (exitEvent.getDiagnosticInfo() != null) { -+ if (diagnosticsRequired && exitEvent.getDiagnosticInfo() != null) { - container.addDiagnostics(exitEvent.getDiagnosticInfo(), "\n"); - } - -@@ -1608,7 +1614,7 @@ public ContainerState transition(final ContainerImpl container, - new KilledForReInitializationTransition().transition(container, event); - return ContainerState.SCHEDULED; - } else { -- new ExitedWithFailureTransition(true).transition(container, event); -+ new ExitedWithFailureTransition(true, false).transition(container, event); - return ContainerState.EXITED_WITH_FAILURE; - } - } diff --git a/src/hadoop-ai/build/hadoop-ai b/src/hadoop-ai/build/hadoop-ai deleted file mode 100644 index ddb618d2b..000000000 --- a/src/hadoop-ai/build/hadoop-ai +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -FROM ubuntu:16.04 - -RUN apt-get -y update && \ - apt-get -y install \ - nano \ - vim \ - joe \ - wget \ - curl \ - jq \ - gawk \ - psmisc \ - python \ - python-yaml \ - python-jinja2 \ - python-urllib3 \ - python-tz \ - python-nose \ - python-prettytable \ - python-netifaces \ - python-dev \ - python-pip \ - python-mysqldb \ - openjdk-8-jre \ - openjdk-8-jdk \ - openssh-server \ - openssh-client \ - git \ - inotify-tools \ - rsync \ - maven \ - cmake \ - findbugs \ - zlib1g-dev \ - pkg-config \ - libssl-dev \ - autoconf \ - automake \ - libtool \ - build-essential - - -ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 - -RUN wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz && \ - tar xzvf protobuf-2.5.0.tar.gz && \ - cd protobuf-2.5.0 && \ - ./configure && \ - make && \ - make check && \ - make install && \ - ldconfig && \ - protoc --version - - -## The build environment of hadoop has been prepared above. -## Copy your build script here. Default script will build our hadoop-ai. - -COPY *.patch / - -COPY build.sh / - -RUN chmod u+x build.sh - -CMD ["/build.sh"] diff --git a/src/hadoop-ai/build/hadoop-ai-fix.patch b/src/hadoop-ai/build/hadoop-ai-fix.patch deleted file mode 100644 index 0d07df078..000000000 --- a/src/hadoop-ai/build/hadoop-ai-fix.patch +++ /dev/null @@ -1,66 +0,0 @@ -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java -index 8801b4a940f..30d33086516 100644 ---- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java -@@ -138,7 +138,7 @@ - | 1 Tesla K80 Off | 000083D4:00:00.0 Off | 1 | - | N/A 32C P8 28W / 149W | 11MiB / 11439MiB | 0% Default | - +-------------------------------+----------------------+----------------------+ -- | 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 0 | -+ | 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 2 | - | N/A 29C P8 25W / 149W | 12MiB / 11439MiB | 0% Default | - +-------------------------------+----------------------+----------------------+ - | 3 Tesla K80 Off | 0000B6D4:00:00.0 Off | N/A | -@@ -169,7 +169,7 @@ - +-----------------------------------------------------------------------------+ - */ - Pattern GPU_INFO_FORMAT = -- Pattern.compile("\\s+([0-9]{1,2})\\s+[\\s\\S]*\\s+(0|1|N/A|Off)\\s+"); -+ Pattern.compile("[|]\\s+([0-9]{1,2})[^|]*[|][^|]*[|]\\s+(\\d+|N/A|Off)\\s+[|]"); - Pattern GPU_MEM_FORMAT = - Pattern.compile("([0-9]+)MiB\\s*/\\s*([0-9]+)MiB"); - -@@ -820,11 +820,16 @@ private void refreshGpuIfNeeded(boolean excludeOwnerlessUsingGpus, int gpuNotRea - long index = Long.parseLong(mat.group(1)); - currentIndex = index; - -- String errCode = mat.group(2); -- if (!errCode.equals("1")) { -+ int errCode; -+ try { -+ errCode = Integer.parseInt(mat.group(2)); -+ } catch (NumberFormatException e) { -+ errCode = 0; -+ } -+ if (errCode == 0) { - gpuAttributeCapacity |= (1L << index); - } else { -- LOG.error("ignored error: gpu " + index + " ECC code is 1, will make this gpu unavailable"); -+ LOG.error("GPU error: gpu " + index + " ECC code is " + mat.group(2) + ", will make this gpu unavailable"); - } - } - } -diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java -index 52cc3f8f160..71f9c95cdbc 100644 ---- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java -+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java -@@ -269,7 +269,7 @@ int readDiskBlockInformation(String diskName, int defSector) { - "| 1 Tesla K80 Off | 000083D4:00:00.0 Off | 1 |\n" + - "| N/A 32C P8 28W / 149W | 11MiB / 11439MiB | 0% Default |\n" + - "+-------------------------------+----------------------+----------------------+\n" + -- "| 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 0 |\n" + -+ "| 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 2 |\n" + - "| N/A 29C P8 25W / 149W | 12MiB / 11439MiB | 0% Default |\n" + - "+-------------------------------+----------------------+----------------------+\n" + - "| 3 Tesla K80 Off | 0000B6D4:00:00.0 Off | N/A |\n" + -@@ -605,8 +605,8 @@ private void InitialGPUTestFile() throws IOException { - public void parsingGPUFile() throws Exception { - - InitialGPUTestFile(); -- assertEquals(7, plugin.getNumGPUs(false, 0)); -- assertEquals(253, plugin.getGpuAttributeCapacity(false, 0)); -+ assertEquals(6, plugin.getNumGPUs(false, 0)); -+ assertEquals(249, plugin.getGpuAttributeCapacity(false, 0)); - } - - diff --git a/src/hadoop-ai/build/hadoop-ai-port-conflict.patch b/src/hadoop-ai/build/hadoop-ai-port-conflict.patch deleted file mode 100644 index 317a9d5b9..000000000 --- a/src/hadoop-ai/build/hadoop-ai-port-conflict.patch +++ /dev/null @@ -1,32 +0,0 @@ -diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java -index 60443f4..fa30cca 100644 ---- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java -+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java -@@ -174,6 +174,7 @@ - import org.apache.hadoop.yarn.util.Clock; - import org.apache.hadoop.yarn.util.Records; - import org.apache.hadoop.yarn.util.UTCClock; -+import org.apache.hadoop.yarn.util.resource.Resources; - import org.apache.hadoop.yarn.util.timeline.TimelineUtils; - - import com.google.common.annotations.VisibleForTesting; -@@ -1028,12 +1029,17 @@ private NodeReport createNodeReports(RMNode rmNode) { - if (schedulerNodeReport != null) { - used = schedulerNodeReport.getUsedResource(); - numContainers = schedulerNodeReport.getNumContainers(); -- } -+ } -+ -+ Resource total = Resources.clone(rmNode.getTotalCapability()); -+ if (total.getPorts() != null) { -+ total.setPorts(total.getPorts().minusSelf(rmNode.getLocalUsedPortsSnapshot())); -+ } - - NodeReport report = - BuilderUtils.newNodeReport(rmNode.getNodeID(), rmNode.getState(), - rmNode.getHttpAddress(), rmNode.getRackName(), used, -- rmNode.getTotalCapability(), numContainers, -+ total, numContainers, - rmNode.getHealthReport(), rmNode.getLastHealthReportTime(), - rmNode.getNodeLabels(), rmNode.getAggregatedContainersUtilization(), - rmNode.getNodeUtilization()); diff --git a/src/hadoop-run/build/build-pre.sh b/src/hadoop-run/build/build-pre.sh deleted file mode 100755 index a95443e63..000000000 --- a/src/hadoop-run/build/build-pre.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -pushd $(dirname "$0") > /dev/null - -hadoopBinaryPath="/hadoop-binary/" - -hadoopDestDir="../dependency/" - -if [[ ! -d ${hadoopDestDir} ]]; then - mkdir ${hadoopDestDir} -fi - -cp -arf ${hadoopBinaryPath} ${hadoopDestDir} - -popd > /dev/null \ No newline at end of file diff --git a/src/hadoop-run/build/component.dep b/src/hadoop-run/build/component.dep deleted file mode 100644 index 1b7d621f3..000000000 --- a/src/hadoop-run/build/component.dep +++ /dev/null @@ -1 +0,0 @@ -hadoop-ai \ No newline at end of file diff --git a/src/hadoop-run/build/hadoop-run.yarn.dockerfile b/src/hadoop-run/build/hadoop-run.yarn.dockerfile deleted file mode 100644 index 8776b75f8..000000000 --- a/src/hadoop-run/build/hadoop-run.yarn.dockerfile +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -FROM base-image - -ENV HADOOP_VERSION=hadoop-2.9.0 - -RUN apt-get -y install zookeeper libsnappy-dev -RUN rm -rf /var/lib/apt/lists/* - -COPY dependency/hadoop-binary/hadoop-2.9.0.tar.gz /usr/local/ - -RUN tar -xzf /usr/local/$HADOOP_VERSION.tar.gz -C /usr/local/ && \ - cd /usr/local && \ - ln -s ./$HADOOP_VERSION hadoop - -ENV HADOOP_PREFIX=/usr/local/hadoop \ - HADOOP_BIN_DIR=/usr/local/hadoop/bin \ - HADOOP_SBIN_DIR=/usr/local/hadoop/sbin \ - HADOOP_COMMON_HOME=/usr/local/hadoop \ - HADOOP_HDFS_HOME=/usr/local/hadoop \ - HADOOP_MAPRED_HOME=/usr/local/hadoop \ - HADOOP_YARN_HOME=/usr/local/hadoop \ - HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \ - HADOOP_ROOT_LOGGER=INFO,console \ - HADOOP_SECURITY_LOGGER=INFO,console - -ENV YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop - -ENV PATH=$PATH:$HADOOP_BIN_DIR:$HADOOP_SBIN_DIR:/usr/share/zookeeper/bin - -RUN chown -R root:root /var -RUN mkdir -p $HADOOP_YARN_HOME/logs - -RUN mkdir -p /var/lib/hdfs/name -RUN mkdir -p /var/lib/hdfs/data - -COPY build/start.sh /usr/local/start.sh -RUN chmod a+x /usr/local/start.sh - - -# Only node manager need this.# -#COPY docker-17.06.2-ce.tgz /usr/local -RUN wget https://download.docker.com/linux/static/stable/x86_64/docker-17.06.2-ce.tgz -RUN cp docker-17.06.2-ce.tgz /usr/local -RUN tar xzvf /usr/local/docker-17.06.2-ce.tgz -# Only node manager need this.# - -CMD ["/usr/local/start.sh"] diff --git a/src/hadoop-run/build/start.sh b/src/hadoop-run/build/start.sh deleted file mode 100644 index dbe5a47a3..000000000 --- a/src/hadoop-run/build/start.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -#get the config generating script from kubenretes configmap -cp /hadoop-configuration/${GENERATE_CONFIG} generate_config.sh -chmod u+x generate_config.sh - -./generate_config.sh - -#get the start service script from kuberentes configmap -cp /hadoop-configuration/${START_SERVICE} start_service.sh -chmod u+x start_service.sh - -# This status check is mainly for ensuring the status of image pulling. -# And usually this process costs most of the time when creating a new pod in kubernetes. -mkdir -p /jobstatus -touch /jobstatus/jobok - -./start_service.sh