Mirror of https://github.com/microsoft/pai.git
[build] Disable yarn option in pai_build.py. (#4443)
* Disable the Yarn build option and remove the Hadoop binary build source code
* Update doc
This commit is contained in:
Parent: db46dc0e2e
Commit: 54453e4f45
@@ -39,11 +39,11 @@ def load_build_config(config_dir):
     return configModel

 def build_service(args,config_model):
-    pai_build = build_center.BuildCenter(config_model, args.service, args.mode)
+    pai_build = build_center.BuildCenter(config_model, args.service, 'k8s')
     pai_build.build_center()

 def push_image(args,config_model):
-    pai_push = build_center.BuildCenter(config_model, args.imagelist, args.mode)
+    pai_push = build_center.BuildCenter(config_model, args.imagelist, 'k8s')
     pai_push.push_center()

 def main():
@@ -72,13 +72,6 @@ def main():
         nargs='+',
         help="The service list you want to build"
     )
-    build_parser.add_argument(
-        '-m', '--mode',
-        type=bytes,
-        default='all',
-        choices=['all', 'yarn', 'k8s'],
-        help='Choose image type to build. Available Option: all, yarn, k8s'
-    )
     build_parser.set_defaults(func = build_service)

     # Push commands
@@ -95,13 +88,6 @@ def main():
         nargs='+',
         help="The image list you want to push"
     )
-    push_parser.add_argument(
-        '-m', '--mode',
-        type=bytes,
-        default='all',
-        choices=['all', 'yarn', 'k8s'],
-        help='Choose image type to push. Available Option: all, yarn, k8s'
-    )
     push_parser.set_defaults(func = push_image)

     args = parser.parse_args()
@@ -27,22 +27,20 @@ Build image by using ```pai_build.py``` which put under ``build/``. for the conf
 ### Build infrastructure services <a name="Service_Build"></a>

 ```
-./pai_build.py build -c /path/to/configuration-dir/ [ -s component-list ] [-m all|k8s|yarn]
+./pai_build.py build -c /path/to/configuration-dir/ [ -s component-list ]
 ```

 - Build the corresponding components.
 - If the option `-s` is given, only the specified components will be built; by default all components under ``src/`` are built (see the example below).
-- The default value of `-m` is `all`, which builds all images. When the value is `k8s`, only the images of k8s-type services are built, and likewise for `yarn`.
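For illustration, a hypothetical build invocation under the new interface; the configuration path and component names are placeholders, not values taken from this commit:

```
# Build two specific components (hypothetical names) against a
# cluster configuration kept under /cluster-configuration.
./pai_build.py build -c /cluster-configuration -s rest-server webportal

# Omit -s to build every component under src/.
./pai_build.py build -c /cluster-configuration
```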

 ### Push infrastructure image(s) <a name="Image_Push"></a>

 ```
-./pai_build.py push -c /path/to/configuration-dir/ [ -i image-list ] [-m all|k8s|yarn]
+./pai_build.py push -c /path/to/configuration-dir/ [ -i image-list ]
 ```

 - Tag and push images to the docker registry configured in the ```cluster-configuration```.
 - If the option `-i` is given, only the specified images will be pushed; by default all images with a ``.dockerfile`` under ``src/${componentName}/build/`` are pushed (see the example below).
-- The default value of `-m` is `all`, which pushes all images. When the value is `k8s`, only the images of k8s-type services are pushed, and likewise for `yarn`.
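Similarly, a hypothetical push invocation; the image name is a placeholder:

```
# Push a single named image (hypothetical name).
./pai_build.py push -c /cluster-configuration -i rest-server

# Omit -i to push every image that has a .dockerfile under src/${componentName}/build/.
./pai_build.py push -c /cluster-configuration
```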

 # Current pai build process
@@ -1,17 +0,0 @@
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index 3a80856dfa5..11ee8e68d28 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -318,8 +318,11 @@
   public static final String MAX_ASSIGN_PER_HEARTBEAT = PREFIX
       + "per-node-heartbeat.maximum-container-assignments";

+  /**
+   * Avoid potential risk that greedy assign multiple may involve
+   * */
   @Private
-  public static final int DEFAULT_MAX_ASSIGN_PER_HEARTBEAT = -1;
+  public static final int DEFAULT_MAX_ASSIGN_PER_HEARTBEAT = 100;

   AppPriorityACLConfigurationParser priorityACLConfig = new AppPriorityACLConfigurationParser();
@@ -1,54 +0,0 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

pushd $(dirname "$0") > /dev/null

hadoopBinaryDir="/hadoop-binary"

# When the applied patches change, please update this id.
patchId="12940533-12933562-docker_executor-12944563-fix1-20190819"

hadoopBinaryPath="${hadoopBinaryDir}/hadoop-2.9.0.tar.gz"
cacheVersion="${hadoopBinaryDir}/${patchId}-done"


echo "Hadoop binary path: ${hadoopBinaryDir}"

[[ -f ${cacheVersion} ]] && [[ -f ${hadoopBinaryPath} ]] && [[ ${cacheVersion} -ot ${hadoopBinaryPath} ]] &&
{
    echo "Hadoop ai with patch ${patchId} has been built"
    echo "Skip this build process"
    exit 0
}

[[ ! -f "${hadoopBinaryPath}" ]] ||
{
    rm -rf ${hadoopBinaryPath}
}

rm ${hadoopBinaryDir}/*-done
touch ${cacheVersion}

docker build -t hadoop-build -f hadoop-ai .

docker run --rm --name=hadoop-build --volume=${hadoopBinaryDir}:/hadoop-binary hadoop-build

popd > /dev/null
@@ -1,49 +0,0 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

cd /

wget https://issues.apache.org/jira/secure/attachment/12940533/hadoop-2.9.0.gpu-port.20180920.patch -O hadoop-2.9.0.gpu-port.patch
# patch for webhdfs upload issue when using nginx as a reverse proxy
wget https://issues.apache.org/jira/secure/attachment/12933562/HDFS-13773.patch

git clone https://github.com/apache/hadoop.git

cd hadoop

git checkout branch-2.9.0

git apply /hadoop-2.9.0.gpu-port.patch
git apply /HDFS-13773.patch
git apply /docker-executor.patch
# to avoid a potential endless loop, refer to https://issues.apache.org/jira/browse/YARN-8513?page=com.atlassian.jira.plugin.system.issuetabpanels%3Aall-tabpanel
git apply /YARN-8896-2.9.0.patch
git apply /hadoop-ai-fix.patch
git apply /hadoop-2.9.0-fix.patch
git apply /hadoop-ai-port-conflict.patch

mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar

cp /hadoop/hadoop-dist/target/hadoop-2.9.0.tar.gz /hadoop-binary

echo "Successfully built hadoop 2.9.0 AI"
@@ -1,123 +0,0 @@
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 96f6c57..1b89e90 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1544,6 +1544,14 @@ public static boolean isAclEnabled(Configuration conf) {
   public static final String NM_DOCKER_CONTAINER_EXECUTOR_IMAGE_NAME =
       NM_PREFIX + "docker-container-executor.image-name";

+  /** The Docker run option(For DockerContainerExecutor).*/
+  public static final String NM_DOCKER_CONTAINER_EXECUTOR_EXEC_OPTION =
+      NM_PREFIX + "docker-container-executor.exec-option";
+
+  /** The command before launch script(For DockerContainerExecutor).*/
+  public static final String NM_DOCKER_CONTAINER_EXECUTOR_SCRIPT_COMMAND =
+      NM_PREFIX + "docker-container-executor.script-command";
+
   /** The name of the docker executor (For DockerContainerExecutor).*/
   public static final String NM_DOCKER_CONTAINER_EXECUTOR_EXEC_NAME =
       NM_PREFIX + "docker-container-executor.exec-name";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java
index a044cb6..819c496 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java
@@ -98,7 +98,7 @@
   //containername:0.1 or
   //containername
   public static final String DOCKER_IMAGE_PATTERN =
-      "^(([\\w\\.-]+)(:\\d+)*\\/)?[\\w\\.:-]+$";
+      "^(([a-zA-Z0-9.-]+)(:\\d+)?/)?([a-z0-9_./-]+)(:[\\w.-]+)?$";

   private final FileContext lfs;
   private final Pattern dockerImagePattern;
@@ -127,7 +127,12 @@ public void init() throws IOException {
     String dockerExecutor = getConf().get(
         YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_EXEC_NAME,
         YarnConfiguration.NM_DEFAULT_DOCKER_CONTAINER_EXECUTOR_EXEC_NAME);
-    if (!new File(dockerExecutor).exists()) {
+    // /usr/bin/docker -H=tcp://0.0.0.0:xx is also a valid docker executor
+    String[] arr = dockerExecutor.split("\\s");
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("dockerExecutor: " + dockerExecutor);
+    }
+    if (!new File(arr[0]).exists()) {
       throw new IllegalStateException(
           "Invalid docker exec path: " + dockerExecutor);
     }
@@ -181,8 +186,11 @@ public int launchContainer(ContainerStartContext ctx) throws IOException {

     //Variables for the launch environment can be injected from the command-line
     //while submitting the application
-    String containerImageName = container.getLaunchContext().getEnvironment()
-        .get(YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_IMAGE_NAME);
+    //modify get image from configuration rather than env
+    String containerImageName = getConf().get(
+        YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_IMAGE_NAME);
+
+    //
     if (LOG.isDebugEnabled()) {
       LOG.debug("containerImageName from launchContext: " + containerImageName);
     }
@@ -240,19 +248,27 @@ public int launchContainer(ContainerStartContext ctx) throws IOException {
     //--net=host allows the container to take on the host's network stack
     //--name sets the Docker Container name to the YARN containerId string
     //-v is used to bind mount volumes for local, log and work dirs.
+    //-w sets the work dir inside the container
+    //add docker option
+    String dockerOption = getConf().get(
+        YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_EXEC_OPTION);
     String commandStr = commands.append(dockerExecutor)
         .append(" ")
         .append("run")
         .append(" ")
-        .append("--rm --net=host")
+        .append("--rm --net=host --pid=host --privileged=true")
+        .append(" ")
+        .append("-w " + containerWorkDir.toUri().getPath().toString())
+        .append(" ")
+        .append(dockerOption)
         .append(" ")
         .append(" --name " + containerIdStr)
-        .append(localDirMount)
-        .append(logDirMount)
-        .append(containerWorkDirMount)
         .append(" ")
         .append(containerImageName)
         .toString();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Docker run command: " + commandStr);
+    }
     //Get the pid of the process which has been launched as a docker container
     //using docker inspect
     String dockerPidScript = "`" + dockerExecutor +
@@ -597,13 +613,28 @@ private void writeSessionScript(Path launchDst, Path pidFile)
       // We need to do a move as writing to a file is not atomic
       // Process reading a file being written to may get garbled data
       // hence write pid to tmp file first followed by a mv
+      // Move dockerpid command to backend, avoid blocking docker run command
+      // need to improve it with publisher mode
+      // Ref: https://issues.apache.org/jira/browse/YARN-3080
       pout.println("#!/usr/bin/env bash");
       pout.println();
+      pout.println("{");
+      pout.println("n=10");
+      pout.println("while [ $n -gt 0 ]; do");
+      pout.println("let n=$n-1");
+      pout.println("sleep 5");
       pout.println("echo "+ dockerPidScript +" > " + pidFile.toString()
           + ".tmp");
+      pout.println("[ -n \"$(cat \"" + pidFile.toString()
+          + ".tmp\")\" ] && break");
+      pout.println("done");
       pout.println("/bin/mv -f " + pidFile.toString() + ".tmp " + pidFile);
-      pout.println(dockerCommand + " bash \"" +
-          launchDst.toUri().getPath().toString() + "\"");
+      pout.println("} &");
+      //Add exec command before launch_script.
+      String scriptCommand = getConf().get(
+          YarnConfiguration.NM_DOCKER_CONTAINER_EXECUTOR_SCRIPT_COMMAND);
+      pout.println(dockerCommand + " bash -c '" + scriptCommand + " && bash \"" +
+          launchDst.toUri().getPath().toString() + "\"'");
     } finally {
       IOUtils.cleanupWithLogger(LOG, pout, out);
     }
@@ -1,38 +0,0 @@
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 0cf6b55..164fcf6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -1525,9 +1525,15 @@ public void transition(ContainerImpl container, ContainerEvent event) {
   static class ExitedWithFailureTransition extends ContainerTransition {

     boolean clCleanupRequired;
+    boolean diagnosticsRequired;

     public ExitedWithFailureTransition(boolean clCleanupRequired) {
+      this(clCleanupRequired, true);
+    }
+
+    public ExitedWithFailureTransition(boolean clCleanupRequired, boolean diagnosticsRequired) {
       this.clCleanupRequired = clCleanupRequired;
+      this.diagnosticsRequired = diagnosticsRequired;
     }

     @Override
@@ -1535,7 +1541,7 @@ public void transition(ContainerImpl container, ContainerEvent event) {
       container.setIsReInitializing(false);
       ContainerExitEvent exitEvent = (ContainerExitEvent) event;
       container.exitCode = exitEvent.getExitCode();
-      if (exitEvent.getDiagnosticInfo() != null) {
+      if (diagnosticsRequired && exitEvent.getDiagnosticInfo() != null) {
         container.addDiagnostics(exitEvent.getDiagnosticInfo(), "\n");
       }

@@ -1608,7 +1614,7 @@ public ContainerState transition(final ContainerImpl container,
         new KilledForReInitializationTransition().transition(container, event);
         return ContainerState.SCHEDULED;
       } else {
-        new ExitedWithFailureTransition(true).transition(container, event);
+        new ExitedWithFailureTransition(true, false).transition(container, event);
         return ContainerState.EXITED_WITH_FAILURE;
       }
     }
@@ -1,82 +0,0 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

FROM ubuntu:16.04

RUN apt-get -y update && \
    apt-get -y install \
      nano \
      vim \
      joe \
      wget \
      curl \
      jq \
      gawk \
      psmisc \
      python \
      python-yaml \
      python-jinja2 \
      python-urllib3 \
      python-tz \
      python-nose \
      python-prettytable \
      python-netifaces \
      python-dev \
      python-pip \
      python-mysqldb \
      openjdk-8-jre \
      openjdk-8-jdk \
      openssh-server \
      openssh-client \
      git \
      inotify-tools \
      rsync \
      maven \
      cmake \
      findbugs \
      zlib1g-dev \
      pkg-config \
      libssl-dev \
      autoconf \
      automake \
      libtool \
      build-essential


ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

RUN wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz && \
    tar xzvf protobuf-2.5.0.tar.gz && \
    cd protobuf-2.5.0 && \
    ./configure && \
    make && \
    make check && \
    make install && \
    ldconfig && \
    protoc --version


## The build environment of hadoop has been prepared above.
## Copy your build script here. Default script will build our hadoop-ai.

COPY *.patch /

COPY build.sh /

RUN chmod u+x build.sh

CMD ["/build.sh"]
@@ -1,66 +0,0 @@
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java
index 8801b4a940f..30d33086516 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java
@@ -138,7 +138,7 @@
 | 1 Tesla K80 Off | 000083D4:00:00.0 Off | 1 |
 | N/A 32C P8 28W / 149W | 11MiB / 11439MiB | 0% Default |
 +-------------------------------+----------------------+----------------------+
-| 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 0 |
+| 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 2 |
 | N/A 29C P8 25W / 149W | 12MiB / 11439MiB | 0% Default |
 +-------------------------------+----------------------+----------------------+
 | 3 Tesla K80 Off | 0000B6D4:00:00.0 Off | N/A |
@@ -169,7 +169,7 @@
 +-----------------------------------------------------------------------------+
 */
 Pattern GPU_INFO_FORMAT =
-    Pattern.compile("\\s+([0-9]{1,2})\\s+[\\s\\S]*\\s+(0|1|N/A|Off)\\s+");
+    Pattern.compile("[|]\\s+([0-9]{1,2})[^|]*[|][^|]*[|]\\s+(\\d+|N/A|Off)\\s+[|]");
 Pattern GPU_MEM_FORMAT =
     Pattern.compile("([0-9]+)MiB\\s*/\\s*([0-9]+)MiB");

@@ -820,11 +820,16 @@ private void refreshGpuIfNeeded(boolean excludeOwnerlessUsingGpus, int gpuNotRea
       long index = Long.parseLong(mat.group(1));
       currentIndex = index;

-      String errCode = mat.group(2);
-      if (!errCode.equals("1")) {
+      int errCode;
+      try {
+        errCode = Integer.parseInt(mat.group(2));
+      } catch (NumberFormatException e) {
+        errCode = 0;
+      }
+      if (errCode == 0) {
         gpuAttributeCapacity |= (1L << index);
       } else {
-        LOG.error("ignored error: gpu " + index + " ECC code is 1, will make this gpu unavailable");
+        LOG.error("GPU error: gpu " + index + " ECC code is " + mat.group(2) + ", will make this gpu unavailable");
       }
     }
   }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java
index 52cc3f8f160..71f9c95cdbc 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java
@@ -269,7 +269,7 @@ int readDiskBlockInformation(String diskName, int defSector) {
   "| 1 Tesla K80 Off | 000083D4:00:00.0 Off | 1 |\n" +
   "| N/A 32C P8 28W / 149W | 11MiB / 11439MiB | 0% Default |\n" +
   "+-------------------------------+----------------------+----------------------+\n" +
-  "| 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 0 |\n" +
+  "| 2 Tesla K80 Off | 00009D9C:00:00.0 Off | 2 |\n" +
   "| N/A 29C P8 25W / 149W | 12MiB / 11439MiB | 0% Default |\n" +
   "+-------------------------------+----------------------+----------------------+\n" +
   "| 3 Tesla K80 Off | 0000B6D4:00:00.0 Off | N/A |\n" +
@@ -605,8 +605,8 @@ private void InitialGPUTestFile() throws IOException {
 public void parsingGPUFile() throws Exception {

   InitialGPUTestFile();
-  assertEquals(7, plugin.getNumGPUs(false, 0));
-  assertEquals(253, plugin.getGpuAttributeCapacity(false, 0));
+  assertEquals(6, plugin.getNumGPUs(false, 0));
+  assertEquals(249, plugin.getGpuAttributeCapacity(false, 0));
 }
@@ -1,32 +0,0 @@
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
index 60443f4..fa30cca 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
@@ -174,6 +174,7 @@
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.hadoop.yarn.util.UTCClock;
+import org.apache.hadoop.yarn.util.resource.Resources;
 import org.apache.hadoop.yarn.util.timeline.TimelineUtils;

 import com.google.common.annotations.VisibleForTesting;
@@ -1028,12 +1029,17 @@ private NodeReport createNodeReports(RMNode rmNode) {
     if (schedulerNodeReport != null) {
       used = schedulerNodeReport.getUsedResource();
       numContainers = schedulerNodeReport.getNumContainers();
-    }
+    }
+
+    Resource total = Resources.clone(rmNode.getTotalCapability());
+    if (total.getPorts() != null) {
+      total.setPorts(total.getPorts().minusSelf(rmNode.getLocalUsedPortsSnapshot()));
+    }

     NodeReport report =
         BuilderUtils.newNodeReport(rmNode.getNodeID(), rmNode.getState(),
             rmNode.getHttpAddress(), rmNode.getRackName(), used,
-            rmNode.getTotalCapability(), numContainers,
+            total, numContainers,
             rmNode.getHealthReport(), rmNode.getLastHealthReportTime(),
             rmNode.getNodeLabels(), rmNode.getAggregatedContainersUtilization(),
             rmNode.getNodeUtilization());
@@ -1,32 +0,0 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

pushd $(dirname "$0") > /dev/null

hadoopBinaryPath="/hadoop-binary/"

hadoopDestDir="../dependency/"

if [[ ! -d ${hadoopDestDir} ]]; then
    mkdir ${hadoopDestDir}
fi

cp -arf ${hadoopBinaryPath} ${hadoopDestDir}

popd > /dev/null
@@ -1 +0,0 @@
hadoop-ai
@@ -1,63 +0,0 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

FROM base-image

ENV HADOOP_VERSION=hadoop-2.9.0

RUN apt-get -y install zookeeper libsnappy-dev
RUN rm -rf /var/lib/apt/lists/*

COPY dependency/hadoop-binary/hadoop-2.9.0.tar.gz /usr/local/

RUN tar -xzf /usr/local/$HADOOP_VERSION.tar.gz -C /usr/local/ && \
    cd /usr/local && \
    ln -s ./$HADOOP_VERSION hadoop

ENV HADOOP_PREFIX=/usr/local/hadoop \
    HADOOP_BIN_DIR=/usr/local/hadoop/bin \
    HADOOP_SBIN_DIR=/usr/local/hadoop/sbin \
    HADOOP_COMMON_HOME=/usr/local/hadoop \
    HADOOP_HDFS_HOME=/usr/local/hadoop \
    HADOOP_MAPRED_HOME=/usr/local/hadoop \
    HADOOP_YARN_HOME=/usr/local/hadoop \
    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \
    HADOOP_ROOT_LOGGER=INFO,console \
    HADOOP_SECURITY_LOGGER=INFO,console

ENV YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop

ENV PATH=$PATH:$HADOOP_BIN_DIR:$HADOOP_SBIN_DIR:/usr/share/zookeeper/bin

RUN chown -R root:root /var
RUN mkdir -p $HADOOP_YARN_HOME/logs

RUN mkdir -p /var/lib/hdfs/name
RUN mkdir -p /var/lib/hdfs/data

COPY build/start.sh /usr/local/start.sh
RUN chmod a+x /usr/local/start.sh


# Only node manager need this.#
#COPY docker-17.06.2-ce.tgz /usr/local
RUN wget https://download.docker.com/linux/static/stable/x86_64/docker-17.06.2-ce.tgz
RUN cp docker-17.06.2-ce.tgz /usr/local
RUN tar xzvf /usr/local/docker-17.06.2-ce.tgz
# Only node manager need this.#

CMD ["/usr/local/start.sh"]
@@ -1,35 +0,0 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# get the config generating script from the kubernetes configmap
cp /hadoop-configuration/${GENERATE_CONFIG} generate_config.sh
chmod u+x generate_config.sh

./generate_config.sh

# get the start service script from the kubernetes configmap
cp /hadoop-configuration/${START_SERVICE} start_service.sh
chmod u+x start_service.sh

# This status check is mainly for ensuring the status of image pulling,
# which usually costs most of the time when creating a new pod in kubernetes.
mkdir -p /jobstatus
touch /jobstatus/jobok

./start_service.sh