Initial code for open source AI Infrastructure.
This commit is contained in:
Yifan Xiong 2017-11-17 15:00:18 +08:00
Родитель 9cfbe40272
Коммит 15e70d6a39
331 изменённых файлов: 28943 добавлений и 23 удалений

21
.gitattributes поставляемый Normal file
Просмотреть файл

@ -0,0 +1,21 @@
# Auto detect text files and perform LF normalization
* text=auto
*.cs text diff=csharp
*.java text diff=java
*.html text diff=html
*.py text diff=python
*.pl text diff=perl
*.pm text diff=perl
*.css text
*.js text
*.sql text
*.sh text eol=lf
*.mustache text eol=lf
*.bat text eol=crlf
*.cmd text eol=crlf
*.vcxproj text merge=union eol=crlf
*.csproj text merge=union eol=crlf
*.sln text merge=union eol=crlf

41
.gitignore поставляемый
Просмотреть файл

@ -1,22 +1,19 @@
# Compiled class file
*.class
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
*.iml
*.gv
*.ipr
*.iws
*.orig
*.rej
*.sdf
*.suo
*.vcxproj.user
.idea
.svn
.classpath
.project
.settings
target/
build/
out/
tmp/
dist/

Просмотреть файл

@ -1,5 +1,6 @@
# AI Infrastructure
# Contributing
## Contributing
This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us

37
aii-fs/README.md Normal file
Просмотреть файл

@ -0,0 +1,37 @@
# About aii-fs
aii-fs is a standard tool for transferring files between a user's local file system and the HDFS of the AII platform.
# Getting Started
Please make sure you have installed python and pip on your computer.
# Instructions for Developers
* Run "pip install -r requirements.txt" to install the project's dependencies.
* Run "python aii-fs.py -h" to get detailed usage.
* Run "python aii-fs.py --config host=10.0.3.9 port=50070 user=root" to store the config of hdfs, default port is 50070, default user is root, no default host.
* You can also run a command with the arguments "--host", "--port" and "--user". These arguments have higher priority than the stored config.
# Usage
```
example use:
aii-fs --config host=10.0.3.9 port=50070 user=root -- store hdfs config
aii-fs -ls hdfs:// -- list the contents of a root HDFS directory
aii-fs -ls hdfs:// --host 10.0.3.9 -- list the contents of a root HDFS directory with host specified
aii-fs -ls hdfs:// --host 10.0.3.9 --port 50070 --user root -- list the contents of a root HDFS directory with host, port and user specified
aii-fs -ls -r hdfs:// -- list the contents of a root HDFS directory, recursively
aii-fs -mkdir hdfs://mydir/mysubdir/mysubdir2 -- makes mysubdir2 and all directories along the way
aii-fs -rm hdfs://mydir/mysubdir/myfile -- removes myfile from mysubdir
aii-fs -rm hdfs://mydir/mysubdir -- removes mysubdir and all files and directories in it
aii-fs -cp c:\mylocalfile hdfs://mydir/myremotedir -- copy mylocalfile into myremotedir
aii-fs -cp -r c:\mylocaldir hdfs://mydir/myremotedir -- copy mylocaldir into myremotedir, recursively
aii-fs -cp -r c:\mylocaldir\* hdfs://mydir/myremotedir -- copy mylocaldir's contents into myremotedir, recursively
aii-fs -cp c:\mylocaldir\\a hdfs://mydir/myremotedir/b -- copy file a from mylocaldir to myremotedir and rename to b
aii-fs -cp -r hdfs://mydir/myremotedir c:\mylocaldir -- copy myremotedir into mylocaldir, recursively
aii-fs -cp -r hdfs://mydir/myremotedir/* c:\mylocaldir -- copy myremotedir's contents into mylocaldir, recursively
exit code:
0 -- Success
1 -- An exception happened during the operation including bad connection
2 -- AII_VC environment variable not set to valid VC or insufficient/invalid command line argument(s)
3 -- Path not found
4 -- Unauthorized access
5 -- Path not empty
6 -- Check failed after operation
100 -- Failed to copy too many times
101 -- Failed to concat chunks into file
```

460
aii-fs/aii-fs.py Normal file
Просмотреть файл

@ -0,0 +1,460 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import sys
localPath = os.path.dirname(os.path.realpath(__file__))
import logging
import argparse
import time
import re
import getpass
from fsimpl import HadoopFs, LocalFs, Errors, Config
# Root path handed to HadoopFs.HadoopFileSystem by process_arg.
hdfsVirtualCluster = "/"
# Forwarded to both file-system constructors; not reassigned in the visible script.
simulateOnly = False
# Module logger; assigned in the __main__ section once logging is configured.
logger = None
# Verbose flag; set from the --verbose option in the __main__ section.
isVerbose = False
# Matches arguments that start with "hdfs:/" followed by one or more "/".
hdfsPattern = re.compile("hdfs://+")
# File system whose copy rate is reported at exit; assigned by cp_command.
fileSystemWithMetrics = None
# HDFS connection overrides taken from --user / --host / --port (None when absent).
hdfsUser = None
hdfsHost = None
hdfsPort = None
def process_arg(path, isSrc, dstDirMustExist, allowLocal=None):
    """Resolve a command-line path into a ``(file system, file descriptor)`` pair.

    Arguments matching ``hdfsPattern`` are served by a
    ``HadoopFs.HadoopFileSystem``; anything else is treated as a local path.
    ``allowLocal`` is accepted but not used.
    """
    assert hdfsVirtualCluster is not None

    if not hdfsPattern.match(path):
        logger.info("Assuming local file system pattern: " + path)
        localFs = LocalFs.LocalFileSystem(simulateOnly, isVerbose, logger)
        return localFs, localFs.make_fd(path, isSrc, dstDirMustExist)

    # Strip the scheme prefix; a bare "hdfs://" designates the root directory.
    path = path[len("hdfs://"):] or "/"
    logger.info("Path: " + path)
    hadoopFs = HadoopFs.HadoopFileSystem(
        hdfsVirtualCluster, simulateOnly, isVerbose, logger, hdfsUser, hdfsHost, hdfsPort)
    return hadoopFs, hadoopFs.make_fd(path, isSrc, dstDirMustExist)
def rm_command(args, recursive=False, force=False):
    """Delete every path in *args* and return the exit code.

    Each argument is processed independently: a failure on one path is
    reported and recorded, and the remaining paths are still handled.
    Successful deletions on a non-local file system are verified by checking
    that the path is no longer reported by ``get_hdfs_file_dir_json``.

    Returns 0 on success, otherwise the code of the last failure:
    3 path not found, 4 unauthorized, 5 directory not empty,
    6 path still present after the delete.
    """
    exitCode = 0
    for arg in args:
        logger.info("RM ARG: path={0}".format(arg))
        try:
            fileSystem, fileDescriptor = process_arg(arg, isSrc=True, dstDirMustExist=False)
        except Errors.FileNotFound:
            print("Path not found: %s" % arg)
            exitCode = 3
            continue
        except Errors.Unauthorized:
            print("Insufficient privileges to access the path: %s" % arg)
            exitCode = 4
            continue

        try:
            fileSystem.delete_file_dir(fileDescriptor, recursive, force)
        except Errors.PathNotEmpty:
            print("Cannot delete a non empty directory without -r option")
            exitCode = 5
            continue
        except Errors.Unauthorized:
            print("Insufficient privileges to delete the path: %s" % fileDescriptor.abspath)
            exitCode = 4
            continue

        # Verify this path only; the local file system has no
        # get_hdfs_file_dir_json, so local deletions are not re-checked.
        # Using "continue" (not "return") keeps later arguments processed.
        if isinstance(fileSystem, LocalFs.LocalFileSystem):
            continue
        if fileSystem.get_hdfs_file_dir_json(fileDescriptor.abspath) is not None:
            print("Failed to remove %s" % fileDescriptor.abspath)
            exitCode = 6
    return exitCode
def mkdir_command(args):
    """Create every directory in *args* and return the exit code.

    Each argument is processed independently. When resolving an argument
    fails there is no file system or descriptor to work with, so the
    argument is reported and skipped instead of falling through to
    ``make_dir`` with unbound (or stale) objects. Directories on a non-local
    file system are verified afterwards via ``get_hdfs_file_dir_json``.

    Returns 0 on success, otherwise the code of the last failure:
    3 path not found, 4 unauthorized, 6 directory missing after creation.
    """
    exitCode = 0
    for arg in args:
        logger.info("MKDIR ARG: path={0}".format(arg))
        try:
            fileSystem, fileDescriptor = process_arg(arg, isSrc=False, dstDirMustExist=False)
        except Errors.FileNotFound:
            print("Path not found: %s" % arg)
            exitCode = 3
            continue
        except Errors.Unauthorized:
            print("Insufficient privileges to access the folder: %s" % arg)
            exitCode = 4
            continue

        if not fileDescriptor.exists:
            try:
                fileSystem.make_dir(fileDescriptor.abspath)
            except Errors.Unauthorized:
                print("Insufficient privileges to create the folder: %s" % fileDescriptor.abspath)
                exitCode = 4
                continue

        # Verify this path only; local directories are not re-checked.
        if isinstance(fileSystem, LocalFs.LocalFileSystem):
            continue
        if fileSystem.get_hdfs_file_dir_json(fileDescriptor.abspath) is None:
            print("Failed to create %s" % fileDescriptor.abspath)
            exitCode = 6
    return exitCode
def mv_command(args):
    """Move/rename ``args[0]`` to ``args[1]`` and return the exit code.

    Exits the process with code 2 unless exactly two arguments are given.
    Returns 0 on success, 3 when a path is not found, 4 on insufficient
    privileges and 6 when the post-move verification fails.

    Verification re-resolves the entry that the move should have produced
    (``<dst>/<src name>`` when the destination is a directory, as done by
    ``mv_file``), compares its size with the source size recorded before the
    move, and - on non-local file systems only - checks that the source is
    gone and the destination is present.
    """
    if len(args) < 2:
        print("cannot initiate move with only one argument")
        sys.exit(2)
    elif len(args) > 2:
        print("more than 2 arguments currently not supported")
        sys.exit(2)

    src = args[0]
    dst = args[1]
    logger.info("MOVE ARGS: src={0}, dst={1}".format(src, dst))

    try:
        srcFileSystem, srcFileDescriptor = process_arg(src, isSrc=True, dstDirMustExist=False)
    except Errors.FileNotFound:
        print("Source not found: %s" % src)
        return 3
    except Errors.Unauthorized:
        print("Insufficient privileges to access the source: %s" % src)
        return 4
    srcSize = srcFileDescriptor.size

    try:
        dstFileSystem, dstFileDescriptor = process_arg(dst, isSrc=False, dstDirMustExist=True)
    except Errors.FileNotFound:
        print("Destination not found: %s" % dst)
        return 3
    except Errors.Unauthorized:
        print("Insufficient privileges to access the destination: %s" % dst)
        return 4

    try:
        srcFileSystem.mv_file(srcFileDescriptor, dstFileDescriptor, dstFileSystem)
    except Errors.Unauthorized:
        print("Insufficient privileges to move the path: {0} -> {1}".format(
            srcFileDescriptor.abspath, dstFileDescriptor.abspath))
        return 4

    # Check if mv successes
    failureMessage = "Failed to move the path: {0} -> {1}".format(
        srcFileDescriptor.abspath, dstFileDescriptor.abspath)

    # mv_file places the entry inside dst when dst is a directory, so the
    # size must be read from that entry and not from the directory itself.
    movedPath = dstFileDescriptor.abspath
    if dstFileDescriptor.is_directory:
        movedPath = dstFileSystem.path_join(movedPath, srcFileDescriptor.name)
    try:
        movedFileDescriptor = dstFileSystem.make_fd(movedPath, True, False)
    except (Errors.FileNotFound, Errors.Unauthorized):
        print(failureMessage)
        return 6

    # get_hdfs_file_dir_json is only called on non-local file systems,
    # mirroring the guard used by rm_command and mkdir_command; each path is
    # checked on the file system it belongs to.
    srcStillPresent = (
        not isinstance(srcFileSystem, LocalFs.LocalFileSystem)
        and srcFileSystem.get_hdfs_file_dir_json(srcFileDescriptor.abspath) is not None)
    dstMissing = (
        not isinstance(dstFileSystem, LocalFs.LocalFileSystem)
        and dstFileSystem.get_hdfs_file_dir_json(movedFileDescriptor.abspath) is None)

    if srcSize != movedFileDescriptor.size or srcStillPresent or dstMissing:
        print(failureMessage)
        return 6
    return 0
def cp_command(args, recursive=False):
    """Copy ``args[0]`` to ``args[1]`` and return the exit code.

    Exits the process with code 2 unless exactly two arguments are given.
    A trailing ``*`` on the source means "copy the directory's contents"
    (no top-level directory is created at the destination).

    Returns 0 on success, 2 for an invalid request (directory copy without
    the recursive flag, or directory copied onto a non-directory), 3 when a
    path is not found, 4 on insufficient privileges and 6 when ``cp_file``
    reports a size mismatch through ``ValueError``.

    On completion the source file system is stored in the module-level
    ``fileSystemWithMetrics`` so the caller can report the copy rate.
    """
    global fileSystemWithMetrics
    if len(args) < 2:
        print("cannot initiate copy with only one argument")
        sys.exit(2)
    elif len(args) > 2:
        print("more than 2 arguments currently not supported")
        sys.exit(2)

    src = args[0]
    dst = args[1]
    logger.info("COPY ARGS: src={0}, dst={1}".format(src, dst))

    createTopLevelDir = True
    if src.endswith("*"):
        createTopLevelDir = False
        src = src[:-1]

    try:
        srcFileSystem, srcFileDescriptor = process_arg(src, isSrc=True, dstDirMustExist=False)
    except Errors.FileNotFound:
        print("Source not found: %s" % src)
        return 3
    except Errors.Unauthorized:
        print("Insufficient privileges to access the source: %s" % src)
        return 4

    try:
        dstFileSystem, dstFileDescriptor = process_arg(dst, isSrc=False, dstDirMustExist=True)
    except Errors.FileNotFound:
        print("Destination not found: %s" % dst)
        return 3
    except Errors.Unauthorized:
        print("Insufficient privileges to access the destination: %s" % dst)
        return 4

    exitCode = 0
    if srcFileDescriptor.is_file or srcFileDescriptor.is_symlink:
        try:
            srcFileSystem.cp_file(srcFileDescriptor, dstFileDescriptor, dstFileSystem)
        except Errors.Unauthorized:
            print("Insufficient privileges to copy the path: {0} -> {1}".format(
                srcFileDescriptor.abspath, dstFileDescriptor.abspath))
            exitCode = 4
        # Catch the error when copied destination file size mismatches with source file size
        except ValueError as error:
            print(error)
            exitCode = 6
    elif srcFileDescriptor.is_directory:
        # This is user input, so report it instead of asserting (asserts are
        # stripped under "python -O" and otherwise surface as a traceback).
        if not dstFileDescriptor.is_directory:
            print("cannot copy a directory to a destination that is not a directory: %s" % dst)
            return 2
        if not recursive:
            print("cannot copy directories without recursive flag")
            return 2

        dstRootFileDescriptor = None
        for isNewDir, root, currentFile in srcFileSystem.fast_walk(srcFileDescriptor):
            if isNewDir:
                try:
                    dstRootFileDescriptor = dstFileSystem.make_dst_dir(
                        srcFileDescriptor,
                        dstFileDescriptor,
                        root,
                        createTopLevelDir)
                except Errors.Unauthorized:
                    print("Insufficient privileges to create the destination: {0}".format(
                        dstFileDescriptor.abspath))
                    exitCode = 4
                    continue
            try:
                srcFileSystem.cp_file(currentFile, dstRootFileDescriptor, dstFileSystem)
            except Errors.Unauthorized:
                # Name the file that actually failed, not the copy roots.
                print("Insufficient privileges to copy the path: {0} -> {1}".format(
                    currentFile.abspath, dstRootFileDescriptor.abspath))
                exitCode = 4
            # Catch the error when copied destination file size mismatches with source file size
            except ValueError as error:
                print(error)
                exitCode = 6
    else:
        print("File does not exist, ignoring")
        exitCode = 3

    fileSystemWithMetrics = srcFileSystem
    return exitCode
def ls_print(itemList):
    """Print an ``ls``-style listing of *itemList* and return the summed sizes.

    Output is a ``total <count>`` header, one formatted row per item and a
    trailing ``size <bytes>`` line.
    """
    rowTemplate = "{desc} {numChild:>7,} {owner:>10} {group:>12} {size:>13,} {date} {name}"
    print("total " + str(len(itemList)))

    totalBytes = 0
    for entry in itemList:
        totalBytes += entry.size
        timestamp = entry.modificationTime.strftime("%Y-%m-%d %H:%M:%S")
        row = rowTemplate.format(
            desc=entry.file_descriptor,
            numChild=int(entry.numChildren),
            owner=entry.owner,
            group=entry.group,
            size=entry.size,
            date=timestamp,
            name=entry.name)
        print(row)

    print("\nsize {size:,} ".format(size=totalBytes))
    return totalBytes
def ls_command(args, recursive=False):
    """List every path in *args* and return the exit code.

    Directories are listed through ``list_dir`` (or ``walk`` when
    *recursive* is set); any other path is printed as a single row.
    Returns 0, or the code of the last failure (3 not found, 4 unauthorized).
    """
    status = 0
    for target in args:
        logger.info("LS ARG: path={0}".format(target))
        try:
            fileSystem, fileDescriptor = process_arg(target, isSrc=True, dstDirMustExist=False)
        except Errors.FileNotFound:
            print("Path not found: %s" % target)
            status = 3
            continue
        except Errors.Unauthorized:
            print("Insufficient privileges to access the path: %s" % target)
            status = 4
            continue

        # Anything that is not a directory is shown as a one-row listing.
        if not fileDescriptor.is_directory:
            ls_print([fileDescriptor])
            continue

        if not recursive:
            try:
                ls_print(list(fileSystem.list_dir(fileDescriptor)))
            except Errors.Unauthorized:
                print("Insufficient privileges to access the path: %s" % target)
                status = 4
            continue

        grandTotal = 0
        for root, dirs, files in fileSystem.walk(fileDescriptor):
            print("\n" + root.abspath)
            grandTotal += ls_print(dirs + files)
        print("\ntotal size {size:,} ".format(size=grandTotal))
    return status
def hash_command(args):
    """Print the hash of every file in *args* and return the exit code.

    Returns 0, or the code of the last failure: 2 when the path is a
    directory, 3 when it is not found, 4 on insufficient privileges.
    """
    exitCode = 0
    for arg in args:
        logger.info("HASH ARG: path={0}".format(arg))
        try:
            fileSystem, fileDescriptor = process_arg(arg, isSrc=True, dstDirMustExist=False)
        except Errors.FileNotFound:
            print("Path not found: %s" % arg)
            exitCode = 3
            continue
        except Errors.Unauthorized:
            print("Insufficient privileges to access the path: %s" % arg)
            exitCode = 4
            continue

        if fileDescriptor.is_directory:
            # The message previously had no placeholder, so "%" raised
            # TypeError instead of reporting the problem.
            print("Cannot hash a directory: %s" % arg)
            exitCode = 2
            continue

        fileSystem.compute_hash(fileDescriptor)
    return exitCode
def config_command(args):
    """Store each ``key=value`` argument via ``Config.storeConfig``.

    Returns 0 when every argument was stored, 6 if any of them was rejected
    with ``Errors.FsException``.
    """
    status = 0
    store = Config.Config()
    for setting in args:
        logger.info("CONFIG ARG: {0}".format(setting))
        try:
            store.storeConfig(setting)
        except Errors.FsException:
            print("Cannot store the config. Please make sure your configuration is correct")
            status = 6
    return status
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, configure logging and
    # the HDFS connection overrides, run the selected command and exit with
    # its exit code.
    argParser = argparse.ArgumentParser(
        description="aii-fs command supported arguments:",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Backslashes in the Windows example paths are written as "\\" so
        # the literal contains no invalid escape sequences; the resulting
        # text is unchanged.
        epilog=(
            "\nexample use: \n\n"
            "aii-fs -ls hdfs:// -- list the contents of a root HDFS directory \n"
            "aii-fs -ls hdfs:// --host 10.0.3.9 -- list the contents of a root HDFS directory with host specified \n"
            "aii-fs -ls hdfs:// --host 10.0.3.9 --port 50070 --user root -- list the contents of a root HDFS directory with host, port and user specified \n"
            "aii-fs -ls -r hdfs:// -- list the contents of a root HDFS directory, recursively \n"
            "aii-fs -mkdir hdfs://mydir/mysubdir/mysubdir2 -- makes mysubdir2 and all directories along the way \n"
            "aii-fs -rm hdfs://mydir/mysubdir/myfile -- removes myfile from mysubdir \n"
            "aii-fs -rm hdfs://mydir/mysubdir -- removes mysubdir and all files and directories in it \n"
            "aii-fs -cp c:\\mylocalfile hdfs://mydir/myremotedir -- copy mylocalfile into myremotedir \n"
            "aii-fs -cp -r c:\\mylocaldir hdfs://mydir/myremotedir -- copy mylocaldir into myremotedir, recursively \n"
            "aii-fs -cp -r c:\\mylocaldir\\* hdfs://mydir/myremotedir -- copy mylocaldir's contents into myremotedir, recursively \n"
            "aii-fs -cp c:\\mylocaldir\\a hdfs://mydir/myremotedir/b -- copy file a from mylocaldir to myremotedir and rename to b \n"
            "aii-fs -cp -r hdfs://mydir/myremotedir c:\\mylocaldir -- copy myremotedir into mylocaldir, recursively \n"
            "aii-fs -cp -r hdfs://mydir/myremotedir/* c:\\mylocaldir -- copy myremotedir's contents into mylocaldir, recursively \n"
            "aii-fs --hash hdfs://mydir/myfile -- get the sha1 hash of myfile \n"
            "aii-fs --config host=10.0.3.9 -- store hdfs config \n"
            "\nexit code: \n\n"
            "0 -- Success \n"
            "1 -- An exception happened during the operation including bad connection \n"
            "2 -- AII_VC environment variable not set to valid VC or insufficient/invalid command line argument(s) \n"
            "3 -- Path not found \n"
            "4 -- Unauthorized access \n"
            "5 -- Path not empty \n"
            "6 -- Check failed after operation \n"
            "100 -- Failed to copy too many times \n"
            "101 -- Failed to concat chunks into file \n"
        )
    )

    # Exactly one command may be selected per invocation.
    group = argParser.add_mutually_exclusive_group()
    group.add_argument("-ls", "--list", action="store_true", help="list a file or a directory metadata")
    group.add_argument("-cp", "--copy", action="store_true", help="copy file")
    group.add_argument("-rm", "--remove", action="store_true", help="remove a file or an empty directory")
    group.add_argument("-mkdir", "--makeDirectory", action="store_true", help="create a new directory (and others along the way)")
    group.add_argument("-mv", "--move", action="store_true", help="move/rename a file or directory")
    group.add_argument("--hash", action="store_true", help="sha1 hash a file")
    group.add_argument("--config", action="store_true", help="store config for aii-fs")

    argParser.add_argument("myArgs", nargs="+", help="files and directories to manipulate")
    argParser.add_argument("-r", "--recursive", action="store_true", default=False, help="recurse into subdirectories")
    # NOTE: store_true with default=True means this flag is always on; the
    # default is kept so existing output does not change.
    argParser.add_argument("-v", "--verbose", action="store_true", default=True, help="verbose output of file operations")
    argParser.add_argument("-i", "--info", action="store_true", default=False, help="log all relevant information")
    argParser.add_argument("-d", "--debug", action="store_true", default=False, help="debug HDFS REST APIs")
    argParser.add_argument("-f", "--force", action="store_true", default=False, help="do not prompt for confirmation")
    argParser.add_argument("--user", help="the user of hdfs, use value in aii-fs.conf if not specified", type=str)
    argParser.add_argument("--host", help="the host ip of hdfs, use value in aii-fs.conf if not specified", type=str)
    argParser.add_argument("--port", help="the port of hdfs, use value in aii-fs.conf if not specified", type=str)
    cmdLineArgs = argParser.parse_args()

    if cmdLineArgs.verbose:
        isVerbose = True

    # --info takes precedence over --debug when both are given.
    if cmdLineArgs.info:
        logging.basicConfig(level=logging.INFO)
    elif cmdLineArgs.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.ERROR)
    logging.getLogger("requests").setLevel(logging.ERROR)
    logging.getLogger("urllib3").setLevel(logging.ERROR)
    logger = logging.getLogger(__name__)

    # Missing or empty overrides become None.
    hdfsUser = cmdLineArgs.user or None
    hdfsHost = cmdLineArgs.host or None
    hdfsPort = cmdLineArgs.port or None

    startTime = time.time()
    exitCode = 0
    if cmdLineArgs.list:
        exitCode = ls_command(cmdLineArgs.myArgs, cmdLineArgs.recursive)
    elif cmdLineArgs.remove:
        exitCode = rm_command(cmdLineArgs.myArgs, cmdLineArgs.recursive, cmdLineArgs.force)
    elif cmdLineArgs.makeDirectory:
        exitCode = mkdir_command(cmdLineArgs.myArgs)
    elif cmdLineArgs.copy:
        exitCode = cp_command(cmdLineArgs.myArgs, cmdLineArgs.recursive)
    elif cmdLineArgs.move:
        exitCode = mv_command(cmdLineArgs.myArgs)
    elif cmdLineArgs.hash:
        exitCode = hash_command(cmdLineArgs.myArgs)
    elif cmdLineArgs.config:
        # The result used to be discarded, so a failed --config exited with 0.
        exitCode = config_command(cmdLineArgs.myArgs)

    if isVerbose:
        print("\nElapsed time: %s sec" % (time.time() - startTime))
        if fileSystemWithMetrics is not None:
            bytesPerSec = fileSystemWithMetrics.get_cp_rate()
            if bytesPerSec is not None:
                print("Effective copy rate was {rate:,} bytes per second".format(rate=int(bytesPerSec)))
    sys.exit(exitCode)

409
aii-fs/fsimpl/BaseFs.py Normal file
Просмотреть файл

@ -0,0 +1,409 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#!/usr/bin/env python
import time
import hashlib
import sys
import os
from fsimpl import Constants, Errors, Retryer
from fsimpl.Retryer import doubling_backoff, RetryAndCatch
def is_normal_stdout():
    """Return True when file descriptors 0 and 1 have identical ``os.fstat`` results.

    NOTE(review): presumably used to detect that stdout has not been
    redirected away from the terminal shared with stdin - confirm.
    """
    stdinStat = os.fstat(0)
    stdoutStat = os.fstat(1)
    return stdinStat == stdoutStat
class FileDescriptor:
    """Base for file descriptors; derives display helpers from ``type`` and ``permissions``.

    Instances are expected to provide ``type`` ("DIRECTORY", "FILE" or
    "SYMLINK") and ``permissions`` (3 octal digits, or 4 with a leading "1").
    """

    @property
    def is_directory(self):
        """True when ``type`` is "DIRECTORY"."""
        return self.type == "DIRECTORY"

    @property
    def is_file(self):
        """True when ``type`` is "FILE"."""
        return self.type == "FILE"

    @property
    def is_symlink(self):
        """True when ``type`` is "SYMLINK"."""
        return self.type == "SYMLINK"

    @property
    def file_descriptor(self):
        """Return the ``ls``-style mode string, e.g. ``drwxr-xr-x``.

        The first character is "d" for directories, "s" for symlinks and "-"
        otherwise. A 4-digit permission string must start with "1"; the last
        character of the result is then replaced by "t".
        """
        assert len(self.permissions) == 3 or len(self.permissions) == 4

        if self.is_directory:
            kind = "d"
        elif self.is_symlink:
            kind = "s"
        else:
            kind = "-"

        digits = self.permissions
        sticky = len(digits) == 4
        if sticky:
            assert digits[0] == '1'
            digits = digits[1:]

        pieces = [kind]
        for digit in digits:
            bits = int(digit)
            pieces.append("r" if bits & 4 else "-")
            pieces.append("w" if bits & 2 else "-")
            pieces.append("x" if bits & 1 else "-")

        rendered = "".join(pieces)
        if sticky:
            rendered = rendered[:-1] + "t"
        return rendered
class FileOpen:
    """Context manager that opens a file on a file system and always closes it.

    For reads (``isRead`` true) the ``with`` target is the generator returned
    by ``fs.read_chunk(file, offset, size)``; for writes it is the opened
    file itself. ``offset`` defaults to 0 and ``size`` to ``fd.size``.
    """

    def __init__(self, fs, fd, isRead, rwMode="rb", offset=None, size=None):
        self.fileSystem = fs
        self.fileDescriptor = fd
        self.isRead = isRead
        self.rwMode = rwMode
        self.offset = 0 if offset is None else offset
        self.size = fd.size if size is None else size
        self.file = None

    def __enter__(self):
        self.file = self.fileSystem.open_file(self.fileDescriptor, self.rwMode)
        if not self.isRead:
            return self.file
        return self.fileSystem.read_chunk(self.file, self.offset, self.size)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.fileSystem.close_file(self.file)
class FileSystem:
    """Base class holding the copy/move/walk logic shared by file systems."""

    def __init__(self, simulateOnly, isVerbose, logger):
        """Store the run options and reset the copy metrics."""
        self.logger = logger
        self.verbose = isVerbose
        self.simulateOnly = simulateOnly
        # Accumulated by cp_file and reported through get_cp_rate.
        self.bytesCopied, self.copyTime = 0, 0.0
def make_dst_dir(self, srcFd, dstFd, rootFd, createTopLevelDir):
    """Create the destination directory mirroring *rootFd* and return its descriptor.

    The part of ``rootFd.abspath`` below the base directory is appended to
    ``dstFd.abspath``. The base is the parent of ``srcFd`` when
    *createTopLevelDir* is set (so the source directory's own name is kept),
    otherwise ``srcFd`` itself.
    """
    baseDir = self.get_dir_basename(srcFd.abspath)[0] if createTopLevelDir else srcFd.abspath
    skipLen = 0 if baseDir == "/" else self.get_dir_prefix_len(baseDir)
    dstDir = self.path_join(dstFd.abspath, rootFd.abspath[skipLen:])
    self.make_dir(dstDir)
    return self.make_fd(path=dstDir, isSrc=False, dstDirMustExist=True)
def path_join(self, base, suffix):
    """Join *base* and *suffix* with exactly one "/" between them.

    Backslashes in both parts are converted to forward slashes first.
    *suffix* must be relative (it must not start with "/").
    """
    # str.replace returns a new string; the results were previously
    # discarded, so backslashes were never normalised.
    base = base.replace("\\", "/")
    suffix = suffix.replace("\\", "/")
    assert not suffix.startswith("/")
    sep = "" if base.endswith("/") else "/"
    return base + sep + suffix
def get_dir_basename(self, path):
    """Split *path* into ``(parent directory, last component)``.

    One trailing "/" is ignored, and an empty parent is reported as "/".
    """
    trimmed = path[:-1] if path.endswith("/") else path
    parent, _, leaf = trimmed.rpartition("/")
    return (parent or "/"), leaf
def canonicalize_path(self, path):
    """Return *path* with every backslash turned into a forward slash."""
    segments = path.split("\\")
    return "/".join(segments)
def get_dir_prefix_len(self, path):
    """Return the length of *path* including one trailing "/" (added if absent)."""
    missingSeparator = 0 if path.endswith("/") else 1
    return len(path) + missingSeparator
def get_cp_rate(self):
    """Return the average copy rate in bytes per second, or None.

    None is returned when nothing has been copied, or when the accumulated
    copy time is zero (copies that finish within the clock resolution),
    which previously raised ZeroDivisionError.
    """
    if self.bytesCopied == 0 or self.copyTime <= 0:
        return None
    return self.bytesCopied / self.copyTime
def mv_file(self, src, dst, dstFs):
    """Move *src* to *dst*, where *dst* belongs to *dstFs*.

    When *dst* is a directory the target becomes ``<dst>/<src.name>``.
    Nothing is done if an entry of the same kind and name already exists at
    the target. Descriptors of the same class are moved with
    ``local_mv_file``; otherwise the source is copied with ``cp_file`` and
    then deleted.
    """
    if dst.is_directory:
        newPath = self.path_join(dst.abspath, src.name)
        # The target lives on the destination file system, so its descriptor
        # must be built by dstFs (as cp_file does), not by the source.
        newDst = dstFs.make_fd(path=newPath, isSrc=False, dstDirMustExist=True)
        self.logger.info("MOVE TARGET: change from {0} to {1}".format(dst.abspath, newDst.abspath))
        dst = newDst

    if dst.exists and (src.is_directory == dst.is_directory) and (src.name == dst.name):
        print("Destination already exists, move will not be performed.")
        return

    if self.verbose:
        print("move: %s -> %s" % (src.abspath, dst.abspath))

    # TODO: There must be a better way to do this, but type(src) == type(dst) does not work
    if src.__class__.__name__ == dst.__class__.__name__:
        self.local_mv_file(src, dst)
    else:
        self.cp_file(src, dst, dstFs)
        self.delete_file_dir(src)
# NOTE(review): RetryAndCatch is defined in fsimpl.Retryer (not visible here);
# presumably it retries the call on Errors.FsException - confirm the meaning
# of the two numeric arguments there.
@RetryAndCatch(Errors.FsException, 10, 10, doubling_backoff)
def cp_chunk(self, src, dst, dstFs, srcOffset, dstOffset, lastChunk, dstWriteMode):
    """Stream one chunk of *src*, starting at *srcOffset*, into *dst* on *dstFs*.

    The amount read is the distance from *srcOffset* to the next multiple of
    Constants.DEFAULT_BIG_FILE_THRESHOLD, or everything up to the end of
    *src* when *lastChunk* is true. *dst* is opened with *dstWriteMode* and
    each batch is written with ``dstFs.append_data``.
    """
    size = Constants.DEFAULT_BIG_FILE_THRESHOLD - srcOffset % Constants.DEFAULT_BIG_FILE_THRESHOLD
    if lastChunk:
        size = src.size - srcOffset
    progressFormatString = "({0:>13,}/{1:>13,}) bytes"
    sizeWritten = 0
    with FileOpen(self, src, True, "rb", srcOffset, size) as srcCopy:
        # dstOffset is passed as both offset and size; FileOpen only uses
        # those two values on the read path, so they are unused here.
        with FileOpen(dstFs, dst, False, dstWriteMode, dstOffset, dstOffset) as dstCopy:
            for batch in srcCopy:
                if self.verbose and is_normal_stdout():
                    # Print the running byte count, then backspace over it so
                    # the next update overwrites it in place.
                    sizeWritten += len(batch)
                    progressString = progressFormatString.format(sizeWritten, size)
                    sys.stdout.write(progressString)
                    sys.stdout.write("\b" * len(progressString))
                    sys.stdout.flush()
                dstFs.append_data(dstCopy, batch)
# NOTE(review): retry semantics come from fsimpl.Retryer.RetryAndCatch, which
# is not visible here.
@RetryAndCatch(Errors.FsException, 5, 10)
def make_fd_retriable(self, path, isSrc, dstDirMustExist):
    """Call ``make_fd`` with the same arguments, wrapped by RetryAndCatch."""
    return self.make_fd(path, isSrc, dstDirMustExist)
# NOTE(review): retry semantics come from fsimpl.Retryer.RetryAndCatch, which
# is not visible here.
@RetryAndCatch(Errors.FsException, 5, 10, doubling_backoff)
def concat_chunk_files(self, fs, fileName, chunkList):
    """Touch *fileName* on *fs* and concatenate *chunkList* into it.

    Does nothing when *chunkList* is None or empty (remote_cp_file passes
    None for files copied in a single piece).
    """
    if chunkList:
        fs.touch_file(fileName)
        fs.try_concat_files(fileName, chunkList)
def remote_cp_file(self, src, dst, dstFs):
    """Copy *src* to *dst* on *dstFs* by streaming its data.

    Sources of at most Constants.DEFAULT_BIG_FILE_THRESHOLD bytes are written
    with a single ``cp_chunk`` call. Larger sources are written as
    ``<dst>.__chunk__<n>`` files of at most that size each, which are then
    handed to ``concat_chunk_files``. A chunk file that already has the full
    chunk size and is not older than the source is skipped; a partially
    written chunk file is continued from its current size.

    Raises Errors.FsException when an existing chunk file is larger than the
    chunk size.
    """
    dstChunkList = None

    # Step 1: Perform a copy
    progressString = "- Progress: "
    self.logger.info("REMOTE COPY ({0}): {1} -> {2}".format(src.size, src.abspath, dst.abspath))
    if src.size <= Constants.DEFAULT_BIG_FILE_THRESHOLD:
        if self.verbose and is_normal_stdout():
            sys.stdout.write(progressString)
            sys.stdout.flush()
        if not dst.exists:
            dstFs.touch_file(dst)
        self.cp_chunk(src, dst, dstFs, 0, 0, True, "wb")
    else:
        chunk = 0
        offset = 0
        chunkSize = Constants.DEFAULT_BIG_FILE_THRESHOLD
        # Ceiling integer division. The previous "(size / chunkSize) + 1" is
        # a float on Python 3, so "chunk == numChunks - 1" never matched for
        # sizes that are not a multiple of chunkSize and the final chunk was
        # read with the wrong length.
        numChunks = (src.size + chunkSize - 1) // chunkSize
        dstChunkList = list()
        while offset < src.size:
            dstChunk = dstFs.make_fd_retriable(
                dst.abspath + ".__chunk__" + str(chunk),
                isSrc=False,
                dstDirMustExist=True)
            dstChunkList.append(dstChunk)
            self.logger.info("BIG COPY: chunk={0}, dst={1}".format(chunk, dstChunk.abspath))
            if not dstChunk.exists:
                dstFs.touch_file(dstChunk)

            if dstChunk.size == Constants.DEFAULT_BIG_FILE_THRESHOLD \
                    and src.modificationTime <= dstChunk.modificationTime:
                if self.verbose:
                    print("%s -> %s: skipped" % (src.abspath, dstChunk.abspath))
            elif dstChunk.size > Constants.DEFAULT_BIG_FILE_THRESHOLD:
                errMsg = "a chunk: {0} has its size bigger than max size, you need remove it before next retry".format(dstChunk.abspath)
                self.logger.error(errMsg)
                raise Errors.FsException(errMsg)
            else:
                if self.verbose:
                    print("%s -> %s" % (src.abspath, dstChunk.abspath))
                    if is_normal_stdout():
                        # Build the prefix per chunk; appending to
                        # progressString made it grow with every chunk.
                        progressFormatString = "Chunk ({0}/{1}) - "
                        sys.stdout.write(progressString + progressFormatString.format(chunk + 1, numChunks))
                        sys.stdout.flush()
                # Continue after whatever part of this chunk already exists.
                self.cp_chunk(src, dstChunk, dstFs, offset + dstChunk.size, dstChunk.size,
                              chunk == numChunks - 1, "ab")
                if self.verbose and is_normal_stdout():
                    sys.stdout.write("\r")
                    sys.stdout.flush()
            chunk += 1
            offset = chunk * chunkSize

    # Step2: concat all chunk files into final file
    self.concat_chunk_files(dstFs, dst, dstChunkList)
def cp_file(self, src, dst, dstFs):
    """Copy *src* to *dst* (on *dstFs*), verify the size and update the copy metrics.

    When *dst* is a directory the target becomes ``<dst>/<src.name>``. The
    copy is skipped if the target exists with the same size and a
    modification time not older than the source.

    Raises ValueError when the destination size differs from the source size
    after the copy.
    """
    self.logger.info("COPY: from({0})={1} to({2})={3}".format(src.fs.__class__.__name__,
                                                              src.abspath,
                                                              dst.fs.__class__.__name__,
                                                              dst.abspath))
    if dst.is_directory:
        newPath = self.path_join(dst.abspath, src.name)
        newDst = dstFs.make_fd(path=newPath, isSrc=False, dstDirMustExist=True)
        self.logger.info("COPY TARGET: change from {0} to {1}".format(dst.abspath, newDst.abspath))
        dst = newDst
    if dst.exists and src.size == dst.size and src.modificationTime <= dst.modificationTime:
        if self.verbose:
            print("%s -> %s: skipped" % (src.abspath, dst.abspath))
        return
    else:
        if self.verbose:
            print("%s -> %s" % (src.abspath, dst.abspath))
    # Only the transfer itself is timed; the verification below is excluded.
    startTime = time.time()
    # TODO: There must be a better way to do this, but type(src) == type(dst) does not work
    if src.__class__.__name__ == dst.__class__.__name__:
        # This is purely a performance optimization, the code below will
        # perform a deep copy which is just as good
        self.logger.info("LOCAL COPY: {0} -> {1}".format(src.abspath, dst.abspath))
        self.local_cp_file(src, dst)
    else:
        self.remote_cp_file(src, dst, dstFs)
    endTime = time.time()
    # Re-resolve the destination so the size check sees the state after the copy.
    dst = dstFs.make_fd_retriable(dst.abspath, False, True)
    if src.size != dst.size:
        raise ValueError("Size mismatch, %s size %d, %s size %d" % (src.abspath, src.size, dst.abspath, dst.size))
    self.bytesCopied += src.size
    self.copyTime += endTime - startTime
def compute_hash(self, fd):
    """Print a SHA-1 based hash of the file described by *fd*.

    The file is hashed in blocks of Constants.DEFAULT_BIG_FILE_THRESHOLD
    bytes; the printed value is the SHA-1 of the concatenated hexadecimal
    block digests.
    """
    blockSize = Constants.DEFAULT_BIG_FILE_THRESHOLD
    blockDigests = []
    start = 0
    while start < fd.size:
        length = min(blockSize, fd.size - start)
        blockHasher = hashlib.sha1()
        with FileOpen(self, fd, True, "rb", start, length) as reader:
            for piece in reader:
                blockHasher.update(piece)
        digest = str(blockHasher.hexdigest())
        self.logger.debug("HASH for {0}, {1}-{2}: {3}".format(fd.abspath, start, start + length, digest))
        blockDigests.append(digest)
        start += blockSize

    # Hashing the joined digests equals feeding them to the hasher one by one.
    combined = hashlib.sha1("".join(blockDigests).encode("utf-8"))
    print("Hash for file {0} is {1}".format(fd.abspath, str(combined.hexdigest())))
def walk(self, fd):
    """Yield ``(directory, subdirectories, files)`` breadth-first, starting at *fd*.

    Symlinks are reported together with files; entries of any other kind are
    ignored. A directory that cannot be listed is reported on stdout and
    yielded with whatever was collected before the error.
    """
    # Not able to use os.walk in local case as it is too slow for large directories
    pending = [fd]
    while pending:
        current = pending.pop(0)
        subDirs, entries = [], []
        try:
            for child in self.list_dir(current):
                if child.is_directory:
                    pending.append(child)
                    subDirs.append(child)
                elif child.is_file or child.is_symlink:
                    entries.append(child)
        except Errors.Unauthorized:
            print("Insufficient privileges to access the path: %s" % current.abspath)
        yield current, subDirs, entries
def fast_walk(self, fd):
    """Yield ``(flag, directory, item)`` for every file or symlink below *fd*, breadth-first.

    ``flag`` is ``previous directory != current directory``; since the
    previous directory is only updated when a new directory is taken from
    the queue, the flag has the same value for every item of a directory.
    Directories without files or symlinks produce no output.
    """
    # Not able to use os.walk in local case as it is too slow for large directories
    queue = [fd]
    current = None
    while queue:
        previous, current = current, queue.pop(0)
        try:
            for child in self.list_dir(current):
                if child.is_directory:
                    queue.append(child)
                elif child.is_file or child.is_symlink:
                    yield previous != current, current, child
        except Errors.Unauthorized:
            print("Insufficient privileges to access the path: %s" % current.abspath)
def read_chunk(self, srcFile, offset, size, chunkSize=Constants.DEFAULT_COPY_CHUNK_SIZE):
    """Yield the *size* bytes of *srcFile* starting at *offset*, in pieces.

    Each piece is obtained from ``read_data`` and is at most *chunkSize*
    bytes and never more than what is still outstanding.

    Raises Errors.FsException when ``read_data`` returns no data before
    *size* bytes were read (previously this looped forever).
    """
    sizeLeftToRead = size
    while sizeLeftToRead > 0:
        startTime = time.time()
        # Never request more than what is left, so a read cannot run past
        # the requested range and trip the assertion below.
        data = self.read_data(srcFile, offset, min(chunkSize, sizeLeftToRead))
        elapsedTime = time.time() - startTime
        sizeRead = len(data)
        if sizeRead == 0:
            raise Errors.FsException(
                "Unexpected end of data at offset {0}, {1} bytes still expected".format(
                    offset, sizeLeftToRead))
        # A read can finish within the clock resolution; avoid dividing by zero.
        rate = sizeRead / elapsedTime if elapsedTime > 0 else float("inf")
        self.logger.debug("Read: {0:,} bytes in {1} secs, copy rate {2:,} bytes/sec".format(
            sizeRead, elapsedTime, rate))
        sizeLeftToRead -= sizeRead
        assert sizeLeftToRead >= 0
        offset += sizeRead
        yield data
def make_fd(self, path, isSrc, dstDirMustExist):
    """Abstract hook: return the file descriptor for *path*.

    Called by make_dst_dir, mv_file, cp_file and make_fd_retriable in this
    class; concrete file systems must override it.
    """
    raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def exists_file_dir(self, fd):
    """Abstract hook; not called by the base class. Must be overridden."""
    raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def delete_file_dir(self, fd, recursive=False, force=False):
    """Abstract hook: delete the file or directory described by *fd*.

    *recursive* and *force* default to False so the declared interface
    matches the one-argument call made by ``mv_file``; callers that pass all
    three arguments are unaffected. Concrete file systems must override it.
    """
    raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def list_dir(self, fd):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def make_dir(self, path):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def open_file(self, fd, rwMode):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def close_file(self, fd):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def touch_file(self, fd):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def truncate_file(self, fd, size):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
def try_concat_files(self, fd, chunkFdList):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
# Abstract hook: concatenate every chunk in `chunkFdList` into `fd`.
# Subclasses must override.
def concat_files(self, fd, chunkFdList):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
# Abstract hook: return up to `size` bytes of `fd` starting at `offset`.
# read_chunk() relies on len() of the returned value. Subclasses must override.
def read_data(self, fd, offset, size):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
# Abstract hook: write `data` at the end of the file behind `fd`.
# Subclasses must override.
def append_data(self, fd, data):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
# Abstract hook: move `src` to `dst` when both live on this same file system.
# Subclasses must override.
def local_mv_file(self, src, dst):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")
# Abstract hook: copy `src` to `dst` when both live on this same file system.
# Subclasses must override.
def local_cp_file(self, src, dst):
raise NotImplementedError("This function must be implemented by the FS class that extends base FS")

75
aii-fs/fsimpl/Config.py Normal file
Просмотреть файл

@ -0,0 +1,75 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import pickle
from fsimpl import Errors
class Config:
    """Persistent HDFS connection settings for aii-fs.

    Settings are kept as a pickled dict in ``aii-fs.config`` in the current
    working directory.
    """

    # Name of the settings file, resolved against the working directory.
    CONFIG_FILE = "aii-fs.config"

    def __init__(self):
        """Load the stored settings; start empty when none can be read."""
        self.config = self._load()

    @staticmethod
    def _load():
        """Return the stored settings dict, or ``{}`` if the file is missing or empty."""
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file. That is acceptable only while the config file is trusted; a
        # plain-text format would be safer.
        try:
            with open(Config.CONFIG_FILE, "rb") as handle:
                return pickle.load(handle)
        except (IOError, EOFError):
            return {}

    def getHadoopConfig(self, user, host, port):
        """Resolve the HDFS user, host and port.

        An explicit (truthy) argument wins, then the stored setting, then the
        built-in default: user ``"root"``, host ``None``, port ``"50070"``.

        Returns a ``(user, host, port)`` tuple.
        """
        # dict.get replaces dict.has_key, which no longer exists on Python 3.
        hdfsUser = user or self.config.get("user", "root")
        hdfsHost = host or self.config.get("host")
        hdfsPort = port or self.config.get("port", "50070")
        return hdfsUser, hdfsHost, hdfsPort

    def storeConfig(self, conf):
        """Persist one ``key=value`` setting.

        Raises ``Errors.FsException`` when ``conf`` is not exactly one
        non-empty key and one non-empty value separated by a single ``=``.
        """
        attrs = conf.split("=")
        if len(attrs) != 2 or attrs[0] == "" or attrs[1] == "":
            raise Errors.FsException("Invalid config entry '{0}', expected key=value".format(conf))
        # Re-read the file so settings stored since construction are kept.
        configs = self._load()
        configs[attrs[0]] = attrs[1]
        with open(self.CONFIG_FILE, "wb") as handle:
            pickle.dump(configs, handle)
        # Keep the in-memory view in step with what is now on disk.
        self.config = configs

Просмотреть файл

@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Values used for testing
# DEFAULT_COPY_CHUNK_SIZE = 128 * 1024 # 128 KB
# DEFAULT_BIG_FILE_THRESHOLD = DEFAULT_COPY_CHUNK_SIZE * 2 # 256 KB
# Production values
# Big file threshold: 16 GB proved too big and failed easily, so it was lowered to 8 GB.
# DEFAULT_COPY_CHUNK_SIZE must not be changed, otherwise Python's shutil.copyfileobj will crash.
DEFAULT_COPY_CHUNK_SIZE = 128 * 1024 * 1024 # 128 MB
DEFAULT_BIG_FILE_THRESHOLD = DEFAULT_COPY_CHUNK_SIZE * 64 # 8 GB

36
aii-fs/fsimpl/Errors.py Normal file
Просмотреть файл

@ -0,0 +1,36 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
class FsException(Exception):
    """Root of the fsimpl error hierarchy; carries its text in ``msg``."""

    def __init__(self, msg=""):
        # Store the text on the instance and hand the same text to Exception
        # so that str(exc) shows it.
        Exception.__init__(self, msg)
        self.msg = msg


class BadConnection(FsException):
    """The file-system service could not be reached."""


class Unauthorized(FsException):
    """The caller lacks permission for the requested path."""


class FileNotFound(FsException):
    """The requested path does not exist."""


class PathNotEmpty(FsException):
    """The target path is not empty."""

365
aii-fs/fsimpl/HadoopFs.py Normal file
Просмотреть файл

@ -0,0 +1,365 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import sys
localPath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(localPath, "../pywebhdfs"))
from pywebhdfs.webhdfs import PyWebHdfsClient
import pywebhdfs.errors
import requests
import datetime
import fsimpl
from fsimpl import BaseFs, Errors, Config
# Python 2/3 compatibility shim: on Python 2 the module ``__builtin__`` exists,
# so ``input`` is rebound to ``raw_input`` and query_yes_no() reads a plain
# line of text. On Python 3 the import fails and the builtin ``input`` is kept.
try:
import __builtin__
input = getattr(__builtin__, 'raw_input')
except (ImportError, AttributeError):
pass
def query_yes_no(question, default="yes"):
    """Prompt the user with a yes/no question and return the reply as a bool.

    ``question`` is the text shown to the user. ``default`` is the answer
    assumed when the user just presses <Enter>: "yes", "no", or None to
    insist on an explicit reply. Any other value raises ValueError.

    Returns True for a "yes" reply and False for a "no" reply.
    """
    answers = {"yes": True, "y": True, "ye": True,
               "no": False, "n": False}

    # The capital letter in the hint marks which answer <Enter> selects.
    if default is None:
        hint = " [y/n] "
    elif default == "yes":
        hint = " [Y/n] "
    elif default == "no":
        hint = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    # Keep asking until the reply is one of the recognised words.
    while True:
        sys.stdout.write(question + hint)
        reply = input().lower()
        if reply == '' and default is not None:
            return answers[default]
        if reply in answers:
            return answers[reply]
        sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
class HadoopFileDescriptor(BaseFs.FileDescriptor):
    """Descriptor of one HDFS path, filled from a WebHDFS ``FileStatus`` JSON entry."""

    # Timestamps below one day after the epoch are clamped to this value: the
    # original code notes that Python 3.6 errors out when the timestamp is 0.
    MIN_TIMESTAMP = 86400

    def __init__(self, hadoopFs, path, isSrc, needsDstDirCheck, fileJson=None):
        """Describe ``path`` on ``hadoopFs``.

        ``fileJson`` is the FileStatus entry when the caller already has it
        (e.g. from a directory listing); otherwise the status is queried.
        A missing source raises ``pywebhdfs.errors.FileNotFound``. A missing
        destination yields a placeholder descriptor with ``exists = False``;
        when ``needsDstDirCheck`` is set its parent directory must exist,
        otherwise ``pywebhdfs.errors.FileNotFound`` is raised as well.
        """
        self.fs = hadoopFs
        if fileJson is None:
            # No JSON was passed in, so read the entry from HDFS.
            fileJson = hadoopFs.get_hdfs_file_dir_json(path)
            if fileJson is None:
                # File/directory does not exist.
                if isSrc:
                    raise pywebhdfs.errors.FileNotFound("path: %s not found" % path)
                self._init_missing_destination(hadoopFs, path, needsDstDirCheck)
                return
        elif fileJson["pathSuffix"] != "":
            # JSON came from a listing of the containing directory: the entry
            # name has to be appended to the directory path.
            path = hadoopFs.path_join(path, fileJson["pathSuffix"])

        self.exists = True
        self.abspath = path
        self.type = fileJson["type"]
        if fileJson["pathSuffix"] == "":
            _, self.name = hadoopFs.get_dir_basename(self.abspath)
        else:
            self.name = fileJson["pathSuffix"]
        self.replication = fileJson["replication"]
        self.permissions = fileJson["permission"]
        if self.permissions == "0":
            self.permissions = "000"
        self.owner = fileJson["owner"]
        self.group = fileJson["group"]
        self.size = fileJson["length"]
        self.numChildren = fileJson["childrenNum"]
        # NOTE(review): parentheses spell out how Python already evaluated the
        # original condition (``and`` binds tighter than ``or``); confirm this
        # grouping is the intended one.
        if self.is_file or (self.is_symlink and self.numChildren == 0):
            self.numChildren = 1
        self.modificationTime = self._to_datetime(fileJson["modificationTime"])
        self.accessTime = self._to_datetime(fileJson["accessTime"])

    def _init_missing_destination(self, hadoopFs, path, needsDstDirCheck):
        """Fill in placeholder attributes for a destination that does not exist yet."""
        dstDir, dstName = hadoopFs.get_dir_basename(path)
        # Pure optimization: callers that already traversed the destination
        # folder pass needsDstDirCheck=False so the HDFS query is skipped.
        if needsDstDirCheck and hadoopFs.get_hdfs_file_dir_json(dstDir) is None:
            raise pywebhdfs.errors.FileNotFound("destination directory: %s not found" % dstDir)
        self.name = dstName
        self.abspath = path
        self.type = "FILE"
        self.accessTime = datetime.datetime.fromtimestamp(self.MIN_TIMESTAMP)
        self.modificationTime = datetime.datetime.fromtimestamp(self.MIN_TIMESTAMP)
        self.replication = "3"
        self.exists = False
        self.size = 0
        # Placeholders so that every descriptor exposes the same attributes;
        # the values mirror the ones LocalFileDescriptor uses for a missing path.
        self.permissions = "777"
        self.owner = "root"
        self.group = "supergroup"
        self.numChildren = 0

    @classmethod
    def _to_datetime(cls, millis):
        """Convert a WebHDFS millisecond timestamp to a datetime, clamped to MIN_TIMESTAMP."""
        seconds = float(millis) / 1000
        return datetime.datetime.fromtimestamp(max(seconds, cls.MIN_TIMESTAMP))
class HadoopFileSystem(BaseFs.FileSystem):
    """File-system backend that reaches HDFS through WebHDFS (``PyWebHdfsClient``).

    Mutating methods honour ``simulateOnly`` by printing the intended action
    instead of performing it. Failures of the WebHDFS client are translated
    into the ``fsimpl.Errors`` hierarchy by ``_call_hdfs``.
    """

    def __init__(self, vcPath, simulateOnly = False, isVerbose=False, logger=None, user=None, host=None, port=None):
        """Create the WebHDFS client from explicit arguments, stored config, or defaults.

        ``vcPath`` is the root that ``validate_hdfs_arg`` confines paths to.
        """
        BaseFs.FileSystem.__init__(self, simulateOnly, isVerbose, logger)
        hdfsUser, hdfsHost, hdfsPort = Config.Config().getHadoopConfig(user, host, port)
        self.hdfs = PyWebHdfsClient(host=hdfsHost, port=hdfsPort, user_name=hdfsUser)
        self.vcPath = vcPath

    def _call_hdfs(self, action, path, func, *args, **kwargs):
        """Run ``func(*args, **kwargs)`` and translate WebHDFS failures.

        ``action`` (e.g. "during HDFS read file") and ``path`` only feed the
        log and error messages. Raises ``Errors.Unauthorized``,
        ``Errors.BadConnection`` or ``Errors.FsException``.
        """
        try:
            return func(*args, **kwargs)
        except pywebhdfs.errors.Unauthorized as e:
            self.logger.info("Unauthorized for path {0}: {1}".format(path, e))
            raise Errors.Unauthorized("Unauthorized access to the path {0}: {1}".format(path, e))
        except requests.exceptions.RequestException as e:
            self.logger.info("ConnectionError for path {0}: {1}".format(path, e))
            raise Errors.BadConnection("Connection error {0}: {1}, exc={2}".format(action, path, e))
        except pywebhdfs.errors.PyWebHdfsException as e:
            self.logger.info("PyWebHdfsException for path {0}: {1}".format(path, e))
            raise Errors.FsException("An exception happened {0}: {1}, exc={2}".format(action, path, e))

    def make_fd(self, path, isSrc, dstDirMustExist):
        """Return a ``HadoopFileDescriptor`` for ``path``.

        Raises ``Errors.FileNotFound`` when the path (or a required
        destination directory) does not exist.
        """
        def build():
            # FileNotFound must be handled before the generic translation in
            # _call_hdfs so that it maps to Errors.FileNotFound.
            try:
                return HadoopFileDescriptor(self, path, isSrc, dstDirMustExist)
            except pywebhdfs.errors.FileNotFound:
                self.logger.info("DESC: does not exist: " + path)
                raise Errors.FileNotFound("Path {0} does not exist".format(path))

        return self._call_hdfs("while looking for path", path, build)

    def exists_file_dir(self, fd):
        """Return what the WebHDFS client reports for the existence of ``fd.abspath``."""
        return self._call_hdfs("during HDFS exists test", fd.abspath,
                               self.hdfs.exists_file_dir, fd.abspath)

    def delete_file_dir(self, fd, recursive=False, force=False):
        """Delete ``fd.abspath``; a recursive delete asks for confirmation unless ``force``."""
        if self.simulateOnly:
            print("SIMULATE -> remove file/dir: {0}, recursive={1}".format(fd.abspath, recursive))
        elif not recursive or force or \
                query_yes_no(question="Are you sure you want to delete folder recursively?", default="no"):
            self._call_hdfs("during HDFS delete directory", fd.abspath,
                            self.hdfs.delete_file_dir, fd.abspath, recursive=recursive)

    def list_dir(self, fd):
        """Yield one ``HadoopFileDescriptor`` per entry of the directory ``fd``."""
        status = self._call_hdfs("while looking for path", fd.abspath,
                                 self.hdfs.list_dir, fd.abspath)
        for item in status["FileStatuses"]["FileStatus"]:
            yield HadoopFileDescriptor(self, fd.abspath, isSrc=True, needsDstDirCheck=False, fileJson=item)

    def make_dir(self, path):
        """Create the directory ``path`` on HDFS."""
        if self.simulateOnly:
            print("SIMULATE -> make dir: " + path)
        else:
            self._call_hdfs("during HDFS create directory", path, self.hdfs.make_dir, path)

    def open_file(self, fd, rwMode):
        """Return ``fd`` itself: this backend addresses files by path, not by handle."""
        return fd

    def close_file(self, fd):
        """Nothing to release for this backend."""
        pass

    def touch_file(self, fd):
        """Create ``fd.abspath`` as an empty file, overwriting an existing one."""
        if self.simulateOnly:
            print("SIMULATE -> touch file: " + fd.abspath)
        else:
            self._call_hdfs("during HDFS create file", fd.abspath,
                            self.hdfs.create_file, fd.abspath, 0, overwrite=True)

    def truncate_file(self, fd, size):
        """Truncate ``fd.abspath`` to ``size`` bytes."""
        if self.simulateOnly:
            print("SIMULATE -> truncate file: {0}, size={1}".format(fd.abspath, size))
        else:
            self._call_hdfs("during HDFS truncate file", fd.abspath,
                            self.hdfs.truncate_file, fd.abspath, size)

    def try_concat_files(self, fd, chunkFdList):
        """Concatenate chunks into ``fd`` batch by batch; return how many were merged.

        Stops at the first batch that fails with ``Errors.FsException``.
        """
        # Sources are concatenated in batches of ``concatStep`` rather than in
        # one request. Context: unordered concat bug in Hadoop 2.7.1,
        # https://issues.apache.org/jira/browse/HDFS-8891
        currIndex = 0
        concatStep = 20
        for pos in range(0, len(chunkFdList), concatStep):
            sourceChunk = chunkFdList[pos:pos + concatStep]
            try:
                self.concat_files(fd, sourceChunk)
            except Errors.FsException:
                break
            currIndex += len(sourceChunk)
        return currIndex

    def concat_files(self, fd, chunkFdList):
        """Concatenate every chunk in ``chunkFdList`` into ``fd.abspath``."""
        strList = [chunkFd.abspath for chunkFd in chunkFdList]
        if self.simulateOnly:
            print("SIMULATE -> concat file: {0}, sources={1}".format(fd.abspath, ",".join(strList)))
        else:
            self._call_hdfs("during HDFS concat file", fd.abspath,
                            self.hdfs.concat_files, fd.abspath, strList)

    def read_data(self, fd, offset, size):
        """Return up to ``size`` bytes of ``fd.abspath`` from ``offset``; ``""`` past the end."""
        if offset >= fd.size:
            return ""
        return self._call_hdfs("during HDFS read file", fd.abspath,
                               self.hdfs.read_file, fd.abspath, offset=offset, length=size)

    def append_data(self, fd, data):
        """Append ``data`` to ``fd.abspath``."""
        if self.simulateOnly:
            print("SIMULATE -> write file data: " + fd.abspath)
        else:
            self._call_hdfs("during HDFS append file", fd.abspath,
                            self.hdfs.append_file, fd.abspath, data)

    def local_mv_file(self, src, dst):
        """Rename ``src`` to ``dst`` within HDFS."""
        if self.simulateOnly:
            print("SIMULATE -> local move file: {0} -> {1} ".format(src.abspath, dst.abspath))
        else:
            self._call_hdfs("during HDFS rename file", src.abspath,
                            self.hdfs.rename_file_dir, src.abspath, dst.abspath)

    def local_cp_file(self, src, dst):
        """Copy within HDFS: unsupported, prints an explanation and exits with status 1."""
        # This is an open issue in Hadoop community: https://issues.apache.org/jira/browse/HDFS-3370
        # Instead, a symbolic link could be used once Hadoop supports it, e.g.
        # self.hdfs.create_sym_link(src.abspath, dst.abspath, createParent=True)
        if self.simulateOnly:
            print("SIMULATE -> local copy file: {0} -> {1} ".format(src.abspath, dst.abspath))
        else:
            print("Copy within HDFS is not supported due to lack of Hadoop support")
            print("Once symbolic links are enabled, this feature will be enabled")
            sys.exit(1)

    def get_hdfs_file_dir_json(self, path):
        """Return the ``FileStatus`` JSON of ``path``, or None when it does not exist."""
        try:
            status = self.hdfs.get_file_dir_status(path)
            return status["FileStatus"]
        except pywebhdfs.errors.FileNotFound:
            return None

    def validate_hdfs_arg(self, arg):
        """Exit with status 1 unless ``arg`` lies under ``self.vcPath``."""
        root = self.vcPath.rstrip("/")
        allowed = arg.startswith(self.vcPath)
        if allowed and root:
            # A bare prefix test would also accept siblings such as "/vc10"
            # for a root of "/vc1"; require a path-component boundary.
            allowed = arg.rstrip("/") == root or arg.startswith(root + "/")
        if not allowed:
            print("Error: You don't have permissions to the path: %s" % arg)
            print("Your path must be rooted under: %s" % self.vcPath)
            sys.exit(1)

198
aii-fs/fsimpl/LocalFs.py Normal file
Просмотреть файл

@ -0,0 +1,198 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import errno
import shutil
import datetime
if os.name != "nt":
import pwd
import grp
from fsimpl import BaseFs, Constants, Errors
class LocalFileDescriptor(BaseFs.FileDescriptor):
    """Descriptor of one path on the local file system."""

    def __init__(self, localFs, path, isSrc, dstDirMustExist):
        """Describe ``path`` on ``localFs``.

        A missing source raises ``Errors.FileNotFound``. A missing destination
        yields a placeholder descriptor with ``exists = False``; when
        ``dstDirMustExist`` is set its parent directory must exist, otherwise
        ``Errors.FileNotFound`` is raised as well.
        """
        self.fs = localFs
        # Canonicalize path to "/" on descriptor creation
        path = localFs.canonicalize_path(path)

        if not os.path.exists(path):
            if isSrc:
                raise Errors.FileNotFound("path: %s not found" % path)
            dstDir, dstName = localFs.get_dir_basename(path)
            if dstDirMustExist and not os.path.exists(dstDir):
                raise Errors.FileNotFound("destination directory: %s not found" % dstDir)
            # Placeholder values for a destination that is yet to be created.
            self.name = dstName
            self.abspath = path
            self.type = "FILE"
            self.accessTime = datetime.datetime.fromtimestamp(86400)
            self.modificationTime = datetime.datetime.fromtimestamp(86400)
            self.exists = False
            self.replication = "1"
            self.permissions = "777"
            self.owner = "root"
            self.group = "supergroup"
            self.numChildren = "0"
            self.size = 0
            return

        # The symlink test comes first so that a link is reported as a link
        # rather than as the kind of entry it points to.
        if os.path.islink(path):
            self.type = "SYMLINK"
        elif os.path.isfile(path):
            self.type = "FILE"
        elif os.path.isdir(path):
            self.type = "DIRECTORY"

        self.exists = True
        self.abspath = localFs.canonicalize_path(os.path.abspath(path))
        _, self.name = localFs.get_dir_basename(self.abspath)
        self.replication = "1"
        self.numChildren = "1"

        if os.name == "nt":
            # pwd/grp are not imported on Windows, so ownership and mode are
            # filled with fixed placeholders.
            self.size = os.path.getsize(path)
            self.modificationTime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
            self.accessTime = datetime.datetime.fromtimestamp(os.path.getatime(path))
            self.permissions = "777"
            self.owner = "-"
            self.group = "-"
        else:
            statinfo = os.stat(self.abspath)
            self.size = statinfo.st_size
            self.modificationTime = datetime.datetime.fromtimestamp(statinfo.st_mtime)
            self.accessTime = datetime.datetime.fromtimestamp(statinfo.st_atime)
            # Last three octal digits of the mode are the rwx permission bits.
            self.permissions = str(oct(statinfo.st_mode))[-3:]
            self.owner = self._owner_name(statinfo.st_uid)
            self.group = self._group_name(statinfo.st_gid)

    @staticmethod
    def _owner_name(uid):
        """Return the user name for ``uid``, or the numeric id as text when unknown."""
        try:
            return pwd.getpwuid(uid).pw_name
        except KeyError:
            # The uid has no entry in the password database.
            return str(uid)

    @staticmethod
    def _group_name(gid):
        """Return the group name for ``gid``, or the numeric id as text when unknown."""
        try:
            return grp.getgrgid(gid).gr_name
        except KeyError:
            # The gid has no entry in the group database.
            return str(gid)
class LocalFileSystem(BaseFs.FileSystem):
    """File-system backend for the local disk.

    Most mutating methods honour ``simulateOnly`` by printing the intended
    action instead of performing it.
    """

    def __init__(self, simulateOnly = False, isVerbose=False, logger=None):
        BaseFs.FileSystem.__init__(self, simulateOnly, isVerbose, logger)

    def make_fd(self, path, isSrc, dstDirMustExist):
        """Return a ``LocalFileDescriptor`` for ``path``."""
        return LocalFileDescriptor(self, path, isSrc, dstDirMustExist)

    def exists_file_dir(self, fd):
        """Return True when ``fd.abspath`` is an existing file or directory."""
        # The original tested isdir twice, so existing files were reported as missing.
        return os.path.isfile(fd.abspath) or os.path.isdir(fd.abspath)

    def delete_file_dir(self, fd, recursive=False, force=False):
        """Remove a file/symlink, or a directory tree when ``recursive`` is set.

        A directory without ``recursive`` is not removed; a message is printed.
        ``force`` is accepted for interface compatibility and not used here.
        """
        if fd.is_file or fd.is_symlink:
            if self.simulateOnly:
                print("SIMULATE -> remove file: " + fd.abspath)
            else:
                os.remove(fd.abspath)
        elif fd.is_directory:
            if recursive:
                if self.simulateOnly:
                    print("SIMULATE -> remove directory recursively: " + fd.abspath)
                else:
                    shutil.rmtree(fd.abspath)
            else:
                if self.simulateOnly:
                    print("SIMULATE -> remove directory: " + fd.abspath)
                else:
                    # Directories are deliberately not removed without ``recursive``.
                    print("cannot remove: Is a directory")

    def list_dir(self, fd):
        """Yield one ``LocalFileDescriptor`` per entry of the directory ``fd``."""
        try:
            dirList = os.listdir(fd.abspath)
        except OSError:
            # Only file-system errors are treated as "cannot enter"; the
            # directory is then reported as empty.
            print("Not enough permissions to enter: " + fd.abspath)
            dirList = list()
        for item in dirList:
            yield LocalFileDescriptor(self, self.path_join(fd.abspath, item), isSrc=True, dstDirMustExist=False)

    def make_dir(self, path):
        """Create ``path`` including missing parents; an existing directory is fine."""
        try:
            if self.simulateOnly:
                print("SIMULATE -> making dir: " + path)
            else:
                os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def open_file(self, fd, rwMode="rb"):
        """Open ``fd.abspath`` in ``rwMode`` and return the file object.

        In simulate mode nothing is opened and None is returned.
        """
        if self.simulateOnly:
            print("SIMULATE -> write file: " + fd.abspath)
        else:
            return open(fd.abspath, rwMode)

    def close_file(self, fd):
        """Flush and close a file object returned by ``open_file``."""
        fd.flush()
        fd.close()

    def touch_file(self, fd):
        """Create ``fd.abspath`` if needed and set its times to now."""
        if self.simulateOnly:
            # Same simulate message as the HDFS backend; the original touched
            # the file even in simulate mode.
            print("SIMULATE -> touch file: " + fd.abspath)
        else:
            with open(fd.abspath, 'a'):
                os.utime(fd.abspath, None)

    def truncate_file(self, fd, size):
        """Truncate the open file object ``fd`` to ``size`` bytes."""
        if self.simulateOnly:
            print("SIMULATE -> truncate file({0}): {1}".format(size, fd.abspath))
        else:
            fd.truncate(size)

    def try_concat_files(self, fd, chunkFdList):
        """Concatenate all chunks into ``fd`` and return how many were merged."""
        self.concat_files(fd, chunkFdList)
        return len(chunkFdList)

    def concat_files(self, fd, chunkFdList):
        """Write the chunks, in order, into ``fd.abspath`` and delete them afterwards."""
        with open(fd.abspath, 'wb') as dst:
            for chunkFd in chunkFdList:
                with open(chunkFd.abspath, 'rb') as chunkFile:
                    shutil.copyfileobj(chunkFile, dst, Constants.DEFAULT_COPY_CHUNK_SIZE)
        # Chunks are removed only after every one of them has been copied.
        for chunkFd in chunkFdList:
            self.delete_file_dir(chunkFd, False)

    def read_data(self, fd, offset, size):
        """Return up to ``size`` bytes of the open file object ``fd`` from ``offset``."""
        currentPos = fd.tell()
        if currentPos != offset:
            self.logger.info(
                "Current position {0} in the file does not match the offset {1} provided, seek will be used".format(
                    currentPos, offset))
            fd.seek(offset)
        return fd.read(size)

    def append_data(self, fd, data):
        """Write ``data`` to the open file object ``fd``."""
        if self.simulateOnly:
            print("SIMULATE -> write data: " + fd.abspath)
        else:
            fd.write(data)

    def local_mv_file(self, src, dst):
        """Move ``src.abspath`` to ``dst.abspath``."""
        if self.simulateOnly:
            print("SIMULATE -> local move file: {0} -> {1} ".format(src.abspath, dst.abspath))
        else:
            shutil.move(src.abspath, dst.abspath)

    def local_cp_file(self, src, dst):
        """Copy the contents of ``src.abspath`` to ``dst.abspath``."""
        if self.simulateOnly:
            print("SIMULATE -> local copy file: {0} -> {1} ".format(src.abspath, dst.abspath))
        else:
            shutil.copyfile(src.abspath, dst.abspath)

63
aii-fs/fsimpl/Retryer.py Normal file
Просмотреть файл

@ -0,0 +1,63 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import time
import functools
def doubling_backoff(start):
    """Generate an endless sequence of delays where each is twice the previous one.

    The first value is ``start``; a ``start`` of 0 is replaced by 1 so that
    the sequence can actually grow.
    """
    delay = start if start != 0 else 1
    while True:
        yield delay
        delay *= 2
def fixed_interval_delay(start):
    """Generate ``start`` forever: a constant delay between retries."""
    while True:
        yield start


class RetryAndCatch(object):
    """Decorator that re-invokes a function when it raises one of the given exceptions.

    The function is attempted at most ``max_retries`` times in total; the
    exception of the last attempt propagates to the caller.
    """

    def __init__(self, exceptions_to_catch, max_retries = 10, delayinsecond=5, backoff=fixed_interval_delay, logger=None):
        """Configure the decorator.

        ``exceptions_to_catch`` is an exception class or tuple of classes that
        trigger a retry. ``delayinsecond`` is the first wait; ``backoff`` is a
        generator function taking that delay and yielding the following waits.
        ``logger``, when given, receives one ``error`` call per failed attempt.
        """
        self.exceptions = exceptions_to_catch
        self.tries = max_retries
        self.logger = logger
        self.delay = delayinsecond
        self.backoff = backoff
        self.max_retries = max_retries

    def __call__(self, f):
        """Return ``f`` wrapped with the retry loop."""
        @functools.wraps(f)
        def retrier(*args, **kwargs):
            # Attempt counter and delay are per call. The original kept them
            # on the shared decorator instance, so a grown delay leaked into
            # later calls and concurrent calls disturbed each other.
            backoff_gen = self.backoff(self.delay)
            delay = self.delay
            attempts_left = self.max_retries
            while attempts_left > 1:
                try:
                    return f(*args, **kwargs)
                except self.exceptions as e:
                    attempts_left -= 1
                    if self.logger:
                        # Format the exception itself: ``e.message`` does not
                        # exist on Python 3.
                        self.logger.error(
                            "Exception {} caught, retrying {} more times.".format(e, attempts_left))
                    time.sleep(delay)
                    # The wait sequence is unchanged from the original: the
                    # initial delay first, then whatever the generator yields.
                    delay = next(backoff_gen)
            # Final attempt: any exception now reaches the caller.
            return f(*args, **kwargs)
        return retrier

16
aii-fs/fsimpl/__init__.py Normal file
Просмотреть файл

@ -0,0 +1,16 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

19
aii-fs/requirements.txt Normal file
Просмотреть файл

@ -0,0 +1,19 @@
altgraph==0.14
certifi==2017.11.5
chardet==3.0.4
click==6.7
configparser==3.5.0
Flask==0.12.2
future==0.16.0
idna==2.6
itsdangerous==0.24
Jinja2==2.9.6
macholib==1.8
MarkupSafe==1.0
pefile==2017.11.5
PyInstaller==3.3
pywebhdfs==0.4.1
requests==2.18.4
six==1.11.0
urllib3==1.22
Werkzeug==0.12.2

Просмотреть файл

@ -0,0 +1,78 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# tag: aii.build.base:hadoop2.7.2-cuda8.0-cudnn6-devel-ubuntu16.04
#
# Base image to build for AII.
# Other images depend on it, so build it like:
#
# docker build -f Dockerfile.build.base -t aii.build.base:hadoop2.7.2-cuda8.0-cudnn6-devel-ubuntu16.04 .
# Tag: nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
# Label: com.nvidia.build.id: 32579957
# Label: com.nvidia.build.ref: 88bc4e08cac2668ec821eead444e5ede2cafcf25
# Label: com.nvidia.cuda.version: 8.0.61
# Label: com.nvidia.cudnn.version: 6.0.21
# Label: com.nvidia.volumes.needed: nvidia_driver
# Label: maintainer: NVIDIA CORPORATION <cudatools@nvidia.com>
# Ubuntu 16.04
# Base image is pinned by digest so rebuilds always start from the same layers.
FROM nvidia/cuda@sha256:0f107eb29b1254ba4fb3a8f0c35e74dd3f2e32fd0cb962280450d1f5359f5ee3

ENV HADOOP_VERSION=2.7.2
LABEL HADOOP_VERSION=2.7.2

# DEBIAN_FRONTEND has to be exported: a bare `VAR=value && cmd` only sets a
# shell variable, which the apt-get child processes never see.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get -y update && \
    apt-get -y install python \
        python-pip \
        python-dev \
        python3 \
        python3-pip \
        python3-dev \
        python-yaml \
        python-six \
        build-essential \
        wget \
        curl \
        unzip \
        automake \
        openjdk-8-jdk \
        openssh-server \
        openssh-client \
        libcupti-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Download the Hadoop release and unpack it to /usr/local/hadoop.
RUN wget -qO- http://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | \
    tar xz -C /usr/local && \
    mv /usr/local/hadoop-${HADOOP_VERSION} /usr/local/hadoop

ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
    HADOOP_INSTALL=/usr/local/hadoop \
    NVIDIA_VISIBLE_DEVICES=all

# These must live in a separate ENV instruction: variable substitution inside
# an instruction uses the values defined before that instruction, so
# ${HADOOP_INSTALL} would expand to an empty string if it were defined and
# referenced in the same ENV.
ENV HADOOP_PREFIX=${HADOOP_INSTALL} \
    HADOOP_BIN_DIR=${HADOOP_INSTALL}/bin \
    HADOOP_SBIN_DIR=${HADOOP_INSTALL}/sbin \
    HADOOP_HDFS_HOME=${HADOOP_INSTALL} \
    HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_INSTALL}/lib/native \
    HADOOP_OPTS="-Djava.library.path=${HADOOP_INSTALL}/lib/native"

# NOTE(review): ENV performs variable substitution only and does not run shell
# commands, so CLASSPATH presumably ends up holding the literal text
# "$(hadoop classpath --glob)" - confirm that consumers evaluate it at runtime.
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${HADOOP_BIN_DIR}:${HADOOP_SBIN_DIR} \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${JAVA_HOME}/jre/lib/amd64/server \
    CLASSPATH="$(hadoop classpath --glob)"

Просмотреть файл

@ -0,0 +1,31 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# tag: aii.run.tensorflow
#
# Before building this image you need to build the base image first:
#
# docker build -f Dockerfile.build.base -t aii.build.base:hadoop2.7.2-cuda8.0-cudnn6-devel-ubuntu16.04 .
# Build on top of the locally built AII base image (Hadoop 2.7.2, CUDA 8.0, cuDNN 6).
FROM aii.build.base:hadoop2.7.2-cuda8.0-cudnn6-devel-ubuntu16.04
# For how to run TensorFlow on Hadoop,
# please refer to https://www.tensorflow.org/deploy/hadoop
# Install tensorflow-gpu through both pip and pip3 (presumably the Python 2 and
# Python 3 interpreters installed by the base image - confirm).
# NOTE(review): no version is pinned, so each build takes whatever release pip
# resolves at that time; confirm it stays compatible with CUDA 8.0 / cuDNN 6.
RUN pip install tensorflow-gpu && \
pip3 install tensorflow-gpu

162
examples/README.md Normal file
Просмотреть файл

@ -0,0 +1,162 @@
<!--
Copyright (c) Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-->
# Introduction
The system supports training or evaluation with CNTK, TensorFlow, and other custom docker images for deep learning. Users need to prepare a config file and submit it to run a job. This guide will introduce how to prepare a config file and how to run a deep learning job on the system.
# Prerequisites
This guide assumes users have already installed and configured the system properly.
# How to Run a Deep Learning Job
## Config File
Users need to prepare a json config file to describe the details of jobs, here is its format:
```json
{
"jobName": String,
"image": String,
"dataDir": String,
"outputDir": String,
"codeDir": String,
"taskRoles": [
{
"name": String,
"taskNumber": Integer,
"cpuNumber": Integer,
"memoryMB": Integer,
"gpuNumber": Integer,
"command": String
}
],
"killAllOnCompletedTaskNumber": Integer,
"retryCount": Integer
}
```
Here are all the parameters for the job config file:
| Field Name | Schema | Description |
| :----------------------------- | :------------------------- | :--------------------------------------- |
| `jobName` | String, required | Name for the job, need to be unique |
| `image` | String, required | URL pointing to the docker image for all tasks in the job |
| `dataDir` | String, optional, HDFS URI | Data directory existing on HDFS |
| `outputDir` | String, optional, HDFS URI | Output directory existing on HDFS |
| `codeDir` | String, required, HDFS URI | Code directory existing on HDFS |
| `taskRoles` | List, required | List of `taskRole`, one task role at least |
| `taskRole.name` | String, required | Name for the task role, need to be unique with other roles |
| `taskRole.taskNumber` | Integer, required | Number for the task role, no less than 1 |
| `taskRole.cpuNumber` | Integer, required | CPU number for one task in the task role, no less than 1 |
| `taskRole.memoryMB` | Integer, required | Memory for one task in the task role, no less than 100 |
| `taskRole.gpuNumber` | Integer, required | GPU number for one task in the task role, no less than 0 |
| `taskRole.command` | String, required | Executable command for tasks in the task role, can not be empty |
| `killAllOnCompletedTaskNumber` | Integer, optional | Number of completed tasks to kill all tasks, no less than 0 |
| `retryCount` | Integer, optional | Job retry count, no less than 0 |
## Runtime Environment
All user jobs run separately in docker containers, using the docker image specified in the config file. Within a job, each task runs in its own docker container. The allocation of docker containers depends on the resources available on each node, so the containers of one job may be placed on a single node or spread across different nodes. A task that needs no communication can simply run on its own. For distributed deep learning jobs, however, some tasks must communicate with each other, so they need to know about the other tasks. The system therefore exports some environment variables in each docker container so that user code can access the runtime environment.
Here are all the `AII`-prefixed environment variables available in runtime docker containers:
| Environment Variable Name | Description |
| :--------------------------------- | :--------------------------------------- |
| AII_JOB_NAME | `jobName` in config file |
| AII_DATA_DIR | `dataDir` in config file |
| AII_OUTPUT_DIR | `outputDir` in config file |
| AII_CODE_DIR | `codeDir` in config file |
| AII_TASK_ROLE_NAME | `taskRole.name` of current task role |
| AII_TASK_ROLE_NUM | `taskRole.taskNumber` of current task role |
| AII_TASK_CPU_NUM | `taskRole.cpuNumber` of current task |
| AII_TASK_MEM_MB | `taskRole.memoryMB` of current task |
| AII_TASK_GPU_NUM | `taskRole.gpuNumber` of current task |
| AII_TASK_ROLE_INDEX | Index of current task in the task role, starting from 0 |
| AII_TASK_ROLE_NO | Index of current task role in config file, starting from 0 |
| AII_TASKS_NUM | Total tasks' number in config file |
| AII_TASK_ROLES_NUM | Total task roles' number in config file |
| AII_KILL_ALL_ON_COMPLETED_TASK_NUM | `killAllOnCompletedTaskNumber` in config file |
| AII_CURRENT_CONTAINER_IP | Allocated ip for current docker container |
| AII_CURRENT_CONTAINER_PORT | Allocated port for current docker container |
| AII_TASK_ROLE\_`$i`\_HOST_LIST | Host list for `AII_TASK_ROLE_NO == $i`, comma separated `ip:port` string |
## Deep Learning Job Example
Users can use the json config file to run deep learning jobs in docker environment, we use a distributed tensorflow job as an example:
```json
{
"jobName": "tensorflow-distributed-example",
// customized tensorflow docker image with hdfs support
"image": "aii.run.tensorflow",
// this example uses cifar10 dataset, which is available from
// http://www.cs.toronto.edu/~kriz/cifar.html
"dataDir": "hdfs://path/to/data",
"outputDir": "hdfs://path/to/output",
// this example uses code from tensorflow benchmark https://git.io/vF4wT
"codeDir": "hdfs://path/to/code",
"taskRoles": [
{
"name": "ps_server",
// use 2 ps servers in this job
"taskNumber": 2,
"cpuNumber": 2,
"memoryMB": 8192,
"gpuNumber": 0,
// run tf_cnn_benchmarks.py in code directory
// please refer to https://www.tensorflow.org/performance/performance_models#executing_the_script for arguments' detail
// if there's no `scipy` in the docker image, need to install it first
"command": "pip install scipy && python tf_cnn_benchmarks.py --local_parameter_device=cpu --num_gpus=4 --batch_size=32 --model=resnet20 --variable_update=parameter_server --data_dir=$AII_DATA_DIR --data_name=cifar10 --train_dir=$AII_OUTPUT_DIR --ps_hosts=$AII_TASK_ROLE_0_HOST_LIST --worker_hosts=$AII_TASK_ROLE_1_HOST_LIST --job_name=ps --task_index=$AII_TASK_ROLE_INDEX"
},
{
"name": "worker",
// use 2 workers in this job
"taskNumber": 2,
"cpuNumber": 2,
"memoryMB": 16384,
"gpuNumber": 4,
"command": "pip install scipy && python tf_cnn_benchmarks.py --local_parameter_device=cpu --num_gpus=4 --batch_size=32 --model=resnet20 --variable_update=parameter_server --data_dir=$AII_DATA_DIR --data_name=cifar10 --train_dir=$AII_OUTPUT_DIR --ps_hosts=$AII_TASK_ROLE_0_HOST_LIST --worker_hosts=$AII_TASK_ROLE_1_HOST_LIST --job_name=worker --task_index=$AII_TASK_ROLE_INDEX"
}
],
// kill all 4 tasks when 2 worker tasks completed
"killAllOnCompletedTaskNumber": 2,
"retryCount": 0
}
```
## Job Submission
1. Put the code and data on HDFS
Use `aii-fs` to upload your code and data to HDFS on the system, for example
```sh
aii-fs -cp -r /local/data/dir hdfs://path/to/data
```
Please refer to [aii-fs/README.md](../aii-fs/README.md) for more details.
2. Prepare a job config file
Prepare your deep learning job [config file](#config-file).
3. Submit the job through web portal
Open web portal in a browser, click "Submit Job" and upload your config file.

Просмотреть файл

@ -0,0 +1,32 @@
{
"jobName": "tensorflow-distributed-example",
"image": "aii.run.tensorflow",
// download cifar10 dataset from http://www.cs.toronto.edu/~kriz/cifar.html and upload to hdfs
"dataDir": "hdfs://path/to/data",
// make a new dir for output on hdfs
"outputDir": "hdfs://path/to/output",
// download code from tensorflow benchmark https://git.io/vF4wT and upload to hdfs
"codeDir": "hdfs://path/to/code",
"taskRoles": [
{
"name": "ps_server",
"taskNumber": 2,
"cpuNumber": 2,
"memoryMB": 8192,
"gpuNumber": 0,
"command": "pip install scipy && python tf_cnn_benchmarks.py --local_parameter_device=cpu --num_gpus=4 --batch_size=32 --model=resnet20 --variable_update=parameter_server --data_dir=$AII_DATA_DIR --data_name=cifar10 --train_dir=$AII_OUTPUT_DIR --ps_hosts=$AII_TASK_ROLE_0_HOST_LIST --worker_hosts=$AII_TASK_ROLE_1_HOST_LIST --job_name=ps --task_index=$AII_TASK_ROLE_INDEX"
},
{
"name": "worker",
"taskNumber": 2,
"cpuNumber": 2,
"memoryMB": 16384,
"gpuNumber": 4,
"command": "pip install scipy && python tf_cnn_benchmarks.py --local_parameter_device=cpu --num_gpus=4 --batch_size=32 --model=resnet20 --variable_update=parameter_server --data_dir=$AII_DATA_DIR --data_name=cifar10 --train_dir=$AII_OUTPUT_DIR --ps_hosts=$AII_TASK_ROLE_0_HOST_LIST --worker_hosts=$AII_TASK_ROLE_1_HOST_LIST --job_name=worker --task_index=$AII_TASK_ROLE_INDEX"
}
],
"killAllOnCompletedTaskNumber": 2,
"retryCount": 0
}

Просмотреть файл

@ -0,0 +1,82 @@
<!--
Copyright (c) Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-->
# Microsoft FrameworkLauncher
FrameworkLauncher (or Launcher for short) is built to enable running Large-Scale Long-Running Services inside [YARN](http://hadoop.apache.org/) Containers without making changes to the Services themselves. It also supports Batch Jobs, such as TensorFlow, CNTK, etc.
## Features
* **High Availability**
* All Launcher and Hadoop components are Recoverable and Work Preserving. So, User Services have No Down Time by design, i.e. they stay uninterrupted when our components shut down, crash, upgrade, or even suffer any kind of outage for a long time.
* Launcher can tolerate many unexpected errors and has well defined Failure Model, such as dependent components shutdown, machine error, network error, configuration error, environment error, corrupted internal data, etc.
* User Services are ensured to Retry on Transient Failures, Migrate to another Machine per User's Request, etc.
* **High Usability**
* No User code changes are needed to run the existing executable inside a Container. Users only need to set up the FrameworkDescription in Json format.
* RestAPI is supported.
* Work Preserving FrameworkDescription Update, such as change TaskNumber, add TaskRole on the fly.
* Migrate running Task per User's Request
* Override default ApplicationProgress per User's Request
* **Services Requirements**
* Versioned Service Deployment
* ServiceDiscovery
* AntiaffinityAllocation: Services running on different Machines
* **Batch Jobs Requirements**
* GPU as a Resource
* GangAllocation: Start Services together
* KillAllOnAnyCompleted and KillAllOnAnyServiceCompleted
* Framework Tree Management: DeleteOnParentDeleted
* DataPartition
## Build and Start
### Dependencies
Compile-time dependencies:
* [Apache Maven](http://maven.apache.org/)
* JDK 1.8+
Run-time dependencies:
* Hadoop 2.7.2.ai is required to support GPU as a Resource, if you do not need it, any Hadoop 2.7+ is fine.
* Apache Zookeeper
### Build Launcher Distribution
*Launcher Distribution is built into folder .\dist.*
Windows cmd line:
.\build.bat
GNU/Linux cmd line:
./build.sh
### Start Launcher Service
*Launcher Distribution is required before Start Launcher Service.*
Windows cmd line:
.\dist\start.bat
GNU/Linux cmd line:
./dist/start.sh
## User Manual
See [User Manual](doc/USERMANUAL.md) to learn how to use Launcher Service to Launch Framework.

Просмотреть файл

@ -0,0 +1,30 @@
@echo off
@rem Copyright (c) Microsoft Corporation
@rem All rights reserved.
@rem
@rem MIT License
@rem
@rem Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
@rem documentation files (the "Software"), to deal in the Software without restriction, including without limitation
@rem the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
@rem to permit persons to whom the Software is furnished to do so, subject to the following conditions:
@rem The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
@rem
@rem THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
@rem BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@rem NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
@rem DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
@rem OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@rem Keep environment changes local to this script and enable delayed expansion.
setlocal enableextensions enabledelayedexpansion
@rem Run from the directory that contains this script.
pushd %~dp0
@rem Use the default log directory only when the caller has not set one.
if not defined LAUNCHER_LOG_DIR (
set LAUNCHER_LOG_DIR=.\logs
)
@rem Append the hadoop and java bin directories to PATH.
set PATH=%PATH%;%HADOOP_HOME%\bin;%JAVA_HOME%\bin
@rem Capture the output of "hadoop classpath" into HADOOP_CLASSPATH.
for /f %%i in ('hadoop classpath') do set HADOOP_CLASSPATH=%%i
@rem Start the Launcher service; the log directory is passed as the LOG_DIRS system property.
java -DLOG_DIRS=%LAUNCHER_LOG_DIR% -cp *;%CLASSPATH%;%HADOOP_CLASSPATH% com.microsoft.frameworklauncher.service.Bootstrap
@rem Return to the directory that was current before pushd.
popd

Просмотреть файл

@ -0,0 +1,28 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Run from the directory that contains this script. dirname also handles the
# case where $0 has no slash (e.g. "bash start.sh"), for which "${0%/*}" would
# yield the script name itself and make pushd fail.
pushd "$(dirname "$0")" || exit 1

# Use the default log directory only when the caller has not set one.
if [ -z "$LAUNCHER_LOG_DIR" ]; then
  export LAUNCHER_LOG_DIR=./logs
fi

export PATH="$PATH:$HADOOP_HOME/bin:$JAVA_HOME/bin"
HADOOP_CLASSPATH="$(hadoop classpath)"
export HADOOP_CLASSPATH

# The classpath is quoted so that the "*" wildcard reaches java untouched
# instead of being subject to word splitting and filename expansion.
java "-DLOG_DIRS=$LAUNCHER_LOG_DIR" -cp "*:$CLASSPATH:$HADOOP_CLASSPATH" com.microsoft.frameworklauncher.service.Bootstrap
exitcode=$?

popd
# Report the exit status of the service rather than that of popd.
exit $exitcode

Просмотреть файл

@ -0,0 +1,54 @@
@echo off
@rem Copyright (c) Microsoft Corporation
@rem All rights reserved.
@rem
@rem MIT License
@rem
@rem Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
@rem documentation files (the "Software"), to deal in the Software without restriction, including without limitation
@rem the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
@rem to permit persons to whom the Software is furnished to do so, subject to the following conditions:
@rem The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
@rem
@rem THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
@rem BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@rem NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
@rem DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
@rem OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@rem Keep environment changes local to this script and run from the script's directory.
setlocal enableextensions enabledelayedexpansion
pushd %~dp0
echo Start to make Binary Distributions into directory: dist
@rem Start from an empty dist directory.
if exist dist (
call :run rmdir /s /q dist
)
call :run mkdir dist
call :run mvn clean install
@rem Collect the built jar together with the bin and conf files into dist.
call :run copy /b /y target\*-with-dependencies.jar dist\
call :run copy /b /y bin\* dist\
call :run copy /b /y conf\* dist\
goto :stop
@rem Subroutine: echo the given command line and run it in a child cmd.exe.
@rem On a non-zero exit code it jumps to :stop instead of returning to its caller.
:run
echo cmd /c %*
cmd /c %*
set exitcode=%errorlevel%
if %exitcode% neq 0 (
goto :stop
) else (
exit /b %exitcode%
)
@rem Report the result, restore the previous directory and exit with the last exit code.
:stop
if %exitcode% neq 0 (
echo Failed to make Binary Distributions with exitcode %exitcode%
) else (
echo Succeed to make Binary Distributions with exitcode %exitcode%
)
popd
@rem NOTE: exit without /b ends the hosting cmd.exe, which is why build.bat runs this script in its own cmd.exe.
exit %exitcode%
endlocal

Просмотреть файл

@ -0,0 +1,21 @@
@echo off
@rem Copyright (c) Microsoft Corporation
@rem All rights reserved.
@rem
@rem MIT License
@rem
@rem Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
@rem documentation files (the "Software"), to deal in the Software without restriction, including without limitation
@rem the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
@rem to permit persons to whom the Software is furnished to do so, subject to the following conditions:
@rem The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
@rem
@rem THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
@rem BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@rem NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
@rem DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
@rem OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@rem Start a new cmd.exe to avoid exit the caller cmd.exe.
@rem build-internal.bat is looked up next to this script and receives all arguments unchanged.
cmd /c "%~dp0build-internal.bat" %*

Просмотреть файл

@ -0,0 +1,49 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Print the final status, restore the caller's directory and exit.
#   $1: exit code to report and to exit with.
stop() {
  exitcode=$1
  if [ "$exitcode" != 0 ]; then
    echo Failed to make Binary Distributions with exitcode $exitcode
  else
    echo Succeed to make Binary Distributions with exitcode $exitcode
  fi
  popd
  exit "$exitcode"
}

# Echo a command, run it, and abort the whole build through stop() when it fails.
#   $@: the command and its arguments.
run() {
  echo "\$ ${@}"
  "${@}"
  exitcode=$?
  if [ "$exitcode" != 0 ]; then
    stop "$exitcode"
  fi
}

# Run from the directory that contains this script. dirname also handles the
# case where $0 has no slash (e.g. "bash build.sh"), for which "${0%/*}" would
# yield the script name itself; bail out instead of building in the wrong place.
pushd "$(dirname "$0")" || exit 1

echo Start to make Binary Distributions into directory: dist
# Recreate dist from scratch, build, then collect the jar, scripts and configs.
run rm -rf dist
run mkdir dist
run mvn clean install
run cp -r target/*-with-dependencies.jar dist
run cp -r bin/* dist
run cp -r conf/* dist
stop 0

Просмотреть файл

@ -0,0 +1,28 @@
!!com.microsoft.frameworklauncher.common.model.LauncherConfiguration
# Common Setup
zkConnectString: 127.0.0.1:2181
zkRootDir: /Launcher
hdfsRootDir: /Launcher
# Service Setup
serviceRMResyncIntervalSec: 60
serviceRequestPullIntervalSec: 5
# Application Setup
applicationRetrieveDiagnosticsIntervalSec: 60
applicationRetrieveDiagnosticsMaxRetryCount: 15
applicationTransientConflictMaxDelaySec: 3600
applicationTransientConflictMinDelaySec: 600
# Framework Setup
frameworkCompletedRetainSec: 43200
# ApplicationMaster Setup
amVersion: 0
amRmResyncFrequency: 6
amRequestPullIntervalSec: 60
amStatusPushIntervalSec: 60
# WebServer Setup
webServerAddress: http://localhost:9086
webServerStatusPullIntervalSec: 5

Просмотреть файл

@ -0,0 +1,490 @@
<!--
Copyright (c) Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-->
# Microsoft FrameworkLauncher User Manual
## <a name="Concepts">Concepts</a>
* Different **TaskRoles** compose a **Framework**
* Same **Tasks** compose a **TaskRole**
* A **User Service** is executed by all **Tasks** in a **TaskRole**
## <a name="QuickStart">Quick Start</a>
1. **Prepare Framework**
1. **Upload Framework Executable to HDFS**
Upload the [Example Framework Executable](./example/ExampleFramework.sh) to HDFS:
hadoop fs -mkdir -p /ExampleFramework/
hadoop fs -put -f ExampleFramework.sh /ExampleFramework/
2. **Write Framework Description File**
Just use the [Example Framework Description File](./example/ExampleFramework.json).
Example Framework Description Explanation:
• The Example Framework with Version 1 contains 1 TaskRole named LRSMaster.
• LRSMaster contains 2 Tasks and they will be executed for LRSMaster's TaskService.
• LRSMaster's TaskService with Version 1 is defined by its EntryPoint, SourceLocations and Resource.
• The EntryPoint and SourceLocations define the Service's corresponding Executable, which needs to be run inside Containers.
• The Resource defines the Container Resource Guarantee / Limitation.
2. **Launch Framework**
*Launcher Service needs to be started before launching a Framework. See [README](../README.md) to start Launcher Service.*
*See [Root URI](#RootURI) to get {LauncherAddress}*
HTTP PUT the Framework Description File as json to:
http://{LauncherAddress}/v1/Frameworks/ExampleFramework
For example, with [curl](https://curl.haxx.se/), you can execute below cmd line :
curl -X PUT http://{LauncherAddress}/v1/Frameworks/ExampleFramework -d @ExampleFramework.json --header "Content-Type: application/json"
3. **Monitor Framework**
*Below information may not be updated immediately, since all Launcher operations are asynchronous.*
Check the Requested FrameworkNames by:
http://{LauncherAddress}/v1/Frameworks
Check ExampleFramework Request by:
http://{LauncherAddress}/v1/Frameworks/ExampleFramework/FrameworkRequest
Check ExampleFramework Status by:
http://{LauncherAddress}/v1/Frameworks/ExampleFramework
## <a name="Architecture">Architecture</a>
<p style="text-align: left;">
<img src="img/Architecture.png" title="Architecture" alt="Architecture" />
</p>
**LauncherInterfaces**:
* RestAPI
* Submit Framework Description
**LauncherService**:
* One Central Service
* Manages all Frameworks for the whole Cluster.
**LauncherAM**:
* Per-Framework Service
* Manage Tasks for a single Framework by customized feature requirement
## <a name="Pipeline">Pipeline</a>
<p style="text-align: left;">
<img src="img/Pipeline.png" title="Pipeline" alt="Pipeline" />
</p>
## <a name="RestAPI">RestAPI</a>
### <a name="Guarantees">Guarantees</a>
* All APIs are IDEMPOTENT and STATELESS, to allow trivial Work Preserving Client Restart.
In other words, User does not need to worry about calling one API multiple times from different Client instances (such as after a Client Restart, etc).
* All APIs are DISTRIBUTED THREAD SAFE, to allow multiple distributed Client instances to access.
In other words, User does not need to worry about calling them at the same time from multiple Threads/Processes/Nodes.
### <a name="RootURI">Root URI (LauncherAddress)</a>
Configure it as webServerAddress inside [LauncherConfiguration File](../conf/frameworklauncher.yml).
### <a name="BestPractices">Best Practices</a>
* LauncherService can only handle a finite, limited request volume. User should try to minimize its overall request frequency and payload, so that the LauncherService is not overloaded. To achieve this, User can centralize requests, space out requests, filter responses and so on.
* Completed Frameworks will ONLY be retained for the most recent FrameworkCompletedRetainSec seconds, in case the Client fails to delete the Framework after FrameworkCompleted. One exception is a Framework Launched by DataDeployment: it will be retained until the corresponding FrameworkDescriptionFile is deleted in the DataDeployment. To avoid missing the CompletedFrameworkStatus, the polling interval of the Client should be less than FrameworkCompletedRetainSec. Check the FrameworkCompletedRetainSec by [GET LauncherStatus](#GET_LauncherStatus).
### <a name="Notes">Notes</a>
* Refer [Data Model](#DataModel) for the DataStructure of HTTP Request and Response.
### <a name="APIDetails">API Details</a>
#### <a name="PUT_Framework">PUT Framework</a>
**Request**
PUT /v1/Frameworks/{FrameworkName}
Type: application/json
Body: [FrameworkDescriptor](../src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java)
**Description**
Add a NOT Requested Framework or Update a Requested Framework.
1. Add a NOT Requested Framework: Framework will be Added and Launched (Now it is Requested).
2. Update a Requested Framework:
1. If FrameworkVersion unchanged:
1. Framework will be Updated to the FrameworkDescription on the fly (i.e. Work Preserving).
2. To Update Framework on the fly, it is better to use the corresponding PartialUpdate (such as [PUT TaskNumber](#PUT_TaskNumber)) than to PUT the entire FrameworkDescription here, because partially updating the FrameworkDescription avoids the Race Condition (or Transaction Conflict) between two PUT Requests. Besides, the behaviour is undefined when changing parameters in FrameworkDescription which are not supported by PartialUpdate.
2. Else:
1. Framework will be NonRolling Upgraded to new FrameworkVersion. (i.e. Not Work Preserving).
2. NonRolling Upgrade can be used to change parameters in FrameworkDescription which are not supported by PartialUpdate (such as Framework Queue).
3. NonRolling Upgrade should be triggered by changing FrameworkVersion, instead of DELETE then PUT with the same FrameworkVersion.
3. User is responsible to specify FrameworkName explicitly.
4. After Accepted Response, FrameworkStatus may not be Initialized or Updated immediately. And only after FrameworkStatus is Initialized, the Framework is guaranteed to be scheduled.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| Accepted(202) | NULL | The Request has been recorded for backend to process, not that the processing of the Request has been completed. |
| BadRequest(400) | ExceptionMessage | The Request validation failed. So, Client is expected to not retry for this nontransient failure and then correct the Request. |
| TooManyRequests(429) | ExceptionMessage | The Request is rejected due to the New Total TaskNumber will exceed the Max Total TaskNumber if backend accepted it. So, the Client is expected to retry for this transient failure or migrate the whole Framework to another Cluster. |
| ServiceUnavailable(503) | ExceptionMessage | The Request cannot be recorded for backend to process. In our system, this only happens when target Cluster's Zookeeper is down for a long time. So, the Client is expected to retry for this transient failure or migrate the whole Framework to another Cluster. |
#### <a name="DELETE_Framework">DELETE Framework</a>
**Request**
DELETE /v1/Frameworks/{FrameworkName}
**Description**
Delete a Framework, no matter it is Requested or not.
Notes:
1. Framework will be Stopped and Deleted (Now it is NOT Requested).
2. After Accepted Response, FrameworkStatus may not be Deleted immediately. And only after [FrameworkStatus](#GET_FrameworkStatus) is Deleted, the Framework is guaranteed to be Stopped completely.
3. Only recently completed Frameworks will be kept if the Client fails to DELETE the Framework after FrameworkCompleted. One exception is a Framework Launched by DataDeployment: it will be kept until the corresponding FrameworkDescriptionFile is deleted in the DataDeployment.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| Accepted(202) | NULL | Same as [PUT Framework](#PUT_Framework) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_FrameworkStatus">GET FrameworkStatus</a>
**Request**
GET /v1/Frameworks/{FrameworkName}/FrameworkStatus
**Description**
Get the FrameworkStatus of a Requested Framework
Recipes:
1. User Level RetryPolicy (Based on FrameworkState, ApplicationExitCode, ApplicationDiagnostic, applicationExitType)
2. Directly Monitor Underlay YARN Application by YARN CLI or RestAPI (Based on ApplicationId or ApplicationTrackingUrl)
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [FrameworkStatus](../src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkStatus.java) | |
| NotFound(404) | ExceptionMessage | Specified Framework's Status does not exist. This may be because the specified Framework is not Requested, or because the Framework is Requested but its Status has not been initialized by backend (See [PUT Framework](#PUT_Framework)). So, the Client is expected to retry for the latter case. |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_TaskRoleStatus">GET TaskRoleStatus</a>
**Request**
GET /v1/Frameworks/{FrameworkName}/TaskRoles/{TaskRoleName}/TaskRoleStatus
**Description**
Get the TaskRoleStatus of a Requested Framework
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [TaskRoleStatus](../src/main/java/com/microsoft/frameworklauncher/common/model/TaskRoleStatus.java) | |
| NotFound(404) | ExceptionMessage | Same as [GET FrameworkStatus](#GET_FrameworkStatus) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_TaskStatuses">GET TaskStatuses</a>
**Request**
GET /v1/Frameworks/{FrameworkName}/TaskRoles/{TaskRoleName}/TaskStatuses
**Description**
Get the TaskStatuses of a Requested Framework
Recipes:
1. ServiceDiscovery (Based on TaskRoleName, ContainerHostName, ContainerIPAddress, ServiceId)
2. TaskLogForwarding (Based on ContainerLogHttpAddress)
3. MasterSlave and MigrateTask (Based on ContainerId)
4. DataPartition (Based on TaskIndex) (Note TaskIndex will not change after Task Restart, Migrated or Upgraded)
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [TaskStatuses](../src/main/java/com/microsoft/frameworklauncher/common/model/TaskStatuses.java) | |
| NotFound(404) | ExceptionMessage | Same as [GET FrameworkStatus](#GET_FrameworkStatus) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="PUT_TaskNumber">PUT TaskNumber</a>
**Request**
PUT /v1/Frameworks/{FrameworkName}/TaskRoles/{TaskRoleName}/TaskNumber
Type: application/json
Body: [UpdateTaskNumberRequest](../src/main/java/com/microsoft/frameworklauncher/common/model/UpdateTaskNumberRequest.java)
**Description**
Update TaskNumber for a Requested Framework
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| Accepted(202) | NULL | Same as [PUT Framework](#PUT_Framework) |
| BadRequest(400) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
| NotFound(404) | ExceptionMessage | Specified Framework does not exist. So, Client is expected to not retry for this non-transient failure and then PUT the corresponding Framework first. |
| TooManyRequests(429) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="PUT_MigrateTask">PUT MigrateTask</a>
**Request**
PUT /v1/Frameworks/{FrameworkName}/MigrateTasks/{ContainerId}
Type: application/json
Body: [MigrateTaskRequest](../src/main/java/com/microsoft/frameworklauncher/common/model/MigrateTaskRequest.java)
**Description**
Migrate a Task from current Container to another Container for a Requested Framework
And new Container and old Container will satisfy the AntiAffinityLevel constraint.
Notes:
1. User is responsible for implementing Health/Perf Measurement of the Service based on Monitoring TaskStatuses or self-contained communication. If some Health/Perf degradation is found, User can migrate the Task by calling this API with the corresponding ContainerId as parameter.
2. Currently, only the Any AntiAffinityLevel is supported.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| Accepted(202) | NULL | Same as [PUT Framework](#PUT_Framework) |
| BadRequest(400) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
| NotFound(404) | ExceptionMessage | Same as [PUT TaskNumber](#PUT_TaskNumber) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="PUT_ApplicationProgress">PUT ApplicationProgress</a>
**Request**
PUT /v1/Frameworks/{FrameworkName}/ApplicationProgress
Type: application/json
Body: [OverrideApplicationProgressRequest](../src/main/java/com/microsoft/frameworklauncher/common/model/OverrideApplicationProgressRequest.java)
**Description**
Update ApplicationProgress for a Requested Framework
Notes:
1. If User does not call this API, the default ApplicationProgress is used, and it is calculated as CompletedTaskCount / TotalTaskCount.
2. User is responsible for implementing Progress Measurement of the Service based on Monitoring Task logs or self-contained communication, and then feeding back the Progress by calling this API to Override the default ApplicationProgress.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| Accepted(202) | NULL | Same as [PUT Framework](#PUT_Framework) |
| BadRequest(400) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
| NotFound(404) | ExceptionMessage | Same as [PUT TaskNumber](#PUT_TaskNumber) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_Frameworks">GET Frameworks</a>
**Request**
GET /v1/Frameworks
**Description**
List all FrameworkNames of current Requested Frameworks.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [RequestedFrameworkNames](../src/main/java/com/microsoft/frameworklauncher/common/model/RequestedFrameworkNames.java) | |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_AggregatedFrameworkStatus">GET AggregatedFrameworkStatus</a>
**Request**
GET /v1/Frameworks/{FrameworkName}
GET /v1/Frameworks/{FrameworkName}/AggregatedFrameworkStatus
**Description**
Get the AggregatedFrameworkStatus of a Requested Framework
AggregatedFrameworkStatus = FrameworkStatus + all TaskRoles' (TaskRoleStatus + TaskStatuses)
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [AggregatedFrameworkStatus](../src/main/java/com/microsoft/frameworklauncher/common/model/AggregatedFrameworkStatus.java) | |
| NotFound(404) | ExceptionMessage | Same as [GET FrameworkStatus](#GET_FrameworkStatus) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_FrameworkRequest">GET FrameworkRequest</a>
**Request**
GET /v1/Frameworks/{FrameworkName}/FrameworkRequest
**Description**
Get the FrameworkRequest of a Requested Framework
Current [FrameworkDescriptor](../src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java) for the Framework is included in FrameworkRequest and it can reflect latest updates.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [FrameworkRequest](../src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkRequest.java) | |
| NotFound(404) | ExceptionMessage | Same as [PUT TaskNumber](#PUT_TaskNumber) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_AggregatedFrameworkRequest">GET AggregatedFrameworkRequest</a>
**Request**
GET /v1/Frameworks/{FrameworkName}/AggregatedFrameworkRequest
**Description**
Get the AggregatedFrameworkRequest of a Requested Framework
AggregatedFrameworkRequest = FrameworkRequest + all other feedback Request
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [AggregatedFrameworkRequest](../src/main/java/com/microsoft/frameworklauncher/common/model/AggregatedFrameworkRequest.java) | |
| NotFound(404) | ExceptionMessage | Same as [PUT TaskNumber](#PUT_TaskNumber) |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_LauncherRequest">GET LauncherRequest</a>
**Request**
GET /v1/LauncherRequest
**Description**
Get the LauncherRequest
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [LauncherRequest](../src/main/java/com/microsoft/frameworklauncher/common/model/LauncherRequest.java) | |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
#### <a name="GET_LauncherStatus">GET LauncherStatus</a>
**Request**
GET /v1/LauncherStatus
**Description**
Get the LauncherStatus
Current [LauncherConfiguration](../src/main/java/com/microsoft/frameworklauncher/common/model/LauncherConfiguration.java) is included in LauncherStatus and it can reflect latest updates.
**Response**
| HttpStatusCode | Body | Description |
|:---- |:---- |:---- |
| OK(200) | [LauncherStatus](../src/main/java/com/microsoft/frameworklauncher/common/model/LauncherStatus.java) | |
| ServiceUnavailable(503) | ExceptionMessage | Same as [PUT Framework](#PUT_Framework) |
## <a name="DataModel">DataModel</a>
You can check the DataStructure, Specification and FeatureUsage inside Launcher Data Model:
../src/main/java/com/microsoft/frameworklauncher/common/model/*
For example:
A Framework is Defined and Requested by the FrameworkDescriptor data structure. To find the feature usage inside FrameworkDescriptor, you can refer to the comments inside [FrameworkDescriptor](../src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java).
## <a name="EnvironmentVariables">EnvironmentVariables</a>
Launcher sets up below EnvironmentVariables for each User Service to use:
1. Used to locate itself in the Cluster irrespective of any kinds of Migration and Restart.
| EnvironmentVariable | Description |
|:---- |:---- |
| LAUNCHER_ADDRESS | |
| FRAMEWORK_NAME | |
| FRAMEWORK_VERSION | |
| TASKROLE_NAME | |
| TASK_INDEX | |
| SERVICE_NAME | |
| SERVICE_VERSION | |
2. Used to get the allocated Resource by LauncherAM, only set when corresponding feature is enabled.
| EnvironmentVariable | Description |
|:---- |:---- |
| CONTAINER_IP | Only set when generateInstanceHostList is enabled. |
| CONTAINER_GPUS | Only set when gpuNumber is greater than 0. |
## <a name="ExitStatus_Convention">ExitStatus Convention</a>
You can check all the defined ExitStatus by: [ExitType](../src/main/java/com/microsoft/frameworklauncher/common/model/ExitType.java), [RetryPolicyDescriptor](../src/main/java/com/microsoft/frameworklauncher/common/model/RetryPolicyDescriptor.java), [RetryPolicyState](../src/main/java/com/microsoft/frameworklauncher/common/model/RetryPolicyState.java), [DiagnosticsUtils](../src/main/java/com/microsoft/frameworklauncher/utils/DiagnosticsUtils.java).
Recipes:
1. Your LauncherClient can depend on the ExitStatus Convention
2. If your Service fails, it can optionally return the ExitCode USER_APP_TRANSIENT_ERROR or USER_APP_NON_TRANSIENT_ERROR to help FancyRetryPolicy identify your Service's TRANSIENT_NORMAL or NON_TRANSIENT ExitType. If neither ExitCode is returned, the Service is considered to have exited due to UNKNOWN ExitType.
## <a name="Notes">Notes</a>
1. The **Initial Working Directory** of your EntryPoint is the root directory of the EntryPoint.
Your Service can read data anywhere, however it can ONLY write data under the Initial Working Directory with the Service Directory excluded. And if the Source is a **ZIP file**, it will be uncompressed before starting your Service.
For example:
EntryPoint=HbaseRS.zip/start.bat
SourceLocations=hdfs:///HbaseRS.zip, hdfs:///HbaseCom <- HbaseRS.zip is a ZIP file
The two Sources HbaseRS.zip and HbaseCom will be downloaded (and uncompressed) to local machine as below structure:
./ <- The Initial Working Directory
├─HbaseRS.zip <- Service Directory <- HbaseRS.zip is a directory uncompressed from original ZIP file
└─HbaseCom <- Service Directory
2. Launcher will not restart the succeeded Task (i.e. the process started by EntryPoint ends with exit code 0) in any RetryPolicy. So, if you want to always restart Service on the same machine irrespective of its exit code, you need to **wrap the original EntryPoint** with another script, such as:
while true; do
# call the original EntryPoint
done
3. Increase the replication number of your data and binary on target HDFS (Higher ReplicationNumber means faster downloading, higher availability and higher durability).
hadoop fs -setrep -w <ReplicationNumber> <HDFS Path>
4. Do not modify your data and binary on target HDFS. To use new data and binary, upload them to a different HDFS Path and then change the FrameworkVersion and SourceLocations.

Просмотреть файл

@ -0,0 +1,19 @@
{
"version": 1,
"taskRoles": {
"LRSMaster": {
"taskNumber": 2,
"taskService": {
"version": 1,
"entryPoint": "ExampleFramework/ExampleFramework.sh",
"sourceLocations": [
"/ExampleFramework"
],
"resource": {
"cpuNumber": 1,
"memoryMB": 1
}
}
}
}
}

Просмотреть файл

@ -0,0 +1,25 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Print the PATH once, then print the current date every 10 seconds
# until the process is terminated from outside.
echo $PATH
while :; do
  sleep 10
  echo $(date)
done

Двоичные данные
frameworklauncher/doc/img/Architecture.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 13 KiB

Двоичные данные
frameworklauncher/doc/img/Pipeline.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 14 KiB

186
frameworklauncher/pom.xml Normal file
Просмотреть файл

@ -0,0 +1,186 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.microsoft.frameworklauncher</groupId>
<artifactId>frameworklauncher</artifactId>
<version>1.0-SNAPSHOT</version>
<description>Microsoft FrameworkLauncher</description>
<name>Microsoft FrameworkLauncher</name>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<project.java.src.version>1.8</project.java.src.version>
<maven-compiler-plugin.version>3.1</maven-compiler-plugin.version>
<maven-assembly-plugin.version>2.4</maven-assembly-plugin.version>
<hadoop.version>2.7.2</hadoop.version>
<zookeeper.version>3.4.10</zookeeper.version>
<httpcomponents.version>4.3.4</httpcomponents.version>
<snakeyaml.version>1.18</snakeyaml.version>
<log4j.version>1.2.17</log4j.version>
<junit.version>4.12</junit.version>
<jackson.version>1.9.13</jackson.version>
<jackson.jaxrs.version>2.3.0</jackson.jaxrs.version>
<validation.version>2.0.0.Final</validation.version>
<validator.version>5.2.1.Final</validator.version>
<el.version>2.2</el.version>
</properties>
<dependencies>
<!-- Json provider dependency must be placed before others -->
<dependency>
<groupId>com.fasterxml.jackson.jaxrs</groupId>
<artifactId>jackson-jaxrs-json-provider</artifactId>
<version>${jackson.jaxrs.version}</version>
</dependency>
<dependency>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
<version>${validation.version}</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-validator</artifactId>
<version>${validator.version}</version>
</dependency>
<dependency>
<groupId>javax.el</groupId>
<artifactId>el-api</artifactId>
<version>${el.version}</version>
</dependency>
<dependency>
<groupId>org.glassfish.web</groupId>
<artifactId>el-impl</artifactId>
<version>${el.version}</version>
</dependency>
<!-- Hadoop dependencies -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<!-- Other dependencies -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpcomponents.version}</version>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>${snakeyaml.version}</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>${jackson.version}</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>${maven-assembly-plugin.version}</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven-compiler-plugin.version}</version>
<configuration>
<source>${project.java.src.version}</source>
<target>${project.java.src.version}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

Просмотреть файл

@ -0,0 +1,114 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.utils.DefaultLogger;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
public class AntiaffinityAllocationManager { // THREAD SAFE
private static final DefaultLogger LOGGER = new DefaultLogger(AntiaffinityAllocationManager.class);
// Candidate request host names for this application
private final LinkedList<String> candidateRequestHostNames = new LinkedList<>();
// Host name will be specified in the next Container request
// Ensure next() is always available if not null
private ListIterator<String> nextRequestHostName = null;
// Replace all candidate request host names with a randomly ordered copy of
// hostNames and reset the cursor to the first candidate. When hostNames is
// empty, the candidates are cleared and the cursor is set to null.
// The list passed in by the caller is only read, never reordered.
public synchronized void updateCandidateRequestHostNames(
    List<String> hostNames) {
  LOGGER.logInfo("updateCandidateRequestHostNames: %s", hostNames.size());

  candidateRequestHostNames.clear();
  nextRequestHostName = null;

  if (hostNames.size() > 0) {
    // Copy first, then shuffle our own list: shuffling hostNames directly
    // would reorder the caller's list and would throw
    // UnsupportedOperationException for an unmodifiable list.
    candidateRequestHostNames.addAll(hostNames);

    // Randomly shuffle host name list to differentiate Container
    // allocation for each job
    Collections.shuffle(candidateRequestHostNames);

    nextRequestHostName = candidateRequestHostNames.listIterator(0);
  }
}
public synchronized void addCandidateRequestHostName(String hostName) {
if (!candidateRequestHostNames.contains(hostName)) {
LOGGER.logInfo("addCandidateRequestHostName: %s", hostName);
if (nextRequestHostName == null) {
candidateRequestHostNames.addLast(hostName);
nextRequestHostName = candidateRequestHostNames.listIterator(0);
} else {
// Since we know a Container just completed on the hostName, it is
// better to request next Container on the hostName.
nextRequestHostName.add(hostName);
nextRequestHostName.previous();
}
}
}
public synchronized String getCandidateRequestHostName() {
if (nextRequestHostName == null) {
return null;
} else {
return circularAdvanceNextRequestHostName();
}
}
public synchronized void removeCandidateRequestHostName(String hostName) {
if (nextRequestHostName == null) {
return;
} else {
int indexToRemove = candidateRequestHostNames.indexOf(hostName);
if (indexToRemove == -1) {
return;
} else {
LOGGER.logInfo("removeCandidateRequestHostName: %s", hostName);
int indexOfNextRequestHostName = nextRequestHostName.nextIndex();
candidateRequestHostNames.remove(indexToRemove);
if (candidateRequestHostNames.size() == 0) {
nextRequestHostName = null;
} else {
if (indexOfNextRequestHostName > indexToRemove) {
indexOfNextRequestHostName--;
}
if (indexOfNextRequestHostName == candidateRequestHostNames.size()) {
indexOfNextRequestHostName = 0;
}
nextRequestHostName = candidateRequestHostNames.listIterator(indexOfNextRequestHostName);
}
}
}
}
private synchronized String circularAdvanceNextRequestHostName() {
String hostName = nextRequestHostName.next();
if (!nextRequestHostName.hasNext()) {
nextRequestHostName = candidateRequestHostNames.listIterator(0);
}
return hostName;
}
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
// Bootstrap Services
/**
 * Process entry point: creates an ApplicationMaster and starts it.
 */
public class Bootstrap {
  /**
   * @param args command line arguments, not used
   */
  public static void main(String[] args) {
    ApplicationMaster applicationMaster = new ApplicationMaster();
    applicationMaster.start();
  }
}

Просмотреть файл

@ -0,0 +1,177 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.common.model.LauncherConfiguration;
import com.microsoft.frameworklauncher.utils.CommonUtils;
import com.microsoft.frameworklauncher.utils.GlobalConstants;
import com.microsoft.frameworklauncher.zookeeperstore.ZookeeperStore;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.ConverterUtils;
// Const parameters for the current AM instead of state variable
/**
 * Holds the parameters of the current AM. The fields are filled in stages by the
 * initialize* methods, each stage depending on a different source
 * (environment variables, ZK store, RM register response, YarnClient).
 */
class Configuration {
  // Filled by initializeNoDependenceConfig
  private YarnConfiguration yarnConfig;
  private String frameworkName;
  private Integer frameworkVersion;
  private String zkConnectString;
  private String zkRootDir;
  private Integer amVersion;
  private Integer amRmHeartbeatIntervalSec;
  private String amHostName;
  private Integer amRpcPort;
  private String amTrackingUrl;
  private String amUser;
  private String amLocalDirs;
  private String amLogDirs;
  private String amContainerId;

  // Filled by initializeDependOnZKStoreConfig
  private String attemptId;
  private String applicationId;
  private LauncherConfiguration launcherConfig;

  // Below properties are defined by RM when AM registered, they may be changed after RM configuration changed.
  private Resource maxResource;
  private String amQueue;
  private String amQueueDefaultNodeLabel;

  /**
   * Loads every parameter which only depends on the process environment.
   * For a normal container, this method must succeed.
   *
   * @throws Exception if a variable cannot be read or a numeric variable cannot be parsed
   */
  public void initializeNoDependenceConfig() throws Exception {
    this.yarnConfig = new YarnConfiguration();

    this.frameworkName = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_FRAMEWORK_NAME);

    // frameworkVersion and amVersion for this AM are read from the environment,
    // so they will not change across attempts.
    // Comparing them with the corresponding versions on the ZK eventually avoids
    // multiple AMs of one Framework running at the same time.
    String frameworkVersionText = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_FRAMEWORK_VERSION);
    this.frameworkVersion = Integer.valueOf(frameworkVersionText);

    this.zkConnectString = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_ZK_CONNECT_STRING);
    this.zkRootDir = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_ZK_ROOT_DIR);

    String amVersionText = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_AM_VERSION);
    this.amVersion = Integer.valueOf(amVersionText);

    String heartbeatIntervalText =
        CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_AM_RM_HEARTBEAT_INTERVAL_SEC);
    this.amRmHeartbeatIntervalSec = Integer.valueOf(heartbeatIntervalText);

    this.amHostName = GlobalConstants.LOCAL_HOST_NAME;
    this.amRpcPort = -1;

    // Setting a NotEmpty amTrackingUrl will override default (Proxied)TrackingUrl and
    // OriginalTrackingUrl which point to RMWebAPP.
    this.amTrackingUrl = "";

    this.amUser = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_USER);
    this.amLocalDirs = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_LOCAL_DIRS);
    this.amLogDirs = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_LOG_DIRS);
    this.amContainerId = CommonUtils.getEnvironmentVariable(GlobalConstants.ENV_VAR_CONTAINER_ID);
  }

  /**
   * Derives the attempt and application ids from the AM container id and loads the
   * LauncherConfiguration from the ZK store.
   *
   * @param zkStore the store to read the LauncherStatus from
   * @throws Exception if the container id cannot be converted or the store cannot be read
   */
  public void initializeDependOnZKStoreConfig(ZookeeperStore zkStore) throws Exception {
    // ConverterUtils depends on the JVM inited by ZooKeeperClient
    ApplicationAttemptId appAttemptId =
        ConverterUtils.toContainerId(getAmContainerId()).getApplicationAttemptId();

    this.attemptId = appAttemptId.toString();
    this.applicationId = appAttemptId.getApplicationId().toString();
    this.launcherConfig = zkStore.getLauncherStatus().getLauncherConfiguration();
  }

  /**
   * Records the queue and the maximum resource capability reported by RM at AM registration.
   *
   * @param rmResp the response of the AM registration
   */
  public void initializeDependOnRMResponseConfig(RegisterApplicationMasterResponse rmResp) {
    this.amQueue = rmResp.getQueue();
    this.maxResource = rmResp.getMaximumResourceCapability();
  }

  /**
   * Looks up the default node label expression of the AM queue.
   * Reads the AM queue, so it relies on initializeDependOnRMResponseConfig having set it.
   *
   * @param yarnClient the client used to query the queue info
   * @throws Exception if the queue info cannot be retrieved
   */
  public void initializeDependOnYarnClientConfig(YarnClient yarnClient) throws Exception {
    this.amQueueDefaultNodeLabel =
        yarnClient.getQueueInfo(getAmQueue()).getDefaultNodeLabelExpression();
  }

  protected YarnConfiguration getYarnConfig() {
    return this.yarnConfig;
  }

  protected String getFrameworkName() {
    return this.frameworkName;
  }

  protected Integer getFrameworkVersion() {
    return this.frameworkVersion;
  }

  protected String getZkConnectString() {
    return this.zkConnectString;
  }

  protected String getZkRootDir() {
    return this.zkRootDir;
  }

  protected Integer getAmVersion() {
    return this.amVersion;
  }

  protected Integer getAmRmHeartbeatIntervalSec() {
    return this.amRmHeartbeatIntervalSec;
  }

  protected String getAmHostName() {
    return this.amHostName;
  }

  protected Integer getAmRpcPort() {
    return this.amRpcPort;
  }

  protected String getAmTrackingUrl() {
    return this.amTrackingUrl;
  }

  protected String getAmUser() {
    return this.amUser;
  }

  protected String getAmLocalDirs() {
    return this.amLocalDirs;
  }

  protected String getAmLogDirs() {
    return this.amLogDirs;
  }

  protected String getAmContainerId() {
    return this.amContainerId;
  }

  protected String getAttemptId() {
    return this.attemptId;
  }

  protected String getApplicationId() {
    return this.applicationId;
  }

  protected LauncherConfiguration getLauncherConfig() {
    return this.launcherConfig;
  }

  protected Resource getMaxResource() {
    return this.maxResource;
  }

  protected String getAmQueue() {
    return this.amQueue;
  }

  protected String getAmQueueDefaultNodeLabel() {
    return this.amQueueDefaultNodeLabel;
  }
}

Просмотреть файл

@ -0,0 +1,105 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.common.model.ResourceDescriptor;
import com.microsoft.frameworklauncher.utils.DefaultLogger;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
/**
 * Tracks the candidate nodes of this application and selects a node, together with
 * the GPUs on it, for a resource request.
 *
 * Every public method synchronizes on the internal node map.
 */
public class GpuAllocationManager { // THREAD SAFE
  private static final DefaultLogger LOGGER = new DefaultLogger(GpuAllocationManager.class);

  // Candidate request nodes for this application: HostName -> Node, in insertion order
  private final LinkedHashMap<String, Node> candidateRequestNodes = new LinkedHashMap<>();

  /**
   * Registers a candidate node, or refreshes the already registered node with the same host name.
   *
   * @param candidateRequestNode the node to add, or the source to update the existing node from
   */
  public void addCandidateRequestNode(Node candidateRequestNode) {
    synchronized (candidateRequestNodes) {
      String hostName = candidateRequestNode.getHostName();
      Node existNode = candidateRequestNodes.get(hostName);

      if (existNode == null) {
        LOGGER.logInfo("addCandidateRequestNode: %s", hostName);
        candidateRequestNodes.put(hostName, candidateRequestNode);
      } else {
        // Keep the existing instance, since it carries the locally allocated resources
        existNode.updateNode(candidateRequestNode);
        LOGGER.logInfo("updateCandidateRequestNode: %s ", existNode);
      }
    }
  }

  /**
   * Walks the candidate nodes in insertion order and stops at the first node which matches
   * the node label (if given) and has enough available memory, CPU and GPU for the request.
   * If the request asks for GPUs, that node is selected, its GPUs are picked by
   * Node.calculateCandidateGPU and the request is recorded on the node by Node.allocateResource.
   *
   * NOTE(review): if the request asks for no GPU, the search still stops at the first
   * fitting node but no node is selected and null is returned. The original comment describes
   * "the first node with the required resource" for CPU and memory only requests; confirm with
   * the callers which behaviour is intended before changing it.
   *
   * @param request the requested resources
   * @param nodeLabel the required node label, or null to accept any node
   * @return the selected node, or null if no node was selected
   */
  public Node allocateCandidateRequestNode(ResourceDescriptor request, String nodeLabel) {
    synchronized (candidateRequestNodes) {
      // Pass values as format arguments, like the other log calls in this class,
      // so that their text is never interpreted as a format string.
      LOGGER.logInfo(
          "allocateCandidateRequestNode: Request resources:%s", request);

      Node candidateNode = null;
      long candidateSelectGPU = 0;

      for (Map.Entry<String, Node> entry : candidateRequestNodes.entrySet()) {
        Node node = entry.getValue();
        LOGGER.logInfo(
            "allocateCandidateRequestNode: Try node: %s", node);

        if (nodeLabel != null) {
          Set<String> nodeLabels = node.getNodeLabels();
          if (!nodeLabels.contains(nodeLabel)) {
            LOGGER.logInfo(
                "allocateCandidateRequestNode: Skip node %s, label does not match:%s",
                node.getHostName(), nodeLabel);
            continue;
          }
        }

        if (request.getMemoryMB() <= node.getAvailableMemory() &&
            request.getCpuNumber() <= node.getAvailableCpu() &&
            request.getGpuNumber() <= node.getAvailableNumGpus()) {
          if (request.getGpuNumber() > 0) {
            candidateNode = node;
            candidateSelectGPU = candidateNode.calculateCandidateGPU(request.getGpuNumber());
          }
          // The first fitting node ends the search
          break;
        }
      }

      if (candidateNode != null) {
        candidateNode.allocateResource(request, candidateSelectGPU);
        LOGGER.logInfo(
            "allocateCandidateRequestNode: select node: %s", candidateNode);
      } else {
        // AM will request resource with any node.
        LOGGER.logInfo(
            "allocateCandidateRequestNode: No enough resource");
      }
      return candidateNode;
    }
  }

  /**
   * Removes the candidate node which has the same host name as the given node.
   *
   * @param candidateRequestHost the node to remove; ignored if its host name is not registered
   */
  public void removeCandidateRequestNode(Node candidateRequestHost) {
    synchronized (candidateRequestNodes) {
      String hostName = candidateRequestHost.getHostName();
      if (candidateRequestNodes.containsKey(hostName)) {
        LOGGER.logInfo("removeCandidateRequestNode: %s", hostName);
        candidateRequestNodes.remove(hostName);
      }
    }
  }
}

Просмотреть файл

@ -0,0 +1,64 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import java.nio.ByteBuffer;
import java.util.Map;
/**
 * Forwards the NMClientAsync callbacks to the ApplicationMaster.
 * The container resource increase callbacks are intentionally left empty.
 */
public class NMClientCallbackHandler implements NMClientAsync.CallbackHandler {
  // Receiver of all forwarded callbacks
  private final ApplicationMaster am;

  /**
   * @param am the ApplicationMaster to forward the callbacks to
   */
  public NMClientCallbackHandler(ApplicationMaster am) {
    this.am = am;
  }

  // Forwarded as is
  public void onContainerStarted(ContainerId containerId, Map<String, ByteBuffer> allServiceResponse) {
    this.am.onContainerStarted(containerId, allServiceResponse);
  }

  // Forwarded as is
  public void onStartContainerError(ContainerId containerId, Throwable e) {
    this.am.onStartContainerError(containerId, e);
  }

  // Forwarded as is
  public void onContainerStopped(ContainerId containerId) {
    this.am.onContainerStopped(containerId);
  }

  // Forwarded as is
  public void onStopContainerError(ContainerId containerId, Throwable e) {
    this.am.onStopContainerError(containerId, e);
  }

  // Forwarded as is
  public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) {
    this.am.onContainerStatusReceived(containerId, containerStatus);
  }

  // Forwarded as is
  public void onGetContainerStatusError(ContainerId containerId, Throwable e) {
    this.am.onGetContainerStatusError(containerId, e);
  }

  public void onContainerResourceIncreased(ContainerId containerId, Resource resource) {
    // Not forwarded to the ApplicationMaster
  }

  public void onIncreaseContainerResourceError(ContainerId containerId, Throwable t) {
    // Not forwarded to the ApplicationMaster
  }
}

Просмотреть файл

@ -0,0 +1,120 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.common.model.ResourceDescriptor;
import java.util.Set;
/**
 * Resource view of one cluster node: its capacity, the resources reported as used and the
 * resources this AM has locally recorded as allocated on it.
 * GPUs are tracked as a bitmap (one bit per GPU) via ResourceDescriptor.getGpuAttribute().
 */
public class Node {
  private final String name;
  private Set<String> nodeLabels;
  private ResourceDescriptor capacity;
  private ResourceDescriptor used;

  // localAllocated remembers the resources of the requests which were already tried locally,
  // so that the same request is not tried again during the AM life cycle.
  // Therefore it is not decreased once the container request succeeds.
  private ResourceDescriptor localAllocated;

  // GPU bitmap passed to the latest allocateResource call
  private long selectedGpuBitmap;

  /**
   * @param name the host name of the node
   * @param label the labels of the node
   * @param capacity the total resources of the node
   * @param used the resources currently reported as used on the node
   */
  public Node(String name, Set<String> label, ResourceDescriptor capacity, ResourceDescriptor used) {
    this.name = name;
    this.nodeLabels = label;
    this.capacity = capacity;
    this.used = used;
    this.localAllocated = ResourceDescriptor.newInstance(0, 0, 0, 0L);
    this.selectedGpuBitmap = 0;
  }

  /**
   * Takes over capacity and used resources from the given node.
   * Labels, locally allocated resources and the selected GPU bitmap are kept.
   */
  public void updateNode(Node updateNode) {
    this.capacity = updateNode.getCapacityResource();
    this.used = updateNode.getUsedResource();
  }

  public ResourceDescriptor getCapacityResource() {
    return this.capacity;
  }

  public ResourceDescriptor getUsedResource() {
    return this.used;
  }

  public String getHostName() {
    return this.name;
  }

  public Set<String> getNodeLabels() {
    return this.nodeLabels;
  }

  public int getTotalNumGpus() {
    return this.capacity.getGpuNumber();
  }

  /** Number of GPUs which are either reported as used or locally allocated. */
  public int getUsedNumGpus() {
    return Long.bitCount(occupiedGpuBits());
  }

  /** Bitmap of the GPUs of this node which are neither used nor locally allocated. */
  public long getNodeGpuStatus() {
    return this.capacity.getGpuAttribute() & ~occupiedGpuBits();
  }

  public int getAvailableNumGpus() {
    return this.capacity.getGpuNumber() - getUsedNumGpus();
  }

  public int getAvailableMemory() {
    return this.capacity.getMemoryMB() - this.used.getMemoryMB() - this.localAllocated.getMemoryMB();
  }

  public int getAvailableCpu() {
    return this.capacity.getCpuNumber() - this.used.getCpuNumber() - this.localAllocated.getCpuNumber();
  }

  public long getSelectedGpuBitmap() {
    return this.selectedGpuBitmap;
  }

  /**
   * Picks GPUs sequentially, starting from the lowest available bit.
   * If fewer GPUs are available than requested, only the available ones are picked.
   *
   * @param requestGPUCount the number of GPUs to pick
   * @return the bitmap of the picked GPUs
   */
  public long calculateCandidateGPU(int requestGPUCount) {
    long remaining = getNodeGpuStatus();
    long picked = 0;
    for (int taken = 0; taken < requestGPUCount; taken++) {
      // Move the lowest available GPU (0 if none is left) from remaining to picked
      long lowest = Long.lowestOneBit(remaining);
      picked |= lowest;
      remaining ^= lowest;
    }
    return picked;
  }

  /**
   * Adds the given resources to the locally allocated ones and remembers the GPU bitmap.
   *
   * @param resource the resources to record
   * @param gpuMap the bitmap of the GPUs selected for the resources
   */
  public void allocateResource(ResourceDescriptor resource, long gpuMap) {
    ResourceDescriptor allocated = this.localAllocated;
    allocated.setCpuNumber(allocated.getCpuNumber() + resource.getCpuNumber());
    allocated.setMemoryMB(allocated.getMemoryMB() + resource.getMemoryMB());
    allocated.setGpuAttribute(allocated.getGpuAttribute() | gpuMap);
    allocated.setGpuNumber(allocated.getGpuNumber() + resource.getGpuNumber());
    this.selectedGpuBitmap = gpuMap;
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(this.name);
    text.append("(capacity: ").append(this.capacity);
    text.append(", used: ").append(this.used);
    text.append(", localAllocated:").append(this.localAllocated);
    text.append(")");
    return text.toString();
  }

  // Hash is derived from the host name only.
  // NOTE(review): equals() is not overridden, so equality is still identity based.
  @Override
  public int hashCode() {
    return this.name.hashCode();
  }

  // Bitmap of the GPUs which are reported as used or locally allocated
  private long occupiedGpuBits() {
    return this.used.getGpuAttribute() | this.localAllocated.getGpuAttribute();
  }
}

Просмотреть файл

@ -0,0 +1,66 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import java.util.List;
/**
 * Forwards the AMRMClientAsync callbacks to the ApplicationMaster.
 */
public class RMClientCallbackHandler implements AMRMClientAsync.CallbackHandler {
  // Receiver of all forwarded callbacks
  private final ApplicationMaster am;

  /**
   * @param am the ApplicationMaster to forward the callbacks to
   */
  public RMClientCallbackHandler(ApplicationMaster am) {
    this.am = am;
  }

  // Forwarded as is
  public void onError(Throwable e) {
    this.am.onError(e);
  }

  // Forwarded as is
  public void onShutdownRequest() {
    this.am.onShutdownRequest();
  }

  /**
   * @return the progress reported by the ApplicationMaster, or 0 if it failed to report one
   */
  public float getProgress() {
    float progress;
    try {
      progress = this.am.getProgress();
    } catch (Exception e) {
      // Any failure is reported as no progress
      progress = 0;
    }
    return progress;
  }

  // Forwarded as is
  public void onNodesUpdated(List<NodeReport> updatedNodes) {
    this.am.onNodesUpdated(updatedNodes);
  }

  // Forwarded as is
  public void onContainersAllocated(List<Container> allocatedContainers) {
    this.am.onContainersAllocated(allocatedContainers);
  }

  // Forwarded as is
  public void onContainersCompleted(List<ContainerStatus> completedContainers) {
    this.am.onContainersCompleted(completedContainers);
  }

  // Forwarded as is
  public void onPreemptionMessage(PreemptionMessage message) {
    this.am.onPreemptionMessage(message);
  }
}

Просмотреть файл

@ -0,0 +1,83 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.utils.DefaultLogger;
import com.microsoft.frameworklauncher.utils.HadoopUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import java.util.HashSet;
/**
 * Periodically fetches the live container ids from RM and reports them to the ApplicationMaster.
 * Each resync is queued through the ApplicationMaster, and queues the following one itself.
 */
public class RMResyncHandler { // THREAD SAFE
  private static final DefaultLogger LOGGER = new DefaultLogger(RMResyncHandler.class);

  // NM expiry interval buffer
  private static final int NM_EXPIRY_INTERVAL_BUFFER_SECONDS = 600;

  private final ApplicationMaster am;
  private final Configuration conf;

  // RMResync interval
  private final int intervalSeconds;

  /**
   * @param am the ApplicationMaster which queues the resyncs and receives the live container ids
   * @param conf the AM configuration, providing the Yarn and the Launcher configuration
   */
  public RMResyncHandler(ApplicationMaster am, Configuration conf) {
    this.am = am;
    this.conf = conf;

    // Using the NMExpiryInterval from RM configuration ensures that AM and RM have the same
    // behaviour to expire the NM container when ContainerConnectionMaxLostCount = RMResyncFrequency.
    YarnConfiguration yarnConfig = conf.getYarnConfig();
    int nmExpiryIntervalMs = yarnConfig.getInt(
        YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
        YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
    int nmExpiryIntervalSeconds = nmExpiryIntervalMs / 1000;

    // During the RM Starting/Down time, the liveContainerIds is incomplete since RM has not fully
    // synced with all NMs to enter a stable RM stage.
    // To make our decisions more reliable, we have to check liveContainerIds at least
    // RMResyncFrequency times before making decisions.
    int resyncFrequency = this.conf.getLauncherConfig().getAmRmResyncFrequency();
    int totalExpirySeconds = nmExpiryIntervalSeconds + NM_EXPIRY_INTERVAL_BUFFER_SECONDS;
    this.intervalSeconds = totalExpirySeconds / resyncFrequency;
  }

  /**
   * Queues the first resync.
   */
  public void start() {
    LOGGER.logInfo("Starting RMResyncHandler");

    // The order is important between executing resyncWithRM and other SystemTasks,
    // so resyncWithRM also needs to be queued to execute.
    // And do not use Timer, otherwise after RM Down for a long time, multiple
    // getLiveContainerIdsFromRM calls will return at the same time with the same
    // incomplete liveContainerIds.
    am.queueResyncWithRM(intervalSeconds);

    LOGGER.logInfo("Running RMResyncHandler");
  }

  /**
   * Fetches the live container ids from RM, hands them to the ApplicationMaster and queues
   * the next resync. If the fetch fails, a warning is logged and null is handed over instead.
   *
   * @throws Exception if the ApplicationMaster callbacks fail
   */
  public void resyncWithRM() throws Exception {
    HashSet<String> liveContainerIds;

    try {
      liveContainerIds = HadoopUtils.getLiveContainerIdsFromRM(conf.getAttemptId(), conf.getAmContainerId());
    } catch (Exception e) {
      LOGGER.logWarning(e,
          "Exception occurred during getLiveContainerIdsFromRM. It should be transient. " +
              "Will retry next time after %ss", intervalSeconds);
      liveContainerIds = null;
    }

    am.onLiveContainersUpdated(liveContainerIds);
    am.queueResyncWithRM(intervalSeconds);
  }
}

Просмотреть файл

@ -0,0 +1,408 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.common.LauncherClientInternal;
import com.microsoft.frameworklauncher.common.WebCommon;
import com.microsoft.frameworklauncher.common.exceptions.NonTransientException;
import com.microsoft.frameworklauncher.common.exceptions.NotAvailableException;
import com.microsoft.frameworklauncher.common.model.*;
import com.microsoft.frameworklauncher.utils.*;
import com.microsoft.frameworklauncher.zookeeperstore.ZookeeperStore;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Level;
import org.apache.zookeeper.KeeperException.NoNodeException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
// Manage the CRUD to ZK Request
// Note:
// Public property and interface is considered as underlay Request which does not need to be
// synchronized with (notified to) AM and it can be changed at any time.
// So, AM can implicitly support some Requests changed on the fly.
public class RequestManager extends AbstractService { // THREAD SAFE
private static final DefaultLogger LOGGER = new DefaultLogger(RequestManager.class);
private final ApplicationMaster am;
private final Configuration conf;
private final ZookeeperStore zkStore;
private final LauncherClientInternal launcherClient;
/**
* REGION BaseRequest
*/
// AM only need to retrieve AggregatedFrameworkRequest
private FrameworkDescriptor frameworkDescriptor = null;
private OverrideApplicationProgressRequest overrideApplicationProgressRequest = null;
// ContainerId -> MigrateTaskRequest
private Map<String, MigrateTaskRequest> migrateTaskRequests = null;
/**
* REGION ExtensionRequest
* ExtensionRequest should be always CONSISTENT with BaseRequest
*/
private PlatformSpecificParametersDescriptor platParams;
// TaskRoleName -> TaskRoleDescriptor
private Map<String, TaskRoleDescriptor> taskRoles;
// TaskRoleName -> RetryPolicyDescriptor
private Map<String, RetryPolicyDescriptor> taskRetryPolicies;
// TaskRoleName -> ServiceDescriptor
private Map<String, ServiceDescriptor> taskServices;
// TaskRoleName -> ResourceDescriptor
private Map<String, ResourceDescriptor> taskResources;
/**
* REGION StateVariable
*/
// -1: not available, 0: does not exist, 1: exists
private volatile int existsLocalVersionFrameworkRequest = -1;
// Used to workaround for bug YARN-314.
// If there are multiple TaskRoles in one Framework and these TaskRoles has different Resource specified,
// we need to make sure the Priority for each TaskRoles is also different, otherwise some TaskRoles may not get resources to run.
// Note:
// 1. With this workaround, User cannot control the Priority anymore.
// 2. No need to persistent this info, since the bug only happens within one application attempt.
// TaskRoleName -> RevisedPriority
private final Map<String, Integer> taskRevisedPriority = new HashMap<>();
/**
* REGION AbstractService
*/
/**
 * Creates the RequestManager service, named after this class.
 *
 * @param am the ApplicationMaster which is notified about exceptions of this service
 * @param conf the AM configuration
 * @param zkStore the ZK store the Requests are read from
 * @param launcherClient the client of the Launcher
 */
public RequestManager(ApplicationMaster am, Configuration conf, ZookeeperStore zkStore, LauncherClientInternal launcherClient) {
  super(RequestManager.class.getName());

  // Collaborators are only stored here
  this.launcherClient = launcherClient;
  this.zkStore = zkStore;
  this.conf = conf;
  this.am = am;
}
/**
 * Logs the exception and reports it to the ApplicationMaster.
 *
 * @param e the exception which occurred in this service
 * @return always false
 */
@Override
protected Boolean handleException(Exception e) {
  super.handleException(e);

  LOGGER.logError(e, "Exception occurred in %1$s. %1$s will be stopped.", serviceName);

  // Rethrowing does not work from another Thread, so the AM is notified through a callback
  am.onExceptionOccurred(e);

  return Boolean.FALSE;
}
// No need to initialize for RequestManager
// No need to recover for RequestManager
// No need to stop ongoing Thread, since zkStore is Atomic
/**
 * Starts a Thread which endlessly checks the AM version and pulls the Request,
 * sleeping AmRequestPullIntervalSec seconds after every round.
 */
@Override
protected void run() throws Exception {
  super.run();

  Runnable pullLoop = () -> {
    while (true) {
      try {
        checkAmVersion();
        pullRequest();
      } catch (Exception e) {
        // Directly hand the exception (including TransientException) to AM,
        // to actively migrate to another node
        handleException(e);
      } finally {
        // Sleep after every round, no matter whether it succeeded
        try {
          Thread.sleep(conf.getLauncherConfig().getAmRequestPullIntervalSec() * 1000);
        } catch (InterruptedException interrupted) {
          handleException(interrupted);
        }
      }
    }
  };

  new Thread(pullLoop).start();
}
/**
* REGION InternalUtils
*/
// Throw NonTransientException to stop AM ASAP, in case the LauncherService or the NodeManager is down,
// which may prevent the AM process from being killed in time.
// Compares the AmVersion stored in the LauncherStatus on ZK with the local one.
// Throws NonTransientException if the LauncherStatus node is missing or the versions differ.
private void checkAmVersion() throws Exception {
  // LauncherStatus should always exist.
  final LauncherStatus latestStatus;
  try {
    latestStatus = zkStore.getLauncherStatus();
  } catch (NoNodeException e) {
    throw new NonTransientException(
        "Failed to getLauncherStatus to checkAmVersion, LauncherStatus is already deleted on ZK", e);
  }

  Integer latestAmVersion = latestStatus.getLauncherConfiguration().getAmVersion();
  if (latestAmVersion.equals(conf.getAmVersion())) {
    return;
  }
  throw new NonTransientException(String.format(
      "AmVersion mismatch: Local Version %s, Latest Version %s",
      conf.getAmVersion(), latestAmVersion));
}
// Pulls the AggregatedFrameworkRequest from ZK and applies each part of it in turn:
// version check, Priority revision, FrameworkDescriptor, progress override, migrate requests.
// A missing node marks the local version Request as non-existent and throws NonTransientException.
private void pullRequest() throws Exception {
  final AggregatedFrameworkRequest pulled;
  try {
    LOGGER.logDebug("Pulling AggregatedFrameworkRequest");
    pulled = zkStore.getAggregatedFrameworkRequest(conf.getFrameworkName());
    LOGGER.logDebug("Pulled AggregatedFrameworkRequest");
  } catch (NoNodeException e) {
    existsLocalVersionFrameworkRequest = 0;
    throw new NonTransientException(
        "Failed to getAggregatedFrameworkRequest, FrameworkRequest is already deleted on ZK", e);
  }

  // The pulled FrameworkDescriptor is always not null
  FrameworkDescriptor latestDescriptor = pulled.getFrameworkRequest().getFrameworkDescriptor();
  checkFrameworkVersion(latestDescriptor);
  reviseFrameworkDescriptor(latestDescriptor);
  updateFrameworkDescriptor(latestDescriptor);

  updateOverrideApplicationProgressRequest(pulled.getOverrideApplicationProgressRequest());
  updateMigrateTaskRequests(pulled.getMigrateTaskRequests());
}
// Records whether the Request on ZK still matches the local FrameworkVersion
// (1 on match, 0 on mismatch) and throws NonTransientException on mismatch.
private void checkFrameworkVersion(FrameworkDescriptor newFrameworkDescriptor) throws Exception {
  if (newFrameworkDescriptor.getVersion().equals(conf.getFrameworkVersion())) {
    existsLocalVersionFrameworkRequest = 1;
    return;
  }

  existsLocalVersionFrameworkRequest = 0;
  throw new NonTransientException(String.format(
      "FrameworkVersion mismatch: Local Version %s, Latest Version %s",
      conf.getFrameworkVersion(), newFrameworkDescriptor.getVersion()));
}
// Overwrites the Priority of every TaskRole with its revised Priority.
// A TaskRole seen for the first time gets the current size of taskRevisedPriority
// and keeps that value on later calls.
private void reviseFrameworkDescriptor(FrameworkDescriptor newFrameworkDescriptor) {
  for (Map.Entry<String, TaskRoleDescriptor> entry : newFrameworkDescriptor.getTaskRoles().entrySet()) {
    String roleName = entry.getKey();
    Integer revisedPriority = taskRevisedPriority.get(roleName);
    if (revisedPriority == null) {
      revisedPriority = taskRevisedPriority.size();
      taskRevisedPriority.put(roleName, revisedPriority);
    }
    entry.getValue().setPriority(revisedPriority);
  }
}
// Logs a warning when the new FrameworkDescriptor differs from the current one in anything
// other than TaskNumber, or when it drops a TaskRole the current one has.
// Does nothing if there is no current FrameworkDescriptor yet.
private void checkUnsupportedOnTheFlyChanges(FrameworkDescriptor newFrameworkDescriptor) throws Exception {
  if (frameworkDescriptor == null) {
    return;
  }

  // Work on a copy so the supported changes can be neutralized without touching the new descriptor.
  FrameworkDescriptor comparable = YamlUtils.deepCopy(newFrameworkDescriptor, FrameworkDescriptor.class);
  Map<String, TaskRoleDescriptor> comparableRoles = comparable.getTaskRoles();

  boolean taskRoleRemoved = false;
  for (Map.Entry<String, TaskRoleDescriptor> current : frameworkDescriptor.getTaskRoles().entrySet()) {
    String roleName = current.getKey();
    if (!comparableRoles.containsKey(roleName)) {
      taskRoleRemoved = true;
      break;
    }
    // TaskNumber is a supported change: copy the current value so it does not count as a difference.
    comparableRoles.get(roleName).setTaskNumber(current.getValue().getTaskNumber());
  }

  // The deep comparison is only evaluated when no TaskRole was removed.
  if (taskRoleRemoved || !YamlUtils.deepEquals(frameworkDescriptor, comparable)) {
    LOGGER.logWarning("Detected unsupported FrameworkDescriptor changes on the fly, the behaviour is undefined.");
  }
}
// Applies a newly pulled FrameworkDescriptor: returns immediately when it deep-equals the
// current one, otherwise logs it, warns about unsupported changes, republishes the derived
// per-TaskRole maps and notifies the AM about what changed.
private void updateFrameworkDescriptor(FrameworkDescriptor newFrameworkDescriptor) throws Exception {
if (YamlUtils.deepEquals(frameworkDescriptor, newFrameworkDescriptor)) {
return;
}
LOGGER.logSplittedLines(Level.INFO,
"Detected FrameworkDescriptor changes. Updating to new FrameworkDescriptor:\n%s",
WebCommon.toJson(newFrameworkDescriptor));
// Must run before the reference below is replaced: it compares against the current descriptor.
checkUnsupportedOnTheFlyChanges(newFrameworkDescriptor);
// Replace on the fly FrameworkDescriptor with newFrameworkDescriptor.
// The operation is Atomic, since it only modifies the reference.
// So, the on going read for the old FrameworkDescriptor will not get intermediate results
frameworkDescriptor = newFrameworkDescriptor;
// Backup old to detect changes
PlatformSpecificParametersDescriptor oldPlatParams = platParams;
Map<String, TaskRoleDescriptor> oldTaskRoles = taskRoles;
Map<String, ServiceDescriptor> oldTaskServices = taskServices;
// Update ExtensionRequest
// NOTE(review): these fields are assigned one by one, so a concurrent reader may see a mix of
// old and new values across different getters - confirm whether callers tolerate that.
platParams = frameworkDescriptor.getPlatformSpecificParameters();
taskRoles = frameworkDescriptor.getTaskRoles();
taskRetryPolicies = new HashMap<>();
taskServices = new HashMap<>();
taskResources = new HashMap<>();
for (Map.Entry<String, TaskRoleDescriptor> taskRole : taskRoles.entrySet()) {
taskRetryPolicies.put(taskRole.getKey(), taskRole.getValue().getTaskRetryPolicy());
taskServices.put(taskRole.getKey(), taskRole.getValue().getTaskService());
taskResources.put(taskRole.getKey(), taskRole.getValue().getTaskService().getResource());
}
Map<String, Integer> taskNumbers = getTaskNumbers(taskRoles);
Map<String, Integer> serviceVersions = getServiceVersions(taskServices);
// Notify AM to take actions for Request
// A null previous platParams is used as the marker for the very first update.
if (oldPlatParams == null) {
// For the first time, send all Request to AM
am.onTaskNodeLabelUpdated(platParams.getTaskNodeLabel());
am.onServiceVersionsUpdated(serviceVersions);
am.onTaskNumbersUpdated(taskNumbers);
{
// Only start them for the first time
am.onStartRMResyncHandler();
// Start TransitionTaskStateQueue at last, in case some Tasks in the queue
// depend on the Request or previous AM Notify.
am.onStartTransitionTaskStateQueue();
}
} else {
// For the other times, only send changed Request to AM
if (!StringUtils.equals(oldPlatParams.getTaskNodeLabel(), platParams.getTaskNodeLabel())) {
am.onTaskNodeLabelUpdated(platParams.getTaskNodeLabel());
}
if (!CommonExtensions.equals(getServiceVersions(oldTaskServices), serviceVersions)) {
am.onServiceVersionsUpdated(serviceVersions);
}
if (!CommonExtensions.equals(getTaskNumbers(oldTaskRoles), taskNumbers)) {
am.onTaskNumbersUpdated(taskNumbers);
}
}
}
// Stores the new OverrideApplicationProgressRequest and logs it, but only when it
// differs (by deep comparison) from the one currently held.
private void updateOverrideApplicationProgressRequest(
    OverrideApplicationProgressRequest newOverrideApplicationProgressRequest) throws IOException {
  boolean unchanged =
      YamlUtils.deepEquals(overrideApplicationProgressRequest, newOverrideApplicationProgressRequest);
  if (!unchanged) {
    LOGGER.logSplittedLines(Level.INFO,
        "Detected OverrideApplicationProgressRequest changes. Updating to new OverrideApplicationProgressRequest:\n%s",
        WebCommon.toJson(newOverrideApplicationProgressRequest));

    // No need to notify AM, since getApplicationProgress is CallIn instead of CallBack
    overrideApplicationProgressRequest = newOverrideApplicationProgressRequest;
  }
}
// Notifies the AM about every MigrateTaskRequest that is present in the new map but not in
// the current one, then replaces the current map with a read-only view of the new one.
// Returns early when both maps are deep-equal.
private void updateMigrateTaskRequests(Map<String, MigrateTaskRequest> newMigrateTaskRequests) throws IOException {
  if (YamlUtils.deepEquals(migrateTaskRequests, newMigrateTaskRequests)) {
    return;
  }

  // MigrateTaskRequest only can be Added by User and Deleted by AM,
  // so here we only need to notify AM the Added.
  if (newMigrateTaskRequests != null) {
    for (Map.Entry<String, MigrateTaskRequest> candidate : newMigrateTaskRequests.entrySet()) {
      String containerId = candidate.getKey();
      boolean alreadyKnown = migrateTaskRequests != null && migrateTaskRequests.containsKey(containerId);
      if (!alreadyKnown) {
        am.onMigrateTaskRequested(containerId, candidate.getValue());
      }
    }
  }

  migrateTaskRequests = CommonExtensions.asReadOnly(newMigrateTaskRequests);
}
// Projects TaskRoleName -> TaskRoleDescriptor into a new TaskRoleName -> TaskNumber map.
private Map<String, Integer> getTaskNumbers(Map<String, TaskRoleDescriptor> taskRoles) {
  Map<String, Integer> numbersByRole = new HashMap<>();
  taskRoles.forEach((roleName, role) -> numbersByRole.put(roleName, role.getTaskNumber()));
  return numbersByRole;
}
// Projects TaskRoleName -> ServiceDescriptor into a new TaskRoleName -> ServiceVersion map.
private Map<String, Integer> getServiceVersions(Map<String, ServiceDescriptor> taskServices) {
  Map<String, Integer> versionsByRole = new HashMap<>();
  taskServices.forEach((roleName, service) -> versionsByRole.put(roleName, service.getVersion()));
  return versionsByRole;
}
/**
* REGION ReadInterface
*/
// Returns the PlatformSpecificParameters of the most recently applied FrameworkDescriptor;
// null until updateFrameworkDescriptor has assigned it.
public PlatformSpecificParametersDescriptor getPlatParams() {
return platParams;
}
// Returns the current TaskRoleName -> TaskRoleDescriptor map (the internal instance, not a copy).
public Map<String, TaskRoleDescriptor> getTaskRoles() {
return taskRoles;
}
// Returns the current TaskRoleName -> RetryPolicyDescriptor map (the internal instance, not a copy).
public Map<String, RetryPolicyDescriptor> getTaskRetryPolicies() {
return taskRetryPolicies;
}
// Returns the current TaskRoleName -> ServiceDescriptor map (the internal instance, not a copy).
public Map<String, ServiceDescriptor> getTaskServices() {
return taskServices;
}
// Returns the current TaskRoleName -> ResourceDescriptor map (the internal instance, not a copy).
public Map<String, ResourceDescriptor> getTaskResources() {
return taskResources;
}
// Looks up the Service version of the given TaskRole in the current taskRoles map.
// The TaskRole is expected to exist; an unknown name fails with a NullPointerException.
public Integer getServiceVersion(String taskRoleName) {
  TaskRoleDescriptor role = taskRoles.get(taskRoleName);
  ServiceDescriptor service = role.getTaskService();
  return service.getVersion();
}
// Returns the ApplicationProgress overridden by the user through OverrideApplicationProgressRequest.
// Throws Exception when no override is currently set, or when the overridden value is negative
// (or NaN), so that callers can handle "no usable override" uniformly instead of hitting a bare
// NullPointerException.
public Float getApplicationProgress() throws Exception {
  // Read the field once: it is replaced by the pulling Thread in updateOverrideApplicationProgressRequest.
  OverrideApplicationProgressRequest request = overrideApplicationProgressRequest;
  Number overriddenProgress = (request == null) ? null : request.getApplicationProgress();
  if (overriddenProgress == null) {
    throw new Exception("ApplicationProgress is not overridden by OverrideApplicationProgressRequest");
  }

  Float progress = overriddenProgress.floatValue();
  if (progress >= 0) {
    return progress;
  } else {
    throw new Exception(String.format(
        "ApplicationProgress %s is not nonnegative", progress));
  }
}
// Tells whether a FrameworkRequest for the local FrameworkVersion exists on ZK.
// Throws NotAvailableException while the state is still -1, i.e. not determined yet.
public boolean existsLocalVersionFrameworkRequest() throws NotAvailableException {
  if (existsLocalVersionFrameworkRequest != -1) {
    return existsLocalVersionFrameworkRequest == 1;
  }
  throw new NotAvailableException("FrameworkRequest for local FrameworkVersion is not available");
}
/**
* REGION Callbacks
*/
// Callback for when the Container of a MigrateTaskRequest has been released: asks the
// launcherClient to delete the corresponding MigrateTask. Failures are only logged as a
// warning and never propagated to the caller.
public void onMigrateTaskRequestContainerReleased(String containerId) {
try {
LOGGER.logDebug("[%s]: onMigrateTaskRequestContainerReleased", containerId);
launcherClient.deleteMigrateTask(conf.getFrameworkName(), containerId);
} catch (Exception e) {
// Best Effort to deleteMigrateTask
LOGGER.logWarning(e,
"[%s]: Failed to deleteMigrateTask", containerId);
}
}
}

Просмотреть файл

@ -0,0 +1,717 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.common.exceptions.NonTransientException;
import com.microsoft.frameworklauncher.common.exceptions.NotAvailableException;
import com.microsoft.frameworklauncher.common.model.*;
import com.microsoft.frameworklauncher.utils.*;
import com.microsoft.frameworklauncher.zookeeperstore.ZookeeperStore;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.zookeeper.KeeperException;
import java.util.*;
// Manage the CRUD to ZK Status
public class StatusManager extends AbstractService { // THREAD SAFE
private static final DefaultLogger LOGGER = new DefaultLogger(StatusManager.class);
private final ApplicationMaster am;
private final Configuration conf;
private final ZookeeperStore zkStore;
/**
* REGION BaseStatus
*/
// AM only needs to maintain TaskRoleStatus and TaskStatuses, and it is the only maintainer.
// TaskRoleName -> TaskRoleStatus
private Map<String, TaskRoleStatus> taskRoleStatuses = new HashMap<>();
// TaskRoleName -> TaskStatuses
private Map<String, TaskStatuses> taskStatuseses = new HashMap<>();
/**
* REGION ExtensionStatus
* ExtensionStatus should be always CONSISTENT with BaseStatus
*/
// Whether Mem Status is changed since previous zkStore update
// TaskRoleName -> TaskRoleStatusChanged
private Map<String, Boolean> taskRoleStatusesChanged = new HashMap<>();
// TaskRoleName -> TaskStatusesChanged
private Map<String, Boolean> taskStatusesesChanged = new HashMap<>();
// Used to invert index TaskStatus by ContainerId/TaskState instead of TaskStatusLocator, i.e. TaskRoleName + TaskIndex
// TaskState -> TaskStatusLocators
private Map<TaskState, HashSet<TaskStatusLocator>> taskStateLocators = new HashMap<>();
// Live Associated ContainerId -> TaskStatusLocator
private Map<String, TaskStatusLocator> liveAssociatedContainerIdLocators = new HashMap<>();
// Live Associated HostNames
// TODO: Using MachineName instead of HostName to avoid unstable HostName Resolution
private HashSet<String> liveAssociatedHostNames = new HashSet<>();
/**
* REGION AbstractService
*/
// Builds the StatusManager service, named after this class, and keeps
// references to the collaborators it needs for maintaining Status.
public StatusManager(ApplicationMaster am, Configuration conf, ZookeeperStore zkStore) {
  super(StatusManager.class.getName());
  this.zkStore = zkStore;
  this.conf = conf;
  this.am = am;
}
// Runs the base handling, logs the failure, then forwards the exception to the AM.
// Always returns false.
@Override
protected Boolean handleException(Exception e) {
  super.handleException(e);
  LOGGER.logError(e, "Exception occurred in %1$s. %1$s will be stopped.", serviceName);
  // Rethrowing does not work from another Thread, so the AM is notified through its callback.
  am.onExceptionOccurred(e);
  return Boolean.FALSE;
}
// Creates an empty locator set for every TaskState, so later lookups in
// taskStateLocators never miss a key.
@Override
protected void initialize() throws Exception {
  super.initialize();
  Arrays.stream(TaskState.values())
      .forEach(state -> taskStateLocators.put(state, new HashSet<>()));
}
// Recovers the in-memory Status from the AggregatedFrameworkStatus stored on ZK.
// First pass: load the status and validate the FrameworkVersion of every TaskRole.
// Second pass (only if the first succeeded): populate the Base and Extension Status maps.
@Override
protected void recover() throws Exception {
super.recover();
AggregatedFrameworkStatus aggFrameworkStatus;
try {
aggFrameworkStatus = zkStore.getAggregatedFrameworkStatus(conf.getFrameworkName());
for (Map.Entry<String, AggregatedTaskRoleStatus> aggTaskRoleStatus :
aggFrameworkStatus.getAggregatedTaskRoleStatuses().entrySet()) {
String taskRoleName = aggTaskRoleStatus.getKey();
TaskRoleStatus taskRoleStatus = aggTaskRoleStatus.getValue().getTaskRoleStatus();
TaskStatuses taskStatuses = aggTaskRoleStatus.getValue().getTaskStatuses();
if (!taskRoleStatus.getFrameworkVersion().equals(conf.getFrameworkVersion())) {
throw new NonTransientException(String.format(
"[%s]: FrameworkVersion mismatch: Local Version %s, Previous TaskRoleStatus Version %s",
taskRoleName, conf.getFrameworkVersion(), taskRoleStatus.getFrameworkVersion()));
}
if (!taskStatuses.getFrameworkVersion().equals(conf.getFrameworkVersion())) {
throw new NonTransientException(String.format(
"[%s]: FrameworkVersion mismatch: Local Version %s, Previous TaskStatuses Version %s",
taskRoleName, conf.getFrameworkVersion(), taskStatuses.getFrameworkVersion()));
}
}
} catch (KeeperException.NoNodeException e) {
throw new NonTransientException(
"Failed to getAggregatedFrameworkStatus, FrameworkStatus is already deleted on ZK", e);
} catch (KeeperException e) {
// Any other ZK failure is propagated unchanged.
throw e;
} catch (Exception e) {
// NOTE(review): assuming NonTransientException is an ordinary Exception subclass (not a
// KeeperException), the FrameworkVersion mismatch exceptions thrown in the try block above are
// caught here too, so a mismatch ends in a reinitialization instead of propagating - confirm
// this is intended.
LOGGER.logError(e,
"Failed to recover %s. Reinitializing all TaskRoleStatuses and TaskStatuseses in the Framework on ZK.",
serviceName);
zkStore.deleteFrameworkStatus(conf.getFrameworkName(), true);
aggFrameworkStatus = null;
}
// null here means the status on ZK was discarded above; the in-memory Status then stays empty.
if (aggFrameworkStatus != null) {
for (Map.Entry<String, AggregatedTaskRoleStatus> aggTaskRoleStatus :
aggFrameworkStatus.getAggregatedTaskRoleStatuses().entrySet()) {
String taskRoleName = aggTaskRoleStatus.getKey();
TaskRoleStatus taskRoleStatus = aggTaskRoleStatus.getValue().getTaskRoleStatus();
TaskStatuses taskStatuses = aggTaskRoleStatus.getValue().getTaskStatuses();
taskRoleStatuses.put(taskRoleName, taskRoleStatus);
taskStatuseses.put(taskRoleName, taskStatuses);
// Just loaded from ZK, so nothing needs to be pushed back yet.
taskRoleStatusesChanged.put(taskRoleName, false);
taskStatusesesChanged.put(taskRoleName, false);
List<TaskStatus> taskStatusArray = taskStatuses.getTaskStatusArray();
// Rebuild the Extension Status indexes for every recovered Task.
for (int taskIndex = 0; taskIndex < taskStatusArray.size(); taskIndex++) {
addExtensionTaskStatus(new TaskStatusLocator(taskRoleName, taskIndex));
}
}
LOGGER.logInfo("Succeeded to recover %s.", serviceName);
}
// Here ZK and Mem Status is the same.
// Since Request may be ahead of Status even when Running,
// so here the Recovery of AM StatusManager is completed.
}
// Starts a Thread that forever pushes the Status to ZK,
// sleeping for the configured push interval after every round.
@Override
protected void run() throws Exception {
  super.run();

  Runnable pushLoop = () -> {
    for (; ; ) {
      try {
        pushStatus();
      } catch (Exception pushFailure) {
        // Directly throw TransientException to AM to actively migrate to another node
        handleException(pushFailure);
      } finally {
        // The sleep runs after every round, whether or not the round failed.
        try {
          Thread.sleep(conf.getLauncherConfig().getAmStatusPushIntervalSec() * 1000);
        } catch (InterruptedException interrupted) {
          handleException(interrupted);
        }
      }
    }
  };
  new Thread(pushLoop).start();
}
// Stops the service and pushes the Status one final time. Any failure along the way is
// only logged as a warning, so stop itself never throws.
@Override
public void stop(StopStatus stopStatus) {
// Best Effort to stop Gracefully
try {
super.stop(stopStatus);
LOGGER.logInfo("pushStatus for the last time before stop %s.", serviceName);
pushStatus();
// No need to stop ongoing Thread, since zkStore is Atomic
} catch (Exception e) {
LOGGER.logWarning(e, "Failed to stop %s gracefully", serviceName);
}
}
/**
* REGION InternalUtils
*/
// Asserts that the locator points at an existing Task.
// Uses the Java assert statement, so the check only runs when assertions are enabled.
private void assertTaskStatusLocator(TaskStatusLocator locator) {
assert containsTask(locator);
}
// Asserts that the containerId is currently live associated with a Task.
// Uses the Java assert statement, so the check only runs when assertions are enabled.
private void assertLiveAssociatedContainerId(String containerId) {
assert isContainerIdLiveAssociated(containerId);
}
// Writes every TaskRoleStatus and TaskStatuses flagged as changed to ZK and clears the flag.
// Skips the whole push when the FrameworkRequest for the local FrameworkVersion does not
// exist or its existence is not known yet.
private synchronized void pushStatus() throws Exception {
  // TODO: Store AttemptId in AMStatus, and double check it before pushStatus

  // Best Effort to avoid pushStatus, if the FrameworkRequest for local FrameworkVersion does not exist
  boolean localVersionRequestExists;
  try {
    localVersionRequestExists = am.existsLocalVersionFrameworkRequest();
  } catch (NotAvailableException e) {
    LOGGER.logInfo(e, "FrameworkRequest for local FrameworkVersion is not available, skip to pushStatus");
    return;
  }
  if (!localVersionRequestExists) {
    LOGGER.logInfo("FrameworkRequest for local FrameworkVersion does not exist, skip to pushStatus");
    return;
  }

  // Push TaskRoleStatuses
  for (TaskRoleStatus roleStatus : taskRoleStatuses.values()) {
    String roleName = roleStatus.getTaskRoleName();
    if (!taskRoleStatusesChanged.get(roleName)) {
      continue;
    }
    LOGGER.logInfo("[%s]: Pushing TaskRoleStatus", roleName);
    zkStore.setTaskRoleStatus(conf.getFrameworkName(), roleName, taskRoleStatuses.get(roleName));
    taskRoleStatusesChanged.put(roleName, false);
    LOGGER.logInfo("[%s]: Pushed TaskRoleStatus", roleName);
  }

  // Push TaskStatuseses
  for (TaskStatuses roleTaskStatuses : taskStatuseses.values()) {
    String roleName = roleTaskStatuses.getTaskRoleName();
    if (!taskStatusesesChanged.get(roleName)) {
      continue;
    }
    LOGGER.logInfo("[%s]: Pushing TaskStatuses", roleName);
    zkStore.setTaskStatuses(conf.getFrameworkName(), roleName, taskStatuseses.get(roleName));
    taskStatusesesChanged.put(roleName, false);
    logTaskStateCounters(roleName);
    LOGGER.logInfo("[%s]: Pushed TaskStatuses", roleName);
  }
}
// Should call disassociateTaskWithContainer if associateTaskWithContainer failed
// Copies the Container's identity (id, host, resolved IP, log address, GPUs) into the Task's
// status, resets its connection lost count to 0 and flags the TaskRole's TaskStatuses as changed.
// May throw part way through, leaving the TaskStatus partially filled.
private void associateTaskWithContainer(TaskStatusLocator locator, Container container) throws Exception {
TaskStatus taskStatus = getTaskStatus(locator);
String containerId = container.getId().toString();
taskStatus.setContainerId(containerId);
taskStatus.setContainerHost(container.getNodeId().getHost());
// The IP is resolved from the host that was stored on the line above.
taskStatus.setContainerIp(
DnsClient.resolveExternalIPv4Address(taskStatus.getContainerHost()));
taskStatus.setContainerLogHttpAddress(
HadoopUtils.getContainerLogHttpAddress(container.getNodeHttpAddress(), containerId, conf.getAmUser()));
taskStatus.setContainerConnectionLostCount(0);
taskStatus.setContainerGpus(
ResourceDescriptor.fromResource(container.getResource()).getGpuAttribute());
taskStatusesesChanged.put(locator.getTaskRoleName(), true);
}
// Clears every Container related field of the Task's status (exit type is reset to
// NOT_AVAILABLE, everything else to null) and flags the TaskRole's TaskStatuses as changed.
private void disassociateTaskWithContainer(TaskStatusLocator locator) {
TaskStatus taskStatus = getTaskStatus(locator);
taskStatus.setContainerId(null);
taskStatus.setContainerHost(null);
taskStatus.setContainerIp(null);
taskStatus.setContainerLogHttpAddress(null);
taskStatus.setContainerConnectionLostCount(null);
taskStatus.setContainerIsDecommissioning(null);
taskStatus.setContainerLaunchedTimestamp(null);
taskStatus.setContainerCompletedTimestamp(null);
taskStatus.setContainerExitCode(null);
taskStatus.setContainerExitDiagnostics(null);
taskStatus.setContainerExitType(ExitType.NOT_AVAILABLE);
taskStatus.setContainerGpus(null);
taskStatusesesChanged.put(locator.getTaskRoleName(), true);
}
// Keeps the live-association indexes in sync with one Task's Container.
// isLive == true : registers the Task's ContainerId and HostName as live associated.
// isLive == false: unregisters the ContainerId, and unregisters the HostName only when no
//                  other live associated Container is still located on that host.
// Must be called while the TaskStatus still carries its Container fields.
private void updateExtensionTaskStatusWithContainerLiveness(TaskStatusLocator locator, Boolean isLive) {
  TaskStatus taskStatus = getTaskStatus(locator);
  String containerId = taskStatus.getContainerId();
  String containerHostName = taskStatus.getContainerHost();

  if (isLive) {
    liveAssociatedContainerIdLocators.put(containerId, locator);
    liveAssociatedHostNames.add(containerHostName);
    return;
  }

  liveAssociatedContainerIdLocators.remove(containerId);

  // liveAssociatedHostNames is a plain Set, so several Containers on the same host share one
  // entry. Dropping it unconditionally would report the host as free while other live
  // Containers still run there.
  for (TaskStatusLocator otherLocator : liveAssociatedContainerIdLocators.values()) {
    if (Objects.equals(getTaskStatus(otherLocator).getContainerHost(), containerHostName)) {
      return;
    }
  }
  liveAssociatedHostNames.remove(containerHostName);
}
// Shrinks the TaskRole to newTaskNumber Tasks by removing Tasks from the tail of its
// TaskStatus array. For each removed Task the AM is notified first, then the Extension Status
// is updated, then the Task is dropped. Flags the TaskRole's TaskStatuses as changed.
private void decreaseTaskNumber(String taskRoleName, int newTaskNumber) {
  List<TaskStatus> roleTasks = taskStatuseses.get(taskRoleName).getTaskStatusArray();
  int oldTaskNumber = roleTasks.size();

  LOGGER.logInfo(
      "[%s]: Decrease TaskNumber from [%s] to [%s]",
      taskRoleName, oldTaskNumber, newTaskNumber);
  LOGGER.logInfo(
      "[%s]: Remove Tasks in TaskIndex range [%s, %s). Will release the corresponding Container later.",
      taskRoleName, newTaskNumber, oldTaskNumber);

  // Pop TaskStatuses Stack
  int tailIndex = oldTaskNumber;
  while (--tailIndex >= newTaskNumber) {
    TaskStatusLocator tailLocator = new TaskStatusLocator(taskRoleName, tailIndex);

    // Notify AM to Cleanup Task level external resource [RM] immediately
    // instead of waiting until next round RMResync
    am.onTaskToRemove(getTaskStatus(tailLocator));

    // Update ExtensionStatus
    removeExtensionTaskStatus(tailLocator);

    // To ensure other Task's TaskIndex unchanged, we have to remove the Task at tail
    roleTasks.remove(tailIndex);
  }

  taskStatusesesChanged.put(taskRoleName, true);
}
// Removes the Task from the TaskState index and, if its state is a Container live
// associated one, from the live-association indexes as well.
private void removeExtensionTaskStatus(TaskStatusLocator locator) {
  TaskState currentState = getTaskStatus(locator).getTaskState();
  taskStateLocators.get(currentState).remove(locator);

  boolean liveAssociated = TaskStateDefinition.CONTAINER_LIVE_ASSOCIATED_STATES.contains(currentState);
  if (liveAssociated) {
    updateExtensionTaskStatusWithContainerLiveness(locator, false);
  }
}
// Grows the TaskRole to newTaskNumber Tasks by appending fresh TASK_WAITING Tasks to the tail
// of its TaskStatus array and indexing each of them in the Extension Status.
// Flags the TaskRole's TaskStatuses as changed.
private void increaseTaskNumber(String taskRoleName, int newTaskNumber) {
  List<TaskStatus> roleTasks = taskStatuseses.get(taskRoleName).getTaskStatusArray();

  LOGGER.logInfo(
      "[%s]: Increase TaskNumber from [%s] to [%s]",
      taskRoleName, roleTasks.size(), newTaskNumber);
  LOGGER.logInfo(
      "[%s]: Add Tasks in TaskIndex range [%s, %s). Will request the corresponding Container later.",
      taskRoleName, roleTasks.size(), newTaskNumber);

  // Push TaskStatuses Stack
  while (roleTasks.size() < newTaskNumber) {
    // The next TaskIndex is always the current tail position.
    int nextIndex = roleTasks.size();

    TaskStatus created = new TaskStatus();
    created.setTaskIndex(nextIndex);
    created.setTaskRoleName(taskRoleName);
    created.setTaskState(TaskState.TASK_WAITING);
    created.setTaskRetryPolicyState(new RetryPolicyState());
    created.setTaskCreatedTimestamp(System.currentTimeMillis());
    created.setTaskServiceStatus(new ServiceStatus());
    created.getTaskServiceStatus().setServiceVersion(am.getServiceVersion(taskRoleName));

    // To ensure other Task's TaskIndex unchanged, we have to add the Task at tail
    // The corresponding Containers will be requested by following AddContainerRequest
    roleTasks.add(created);

    // Update ExtensionStatus
    addExtensionTaskStatus(new TaskStatusLocator(taskRoleName, nextIndex));
  }

  taskStatusesesChanged.put(taskRoleName, true);
}
// Adds the Task to the TaskState index and, if its state is a Container live
// associated one, to the live-association indexes as well.
private void addExtensionTaskStatus(TaskStatusLocator locator) {
  TaskState currentState = getTaskStatus(locator).getTaskState();
  taskStateLocators.get(currentState).add(locator);

  boolean liveAssociated = TaskStateDefinition.CONTAINER_LIVE_ASSOCIATED_STATES.contains(currentState);
  if (liveAssociated) {
    updateExtensionTaskStatusWithContainerLiveness(locator, true);
  }
}
// Stores a new connection lost count for the Task that owns the live associated Container.
// The TaskStatuses are flagged as changed only when the value actually differs.
private void setContainerConnectionLostCount(String containerId, int count) {
  TaskStatus owner = getTaskStatusWithLiveAssociatedContainerId(containerId);
  if (owner.getContainerConnectionLostCount() == count) {
    return;
  }
  owner.setContainerConnectionLostCount(count);
  taskStatusesesChanged.put(owner.getTaskRoleName(), true);
}
// Logs one line per TaskState label with the number of Tasks of the TaskRole in that state.
private void logTaskStateCounters(String taskRoleName) {
  getTaskStateCounters(taskRoleName).forEach((stateLabel, taskCount) ->
      LOGGER.logInfo(
          "TaskStateCounters: [%s][%s]: Count %s",
          taskRoleName, stateLabel, taskCount));
}
// Counts the Tasks of the TaskRole per state label. The label is the TaskState name, except
// that TASK_COMPLETED is split into "TaskSucceeded" (exit code 0) and "TaskFailed".
private synchronized Map<String, Integer> getTaskStateCounters(String taskRoleName) {
  Map<String, Integer> countersByLabel = new HashMap<>();

  for (TaskStatus task : taskStatuseses.get(taskRoleName).getTaskStatusArray()) {
    TaskState state = task.getTaskState();
    Integer exitCode = task.getContainerExitCode();

    String label;
    if (state != TaskState.TASK_COMPLETED) {
      label = state.toString();
    } else {
      // Override TASK_COMPLETED to provide more detailed TaskState
      label = (exitCode == 0) ? "TaskSucceeded" : "TaskFailed";
    }

    countersByLabel.merge(label, 1, Integer::sum);
  }

  return countersByLabel;
}
/**
* REGION ReadInterface
*/
// Returned TaskStatus is readonly, caller should not modify it
// Resolves a locator (TaskRoleName + TaskIndex) to the TaskStatus it points at.
public synchronized TaskStatus getTaskStatus(TaskStatusLocator locator) {
  assertTaskStatusLocator(locator);
  List<TaskStatus> roleTasks = taskStatuseses.get(locator.getTaskRoleName()).getTaskStatusArray();
  return roleTasks.get(locator.getTaskIndex());
}
// Returned TaskStatus is readonly, caller should not modify it
// Returns the TaskStatus of every Task whose TaskState is in taskStateSet.
public synchronized List<TaskStatus> getTaskStatus(Set<TaskState> taskStateSet) {
return getTaskStatus(taskStateSet, true);
}
// Returned TaskStatus is readonly, caller should not modify it
// Returns the TaskStatus of every Task whose TaskState is in taskStateSet (contains == true)
// or is not in taskStateSet (contains == false). A new list is built on every call.
public synchronized List<TaskStatus> getTaskStatus(Set<TaskState> taskStateSet, Boolean contains) {
  HashSet<TaskState> wantedStates = new HashSet<>();
  if (!contains) {
    // Complement: every TaskState that the caller did not list.
    for (TaskState candidate : TaskState.values()) {
      if (!taskStateSet.contains(candidate)) {
        wantedStates.add(candidate);
      }
    }
  } else {
    wantedStates.addAll(taskStateSet);
  }

  List<TaskStatus> matched = new ArrayList<>();
  for (TaskState wanted : wantedStates) {
    for (TaskStatusLocator locator : taskStateLocators.get(wanted)) {
      matched.add(getTaskStatus(locator));
    }
  }
  return matched;
}
// Returned TaskStatus is readonly, caller should not modify it
// Returns the TaskStatus of the Task that the given Container is live associated with.
public synchronized TaskStatus getTaskStatusWithLiveAssociatedContainerId(String containerId) {
  assertLiveAssociatedContainerId(containerId);
  TaskStatusLocator owner = liveAssociatedContainerIdLocators.get(containerId);
  return getTaskStatus(owner);
}
// Returned TaskStatus is readonly, caller should not modify it
// Returns the TaskStatus of every Task in a final state whose Container exit code
// is not the SUCCEEDED exit code.
public synchronized List<TaskStatus> getFailedTaskStatus() {
  ArrayList<TaskStatus> failed = new ArrayList<>();
  for (TaskStatus finished : getTaskStatus(TaskStateDefinition.FINAL_STATES)) {
    if (finished.getContainerExitCode() == ExitStatusKey.SUCCEEDED.toInt()) {
      continue;
    }
    failed.add(finished);
  }
  return failed;
}
// Returns a new list holding the ids of all Containers that are currently live associated.
public synchronized List<String> getLiveAssociatedContainerIds() {
return new ArrayList<>(liveAssociatedContainerIdLocators.keySet());
}
// Tells whether the Container id is currently registered as live associated with a Task.
public synchronized Boolean isContainerIdLiveAssociated(String containerId) {
return liveAssociatedContainerIdLocators.containsKey(containerId);
}
// Returns a new list holding all HostNames that are currently registered as live associated.
public synchronized List<String> getLiveAssociatedHostNames() {
return new ArrayList<>(liveAssociatedHostNames);
}
// Tells whether the HostName is currently registered as live associated.
public synchronized Boolean isHostNameLiveAssociated(String hostName) {
return liveAssociatedHostNames.contains(hostName);
}
// Tells whether the locator points at an existing Task: its TaskRole must be known and
// its TaskIndex must lie within [0, number of Tasks of that TaskRole).
public synchronized Boolean containsTask(TaskStatusLocator locator) {
  String roleName = locator.getTaskRoleName();
  if (!taskStatuseses.containsKey(roleName)) {
    return false;
  }
  int roleTaskCount = taskStatuseses.get(roleName).getTaskStatusArray().size();
  return roleTaskCount > locator.getTaskIndex() && locator.getTaskIndex() >= 0;
}
// Returns the number of Tasks currently tracked for the given TaskRole.
// The TaskRole is expected to exist in taskStatuseses.
public synchronized int getTaskCount(String taskRoleName) {
return taskStatuseses.get(taskRoleName).getTaskStatusArray().size();
}
// Returns the number of Tasks currently tracked across all TaskRoles.
public synchronized int getTaskCount() {
  int total = 0;
  for (TaskStatuses roleTaskStatuses : taskStatuseses.values()) {
    total += roleTaskStatuses.getTaskStatusArray().size();
  }
  return total;
}
// Returns the number of Tasks whose TaskState is in taskStateSet.
public synchronized int getTaskCount(Set<TaskState> taskStateSet) {
return getTaskStatus(taskStateSet).size();
}
// Returns the number of Tasks whose TaskState is one of TaskStateDefinition.START_STATES.
public synchronized int getStartStateTaskCount() {
return getTaskCount(TaskStateDefinition.START_STATES);
}
// Returns the number of Tasks whose TaskState is one of TaskStateDefinition.FINAL_STATES.
public synchronized int getFinalStateTaskCount() {
return getTaskCount(TaskStateDefinition.FINAL_STATES);
}
// Tells whether every tracked Task is in a final state.
// Also true when no Task is tracked at all, since both counts are then 0.
public synchronized Boolean isAllTaskInFinalState() {
return (getFinalStateTaskCount() == getTaskCount());
}
// Returns the fraction of Tasks that are in a final state.
// Throws Exception when the fraction is not >= 0; that includes the NaN produced
// by the float division when no Task is tracked.
public synchronized float getApplicationProgress() throws Exception {
  float finishedRatio = (float) getFinalStateTaskCount() / getTaskCount();
  if (!(finishedRatio >= 0)) {
    throw new Exception(String.format(
        "ApplicationProgress %s is not nonnegative", finishedRatio));
  }
  return finishedRatio;
}
/**
* REGION ModifyInterface
* Note to avoid update partially modified Status on ZK
*/
// transitionTaskState is the only interface to modify TaskState for both internal and external
// Transitions the Task to dstState without a Container, with the exit code set to
// NOT_AVAILABLE, empty diagnostics and no new RetryPolicyState.
public void transitionTaskState(
TaskStatusLocator locator,
TaskState dstState) throws Exception {
transitionTaskState(locator, dstState, null, ExitStatusKey.NOT_AVAILABLE.toInt(), "", null);
}
// Transitions the Task to dstState with the given Container, with the exit code set to
// NOT_AVAILABLE, empty diagnostics and no new RetryPolicyState.
public void transitionTaskState(
TaskStatusLocator locator,
TaskState dstState,
Container container) throws Exception {
transitionTaskState(locator, dstState, container, ExitStatusKey.NOT_AVAILABLE.toInt(), "", null);
}
// Transitions the Task to dstState with the given Container, exit code and diagnostics,
// and no new RetryPolicyState.
public void transitionTaskState(
TaskStatusLocator locator,
TaskState dstState,
Container container,
int containerExitCode,
String containerExitDiagnostics) throws Exception {
transitionTaskState(locator, dstState, container, containerExitCode, containerExitDiagnostics, null);
}
/**
 * Core implementation behind every transitionTaskState overload: moves the Task
 * addressed by locator from its current TaskState to dstState and updates all
 * bookkeeping that depends on the state.
 *
 * Steps, in order:
 * 1. Return immediately if the Task is already in dstState.
 * 2. Associate / disassociate the Container and toggle the Container liveness flag
 *    according to whether the source and destination states belong to
 *    CONTAINER_ASSOCIATED_STATES and CONTAINER_LIVE_ASSOCIATED_STATES.
 * 3. Record exit information, the new RetryPolicyState and timestamps where applicable.
 * 4. Move the locator between the per-state locator collections, store the new state
 *    and mark the TaskRole's TaskStatuses as changed.
 *
 * Note: the preconditions below are expressed with the Java assert statement, so they
 * are only checked when the JVM runs with assertions enabled.
 *
 * @param locator                  identifies the Task to transition
 * @param dstState                 the state to transition to
 * @param container                the Container to associate; asserted non-null when the Task
 *                                 enters a Container associated state
 * @param containerExitCode        stored when dstState is CONTAINER_COMPLETED
 * @param containerExitDiagnostics stored when dstState is CONTAINER_COMPLETED
 * @param newRetryPolicyState      stored when the Task goes from CONTAINER_COMPLETED to TASK_WAITING
 * @throws Exception if associating the Container with the Task fails
 */
public synchronized void transitionTaskState(
TaskStatusLocator locator,
TaskState dstState,
Container container,
int containerExitCode,
String containerExitDiagnostics,
RetryPolicyState newRetryPolicyState) throws Exception {
TaskStatus taskStatus = getTaskStatus(locator);
TaskState srcState = taskStatus.getTaskState();
// State transition function between each TaskState
// Attempt to transition
// A transition to the current state is a no-op: nothing is modified or marked as changed.
if (srcState == dstState) {
return;
}
// A Task in a final state is not expected to be transitioned again.
assert (!TaskStateDefinition.FINAL_STATES.contains(srcState));
// Entering the Container associated states: bind the Container to the Task.
if (!TaskStateDefinition.CONTAINER_ASSOCIATED_STATES.contains(srcState) &&
TaskStateDefinition.CONTAINER_ASSOCIATED_STATES.contains(dstState)) {
assert (container != null);
String containerId = container.getId().toString();
try {
associateTaskWithContainer(locator, container);
LOGGER.logInfo("Associated Task %s with Container %s", locator, containerId);
} catch (Exception e) {
// Roll back the association attempt before reporting the failure,
// so the transition is abandoned with the Task still in srcState.
disassociateTaskWithContainer(locator);
// Mark as unchanged
taskStatusesesChanged.put(locator.getTaskRoleName(), false);
throw new Exception(
String.format("Failed to associate Container %s to Task %s",
containerId, locator), e);
}
}
// Entering the Container live associated states: flag the Container as live.
if (!TaskStateDefinition.CONTAINER_LIVE_ASSOCIATED_STATES.contains(srcState) &&
TaskStateDefinition.CONTAINER_LIVE_ASSOCIATED_STATES.contains(dstState)) {
updateExtensionTaskStatusWithContainerLiveness(locator, true);
}
// Leaving the Container live associated states: flag the Container as no longer live.
if (TaskStateDefinition.CONTAINER_LIVE_ASSOCIATED_STATES.contains(srcState) &&
!TaskStateDefinition.CONTAINER_LIVE_ASSOCIATED_STATES.contains(dstState)) {
updateExtensionTaskStatusWithContainerLiveness(locator, false);
}
// Leaving the Container associated states: drop the Container binding.
if (TaskStateDefinition.CONTAINER_ASSOCIATED_STATES.contains(srcState) &&
!TaskStateDefinition.CONTAINER_ASSOCIATED_STATES.contains(dstState)) {
disassociateTaskWithContainer(locator);
}
// Container completed: a real exit code is expected; store it together with the
// diagnostics and the exit type looked up from both.
if (dstState == TaskState.CONTAINER_COMPLETED) {
assert (containerExitCode != ExitStatusKey.NOT_AVAILABLE.toInt());
taskStatus.setContainerExitCode(containerExitCode);
taskStatus.setContainerExitDiagnostics(containerExitDiagnostics);
taskStatus.setContainerExitType(DiagnosticsUtils.lookupExitType(
containerExitCode, containerExitDiagnostics));
}
// Task will be Retried
if (srcState == TaskState.CONTAINER_COMPLETED && dstState == TaskState.TASK_WAITING) {
// Ensure transitionTaskState and RetryPolicyState is Transactional
assert (newRetryPolicyState != null);
taskStatus.setTaskRetryPolicyState(newRetryPolicyState);
}
// Record Timestamps
// Only TASK_COMPLETED, CONTAINER_LAUNCHED and CONTAINER_COMPLETED have a timestamp field set here.
Long currentTimestamp = System.currentTimeMillis();
if (dstState == TaskState.TASK_COMPLETED) {
taskStatus.setTaskCompletedTimestamp(currentTimestamp);
} else if (dstState == TaskState.CONTAINER_LAUNCHED) {
taskStatus.setContainerLaunchedTimestamp(currentTimestamp);
} else if (dstState == TaskState.CONTAINER_COMPLETED) {
taskStatus.setContainerCompletedTimestamp(currentTimestamp);
}
// Start Transition
// Move the locator from the source state's collection to the destination state's one,
// then store the new state on the TaskStatus itself.
taskStateLocators.get(srcState).remove(locator);
taskStateLocators.get(dstState).add(locator);
taskStatus.setTaskState(dstState);
// Mark as changed
taskStatusesesChanged.put(locator.getTaskRoleName(), true);
LOGGER.logInfo("Transitioned Task %s from [%s] to [%s]", locator, srcState, dstState);
}
/**
 * Applies the requested Task number of every TaskRole in newTaskNumbers.
 *
 * For each TaskRole, a TaskRoleStatus and a TaskStatuses record are created (and
 * marked as changed) if they do not exist yet. The current number of Tasks is then
 * compared with the requested one and increased or decreased accordingly; an equal
 * number leaves the Tasks untouched.
 *
 * @param newTaskNumbers requested Task number, keyed by TaskRole name
 */
public synchronized void updateTaskNumbers(Map<String, Integer> newTaskNumbers) {
  for (Map.Entry<String, Integer> requested : newTaskNumbers.entrySet()) {
    final String roleName = requested.getKey();
    final int targetNumber = requested.getValue();

    // Make sure the TaskRole level record exists
    if (!taskRoleStatuses.containsKey(roleName)) {
      TaskRoleStatus roleStatus = new TaskRoleStatus();
      roleStatus.setTaskRoleName(roleName);
      roleStatus.setTaskRoleRolloutStatus(new TaskRoleRolloutStatus());
      roleStatus.setFrameworkVersion(conf.getFrameworkVersion());
      taskRoleStatuses.put(roleName, roleStatus);
      taskRoleStatusesChanged.put(roleName, true);
    }

    // Make sure the (initially empty) Task level record exists
    if (!taskStatuseses.containsKey(roleName)) {
      TaskStatuses roleTaskStatuses = new TaskStatuses();
      roleTaskStatuses.setTaskRoleName(roleName);
      roleTaskStatuses.setTaskStatusArray(new ArrayList<>());
      roleTaskStatuses.setFrameworkVersion(conf.getFrameworkVersion());
      taskStatuseses.put(roleName, roleTaskStatuses);
      taskStatusesesChanged.put(roleName, true);
    }

    // Grow or shrink the TaskRole towards the requested number
    final int currentNumber = taskStatuseses.get(roleName).getTaskStatusArray().size();
    if (targetNumber > currentNumber) {
      increaseTaskNumber(roleName, targetNumber);
    } else if (targetNumber < currentNumber) {
      decreaseTaskNumber(roleName, targetNumber);
    }
  }
}
/**
 * Resets the connection lost count of the given Container back to zero.
 *
 * @param containerId id of the Container whose count is reset
 */
public synchronized void resetContainerConnectionLostCount(String containerId) {
  final int noConnectionLost = 0;
  setContainerConnectionLostCount(containerId, noConnectionLost);
}
/**
 * Resets the connection lost count of every Container returned by
 * getLiveAssociatedContainerIds() back to zero.
 */
public synchronized void resetContainerConnectionLostCount() {
  for (String liveContainerId : getLiveAssociatedContainerIds()) {
    // Same effect as resetContainerConnectionLostCount(liveContainerId)
    setContainerConnectionLostCount(liveContainerId, 0);
  }
}
/**
 * Increments the connection lost count of the given Container by one.
 * The current count is read from the TaskStatus looked up through
 * getTaskStatusWithLiveAssociatedContainerId.
 *
 * @param containerId id of the Container whose count is incremented
 */
public synchronized void increaseContainerConnectionLostCount(String containerId) {
  setContainerConnectionLostCount(
      containerId,
      getTaskStatusWithLiveAssociatedContainerId(containerId).getContainerConnectionLostCount() + 1);
}
}

Просмотреть файл

@ -0,0 +1,66 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
import com.microsoft.frameworklauncher.common.model.TaskState;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
/**
 * Named, unmodifiable groups of TaskState values.
 */
public class TaskStateDefinition {
  // NOTE(review): presumably the states a Task starts in - confirm against the callers.
  public static final Set<TaskState> START_STATES = unmodifiableSetOf(
      TaskState.TASK_WAITING);

  // NOTE(review): presumably the states a Task is never expected to leave - confirm against the callers.
  public static final Set<TaskState> FINAL_STATES = unmodifiableSetOf(
      TaskState.TASK_COMPLETED);

  // NOTE(review): presumably the states in which the Task's Container counts as live - confirm.
  public static final Set<TaskState> CONTAINER_LIVE_ASSOCIATED_STATES = unmodifiableSetOf(
      TaskState.CONTAINER_ALLOCATED,
      TaskState.CONTAINER_LAUNCHED,
      TaskState.CONTAINER_RUNNING);

  // NOTE(review): presumably the states in which a Container is associated with the Task - confirm.
  public static final Set<TaskState> CONTAINER_ASSOCIATED_STATES = unmodifiableSetOf(
      TaskState.CONTAINER_ALLOCATED,
      TaskState.CONTAINER_LAUNCHED,
      TaskState.CONTAINER_RUNNING,
      TaskState.CONTAINER_COMPLETED,
      TaskState.TASK_COMPLETED);

  // NOTE(review): presumably the states that cannot be trusted after a restart - confirm against the recovery code.
  public static final Set<TaskState> STATE_CORRUPTED_AFTER_RESTART_STATES = unmodifiableSetOf(
      TaskState.CONTAINER_REQUESTED,
      TaskState.CONTAINER_ALLOCATED,
      TaskState.CONTAINER_LAUNCHED);

  // NOTE(review): presumably the states whose queued work is lost by a restart - confirm against the recovery code.
  public static final Set<TaskState> QUEUE_CORRUPTED_AFTER_RESTART_STATES = unmodifiableSetOf(
      TaskState.TASK_WAITING,
      TaskState.CONTAINER_COMPLETED);

  // Copies the given states into a HashSet and wraps it as a read-only view.
  private static Set<TaskState> unmodifiableSetOf(TaskState... states) {
    return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(states)));
  }
}

Просмотреть файл

@ -0,0 +1,67 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.applicationmaster;
public class TaskStatusLocator implements Comparable<TaskStatusLocator> {
private final String taskRoleName;
private final int taskIndex;
public TaskStatusLocator(String taskRoleName, int taskIndex) {
this.taskRoleName = taskRoleName;
this.taskIndex = taskIndex;
}
public String getTaskRoleName() {
return taskRoleName;
}
public int getTaskIndex() {
return taskIndex;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (!(obj instanceof TaskStatusLocator))
return false;
TaskStatusLocator other = (TaskStatusLocator) obj;
return compareTo(other) == 0;
}
@Override
public int compareTo(TaskStatusLocator other) {
int ret = taskRoleName.compareTo(other.taskRoleName);
if (ret == 0) {
ret = Integer.valueOf(taskIndex).compareTo(other.taskIndex);
}
return ret;
}
@Override
public int hashCode() {
return toString().hashCode();
}
@Override
public String toString() {
return String.format("[%s][%s]", taskRoleName, taskIndex);
}
}

Просмотреть файл

@ -0,0 +1,278 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common;
import com.microsoft.frameworklauncher.common.exceptions.LauncherClientException;
import com.microsoft.frameworklauncher.common.model.*;
import org.apache.http.HttpStatus;
import org.apache.http.entity.ContentType;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.function.Predicate;
/**
 * Internal implementation of LauncherClient.
 * It is for internal use only, external User should not use it.
 *
 * Every public method sends one request through the shared WebClient and runs it
 * inside executeWithRetry, which retries failures classified as transient and
 * reports the final failure as a LauncherClientException.
 */
public class LauncherClientInternal {
  private final WebClient webClient;
  // Maximum number of retries after the first attempt; -1 means no limit.
  private final int maxRetryCount;
  // Seconds to wait between two attempts; no wait if it is not positive.
  private final int retryIntervalSec;

  public LauncherClientInternal(String launcherAddress, int maxRetryCount, int retryIntervalSec, LaunchClientType launchClientType) {
    this.webClient = new WebClient(launcherAddress, launchClientType.toString());
    this.maxRetryCount = maxRetryCount;
    this.retryIntervalSec = retryIntervalSec;
  }

  /**
   * Gets the Framework names without filtering by LaunchClientType.
   */
  public RequestedFrameworkNames getFrameworks() throws Exception {
    return getFrameworks(null);
  }

  /**
   * Gets the Framework names.
   *
   * @param launchClientType if not null, it is sent as a request parameter of the query
   */
  public RequestedFrameworkNames getFrameworks(LaunchClientType launchClientType) throws Exception {
    return executeWithRetry(() -> {
      Map<String, String> parameters = null;
      if (launchClientType != null) {
        parameters = new HashMap<>();
        parameters.put(WebCommon.LAUNCH_CLIENT_TYPE_REQUEST_HEADER, launchClientType.toString());
      }
      return webClient.get(WebStructure.FRAMEWORK_ROOT_PATH, parameters);
    }, RequestedFrameworkNames.class);
  }

  /**
   * Puts a Framework whose descriptor is given as Json.
   * The Json is deserialized before the retry loop is entered, so a deserialization
   * failure is thrown directly instead of being wrapped in a LauncherClientException.
   */
  public void putFramework(String frameworkName, String frameworkDescriptor) throws Exception {
    putFramework(frameworkName, WebCommon.toObject(frameworkDescriptor, FrameworkDescriptor.class));
  }

  /**
   * Validates the name and the descriptor, then puts the Framework.
   */
  public void putFramework(String frameworkName, FrameworkDescriptor frameworkDescriptor) throws Exception {
    executeWithRetry(() -> {
      ModelValidation.validate(frameworkName);
      ModelValidation.validate(frameworkDescriptor);
      return webClient.put(
          WebStructure.getFrameworkPath(frameworkName),
          ContentType.APPLICATION_JSON,
          WebCommon.toJson(frameworkDescriptor));
    });
  }

  /**
   * Validates the names and the request, then puts the new Task number of the TaskRole.
   */
  public void putTaskNumber(String frameworkName, String taskRoleName, UpdateTaskNumberRequest updateTaskNumberRequest) throws Exception {
    executeWithRetry(() -> {
      ModelValidation.validate(frameworkName);
      ModelValidation.validate(taskRoleName);
      ModelValidation.validate(updateTaskNumberRequest);
      return webClient.put(
          WebStructure.getTaskNumberPath(frameworkName, taskRoleName),
          ContentType.APPLICATION_JSON,
          WebCommon.toJson(updateTaskNumberRequest));
    });
  }

  /**
   * Validates the Framework name and the request, then puts the MigrateTask request
   * for the given Container. The containerId itself is not validated here.
   */
  public void putMigrateTask(String frameworkName, String containerId, MigrateTaskRequest migrateTaskRequest) throws Exception {
    executeWithRetry(() -> {
      ModelValidation.validate(frameworkName);
      ModelValidation.validate(migrateTaskRequest);
      return webClient.put(
          WebStructure.getMigrateTaskPath(frameworkName, containerId),
          ContentType.APPLICATION_JSON,
          WebCommon.toJson(migrateTaskRequest));
    });
  }

  /**
   * Validates the Framework name and the request, then puts the overridden ApplicationProgress.
   */
  public void putApplicationProgress(String frameworkName, OverrideApplicationProgressRequest overrideApplicationProgressRequest) throws Exception {
    executeWithRetry(() -> {
      ModelValidation.validate(frameworkName);
      ModelValidation.validate(overrideApplicationProgressRequest);
      return webClient.put(
          WebStructure.getApplicationProgressPath(frameworkName),
          ContentType.APPLICATION_JSON,
          WebCommon.toJson(overrideApplicationProgressRequest));
    });
  }

  public void deleteFramework(String frameworkName) throws Exception {
    executeWithRetry(() -> webClient.delete(WebStructure.getFrameworkPath(frameworkName)));
  }

  public void deleteMigrateTask(String frameworkName, String containerId) throws Exception {
    executeWithRetry(() -> webClient.delete(WebStructure.getMigrateTaskPath(frameworkName, containerId)));
  }

  // The Status getters below additionally retry on NOT_FOUND, see shouldRetryGetStatus.

  public AggregatedFrameworkStatus getAggregatedFrameworkStatus(String frameworkName) throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.getAggregatedFrameworkStatusPath(frameworkName)),
        AggregatedFrameworkStatus.class,
        (output) -> shouldRetryGetStatus(output, frameworkName));
  }

  public FrameworkStatus getFrameworkStatus(String frameworkName) throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.getFrameworkStatusPath(frameworkName)),
        FrameworkStatus.class,
        (output) -> shouldRetryGetStatus(output, frameworkName));
  }

  public TaskRoleStatus getTaskRoleStatus(String frameworkName, String taskRoleName) throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.getTaskRoleStatusPath(frameworkName, taskRoleName)),
        TaskRoleStatus.class,
        (output) -> shouldRetryGetStatus(output, frameworkName));
  }

  public TaskStatuses getTaskStatuses(String frameworkName, String taskRoleName) throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.getTaskStatusesPath(frameworkName, taskRoleName)),
        TaskStatuses.class,
        (output) -> shouldRetryGetStatus(output, frameworkName));
  }

  public AggregatedFrameworkRequest getAggregatedFrameworkRequest(String frameworkName) throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.getAggregatedFrameworkRequestPath(frameworkName)),
        AggregatedFrameworkRequest.class);
  }

  public FrameworkRequest getFrameworkRequest(String frameworkName) throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.getFrameworkRequestPath(frameworkName)),
        FrameworkRequest.class);
  }

  public LauncherStatus getLauncherStatus() throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.LAUNCHER_STATUS_PATH),
        LauncherStatus.class);
  }

  public LauncherRequest getLauncherRequest() throws Exception {
    return executeWithRetry(
        () -> webClient.get(WebStructure.LAUNCHER_REQUEST_PATH),
        LauncherRequest.class);
  }

  /**
   * Validates the request, then puts the new DataDeploymentVersion.
   */
  public void putDataDeploymentVersion(UpdateDataDeploymentVersionRequest updateDataDeploymentVersionRequest) throws Exception {
    executeWithRetry(() -> {
      ModelValidation.validate(updateDataDeploymentVersionRequest);
      return webClient.put(
          WebStructure.DATA_DEPLOYMENT_VERSION_PATH,
          ContentType.APPLICATION_JSON,
          WebCommon.toJson(updateDataDeploymentVersionRequest));
    });
  }

  /**
   * Classifies a failed output by rules shared by all requests.
   *
   * @return true for a Transient Failure, false for a NON_TRANSIENT Failure,
   * null if the failure cannot be classified here (UNKNOWN Failure)
   */
  private Boolean shouldRetryCommon(WebClientOutput output) {
    int statusCode = output.getStatusCode();
    if (statusCode == HttpStatus.SC_REQUEST_TIMEOUT ||
        statusCode == HttpStatus.SC_SERVICE_UNAVAILABLE ||
        statusCode == WebCommon.SC_TOO_MANY_REQUESTS) {
      // Must be Transient Failure
      return true;
    } else if (statusCode == HttpStatus.SC_BAD_REQUEST) {
      // Must be NON_TRANSIENT Failure
      return false;
    } else {
      // UNKNOWN Failure
      return null;
    }
  }

  /**
   * Supplementary classification for the Status getters.
   *
   * @return true only if the Status was NOT_FOUND while the Framework's Request can be got
   */
  private boolean shouldRetryGetStatus(WebClientOutput output, String frameworkName) {
    if (output.getStatusCode() != HttpStatus.SC_NOT_FOUND) {
      // At last, consider all UNKNOWN Failure as NON_TRANSIENT
      return false;
    }

    // Specified Framework's Status does not exist.
    // This may due to specified Framework is not Requested or
    // the Framework Requested but the Status has not been initialized by backend.
    // So, the Client is expected to retry for the latter case.
    try {
      getFrameworkRequest(frameworkName);
      return true;
    } catch (Exception e) {
      return false;
    }
  }

  // Executes an action whose response content is not needed.
  private void executeWithRetry(Callable<WebClientOutput> action) throws Exception {
    executeWithRetry(action, null);
  }

  // Executes an action that is only subject to the common retry classification.
  private <T> T executeWithRetry(Callable<WebClientOutput> action, Class<T> classRef) throws Exception {
    return executeWithRetry(action, classRef, null);
  }

  /**
   * Calls the action until it succeeds, fails with a NON_TRANSIENT Failure,
   * or the retry limit is reached.
   *
   * @param action                the request to execute; an Exception thrown by it is
   *                              treated as a BAD_REQUEST output, i.e. it is not retried
   * @param classRef              the type to deserialize the response content to,
   *                              null to ignore the content and return null
   * @param shouldRetrySupplement optional classifier consulted only for failures which
   *                              shouldRetryCommon cannot classify
   * @return the deserialized response content, or null if classRef is null
   * @throws LauncherClientException if the request finally failed
   * @throws InterruptedException    if interrupted while waiting between two attempts
   */
  private <T> T executeWithRetry(Callable<WebClientOutput> action, Class<T> classRef, Predicate<WebClientOutput> shouldRetrySupplement) throws Exception {
    int retriedCount = 0;
    while (true) {
      String msg = String.format(
          "Retry [%s / %s -> %ss]: Failed Finally, check LauncherClientException for more details.",
          retriedCount, maxRetryCount, retryIntervalSec);

      WebClientOutput output;
      try {
        output = action.call();
      } catch (Exception e) {
        output = new WebClientOutput(HttpStatus.SC_BAD_REQUEST, e.toString(), false, e);
      }

      if (output.isSuccessStatusCode()) {
        if (classRef == null) {
          return null;
        }
        try {
          return WebCommon.toObject(output.getContent(), classRef);
        } catch (Exception e) {
          // This can only happen when Client use an incompatible model with Server
          output = new WebClientOutput(HttpStatus.SC_BAD_REQUEST, output.getContent(), false, e);
        }
      }

      Boolean commonDecision = shouldRetryCommon(output);
      boolean shouldRetry;
      if (commonDecision != null) {
        shouldRetry = commonDecision;
      } else {
        // At last, consider all UNKNOWN Failure as NON_TRANSIENT,
        // unless the supplement explicitly asks for a retry
        shouldRetry = shouldRetrySupplement != null && shouldRetrySupplement.test(output);
      }

      if (!shouldRetry) {
        throw new LauncherClientException(msg, output, false);
      }
      if (maxRetryCount != -1 && retriedCount >= maxRetryCount) {
        throw new LauncherClientException(msg, output, true);
      }
      if (retryIntervalSec > 0) {
        // Multiply as long, so that a large interval cannot overflow int
        Thread.sleep(retryIntervalSec * 1000L);
      }
      retriedCount++;
    }
  }
}

Просмотреть файл

@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common;
import com.microsoft.frameworklauncher.common.exceptions.BadRequestException;
import javax.validation.ConstraintViolation;
import javax.validation.ConstraintViolationException;
import javax.validation.Validation;
import javax.validation.Validator;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Validation helpers for request models and for names.
 */
public class ModelValidation {
  public static final String NAMING_CONVENTION_REGEX_STR = "^[a-zA-Z0-9._\\-()]+$";
  public static final Pattern NAMING_CONVENTION_REGEX = Pattern.compile(NAMING_CONVENTION_REGEX_STR);
  private static final Validator VALIDATOR = Validation.buildDefaultValidatorFactory().getValidator();

  /**
   * Validates an object against its bean validation constraints.
   *
   * @throws BadRequestException if o is null or at least one constraint is violated
   */
  public static <T> void validate(T o) throws BadRequestException {
    if (o == null) {
      throw new BadRequestException("Object is null");
    }

    Set<ConstraintViolation<T>> constraintViolations = VALIDATOR.validate(o);
    if (constraintViolations.isEmpty()) {
      return;
    }
    throw new BadRequestException(new ConstraintViolationException(constraintViolations));
  }

  /**
   * Validates a name against the naming convention regex.
   *
   * @throws BadRequestException if s is null or does not fully match the regex
   */
  public static void validate(String s) throws BadRequestException {
    if (s == null) {
      throw new BadRequestException("Object is null");
    }
    if (NAMING_CONVENTION_REGEX.matcher(s).matches()) {
      return;
    }
    throw new BadRequestException(String.format(
        "Name [%s] is not matched with naming convention regex [%s]",
        s, NAMING_CONVENTION_REGEX));
  }
}

Просмотреть файл

@ -0,0 +1,92 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import java.net.SocketException;
import java.util.Arrays;
import java.util.Map;
import java.util.concurrent.Callable;
/**
 * Thin HTTP client which sends PUT, DELETE and GET requests relative to a base URI
 * and reports every outcome, including client side failures, as a WebClientOutput.
 */
class WebClient {
  // Each instance of the HttpClient will create a new socket and hold a connection open for a specific interval.
  // To avoid socket exhaustion problem, HttpClient instance need to be shared to use.
  private final CloseableHttpClient httpClient;
  private final String baseURI;

  /**
   * @param baseURI          the URI all relative URIs are resolved against
   * @param launchClientType sent as the LaunchClientType header of every request
   */
  public WebClient(String baseURI, String launchClientType) {
    this.baseURI = baseURI;
    Header header = new BasicHeader(WebCommon.LAUNCH_CLIENT_TYPE_REQUEST_HEADER, launchClientType);
    this.httpClient = HttpClients.custom().setDefaultHeaders(Arrays.asList(header)).build();
  }

  public WebClientOutput put(String relativeURI, ContentType contentType, String body) {
    HttpPut request = new HttpPut(WebCommon.getURI(baseURI, relativeURI));
    request.setEntity(new StringEntity(body, contentType));
    return execute(() -> httpClient.execute(request));
  }

  public WebClientOutput delete(String relativeURI) {
    HttpDelete request = new HttpDelete(WebCommon.getURI(baseURI, relativeURI));
    return execute(() -> httpClient.execute(request));
  }

  public WebClientOutput get(String relativeURI) {
    return get(relativeURI, null);
  }

  public WebClientOutput get(String relativeURI, Map<String, String> parameters) {
    HttpGet request = new HttpGet(WebCommon.getURI(baseURI, relativeURI, parameters));
    return execute(() -> httpClient.execute(request));
  }

  /**
   * Runs the request and converts its outcome to a WebClientOutput.
   *
   * The status code and the body of the response are reported as they were received,
   * for successful and for unsuccessful responses alike, so that callers can react to
   * the real status code (such as NOT_FOUND or SERVICE_UNAVAILABLE).
   * BasicResponseHandler is deliberately not used to read the body: it throws for
   * every status code >= 300, which would turn all unsuccessful responses into a
   * client side BAD_REQUEST and drop the body sent by the server.
   *
   * @return the received response; or, if an Exception occurred on the client side,
   * an output with REQUEST_TIMEOUT for network errors and BAD_REQUEST otherwise
   */
  private static WebClientOutput execute(Callable<HttpResponse> action) {
    try {
      HttpResponse response = action.call();
      int statusCode = response.getStatusLine().getStatusCode();
      // Reading the entity to its end also releases the underlying connection
      HttpEntity entity = response.getEntity();
      String content = (entity == null) ? null : EntityUtils.toString(entity);
      return new WebClientOutput(statusCode, content, statusCode >= 200 && statusCode <= 299);
    } catch (Exception e) {
      if (isNetworkError(e)) {
        return new WebClientOutput(HttpStatus.SC_REQUEST_TIMEOUT, e.toString(), false, e);
      } else {
        return new WebClientOutput(HttpStatus.SC_BAD_REQUEST, e.toString(), false, e);
      }
    }
  }

  // Whether the Throwable or any Throwable in its cause chain is a SocketException.
  private static Boolean isNetworkError(Throwable e) {
    if (e instanceof SocketException)
      return true;
    if (e.getCause() != null)
      return isNetworkError(e.getCause());
    return false;
  }
}

Просмотреть файл

@ -0,0 +1,63 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common;
/**
 * Immutable outcome of one request sent by the WebClient: the status code, the
 * content, whether the status code means success and, if any, the Exception
 * which occurred on the Client side.
 */
public class WebClientOutput {
  private final int statusCode;
  private final String content;
  private final Boolean isSuccessStatusCode;
  private final Exception clientSideException;

  /**
   * Creates an output without a Client side Exception.
   */
  public WebClientOutput(int statusCode, String content, Boolean isSuccessStatusCode) {
    this(statusCode, content, isSuccessStatusCode, null);
  }

  public WebClientOutput(int statusCode, String content, Boolean isSuccessStatusCode, Exception clientSideException) {
    this.clientSideException = clientSideException;
    this.isSuccessStatusCode = isSuccessStatusCode;
    this.content = content;
    this.statusCode = statusCode;
  }

  public int getStatusCode() {
    return this.statusCode;
  }

  public String getContent() {
    return this.content;
  }

  public Boolean isSuccessStatusCode() {
    return this.isSuccessStatusCode;
  }

  /**
   * If clientSideException is not null, it means the Exception occurred
   * in the Client side during Client request to Server, includes:
   * Serialization, Deserialization, Validation, HttpClient Exceptions.
   */
  public Exception getClientSideException() {
    return this.clientSideException;
  }

  /**
   * @return all four fields, one per line, each prefixed by its name
   */
  @Override
  public String toString() {
    final String lineSeparator = "\n";
    return String.format(
        "HttpStatusCode: %2$s%1$sContent: %3$s%1$sIsSuccessStatusCode: %4$s%1$sClientSideException: %5$s",
        lineSeparator, statusCode, content, isSuccessStatusCode, clientSideException);
  }
}

Просмотреть файл

@ -0,0 +1,128 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.net.NetUtils;
import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectWriter;
import java.io.IOException;
import java.util.Map;
/**
 * Helpers shared by the web client and server side: URI building and
 * Object <-> Json conversion.
 */
public class WebCommon {
  public final static String LAUNCH_CLIENT_TYPE_REQUEST_HEADER = "LaunchClientType";
  public final static int SC_TOO_MANY_REQUESTS = 429;

  // Jackson mappers and writers are thread-safe once configured, so they are
  // created once and shared instead of being rebuilt for every conversion.
  private static final ObjectWriter JSON_WRITER = new ObjectMapper().writer();
  private static final ObjectMapper JSON_READER = new ObjectMapper();

  static {
    // Ignore properties unknown to the target type instead of failing
    JSON_READER.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
  }

  public static String getURI(String baseURI, String relativeURI) {
    return getURI(baseURI, relativeURI, null);
  }

  /**
   * Builds the URI of a node and appends the parameters, if any, as its query string.
   * Keys and values are trimmed, but they are NOT URL encoded.
   * NOTE(review): confirm that no caller passes keys or values containing reserved characters.
   *
   * @param parameters the query parameters, can be null or empty
   * @return the node path, followed by "?key=value&key=value..." if there are parameters
   */
  public static String getURI(String baseURI, String relativeURI, Map<String, String> parameters) {
    String path = WebStructure.getNodePath(baseURI, relativeURI);

    StringBuilder query = new StringBuilder();
    if (parameters != null) {
      for (Map.Entry<String, String> param : parameters.entrySet()) {
        if (query.length() > 0) {
          query.append('&');
        }
        query.append(param.getKey().trim()).append('=').append(param.getValue().trim());
      }
    }

    if (query.length() > 0) {
      return path + "?" + query;
    } else {
      return path;
    }
  }

  /**
   * @return bindHost combined with the port parsed from address, as host:port
   */
  public static String getBindAddress(String bindHost, String address) {
    return bindHost.trim() + ":" + NetUtils.createSocketAddr(address).getPort();
  }

  // Object <-> Json
  // obj can be null, but cannot be Exception
  public static String toJson(Object obj) throws IOException {
    return toFormatedJson(JSON_WRITER.writeValueAsString(obj));
  }

  // json can be "null"
  public static <T> T toObject(String json, Class<T> targetType) throws Exception {
    return JSON_READER.readValue(json, targetType);
  }

  // Internal utils

  /**
   * Pretty prints compact Json: starts a new, indented line after every '{', '['
   * and ',', before every '}' and ']', and adds a space after every ':'.
   * Characters inside a quoted Json string are copied unchanged.
   */
  private static String toFormatedJson(String json) {
    final String indentStr = " ";
    int indent = 0;
    // Whether the current position is inside a quoted Json string
    boolean quoted = false;
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < json.length(); i++) {
      char ch = json.charAt(i);
      switch (ch) {
        case '{':
        case '[':
          sb.append(ch);
          if (!quoted) {
            sb.append("\n");
            sb.append(StringUtils.repeat(indentStr, ++indent));
          }
          break;
        case '}':
        case ']':
          if (!quoted) {
            sb.append("\n");
            sb.append(StringUtils.repeat(indentStr, --indent));
          }
          sb.append(ch);
          break;
        case '"':
          sb.append(ch);
          // The quote is escaped if it is preceded by an odd number of backslashes
          boolean escaped = false;
          int index = i;
          while (index > 0 && json.charAt(--index) == '\\')
            escaped = !escaped;
          if (!escaped)
            quoted = !quoted;
          break;
        case ',':
          sb.append(ch);
          if (!quoted) {
            sb.append("\n");
            sb.append(StringUtils.repeat(indentStr, indent));
          }
          break;
        case ':':
          sb.append(ch);
          if (!quoted)
            sb.append(" ");
          break;
        default:
          sb.append(ch);
          break;
      }
    }
    return sb.toString();
  }
}

Просмотреть файл

@ -0,0 +1,116 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common;
import org.apache.commons.lang.StringUtils;
// Define Launcher WebStructure
// Catalogue of the Launcher's web path templates, all rooted at ROOT_PATH, together with
// helpers that substitute concrete names for the "{...}" placeholders in those templates.
public class WebStructure {
  private static final String PATH_SEPARATOR = "/";

  // Names of the path parameters that appear, wrapped in braces, inside the templates below.
  public static final String FRAMEWORK_NAME_PATH_PARAM = "FrameworkName";
  public static final String TASK_ROLE_NAME_PATH_PARAM = "TaskRoleName";
  public static final String CONTAINER_ID_PATH_PARAM = "ContainerId";

  private static final String FRAMEWORK_NAME_PATH_PARAM_PLACEMENT = "{" + FRAMEWORK_NAME_PATH_PARAM + "}";
  private static final String TASK_ROLE_NAME_PATH_PARAM_PLACEMENT = "{" + TASK_ROLE_NAME_PATH_PARAM + "}";
  private static final String CONTAINER_ID_PATH_PARAM_PLACEMENT = "{" + CONTAINER_ID_PATH_PARAM + "}";

  // Launcher level paths.
  public static final String ROOT_PATH = "/v1";
  public static final String LAUNCHER_STATUS_PATH = ROOT_PATH + PATH_SEPARATOR + "LauncherStatus";
  public static final String LAUNCHER_REQUEST_PATH = ROOT_PATH + PATH_SEPARATOR + "LauncherRequest";
  public static final String DATA_DEPLOYMENT_VERSION_PATH = LAUNCHER_REQUEST_PATH + PATH_SEPARATOR + "DataDeploymentVersion";

  // Framework level path templates.
  public static final String FRAMEWORK_ROOT_PATH = ROOT_PATH + PATH_SEPARATOR + "Frameworks";
  public static final String FRAMEWORK_PATH = FRAMEWORK_ROOT_PATH + PATH_SEPARATOR + FRAMEWORK_NAME_PATH_PARAM_PLACEMENT;
  public static final String AGGREGATED_FRAMEWORK_STATUS_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "AggregatedFrameworkStatus";
  public static final String FRAMEWORK_STATUS_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "FrameworkStatus";

  // Task role level path templates.
  public static final String TASK_ROLE_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "TaskRoles" + PATH_SEPARATOR + TASK_ROLE_NAME_PATH_PARAM_PLACEMENT;
  public static final String TASK_ROLE_STATUS_PATH = TASK_ROLE_PATH + PATH_SEPARATOR + "TaskRoleStatus";
  public static final String TASK_STATUSES_PATH = TASK_ROLE_PATH + PATH_SEPARATOR + "TaskStatuses";
  public static final String TASK_NUMBER_PATH = TASK_ROLE_PATH + PATH_SEPARATOR + "TaskNumber";

  // Remaining framework scoped path templates.
  public static final String MIGRATE_TASK_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "MigrateTasks" + PATH_SEPARATOR + CONTAINER_ID_PATH_PARAM_PLACEMENT;
  public static final String APPLICATION_PROGRESS_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "ApplicationProgress";
  public static final String AGGREGATED_FRAMEWORK_REQUEST_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "AggregatedFrameworkRequest";
  public static final String FRAMEWORK_REQUEST_PATH = FRAMEWORK_PATH + PATH_SEPARATOR + "FrameworkRequest";

  // Joins parentNodePath and nodeName with a single separator, after removing trailing
  // separators from the parent and leading separators from the node name.
  public static String getNodePath(String parentNodePath, String nodeName) {
    String parent = StringUtils.stripEnd(parentNodePath, PATH_SEPARATOR);
    String child = StringUtils.stripStart(nodeName, PATH_SEPARATOR);
    return parent + PATH_SEPARATOR + child;
  }

  public static String getFrameworkPath(String frameworkName) {
    return withFramework(FRAMEWORK_PATH, frameworkName);
  }

  public static String getAggregatedFrameworkStatusPath(String frameworkName) {
    return withFramework(AGGREGATED_FRAMEWORK_STATUS_PATH, frameworkName);
  }

  public static String getFrameworkStatusPath(String frameworkName) {
    return withFramework(FRAMEWORK_STATUS_PATH, frameworkName);
  }

  private static String getTaskRolePath(String frameworkName, String taskRoleName) {
    return withFrameworkAndTaskRole(TASK_ROLE_PATH, frameworkName, taskRoleName);
  }

  public static String getTaskRoleStatusPath(String frameworkName, String taskRoleName) {
    return withFrameworkAndTaskRole(TASK_ROLE_STATUS_PATH, frameworkName, taskRoleName);
  }

  public static String getTaskStatusesPath(String frameworkName, String taskRoleName) {
    return withFrameworkAndTaskRole(TASK_STATUSES_PATH, frameworkName, taskRoleName);
  }

  public static String getTaskNumberPath(String frameworkName, String taskRoleName) {
    return withFrameworkAndTaskRole(TASK_NUMBER_PATH, frameworkName, taskRoleName);
  }

  public static String getMigrateTaskPath(String frameworkName, String containerId) {
    String frameworkScoped = withFramework(MIGRATE_TASK_PATH, frameworkName);
    return frameworkScoped.replace(CONTAINER_ID_PATH_PARAM_PLACEMENT, containerId);
  }

  public static String getApplicationProgressPath(String frameworkName) {
    return withFramework(APPLICATION_PROGRESS_PATH, frameworkName);
  }

  public static String getAggregatedFrameworkRequestPath(String frameworkName) {
    return withFramework(AGGREGATED_FRAMEWORK_REQUEST_PATH, frameworkName);
  }

  public static String getFrameworkRequestPath(String frameworkName) {
    return withFramework(FRAMEWORK_REQUEST_PATH, frameworkName);
  }

  // Substitutes frameworkName for the framework-name placeholder in template.
  private static String withFramework(String template, String frameworkName) {
    return template.replace(FRAMEWORK_NAME_PATH_PARAM_PLACEMENT, frameworkName);
  }

  // Substitutes the framework name first and the task role name second, in that order.
  private static String withFrameworkAndTaskRole(String template, String frameworkName, String taskRoleName) {
    String frameworkScoped = withFramework(template, frameworkName);
    return frameworkScoped.replace(TASK_ROLE_NAME_PATH_PARAM_PLACEMENT, taskRoleName);
  }
}

Просмотреть файл

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Checked exception that carries a list of other exceptions, added one at a time
 * through {@link #addException(Exception)}.
 */
public class AggregateException extends Exception {
  // Declared explicitly, like the other exception classes in this package.
  private static final long serialVersionUID = 1L;

  private final ArrayList<Exception> exceptions;

  /**
   * Creates an empty aggregate with the default message.
   */
  public AggregateException() {
    this("One or more exceptions occurred.");
  }

  /**
   * Creates an empty aggregate with the given message.
   *
   * @param message the detail message
   */
  public AggregateException(String message) {
    super(message);
    exceptions = new ArrayList<>();
  }

  /**
   * Appends an exception to the aggregate.
   *
   * @param e the exception to record
   */
  public void addException(Exception e) {
    exceptions.add(e);
  }

  /**
   * @return a read-only view of the recorded exceptions, in insertion order
   */
  public List<Exception> getExceptions() {
    return Collections.unmodifiableList(exceptions);
  }

  /**
   * Renders the aggregate as {@code AggregateException[{e1, e2}]}.
   * The separator is written only between elements, so there is no dangling ", "
   * after the last one, and a null element is rendered as "null" instead of throwing.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("AggregateException[{");
    String separator = "";
    for (Exception e : exceptions) {
      // append(Object) goes through String.valueOf, which tolerates null entries.
      sb.append(separator).append(e);
      separator = ", ";
    }
    return sb.append("}]").toString();
  }
}

Просмотреть файл

@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
/**
 * NonTransientException subtype for bad requests. Every constructor forwards its
 * arguments unchanged to the matching NonTransientException constructor.
 */
public class BadRequestException extends NonTransientException {
  private static final long serialVersionUID = 1L;

  /** Creates the exception with both a detail message and a cause. */
  public BadRequestException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates the exception with a cause only. */
  public BadRequestException(Throwable cause) {
    super(cause);
  }

  /** Creates the exception with a detail message only. */
  public BadRequestException(String message) {
    super(message);
  }

  /** Creates the exception with neither message nor cause. */
  public BadRequestException() {
  }
}

Просмотреть файл

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
import com.microsoft.frameworklauncher.common.WebClientOutput;
/**
 * All possible Exceptions that may be thrown by LauncherClient.
 * Carries the WebClientOutput it was built from and a flag telling whether the
 * failure is transient.
 */
public class LauncherClientException extends Exception {
  private final WebClientOutput webClientOutput;
  private final Boolean isTransient;

  /**
   * The client side exception held by webClientOutput becomes the cause of this exception.
   */
  public LauncherClientException(String message, WebClientOutput webClientOutput, Boolean isTransient) {
    super(message, webClientOutput.getClientSideException());
    this.isTransient = isTransient;
    this.webClientOutput = webClientOutput;
  }

  /**
   * Only keep the last WebClientOutput of all retries.
   */
  public WebClientOutput getWebClientOutput() {
    return this.webClientOutput;
  }

  /**
   * Whether it is caused by Transient Failures.
   */
  public Boolean isTransient() {
    return this.isTransient;
  }

  /**
   * Renders the default exception text, then the WebClientOutput, then the transient
   * flag, each section separated from the previous one by a blank line.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(super.toString());
    text.append("\n\n").append("WebClientOutput:").append("\n").append(webClientOutput);
    text.append("\n\n").append("IsTransient: ").append(isTransient);
    return text.toString();
  }
}

Просмотреть файл

@ -0,0 +1,42 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
/**
 * Checked exception type for non-transient failures.
 * Exceptions other than TransientException and NonTransientException are
 * considered to be UnKnownException.
 */
public class NonTransientException extends Exception {
  private static final long serialVersionUID = 1L;

  /** Creates the exception with both a detail message and a cause. */
  public NonTransientException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates the exception with a cause only. */
  public NonTransientException(Throwable cause) {
    super(cause);
  }

  /** Creates the exception with a detail message only. */
  public NonTransientException(String message) {
    super(message);
  }

  /** Creates the exception with neither message nor cause. */
  public NonTransientException() {
  }
}

Просмотреть файл

@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
/**
 * Checked exception whose constructors forward their arguments unchanged to the
 * matching java.lang.Exception constructor.
 */
public class NotAvailableException extends Exception {
  private static final long serialVersionUID = 1L;

  /** Creates the exception with both a detail message and a cause. */
  public NotAvailableException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates the exception with a cause only. */
  public NotAvailableException(Throwable cause) {
    super(cause);
  }

  /** Creates the exception with a detail message only. */
  public NotAvailableException(String message) {
    super(message);
  }

  /** Creates the exception with neither message nor cause. */
  public NotAvailableException() {
  }
}

Просмотреть файл

@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
/**
 * Checked exception whose constructors forward their arguments unchanged to the
 * matching java.lang.Exception constructor.
 */
public class NotFoundException extends Exception {
  private static final long serialVersionUID = 1L;

  /** Creates the exception with both a detail message and a cause. */
  public NotFoundException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates the exception with a cause only. */
  public NotFoundException(Throwable cause) {
    super(cause);
  }

  /** Creates the exception with a detail message only. */
  public NotFoundException(String message) {
    super(message);
  }

  /** Creates the exception with neither message nor cause. */
  public NotFoundException() {
  }
}

Просмотреть файл

@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
/**
 * TransientException subtype for throttled requests. Every constructor forwards its
 * arguments unchanged to the matching TransientException constructor.
 */
public class ThrottledRequestException extends TransientException {
  private static final long serialVersionUID = 1L;

  /** Creates the exception with both a detail message and a cause. */
  public ThrottledRequestException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates the exception with a cause only. */
  public ThrottledRequestException(Throwable cause) {
    super(cause);
  }

  /** Creates the exception with a detail message only. */
  public ThrottledRequestException(String message) {
    super(message);
  }

  /** Creates the exception with neither message nor cause. */
  public ThrottledRequestException() {
  }
}

Просмотреть файл

@ -0,0 +1,42 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.exceptions;
/**
 * Checked exception type for transient failures.
 * Exceptions other than TransientException and NonTransientException are
 * considered to be UnKnownException.
 */
public class TransientException extends Exception {
  private static final long serialVersionUID = 1L;

  /** Creates the exception with both a detail message and a cause. */
  public TransientException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates the exception with a cause only. */
  public TransientException(Throwable cause) {
    super(cause);
  }

  /** Creates the exception with a detail message only. */
  public TransientException(String message) {
    super(message);
  }

  /** Creates the exception with neither message nor cause. */
  public TransientException() {
  }
}

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
// Closed set of AM type values. Declaration order (DEFAULT, then AGENT) fixes the ordinals.
public enum AMType implements Serializable {
  DEFAULT, AGENT;
}

Просмотреть файл

@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.Map;
// Serializable bean that groups, for one framework, its FrameworkRequest, its
// OverrideApplicationProgressRequest and its MigrateTaskRequests.
// Each field is exposed through a plain getter/setter pair: values are stored and
// returned as given, with no validation and no defensive copy.
public class AggregatedFrameworkRequest implements Serializable {
private FrameworkRequest frameworkRequest;
private OverrideApplicationProgressRequest overrideApplicationProgressRequest;
// ContainerId -> MigrateTaskRequest
private Map<String, MigrateTaskRequest> migrateTaskRequests;
// Returns the stored FrameworkRequest (null until set).
public FrameworkRequest getFrameworkRequest() {
return frameworkRequest;
}
public void setFrameworkRequest(FrameworkRequest frameworkRequest) {
this.frameworkRequest = frameworkRequest;
}
// Returns the stored OverrideApplicationProgressRequest (null until set).
public OverrideApplicationProgressRequest getOverrideApplicationProgressRequest() {
return overrideApplicationProgressRequest;
}
public void setOverrideApplicationProgressRequest(OverrideApplicationProgressRequest overrideApplicationProgressRequest) {
this.overrideApplicationProgressRequest = overrideApplicationProgressRequest;
}
// Returns the very map instance that was set (null until set); changes made by the
// caller to that map are therefore visible through this object.
public Map<String, MigrateTaskRequest> getMigrateTaskRequests() {
return migrateTaskRequests;
}
public void setMigrateTaskRequests(Map<String, MigrateTaskRequest> migrateTaskRequests) {
this.migrateTaskRequests = migrateTaskRequests;
}
}

Просмотреть файл

@ -0,0 +1,43 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.Map;
// Serializable bean that groups a FrameworkStatus with the AggregatedTaskRoleStatus
// of each of its task roles.
// Each field is exposed through a plain getter/setter pair: values are stored and
// returned as given, with no validation and no defensive copy.
public class AggregatedFrameworkStatus implements Serializable {
private FrameworkStatus frameworkStatus;
// TaskRoleName -> AggregatedTaskRoleStatus
private Map<String, AggregatedTaskRoleStatus> aggregatedTaskRoleStatuses;
// Returns the stored FrameworkStatus (null until set).
public FrameworkStatus getFrameworkStatus() {
return frameworkStatus;
}
public void setFrameworkStatus(FrameworkStatus frameworkStatus) {
this.frameworkStatus = frameworkStatus;
}
// Returns the very map instance that was set (null until set); changes made by the
// caller to that map are therefore visible through this object.
public Map<String, AggregatedTaskRoleStatus> getAggregatedTaskRoleStatuses() {
return aggregatedTaskRoleStatuses;
}
public void setAggregatedTaskRoleStatuses(Map<String, AggregatedTaskRoleStatus> aggregatedTaskRoleStatuses) {
this.aggregatedTaskRoleStatuses = aggregatedTaskRoleStatuses;
}
}

Просмотреть файл

@ -0,0 +1,43 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.Map;
// Serializable bean that groups a LauncherRequest with the AggregatedFrameworkRequest
// of each framework.
// Each field is exposed through a plain getter/setter pair: values are stored and
// returned as given, with no validation and no defensive copy.
public class AggregatedLauncherRequest implements Serializable {
private LauncherRequest launcherRequest;
// FrameworkName -> AggregatedFrameworkRequest
private Map<String, AggregatedFrameworkRequest> aggregatedFrameworkRequests;
// Returns the stored LauncherRequest (null until set).
public LauncherRequest getLauncherRequest() {
return launcherRequest;
}
public void setLauncherRequest(LauncherRequest launcherRequest) {
this.launcherRequest = launcherRequest;
}
// Returns the very map instance that was set (null until set); changes made by the
// caller to that map are therefore visible through this object.
public Map<String, AggregatedFrameworkRequest> getAggregatedFrameworkRequests() {
return aggregatedFrameworkRequests;
}
public void setAggregatedFrameworkRequests(Map<String, AggregatedFrameworkRequest> aggregatedFrameworkRequests) {
this.aggregatedFrameworkRequests = aggregatedFrameworkRequests;
}
}

Просмотреть файл

@ -0,0 +1,43 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.Map;
// Serializable bean that groups a LauncherStatus with the AggregatedFrameworkStatus
// of each framework.
// Each field is exposed through a plain getter/setter pair: values are stored and
// returned as given, with no validation and no defensive copy.
public class AggregatedLauncherStatus implements Serializable {
private LauncherStatus launcherStatus;
// FrameworkName -> AggregatedFrameworkStatus
private Map<String, AggregatedFrameworkStatus> aggregatedFrameworkStatuses;
// Returns the stored LauncherStatus (null until set).
public LauncherStatus getLauncherStatus() {
return launcherStatus;
}
public void setLauncherStatus(LauncherStatus launcherStatus) {
this.launcherStatus = launcherStatus;
}
// Returns the very map instance that was set (null until set); changes made by the
// caller to that map are therefore visible through this object.
public Map<String, AggregatedFrameworkStatus> getAggregatedFrameworkStatuses() {
return aggregatedFrameworkStatuses;
}
public void setAggregatedFrameworkStatuses(Map<String, AggregatedFrameworkStatus> aggregatedFrameworkStatuses) {
this.aggregatedFrameworkStatuses = aggregatedFrameworkStatuses;
}
}

Просмотреть файл

@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
// Serializable bean that pairs a TaskRoleStatus with a TaskStatuses object.
// Each field is exposed through a plain getter/setter pair: values are stored and
// returned as given, with no validation and no defensive copy.
public class AggregatedTaskRoleStatus implements Serializable {
private TaskRoleStatus taskRoleStatus;
private TaskStatuses taskStatuses;
// Returns the stored TaskRoleStatus (null until set).
public TaskRoleStatus getTaskRoleStatus() {
return taskRoleStatus;
}
public void setTaskRoleStatus(TaskRoleStatus taskRoleStatus) {
this.taskRoleStatus = taskRoleStatus;
}
// Returns the stored TaskStatuses (null until set).
public TaskStatuses getTaskStatuses() {
return taskStatuses;
}
public void setTaskStatuses(TaskStatuses taskStatuses) {
this.taskStatuses = taskStatuses;
}
}

Просмотреть файл

@ -0,0 +1,28 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Anti-affinity levels, declared in the order {@code ANY}, {@code NODE}, {@code RACK}.
 * NOTE(review): the scheduling semantics of each level are not defined here;
 * confirm them against the code that consumes this enum.
 */
public enum AntiAffinityLevel implements Serializable {
  ANY, NODE, RACK;
}

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Data deployment version types: {@code LAUNCHING} followed by {@code LAUNCHED}.
 * NOTE(review): presumably distinguishes a version that is being launched from
 * one that has finished launching; confirm against the callers.
 */
public enum DataDeploymentVersionType implements Serializable {
  LAUNCHING, LAUNCHED;
}

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Disk types that can be named in the model: {@code HDD} and {@code SSD}.
 */
public enum DiskType implements Serializable {
  HDD, SSD;
}

Просмотреть файл

@ -0,0 +1,43 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Classification of an exit, used to describe whether a retry can be expected to help.
 */
public enum ExitType implements Serializable {
  /** Succeeded. */
  SUCCEEDED,

  /**
   * Failed, but it is guaranteed to succeed within a finite number of retries,
   * such as hdfs error, env error, machine error, connection error...
   */
  TRANSIENT_NORMAL,

  /**
   * A special {@link #TRANSIENT_NORMAL} which indicates that the exit was caused by a
   * resource conflict, i.e. the required resource to run could not be obtained.
   */
  TRANSIENT_CONFLICT,

  /**
   * Failed, and it is guaranteed to fail on every retry,
   * such as incorrect usage, input data corruption...
   */
  NON_TRANSIENT,

  /** Failed, and no retry guarantee can be offered. */
  UNKNOWN,

  /** The exit type is not available. */
  NOT_AVAILABLE;
}

Просмотреть файл

@ -0,0 +1,103 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import com.microsoft.frameworklauncher.common.ModelValidation;
import com.microsoft.frameworklauncher.common.exceptions.BadRequestException;
import org.hibernate.validator.constraints.NotEmpty;
import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
import java.util.Map;
/**
 * Serializable description of a Framework: a free-form description, a version,
 * a retry policy, an optional parent framework, the task roles keyed by name,
 * and platform specific parameters.
 *
 * Fields carry Bean Validation annotations; {@code version} must not be null and
 * {@code taskRoles} must not be null or empty for the descriptor to validate.
 */
public class FrameworkDescriptor implements Serializable {
  @Valid
  private String description;

  // A version change will trigger a NonRolling Upgrade of the whole Framework.
  @Valid
  @NotNull
  private Integer version;

  // Defaults to a freshly constructed RetryPolicyDescriptor.
  @Valid
  private RetryPolicyDescriptor retryPolicy = new RetryPolicyDescriptor();

  @Valid
  private ParentFrameworkDescriptor parentFramework;

  // Task roles keyed by task role name; every name is checked in setTaskRoles.
  @Valid
  @NotEmpty
  private Map<String, TaskRoleDescriptor> taskRoles;

  // Defaults to a freshly constructed PlatformSpecificParametersDescriptor.
  @Valid
  private PlatformSpecificParametersDescriptor platformSpecificParameters = new PlatformSpecificParametersDescriptor();

  /**
   * @return the description, or {@code null} if none was set
   */
  public String getDescription() {
    return description;
  }

  /**
   * @param description the description to store
   */
  public void setDescription(String description) {
    this.description = description;
  }

  /**
   * @return the Framework version, or {@code null} if none was set
   */
  public Integer getVersion() {
    return version;
  }

  /**
   * @param version the Framework version to store
   */
  public void setVersion(Integer version) {
    this.version = version;
  }

  /**
   * @return the retry policy
   */
  public RetryPolicyDescriptor getRetryPolicy() {
    return retryPolicy;
  }

  /**
   * @param retryPolicy the retry policy to store
   */
  public void setRetryPolicy(RetryPolicyDescriptor retryPolicy) {
    this.retryPolicy = retryPolicy;
  }

  /**
   * @return the parent framework descriptor, or {@code null} if none was set
   */
  public ParentFrameworkDescriptor getParentFramework() {
    return parentFramework;
  }

  /**
   * @param parentFramework the parent framework descriptor to store
   */
  public void setParentFramework(ParentFrameworkDescriptor parentFramework) {
    this.parentFramework = parentFramework;
  }

  /**
   * @return the task roles keyed by task role name, or {@code null} if none were set
   */
  public Map<String, TaskRoleDescriptor> getTaskRoles() {
    return taskRoles;
  }

  /**
   * Stores the task roles after passing every task role name to
   * {@link ModelValidation#validate}.
   *
   * @param taskRoles the task roles keyed by task role name; may be {@code null},
   *                  in which case no name is checked and {@code null} is stored
   * @throws BadRequestException propagated from {@link ModelValidation#validate}
   *                             for a task role name
   */
  public void setTaskRoles(Map<String, TaskRoleDescriptor> taskRoles) throws BadRequestException {
    // A null map is left for the @NotEmpty constraint on the field to report.
    // Dereferencing it here would surface as a NullPointerException from the
    // setter instead of a validation failure.
    if (taskRoles != null) {
      for (String taskRoleName : taskRoles.keySet()) {
        ModelValidation.validate(taskRoleName);
      }
    }
    this.taskRoles = taskRoles;
  }

  /**
   * @return the platform specific parameters
   */
  public PlatformSpecificParametersDescriptor getPlatformSpecificParameters() {
    return platformSpecificParameters;
  }

  /**
   * @param platformSpecificParameters the platform specific parameters to store
   */
  public void setPlatformSpecificParameters(PlatformSpecificParametersDescriptor platformSpecificParameters) {
    this.platformSpecificParameters = platformSpecificParameters;
  }
}

Просмотреть файл

@ -0,0 +1,68 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable request for a Framework: its name, its {@link FrameworkDescriptor},
 * and information about the client that launched it (type, host name, user name).
 */
public class FrameworkRequest implements Serializable {
  private String frameworkName;
  private FrameworkDescriptor frameworkDescriptor;
  // The launch client type is UNKNOWN unless explicitly set.
  private LaunchClientType launchClientType = LaunchClientType.UNKNOWN;
  private String launchClientHostName;
  private String launchClientUserName;

  /**
   * @return the Framework name, or {@code null} if none was set
   */
  public String getFrameworkName() {
    return this.frameworkName;
  }

  /**
   * @param frameworkName the Framework name to store
   */
  public void setFrameworkName(String frameworkName) {
    this.frameworkName = frameworkName;
  }

  /**
   * @return the Framework descriptor, or {@code null} if none was set
   */
  public FrameworkDescriptor getFrameworkDescriptor() {
    return this.frameworkDescriptor;
  }

  /**
   * @param frameworkDescriptor the Framework descriptor to store
   */
  public void setFrameworkDescriptor(FrameworkDescriptor frameworkDescriptor) {
    this.frameworkDescriptor = frameworkDescriptor;
  }

  /**
   * @return the type of the launching client
   */
  public LaunchClientType getLaunchClientType() {
    return this.launchClientType;
  }

  /**
   * @param launchClientType the type of the launching client to store
   */
  public void setLaunchClientType(LaunchClientType launchClientType) {
    this.launchClientType = launchClientType;
  }

  /**
   * @return the host name of the launching client, or {@code null} if none was set
   */
  public String getLaunchClientHostName() {
    return this.launchClientHostName;
  }

  /**
   * @param launchClientHostName the host name of the launching client to store
   */
  public void setLaunchClientHostName(String launchClientHostName) {
    this.launchClientHostName = launchClientHostName;
  }

  /**
   * @return the user name of the launching client, or {@code null} if none was set
   */
  public String getLaunchClientUserName() {
    return this.launchClientUserName;
  }

  /**
   * @param launchClientUserName the user name of the launching client to store
   */
  public void setLaunchClientUserName(String launchClientUserName) {
    this.launchClientUserName = launchClientUserName;
  }
}

Просмотреть файл

@ -0,0 +1,60 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * States of a Framework and of its current associated Application.
 * Each constant lists the transitions that leave it, as {@code trigger -> next state}.
 */
public enum FrameworkState implements Serializable {
  /**
   * Framework is waiting to create an Application. [START_STATES]
   * <ul>
   * <li>createApplication -> APPLICATION_CREATED</li>
   * </ul>
   */
  FRAMEWORK_WAITING,

  /**
   * Framework's current associated Application has been created.
   * <ul>
   * <li>launchApplication -> APPLICATION_LAUNCHED</li>
   * <li>launchApplication -> APPLICATION_RETRIEVING_DIAGNOSTICS</li>
   * <li>recover -> APPLICATION_LAUNCHED</li>
   * </ul>
   */
  APPLICATION_CREATED,

  /**
   * Framework's current associated Application has been launched.
   * <ul>
   * <li>resyncWithRM -> APPLICATION_WAITING</li>
   * <li>resyncWithRM -> APPLICATION_RETRIEVING_DIAGNOSTICS</li>
   * </ul>
   */
  APPLICATION_LAUNCHED,

  /**
   * Framework's current associated Application is waiting for its AM Container to be allocated.
   * <ul>
   * <li>resyncWithRM -> APPLICATION_RUNNING</li>
   * <li>resyncWithRM -> APPLICATION_RETRIEVING_DIAGNOSTICS</li>
   * </ul>
   */
  APPLICATION_WAITING,

  /**
   * Framework's current associated Application is running.
   * <ul>
   * <li>resyncWithRM -> APPLICATION_RETRIEVING_DIAGNOSTICS</li>
   * </ul>
   */
  APPLICATION_RUNNING,

  /**
   * Framework's current associated Application is retrieving diagnostics.
   * <ul>
   * <li>onDiagnosticsRetrieved -> APPLICATION_COMPLETED</li>
   * </ul>
   */
  APPLICATION_RETRIEVING_DIAGNOSTICS,

  /**
   * Framework's current associated Application has completed.
   * <ul>
   * <li>attemptToRetry -> FRAMEWORK_WAITING</li>
   * <li>attemptToRetry -> FRAMEWORK_COMPLETED</li>
   * </ul>
   */
  APPLICATION_COMPLETED,

  /**
   * Framework has completed, possibly with Application retries. [FINAL_STATES]
   */
  FRAMEWORK_COMPLETED;
}

Просмотреть файл

@ -0,0 +1,156 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable status of a Framework. It is made of three groups of fields:
 * static status copied from the FrameworkRequest, dynamic status of the Framework
 * itself, and status of the Framework's current associated Application.
 */
public class FrameworkStatus implements Serializable {
  // ---- Framework static status from FrameworkRequest ----
  // Note other status can be retrieved from FrameworkRequest
  private String frameworkName;
  private Integer frameworkVersion;

  // ---- Framework dynamic status ----
  // A new status starts in FRAMEWORK_WAITING.
  private FrameworkState frameworkState = FrameworkState.FRAMEWORK_WAITING;
  private RetryPolicyState frameworkRetryPolicyState;
  private Long frameworkCreatedTimestamp;
  private Long frameworkCompletedTimestamp;

  // ---- Framework's current associated Application status ----
  // Note other status can be retrieved from RM
  private String applicationId;
  private Float applicationProgress;
  private String applicationTrackingUrl;
  private Long applicationLaunchedTimestamp;
  private Long applicationCompletedTimestamp;
  private Integer applicationExitCode;
  private String applicationExitDiagnostics;
  // The exit type is NOT_AVAILABLE unless explicitly set.
  private ExitType applicationExitType = ExitType.NOT_AVAILABLE;

  /** @return the Framework name, or {@code null} if none was set */
  public String getFrameworkName() {
    return this.frameworkName;
  }

  /** @param frameworkName the Framework name to store */
  public void setFrameworkName(String frameworkName) {
    this.frameworkName = frameworkName;
  }

  /** @return the Framework version, or {@code null} if none was set */
  public Integer getFrameworkVersion() {
    return this.frameworkVersion;
  }

  /** @param frameworkVersion the Framework version to store */
  public void setFrameworkVersion(Integer frameworkVersion) {
    this.frameworkVersion = frameworkVersion;
  }

  /** @return the current Framework state */
  public FrameworkState getFrameworkState() {
    return this.frameworkState;
  }

  /** @param frameworkState the Framework state to store */
  public void setFrameworkState(FrameworkState frameworkState) {
    this.frameworkState = frameworkState;
  }

  /** @return the Framework retry policy state, or {@code null} if none was set */
  public RetryPolicyState getFrameworkRetryPolicyState() {
    return this.frameworkRetryPolicyState;
  }

  /** @param frameworkRetryPolicyState the Framework retry policy state to store */
  public void setFrameworkRetryPolicyState(RetryPolicyState frameworkRetryPolicyState) {
    this.frameworkRetryPolicyState = frameworkRetryPolicyState;
  }

  /** @return the Framework created timestamp, or {@code null} if none was set */
  public Long getFrameworkCreatedTimestamp() {
    return this.frameworkCreatedTimestamp;
  }

  /** @param frameworkCreatedTimestamp the Framework created timestamp to store */
  public void setFrameworkCreatedTimestamp(Long frameworkCreatedTimestamp) {
    this.frameworkCreatedTimestamp = frameworkCreatedTimestamp;
  }

  /** @return the Framework completed timestamp, or {@code null} if none was set */
  public Long getFrameworkCompletedTimestamp() {
    return this.frameworkCompletedTimestamp;
  }

  /** @param frameworkCompletedTimestamp the Framework completed timestamp to store */
  public void setFrameworkCompletedTimestamp(Long frameworkCompletedTimestamp) {
    this.frameworkCompletedTimestamp = frameworkCompletedTimestamp;
  }

  /** @return the Application id, or {@code null} if none was set */
  public String getApplicationId() {
    return this.applicationId;
  }

  /** @param applicationId the Application id to store */
  public void setApplicationId(String applicationId) {
    this.applicationId = applicationId;
  }

  /** @return the Application progress, or {@code null} if none was set */
  public Float getApplicationProgress() {
    return this.applicationProgress;
  }

  /** @param applicationProgress the Application progress to store */
  public void setApplicationProgress(Float applicationProgress) {
    this.applicationProgress = applicationProgress;
  }

  /** @return the Application tracking URL, or {@code null} if none was set */
  public String getApplicationTrackingUrl() {
    return this.applicationTrackingUrl;
  }

  /** @param applicationTrackingUrl the Application tracking URL to store */
  public void setApplicationTrackingUrl(String applicationTrackingUrl) {
    this.applicationTrackingUrl = applicationTrackingUrl;
  }

  /** @return the Application launched timestamp, or {@code null} if none was set */
  public Long getApplicationLaunchedTimestamp() {
    return this.applicationLaunchedTimestamp;
  }

  /** @param applicationLaunchedTimestamp the Application launched timestamp to store */
  public void setApplicationLaunchedTimestamp(Long applicationLaunchedTimestamp) {
    this.applicationLaunchedTimestamp = applicationLaunchedTimestamp;
  }

  /** @return the Application completed timestamp, or {@code null} if none was set */
  public Long getApplicationCompletedTimestamp() {
    return this.applicationCompletedTimestamp;
  }

  /** @param applicationCompletedTimestamp the Application completed timestamp to store */
  public void setApplicationCompletedTimestamp(Long applicationCompletedTimestamp) {
    this.applicationCompletedTimestamp = applicationCompletedTimestamp;
  }

  /** @return the Application exit code, or {@code null} if none was set */
  public Integer getApplicationExitCode() {
    return this.applicationExitCode;
  }

  /** @param applicationExitCode the Application exit code to store */
  public void setApplicationExitCode(Integer applicationExitCode) {
    this.applicationExitCode = applicationExitCode;
  }

  /** @return the Application exit diagnostics, or {@code null} if none was set */
  public String getApplicationExitDiagnostics() {
    return this.applicationExitDiagnostics;
  }

  /** @param applicationExitDiagnostics the Application exit diagnostics to store */
  public void setApplicationExitDiagnostics(String applicationExitDiagnostics) {
    this.applicationExitDiagnostics = applicationExitDiagnostics;
  }

  /** @return the Application exit type */
  public ExitType getApplicationExitType() {
    return this.applicationExitType;
  }

  /** @param applicationExitType the Application exit type to store */
  public void setApplicationExitType(ExitType applicationExitType) {
    this.applicationExitType = applicationExitType;
  }
}

Просмотреть файл

@ -0,0 +1,134 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import java.io.Serializable;
/**
 * Serializable description of a health check: its type, what to run or which URL
 * to probe, its timing parameters, and the failure type reported when it fails.
 */
public class HealthCheckDescriptor implements Serializable {
  // Defaults to COMMAND.
  @Valid
  private HealthCheckType healthCheckType = HealthCheckType.COMMAND;

  // Command line.
  // The executable inside the command line must be a system executable (like ping.exe) or
  // it is an executable inside the ServiceDescriptor.SourceLocations.
  @Valid
  private String entryPoint;

  @Valid
  private String webUrl;

  // The amount of time to wait until starting health checking.
  @Valid
  private Integer delaySeconds = 30;

  // The interval between health checks.
  @Valid
  private Integer intervalSeconds = 30;

  // The amount of time to wait for the health check to complete.
  // After this timeout, the health check is aborted and treated as a failure.
  @Valid
  private Integer timeoutSeconds = 30;

  // The number of consecutive failures until the user application is killed by the agent.
  @Valid
  private Integer consecutiveFailures = 10;

  // The amount of time after the user application is launched during which health check failures are ignored.
  // Once a health check succeeds for the first time, the grace period does not apply anymore.
  // Note that it includes "delaySeconds", i.e., setting "gracePeriodSeconds" < "delaySeconds" has no effect.
  @Valid
  private Integer gracePeriodSeconds = 60;

  // The agent exit type when a health check failure is encountered.
  @Valid
  private HealthCheckFailureType healthCheckFailureType = HealthCheckFailureType.TRANSIENT_ERROR;

  /** @return the health check type */
  public HealthCheckType getHealthCheckType() {
    return this.healthCheckType;
  }

  /** @param healthCheckType the health check type to store */
  public void setHealthCheckType(HealthCheckType healthCheckType) {
    this.healthCheckType = healthCheckType;
  }

  /** @return the command line entry point, or {@code null} if none was set */
  public String getEntryPoint() {
    return this.entryPoint;
  }

  /** @param entryPoint the command line entry point to store */
  public void setEntryPoint(String entryPoint) {
    this.entryPoint = entryPoint;
  }

  /** @return the web URL, or {@code null} if none was set */
  public String getWebUrl() {
    return this.webUrl;
  }

  /** @param webUrl the web URL to store */
  public void setWebUrl(String webUrl) {
    this.webUrl = webUrl;
  }

  /** @return the delay, in seconds, before health checking starts */
  public Integer getDelaySeconds() {
    return this.delaySeconds;
  }

  /** @param delaySeconds the delay, in seconds, before health checking starts */
  public void setDelaySeconds(Integer delaySeconds) {
    this.delaySeconds = delaySeconds;
  }

  /** @return the interval, in seconds, between health checks */
  public Integer getIntervalSeconds() {
    return this.intervalSeconds;
  }

  /** @param intervalSeconds the interval, in seconds, between health checks */
  public void setIntervalSeconds(Integer intervalSeconds) {
    this.intervalSeconds = intervalSeconds;
  }

  /** @return the timeout, in seconds, of a single health check */
  public Integer getTimeoutSeconds() {
    return this.timeoutSeconds;
  }

  /** @param timeoutSeconds the timeout, in seconds, of a single health check */
  public void setTimeoutSeconds(Integer timeoutSeconds) {
    this.timeoutSeconds = timeoutSeconds;
  }

  /** @return the number of consecutive failures that is tolerated */
  public Integer getConsecutiveFailures() {
    return this.consecutiveFailures;
  }

  /** @param consecutiveFailures the number of consecutive failures that is tolerated */
  public void setConsecutiveFailures(Integer consecutiveFailures) {
    this.consecutiveFailures = consecutiveFailures;
  }

  /** @return the grace period, in seconds */
  public Integer getGracePeriodSeconds() {
    return this.gracePeriodSeconds;
  }

  /** @param gracePeriodSeconds the grace period, in seconds */
  public void setGracePeriodSeconds(Integer gracePeriodSeconds) {
    this.gracePeriodSeconds = gracePeriodSeconds;
  }

  /** @return the failure type reported when the health check fails */
  public HealthCheckFailureType getHealthCheckFailureType() {
    return this.healthCheckFailureType;
  }

  /** @param healthCheckFailureType the failure type reported when the health check fails */
  public void setHealthCheckFailureType(HealthCheckFailureType healthCheckFailureType) {
    this.healthCheckFailureType = healthCheckFailureType;
  }
}

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Failure types for a health check: {@code NON_TRANSIENT_ERROR} followed by
 * {@code TRANSIENT_ERROR}.
 */
public enum HealthCheckFailureType implements Serializable {
  NON_TRANSIENT_ERROR, TRANSIENT_ERROR;
}

Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Kinds of health check: {@code COMMAND} followed by {@code WEB}.
 */
public enum HealthCheckType implements Serializable {
  COMMAND, WEB;
}

Просмотреть файл

@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Types of client that can launch a Framework. {@code UNKNOWN} is declared first.
 */
public enum LaunchClientType implements Serializable {
  UNKNOWN, LIB, DATA_DEPLOYMENT, APPLICATION_MASTER, CLI, WEB_UI;
}

Просмотреть файл

@ -0,0 +1,303 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.constraints.Pattern;
import java.io.Serializable;
public class LauncherConfiguration implements Serializable {
// Common Setup
private String zkConnectString = "127.0.0.1:2181";
private String zkRootDir = "/Launcher";
private String hdfsRootDir = "/Launcher";
// Service Setup
private Integer serviceRMResyncIntervalSec = 60;
private Integer serviceRequestPullIntervalSec = 30;
// Application Setup
private Integer applicationRetrieveDiagnosticsIntervalSec = 60;
private Integer applicationRetrieveDiagnosticsMaxRetryCount = 15;
private Integer applicationTransientConflictMinDelaySec = 600;
private Integer applicationTransientConflictMaxDelaySec = 3600;
private Integer applicationSetupContextMaxRetryCount = 3;
private Integer applicationSetupContextRetryIntervalSec = 1;
// Framework Setup
// Completed Frameworks are ONLY retained for the most recent FrameworkCompletedRetainSec seconds,
// in case the Client fails to delete the Framework after FRAMEWORK_COMPLETED.
// One exception is a Framework launched by DataDeployment: it is retained until
// the corresponding FrameworkDescriptionFile is deleted in the DataDeployment.
// To avoid missing the CompletedFrameworkStatus, the polling interval (in seconds) of the Client
// should be less than FrameworkCompletedRetainSec.
private Integer frameworkCompletedRetainSec = 43200;
// Leftover Frameworks hold external resources, such as HDFS, which need to be GCed when the Service starts.
// FrameworkLeftoverGCMaxCount limits the max number of Frameworks to be GCed each time, so that the
// Service start time is also limited.
private Integer frameworkLeftoverGCMaxCount = 1000;
// Zookeeper is seriously degraded if its data size is larger than 1GB.
// Here, we limit the Total TaskNumber to 500K, such that the Zookeeper data size used by Launcher is also limited to 100MB = 500K * 200 bytes/task.
private Integer maxTotalTaskNumber = 500000;
// ApplicationMaster Setup
private Integer amVersion = 0;
// AM default Resource, which can support up to 10000 total Tasks in one Framework
private ResourceDescriptor amDefaultResource = ResourceDescriptor.newInstance(4096, 1, 0, 0L);
private Integer amPriority = 1;
// Just in case AM cannot be gracefully Stopped and RM cannot judge its exit as transient,
// such as AM process interrupted by external system, AM exit by FailFast, etc.
private Integer amAttemptMaxCount = 3;
private Integer amAttemptFailuresValidityIntervalSec = 10;
// ApplicationMaster Internal Setup which should not be exposed to User
private Integer amRmHeartbeatIntervalSec = 3;
// The RMResync count in one NM expiry time.
// RMResync can detect and process under-allocated(RMResyncLost), and over-allocated(RMResyncExceed)
// Containers actively, instead of waiting the RM call back passively.
// This feature can provide eventual consistency between AM and RM.
private Integer amRmResyncFrequency = 6;
private Integer amRequestPullIntervalSec = 30;
private Integer amStatusPushIntervalSec = 30;
// AMRMClient Max Continuous Failure Count before Shutdown AM.
// This can tolerate transient Exceptions, such as
// RM InterruptedException (RM is shutting down during AM RM heartbeat).
private Integer amRmClientMaxFailureCount = 3;
// WebServer Setup
private String webServerBindHost = "0.0.0.0";
@Pattern(regexp = "https?://[^:^/]+:\\d+")
private String webServerAddress = "http://localhost:9086";
private Integer webServerStatusPullIntervalSec = 30;
public String getZkConnectString() {
return zkConnectString;
}
public void setZkConnectString(String zkConnectString) {
this.zkConnectString = zkConnectString;
}
public String getZkRootDir() {
return zkRootDir;
}
public void setZkRootDir(String zkRootDir) {
this.zkRootDir = zkRootDir;
}
public String getHdfsRootDir() {
return hdfsRootDir;
}
public void setHdfsRootDir(String hdfsRootDir) {
this.hdfsRootDir = hdfsRootDir;
}
public Integer getServiceRMResyncIntervalSec() {
return serviceRMResyncIntervalSec;
}
public void setServiceRMResyncIntervalSec(Integer serviceRMResyncIntervalSec) {
this.serviceRMResyncIntervalSec = serviceRMResyncIntervalSec;
}
public Integer getServiceRequestPullIntervalSec() {
return serviceRequestPullIntervalSec;
}
public void setServiceRequestPullIntervalSec(Integer serviceRequestPullIntervalSec) {
this.serviceRequestPullIntervalSec = serviceRequestPullIntervalSec;
}
/** Returns the current value of the applicationRetrieveDiagnosticsIntervalSec field. */
public Integer getApplicationRetrieveDiagnosticsIntervalSec() {
  return this.applicationRetrieveDiagnosticsIntervalSec;
}
/** Replaces the applicationRetrieveDiagnosticsIntervalSec field with the given value. */
public void setApplicationRetrieveDiagnosticsIntervalSec(final Integer applicationRetrieveDiagnosticsIntervalSec) {
  this.applicationRetrieveDiagnosticsIntervalSec = applicationRetrieveDiagnosticsIntervalSec;
}
/** Returns the current value of the applicationRetrieveDiagnosticsMaxRetryCount field. */
public Integer getApplicationRetrieveDiagnosticsMaxRetryCount() {
  return this.applicationRetrieveDiagnosticsMaxRetryCount;
}
/** Replaces the applicationRetrieveDiagnosticsMaxRetryCount field with the given value. */
public void setApplicationRetrieveDiagnosticsMaxRetryCount(final Integer applicationRetrieveDiagnosticsMaxRetryCount) {
  this.applicationRetrieveDiagnosticsMaxRetryCount = applicationRetrieveDiagnosticsMaxRetryCount;
}
/** Returns the current value of the applicationTransientConflictMinDelaySec field. */
public Integer getApplicationTransientConflictMinDelaySec() {
  return this.applicationTransientConflictMinDelaySec;
}
/** Replaces the applicationTransientConflictMinDelaySec field with the given value. */
public void setApplicationTransientConflictMinDelaySec(final Integer applicationTransientConflictMinDelaySec) {
  this.applicationTransientConflictMinDelaySec = applicationTransientConflictMinDelaySec;
}
/** Returns the current value of the applicationTransientConflictMaxDelaySec field. */
public Integer getApplicationTransientConflictMaxDelaySec() {
  return this.applicationTransientConflictMaxDelaySec;
}
/** Replaces the applicationTransientConflictMaxDelaySec field with the given value. */
public void setApplicationTransientConflictMaxDelaySec(final Integer applicationTransientConflictMaxDelaySec) {
  this.applicationTransientConflictMaxDelaySec = applicationTransientConflictMaxDelaySec;
}
/** Returns the current value of the applicationSetupContextMaxRetryCount field. */
public Integer getApplicationSetupContextMaxRetryCount() {
  return this.applicationSetupContextMaxRetryCount;
}
/** Replaces the applicationSetupContextMaxRetryCount field with the given value. */
public void setApplicationSetupContextMaxRetryCount(final Integer applicationSetupContextMaxRetryCount) {
  this.applicationSetupContextMaxRetryCount = applicationSetupContextMaxRetryCount;
}
/** Returns the current value of the applicationSetupContextRetryIntervalSec field. */
public Integer getApplicationSetupContextRetryIntervalSec() {
  return this.applicationSetupContextRetryIntervalSec;
}
/** Replaces the applicationSetupContextRetryIntervalSec field with the given value. */
public void setApplicationSetupContextRetryIntervalSec(final Integer applicationSetupContextRetryIntervalSec) {
  this.applicationSetupContextRetryIntervalSec = applicationSetupContextRetryIntervalSec;
}
/** Returns the current value of the frameworkCompletedRetainSec field. */
public Integer getFrameworkCompletedRetainSec() {
  return this.frameworkCompletedRetainSec;
}
/** Replaces the frameworkCompletedRetainSec field with the given value. */
public void setFrameworkCompletedRetainSec(final Integer frameworkCompletedRetainSec) {
  this.frameworkCompletedRetainSec = frameworkCompletedRetainSec;
}
/** Returns the current value of the frameworkLeftoverGCMaxCount field. */
public Integer getFrameworkLeftoverGCMaxCount() {
  return this.frameworkLeftoverGCMaxCount;
}
/** Replaces the frameworkLeftoverGCMaxCount field with the given value. */
public void setFrameworkLeftoverGCMaxCount(final Integer frameworkLeftoverGCMaxCount) {
  this.frameworkLeftoverGCMaxCount = frameworkLeftoverGCMaxCount;
}
/** Returns the current value of the maxTotalTaskNumber field. */
public Integer getMaxTotalTaskNumber() {
  return this.maxTotalTaskNumber;
}
/** Replaces the maxTotalTaskNumber field with the given value. */
public void setMaxTotalTaskNumber(final Integer maxTotalTaskNumber) {
  this.maxTotalTaskNumber = maxTotalTaskNumber;
}
/** Returns the current value of the amVersion field. */
public Integer getAmVersion() {
  return this.amVersion;
}
/** Replaces the amVersion field with the given value. */
public void setAmVersion(final Integer amVersion) {
  this.amVersion = amVersion;
}
/** Returns the current value of the amDefaultResource field. */
public ResourceDescriptor getAmDefaultResource() {
  return this.amDefaultResource;
}
/** Replaces the amDefaultResource field with the given value. */
public void setAmDefaultResource(final ResourceDescriptor amDefaultResource) {
  this.amDefaultResource = amDefaultResource;
}
/** Returns the current value of the amPriority field. */
public Integer getAmPriority() {
  return this.amPriority;
}
/** Replaces the amPriority field with the given value. */
public void setAmPriority(final Integer amPriority) {
  this.amPriority = amPriority;
}
/** Returns the current value of the amAttemptMaxCount field. */
public Integer getAmAttemptMaxCount() {
  return this.amAttemptMaxCount;
}
/** Replaces the amAttemptMaxCount field with the given value. */
public void setAmAttemptMaxCount(final Integer amAttemptMaxCount) {
  this.amAttemptMaxCount = amAttemptMaxCount;
}
/** Returns the current value of the amAttemptFailuresValidityIntervalSec field. */
public Integer getAmAttemptFailuresValidityIntervalSec() {
  return this.amAttemptFailuresValidityIntervalSec;
}
/** Replaces the amAttemptFailuresValidityIntervalSec field with the given value. */
public void setAmAttemptFailuresValidityIntervalSec(final Integer amAttemptFailuresValidityIntervalSec) {
  this.amAttemptFailuresValidityIntervalSec = amAttemptFailuresValidityIntervalSec;
}
/** Returns the current value of the amRmHeartbeatIntervalSec field. */
public Integer getAmRmHeartbeatIntervalSec() {
  return this.amRmHeartbeatIntervalSec;
}
/** Replaces the amRmHeartbeatIntervalSec field with the given value. */
public void setAmRmHeartbeatIntervalSec(final Integer amRmHeartbeatIntervalSec) {
  this.amRmHeartbeatIntervalSec = amRmHeartbeatIntervalSec;
}
/** Returns the current value of the amRmResyncFrequency field. */
public Integer getAmRmResyncFrequency() {
  return this.amRmResyncFrequency;
}
/** Replaces the amRmResyncFrequency field with the given value. */
public void setAmRmResyncFrequency(final Integer amRmResyncFrequency) {
  this.amRmResyncFrequency = amRmResyncFrequency;
}
/** Returns the current value of the amRequestPullIntervalSec field. */
public Integer getAmRequestPullIntervalSec() {
  return this.amRequestPullIntervalSec;
}
/** Replaces the amRequestPullIntervalSec field with the given value. */
public void setAmRequestPullIntervalSec(final Integer amRequestPullIntervalSec) {
  this.amRequestPullIntervalSec = amRequestPullIntervalSec;
}
/** Returns the current value of the amStatusPushIntervalSec field. */
public Integer getAmStatusPushIntervalSec() {
  return this.amStatusPushIntervalSec;
}
/** Replaces the amStatusPushIntervalSec field with the given value. */
public void setAmStatusPushIntervalSec(final Integer amStatusPushIntervalSec) {
  this.amStatusPushIntervalSec = amStatusPushIntervalSec;
}
/** Returns the current value of the amRmClientMaxFailureCount field. */
public Integer getAmRmClientMaxFailureCount() {
  return this.amRmClientMaxFailureCount;
}
/** Replaces the amRmClientMaxFailureCount field with the given value. */
public void setAmRmClientMaxFailureCount(final Integer amRmClientMaxFailureCount) {
  this.amRmClientMaxFailureCount = amRmClientMaxFailureCount;
}
/** Returns the current value of the webServerBindHost field. */
public String getWebServerBindHost() {
  return this.webServerBindHost;
}
/** Replaces the webServerBindHost field with the given value. */
public void setWebServerBindHost(final String webServerBindHost) {
  this.webServerBindHost = webServerBindHost;
}
/** Returns the current value of the webServerAddress field. */
public String getWebServerAddress() {
  return this.webServerAddress;
}
/**
 * Replaces the webServerAddress field with the given value.
 * No validation is performed here; the value is stored as-is.
 */
public void setWebServerAddress(final String webServerAddress) {
  this.webServerAddress = webServerAddress;
}
/** Returns the current value of the webServerStatusPullIntervalSec field. */
public Integer getWebServerStatusPullIntervalSec() {
  return this.webServerStatusPullIntervalSec;
}
/** Replaces the webServerStatusPullIntervalSec field with the given value. */
public void setWebServerStatusPullIntervalSec(final Integer webServerStatusPullIntervalSec) {
  this.webServerStatusPullIntervalSec = webServerStatusPullIntervalSec;
}
}

Просмотреть файл

@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable bean holding two data deployment version strings:
 * the one being launched and the one already launched.
 * Both fields are null until explicitly set.
 */
public class LauncherRequest implements Serializable {
  private String launchingDataDeploymentVersion;
  private String launchedDataDeploymentVersion;

  /** Returns the launchingDataDeploymentVersion field. */
  public String getLaunchingDataDeploymentVersion() {
    return this.launchingDataDeploymentVersion;
  }

  /** Replaces the launchingDataDeploymentVersion field with the given value. */
  public void setLaunchingDataDeploymentVersion(final String launchingDataDeploymentVersion) {
    this.launchingDataDeploymentVersion = launchingDataDeploymentVersion;
  }

  /** Returns the launchedDataDeploymentVersion field. */
  public String getLaunchedDataDeploymentVersion() {
    return this.launchedDataDeploymentVersion;
  }

  /** Replaces the launchedDataDeploymentVersion field with the given value. */
  public void setLaunchedDataDeploymentVersion(final String launchedDataDeploymentVersion) {
    this.launchedDataDeploymentVersion = launchedDataDeploymentVersion;
  }
}

Просмотреть файл

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable bean wrapping a single {@link LauncherConfiguration} reference.
 * The reference is null until explicitly set.
 */
public class LauncherStatus implements Serializable {
  private LauncherConfiguration launcherConfiguration;

  /** Returns the launcherConfiguration field. */
  public LauncherConfiguration getLauncherConfiguration() {
    return this.launcherConfiguration;
  }

  /** Replaces the launcherConfiguration field with the given value. */
  public void setLauncherConfiguration(final LauncherConfiguration launcherConfiguration) {
    this.launcherConfiguration = launcherConfiguration;
  }
}

Просмотреть файл

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable request bean carrying an {@link AntiAffinityLevel}.
 * The level defaults to {@code AntiAffinityLevel.ANY}.
 */
public class MigrateTaskRequest implements Serializable {
  private AntiAffinityLevel antiAffinityLevel = AntiAffinityLevel.ANY;

  /** Returns the antiAffinityLevel field. */
  public AntiAffinityLevel getAntiAffinityLevel() {
    return this.antiAffinityLevel;
  }

  /** Replaces the antiAffinityLevel field with the given value. */
  public void setAntiAffinityLevel(final AntiAffinityLevel antiAffinityLevel) {
    this.antiAffinityLevel = antiAffinityLevel;
  }
}

Просмотреть файл

@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import javax.validation.constraints.DecimalMax;
import javax.validation.constraints.DecimalMin;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
import java.math.BigDecimal;
/**
 * Serializable request bean carrying an application progress value.
 * The field is annotated as non-null with a declared minimum of "0.0" and maximum of "1.0";
 * the accessors themselves perform no checks.
 */
public class OverrideApplicationProgressRequest implements Serializable {
  @Valid
  @NotNull
  @DecimalMin("0.0")
  @DecimalMax("1.0")
  private BigDecimal applicationProgress;

  /** Returns the applicationProgress field. */
  public BigDecimal getApplicationProgress() {
    return this.applicationProgress;
  }

  /** Replaces the applicationProgress field with the given value. */
  public void setApplicationProgress(final BigDecimal applicationProgress) {
    this.applicationProgress = applicationProgress;
  }
}

Просмотреть файл

@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import com.microsoft.frameworklauncher.common.ModelValidation;
import org.hibernate.validator.constraints.NotEmpty;
import javax.validation.Valid;
import javax.validation.constraints.Pattern;
import java.io.Serializable;
/**
 * Serializable bean naming a parent framework and a flag
 * (deleteOnParentDeleted, false by default).
 * The name is annotated as non-empty and must match
 * {@code ModelValidation.NAMING_CONVENTION_REGEX_STR}; the accessors perform no checks.
 */
public class ParentFrameworkDescriptor implements Serializable {
  @Valid
  @NotEmpty
  @Pattern(regexp = ModelValidation.NAMING_CONVENTION_REGEX_STR)
  private String parentFrameworkName;

  @Valid
  private boolean deleteOnParentDeleted = false;

  /** Returns the parentFrameworkName field. */
  public String getParentFrameworkName() {
    return this.parentFrameworkName;
  }

  /** Replaces the parentFrameworkName field with the given value. */
  public void setParentFrameworkName(final String parentFrameworkName) {
    this.parentFrameworkName = parentFrameworkName;
  }

  /** Replaces the deleteOnParentDeleted flag with the given value. */
  public void setDeleteOnParentDeleted(final boolean deleteOnParentDeleted) {
    this.deleteOnParentDeleted = deleteOnParentDeleted;
  }

  /** Returns the deleteOnParentDeleted flag. */
  public boolean isDeleteOnParentDeleted() {
    return this.deleteOnParentDeleted;
  }
}

Просмотреть файл

@ -0,0 +1,221 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import java.io.Serializable;
/**
 * Computation platform specific parameters.
 * Currently only Yarn platform parameters are supported.
 */
public class PlatformSpecificParametersDescriptor implements Serializable {
  /**
   * To use LauncherConfiguration.amDefaultResource instead, leave this unset or set it to null.
   */
  @Valid
  private ResourceDescriptor amResource;

  @Valid
  private String amNodeLabel;

  @Valid
  private String taskNodeLabel;

  @Valid
  private String queue = "default";

  /**
   * -1 means unlimited.
   * -2 means using the default value: LauncherConfiguration.RMResyncFrequency
   * (NOTE(review): presumably LauncherConfiguration.amRmResyncFrequency - confirm).
   */
  @Valid
  private Integer containerConnectionMaxLostCount = -2;

  /**
   * There is no unlimited option, since an exceeding Container must be released eventually.
   */
  @Valid
  private Integer containerConnectionMaxExceedCount = 2;

  /**
   * If this feature is enabled, different Tasks are ensured to run on different nodes.
   */
  @Valid
  private Boolean antiaffinityAllocation = false;

  /**
   * If this feature is enabled, all Running Tasks will be killed after any TASK_COMPLETED.
   */
  @Valid
  private Boolean killAllOnAnyCompleted = false;

  /**
   * If this feature is enabled, all Running Tasks will be killed after any TASK_COMPLETED
   * which is due to the exit of UserService.
   */
  @Valid
  private Boolean killAllOnAnyServiceCompleted = false;

  /**
   * If this feature is enabled, AM will wait until all Tasks become ContainerAllocated and
   * then launch them together.
   * Besides, a ContainerIpList file will be generated in each Task's current working directory.
   * All the Tasks' IP addresses are recorded consistently in this file, and the IP address assigned
   * to the current Task can be retrieved from its environment variable CONTAINER_IP.
   */
  @Valid
  private Boolean generateContainerIpList = false;

  @Valid
  private AMType amType = AMType.DEFAULT;

  /**
   * Takes effect only if amType is "AGENT".
   * If this feature is enabled, Agent will be enabled to send heartbeats to AM.
   */
  @Valid
  private Boolean agentUseHeartbeat = false;

  /**
   * Takes effect only if amType is "AGENT" and the AgentUseAgent flag is true
   * (NOTE(review): "AgentUseAgent" presumably refers to agentUseHeartbeat - confirm).
   * Frameworks should not set agentHeartbeatIntervalSec to be smaller than
   * LauncherStatus.AgentAMCheckAgentHearbeatsIntervalSec.
   */
  @Valid
  private Integer agentHeartbeatIntervalSec = 30;

  /**
   * When AgentAM does not receive heartbeats for this interval, the agent is treated as expired.
   * It should be a value larger than agentHeartbeatIntervalSec.
   */
  @Valid
  private Integer agentExpiryIntervalSec = 180;

  /**
   * If this feature is enabled, Agent will be enabled to do health checking for user applications.
   */
  @Valid
  private Boolean agentUseHealthCheck = false;

  @Valid
  private HealthCheckDescriptor taskServiceHealthCheck;

  /** Returns the amResource field. */
  public ResourceDescriptor getAmResource() {
    return this.amResource;
  }

  /** Replaces the amResource field with the given value. */
  public void setAmResource(final ResourceDescriptor amResource) {
    this.amResource = amResource;
  }

  /** Returns the amNodeLabel field. */
  public String getAmNodeLabel() {
    return this.amNodeLabel;
  }

  /** Replaces the amNodeLabel field with the given value. */
  public void setAmNodeLabel(final String amNodeLabel) {
    this.amNodeLabel = amNodeLabel;
  }

  /** Returns the taskNodeLabel field. */
  public String getTaskNodeLabel() {
    return this.taskNodeLabel;
  }

  /** Replaces the taskNodeLabel field with the given value. */
  public void setTaskNodeLabel(final String taskNodeLabel) {
    this.taskNodeLabel = taskNodeLabel;
  }

  /** Returns the queue field. */
  public String getQueue() {
    return this.queue;
  }

  /** Replaces the queue field with the given value. */
  public void setQueue(final String queue) {
    this.queue = queue;
  }

  /** Returns the containerConnectionMaxLostCount field. */
  public Integer getContainerConnectionMaxLostCount() {
    return this.containerConnectionMaxLostCount;
  }

  /** Replaces the containerConnectionMaxLostCount field with the given value. */
  public void setContainerConnectionMaxLostCount(final Integer containerConnectionMaxLostCount) {
    this.containerConnectionMaxLostCount = containerConnectionMaxLostCount;
  }

  /** Returns the containerConnectionMaxExceedCount field. */
  public Integer getContainerConnectionMaxExceedCount() {
    return this.containerConnectionMaxExceedCount;
  }

  /** Replaces the containerConnectionMaxExceedCount field with the given value. */
  public void setContainerConnectionMaxExceedCount(final Integer containerConnectionMaxExceedCount) {
    this.containerConnectionMaxExceedCount = containerConnectionMaxExceedCount;
  }

  /** Returns the antiaffinityAllocation field. */
  public Boolean getAntiaffinityAllocation() {
    return this.antiaffinityAllocation;
  }

  /** Replaces the antiaffinityAllocation field with the given value. */
  public void setAntiaffinityAllocation(final Boolean antiaffinityAllocation) {
    this.antiaffinityAllocation = antiaffinityAllocation;
  }

  /** Returns the killAllOnAnyCompleted field. */
  public Boolean getKillAllOnAnyCompleted() {
    return this.killAllOnAnyCompleted;
  }

  /** Replaces the killAllOnAnyCompleted field with the given value. */
  public void setKillAllOnAnyCompleted(final Boolean killAllOnAnyCompleted) {
    this.killAllOnAnyCompleted = killAllOnAnyCompleted;
  }

  /** Returns the killAllOnAnyServiceCompleted field. */
  public Boolean getKillAllOnAnyServiceCompleted() {
    return this.killAllOnAnyServiceCompleted;
  }

  /** Replaces the killAllOnAnyServiceCompleted field with the given value. */
  public void setKillAllOnAnyServiceCompleted(final Boolean killAllOnAnyServiceCompleted) {
    this.killAllOnAnyServiceCompleted = killAllOnAnyServiceCompleted;
  }

  /** Returns the generateContainerIpList field. */
  public Boolean getGenerateContainerIpList() {
    return this.generateContainerIpList;
  }

  /** Replaces the generateContainerIpList field with the given value. */
  public void setGenerateContainerIpList(final Boolean generateContainerIpList) {
    this.generateContainerIpList = generateContainerIpList;
  }

  /** Returns the amType field. */
  public AMType getAmType() {
    return this.amType;
  }

  /** Replaces the amType field with the given value. */
  public void setAmType(final AMType amType) {
    this.amType = amType;
  }

  /** Returns the agentUseHeartbeat field. */
  public Boolean getAgentUseHeartbeat() {
    return this.agentUseHeartbeat;
  }

  /** Replaces the agentUseHeartbeat field with the given value. */
  public void setAgentUseHeartbeat(final Boolean agentUseHeartbeat) {
    this.agentUseHeartbeat = agentUseHeartbeat;
  }

  /** Returns the agentHeartbeatIntervalSec field. */
  public Integer getAgentHeartbeatIntervalSec() {
    return this.agentHeartbeatIntervalSec;
  }

  /** Replaces the agentHeartbeatIntervalSec field with the given value. */
  public void setAgentHeartbeatIntervalSec(final Integer agentHeartbeatIntervalSec) {
    this.agentHeartbeatIntervalSec = agentHeartbeatIntervalSec;
  }

  /** Returns the agentExpiryIntervalSec field. */
  public Integer getAgentExpiryIntervalSec() {
    return this.agentExpiryIntervalSec;
  }

  /** Replaces the agentExpiryIntervalSec field with the given value. */
  public void setAgentExpiryIntervalSec(final Integer agentExpiryIntervalSec) {
    this.agentExpiryIntervalSec = agentExpiryIntervalSec;
  }

  /** Returns the agentUseHealthCheck field. */
  public Boolean getAgentUseHealthCheck() {
    return this.agentUseHealthCheck;
  }

  /** Replaces the agentUseHealthCheck field with the given value. */
  public void setAgentUseHealthCheck(final Boolean agentUseHealthCheck) {
    this.agentUseHealthCheck = agentUseHealthCheck;
  }

  /** Returns the taskServiceHealthCheck field. */
  public HealthCheckDescriptor getTaskServiceHealthCheck() {
    return this.taskServiceHealthCheck;
  }

  /** Replaces the taskServiceHealthCheck field with the given value. */
  public void setTaskServiceHealthCheck(final HealthCheckDescriptor taskServiceHealthCheck) {
    this.taskServiceHealthCheck = taskServiceHealthCheck;
  }
}

Просмотреть файл

@ -0,0 +1,49 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
/**
 * Represents Integer values in the closed range [begin, end].
 * Both bounds are annotated as non-null; the accessors themselves perform no checks.
 */
public class Range implements Serializable {
  @Valid
  @NotNull
  private Integer begin;

  @Valid
  @NotNull
  private Integer end;

  /** Returns the begin field. */
  public Integer getBegin() {
    return this.begin;
  }

  /** Replaces the begin field with the given value. */
  public void setBegin(final Integer begin) {
    this.begin = begin;
  }

  /** Returns the end field. */
  public Integer getEnd() {
    return this.end;
  }

  /** Replaces the end field with the given value. */
  public void setEnd(final Integer end) {
    this.end = end;
  }
}

Просмотреть файл

@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean wrapping a list of framework names.
 * The list reference is stored and returned as-is (no copy); it is null until set.
 */
public class RequestedFrameworkNames implements Serializable {
  private List<String> frameworkNames;

  /** Returns the frameworkNames list reference. */
  public List<String> getFrameworkNames() {
    return this.frameworkNames;
  }

  /** Replaces the frameworkNames list reference with the given one. */
  public void setFrameworkNames(final List<String> frameworkNames) {
    this.frameworkNames = frameworkNames;
  }
}

Просмотреть файл

@ -0,0 +1,167 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import com.microsoft.frameworklauncher.utils.DefaultLogger;
import org.apache.hadoop.yarn.api.records.Resource;
import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
/**
 * Describes a resource amount: CPU, memory, port ranges, disk and GPU.
 *
 * Instances can be converted from and to a YARN {@link Resource}. GPU information is
 * read and written through reflection ("getGPUs"/"getGPUAttribute" and
 * "setGPUs"/"setGPUAttribute"), so the class also works against a YARN library whose
 * Resource does not expose those methods; in that case GPU information is ignored.
 */
public class ResourceDescriptor implements Serializable {
  private static final DefaultLogger LOGGER = new DefaultLogger(ResourceDescriptor.class);

  @Valid
  @NotNull
  private Integer cpuNumber;

  @Valid
  @NotNull
  private Integer memoryMB;

  @Valid
  private List<Range> portRanges = new ArrayList<>();

  @Valid
  private DiskType diskType = DiskType.HDD;

  @Valid
  private Integer diskMB = 0;

  @Valid
  private Long gpuAttribute = 0L;

  @Valid
  private Integer gpuNumber = 0;

  /** Returns the cpuNumber field. */
  public Integer getCpuNumber() {
    return cpuNumber;
  }

  /** Replaces the cpuNumber field with the given value. */
  public void setCpuNumber(Integer cpuNumber) {
    this.cpuNumber = cpuNumber;
  }

  /** Returns the memoryMB field. */
  public Integer getMemoryMB() {
    return memoryMB;
  }

  /** Replaces the memoryMB field with the given value. */
  public void setMemoryMB(Integer memoryMB) {
    this.memoryMB = memoryMB;
  }

  /** Returns the portRanges list reference (no copy is made). */
  public List<Range> getPortRanges() {
    return portRanges;
  }

  /** Replaces the portRanges list reference with the given one. */
  public void setPortRanges(List<Range> portRanges) {
    this.portRanges = portRanges;
  }

  /** Returns the diskType field. */
  public DiskType getDiskType() {
    return diskType;
  }

  /** Replaces the diskType field with the given value. */
  public void setDiskType(DiskType diskType) {
    this.diskType = diskType;
  }

  /** Returns the diskMB field. */
  public Integer getDiskMB() {
    return diskMB;
  }

  /** Replaces the diskMB field with the given value. */
  public void setDiskMB(Integer diskMB) {
    this.diskMB = diskMB;
  }

  /** Returns the gpuNumber field. */
  public Integer getGpuNumber() {
    return gpuNumber;
  }

  /** Replaces the gpuNumber field with the given value. */
  public void setGpuNumber(Integer gpuNumber) {
    this.gpuNumber = gpuNumber;
  }

  /** Returns the gpuAttribute field. */
  public Long getGpuAttribute() {
    return gpuAttribute;
  }

  /** Replaces the gpuAttribute field with the given value. */
  public void setGpuAttribute(Long gpuAttribute) {
    this.gpuAttribute = gpuAttribute;
  }

  /**
   * Creates a descriptor with the given memory, CPU and GPU values.
   * All remaining fields keep their declared defaults.
   */
  public static ResourceDescriptor newInstance(Integer memoryMB, Integer cpuNumber, Integer gpuNumber, Long gpuAttribute) {
    ResourceDescriptor resource = new ResourceDescriptor();
    resource.setMemoryMB(memoryMB);
    resource.setCpuNumber(cpuNumber);
    resource.setGpuNumber(gpuNumber);
    resource.setGpuAttribute(gpuAttribute);
    return resource;
  }

  /**
   * Builds a descriptor from a YARN Resource.
   *
   * Memory and virtual cores are copied directly. GPU number and attribute are read
   * reflectively; when the Resource class has no such methods, or they are not accessible,
   * the failure is logged and both GPU values stay at 0.
   *
   * @param res the YARN Resource to convert
   * @return a new descriptor populated from {@code res}
   * @throws Exception any other failure, e.g. an exception thrown by the reflectively
   *                   invoked getter, is propagated to the caller
   */
  public static ResourceDescriptor fromResource(Resource res) throws Exception {
    ResourceDescriptor rd = new ResourceDescriptor();
    rd.setMemoryMB(res.getMemory());
    rd.setCpuNumber(res.getVirtualCores());
    rd.setGpuAttribute(0L);
    rd.setGpuNumber(0);

    try {
      Class<?> clazz = res.getClass();
      // Resolve both methods before invoking either, so a library lacking GPU support
      // leaves the descriptor with the zero defaults set above.
      Method getGpuNumber = clazz.getMethod("getGPUs");
      Method getGpuAttribute = clazz.getMethod("getGPUAttribute");

      rd.setGpuNumber((int) getGpuNumber.invoke(res));
      rd.setGpuAttribute((long) getGpuAttribute.invoke(res));
    } catch (NoSuchMethodException e) {
      LOGGER.logDebug(e, "Ignore: Fail get GPU information, YARN library doesn't support gpu as resources");
    } catch (IllegalAccessException e) {
      LOGGER.logError(e, "Ignore: Fail to get GPU information, illegal access function");
    }
    return rd;
  }

  /**
   * Converts this descriptor to a YARN Resource.
   *
   * memoryMB and cpuNumber must be non-null (they are unboxed here). GPU values are written
   * reflectively, and only when gpuNumber is greater than 0. A null gpuNumber or gpuAttribute
   * is treated as the declared default (0 / 0L) instead of failing with a
   * NullPointerException on unboxing or an IllegalArgumentException from the reflective call.
   *
   * @return a new Resource carrying this descriptor's memory, CPU and, if supported, GPU values
   * @throws Exception any failure other than a missing or inaccessible GPU setter is propagated
   */
  public Resource toResource() throws Exception {
    Resource res = Resource.newInstance(memoryMB, cpuNumber);

    // gpuNumber and gpuAttribute are optional (no @NotNull), so tolerate explicit nulls.
    int requestedGpuNumber = (gpuNumber == null) ? 0 : gpuNumber;
    if (requestedGpuNumber > 0) {
      long requestedGpuAttribute = (gpuAttribute == null) ? 0L : gpuAttribute;
      try {
        Class<?> clazz = res.getClass();
        Method setGpuNumber = clazz.getMethod("setGPUs", int.class);
        Method setGpuAttribute = clazz.getMethod("setGPUAttribute", long.class);

        setGpuNumber.invoke(res, requestedGpuNumber);
        setGpuAttribute.invoke(res, requestedGpuAttribute);
      } catch (NoSuchMethodException e) {
        LOGGER.logWarning(e, "Ignore: Fail to set GPU information, YARN library doesn't support:");
      } catch (IllegalAccessException e) {
        LOGGER.logError(e, "Ignore: Fail to set GPU information, illegal access function");
      }
    }
    return res;
  }

  /** Returns a short text form containing memory, CPU and GPU values only. */
  @Override
  public String toString() {
    return "<MemoryMB:" + getMemoryMB() + ", CpuNumber:" + getCpuNumber() + ", GpuNumber:" + getGpuNumber() + ", GpuAttribute:" + getGpuAttribute() + ">";
  }
}

Просмотреть файл

@ -0,0 +1,55 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import java.io.Serializable;
/**
 * Describes the retry policy.
 *
 * If fancyRetryPolicy is enabled,
 *   will Retry for TransientFailure,
 *   will Not Retry for NonTransientFailure,
 *   will apply NormalRetryPolicy for UnKnownFailure.
 *
 * If fancyRetryPolicy is not enabled, will apply NormalRetryPolicy for all kinds of failures.
 * NormalRetryPolicy is defined as,
 *   will Retry and RetriedCount++ if maxRetryCount is equal to -1,
 *   will Retry and RetriedCount++ if RetriedCount is less than maxRetryCount,
 *   will Not Retry if all previous conditions are not satisfied.
 */
public class RetryPolicyDescriptor implements Serializable {
  @Valid
  private Integer maxRetryCount = 0;

  @Valid
  private Boolean fancyRetryPolicy = false;

  /** Returns the maxRetryCount field. */
  public Integer getMaxRetryCount() {
    return this.maxRetryCount;
  }

  /** Replaces the maxRetryCount field with the given value. */
  public void setMaxRetryCount(final Integer maxRetryCount) {
    this.maxRetryCount = maxRetryCount;
  }

  /** Returns the fancyRetryPolicy field. */
  public Boolean getFancyRetryPolicy() {
    return this.fancyRetryPolicy;
  }

  /** Replaces the fancyRetryPolicy field with the given value. */
  public void setFancyRetryPolicy(final Boolean fancyRetryPolicy) {
    this.fancyRetryPolicy = fancyRetryPolicy;
  }
}

Просмотреть файл

@ -0,0 +1,72 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable bean holding the retry counters accumulated so far.
 * Every counter starts at 0.
 */
public class RetryPolicyState implements Serializable {
    /**
     * When FancyRetryPolicy is enabled this counts retries for UnKnownFailure only
     * (i.e. the UnKnownFailureCount); otherwise it counts retries for all kinds of failures.
     */
    private Integer retriedCount = 0;

    public Integer getRetriedCount() {
        return retriedCount;
    }

    public void setRetriedCount(Integer retriedCount) {
        this.retriedCount = retriedCount;
    }

    // The counters below are available even if FancyRetryPolicy is not enabled.

    private Integer transientNormalRetriedCount = 0;

    public Integer getTransientNormalRetriedCount() {
        return transientNormalRetriedCount;
    }

    public void setTransientNormalRetriedCount(Integer transientNormalRetriedCount) {
        this.transientNormalRetriedCount = transientNormalRetriedCount;
    }

    private Integer transientConflictRetriedCount = 0;

    public Integer getTransientConflictRetriedCount() {
        return transientConflictRetriedCount;
    }

    public void setTransientConflictRetriedCount(Integer transientConflictRetriedCount) {
        this.transientConflictRetriedCount = transientConflictRetriedCount;
    }

    private Integer nonTransientRetriedCount = 0;

    public Integer getNonTransientRetriedCount() {
        return nonTransientRetriedCount;
    }

    public void setNonTransientRetriedCount(Integer nonTransientRetriedCount) {
        this.nonTransientRetriedCount = nonTransientRetriedCount;
    }

    private Integer unKnownRetriedCount = 0;

    public Integer getUnKnownRetriedCount() {
        return unKnownRetriedCount;
    }

    public void setUnKnownRetriedCount(Integer unKnownRetriedCount) {
        this.unKnownRetriedCount = unKnownRetriedCount;
    }
}

Просмотреть файл

@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Status values reported for a rollout.
 * <p>
 * The declaration order below fixes each constant's {@code ordinal()}.
 * NOTE(review): presumably new values should be appended rather than inserted if
 * ordinals are ever persisted - confirm against the consumers of this enum.
 */
public enum RolloutStatus implements Serializable {
    UNKNOWN,
    ONGOING,
    STOPPING_OLD,
    LAUNCHING_NEW,
    WAITING_TIMEOUT,
    SUCCEEDED
}

Просмотреть файл

@ -0,0 +1,80 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import org.hibernate.validator.constraints.NotEmpty;
import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable, validated bean describing a service: its version, entry point,
 * source locations and resource requirement.
 */
public class ServiceDescriptor implements Serializable {
    /** Required. A version change will trigger the Service Rolling Upgrade. */
    @Valid
    @NotNull
    private Integer version;

    public Integer getVersion() {
        return version;
    }

    public void setVersion(Integer version) {
        this.version = version;
    }

    /** Required and must not be empty. */
    @Valid
    @NotEmpty
    private String entryPoint;

    public String getEntryPoint() {
        return entryPoint;
    }

    public void setEntryPoint(String entryPoint) {
        this.entryPoint = entryPoint;
    }

    /** Required and must not be empty. Currently only HDFS URI is supported. */
    @Valid
    @NotEmpty
    private List<String> sourceLocations;

    public List<String> getSourceLocations() {
        return sourceLocations;
    }

    public void setSourceLocations(List<String> sourceLocations) {
        this.sourceLocations = sourceLocations;
    }

    /** Required. */
    @Valid
    @NotNull
    private ResourceDescriptor resource;

    public ResourceDescriptor getResource() {
        return resource;
    }

    public void setResource(ResourceDescriptor resource) {
        this.resource = resource;
    }

    // Fields left commented out in the original source:
    // private RetryPolicyDescriptor retryPolicy;
    // private vector<string> dependOnServices;
}

Просмотреть файл

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable bean exposing a single value: the service version.
 */
public class ServiceStatus implements Serializable {
    /** Service version; {@code null} until a value is set. */
    private Integer serviceVersion;

    public Integer getServiceVersion() {
        return serviceVersion;
    }

    public void setServiceVersion(Integer serviceVersion) {
        this.serviceVersion = serviceVersion;
    }
}

Просмотреть файл

@ -0,0 +1,100 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import javax.validation.constraints.Max;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
/**
 * Serializable, validated bean describing a TaskRole: how many Tasks it has,
 * its allocation priority, scale-unit settings, retry policy and service.
 */
public class TaskRoleDescriptor implements Serializable {
    /** Required; validated to lie within [0, 50000]. */
    @Valid
    @NotNull
    @Min(0)
    @Max(50000)
    private Integer taskNumber;

    public Integer getTaskNumber() {
        return taskNumber;
    }

    public void setTaskNumber(Integer taskNumber) {
        this.taskNumber = taskNumber;
    }

    /** The priority to Allocate Container to this TaskRole's Tasks; defaults to 1. */
    @Valid
    private Integer priority = 1;

    public Integer getPriority() {
        return priority;
    }

    public void setPriority(Integer priority) {
        this.priority = priority;
    }

    /** Defaults to 1. */
    @Valid
    private Integer scaleUnitNumber = 1;

    public Integer getScaleUnitNumber() {
        return scaleUnitNumber;
    }

    public void setScaleUnitNumber(Integer scaleUnitNumber) {
        this.scaleUnitNumber = scaleUnitNumber;
    }

    /** Defaults to 0. */
    @Valid
    private Integer scaleUnitTimeoutSec = 0;

    public Integer getScaleUnitTimeoutSec() {
        return scaleUnitTimeoutSec;
    }

    public void setScaleUnitTimeoutSec(Integer scaleUnitTimeoutSec) {
        this.scaleUnitTimeoutSec = scaleUnitTimeoutSec;
    }

    /** Defaults to a freshly constructed {@code RetryPolicyDescriptor}. */
    @Valid
    private RetryPolicyDescriptor taskRetryPolicy = new RetryPolicyDescriptor();

    public RetryPolicyDescriptor getTaskRetryPolicy() {
        return taskRetryPolicy;
    }

    public void setTaskRetryPolicy(RetryPolicyDescriptor taskRetryPolicy) {
        this.taskRetryPolicy = taskRetryPolicy;
    }

    /** Required. */
    @Valid
    @NotNull
    private ServiceDescriptor taskService;

    public ServiceDescriptor getTaskService() {
        return taskService;
    }

    public void setTaskService(ServiceDescriptor taskService) {
        this.taskService = taskService;
    }

    // Fields left commented out in the original source:
    // private List<ServiceDescriptor> taskServices;
    // private List<String> dependOnTaskRoles;
}

Просмотреть файл

@ -0,0 +1,106 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean recording rollout progress in two groups of fields:
 * the {@code overall*} group and the {@code current*} group.
 * <p>
 * NOTE(review): the timestamps here are {@code Integer} while TaskStatus uses
 * {@code Long} for its timestamps - confirm the intended unit fits in 32 bits.
 */
public class TaskRoleRolloutStatus implements Serializable {
    private Integer overallRolloutServiceVersion;

    public Integer getOverallRolloutServiceVersion() {
        return overallRolloutServiceVersion;
    }

    public void setOverallRolloutServiceVersion(Integer overallRolloutServiceVersion) {
        this.overallRolloutServiceVersion = overallRolloutServiceVersion;
    }

    /** Defaults to {@code RolloutStatus.UNKNOWN}. */
    private RolloutStatus overallRolloutStatus = RolloutStatus.UNKNOWN;

    public RolloutStatus getOverallRolloutStatus() {
        return overallRolloutStatus;
    }

    public void setOverallRolloutStatus(RolloutStatus overallRolloutStatus) {
        this.overallRolloutStatus = overallRolloutStatus;
    }

    private Integer overallRolloutStartTimestamp;

    public Integer getOverallRolloutStartTimestamp() {
        return overallRolloutStartTimestamp;
    }

    public void setOverallRolloutStartTimestamp(Integer overallRolloutStartTimestamp) {
        this.overallRolloutStartTimestamp = overallRolloutStartTimestamp;
    }

    private Integer overallRolloutEndTimestamp;

    public Integer getOverallRolloutEndTimestamp() {
        return overallRolloutEndTimestamp;
    }

    public void setOverallRolloutEndTimestamp(Integer overallRolloutEndTimestamp) {
        this.overallRolloutEndTimestamp = overallRolloutEndTimestamp;
    }

    private Integer currentRolloutScaleUnit;

    public Integer getCurrentRolloutScaleUnit() {
        return currentRolloutScaleUnit;
    }

    public void setCurrentRolloutScaleUnit(Integer currentRolloutScaleUnit) {
        this.currentRolloutScaleUnit = currentRolloutScaleUnit;
    }

    private List<Integer> currentRolloutTaskIndexes;

    public List<Integer> getCurrentRolloutTaskIndexes() {
        return currentRolloutTaskIndexes;
    }

    public void setCurrentRolloutTaskIndexes(List<Integer> currentRolloutTaskIndexes) {
        this.currentRolloutTaskIndexes = currentRolloutTaskIndexes;
    }

    /** Defaults to {@code RolloutStatus.UNKNOWN}. */
    private RolloutStatus currentRolloutStatus = RolloutStatus.UNKNOWN;

    public RolloutStatus getCurrentRolloutStatus() {
        return currentRolloutStatus;
    }

    public void setCurrentRolloutStatus(RolloutStatus currentRolloutStatus) {
        this.currentRolloutStatus = currentRolloutStatus;
    }

    private Integer currentRolloutStartTimestamp;

    public Integer getCurrentRolloutStartTimestamp() {
        return currentRolloutStartTimestamp;
    }

    public void setCurrentRolloutStartTimestamp(Integer currentRolloutStartTimestamp) {
        this.currentRolloutStartTimestamp = currentRolloutStartTimestamp;
    }

    private Integer currentRolloutEndTimestamp;

    public Integer getCurrentRolloutEndTimestamp() {
        return currentRolloutEndTimestamp;
    }

    public void setCurrentRolloutEndTimestamp(Integer currentRolloutEndTimestamp) {
        this.currentRolloutEndTimestamp = currentRolloutEndTimestamp;
    }
}

Просмотреть файл

@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable bean holding the status of one TaskRole: its name, its rollout
 * status and a framework version.
 */
public class TaskRoleStatus implements Serializable {
    private String taskRoleName;

    public String getTaskRoleName() {
        return taskRoleName;
    }

    public void setTaskRoleName(String taskRoleName) {
        this.taskRoleName = taskRoleName;
    }

    private TaskRoleRolloutStatus taskRoleRolloutStatus;

    public TaskRoleRolloutStatus getTaskRoleRolloutStatus() {
        return taskRoleRolloutStatus;
    }

    public void setTaskRoleRolloutStatus(TaskRoleRolloutStatus taskRoleRolloutStatus) {
        this.taskRoleRolloutStatus = taskRoleRolloutStatus;
    }

    private Integer frameworkVersion;

    public Integer getFrameworkVersion() {
        return frameworkVersion;
    }

    public void setFrameworkVersion(Integer frameworkVersion) {
        this.frameworkVersion = frameworkVersion;
    }

    // Add TaskRoleState to support TaskRoleDescriptor.dependOnTaskRoles
    // (left commented out in the original source):
    // private TaskRoleState TaskRoleState = TaskRoleState.TaskRoleWaiting;
}

Просмотреть файл

@ -0,0 +1,61 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * States of a Task. Each constant lists the events that move a Task out of
 * that state, written as {@code event -> next state}.
 */
public enum TaskState implements Serializable {
    /**
     * Task Waiting to Request Container. [START_STATES]
     * <ul>
     * <li>{@code addContainerRequest -> CONTAINER_REQUESTED}</li>
     * </ul>
     */
    TASK_WAITING,

    /**
     * Task's Container Requested.
     * <ul>
     * <li>{@code onContainersAllocated -> CONTAINER_ALLOCATED}</li>
     * <li>{@code recover -> TASK_WAITING}</li>
     * </ul>
     */
    CONTAINER_REQUESTED,

    /**
     * Task's current associated Container Allocated.
     * <ul>
     * <li>{@code startContainerAsync -> CONTAINER_LAUNCHED}</li>
     * <li>{@code onContainersCompleted -> CONTAINER_COMPLETED}</li>
     * <li>{@code resyncWithRM -> CONTAINER_COMPLETED}</li>
     * <li>{@code recover -> CONTAINER_RUNNING}</li>
     * </ul>
     */
    CONTAINER_ALLOCATED,

    /**
     * Task's current associated Container Launched.
     * <ul>
     * <li>{@code onContainerStarted -> CONTAINER_RUNNING}</li>
     * <li>{@code onStartContainerError -> CONTAINER_COMPLETED}</li>
     * <li>{@code onContainersCompleted -> CONTAINER_COMPLETED}</li>
     * <li>{@code resyncWithRM -> CONTAINER_COMPLETED}</li>
     * <li>{@code recover -> CONTAINER_RUNNING}</li>
     * </ul>
     */
    CONTAINER_LAUNCHED,

    /**
     * Task's current associated Container Running.
     * <ul>
     * <li>{@code onContainersCompleted -> CONTAINER_COMPLETED}</li>
     * <li>{@code resyncWithRM -> CONTAINER_COMPLETED}</li>
     * </ul>
     */
    CONTAINER_RUNNING,

    /**
     * Task's current associated Container Completed.
     * <ul>
     * <li>{@code attemptToRetry -> TASK_WAITING}</li>
     * <li>{@code attemptToRetry -> TASK_COMPLETED}</li>
     * </ul>
     */
    CONTAINER_COMPLETED,

    /**
     * Task Completed, possibly with Container retries. [FINAL_STATES]
     */
    TASK_COMPLETED
}

Просмотреть файл

@ -0,0 +1,205 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
/**
 * Serializable bean holding the status of a single Task: its static identity,
 * its dynamic state, and the status of its current associated Container.
 */
public class TaskStatus implements Serializable {
    // ---------------------------------------------------------------------
    // Task static status
    // ---------------------------------------------------------------------

    /**
     * Index of this TaskStatus object in TaskStatuses.TaskStatusArray.
     * Note taskIndex will not change after Task Restart, Migrated or Upgraded.
     */
    private Integer taskIndex;

    public Integer getTaskIndex() {
        return taskIndex;
    }

    public void setTaskIndex(Integer taskIndex) {
        this.taskIndex = taskIndex;
    }

    private String taskRoleName;

    public String getTaskRoleName() {
        return taskRoleName;
    }

    public void setTaskRoleName(String taskRoleName) {
        this.taskRoleName = taskRoleName;
    }

    // ---------------------------------------------------------------------
    // Task dynamic status
    // ---------------------------------------------------------------------

    /** Defaults to {@code TaskState.TASK_WAITING}. */
    private TaskState taskState = TaskState.TASK_WAITING;

    public TaskState getTaskState() {
        return taskState;
    }

    public void setTaskState(TaskState taskState) {
        this.taskState = taskState;
    }

    private RetryPolicyState taskRetryPolicyState;

    public RetryPolicyState getTaskRetryPolicyState() {
        return taskRetryPolicyState;
    }

    public void setTaskRetryPolicyState(RetryPolicyState taskRetryPolicyState) {
        this.taskRetryPolicyState = taskRetryPolicyState;
    }

    private Long taskCreatedTimestamp;

    public Long getTaskCreatedTimestamp() {
        return taskCreatedTimestamp;
    }

    public void setTaskCreatedTimestamp(Long taskCreatedTimestamp) {
        this.taskCreatedTimestamp = taskCreatedTimestamp;
    }

    private Long taskCompletedTimestamp;

    public Long getTaskCompletedTimestamp() {
        return taskCompletedTimestamp;
    }

    public void setTaskCompletedTimestamp(Long taskCompletedTimestamp) {
        this.taskCompletedTimestamp = taskCompletedTimestamp;
    }

    private ServiceStatus taskServiceStatus;

    public ServiceStatus getTaskServiceStatus() {
        return taskServiceStatus;
    }

    public void setTaskServiceStatus(ServiceStatus taskServiceStatus) {
        this.taskServiceStatus = taskServiceStatus;
    }

    // ---------------------------------------------------------------------
    // Task's current associated Container status which should not change
    // across attempts. Note other status can be retrieved from RM.
    // ---------------------------------------------------------------------

    private String containerId;

    public String getContainerId() {
        return containerId;
    }

    public void setContainerId(String containerId) {
        this.containerId = containerId;
    }

    /** Hostname of the node on which the container runs. */
    private String containerHost;

    public String getContainerHost() {
        return containerHost;
    }

    public void setContainerHost(String containerHost) {
        this.containerHost = containerHost;
    }

    /** The assigned ipv4 address of the corresponding containerHost. */
    private String containerIp;

    public String getContainerIp() {
        return containerIp;
    }

    public void setContainerIp(String containerIp) {
        this.containerIp = containerIp;
    }

    /** The assigned Gpus of the container. */
    private Long containerGpus;

    public Long getContainerGpus() {
        return containerGpus;
    }

    public void setContainerGpus(Long containerGpus) {
        this.containerGpus = containerGpus;
    }

    private String containerLogHttpAddress;

    public String getContainerLogHttpAddress() {
        return containerLogHttpAddress;
    }

    public void setContainerLogHttpAddress(String containerLogHttpAddress) {
        this.containerLogHttpAddress = containerLogHttpAddress;
    }

    private Integer containerConnectionLostCount;

    public Integer getContainerConnectionLostCount() {
        return containerConnectionLostCount;
    }

    public void setContainerConnectionLostCount(Integer containerConnectionLostCount) {
        this.containerConnectionLostCount = containerConnectionLostCount;
    }

    private Boolean containerIsDecommissioning;

    public Boolean getContainerIsDecommissioning() {
        return containerIsDecommissioning;
    }

    public void setContainerIsDecommissioning(Boolean containerIsDecommissioning) {
        this.containerIsDecommissioning = containerIsDecommissioning;
    }

    private Long containerLaunchedTimestamp;

    public Long getContainerLaunchedTimestamp() {
        return containerLaunchedTimestamp;
    }

    public void setContainerLaunchedTimestamp(Long containerLaunchedTimestamp) {
        this.containerLaunchedTimestamp = containerLaunchedTimestamp;
    }

    private Long containerCompletedTimestamp;

    public Long getContainerCompletedTimestamp() {
        return containerCompletedTimestamp;
    }

    public void setContainerCompletedTimestamp(Long containerCompletedTimestamp) {
        this.containerCompletedTimestamp = containerCompletedTimestamp;
    }

    private Integer containerExitCode;

    public Integer getContainerExitCode() {
        return containerExitCode;
    }

    public void setContainerExitCode(Integer containerExitCode) {
        this.containerExitCode = containerExitCode;
    }

    private String containerExitDiagnostics;

    public String getContainerExitDiagnostics() {
        return containerExitDiagnostics;
    }

    public void setContainerExitDiagnostics(String containerExitDiagnostics) {
        this.containerExitDiagnostics = containerExitDiagnostics;
    }

    /** Defaults to {@code ExitType.NOT_AVAILABLE}. */
    private ExitType containerExitType = ExitType.NOT_AVAILABLE;

    public ExitType getContainerExitType() {
        return containerExitType;
    }

    public void setContainerExitType(ExitType containerExitType) {
        this.containerExitType = containerExitType;
    }
}

Просмотреть файл

@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean grouping the {@code TaskStatus} entries of one TaskRole,
 * together with the TaskRole name and a framework version.
 */
public class TaskStatuses implements Serializable {
    private String taskRoleName;

    public String getTaskRoleName() {
        return taskRoleName;
    }

    public void setTaskRoleName(String taskRoleName) {
        this.taskRoleName = taskRoleName;
    }

    /** The TaskStatus entries; the list reference is stored and returned as-is (no copy). */
    private List<TaskStatus> taskStatusArray;

    public List<TaskStatus> getTaskStatusArray() {
        return taskStatusArray;
    }

    public void setTaskStatusArray(List<TaskStatus> taskStatusArray) {
        this.taskStatusArray = taskStatusArray;
    }

    private Integer frameworkVersion;

    public Integer getFrameworkVersion() {
        return frameworkVersion;
    }

    public void setFrameworkVersion(Integer frameworkVersion) {
        this.frameworkVersion = frameworkVersion;
    }
}

Просмотреть файл

@ -0,0 +1,50 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import org.hibernate.validator.constraints.NotEmpty;
import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
/**
 * Serializable, validated request bean carrying a data deployment version and
 * the type of version it applies to.
 */
public class UpdateDataDeploymentVersionRequest implements Serializable {
    /** Required. */
    @Valid
    @NotNull
    private DataDeploymentVersionType dataDeploymentVersionType;

    public DataDeploymentVersionType getDataDeploymentVersionType() {
        return dataDeploymentVersionType;
    }

    public void setDataDeploymentVersionType(DataDeploymentVersionType dataDeploymentVersionType) {
        this.dataDeploymentVersionType = dataDeploymentVersionType;
    }

    /** Required and must not be empty. */
    @Valid
    @NotEmpty
    private String dataDeploymentVersion;

    public String getDataDeploymentVersion() {
        return dataDeploymentVersion;
    }

    public void setDataDeploymentVersion(String dataDeploymentVersion) {
        this.dataDeploymentVersion = dataDeploymentVersion;
    }
}

Просмотреть файл

@ -0,0 +1,40 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.common.model;
import javax.validation.Valid;
import javax.validation.constraints.Max;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;
import java.io.Serializable;
/**
 * Serializable, validated request bean carrying a new task number.
 */
public class UpdateTaskNumberRequest implements Serializable {
    /** Required; validated to lie within [0, 50000]. */
    @Valid
    @NotNull
    @Min(0)
    @Max(50000)
    private Integer taskNumber;

    public Integer getTaskNumber() {
        return taskNumber;
    }

    public void setTaskNumber(Integer taskNumber) {
        this.taskNumber = taskNumber;
    }
}

Просмотреть файл

@ -0,0 +1,81 @@
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
// to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package com.microsoft.frameworklauncher.hdfsstore;
import com.google.common.annotations.VisibleForTesting;
import com.microsoft.frameworklauncher.utils.DefaultLogger;
import com.microsoft.frameworklauncher.utils.GlobalConstants;
import com.microsoft.frameworklauncher.utils.HadoopUtils;
import java.util.Set;
/**
 * Thin wrapper that performs the launcher's HDFS operations.
 * <p>
 * Every path is obtained from an {@link HdfsStoreStructure} and every HDFS
 * operation is delegated to {@code HadoopUtils}.
 */
public class HdfsStore {
    private static final DefaultLogger LOGGER = new DefaultLogger(HdfsStore.class);

    // Source of every HDFS path used by this store.
    private final HdfsStoreStructure hdfsStruct;

    /**
     * Logs the root path, builds the path structure for it and sets up the
     * basic HDFS structure.
     *
     * @param launcherRootPath root path handed to {@link HdfsStoreStructure}
     * @throws Exception propagated from the structure constructor or from the HDFS setup
     */
    public HdfsStore(String launcherRootPath) throws Exception {
        LOGGER.logInfo("Initializing HdfsStore: [LauncherRootPath] = [%s]", launcherRootPath);
        this.hdfsStruct = new HdfsStoreStructure(launcherRootPath);
        // NOTE: this is an overridable method invoked from the constructor; an
        // override runs before the subclass's own field initializers.
        this.setupHDFSStructure();
    }

    /** @return the path structure backing this store */
    public HdfsStoreStructure getHdfsStruct() {
        return this.hdfsStruct;
    }

    /**
     * Setup Basic HdfsStoreStructure: makes the launcher root directory in HDFS.
     *
     * @throws Exception propagated from {@code HadoopUtils.makeDirInHdfs}
     */
    @VisibleForTesting
    protected void setupHDFSStructure() throws Exception {
        HadoopUtils.makeDirInHdfs(this.hdfsStruct.getLauncherRootPath());
    }

    /**
     * Lists the launcher root directory in HDFS.
     *
     * @return the result of {@code HadoopUtils.listDirInHdfs} on the launcher root path
     * @throws Exception propagated from {@code HadoopUtils.listDirInHdfs}
     */
    public Set<String> getFrameworkNames() throws Exception {
        return HadoopUtils.listDirInHdfs(this.hdfsStruct.getLauncherRootPath());
    }

    /**
     * Makes the root directory of the given framework in HDFS.
     *
     * @param frameworkName framework whose root path is resolved
     * @throws Exception propagated from {@code HadoopUtils.makeDirInHdfs}
     */
    public void makeFrameworkRootDir(String frameworkName) throws Exception {
        HadoopUtils.makeDirInHdfs(this.hdfsStruct.getFrameworkRootPath(frameworkName));
    }

    /**
     * Removes the root directory of the given framework from HDFS.
     *
     * @param frameworkName framework whose root path is resolved
     * @throws Exception propagated from {@code HadoopUtils.removeDirInHdfs}
     */
    public void removeFrameworkRoot(String frameworkName) throws Exception {
        HadoopUtils.removeDirInHdfs(this.hdfsStruct.getFrameworkRootPath(frameworkName));
    }

    /**
     * Makes the AM store root directory of the given framework in HDFS.
     *
     * @param frameworkName framework whose AM store root path is resolved
     * @throws Exception propagated from {@code HadoopUtils.makeDirInHdfs}
     */
    public void makeAMStoreRootDir(String frameworkName) throws Exception {
        HadoopUtils.makeDirInHdfs(this.hdfsStruct.getAMStoreRootPath(frameworkName));
    }

    /**
     * Uploads {@code GlobalConstants.PACKAGE_APPLICATION_MASTER_FILE} to the
     * framework's AM package path.
     *
     * @param frameworkName framework whose AM package path is resolved
     * @return the HDFS path the file was uploaded to
     * @throws Exception propagated from {@code HadoopUtils.uploadFileToHdfs}
     */
    public String uploadAMPackageFile(String frameworkName) throws Exception {
        final String destination = this.hdfsStruct.getAMPackageFilePath(frameworkName);
        HadoopUtils.uploadFileToHdfs(GlobalConstants.PACKAGE_APPLICATION_MASTER_FILE, destination);
        return destination;
    }

    /**
     * Uploads {@code GlobalConstants.PACKAGE_AGENT_FILE} to the framework's
     * agent package path.
     *
     * @param frameworkName framework whose agent package path is resolved
     * @return the HDFS path the file was uploaded to
     * @throws Exception propagated from {@code HadoopUtils.uploadFileToHdfs}
     */
    public String uploadAgentPackageFile(String frameworkName) throws Exception {
        final String destination = this.hdfsStruct.getAgentPackageFilePath(frameworkName);
        HadoopUtils.uploadFileToHdfs(GlobalConstants.PACKAGE_AGENT_FILE, destination);
        return destination;
    }

    /**
     * Uploads {@code GlobalConstants.CONTAINER_IP_LIST_FILE} to the framework's
     * container IP list path.
     *
     * @param frameworkName framework whose container IP list path is resolved
     * @return the HDFS path the file was uploaded to
     * @throws Exception propagated from {@code HadoopUtils.uploadFileToHdfs}
     */
    public String uploadContainerIpListFile(String frameworkName) throws Exception {
        final String destination = this.hdfsStruct.getContainerIpListFilePath(frameworkName);
        HadoopUtils.uploadFileToHdfs(GlobalConstants.CONTAINER_IP_LIST_FILE, destination);
        return destination;
    }
}

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше