HDFS update
Parent: d7bf9e0510
Commit: a0d3eb9180
@ -0,0 +1,3 @@
// Place your settings in this file to overwrite default and user settings.
{
}
@ -26,3 +26,28 @@ The document describes the procedure to deploy HDFS across a cluster.
```
deploy.py hdfs umount
```

3. Configure zookeeper and HDFS

```
deploy.py docker push zookeeper
deploy.py hdfs config
```

4. Deploy HDFS zookeeper and journal node

```
deploy.py kubernetes start zookeeper
deploy.py kubernetes start hdfsjournal
```

You may shut down zookeeper and the journal node via:

```
deploy.py kubernetes stop hdfsjournal
deploy.py kubernetes stop zookeeper
```

5. Format HDFS namenode

```
deploy.py kubernetes start hdfsformat
```

6.
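The steps above can be scripted end to end. Below is a minimal sketch, assuming it is run from the directory that contains deploy.py and that the cluster configuration has already been generated; the command list is taken verbatim from the steps above.

```python
#!/usr/bin/env python
# Minimal sketch: run the documented HDFS deployment steps in order.
# Assumes deploy.py lives in the current directory and is executable.
import subprocess

STEPS = [
    ["./deploy.py", "docker", "push", "zookeeper"],        # step 3: build/push zookeeper image
    ["./deploy.py", "hdfs", "config"],                     # step 3: generate HDFS config
    ["./deploy.py", "kubernetes", "start", "zookeeper"],   # step 4: launch zookeeper
    ["./deploy.py", "kubernetes", "start", "hdfsjournal"], # step 4: launch journal nodes
    ["./deploy.py", "kubernetes", "start", "hdfsformat"],  # step 5: format the namenode
]

for cmd in STEPS:
    print "Running: %s" % " ".join(cmd)   # the repo scripts are Python 2, hence print statements
    subprocess.check_call(cmd)            # stop at the first failing step
```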
@ -88,7 +88,7 @@ default_config_parameters = {
    "local-mount-path" : "/mnt",

    # required storage folder under storage-mount-path
    "default-storage-folders" : ["jobfiles", "storage", "work" ],
    "default-storage-folders" : ["jobfiles", "storage", "work", "namenodeshare" ],

    # the path of where nvidia driver is installed on each node, default /opt/nvidia-driver/current
@ -219,7 +219,11 @@ default_config_parameters = {
    "mountoptions": "ext4 defaults 0 1",
    },

    # optional hdfs_cluster_name: if not set, cluster_name is inherited from the cluster
    # "hdfs_cluster_name": cluster_name for HDFS

    "hdfsconfig" : {
        # Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.
        "dfs" : {
            # Data node configuration,
@ -228,7 +232,7 @@ default_config_parameters = {
            "data": "",
        },
        "namenode" : {
            "data": "/var/lib/hdfsnamenode",
            "data": "/mnt/namenodeshare",
        },
        "zks" : {
            # The IP address should be within service_cluster_ip_range
@ -243,7 +247,7 @@ default_config_parameters = {
        # location of configuration file
        "configfile": "/etc/hdfs/config.yaml",
        # logging directory
        "loggingDirBase": "/var/log/hdfs"
        "loggingDirBase": "/usr/local/hadoop/logs"
    },
    "ubuntuconfig" : {
        "version" : "16.04.1",
@ -558,6 +562,7 @@ default_config_mapping = {
    "pxeserverip": (["pxeserver"], lambda x: fetch_dictionary(x,["ip"])),
    "pxeserverrootpasswd": (["pxeserver"], lambda x: get_root_passwd()),
    "pxeoptions": (["pxeserver"], lambda x: "" if fetch_dictionary(x,["options"]) is None else fetch_dictionary(x,["options"])),
    "hdfs_cluster_name" : ( ["cluster_name"], lambda x:x ),
}

# Merge entries in config2 to that of config1, if entries are dictionary.
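Each default_config_mapping entry pairs a source key path with a transform, and the new "hdfs_cluster_name" entry simply copies cluster_name when no explicit HDFS cluster name is given. A minimal sketch of how such an entry can be resolved; resolve_mapping and the simplified fetch_dictionary below are illustrative helpers, not the actual functions in deploy.py.

```python
# Illustrative only: how a default_config_mapping entry could be applied.
def fetch_dictionary(config, keys):
    # Walk a nested dict along the given key path; return None if any key is missing.
    cur = config
    for key in keys:
        if not isinstance(cur, dict) or key not in cur:
            return None
        cur = cur[key]
    return cur

def resolve_mapping(config, mapping):
    for target, (src_path, transform) in mapping.items():
        if target not in config:                      # only fill in defaults, never overwrite
            value = fetch_dictionary(config, src_path)
            if value is not None:
                config[target] = transform(value)
    return config

# With the entry above, a cluster named "mycluster" with no explicit
# hdfs_cluster_name ends up with hdfs_cluster_name == "mycluster".
config = {"cluster_name": "mycluster"}
mapping = {"hdfs_cluster_name": (["cluster_name"], lambda x: x)}
print resolve_mapping(config, mapping)["hdfs_cluster_name"]   # -> mycluster
```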
@ -2011,7 +2016,7 @@ def generate_hdfs_nodelist( nodes, port):

def generate_hdfs_config( nodes, deviceSelect):
    hdfsconfig = copy.deepcopy( config["hdfsconfig"] )
    hdfsconfig["cluster_name"] = config["cluster_name"]
    hdfsconfig["hdfs_cluster_name"] = config["hdfs_cluster_name"]
    zknodes = get_node_lists_for_service("zookeeper")
    zknodelist = generate_hdfs_nodelist( zknodes, fetch_config( ["hdfsconfig", "zks", "port"]))
    if verbose:
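generate_hdfs_config starts from a deep copy of the hdfsconfig section and stamps both cluster names onto it before filling in the node lists. Roughly, after the lines shown above it holds something like the dict below; the values are illustrative and assembled from the config hunks earlier in this commit, not taken from a real cluster.

```python
# Illustrative shape of hdfsconfig right after the copy/assignments above,
# for a cluster where cluster_name == hdfs_cluster_name == "mycluster".
hdfsconfig = {
    "dfs":            {"data": ""},                   # datanode storage paths
    "namenode":       {"data": "/mnt/namenodeshare"},
    "zks":            {},                             # zookeeper settings (port, IPs, ...)
    "configfile":     "/etc/hdfs/config.yaml",
    "loggingDirBase": "/usr/local/hadoop/logs",
    # added by generate_hdfs_config:
    "cluster_name":      "mycluster",
    "hdfs_cluster_name": "mycluster",
}
```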
@ -2358,17 +2363,20 @@ def get_all_services():
    for service in os.listdir(rootdir):
        dirname = os.path.join(rootdir, service)
        if os.path.isdir(dirname):
            yamlname = os.path.join(dirname, service + ".yaml")
            if not os.path.isfile(yamlname):
                yamls = glob.glob("*.yaml")
                yamlname = yamls[0]
            with open( yamlname ) as f:
                content = f.read()
                f.close()
            if content.find( "DaemonSet" )>=0:
                # Only add service if it is a daemonset.
                servicedic[service] = yamlname

            launch_order_file = os.path.join( dirname, "launch_order")
            if os.path.isfile( launch_order_file ):
                servicedic[service] = launch_order_file
            else:
                yamlname = os.path.join(dirname, service + ".yaml")
                if not os.path.isfile(yamlname):
                    yamls = glob.glob("*.yaml")
                    yamlname = yamls[0]
                with open( yamlname ) as f:
                    content = f.read()
                    f.close()
                if content.find( "DaemonSet" )>=0:
                    # Only add service if it is a daemonset.
                    servicedic[service] = yamlname
    return servicedic

def get_service_name(service_config_file):
@ -2397,7 +2405,7 @@ def get_service_yaml( use_service ):
        servicename = get_service_name(servicedic[service])
        newentries[servicename] = servicedic[service]
    servicedic.update(newentries)
    # print use_service
    # print servicedic
    fname = servicedic[use_service]
    return fname
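Taken together, get_all_services() and get_service_yaml() map a service name to the YAML (or launch_order file) used to deploy it, so the HDFS services added in this commit are discovered the same way as the existing ones. A hedged usage sketch, assuming it runs inside deploy.py where both helpers and the global config are defined:

```python
# Sketch: look up the spec used to launch the new HDFS journal node service.
servicedic = get_all_services()          # service name -> yaml / launch_order path
print servicedic.keys()                  # should now include "hdfsjournal" and "hdfsformat"
print get_service_yaml("hdfsjournal")    # path of the DaemonSet yaml (or its launch_order)
```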
@ -2568,6 +2576,13 @@ def push_docker_images(nargs):
    if verbose:
        print "Build & push docker images to docker register ..."
    push_dockers("./deploy/docker-images/", config["dockerprefix"], config["dockertag"], nargs, config, verbose, nocache = nocache )

def check_buildable_images(nargs):
    for imagename in nargs:
        imagename = imagename.lower()
        if imagename in config["build-docker-via-config"]:
            print "Docker image %s should be built via configuration. " % imagename
            exit()

def run_docker_image( imagename, native = False, sudo = False ):
    dockerConfig = fetch_config( ["docker-run", imagename ])
@ -3025,8 +3040,10 @@ def run_command( args, command, nargs, parser ):
    elif command == "docker":
        if len(nargs)>=1:
            if nargs[0] == "build":
                check_buildable_images(nargs[1:])
                build_docker_images(nargs[1:])
            elif nargs[0] == "push":
                check_buildable_images(nargs[1:])
                push_docker_images(nargs[1:])
            elif nargs[0] == "run":
                if len(nargs)>=2:
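For context, the dispatch above is what the documented commands go through. A small sketch of the call chain for "deploy.py docker push hdfs", assuming it executes inside deploy.py where these helpers and the global config exist; "hdfs" here is just an example image name from this commit's docker-images directory:

```python
# run_command(...) sees command == "docker" and nargs == ["push", "hdfs"], then:
nargs = ["push", "hdfs"]
check_buildable_images(nargs[1:])   # refuses images listed in config["build-docker-via-config"]
push_docker_images(nargs[1:])       # -> push_dockers("./deploy/docker-images/", ...)
```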
@ -0,0 +1,49 @@
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
  name: hdfsformat
spec:
  template:
    metadata:
      labels:
        app: hdfsformat
      annotations:
        pod.alpha.kubernetes.io/initialized: "true"
    spec:
      nodeSelector:
        namenode1: active
      hostNetwork: true
      containers:
      - name: k8shdfsformat
        imagePullPolicy: Always
        image: {{cnf["worker-dockerregistry"]}}{{cnf["dockerprefix"]}}hdfs:{{cnf["dockertag"]}}
        ports:
        env:
        - name : CLUSTER_NAME
          value: {{cnf["hdfs_cluster_name"]}}
        volumeMounts:
        - name: datadir
          mountPath: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
        - name: loghadoop
          mountPath: /usr/local/hadoop/logs
        - name: configdir
          mountPath: /etc/hdfs/
        - name: namenodedir
          mountPath: /mnt/namenodeshare
        command:
        - sh
        - -c
        - /usr/local/hadoop/etc/hadoop/hadoop-env.sh && cd {{cnf["docker-run"]["hdfs"]["workdir"]}} && pwd && ./bootstrap_hdfs.py format && /bin/sleep infinity
      volumes:
      - name: datadir
        hostPath:
          path: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
      - name: loghadoop
        hostPath:
          path: /var/log/hadoop/
      - name: configdir
        hostPath:
          path: /etc/hdfs
      - name: namenodedir
        hostPath:
          path: {{cnf["storage-mount-path"]}}/namenodeshare
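The {{cnf[...]}} placeholders in this DaemonSet are filled from the cluster configuration before the file is handed to Kubernetes; elsewhere in the repo this is done with jinja2 (utils.render_template). A minimal sketch of that rendering step, using jinja2 directly and a hand-written cnf dict whose values are made up for illustration:

```python
# Sketch: render {{cnf[...]}} placeholders like the ones in the DaemonSet above.
# utils.render_template in the repo wraps the same idea; these cnf values are invented.
from jinja2 import Template

cnf = {
    "worker-dockerregistry": "myregistry.example.com/",
    "dockerprefix": "dlws/",
    "dockertag": "latest",
    "hdfs_cluster_name": "mycluster",
}

template_text = 'image: {{cnf["worker-dockerregistry"]}}{{cnf["dockerprefix"]}}hdfs:{{cnf["dockertag"]}}'
print Template(template_text).render(cnf=cnf)
# -> image: myregistry.example.com/dlws/hdfs:latest
```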
@ -0,0 +1,49 @@
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
  name: hdfsjournal
spec:
  template:
    metadata:
      labels:
        app: hdfsjournal
      annotations:
        pod.alpha.kubernetes.io/initialized: "true"
    spec:
      nodeSelector:
        journalnode: active
      hostNetwork: true
      containers:
      - name: k8shdfsjournal
        imagePullPolicy: Always
        image: {{cnf["worker-dockerregistry"]}}{{cnf["dockerprefix"]}}hdfs:{{cnf["dockertag"]}}
        ports:
        env:
        - name : CLUSTER_NAME
          value: {{cnf["hdfs_cluster_name"]}}
        volumeMounts:
        - name: datadir
          mountPath: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
        - name: loghadoop
          mountPath: /usr/local/hadoop/logs
        - name: configdir
          mountPath: /etc/hdfs/
        - name: namenodedir
          mountPath: /mnt/namenodeshare
        command:
        - sh
        - -c
        - /usr/local/hadoop/etc/hadoop/hadoop-env.sh && cd {{cnf["docker-run"]["hdfs"]["workdir"]}} && pwd && ./bootstrap_hdfs.py journalnode && /bin/sleep infinity
      volumes:
      - name: datadir
        hostPath:
          path: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
      - name: loghadoop
        hostPath:
          path: /var/log/hadoop/
      - name: configdir
        hostPath:
          path: /etc/hdfs
      - name: namenodedir
        hostPath:
          path: {{cnf["storage-mount-path"]}}/namenodeshare
@ -0,0 +1 @@
journalnode.yaml
@ -1,7 +1,30 @@
From sequenceiq/hadoop-docker:2.7.1
FROM williamyeh/java8
MAINTAINER Jin Li <jinlmsft@hotmail.com>

# CentOS 6.6
VOLUME /mnt/hadoop/
RUN apt-get update \
    && apt-get install -y jq curl

RUN curl -s http://www.apache.org/dist/hadoop/common/hadoop-2.8.0/hadoop-2.8.0.tar.gz | tar -xz -C /usr/local/ \
    && cd /usr/local \
    && ln -s ./hadoop-2.8.0 hadoop

ENV JAVA_HOME /usr/lib/jvm/java-8-oracle
ENV HADOOP_PREFIX /usr/local/hadoop
ENV HADOOP_COMMON_HOME /usr/local/hadoop
ENV HADOOP_HDFS_HOME /usr/local/hadoop
ENV HADOOP_MAPRED_HOME /usr/local/hadoop
ENV HADOOP_YARN_HOME /usr/local/hadoop
ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop

WORKDIR /usr/local/hadoop
RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/lib/jvm/java-8-oracle\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh \
    && sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh \
    && chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh

# NameNode Secondary NameNode DataNode JournalNode NFS Gateway HttpFS ZKFC
EXPOSE 8020 50070 50470 50090 50495 50010 1004 50075 1006 50020 8485 8480 2049 4242 111 14000 14001 8019

RUN curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py"
RUN python get-pip.py
@ -10,7 +33,7 @@ RUN yum install -y attr

WORKDIR {{cnf["docker-run"]["hdfs"]["workdir"]}}

ADD core-site.xml {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD core-site.xml /usr/local/hadoop/etc/hadoop/core-site.xml
ADD hdfs-site.xml.in-docker {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD logging.yaml.in-docker {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD bootstrap_hdfs.py {{cnf["docker-run"]["hdfs"]["workdir"]}}
@ -20,5 +43,8 @@ RUN chmod +x {{cnf["docker-run"]["hdfs"]["workdir"]}}/*.py
# All process in this docker needs to be run as a service.
# Do not change the command, rewrite a service if need to

# See information on https://stackoverflow.com/questions/19943766/hadoop-unable-to-load-native-hadoop-library-for-your-platform-warning
# the 3rd answer, you may ignore warning on NativeCodeLoader

CMD /bin/bash
@ -11,13 +11,23 @@ import yaml
from jinja2 import Environment, FileSystemLoader, Template
import utils

verbose = False
verbose = True

def create_log( logdir ):
    if not os.path.exists( logdir ):
        os.system("mkdir -p " + logdir )
    if not os.path.exists( logdir ):
        os.system("mkdir -p " + logdir )

def exec_with_output( cmd ):
    try:
        # https://stackoverflow.com/questions/4814970/subprocess-check-output-doesnt-seem-to-exist-python-2-6-5
        print cmd
        output = subprocess.Popen( cmd.split(), stdout=subprocess.PIPE ).communicate()[0]
        print output
    except subprocess.CalledProcessError as e:
        print "Exception " + str(e.returncode) + ", output: " + e.output.strip()

if __name__ == '__main__':
    print "Start... bootstrap_hdfs.py "
    try:
        parser = argparse.ArgumentParser(prog='bootstrap_hdfs.py',
            formatter_class=argparse.RawDescriptionHelpFormatter,
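Note that Popen(...).communicate() never raises CalledProcessError, so the except branch in exec_with_output above is effectively unreachable; the linked Stack Overflow thread concerns subprocess.check_output, which does raise it on a non-zero exit. A variant along those lines could look like this (a sketch, not the committed code):

```python
# Sketch: exec_with_output based on check_output, so the CalledProcessError
# branch is actually reachable when the command exits non-zero.
import subprocess

def exec_with_output(cmd):
    print cmd
    try:
        output = subprocess.check_output(cmd.split(), stderr=subprocess.STDOUT)
        print output
        return output
    except subprocess.CalledProcessError as e:
        print "Exception " + str(e.returncode) + ", output: " + e.output.strip()
        return e.output
```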
@ -42,27 +52,57 @@ datanode: Launch datanode.
        args = parser.parse_args()
        verbose = args.verbose
        server = args.server
        print "Parse command line argument... "
        config_file = args.config
        if not os.path.exists(config_file):
            print "!!!Error!!! Can't find configuration file %s " % config_file
            parser.print_help()
        with open(config_file, 'r') as file:
            config = yaml.load(file)
        if verbose:
            print config
            print "Configuration is : %s " % config
        loggingDirBase = "/var/log/hdfs" if not "loggingDirBase" in config else config["loggingDirBase"]
        config["loggingDir"] = os.path.join( loggingDirBase, server )
        utils.render_template("logging.yaml.in-docker", "logging.yaml",config, verbose=verbose)
        logdir = config["loggingDir"]
        create_log( logdir )
        with open('logging.yaml') as f:
            logging_config = yaml.load(f)
            f.close()
        print logging_config
        logutils.dictconfig.dictConfig(logging_config)
        utils.render_template("hdfs-site.xml.in-docker", "hdfs-site.xml",config, verbose=verbose)
        # logdir = config["loggingDir"]
        # create_log( logdir )
        # with open('logging.yaml') as f:
        #     logging_config = yaml.load(f)
        #     f.close()
        # print logging_config
        # logutils.dictconfig.dictConfig(logging_config)
        utils.render_template("hdfs-site.xml.in-docker", "/usr/local/hadoop/etc/hadoop/hdfs-site.xml",config, verbose=verbose)
    except Exception as e:
        print "bootstrap_hdfs.py fails during initialization, exception %s" % e
        exit()

    # Launch journal node
    if server == "journalnode":
        cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start journalnode"
        exec_with_output( cmd )
        exec_with_output( "pgrep -f JournalNode")
        print "JournalNode running .... "
    elif server == "zookeeper":
        cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start zookeeper"
        exec_with_output( cmd )
        print "Zookeeper node is running .... "
    elif server == "namenode":
        cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start namenode"
        exec_with_output( cmd )
        cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start zkfc"
        exec_with_output( cmd )
        exec_with_output( "pgrep -f NameNode")
        exec_with_output( "pgrep -f DFSZKFailoverController")
        print "Namenode is running"
    elif server == "datanode":
        cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start datanode"
        exec_with_output( cmd )
        exec_with_output( "pgrep -f DataNode")
        print "Datanode is running"
    elif server == "format":
        cmd = "/usr/local/hadoop/bin/hadoop namenode -format -nonInteractive"
        exec_with_output( cmd )
        cmd = "/usr/local/hadoop/bin/hdfs zkfc -formatZK -nonInteractive"
        exec_with_output( cmd )
    else:
        ()
@ -0,0 +1,7 @@
#!/bin/bash

: ${HADOOP_PREFIX:=/usr/local/hadoop};

$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
@ -1,6 +1,6 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://{{cnf["cluster_name"]}}</value>
|
||||
<value>hdfs://{{cnf["hdfs_cluster_name"]}}</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -6,7 +6,7 @@
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>{{cnf["namenode"]["data"]}}</value>
    <value>file://{{cnf["namenode"]["data"]}}</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.</description>
  </property>
  <property>
@ -19,22 +19,22 @@
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>{{cnf["cluster_name"]}}</value>
    <value>{{cnf["hdfs_cluster_name"]}}</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.{{cnf["cluster_name"]}}</name>
    <name>dfs.ha.namenodes.{{cnf["hdfs_cluster_name"]}}</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.{{cnf["cluster_name"]}}.nn1</name>
    <name>dfs.namenode.rpc-address.{{cnf["hdfs_cluster_name"]}}.nn1</name>
    <value>{{cnf["namenode"]["namenode1"]}}:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.{{cnf["cluster_name"]}}.nn2</name>
    <name>dfs.namenode.rpc-address.{{cnf["hdfs_cluster_name"]}}.nn2</name>
    <value>{{cnf["namenode"]["namenode2"]}}:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.{{cnf["cluster_name"]}}.nn1</name>
    <name>dfs.namenode.http-address.{{cnf["hdfs_cluster_name"]}}.nn1</name>
    <value>{{cnf["namenode"]["namenode1"]}}:50070</value>
  </property>
  <property>
@ -43,7 +43,7 @@
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://{{cnf["journalnode"]["nodes"]}}/{{cnf["cluster_name"]}}</value>
    <value>qjournal://{{cnf["journalnode"]["nodes"]}}/{{cnf["hdfs_cluster_name"]}}</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
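For a concrete picture of what the renamed HA properties render to, suppose hdfs_cluster_name is "mycluster", the namenodes are nn01/nn02 and the journal nodes are jn01..jn03; all of these hosts are hypothetical, and the ";"-separated journal node string follows the qjournal URI convention rather than anything shown in this diff. The sketch below just substitutes the template variables by hand:

```python
# Hypothetical cnf values, to show what the renamed hdfs-site.xml properties render to.
cnf = {
    "hdfs_cluster_name": "mycluster",
    "namenode": {"namenode1": "nn01", "namenode2": "nn02"},
    "journalnode": {"nodes": "jn01:8485;jn02:8485;jn03:8485"},
}

print "dfs.nameservices = %s" % cnf["hdfs_cluster_name"]
print "dfs.ha.namenodes.%s = nn1,nn2" % cnf["hdfs_cluster_name"]
print "dfs.namenode.rpc-address.%s.nn1 = %s:8020" % (cnf["hdfs_cluster_name"], cnf["namenode"]["namenode1"])
print "dfs.namenode.shared.edits.dir = qjournal://%s/%s" % (cnf["journalnode"]["nodes"], cnf["hdfs_cluster_name"])
```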