This commit is contained in:
Jin Li 2017-06-20 19:27:44 -07:00
Родитель d7bf9e0510
Коммит a0d3eb9180
11 изменённых файлов: 258 добавлений и 41 удалений

3
.vscode/settings.json поставляемый Executable file
Просмотреть файл

@ -0,0 +1,3 @@
// Place your settings in this file to overwrite default and user settings.
{
}

Просмотреть файл

@ -26,3 +26,28 @@ The document describes the procedure to deploy HDFS across a cluster.
```
deploy.py hdfs umount
```
3. Configure zookeeper and HDFS
```
deploy.py docker push zookeeper
deploy.py hdfs config
```
4. Deploy HDFS zookeeper and journal node
```
deploy.py kubernetes start zookeeper
deploy.py kubernetes start hdfsjournal
```
You may shutdown zookeeper and journal node via:
```
deploy.py kubernetes stop hdfsjournal
deploy.py kubernetes stop zookeeper
```
5. Format HDFS namenode
```
deploy.py kubernetes start hdfsformat
```
6.

Просмотреть файл

@ -88,7 +88,7 @@ default_config_parameters = {
"local-mount-path" : "/mnt",
# required storage folder under storage-mount-path
"default-storage-folders" : ["jobfiles", "storage", "work" ],
"default-storage-folders" : ["jobfiles", "storage", "work", "namenodeshare" ],
# the path of where nvidia driver is installed on each node, default /opt/nvidia-driver/current
@ -219,7 +219,11 @@ default_config_parameters = {
"mountoptions": "ext4 defaults 0 1",
},
# optional hdfs_cluster_name: if not inherit cluster_name from cluster
# "hdfs_cluster_name": cluster_name for HDFS
"hdfsconfig" : {
# Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.
"dfs" : {
# Data node configuration,
@ -228,7 +232,7 @@ default_config_parameters = {
"data": "",
},
"namenode" : {
"data": "/var/lib/hdfsnamenode",
"data": "/mnt/namenodeshare",
},
"zks" : {
# The IP address should be within service_cluster_ip_range
@ -243,7 +247,7 @@ default_config_parameters = {
# location of configuration file
"configfile": "/etc/hdfs/config.yaml",
# logging directory
"loggingDirBase": "/var/log/hdfs"
"loggingDirBase": "/usr/local/hadoop/logs"
},
"ubuntuconfig" : {
"version" : "16.04.1",
@ -558,6 +562,7 @@ default_config_mapping = {
"pxeserverip": (["pxeserver"], lambda x: fetch_dictionary(x,["ip"])),
"pxeserverrootpasswd": (["pxeserver"], lambda x: get_root_passwd()),
"pxeoptions": (["pxeserver"], lambda x: "" if fetch_dictionary(x,["options"]) is None else fetch_dictionary(x,["options"])),
"hdfs_cluster_name" : ( ["cluster_name"], lambda x:x ),
}
# Merge entries in config2 to that of config1, if entries are dictionary.
@ -2011,7 +2016,7 @@ def generate_hdfs_nodelist( nodes, port):
def generate_hdfs_config( nodes, deviceSelect):
hdfsconfig = copy.deepcopy( config["hdfsconfig"] )
hdfsconfig["cluster_name"] = config["cluster_name"]
hdfsconfig["hdfs_cluster_name"] = config["hdfs_cluster_name"]
zknodes = get_node_lists_for_service("zookeeper")
zknodelist = generate_hdfs_nodelist( zknodes, fetch_config( ["hdfsconfig", "zks", "port"]))
if verbose:
@ -2358,17 +2363,20 @@ def get_all_services():
for service in os.listdir(rootdir):
dirname = os.path.join(rootdir, service)
if os.path.isdir(dirname):
yamlname = os.path.join(dirname, service + ".yaml")
if not os.path.isfile(yamlname):
yamls = glob.glob("*.yaml")
yamlname = yamls[0]
with open( yamlname ) as f:
content = f.read()
f.close()
if content.find( "DaemonSet" )>=0:
# Only add service if it is a daemonset.
servicedic[service] = yamlname
launch_order_file = os.path.join( dirname, "launch_order")
if os.path.isfile( launch_order_file ):
servicedic[service] = launch_order_file
else:
yamlname = os.path.join(dirname, service + ".yaml")
if not os.path.isfile(yamlname):
yamls = glob.glob("*.yaml")
yamlname = yamls[0]
with open( yamlname ) as f:
content = f.read()
f.close()
if content.find( "DaemonSet" )>=0:
# Only add service if it is a daemonset.
servicedic[service] = yamlname
return servicedic
def get_service_name(service_config_file):
@ -2397,7 +2405,7 @@ def get_service_yaml( use_service ):
servicename = get_service_name(servicedic[service])
newentries[servicename] = servicedic[service]
servicedic.update(newentries)
# print use_service
# print servicedic
fname = servicedic[use_service]
return fname
@ -2568,6 +2576,13 @@ def push_docker_images(nargs):
if verbose:
print "Build & push docker images to docker register ..."
push_dockers("./deploy/docker-images/", config["dockerprefix"], config["dockertag"], nargs, config, verbose, nocache = nocache )
def check_buildable_images(nargs):
for imagename in nargs:
imagename = imagename.lower()
if imagename in config["build-docker-via-config"]:
print "Docker image %s should be built via configuration. " % imagename
exit()
def run_docker_image( imagename, native = False, sudo = False ):
dockerConfig = fetch_config( ["docker-run", imagename ])
@ -3025,8 +3040,10 @@ def run_command( args, command, nargs, parser ):
elif command == "docker":
if len(nargs)>=1:
if nargs[0] == "build":
check_buildable_images(nargs[1:])
build_docker_images(nargs[1:])
elif nargs[0] == "push":
check_buildable_images(nargs[1:])
push_docker_images(nargs[1:])
elif nargs[0] == "run":
if len(nargs)>=2:

Просмотреть файл

@ -0,0 +1,49 @@
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
name: hdfsformat
spec:
template:
metadata:
labels:
app: hdfsformat
annotations:
pod.alpha.kubernetes.io/initialized: "true"
spec:
nodeSelector:
namenode1: active
hostNetwork: true
containers:
- name: k8shdfsformat
imagePullPolicy: Always
image: {{cnf["worker-dockerregistry"]}}{{cnf["dockerprefix"]}}hdfs:{{cnf["dockertag"]}}
ports:
env:
- name : CLUSTER_NAME
value: {{cnf["hdfs_cluster_name"]}}
volumeMounts:
- name: datadir
mountPath: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
- name: loghadoop
mountPath: /usr/local/hadoop/logs
- name: configdir
mountPath: /etc/hdfs/
- name: namenodedir
mountPath: /mnt/namenodeshare
command:
- sh
- -c
- /usr/local/hadoop/etc/hadoop/hadoop-env.sh && cd {{cnf["docker-run"]["hdfs"]["workdir"]}} && pwd && ./bootstrap_hdfs.py format && /bin/sleep infinity
volumes:
- name: datadir
hostPath:
path: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
- name: loghadoop
hostPath:
path: /var/log/hadoop/
- name: configdir
hostPath:
path: /etc/hdfs
- name: namenodedir
hostPath:
path: {{cnf["storage-mount-path"]}}/namenodeshare

Просмотреть файл

@ -0,0 +1,49 @@
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
name: hdfsjournal
spec:
template:
metadata:
labels:
app: hdfsjournal
annotations:
pod.alpha.kubernetes.io/initialized: "true"
spec:
nodeSelector:
journalnode: active
hostNetwork: true
containers:
- name: k8shdfsjournal
imagePullPolicy: Always
image: {{cnf["worker-dockerregistry"]}}{{cnf["dockerprefix"]}}hdfs:{{cnf["dockertag"]}}
ports:
env:
- name : CLUSTER_NAME
value: {{cnf["hdfs_cluster_name"]}}
volumeMounts:
- name: datadir
mountPath: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
- name: loghadoop
mountPath: /usr/local/hadoop/logs
- name: configdir
mountPath: /etc/hdfs/
- name: namenodedir
mountPath: /mnt/namenodeshare
command:
- sh
- -c
- /usr/local/hadoop/etc/hadoop/hadoop-env.sh && cd {{cnf["docker-run"]["hdfs"]["workdir"]}} && pwd && ./bootstrap_hdfs.py journalnode && /bin/sleep infinity
volumes:
- name: datadir
hostPath:
path: {{cnf["hdfsconfig"]["journalnode"]["data"]}}
- name: loghadoop
hostPath:
path: /var/log/hadoop/
- name: configdir
hostPath:
path: /etc/hdfs
- name: namenodedir
hostPath:
path: {{cnf["storage-mount-path"]}}/namenodeshare

Просмотреть файл

@ -0,0 +1 @@
journalnode.yaml

Просмотреть файл

@ -1,7 +1,30 @@
From sequenceiq/hadoop-docker:2.7.1
FROM williamyeh/java8
MAINTAINER Jin Li <jinlmsft@hotmail.com>
# CentOS 6.6
VOLUME /mnt/hadoop/
RUN apt-get update \
&& apt-get install -y jq curl
RUN curl -s http://www.apache.org/dist/hadoop/common/hadoop-2.8.0/hadoop-2.8.0.tar.gz | tar -xz -C /usr/local/ \
&& cd /usr/local \
&& ln -s ./hadoop-2.8.0 hadoop
ENV JAVA_HOME /usr/lib/jvm/java-8-oracle
ENV HADOOP_PREFIX /usr/local/hadoop
ENV HADOOP_COMMON_HOME /usr/local/hadoop
ENV HADOOP_HDFS_HOME /usr/local/hadoop
ENV HADOOP_MAPRED_HOME /usr/local/hadoop
ENV HADOOP_YARN_HOME /usr/local/hadoop
ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop
WORKDIR /usr/local/hadoop
RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/lib/jvm/java-8-oracle\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh \
&& sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh \
&& chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh
# NameNode Secondary NameNode DataNode JournalNode NFS Gateway HttpFS ZKFC
EXPOSE 8020 50070 50470 50090 50495 50010 1004 50075 1006 50020 8485 8480 2049 4242 111 14000 14001 8019
RUN curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py"
RUN python get-pip.py
@ -10,7 +33,7 @@ RUN yum install -y attr
WORKDIR {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD core-site.xml {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD core-site.xml /usr/local/hadoop/etc/hadoop/core-site.xml
ADD hdfs-site.xml.in-docker {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD logging.yaml.in-docker {{cnf["docker-run"]["hdfs"]["workdir"]}}
ADD bootstrap_hdfs.py {{cnf["docker-run"]["hdfs"]["workdir"]}}
@ -20,5 +43,8 @@ RUN chmod +x {{cnf["docker-run"]["hdfs"]["workdir"]}}/*.py
# All process in this docker needs to be run as a service.
# Do not change the command, rewrite a service if need to
# See information on https://stackoverflow.com/questions/19943766/hadoop-unable-to-load-native-hadoop-library-for-your-platform-warning
# the 3rd answer, you may ignore warning on NativeCodeLoader
CMD /bin/bash

Просмотреть файл

@ -11,13 +11,23 @@ import yaml
from jinja2 import Environment, FileSystemLoader, Template
import utils
verbose = False
verbose = True
def create_log( logdir ):
    """Ensure the logging directory *logdir* exists (``mkdir -p`` semantics).

    Best-effort: the shell command's exit status is ignored, so a failure to
    create the directory is silently tolerated, matching the original
    behavior.  The original body repeated the identical check-and-mkdir pair
    twice (copy-paste); one pass is sufficient since ``mkdir -p`` already
    creates all intermediate directories and is a no-op when they exist.
    """
    if not os.path.exists( logdir ):
        os.system("mkdir -p " + logdir )
def exec_with_output( cmd ):
    # Run *cmd* (split on whitespace, executed without a shell), echoing both
    # the command line and its captured stdout to our stdout.  stderr is not
    # captured and the child's exit status is not checked.
    try:
        # https://stackoverflow.com/questions/4814970/subprocess-check-output-doesnt-seem-to-exist-python-2-6-5
        print cmd
        output = subprocess.Popen( cmd.split(), stdout=subprocess.PIPE ).communicate()[0]
        print output
    except subprocess.CalledProcessError as e:
        # NOTE(review): Popen.communicate() never raises CalledProcessError
        # (only subprocess.check_output does), so this handler is dead code
        # as written — a non-zero exit status is silently ignored.  Confirm
        # whether check_output was intended (see the link above).
        print "Exception " + str(e.returncode) + ", output: " + e.output.strip()
if __name__ == '__main__':
print "Start... boostrap_hdfs.py "
try:
parser = argparse.ArgumentParser(prog='boostrap_hdfs.py',
formatter_class=argparse.RawDescriptionHelpFormatter,
@ -42,27 +52,57 @@ datanode: Launch datanode.
args = parser.parse_args()
verbose = args.verbose
server = args.server
print "Parse command line argument... "
config_file = args.config
if not os.path.exists(config_file):
print "!!!Error!!! Can't find configuration file %s " % config_file
parser.print_help()
with open(config_file, 'r') as file:
config = yaml.load(file)
if verbose:
print config
print "Configuration is : %s " % config
loggingDirBase = "/var/log/hdfs" if not "loggingDirBase" in config else config["loggingDirBase"]
config["loggingDir"] = os.path.join( loggingDirBase, server )
utils.render_template("logging.yaml.in-docker", "logging.yaml",config, verbose=verbose)
logdir = config["loggingDir"]
create_log( logdir )
with open('logging.yaml') as f:
logging_config = yaml.load(f)
f.close()
print logging_config
logutils.dictconfig.dictConfig(logging_config)
utils.render_template("hdfs-site.xml.in-docker", "hdfs-site.xml",config, verbose=verbose)
# logdir = config["loggingDir"]
# create_log( logdir )
# with open('logging.yaml') as f:
# logging_config = yaml.load(f)
# f.close()
# print logging_config
# logutils.dictconfig.dictConfig(logging_config)
utils.render_template("hdfs-site.xml.in-docker", "/usr/local/hadoop/etc/hadoop/hdfs-site.xml",config, verbose=verbose)
except Exception as e:
print "boostrap_hdfs.py fails during initialization, exception %s" % e
exit()
# Launch journal node
if server == "journalnode":
cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start journalnode"
exec_with_output( cmd )
exec_with_output( "pgrep -f JournalNode")
print "JournalNode running .... "
elif server == "zookeeper":
cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start zookeeper"
exec_with_output( cmd )
print "Zookeeper node is running .... "
elif server == "namenode":
cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start namenode"
exec_with_output( cmd )
cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start zkfc"
exec_with_output( cmd )
exec_with_output( "pgrep -f NameNode")
exec_with_output( "pgrep -f DFSZKFailoverController")
print "Namenode is running"
elif server == "datanode":
cmd = "/usr/local/hadoop/sbin/hadoop-daemon.sh start datanode"
exec_with_output( cmd )
exec_with_output( "pgrep -f DataNode")
print "Datanode is running"
elif server == "format":
cmd = "/usr/local/hadoop/bin/hadoop namenode -format -nonInteractive"
exec_with_output( cmd )
cmd = "/usr/local/hadoop/bin/hdfs zkfs -formatZK -nonInteractive"
exec_with_output( cmd )
else:
()

Просмотреть файл

@ -0,0 +1,7 @@
#!/bin/bash
: ${HADOOP_PREFIX:=/usr/local/hadoop};
$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh

Просмотреть файл

@ -1,6 +1,6 @@
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://{{cnf["cluster_name"]}}</value>
<value>hdfs://{{cnf["hdfs_cluster_name"]}}</value>
</property>
</configuration>

Просмотреть файл

@ -6,7 +6,7 @@
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>{{cnf["namenode"]["data"]}}</value>
<value>file://{{cnf["namenode"]["data"]}}</value>
<description>Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.</description>
</property>
<property>
@ -19,22 +19,22 @@
</property>
<property>
<name>dfs.nameservices</name>
<value>{{cnf["cluster_name"]}}</value>
<value>{{cnf["hdfs_cluster_name"]}}</value>
</property>
<property>
<name>dfs.ha.namenodes.{{cnf["cluster_name"]}}</name>
<name>dfs.ha.namenodes.{{cnf["hdfs_cluster_name"]}}</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.{{cnf["cluster_name"]}}.nn1</name>
<name>dfs.namenode.rpc-address.{{cnf["hdfs_cluster_name"]}}.nn1</name>
<value>{{cnf["namenode"]["namenode1"]}}:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.{{cnf["cluster_name"]}}.nn2</name>
<name>dfs.namenode.rpc-address.{{cnf["hdfs_cluster_name"]}}.nn2</name>
<value>{{cnf["namenode"]["namenode2"]}}:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.{{cnf["cluster_name"]}}.nn1</name>
<name>dfs.namenode.http-address.{{cnf["hdfs_cluster_name"]}}.nn1</name>
<value>{{cnf["namenode"]["namenode1"]}}:50070</value>
</property>
<property>
@ -43,7 +43,7 @@
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://{{cnf["journalnode"]["nodes"]}}/{{cnf["cluster_name"]}}</value>
<value>qjournal://{{cnf["journalnode"]["nodes"]}}/{{cnf["hdfs_cluster_name"]}}</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>