Mirror of https://github.com/Azure/aztk.git
Feature: ssh configuration file support (#77)
* added ssh.yaml configuration file
* added jupyter support, better format for ssh.yaml instructions
* refactored read_conf_file method
* renamed master ui to web ui
* fixed typos in comments
* added documentation for ssh.yaml
* refactored merge and _merge_dict methods
* changed default ssh experience to require the --id cli parameter
* renamed conflicting documentation
* improved docs, changed default port forwarding to standard ports
* fixed typo
Parent: 4ae20caa14
Commit: 859c6fa6c8
@@ -0,0 +1,18 @@
# ssh configuration

# cluster_id: <id of the cluster to connect to, recommended to specify with the --id command line parameter>

# username: <name of the user account to ssh into>
username: spark

# job_ui_port: <local port where the job ui is forwarded to>
job_ui_port: 4040

# web_ui_port: <local port where the spark master web ui is forwarded to>
web_ui_port: 8080

# jupyter_port: <local port where jupyter is forwarded to>
jupyter_port: 8888

# connect: <true/false, connect to spark master or print connection string (--no-connect)>
connect: true
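Each of the three port settings above becomes a local SSH tunnel when `azb spark cluster ssh` runs. A minimal sketch of how the defaults map to `ssh -L` forwardings (the master address and the remote port values are illustrative assumptions, not values taken from this commit):

```python
# Sketch: the local-port -> remote-port tunnels implied by the defaults above.
# Remote ports follow common Spark/Jupyter conventions and are assumptions here.
tunnels = [
    ("job_ui_port", 4040, 4040),    # Spark job UI
    ("web_ui_port", 8080, 8080),    # Spark master web UI
    ("jupyter_port", 8888, 8888),   # Jupyter
]
forwards = " ".join(
    "-L {0}:localhost:{1}".format(local, remote) for _, local, remote in tunnels
)
# e.g. ssh spark@<master-address> -L 4040:localhost:4040 -L 8080:localhost:8080 ...
print("ssh spark@<master-address> " + forwards)
```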
@@ -1,27 +0,0 @@
# Configuration Files


## Cluster Configuration

The core settings for a cluster are configured in the cluster.yaml file. Once you have set your desired values in cluster.yaml, you can create a cluster using `azb spark cluster create`.

For example, with the default cluster configuration:

```yaml
id: my_spark_cluster
vm_size: standard_a2
size: 2
username: spark
wait: true
```

Running `azb spark cluster create` will create a cluster of 2 Standard\_A2 nodes called 'my\_spark\_cluster' with a linux user named 'spark'. This is equivalent to running the command

```sh
azb spark cluster create --id my_spark_cluster --vm-size standard_a2 --size 2 --username spark --wait
```


## Secrets Configuration

A template file for necessary secrets is given in config/secrets.yaml.template. Copy the file to secrets.yaml and fill in the proper values. See [Getting Started](./00-getting-started.md) for more information.
@@ -0,0 +1,50 @@
# Configuration Files

## Cluster Configuration

The core settings for a cluster are configured in the cluster.yaml file. Once you have set your desired values in cluster.yaml, you can create a cluster using `azb spark cluster create`.

For example, with the default cluster configuration:

```yaml
id: my_spark_cluster
vm_size: standard_a2
size: 2
username: spark
wait: true
```

Running `azb spark cluster create` will create a cluster of 2 Standard\_A2 nodes called 'my\_spark\_cluster' with a linux user named 'spark'. This is equivalent to running the command

```sh
azb spark cluster create --id my_spark_cluster --vm-size standard_a2 --size 2 --username spark --wait
```

## Secrets Configuration

A template file for necessary secrets is given in config/secrets.yaml.template. After running `azb spark init`, this file will be copied to a .thunderbolt/ directory in your current working directory. Copy or rename the file to .thunderbolt/secrets.yaml and fill in the proper values for your Batch and Storage accounts. See [Getting Started](./00-getting-started.md) for more information.

## SSH Configuration

The SSH connection settings can be configured in the ssh.yaml file. Once you have set your desired values in ssh.yaml, you can connect to the master of your cluster using the command `azb spark cluster ssh`.

For example, with the default ssh configuration:
```yaml
# ssh configuration

# username: <name of the user account to ssh into>
username: spark

# job_ui_port: <local port where the job ui is forwarded to>
job_ui_port: 4040

# web_ui_port: <local port where the spark master web ui is forwarded to>
web_ui_port: 8080

# jupyter_port: <local port where jupyter is forwarded to>
jupyter_port: 8888
```

Running the command `azb spark cluster ssh --id <cluster_id>` will attempt to ssh into the master of the cluster with the given id using the username 'spark'. It will forward the Spark job UI to localhost:4040, the Spark master's web UI to localhost:8080, and Jupyter to localhost:8888.

Note that any setting in ssh.yaml is overridden by the corresponding parameter passed on the command line.
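Since ssh.yaml is plain YAML, it may help to see what the example above parses to. A small sketch using PyYAML (`safe_load` is used here just for illustration; the commit itself calls `yaml.load`):

```python
import yaml

# Sketch: the documented ssh.yaml defaults parse into a plain dict,
# which is the shape the SshConfig merge logic later in this diff consumes.
text = """
username: spark
job_ui_port: 4040
web_ui_port: 8080
jupyter_port: 8888
connect: true
"""
config = yaml.safe_load(text)
assert config["username"] == "spark"
assert config["connect"] is True
print(config)
```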
@@ -420,8 +420,8 @@ def delete_cluster(cluster_id: str) -> bool:
 def ssh_in_master(
         cluster_id: str,
         username: str=None,
-        masterui: str=None,
-        webui: str=None,
+        webui: str=None,
+        jobui: str=None,
         jupyter: str=None,
         ports=None,
         connect: bool=True):
@@ -429,8 +429,8 @@ def ssh_in_master(
     SSH into head node of spark-app
     :param cluster_id: Id of the cluster to ssh in
     :param username: Username to use to ssh
-    :param masterui: Port for the master ui (Local port)
-    :param webui: Port for the spark web ui (Local port)
+    :param webui: Port for the spark master web ui (Local port)
+    :param jobui: Port for the job web ui (Local port)
     :param jupyter: Port for jupyter (Local port)
     :param ports: a list of local and remote ports
     :type ports: [[<local-port>, <remote-port>]]
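The `ports` parameter documented above takes explicit pairs rather than named settings. A hedged sketch of the shape the docstring describes and how such pairs would translate to `-L` options (values are illustrative):

```python
# Sketch: ports as [[local-port, remote-port]] pairs, per the docstring above.
ports = [[4040, 4040], [8080, 8080]]
for local, remote in ports:
    # Each pair becomes one SSH local-forwarding option.
    print("-L {0}:localhost:{1}".format(local, remote))
```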
@@ -460,9 +460,9 @@ def ssh_in_master(
     ssh_command = CommandBuilder('ssh')

     ssh_command.add_option("-L", "{0}:localhost:{1}".format(
-        masterui, spark_master_ui_port), enable=bool(masterui))
+        webui, spark_master_ui_port), enable=bool(webui))
     ssh_command.add_option("-L", "{0}:localhost:{1}".format(
-        webui, spark_web_ui_port), enable=bool(webui))
+        jobui, spark_web_ui_port), enable=bool(jobui))
     ssh_command.add_option("-L", "{0}:localhost:{1}".format(
         jupyter, spark_jupyter_port), enable=bool(jupyter))
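The tunnels above are made conditional through `enable=bool(...)`, so an unset port simply drops its `-L` option. aztk's real CommandBuilder is not shown in this diff; the stand-in below is a sketch with the same `add_option(..., enable=...)` behavior:

```python
class CommandBuilder:
    """Minimal stand-in for the CommandBuilder used above (a sketch, not aztk's class)."""

    def __init__(self, executable: str):
        self.parts = [executable]

    def add_option(self, flag: str, value: str, enable: bool = True):
        # When enable is False the option is skipped entirely, which is how
        # the diff above omits tunnels whose local port was never requested.
        if enable:
            self.parts.extend([flag, value])

    def to_str(self) -> str:
        return " ".join(self.parts)


cmd = CommandBuilder("ssh")
cmd.add_option("-L", "4040:localhost:4040", enable=True)
cmd.add_option("-L", "8888:localhost:8888", enable=False)  # jupyter port not set
print(cmd.to_str())  # ssh -L 4040:localhost:4040
```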
@@ -174,7 +174,84 @@ class ClusterConfig:
         if self.vm_size is None:
             raise error.ThunderboltError(
                 "Please supply a vm_size in either the cluster.yaml configuration file or with a parameter (--vm-size)")

         if self.username is not None and self.wait is False:
             raise error.ThunderboltError(
                 "User {0} will not be created since wait is not set to true in either the cluster.yaml configuration file or with a parameter (--wait)".format(self.username))
+
+
+class SshConfig:
+
+    def __init__(self):
+        self.username = None
+        self.cluster_id = None
+        self.job_ui_port = None
+        self.web_ui_port = None
+        self.jupyter_port = None
+        self.connect = True
+
+    def _read_config_file(self, path: str=constants.DEFAULT_SSH_CONFIG_PATH):
+        """
+        Reads the SSH config file in the .thunderbolt/ directory (.thunderbolt/ssh.yaml)
+        """
+        if not os.path.isfile(path):
+            raise Exception(
+                "SSH configuration file doesn't exist at {0}".format(path))
+
+        with open(path, 'r') as stream:
+            try:
+                config = yaml.load(stream)
+            except yaml.YAMLError as err:
+                raise Exception(
+                    "Error in ssh.yaml: {0}".format(err))
+
+        if config is None:
+            return
+
+        self._merge_dict(config)
+
+    def _merge_dict(self, config):
+        if 'username' in config and config['username'] is not None:
+            self.username = config['username']
+
+        if 'cluster_id' in config and config['cluster_id'] is not None:
+            self.cluster_id = config['cluster_id']
+
+        if 'job_ui_port' in config and config['job_ui_port'] is not None:
+            self.job_ui_port = config['job_ui_port']
+
+        if 'web_ui_port' in config and config['web_ui_port'] is not None:
+            self.web_ui_port = config['web_ui_port']
+
+        if 'jupyter_port' in config and config['jupyter_port'] is not None:
+            self.jupyter_port = config['jupyter_port']
+
+        if 'connect' in config and config['connect'] is False:
+            self.connect = False
+
+    def merge(self, cluster_id, username, job_ui_port, web_ui_port, jupyter_port, connect):
+        """
+        Merges fields with args object
+        """
+        self._read_config_file()
+        self._merge_dict(
+            dict(
+                cluster_id=cluster_id,
+                username=username,
+                job_ui_port=job_ui_port,
+                web_ui_port=web_ui_port,
+                jupyter_port=jupyter_port,
+                connect=connect
+            )
+        )
+
+        if self.cluster_id is None:
+            raise Exception(
+                "Please supply an id for the cluster either in the ssh.yaml configuration file or with a parameter (--id)")
+
+        if self.username is None:
+            raise Exception(
+                "Please supply a username either in the ssh.yaml configuration file or with a parameter (--username)")
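`merge` reads ssh.yaml first and only then overlays the CLI arguments, and `_merge_dict` skips `None` values, so a flag passed on the command line always wins over the file. A standalone sketch of just that precedence rule (field names mirror SshConfig; values are illustrative):

```python
# Sketch of the precedence _merge_dict implements: file values load first,
# then CLI values overwrite any field that is not None.
file_values = {"username": "spark", "web_ui_port": 8080, "cluster_id": None}
cli_values = {"username": None, "web_ui_port": 9090, "cluster_id": "my_cluster"}

merged = dict(file_values)
for key, value in cli_values.items():
    if value is not None:
        merged[key] = value

assert merged == {"username": "spark", "web_ui_port": 9090,
                  "cluster_id": "my_cluster"}
```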
@@ -24,9 +24,11 @@ ROOT_PATH = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
 Path to the secrets file
 """
 DEFAULT_SECRETS_PATH = os.path.join(os.getcwd(), '.thunderbolt/secrets.yaml')

 """
 Paths to the cluster configuration files
 """
+DEFAULT_SSH_CONFIG_PATH = os.path.join(os.getcwd(), '.thunderbolt/ssh.yaml')
 DEFAULT_CLUSTER_CONFIG_PATH = os.path.join(os.getcwd(), '.thunderbolt/cluster.yaml')
 DEFAULT_SPARK_CONF_SOURCE = os.path.join(os.getcwd(), '.thunderbolt')
 DEFAULT_SPARK_CONF_DEST = os.path.join(os.getcwd(), 'node_scripts/conf')
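Note that these paths are built from `os.getcwd()`, not from the package directory, so commands must be run from the directory that contains `.thunderbolt/`. A quick illustration:

```python
import os

# Sketch: the default config paths resolve against the current working
# directory, so the same process sees different files from different cwds.
print(os.path.join(os.getcwd(), '.thunderbolt/ssh.yaml'))
```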
@@ -1,15 +1,16 @@
 import argparse
 import typing
 from dtde import clusterlib, log
+from dtde.config import SshConfig


 def setup_parser(parser: argparse.ArgumentParser):
-    parser.add_argument('--id', dest="cluster_id", required=True,
+    parser.add_argument('--id', dest="cluster_id",
                         help='The unique id of your spark cluster')
-    parser.add_argument('--masterui',
-                        help='Local port to port spark\'s master UI to')
     parser.add_argument('--webui',
-                        help='Local port to port spark\'s webui to')
+                        help='Local port to port spark\'s master UI to')
+    parser.add_argument('--jobui',
+                        help='Local port to port spark\'s job UI to')
     parser.add_argument('--jupyter',
                         help='Local port to port jupyter to')
     parser.add_argument('-u', '--username',
@@ -23,25 +24,36 @@ def setup_parser(parser: argparse.ArgumentParser):


 def execute(args: typing.NamedTuple):
+    ssh_conf = SshConfig()
+
+    ssh_conf.merge(
+        cluster_id=args.cluster_id,
+        username=args.username,
+        job_ui_port=args.jobui,
+        web_ui_port=args.webui,
+        jupyter_port=args.jupyter,
+        connect=args.connect
+    )
+
     log.info("-------------------------------------------")
-    log.info("spark cluster id: %s", args.cluster_id)
-    log.info("open masterui: %s", args.masterui)
-    log.info("open webui: %s", args.webui)
-    log.info("open jupyter: %s", args.jupyter)
-    log.info("ssh username: %s", args.username)
-    log.info("connect: %s", args.connect)
+    log.info("spark cluster id: %s", ssh_conf.cluster_id)
+    log.info("open webui: %s", ssh_conf.web_ui_port)
+    log.info("open jobui: %s", ssh_conf.job_ui_port)
+    log.info("open jupyter: %s", ssh_conf.jupyter_port)
+    log.info("ssh username: %s", ssh_conf.username)
+    log.info("connect: %s", ssh_conf.connect)
     log.info("-------------------------------------------")

     # get ssh command
     ssh_cmd = clusterlib.ssh_in_master(
-        cluster_id=args.cluster_id,
-        masterui=args.masterui,
-        webui=args.webui,
-        jupyter=args.jupyter,
-        username=args.username,
-        connect=args.connect)
+        cluster_id=ssh_conf.cluster_id,
+        webui=ssh_conf.web_ui_port,
+        jobui=ssh_conf.job_ui_port,
+        jupyter=ssh_conf.jupyter_port,
+        username=ssh_conf.username,
+        connect=ssh_conf.connect)

-    if not args.connect:
+    if not ssh_conf.connect:
         log.info("")
         log.info("Use the following command to connect to your spark head node:")
         log.info("\t%s", ssh_cmd)
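End-to-end, `execute` now funnels every CLI flag through SshConfig before anything is logged or forwarded. A hedged sketch of the namespace a call like `azb spark cluster ssh --id my_cluster --webui 9090` would hand to `execute` (field names per the parser above; the exact argparse wiring for `--no-connect` is an assumption based on the ssh.yaml comment):

```python
import argparse

# Sketch: an args namespace as setup_parser would produce it; unset flags
# stay None and fall back to ssh.yaml values inside SshConfig.merge.
args = argparse.Namespace(
    cluster_id="my_cluster",
    username=None,    # falls back to ssh.yaml's username: spark
    jobui=None,       # falls back to job_ui_port: 4040
    webui="9090",     # overrides web_ui_port: 8080
    jupyter=None,     # falls back to jupyter_port: 8888
    connect=False,    # as if --no-connect were passed (assumed flag wiring)
)
```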