From b3765eaad8b58e223efb16706cc22b47159def1b Mon Sep 17 00:00:00 2001
From: Graham Williams <graham.williams@togaware.com>
Date: Fri, 24 Feb 2017 15:28:53 +0800
Subject: [PATCH] Update and cleanup

---
 vignettes/ClusterDSVM.Rmd | 75 +++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/vignettes/ClusterDSVM.Rmd b/vignettes/ClusterDSVM.Rmd
index b3ff488..c503e5e 100644
--- a/vignettes/ClusterDSVM.Rmd
+++ b/vignettes/ClusterDSVM.Rmd
@@ -5,15 +5,6 @@ author= "Graham Williams"
 
 # Use Case
 
-A cluster of Linux Data Science Virtual Machines (DSVMs) is deployed
-and a remote command is executed across each to demonstrate they
-exists. Code is included but not run to then delete the resource group
-if the resources are no longer required. Once deleted consumption will
-cease.
-
-This script is best run interactively to review its operation and to
-ensure that the interaction with Azure completes.
-
 A common use case is for a Data Scientist to create their R programs
 to analyse a dataset on their local compute platform (e.g., a laptop
 with 6GB RAM running Ubuntu with R installed). Development is
@@ -23,10 +14,14 @@ quickly. When the experimental setup is complete the script can be
 sent across to a considerably more capable compute engine on Azure,
 possibly a cluster of servers to build models in parallel.
 
-This tutorial will deploy several Linux Data Science Virtual Machines
-(DSVMs), distribute a copmute task over those servers, colelct the
-results and generate a report, and then delete the compute
-resources.
+This tutorial deploys several Linux Data Science Virtual Machines
+(DSVMs), distributes a trivial compute task over those servers,
+collects the results and generates a report. Code to delete the
+resource group is included but not run; use it once the resources are
+no longer required. Once deleted, consumption will cease.
+
+This script is best run interactively to review its operation and to
+ensure that the interaction with Azure completes.
 
 # Setup
 
@@ -50,7 +45,6 @@ library(AzureSMR)    # Support for managing Azure resources.
 library(AzureDSR)    # Further support for the Data Scientist.
 library(magrittr)    
 library(dplyr)
-library(rattle)      # Use weatherAUS as a "large" dataset.
 ```
 
 ```{r tuning}
@@ -59,8 +53,8 @@ library(rattle)      # Use weatherAUS as a "large" dataset.
 # name the resource group that we will create transiently for the
 # purposes of this script.
 
-RG    <- "my_dsvm_rg_sea"  # Will be created if not already exist then kill.
-LOC   <- "southeastasia"   # Where the resource group (resources) will be hosted.
+RG  <- "my_dsvm_rg_sea"  # Created if it does not already exist, then deleted.
+LOC <- "southeastasia"   # Where the resource group (resources) will be hosted.
 
 # Create names for the VMs.
 
@@ -69,30 +63,22 @@ BASE  <-
   runif(4, 1, 26) %>%
   round() %>%
   letters[.] %>%
-  paste(collapse="")
+  paste(collapse="") %T>% print()
 LDSVM <- paste0("ldsvm", BASE, sprintf("%03d", 1:COUNT)) %T>% print()
 LUSER <- paste0("user", BASE, sprintf("%03d", 1:COUNT)) %T>% print()
 ```
 
 ```{r connect}
 # Connect to the Azure subscription and use this as the context for
-# our activities.
+# all of our activities.
 
 context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
 
 # Check if the resource group already exists. Take note this script
 # will not remove the resource group if it pre-existed.
 
-context %>%
-  azureListRG() %>%
-  filter(name == RG) %>%
-  select(name, location) %T>%
-  print() %>%
-  nrow() %>%
-  equals(0) %>%
-  not() %T>%
-  print() ->
-rg_pre_exists
+rg_pre_exists <- existsRG(context, RG, LOC)
+
 ```
 # Creation
 
@@ -117,7 +103,7 @@ Create the actual Linux DSVM cluser with public-key based
 authentication method. Name, username, and size can also be
 configured.
 
-```{r deploy a set of DSVMs}
+```{r deploy a set of DSVMs, eval=FALSE}
 
 # Deploy multiple DSVMs using deployDSVMCluster.
 
@@ -154,23 +140,42 @@ for (vm in LDSVM)
 }
 ```
 
-Then we try deploying a cluster of DSVMs. The function will automatically form a DSVM cluster for us with which an R analytical job can be executed on with a "cluster parallel" computing context.
+Now deploy a cluster of DSVMs. The function automatically forms a
+DSVM cluster on which an R analytical job can be executed with a
+"cluster parallel" computing context.
 
 ```{r deploy a cluster of DSVMs}
-
 # Deploy a cluster of DSVMs.
 
 ldsvm_cluster <- deployDSVMCluster(context, 
                                    resource.group=RG, 
                                    location=LOC, 
                                    count=COUNT, 
-                                   name="zzz", 
-                                   username="zzzuser", 
+                                   name=BASE,
+                                   username=USER, 
                                    pubkey=PUBKEY, 
                                    cluster=TRUE)
 
-# throw an data science analysis onto the cluster and run it. Still figuring out how to use mrsdeploy::remoteExecute for the purpose.
-
+for (vm in paste0(BASE, sprintf("%03d", 1:COUNT)))
+{
+  cat(vm, "\n")
+  # NOTE(review): operation="Check" presumably reports VM status; confirm.
+  operateDSVM(context, RG, vm, operation="Check")
+  
+  # Run uptime on the new server over ssh to test its existence. Expect
+  # a single line indicating how long the server has been up. Host key
+  # checking is disabled and host keys are discarded (/dev/null). The
+  # command is echoed first; "\\\n" is a shell line continuation.
+  cmd <- paste("ssh -q",
+               "-o StrictHostKeyChecking=no",
+               "-o UserKnownHostsFile=/dev/null\\\n   ",
+               paste0(vm, ".", LOC, ".cloudapp.azure.com"),
+               "uptime") %T>%
+    {cat(., "\n")}
+  cmd  # NOTE(review): no-op; values are not auto-printed inside a for loop.
+  system(cmd)
+  cat("\n")
+}
 ```
 
 # Optional Delete