Graham Williams 2017-02-24 15:28:53 +08:00
Parent 6c79235f9e
Commit b3765eaad8
1 changed file with 40 additions and 35 deletions


@@ -5,15 +5,6 @@ author= "Graham Williams"
# Use Case
-A cluster of Linux Data Science Virtual Machines (DSVMs) is deployed
-and a remote command is executed across each to demonstrate they
-exists. Code is included but not run to then delete the resource group
-if the resources are no longer required. Once deleted consumption will
-cease.
-This script is best run interactively to review its operation and to
-ensure that the interaction with Azure completes.
A common use case is for a Data Scientist to create their R programs
to analyse a dataset on their local compute platform (e.g., a laptop
with 6GB RAM running Ubuntu with R installed). Development is
@@ -23,10 +14,14 @@ quickly. When the experimental setup is complete the script can be
sent across to a considerably more capable compute engine on Azure,
possibly a cluster of servers to build models in parallel.
-This tutorial will deploy several Linux Data Science Virtual Machines
-(DSVMs), distribute a copmute task over those servers, colelct the
-results and generate a report, and then delete the compute
-resources.
+This tutorial deploys several Linux Data Science Virtual Machines
+(DSVMs), distributes a trivial compute task over those servers,
+collects the results and generates a report. Code is included but not
+run to then delete the resource group if the resources are no longer
+required. Once deleted, consumption will cease.
+This script is best run interactively to review its operation and to
+ensure that the interaction with Azure completes.
# Setup
@@ -50,7 +45,6 @@ library(AzureSMR) # Support for managing Azure resources.
library(AzureDSR) # Further support for the Data Scientist.
library(magrittr)
library(dplyr)
library(rattle) # Use weatherAUS as a "large" dataset.
```
```{r tuning}
@@ -59,8 +53,8 @@ library(rattle) # Use weatherAUS as a "large" dataset.
# name the resource group that we will create transiently for the
# purposes of this script.
-RG <- "my_dsvm_rg_sea" # Will be created if not already exist then kill.
-LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
+RG <- "my_dsvm_rg_sea" # Created if it does not already exist; deleted at the end.
+LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
# Create names for the VMs.
@@ -69,30 +63,22 @@ BASE <-
  runif(4, 1, 26) %>%
  round() %>%
  letters[.] %>%
-  paste(collapse="")
+  paste(collapse="") %T>% print()
LDSVM <- paste0("ldsvm", BASE, sprintf("%03d", 1:COUNT)) %T>% print()
LUSER <- paste0("user", BASE, sprintf("%03d", 1:COUNT)) %T>% print()
```
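As an aside (not part of the commit), the naming scheme above can be illustrated with a fixed seed so the generated names are reproducible; `COUNT` here is a hypothetical cluster size of 2:

```r
library(magrittr)

set.seed(42)  # Fixed seed for a reproducible sketch; the tutorial draws at random.
COUNT <- 2    # Hypothetical cluster size for illustration.

# Four random lowercase letters form the shared base name, as above.
BASE <- runif(4, 1, 26) %>% round() %>% letters[.] %>% paste(collapse="")

# VM and user names append a zero-padded sequence number to the base.
LDSVM <- paste0("ldsvm", BASE, sprintf("%03d", 1:COUNT))
LUSER <- paste0("user",  BASE, sprintf("%03d", 1:COUNT))
```

Every run with the same seed yields the same four-letter base, so the VM names take the form ldsvm&lt;base&gt;001, ldsvm&lt;base&gt;002, and so on.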
```{r connect}
# Connect to the Azure subscription and use this as the context for
-# our activities.
+# all of our activities.
context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
# Check if the resource group already exists. Take note this script
# will not remove the resource group if it pre-existed.
-context %>%
-  azureListRG() %>%
-  filter(name == RG) %>%
-  select(name, location) %T>%
-  print() %>%
-  nrow() %>%
-  equals(0) %>%
-  not() %T>%
-  print() ->
-rg_pre_exists
+rg_pre_exists <- existsRG(context, RG, LOC)
```
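The commit replaces the hand-rolled pipeline with AzureDSR's `existsRG()`. For a version of AzureDSR that predates the helper, an equivalent check can be sketched from the removed pipeline itself (`existsRGSketch` is a hypothetical name; it assumes `azureListRG()` returns a data frame with a `name` column, and that dplyr and magrittr are loaded as in the Setup section):

```r
# Hypothetical stand-in for AzureDSR's existsRG(): TRUE if the named
# resource group appears in the subscription's resource group listing.
existsRGSketch <- function(context, rg)
{
  context %>%
    azureListRG() %>%       # List all resource groups in the subscription.
    filter(name == rg) %>%  # Keep only the one we are looking for.
    nrow() %>%
    is_greater_than(0)      # TRUE when at least one row matched.
}
```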
# Creation
@@ -117,7 +103,7 @@ Create the actual Linux DSVM cluster with public-key based
authentication method. Name, username, and size can also be
configured.
-```{r deploy a set of DSVMs}
+```{r deploy a set of DSVMs, eval=FALSE}
# Deploy multiple DSVMs using deployDSVMCluster.
@@ -154,23 +140,42 @@ for (vm in LDSVM)
}
```
-Then we try deploying a cluster of DSVMs. The function will automatically form a DSVM cluster for us with which an R analytical job can be executed on with a "cluster parallel" computing context.
+Now deploy a cluster of DSVMs. The function will automatically form a
+DSVM cluster with which an R analytical job can be executed using a
+"cluster parallel" computing context.
```{r deploy a cluster of DSVMs}
# Deploy a cluster of DSVMs.
ldsvm_cluster <- deployDSVMCluster(context,
                                   resource.group=RG,
                                   location=LOC,
                                   count=COUNT,
-                                  name="zzz",
-                                  username="zzzuser",
+                                  name=BASE,
+                                  username=USER,
                                   pubkey=PUBKEY,
                                   cluster=TRUE)
# Throw a data science analysis onto the cluster and run it. Still
# figuring out how to use mrsdeploy::remoteExecute for this purpose.
for (vm in paste0(BASE, sprintf("%03d", 1:COUNT)))
{
  cat(vm, "\n")
  operateDSVM(context, RG, vm, operation="Check")

  # Send a simple system() command across to the new server to test
  # its existence. Expect a single line with an indication of how long
  # the server has been up and running.

  cmd <- paste("ssh -q",
               "-o StrictHostKeyChecking=no",
               "-o UserKnownHostsFile=/dev/null\\\n ",
               paste0(vm, ".", LOC, ".cloudapp.azure.com"),
               "uptime") %T>%
    {cat(., "\n")}
  system(cmd)
  cat("\n")
}
```
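Since the tutorial's stated aim is to collect results and generate a report, a natural next step is to capture each server's uptime rather than merely print it. A sketch follows, with hypothetical stand-in values for `BASE`, `COUNT`, and `LOC`; the final `system()` call is only indicated in a comment because it needs live VMs and the passwordless ssh access set up above:

```r
# Hypothetical values standing in for the tutorial's BASE, COUNT, LOC.
BASE  <- "abcd"
COUNT <- 2
LOC   <- "southeastasia"

# Build the ssh command for each VM; running the commands (not done
# here) would fill an uptime column for the report.
uptimes <- data.frame(vm=paste0(BASE, sprintf("%03d", 1:COUNT)),
                      stringsAsFactors=FALSE)
uptimes$fqdn <- paste0(uptimes$vm, ".", LOC, ".cloudapp.azure.com")
uptimes$cmd  <- paste("ssh -q",
                      "-o StrictHostKeyChecking=no",
                      "-o UserKnownHostsFile=/dev/null",
                      uptimes$fqdn, "uptime")

# With live VMs, intern=TRUE returns each command's stdout:
# uptimes$uptime <- sapply(uptimes$cmd, system, intern=TRUE)
```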
# Optional Delete