From b3765eaad8b58e223efb16706cc22b47159def1b Mon Sep 17 00:00:00 2001 From: Graham Williams Date: Fri, 24 Feb 2017 15:28:53 +0800 Subject: [PATCH] Update and cleanup --- vignettes/ClusterDSVM.Rmd | 75 +++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/vignettes/ClusterDSVM.Rmd b/vignettes/ClusterDSVM.Rmd index b3ff488..c503e5e 100644 --- a/vignettes/ClusterDSVM.Rmd +++ b/vignettes/ClusterDSVM.Rmd @@ -5,15 +5,6 @@ author= "Graham Williams" # Use Case -A cluster of Linux Data Science Virtual Machines (DSVMs) is deployed -and a remote command is executed across each to demonstrate they -exists. Code is included but not run to then delete the resource group -if the resources are no longer required. Once deleted consumption will -cease. - -This script is best run interactively to review its operation and to -ensure that the interaction with Azure completes. - A common use case is for a Data Scientist to create their R programs to analyse a dataset on their local compute platform (e.g., a laptop with 6GB RAM running Ubuntu with R installed). Development is @@ -23,10 +14,14 @@ quickly. When the experimental setup is complete the script can be sent across to a considerably more capable compute engine on Azure, possibly a cluster of servers to build models in parallel. -This tutorial will deploy several Linux Data Science Virtual Machines -(DSVMs), distribute a copmute task over those servers, colelct the -results and generate a report, and then delete the compute -resources. +This tutorial deploys several Linux Data Science Virtual Machines +(DSVMs), distributes a trivial compute task over those servers, +collects the results and generates a report. Code is included but not +run to then delete the resource group if the resources are no longer +required. Once deleted consumption will cease. + +This script is best run interactively to review its operation and to +ensure that the interaction with Azure completes. 
# Setup @@ -50,7 +45,6 @@ library(AzureSMR) # Support for managing Azure resources. library(AzureDSR) # Further support for the Data Scientist. library(magrittr) library(dplyr) -library(rattle) # Use weatherAUS as a "large" dataset. ``` ```{r tuning} @@ -59,8 +53,8 @@ library(rattle) # Use weatherAUS as a "large" dataset. # name the resource group that we will create transiently for the # purposes of this script. -RG <- "my_dsvm_rg_sea" # Will be created if not already exist then kill. -LOC <- "southeastasia" # Where the resource group (resources) will be hosted. +RG <- "my_dsvm_rg_sea" # Will be created if not already exist then kill. +LOC <- "southeastasia" # Where the resource group (resources) will be hosted. # Create names for the VMs. @@ -69,30 +63,22 @@ BASE <- runif(4, 1, 26) %>% round() %>% letters[.] %>% - paste(collapse="") + paste(collapse="") %T>% print() LDSVM <- paste0("ldsvm", BASE, sprintf("%03d", 1:COUNT)) %T>% print() LUSER <- paste0("user", BASE, sprintf("%03d", 1:COUNT)) %T>% print() ``` ```{r connect} # Connect to the Azure subscription and use this as the context for -# our activities. +# all of our activities. context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY) # Check if the resource group already exists. Take note this script # will not remove the resource group if it pre-existed. -context %>% - azureListRG() %>% - filter(name == RG) %>% - select(name, location) %T>% - print() %>% - nrow() %>% - equals(0) %>% - not() %T>% - print() -> -rg_pre_exists +rg_pre_exists <- existsRG(context, RG, LOC) + ``` # Creation @@ -117,7 +103,7 @@ Create the actual Linux DSVM cluser with public-key based authentication method. Name, username, and size can also be configured. -```{r deploy a set of DSVMs} +```{r deploy a set of DSVMs, eval=FALSE} # Deploy multiple DSVMs using deployDSVMCluster. @@ -154,23 +140,42 @@ for (vm in LDSVM) } ``` -Then we try deploying a cluster of DSVMs. 
The function will automatically form a DSVM cluster for us with which an R analytical job can be executed on with a "cluster parallel" computing context. +Now deploy a cluster of DSVMs. The function will automatically form a +DSVM cluster for us on which an R analytical job can be executed +with a "cluster parallel" computing context. ```{r deploy a cluster of DSVMs} - # Deploy a cluster of DSVMs. ldsvm_cluster <- deployDSVMCluster(context, resource.group=RG, location=LOC, count=COUNT, - name="zzz", - username="zzzuser", + name=BASE, + username=USER, pubkey=PUBKEY, cluster=TRUE) -# throw an data science analysis onto the cluster and run it. Still figuring out how to use mrsdeploy::remoteExecute for the purpose. - +for (vm in paste0(BASE, sprintf("%03d", 1:COUNT))) +{ + cat(vm, "\n") + + operateDSVM(context, RG, vm, operation="Check") + + # Send a simple system() command across to the new server to test + # its existence. Expect a single line with an indication of how long + # the server has been up and running. + + cmd <- paste("ssh -q", + "-o StrictHostKeyChecking=no", + "-o UserKnownHostsFile=/dev/null\\\n ", + paste0(vm, ".", LOC, ".cloudapp.azure.com"), + "uptime") %T>% + {cat(., "\n")} + cmd + system(cmd) + cat("\n") +} ``` # Optional Delete