зеркало из https://github.com/Azure/AzureDSVM.git
Merge branch 'master' of github.com:Azure/AzureDSR
This commit is contained in:
Коммит
473b2ca4b2
3
Makefile
3
Makefile
|
@ -17,3 +17,6 @@ deploy: scripts
|
|||
delete: scripts
|
||||
(cd vignettes; Rscript DeleteRG.R)
|
||||
|
||||
|
||||
ping:
|
||||
ssh -q -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null myldsvm.southeastasia.cloudapp.azure.com uptime
|
||||
|
|
|
@ -5,6 +5,7 @@ export(deployDSVM)
|
|||
export(deployDSVMCluster)
|
||||
export(dumpInterface)
|
||||
export(executeScript)
|
||||
export(existsRG)
|
||||
export(getVMSizes)
|
||||
export(operateDSVM)
|
||||
export(setConfig)
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
#' @title Check if a resource group exists.
#'
#' @description Lists the resource groups visible to the supplied
#'   context and reports whether one named \code{resource.group} is
#'   among them.
#'
#' @param context Authentication context of AzureSMR encapsulating the
#'   TID, CID, and key obtained from Azure Active Directory.
#' @param resource.group The Azure resource group where the DSVM is
#'   created. This is the name that is looked up.
#' @param location Location of the data centre to host the DSVM. Not
#'   used by the lookup itself; retained so existing calls keep working.
#' @param verbose If TRUE (the default), print the name and location of
#'   the matching resource group rows before returning.
#' @return A single logical value: TRUE if a resource group with the
#'   given name is listed for the context, FALSE otherwise.
#' @export
existsRG <- function(context, resource.group, location, verbose=TRUE)
{
  groups <- azureListRG(context)

  # Match on the caller-supplied name. The earlier version compared
  # against a free variable `RG`, so the `resource.group` argument was
  # ignored and the result depended on whatever `RG` the caller had
  # defined globally. which() drops NA comparisons, so rows with a
  # missing name never count as a match.

  hit <- which(groups$name == resource.group)
  found <- groups[hit, c("name", "location"), drop=FALSE]
  rownames(found) <- NULL

  if (verbose) print(found)

  nrow(found) > 0
}
|
|
@ -5,15 +5,6 @@ author= "Graham Williams"
|
|||
|
||||
# Use Case
|
||||
|
||||
A cluster of Linux Data Science Virtual Machines (DSVMs) is deployed
|
||||
and a remote command is executed across each to demonstrate they
|
||||
exists. Code is included but not run to then delete the resource group
|
||||
if the resources are no longer required. Once deleted consumption will
|
||||
cease.
|
||||
|
||||
This script is best run interactively to review its operation and to
|
||||
ensure that the interaction with Azure completes.
|
||||
|
||||
A common use case is for a Data Scientist to create their R programs
|
||||
to analyse a dataset on their local compute platform (e.g., a laptop
|
||||
with 6GB RAM running Ubuntu with R installed). Development is
|
||||
|
@ -23,10 +14,14 @@ quickly. When the experimental setup is complete the script can be
|
|||
sent across to a considerably more capable compute engine on Azure,
|
||||
possibly a cluster of servers to build models in parallel.
|
||||
|
||||
This tutorial will deploy several Linux Data Science Virtual Machines
|
||||
(DSVMs), distribute a copmute task over those servers, colelct the
|
||||
results and generate a report, and then delete the compute
|
||||
resources.
|
||||
This tutorial deploys several Linux Data Science Virtual Machines
|
||||
(DSVMs), distributes a trivial compute task over those servers,
|
||||
collects the results and generates a report. Code is included but not
|
||||
run to then delete the resource group if the resources are no longer
|
||||
required. Once deleted consumption will cease.
|
||||
|
||||
This script is best run interactively to review its operation and to
|
||||
ensure that the interaction with Azure completes.
|
||||
|
||||
# Setup
|
||||
|
||||
|
@ -50,7 +45,6 @@ library(AzureSMR) # Support for managing Azure resources.
|
|||
library(AzureDSR) # Further support for the Data Scientist.
|
||||
library(magrittr)
|
||||
library(dplyr)
|
||||
library(rattle) # Use weatherAUS as a "large" dataset.
|
||||
```
|
||||
|
||||
```{r tuning}
|
||||
|
@ -59,8 +53,8 @@ library(rattle) # Use weatherAUS as a "large" dataset.
|
|||
# name the resource group that we will create transiently for the
|
||||
# purposes of this script.
|
||||
|
||||
RG <- "my_dsvm_rg_sea" # Will be created if not already exist then kill.
|
||||
LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
|
||||
RG <- "my_dsvm_rg_sea" # Will be created if it does not already exist, then deleted.
|
||||
LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
|
||||
|
||||
# Create names for the VMs.
|
||||
|
||||
|
@ -69,30 +63,22 @@ BASE <-
|
|||
runif(4, 1, 26) %>%
|
||||
round() %>%
|
||||
letters[.] %>%
|
||||
paste(collapse="")
|
||||
paste(collapse="") %T>% print()
|
||||
LDSVM <- paste0("ldsvm", BASE, sprintf("%03d", 1:COUNT)) %T>% print()
|
||||
LUSER <- paste0("user", BASE, sprintf("%03d", 1:COUNT)) %T>% print()
|
||||
```
|
||||
|
||||
```{r connect}
|
||||
# Connect to the Azure subscription and use this as the context for
|
||||
# our activities.
|
||||
# all of our activities.
|
||||
|
||||
context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
|
||||
|
||||
# Check if the resource group already exists. Take note this script
|
||||
# will not remove the resource group if it pre-existed.
|
||||
|
||||
context %>%
|
||||
azureListRG() %>%
|
||||
filter(name == RG) %>%
|
||||
select(name, location) %T>%
|
||||
print() %>%
|
||||
nrow() %>%
|
||||
equals(0) %>%
|
||||
not() %T>%
|
||||
print() ->
|
||||
rg_pre_exists
|
||||
rg_pre_exists <- existsRG(context, RG, LOC)
|
||||
|
||||
```
|
||||
# Creation
|
||||
|
||||
|
@ -117,7 +103,7 @@ Create the actual Linux DSVM cluster with public-key based
|
|||
authentication method. Name, username, and size can also be
|
||||
configured.
|
||||
|
||||
```{r deploy a set of DSVMs}
|
||||
```{r deploy a set of DSVMs, eval=FALSE}
|
||||
|
||||
# Deploy multiple DSVMs using deployDSVMCluster.
|
||||
|
||||
|
@ -154,23 +140,42 @@ for (vm in LDSVM)
|
|||
}
|
||||
```
|
||||
|
||||
Then we try deploying a cluster of DSVMs. The function will automatically form a DSVM cluster for us with which an R analytical job can be executed on with a "cluster parallel" computing context.
|
||||
Now deploy a cluster of DSVMs. The function will automatically form a
|
||||
DSVM cluster for us on which an R analytical job can be executed
|
||||
with a "cluster parallel" computing context.
|
||||
|
||||
```{r deploy a cluster of DSVMs}
|
||||
|
||||
# Deploy a cluster of DSVMs.
|
||||
|
||||
ldsvm_cluster <- deployDSVMCluster(context,
|
||||
resource.group=RG,
|
||||
location=LOC,
|
||||
count=COUNT,
|
||||
name="zzz",
|
||||
username="zzzuser",
|
||||
name=BASE,
|
||||
username=USER,
|
||||
pubkey=PUBKEY,
|
||||
cluster=TRUE)
|
||||
|
||||
# Throw a data science analysis onto the cluster and run it. Still figuring out how to use mrsdeploy::remoteExecute for the purpose.
|
||||
|
||||
for (vm in paste0(BASE, sprintf("%03d", 1:COUNT)))
|
||||
{
|
||||
cat(vm, "\n")
|
||||
|
||||
operateDSVM(context, RG, vm, operation="Check")
|
||||
|
||||
# Send a simple system() command across to the new server to test
|
||||
# its existence. Expect a single line with an indication of how long
|
||||
# the server has been up and running.
|
||||
|
||||
cmd <- paste("ssh -q",
|
||||
"-o StrictHostKeyChecking=no",
|
||||
"-o UserKnownHostsFile=/dev/null\\\n ",
|
||||
paste0(vm, ".", LOC, ".cloudapp.azure.com"),
|
||||
"uptime") %T>%
|
||||
{cat(., "\n")}
|
||||
cmd
|
||||
system(cmd)
|
||||
cat("\n")
|
||||
}
|
||||
```
|
||||
|
||||
# Optional Delete
|
||||
|
|
|
@ -82,16 +82,8 @@ context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
|
|||
# Check if the resource group already exists. Take note this script
|
||||
# will not remove the resource group if it pre-existed.
|
||||
|
||||
context %>%
|
||||
azureListRG() %>%
|
||||
filter(name == RG) %>%
|
||||
select(name, location) %T>%
|
||||
print() %>%
|
||||
nrow() %>%
|
||||
equals(0) %>%
|
||||
not() %T>%
|
||||
print() ->
|
||||
rg_pre_exists
|
||||
rg_pre_exists <- existsRG(context, RG, LOC)
|
||||
|
||||
```
|
||||
|
||||
# Delete the Resource Group
|
||||
|
|
|
@ -94,16 +94,7 @@ context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
|
|||
# Check if the resource group already exists. Take note this script
|
||||
# will not remove the resource group if it pre-existed.
|
||||
|
||||
context %>%
|
||||
azureListRG() %>%
|
||||
filter(name == RG) %>%
|
||||
select(name, location) %T>%
|
||||
print() %>%
|
||||
nrow() %>%
|
||||
equals(0) %>%
|
||||
not() %T>%
|
||||
print() ->
|
||||
rg_pre_exists
|
||||
rg_pre_exists <- existsRG(context, RG, LOC)
|
||||
```
|
||||
# Create a Resource Group
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче