зеркало из https://github.com/Azure/AzureDSVM.git
Updated R scripts for operating DSVM and remote execution with R interface
This commit is contained in:
Родитель
541bcc480f
Коммит
3632db6936
|
@ -1,7 +1,15 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export(createRInterface)
|
||||
export(deployDSVM)
|
||||
export(dumpObject)
|
||||
export(executeScript)
|
||||
export(getVMSizes)
|
||||
export(newScript)
|
||||
export(operateDSVM)
|
||||
export(setConfig)
|
||||
export(setRInterface)
|
||||
export(updateScript)
|
||||
import(dplyr)
|
||||
import(magrittr)
|
||||
importFrom(XML,htmlParse)
|
||||
|
|
|
@ -4,12 +4,12 @@
|
|||
#' @param script R script with full path for execution at remote instance.
|
||||
#' @param config Configuration for remote execution. Settings include computing context, data reference, etc.
|
||||
#' @return An S3 R interface object.
|
||||
#' @export
|
||||
createRInterface <- function(remote,
|
||||
user,
|
||||
script,
|
||||
config){
|
||||
ri_env <- new.env(parent=globalenv())
|
||||
ri_env <- as.RInterface(azEnv)
|
||||
|
||||
# initialize an R interface object.
|
||||
|
||||
|
|
|
@ -39,6 +39,10 @@ deployDSVM <- function(context,
|
|||
dns=name,
|
||||
mode="Sync")
|
||||
{
|
||||
# check if token is valid.
|
||||
|
||||
AzureSMR::azureCheckToken(context)
|
||||
|
||||
# check if required arguments are present.
|
||||
|
||||
if(missing(context))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#' @title Dump out the object configuration.
|
||||
#' @param object The R interface object.
|
||||
#' @return No return. Print R interface object information.
|
||||
#' @export
|
||||
dumpObject <- function(object) {
|
||||
cat(
|
||||
sprintf("---------------------------------------------------------------------------"),
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
# Directly using the remote functions in "msrdeploy" is probably a good idea...
|
||||
#' @title Remote execution of R script in an R interface object.
|
||||
#' @param object R interface object.
|
||||
#' @param inputs JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session.
|
||||
#' @param outputs Character vector of the names of the objects to retrieve. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session.
|
||||
#' @param checkLibraries If `TRUE`, check whether the libraries used in the R script are installed on the remote machine.
|
||||
#' @param displayPlots If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine.
|
||||
#' @param writePlots If TRUE, plots generated during execution are copied to the working directory of the local session.
|
||||
#' @return Status of script execution.
|
||||
#' @export
|
||||
executeScript <- function(object,
|
||||
inputs=NULL,
|
||||
outputs=NULL,
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#' @title Generate a new worker script which is run on the remote instance with specifications in R interface object configuration.
|
||||
#' @param path Path to the script.
|
||||
#' @param title Title of the script.
|
||||
#' @export
|
||||
newScript <- function(path=".",
|
||||
title=paste0("worker_new_", Sys.time(), ".R")) {
|
||||
notes <-
|
||||
|
@ -19,7 +20,7 @@ newScript <- function(path=".",
|
|||
|
||||
cat(notes, file=file.path(path, title))
|
||||
writeLines(
|
||||
sprintf("Worker script %s is created at location %s.",
|
||||
sprintf("Worker script %s is created at %s.",
|
||||
title, ifelse(path == ".", "work directory", path))
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,42 +1,64 @@
|
|||
#' @title Operations on a data science virtual machine. Available operations are "Check", "Start", "Stop", and "Delete".
|
||||
#' @param context AzureSMR context.
|
||||
#' @param resource.group Resource group.
|
||||
#' @param vmname Name of the DSVM.
|
||||
#' @param name Name of the DSVM.
|
||||
#' @param operation Operations on the DSVM. Available operations are "Check", "Start", "Stop", "Delete", which check the status of, start running, stop running, and delete a DSVM, respectively.
|
||||
#' @export
|
||||
operateDSVM <- function(context,
|
||||
resource.group,
|
||||
vmname,
|
||||
operation) {
|
||||
name,
|
||||
operation="Check") {
|
||||
# check if token is valid.
|
||||
|
||||
AzureSMR::azureCheckToken(context)
|
||||
|
||||
# check input arguments.
|
||||
|
||||
if (missing(context)) stop("Please specify AzureSMR context.")
|
||||
if (missing(resource.group)) stop("Please specify resource group.")
|
||||
if (missing(vmname)) stop("Please specify DSVM name.")
|
||||
if (missing(name)) stop("Please specify DSVM name.")
|
||||
if (missing(operation)) stop("Please specify an operation on the DSVM")
|
||||
|
||||
# check if input operations are available.
|
||||
|
||||
if (!(operation %in% c("Check", "Start", "Stop", "Delete"))) stop("Please use an allowed operation, i.e., 'Check', 'Start', 'Stop', or 'Delete', for the DSVM.")
|
||||
|
||||
# check if vm exists.
|
||||
|
||||
vm_names <- AzureSMR::azureListVM(context,
|
||||
resourceGroup=resource.group,
|
||||
verbose=FALSE)
|
||||
|
||||
if(!(name %in% unlist(vm_names$name)))
|
||||
stop("DSVM does not exist.")
|
||||
|
||||
status <- AzureSMR::azureVMStatus(azureActiveContext=context,
|
||||
resourceGroup=resource.group,
|
||||
vmName=name,
|
||||
verbose=FALSE)
|
||||
|
||||
if (operation == "Check") {
|
||||
AzureSMR::azureVMStatus(azureActiveContext=context,
|
||||
resourceGroup=resource.group,
|
||||
vmName=vmname,
|
||||
verbose=FALSE)
|
||||
print(status)
|
||||
} else if (operation == "Start") {
|
||||
if(status == "Provisioning succeeded, VM running")
|
||||
return("The DSVM has already been started.")
|
||||
|
||||
AzureSMR::azureStartVM(azureActiveContext=context,
|
||||
resourceGroup=resource.group,
|
||||
vmName=vmname,
|
||||
vmName=name,
|
||||
verbose=FALSE)
|
||||
} else if (operation == "Stop") {
|
||||
if(status == "Provisioning succeeded, VM deallocated")
|
||||
return("The DSVM has already been stopped.")
|
||||
|
||||
AzureSMR::azureStopVM(azureActiveContext=context,
|
||||
resourceGroup=resource.group,
|
||||
vmName=vmname,
|
||||
vmName=name,
|
||||
verbose=FALSE)
|
||||
} else {
|
||||
AzureSMR::azureDeleteVM(azureActiveContext=context,
|
||||
resourceGroup=resource.group,
|
||||
vmName=vmname,
|
||||
vmName=name,
|
||||
verbose=FALSE)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#' @param slaves Slave nodes of the machine.
|
||||
#' @param data Reference to data used in the analytics.
|
||||
#' @param context Computing context available in Microsoft R Server for running the analytics.
|
||||
#' @export
|
||||
setConfig <- function(object,
|
||||
machine_list,
|
||||
dns_list,
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#' @param script R script with full path for execution at remote instance.
|
||||
#' @param config Configuration for remote execution. Settings include computing context, data reference, etc.
|
||||
#' @return The updated R interface object.
|
||||
#' @export
|
||||
setRInterface <- function(object,
|
||||
remote,
|
||||
user,
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#' @title Update a worker script with R interface object configuration.
|
||||
#' @param object R interface object.
|
||||
#' @export
|
||||
updateScript <- function(object) {
|
||||
if (!file.exists(object$script) || length(object$script) == 0)
|
||||
{
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
\alias{AzureDSR-package}
|
||||
\title{AzureDSR}
|
||||
\description{
|
||||
The AzureDSR functions boost efficiency of data science analytics with Azure resources.
|
||||
Support data science analytics with Azure resources.
|
||||
}
|
||||
\keyword{package}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
\title{Deploy a new Data Science Virtual Machine (DSVM).}
|
||||
\usage{
|
||||
deployDSVM(context, resource.group, location, name, username,
|
||||
size = "Standard_D3_v2", os, authen = "", pubkey = "", password = "",
|
||||
mode = "Sync")
|
||||
size = "Standard_D1_v2", os, authen = "", pubkey = "", password = "",
|
||||
dns = name, mode = "Sync")
|
||||
}
|
||||
\arguments{
|
||||
\item{context}{Authentication context of AzureSMR encapsulating the
|
||||
|
@ -20,23 +20,29 @@ created.}
|
|||
\item{name}{Name of the DSVM. Lowercase characters or numbers
|
||||
only. Special characters are not permitted.}
|
||||
|
||||
\item{username}{User name of the DSVM. It should be different
|
||||
from `name`.}
|
||||
\item{username}{User name of the DSVM. It should be different from
|
||||
`name`.}
|
||||
|
||||
\item{size}{Size of the DSVM. The default is
|
||||
"Standard_D1_v2". All available sizes can be obtained by function
|
||||
`getsizes`.}
|
||||
\item{size}{Size of the DSVM. The default is "Standard_D1_v2". All
|
||||
available sizes can be obtained by function `getVMSizes`.}
|
||||
|
||||
\item{os}{Operating system of DSVM. Permitted values are "Linux" and "Windows" for Linux based and Windows based operating systems, respectively.}
|
||||
\item{os}{Operating system of DSVM. Permitted values are "Linux"
|
||||
and "Windows" for Linux based and Windows based operating
|
||||
systems, respectively.}
|
||||
|
||||
\item{authen}{Either "Key" or "Password", meaning public-key based or
|
||||
password based authentication, respectively. Note Windows DSVM by default uses password based authentication and this argument can be left unset.}
|
||||
\item{authen}{Either "Key" or "Password", meaning public-key based
|
||||
or password based authentication, respectively. Note Windows DSVM
|
||||
by default uses password based authentication and this argument
|
||||
can be left unset.}
|
||||
|
||||
\item{pubkey}{Public key for the DSVM. Only applicable for
|
||||
public-key based authentication of Linux based DSVM.}
|
||||
|
||||
\item{password}{Password for the DSVM.}
|
||||
|
||||
\item{dns}{DNS label for the VM address. The URL for accessing the
|
||||
deployed DSVM will be "<dns_label>.<location>.cloudapp.azure.com".}
|
||||
|
||||
\item{mode}{Mode of virtual machine deployment. Default is "Sync".}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/executeScript.R
|
||||
\name{executeScript}
|
||||
\alias{executeScript}
|
||||
\title{Remote execution of R script in an R interface object.}
|
||||
\usage{
|
||||
executeScript(object, inputs = NULL, outputs = NULL,
|
||||
checkLibraries = FALSE, displayPlots = FALSE, writePlots = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{R interface object.}
|
||||
|
||||
\item{inputs}{JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session.}
|
||||
|
||||
\item{outputs}{Character vector of the names of the objects to retrieve. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session.}
|
||||
|
||||
\item{checkLibraries}{If `TRUE`, check whether the libraries used in the R script are installed on the remote machine.}
|
||||
|
||||
\item{displayPlots}{If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine.}
|
||||
|
||||
\item{writePlots}{If TRUE, plots generated during execution are copied to the working directory of the local session.}
|
||||
}
|
||||
\value{
|
||||
Status of script execution.
|
||||
}
|
||||
|
|
@ -4,14 +4,14 @@
|
|||
\alias{operateDSVM}
|
||||
\title{Operations on a data science virtual machine. Available operations are "Check", "Start", "Stop", and "Delete".}
|
||||
\usage{
|
||||
operateDSVM(context, resource.group, vmname, operation)
|
||||
operateDSVM(context, resource.group, name, operation = "Check")
|
||||
}
|
||||
\arguments{
|
||||
\item{context}{AzureSMR context.}
|
||||
|
||||
\item{resource.group}{Resource group.}
|
||||
|
||||
\item{vmname}{Name of the DSVM.}
|
||||
\item{name}{Name of the DSVM.}
|
||||
|
||||
\item{operation}{Operations on the DSVM. Available operations are "Check", "Start", "Stop", "Delete", which check the status of, start running, stop running, and delete a DSVM, respectively.}
|
||||
}
|
||||
|
|
|
@ -17,50 +17,21 @@ A Linux Data Science Virtual Machine (DSVM) is deployed, the analysis
|
|||
completed, results collected, and the compute resources deleted. Azure
|
||||
consumption costs are minimised.
|
||||
|
||||
This specific demonstration simply creates a Linux Data Science
|
||||
Virtual Machine within a resource group, demonstrates it exists, and
|
||||
then deletes the resource group.
|
||||
|
||||
This script is best run interactively to review its operation and
|
||||
ensure interaction with Azure completes.
|
||||
This demonstration shows how an experimental data analysis can be deployed
|
||||
onto a Linux DSVM or a customized Linux DSVM set and executed in a desired
|
||||
high-performance computing context.
|
||||
|
||||
# Setup
|
||||
|
||||
We assume there is already a subscription and we have obtained the
|
||||
credentials required. See
|
||||
[AzureSMR's Authentication Guide](https://github.com/Microsoft/AzureSMR/blob/master/vignettes/Authentication.Rmd)
|
||||
for details. We will then ensure a resource group exists and within
|
||||
that resource group create a Linux DSVM. A public ssh key is used to
|
||||
access the server.
|
||||
|
||||
To get started we need to load the obtained credentials as well as the
|
||||
user's ssh public key. Public keys on Linux are typically created on
|
||||
the users desktop/laptop machine and will be found within
|
||||
~/.ssh/id_rsa.pub. The contents of the user's credentials file will
|
||||
be something like:
|
||||
We assume that the first step of [ConnectToLinuxDSVM](https://github.com/Azure/AzureDSR/vignettes/ConnectToLinuxDSVM.Rmd) has been done, and that at least one Linux DSVM exists in the created resource group.
|
||||
|
||||
To begin with, let's check the status of the DSVM and start it if it is deallocated. This is achieved with AzureSMR, and again the credentials for authenticating the app in Active Directory should be provided.
|
||||
```{r credentials, eval=FALSE}
|
||||
# Credentials come from app creation in Active Directory within Azure.
|
||||
|
||||
TID <- "72f9....db47" # Tenant ID
|
||||
CID <- "9c52....074a" # Client ID
|
||||
KEY <- "9Efb....4nwV....ASa8=" # User key
|
||||
|
||||
PUBKEY <- readLines("~/.ssh/id_rsa.pub")
|
||||
```
|
||||
|
||||
```{r setup}
|
||||
# Load the required subscription resources: TID, CID, and KEY.
|
||||
# Also includes the ssh PUBKEY for the user.
|
||||
|
||||
USER <- Sys.getenv("USERNAME")
|
||||
|
||||
source(paste0(USER, "_credentials.R"))
|
||||
|
||||
# Install the packages if required.
|
||||
|
||||
devtools::install_github("Microsoft/AzureSMR")
|
||||
devtools::install_github("Azure/AzureDSR", auth_token=GIT_TOKEN)
|
||||
```
|
||||
|
||||
```{r packages}
|
||||
|
@ -79,107 +50,57 @@ library(rattle) # Use weatherAUS as a "large" dataset.
|
|||
# name the resource group that we will create transiently for the
|
||||
# purposes of this script.
|
||||
|
||||
RG <- "my_dsvm_rg_sea" # Create if not already exist then kill.
|
||||
LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
|
||||
# RG <- "my_dsvm_rg_sea" # Create if not already exist then kill.
|
||||
RG <- "dsvm"
|
||||
LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
|
||||
VM <- "msvm001"
|
||||
VM_URL <- paste(VM, LOC, "cloudapp.azure.com", sep=".")
|
||||
```
|
||||
|
||||
```{r connect}
|
||||
# Connect to the Azure subscription and use this as the context for
|
||||
# our activities.
|
||||
# DSVM Operation
|
||||
|
||||
One can simply operate the created DSVM instance as desired.
|
||||
```{r dsvm operation}
|
||||
# authentication.
|
||||
|
||||
context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
|
||||
|
||||
# Check if the resource group already exists. Take note this script
|
||||
# will not remove the resource group if it pre-existed.
|
||||
# get VM list under the resource group.
|
||||
|
||||
context %>%
|
||||
azureListRG() %>%
|
||||
filter(name == RG) %>%
|
||||
select(name, location) %T>%
|
||||
print() %>%
|
||||
nrow() %>%
|
||||
equals(0) %>%
|
||||
not() %T>%
|
||||
print() ->
|
||||
rg_pre_exists
|
||||
```
|
||||
# Creation
|
||||
vm_names <-
|
||||
AzureSMR::azureListVM(context, RG, LOC) %T>%
|
||||
print()
|
||||
|
||||
Create the resource group within which all resources we create will be
|
||||
grouped.
|
||||
# check status of a DSVM.
|
||||
|
||||
```{r create resource group}
|
||||
if (! rg_pre_exists)
|
||||
{
|
||||
# Create a new resource group into which we create the VMs and
|
||||
# related resources. Resource group name is RG.
|
||||
|
||||
# To create a new resource group, one needs to add access control of Active Directory application at subscription level.
|
||||
operateDSVM(context, RG, VM, operation="Check")
|
||||
|
||||
azureCreateResourceGroup(context, RG, LOC)
|
||||
# start the DSVM if it is not running.
|
||||
|
||||
}
|
||||
```
|
||||
Create the actual Linux DSVM with public key based authentication method. Name, username, and size can also be configured.
|
||||
operateDSVM(context, RG, VM, operation="Start")
|
||||
|
||||
```{r deploy}
|
||||
# Create the required Linux DSVM - generally 4 minutes.
|
||||
# stop the DSVM
|
||||
|
||||
ldsvm <- deployDSVM(context,
|
||||
resource.group=RG,
|
||||
location=LOC,
|
||||
name="mydsvm010",
|
||||
username=USER,
|
||||
size="Standard_DS1_v2",
|
||||
os="Linux",
|
||||
authen="Key",
|
||||
pubkey=PUBKEY)
|
||||
|
||||
ldsvm
|
||||
operateDSVM(context, RG, VM, operation="Stop")
|
||||
```
|
||||
|
||||
`deployDSVM` also supports deployment of Windows DSVM, which can be
|
||||
achieved by setting the argument of `vmos` to "Windows".
|
||||
# Run analytics.
|
||||
|
||||
```{r}
|
||||
wdsvm <- deployDSVM(context,
|
||||
resource.group=RG,
|
||||
location=LOC,
|
||||
vmname="mydsvm002",
|
||||
vmusername=USER,
|
||||
vmsize="Standard_D3_v2",
|
||||
vmos="Windows",
|
||||
vmpassword=PASSWORD)
|
||||
Next step is to use the DSVM for data analytics.
|
||||
|
||||
wdsvm
|
||||
There are many ways of interacting with a DSVM. For both Linux and Windows based DSVMs, it is convenient to log in remotely to the machines with a GUI (more detailed information can be found [here](https://docs.microsoft.com/en-us/azure/machine-learning/machine-learning-data-science-provision-vm)). Remote execution within an R session is often preferred by data scientists, as it can be efficiently automated with R scripts. The following code chunks demonstrate how to use an R interface for remote execution of R scripts under a desired computing context.
|
||||
|
||||
```{r set R interface}
|
||||
|
||||
# create an R interface for handling the remote execution.
|
||||
|
||||
interface <- createRInterface(remote=VM_URL, user=USER)
|
||||
|
||||
# create a script for remote execution.
|
||||
|
||||
newScript(path=".", title="experiment1.R")
|
||||
|
||||
# put analytics into the script.
|
||||
|
||||
updateScript(interface)
|
||||
```
|
||||
|
||||
|
||||
Prove that the server exists.
|
||||
|
||||
```{r prove exists}
|
||||
|
||||
# Send a simple system() command across to the new server to test its
|
||||
# existence. Expect a single line with an indication of how long the
|
||||
# server has been up and running.
|
||||
|
||||
cmd <- paste("ssh -q",
|
||||
"-o StrictHostKeyChecking=no",
|
||||
"-o UserKnownHostsFile=/dev/null",
|
||||
ldsvm, "uptime")
|
||||
cmd
|
||||
system(cmd)
|
||||
```
|
||||
|
||||
# Cleanup
|
||||
|
||||
```{r optionally delete resource group}
|
||||
# Delete the resource group now that we have proved existence. There
|
||||
# is probably no need to wait. Only delete if it did not pre-exist
|
||||
# this script. Deletion seems to take 10 minutes or more.
|
||||
|
||||
if (! rg_pre_exists)
|
||||
azureDeleteResourceGroup(context, RG)
|
||||
```
|
||||
|
||||
Once deleted we are consuming no more.
|
||||
|
|
|
@ -139,7 +139,7 @@ ldsvm
|
|||
```
|
||||
|
||||
`deployDSVM` also supports deployment of Windows DSVM, which can be
|
||||
achieved by setting the argument of `vmos` to "Windows".
|
||||
achieved by setting the argument of `os` to "Windows".
|
||||
|
||||
```{r}
|
||||
wdsvm <- deployDSVM(context,
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Your worker script starts from here ...
|
||||
# ---------------------------------------------------------------------------
|
Загрузка…
Ссылка в новой задаче