Updated R scripts for operating DSVM and remote execution with R interface

This commit is contained in:
yueguoguo 2017-02-16 15:02:41 +08:00
Родитель 541bcc480f
Коммит 3632db6936
17 изменённых файлов: 153 добавлений и 149 удалений

Просмотреть файл

@ -1,7 +1,15 @@
# Generated by roxygen2: do not edit by hand
export(createRInterface)
export(deployDSVM)
export(dumpObject)
export(executeScript)
export(getVMSizes)
export(newScript)
export(operateDSVM)
export(setConfig)
export(setRInterface)
export(updateScript)
import(dplyr)
import(magrittr)
importFrom(XML,htmlParse)

Просмотреть файл

@ -4,12 +4,12 @@
#' @param script R script with full path for execution at remote instance.
#' @param config Configuration for remote execution. Settings include computing context, data reference, etc.
#' @return An S3 R interface object.
#' @export
createRInterface <- function(remote,
user,
script,
config){
ri_env <- new.env(parent=globalenv())
ri_env <- as.RInterface(azEnv)
# initialize an R interface object.

Просмотреть файл

@ -39,6 +39,10 @@ deployDSVM <- function(context,
dns=name,
mode="Sync")
{
# check if token is valid.
AzureSMR::azureCheckToken(context)
# check if required arguments are present.
if(missing(context))

Просмотреть файл

@ -1,6 +1,7 @@
#' @title Dump out the object configuration.
#' @param object The R interface object.
#' @return No return. Print R interface object information.
#' @export
dumpObject <- function(object) {
cat(
sprintf("---------------------------------------------------------------------------"),

Просмотреть файл

@ -1,4 +1,12 @@
# Directly using the remote functions in "msrdeploy" is probably a good idea...
#' @title Remote execution of R script in an R interface object.
#' @param object R interface object.
#' @param inputs JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session.
#' @param outputs Character vector of the names of the objects to retrieve. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session.
#' @param checkLibraries If `TRUE`, check whether libraries used in the R script are installed on the remote machine.
#' @param displayPlots If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine.
#' @param writePlots If TRUE, plots generated during execution are copied to the working directory of the local session.
#' @return Status of script execution.
#' @export
executeScript <- function(object,
inputs=NULL,
outputs=NULL,

Просмотреть файл

@ -1,6 +1,7 @@
#' @title Generate a new worker script which is run on the remote instance with specifications in R interface object configuration.
#' @param path Path to the script.
#' @param title Title of the script.
#' @export
newScript <- function(path=".",
title=paste0("worker_new_", Sys.time(), ".R")) {
notes <-
@ -19,7 +20,7 @@ newScript <- function(path=".",
cat(notes, file=file.path(path, title))
writeLines(
sprintf("Worker script %s is created at location %s.",
sprintf("Worker script %s is created at %s.",
title, ifelse(path == ".", "work directory", path))
)
}

Просмотреть файл

@ -1,42 +1,64 @@
#' @title Operations on a data science virtual machine. Available operations are "Check", "Start", "Stop", and "Delete".
#' @param context AzureSMR context.
#' @param resource.group Resource group.
#' @param vmname Name of the DSVM.
#' @param name Name of the DSVM.
#' @param operation Operations on the DSVM. Available operations are "Check", "Start", "Stop", "Delete", which check the status of, start running, stop running, and delete a DSVM, respectively.
#' @export
operateDSVM <- function(context,
resource.group,
vmname,
operation) {
name,
operation="Check") {
# Purpose: run one operation ("Check", "Start", "Stop" or "Delete") on a DSVM
# in the given resource group through the AzureSMR API.
#
# Check if the token is valid.
# NOTE(review): this call uses `context` before the missing(context) guard
# below, so a missing `context` presumably fails here first with a less
# helpful message -- confirm and consider moving the guards above this call.
AzureSMR::azureCheckToken(context)
# Check that the required input arguments were supplied.
if (missing(context)) stop("Please specify AzureSMR context.")
if (missing(resource.group)) stop("Please specify resource group.")
if (missing(vmname)) stop("Please specify DSVM name.")
if (missing(name)) stop("Please specify DSVM name.")
# NOTE(review): missing() is TRUE for an argument left at its default, so this
# guard rejects calls that rely on operation="Check" and makes the default
# unusable -- confirm whether the guard or the default should be dropped.
if (missing(operation)) stop("Please specify an operation on the DSVM")
# Reject any operation outside the four supported values.
if (!(operation %in% c("Check", "Start", "Stop", "Delete"))) stop("Please use an allowed operation, i.e., 'Check', 'Start', 'Stop', or 'Delete', for the DSVM.")
# Check that the VM exists: list the VMs of the resource group and look for
# `name` among the returned names.
vm_names <- AzureSMR::azureListVM(context,
resourceGroup=resource.group,
verbose=FALSE)
if(!(name %in% unlist(vm_names$name)))
stop("DSVM does not exist.")
# Query the VM status once; the branches below print it ("Check") or use it to
# skip a redundant "Start"/"Stop" request.
status <- AzureSMR::azureVMStatus(azureActiveContext=context,
resourceGroup=resource.group,
vmName=name,
verbose=FALSE)
if (operation == "Check") {
AzureSMR::azureVMStatus(azureActiveContext=context,
resourceGroup=resource.group,
vmName=vmname,
verbose=FALSE)
print(status)
} else if (operation == "Start") {
# Return early with a message string when the status text already reports
# the VM as running; otherwise request a start.
if(status == "Provisioning succeeded, VM running")
return("The DSVM has already been started.")
AzureSMR::azureStartVM(azureActiveContext=context,
resourceGroup=resource.group,
vmName=vmname,
vmName=name,
verbose=FALSE)
} else if (operation == "Stop") {
# Return early with a message string when the status text already reports
# the VM as deallocated; otherwise request a stop.
if(status == "Provisioning succeeded, VM deallocated")
return("The DSVM has already been stopped.")
AzureSMR::azureStopVM(azureActiveContext=context,
resourceGroup=resource.group,
vmName=vmname,
vmName=name,
verbose=FALSE)
} else {
# Only "Delete" can reach this branch because `operation` was validated
# against the allowed set above.
AzureSMR::azureDeleteVM(azureActiveContext=context,
resourceGroup=resource.group,
vmName=vmname,
vmName=name,
verbose=FALSE)
}
}

Просмотреть файл

@ -7,6 +7,7 @@
#' @param slaves Slave nodes of the machine.
#' @param data Reference to data used in the analytics.
#' @param context Computing context available in Microsoft R Server for running the analytics.
#' @export
setConfig <- function(object,
machine_list,
dns_list,

Просмотреть файл

@ -5,6 +5,7 @@
#' @param script R script with full path for execution at remote instance.
#' @param config Configuration for remote execution. Settings include computing context, data reference, etc.
#' @return The updated R interface object.
#' @export
setRInterface <- function(object,
remote,
user,

Просмотреть файл

@ -1,5 +1,6 @@
#' @title Update a worker script with R interface object configuration.
#' @param object R interface object.
#' @export
updateScript <- function(object) {
if (!file.exists(object$script) || length(object$script) == 0)
{

Просмотреть файл

@ -6,7 +6,7 @@
\alias{AzureDSR-package}
\title{AzureDSR}
\description{
The AzureDSR functions boost efficiency of data science analytics with Azure resources.
Support data science analytics with Azure resources.
}
\keyword{package}

Просмотреть файл

@ -5,8 +5,8 @@
\title{Deploy a new Data Science Virtual Machine (DSVM).}
\usage{
deployDSVM(context, resource.group, location, name, username,
size = "Standard_D3_v2", os, authen = "", pubkey = "", password = "",
mode = "Sync")
size = "Standard_D1_v2", os, authen = "", pubkey = "", password = "",
dns = name, mode = "Sync")
}
\arguments{
\item{context}{Authentication context of AzureSMR encapsulating the
@ -20,23 +20,29 @@ created.}
\item{name}{Name of the DSVM. Lowercase characters or numbers
only. Special characters are not permitted.}
\item{username}{User name of the DSVM. It should be different
from `name`.}
\item{username}{User name of the DSVM. It should be different from
`name`.}
\item{size}{Size of the DSVM. The default is
"Standard_D1_v2". All available sizes can be obtained by function
`getsizes`.}
\item{size}{Size of the DSVM. The default is "Standard_D1_v2". All
available sizes can be obtained by function `getVMSizes`.}
\item{os}{Operating system of DSVM. Permitted values are "Linux" and "Windows" for Linux based and Windows based operating systems, respectively.}
\item{os}{Operating system of DSVM. Permitted values are "Linux"
and "Windows" for Linux based and Windows based operating
systems, respectively.}
\item{authen}{Either "Key" or "Password", meaning public-key based or
password based authentication, respectively. Note Windows DSVM by default uses password based authentication and this argument can be left unset.}
\item{authen}{Either "Key" or "Password", meaning public-key based
or password based authentication, respectively. Note Windows DSVM
by default uses password based authentication and this argument
can be left unset.}
\item{pubkey}{Public key for the DSVM. Only applicable for
public-key based authentication of Linux based DSVM.}
\item{password}{Password for the DSVM.}
\item{dns}{DNS label for the VM address. The URL for accessing the
deployed DSVM will be "<dns_label>.<location>.cloudapp.azure.com".}
\item{mode}{Mode of virtual machine deployment. Default is "Sync".}
}

26
man/executeScript.Rd Normal file
Просмотреть файл

@ -0,0 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/executeScript.R
\name{executeScript}
\alias{executeScript}
\title{Remote execution of R script in an R interface object.}
\usage{
executeScript(object, inputs = NULL, outputs = NULL,
checkLibraries = FALSE, displayPlots = FALSE, writePlots = FALSE)
}
\arguments{
\item{object}{R interface object.}
\item{inputs}{JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session.}
\item{outputs}{Character vector of the names of the objects to retrieve. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session.}
\item{checkLibraries}{If `TRUE`, check whether libraries used in the R script are installed on the remote machine.}
\item{displayPlots}{If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine.}
\item{writePlots}{If TRUE, plots generated during execution are copied to the working directory of the local session.}
}
\value{
Status of script execution.
}

Просмотреть файл

@ -4,14 +4,14 @@
\alias{operateDSVM}
\title{Operations on a data science virtual machine. Available operations are "Check", "Start", "Stop", and "Delete".}
\usage{
operateDSVM(context, resource.group, vmname, operation)
operateDSVM(context, resource.group, name, operation = "Check")
}
\arguments{
\item{context}{AzureSMR context.}
\item{resource.group}{Resource group.}
\item{vmname}{Name of the DSVM.}
\item{name}{Name of the DSVM.}
\item{operation}{Operations on the DSVM. Available operations are "Check", "Start", "Stop", "Delete", which check the status of, start running, stop running, and delete a DSVM, respectively.}
}

Просмотреть файл

@ -17,50 +17,21 @@ A Linux Data Science Virtual Machine (DSVM) is deployed, the analysis
completed, results collected, and the compute resources deleted. Azure
consumption costs are minimised.
This specific demonstration simply creates a Linux Data Science
Virtual Machine within a resource group, demonstrates it exists, and
then deletes the resource group.
This script is best run interactively to review its operation and
ensure interaction with Azure completes.
This demonstration shows how an experimental data analytics task can be dispatched
to a Linux DSVM, or a customized set of Linux DSVMs, and executed in a desired
high-performance computing context.
# Setup
We assume there is already a subscription and we have obtained the
credentials required. See
[AzureSMR's Authentication Guide](https://github.com/Microsoft/AzureSMR/blob/master/vignettes/Authentication.Rmd)
for details. We will then ensure a resource group exists and within
that resource group create a Linux DSVM. A public ssh key is used to
access the server.
To get started we need to load the obtained credentials as well as the
user's ssh public key. Public keys on Linux are typically created on
the users desktop/laptop machine and will be found within
~/.ssh/id_rsa.pub. The content's of the user's credentials file will
be something like:
We assume that the first step of [ConnectToLinuxDSVM](https://github.com/Azure/AzureDSR/vignettes/ConnectToLinuxDSVM.Rmd) has been completed, and that at least one Linux DSVM exists in the created resource group.
To begin with, let's check the status of the DSVM and start it if it is deallocated. This is achieved with AzureSMR, and again the credentials for authenticating the app in Active Directory must be provided.
```{r credentials, eval=FALSE}
# Credentials come from app creation in Active Directory within Azure.
TID <- "72f9....db47" # Tenant ID
CID <- "9c52....074a" # Client ID
KEY <- "9Efb....4nwV....ASa8=" # User key
PUBKEY <- readLines("~/.ssh/id_rsa.pub")
```
```{r setup}
# Load the required subscription resources: TID, CID, and KEY.
# Also includes the ssh PUBKEY for the user.
USER <- Sys.getenv("USERNAME")
source(paste0(USER, "_credentials.R"))
# Install the packages if required.
devtools::install_github("Microsoft/AzureSMR")
devtools::install_github("Azure/AzureDSR", auth_token=GIT_TOKEN)
```
```{r packages}
@ -79,107 +50,57 @@ library(rattle) # Use weatherAUS as a "large" dataset.
# name the resource group that we will create transiently for the
# purposes of this script.
RG <- "my_dsvm_rg_sea" # Create if not already exist then kill.
LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
# RG <- "my_dsvm_rg_sea" # Create if not already exist then kill.
RG <- "dsvm"
LOC <- "southeastasia" # Where the resource group (resources) will be hosted.
VM <- "msvm001"
VM_URL <- paste(VM, LOC, "cloudapp.azure.com", sep=".")
```
```{r connect}
# Connect to the Azure subscription and use this as the context for
# our activities.
# DSVM Operation
One can simply operate the created DSVM instance as desired.
```{r dsvm operation}
# authentication.
context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY)
# Check if the resource group already exists. Take note this script
# will not remove the resource group if it pre-existed.
# get VM list under the resource group.
context %>%
azureListRG() %>%
filter(name == RG) %>%
select(name, location) %T>%
print() %>%
nrow() %>%
equals(0) %>%
not() %T>%
print() ->
rg_pre_exists
```
# Creation
vm_names <-
AzureSMR::azureListVM(context, RG, LOC) %T>%
print()
Create the resource group within which all resources we create will be
grouped.
# check status of a DSVM.
```{r create resource group}
if (! rg_pre_exists)
{
# Create a new resource group into which we create the VMs and
# related resources. Resource group name is RG.
# To create a new resource group, one needs to add access control of Active Directory application at subscription level.
operateDSVM(context, RG, VM, operation="Check")
azureCreateResourceGroup(context, RG, LOC)
# start the DSVM if it is not running.
}
```
Create the actual Linux DSVM with public key based authentication method. Name, username, and size can also be configured.
operateDSVM(context, RG, VM, operation="Start")
```{r deploy}
# Create the required Linux DSVM - generally 4 minutes.
# stop the DSVM
ldsvm <- deployDSVM(context,
resource.group=RG,
location=LOC,
name="mydsvm010",
username=USER,
size="Standard_DS1_v2",
os="Linux",
authen="Key",
pubkey=PUBKEY)
ldsvm
operateDSVM(context, RG, VM, operation="Stop")
```
`deployDSVM` also supports deployment of Windows DSVM, which can be
achieved by setting the argument of `vmos` to "Windows".
# Run analytics.
```{r}
wdsvm <- deployDSVM(context,
resource.group=RG,
location=LOC,
vmname="mydsvm002",
vmusername=USER,
vmsize="Standard_D3_v2",
vmos="Windows",
vmpassword=PASSWORD)
Next step is to use the DSVM for data analytics.
wdsvm
There are many ways of interacting with a DSVM. For both Linux and Windows based DSVMs, it is convenient to log in to the machines remotely with a GUI (more detailed information can be found [here](https://docs.microsoft.com/en-us/azure/machine-learning/machine-learning-data-science-provision-vm)). Remote execution from within an R session is often preferred by data scientists, as it can be efficiently automated with R scripts. The following code chunks demonstrate how to use an R interface for remote execution of R scripts under a desired computing context.
```{r set R interface}
# create an R interface for handling the remote execution.
interface <- createRInterface(remote=VM_URL, user=USER)
# create a script for remote execution.
newScript(path=".", title="experiment1.R")
# put analytics into the script.
updateScript(interface)
```
Prove that the server exists.
```{r prove exists}
# Send a simple system() command across to the new server to test its
# existence. Expect a single line with an indication of how long the
# server has been up and running.
cmd <- paste("ssh -q",
"-o StrictHostKeyChecking=no",
"-o UserKnownHostsFile=/dev/null",
ldsvm, "uptime")
cmd
system(cmd)
```
# Cleanup
```{r optionally delete resource group}
# Delete the resource group now that we have proved existence. There
# is probably no need to wait. Only delete if it did not pre-exist
# this script. Deletion seems to take 10 minutes or more.
if (! rg_pre_exists)
azureDeleteResourceGroup(context, RG)
```
Once deleted we are consuming no more.

Просмотреть файл

@ -139,7 +139,7 @@ ldsvm
```
`deployDSVM` also supports deployment of Windows DSVM, which can be
achieved by setting the argument of `vmos` to "Windows".
achieved by setting the argument of `os` to "Windows".
```{r}
wdsvm <- deployDSVM(context,

4
vignettes/experiment1.R Normal file
Просмотреть файл

@ -0,0 +1,4 @@
# ---------------------------------------------------------------------------
# Your worker script starts from here ...
# ---------------------------------------------------------------------------