diff --git a/NAMESPACE b/NAMESPACE index 9aea1d4..7d46d50 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,15 @@ # Generated by roxygen2: do not edit by hand +export(createRInterface) export(deployDSVM) +export(dumpObject) +export(executeScript) export(getVMSizes) +export(newScript) +export(operateDSVM) +export(setConfig) +export(setRInterface) +export(updateScript) import(dplyr) import(magrittr) importFrom(XML,htmlParse) diff --git a/R/createInterface.R b/R/createInterface.R index 94d45cf..865202b 100644 --- a/R/createInterface.R +++ b/R/createInterface.R @@ -4,12 +4,12 @@ #' @param script R script with full path for execution at remote instance. #' @param config Configuration for remote execution. Settings include computing context, data reference, etc. #' @return An S3 R interface object. +#' @export createRInterface <- function(remote, user, script, config){ ri_env <- new.env(parent=globalenv()) - ri_env <- as.RInterface(azEnv) # initialize an R interface object. diff --git a/R/deployDSVM.R b/R/deployDSVM.R index 381e228..c341ba1 100644 --- a/R/deployDSVM.R +++ b/R/deployDSVM.R @@ -39,6 +39,10 @@ deployDSVM <- function(context, dns=name, mode="Sync") { + # check if token is valid. + + AzureSMR::azureCheckToken(context) + # check if required arguments are present. if(missing(context)) diff --git a/R/dumpInterface.R b/R/dumpInterface.R index a941358..085714f 100644 --- a/R/dumpInterface.R +++ b/R/dumpInterface.R @@ -1,6 +1,7 @@ #' @title Dump out the object configuration. #' @param object The R interface object. #' @return No return. Print R interface object information. +#' @export dumpObject <- function(object) { cat( sprintf("---------------------------------------------------------------------------"), diff --git a/R/executeScript.R b/R/executeScript.R index ab9bfab..3adfd97 100644 --- a/R/executeScript.R +++ b/R/executeScript.R @@ -1,4 +1,12 @@ -# Probably directly use remote functions in "msrdeploy" is a good idea... 
+#' @title Remote execution of R script in an R interface object. +#' @param object R interface object. +#' @param inputs JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session. +#' @param outputs Character vector of the names of the objects to retrieve. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session. +#' @param checkLibraries If `TRUE`, check whether libraries used in the R script are installed on the remote machine. +#' @param displayPlots If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine. +#' @param writePlots If TRUE, plots generated during execution are copied to the working directory of the local session. +#' @return Status of script execution. +#' @export executeScript <- function(object, inputs=NULL, outputs=NULL, diff --git a/R/newScript.R b/R/newScript.R index bbe6b83..b2a91f6 100644 --- a/R/newScript.R +++ b/R/newScript.R @@ -1,6 +1,7 @@ #' @title Generate a new worker script which is run on the remote instance with specifications in R interface object configuration. #' @param path Path to the script. #' @param title Title of the script. 
+#' @export newScript <- function(path=".", title=paste0("worker_new_", Sys.time(), ".R")) { notes <- @@ -19,7 +20,7 @@ newScript <- function(path=".", cat(notes, file=file.path(path, title)) writeLines( - sprintf("Worker script %s is created at location %s.", + sprintf("Worker script %s is created at %s.", title, ifelse(path == ".", "work directory", path)) ) } diff --git a/R/operateDSVM.R b/R/operateDSVM.R index 1114064..0280460 100644 --- a/R/operateDSVM.R +++ b/R/operateDSVM.R @@ -1,42 +1,64 @@ #' @title Operations on a data science virtual machine. Available operations are "Check", "Start", "Stop", and "Delete". #' @param context AzureSMR context. #' @param resource.group Resource group. -#' @param vmname Name of the DSVM. +#' @param name Name of the DSVM. #' @param operation Operations on the DSVM. Available operations are "Check", "Start", "Stop", "Delete", which check the status of, start running, stop running, and delete a DSVM, respectively. +#' @export operateDSVM <- function(context, resource.group, - vmname, - operation) { + name, + operation="Check") { + # check if token is valid. + + AzureSMR::azureCheckToken(context) + # check input arguments. if (missing(context)) stop("Please specify AzureSMR context.") if (missing(resource.group)) stop("Please specify resource group.") - if (missing(vmname)) stop("Please specify DSVM name.") + if (missing(name)) stop("Please specify DSVM name.") if (missing(operation)) stop("Please specify an operation on the DSVM") # check if input operations are available. if (!(operation %in% c("Check", "Start", "Stop", "Delete"))) stop("Please use an allowed operation, i.e., 'Check', 'Start', 'Stop', or 'Delete', for the DSVM.") + # check if vm exists. 
+ + vm_names <- AzureSMR::azureListVM(context, + resourceGroup=resource.group, + verbose=FALSE) + + if(!(name %in% unlist(vm_names$name))) + stop("DSVM does not exist.") + + status <- AzureSMR::azureVMStatus(azureActiveContext=context, + resourceGroup=resource.group, + vmName=name, + verbose=FALSE) + if (operation == "Check") { - AzureSMR::azureVMStatus(azureActiveContext=context, - resourceGroup=resource.group, - vmName=vmname, - verbose=FALSE) + print(status) } else if (operation == "Start") { + if(status == "Provisioning succeeded, VM running") + return("The DSVM has already been started.") + AzureSMR::azureStartVM(azureActiveContext=context, resourceGroup=resource.group, - vmName=vmname, + vmName=name, verbose=FALSE) } else if (operation == "Stop") { + if(status == "Provisioning succeeded, VM deallocated") + return("The DSVM has already been stopped.") + AzureSMR::azureStopVM(azureActiveContext=context, resourceGroup=resource.group, - vmName=vmname, + vmName=name, verbose=FALSE) } else { AzureSMR::azureDeleteVM(azureActiveContext=context, resourceGroup=resource.group, - vmName=vmname, + vmName=name, verbose=FALSE) } } diff --git a/R/setConfig.R b/R/setConfig.R index df28a9e..a7a7279 100644 --- a/R/setConfig.R +++ b/R/setConfig.R @@ -7,6 +7,7 @@ #' @param slaves Slave nodes of the machine. #' @param data Reference to data used in the analytics. #' @param context Computing context available in Microsoft R Server for running the analytics. +#' @export setConfig <- function(object, machine_list, dns_list, diff --git a/R/setInterface.R b/R/setInterface.R index 74b13df..33adac4 100644 --- a/R/setInterface.R +++ b/R/setInterface.R @@ -5,6 +5,7 @@ #' @param script R script with full path for execution at remote instance. #' @param config Configuration for remote execution. Settings include computing context, data reference, etc. #' @return The updated R interface object. 
+#' @export setRInterface <- function(object, remote, user, diff --git a/R/updateScript.R b/R/updateScript.R index 2243fdb..c2a5b7a 100644 --- a/R/updateScript.R +++ b/R/updateScript.R @@ -1,5 +1,6 @@ #' @title Update a worker script with R interface object configuration. #' @param object R interface object. +#' @export updateScript <- function(object) { if (!file.exists(object$script) || length(object$script) == 0) { diff --git a/man/AzureDSR-package.Rd b/man/AzureDSR-package.Rd index faec9ed..0b040ca 100644 --- a/man/AzureDSR-package.Rd +++ b/man/AzureDSR-package.Rd @@ -6,7 +6,7 @@ \alias{AzureDSR-package} \title{AzureDSR} \description{ -The AzureDSR functions boost efficiency of data science analytics with Azure resources. +Support data science analytics with Azure resources. } \keyword{package} diff --git a/man/deployDSVM.Rd b/man/deployDSVM.Rd index 47a1bd4..9b51637 100644 --- a/man/deployDSVM.Rd +++ b/man/deployDSVM.Rd @@ -5,8 +5,8 @@ \title{Deploy a new Data Science Virtual Machine (DSVM).} \usage{ deployDSVM(context, resource.group, location, name, username, - size = "Standard_D3_v2", os, authen = "", pubkey = "", password = "", - mode = "Sync") + size = "Standard_D1_v2", os, authen = "", pubkey = "", password = "", + dns = name, mode = "Sync") } \arguments{ \item{context}{Authentication context of AzureSMR encapsulating the @@ -20,23 +20,29 @@ created.} \item{name}{Name of the DSVM. Lowercase characters or numbers only. Special characters are not permitted.} -\item{username}{User name of the DSVM. It should be different -from `name`.} +\item{username}{User name of the DSVM. It should be different from +`name`.} -\item{size}{Size of the DSVM. The default is -"Standard_D1_v2". All available sizes can be obtained by function -`getsizes`.} +\item{size}{Size of the DSVM. The default is "Standard_D1_v2". All +available sizes can be obtained by function `getVMSizes`.} -\item{os}{Operating system of DSVM. 
Permitted values are "Linux" and "Windows" for Linux based and Windows based operating systems, respectively.} +\item{os}{Operating system of DSVM. Permitted values are "Linux" +and "Windows" for Linux based and Windows based operating +systems, respectively.} -\item{authen}{Either "Key" or "Password", meaning public-key based or -password based authentication, respectively. Note Windows DSVM by default uses password based authentication and this argument can be left unset.} +\item{authen}{Either "Key" or "Password", meaning public-key based +or password based authentication, respectively. Note Windows DSVM +by default uses password based authentication and this argument +can be left unset.} \item{pubkey}{Public key for the DSVM. Only applicable for public-key based authentication of Linux based DSVM.} \item{password}{Pass word for the DSVM.} +\item{dns}{DNS label for the VM address. The URL for accessing the +deployed DSVM will be "<dns>.<location>.cloudapp.azure.com".} + \item{mode}{Mode of virtual machine deployment. Default is "Sync".} } diff --git a/man/executeScript.Rd b/man/executeScript.Rd new file mode 100644 index 0000000..11c44f5 --- /dev/null +++ b/man/executeScript.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/executeScript.R +\name{executeScript} +\alias{executeScript} +\title{Remote execution of R script in an R interface object.} +\usage{ +executeScript(object, inputs = NULL, outputs = NULL, + checkLibraries = FALSE, displayPlots = FALSE, writePlots = FALSE) +} +\arguments{ +\item{object}{R interface object.} + +\item{inputs}{JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. 
Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session.} + +\item{outputs}{Character vector of the names of the objects to retrieve. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session.} + +\item{checkLibraries}{If `TRUE`, check whether libraries used in the R script are installed on the remote machine.} + +\item{displayPlots}{If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine.} + +\item{writePlots}{If TRUE, plots generated during execution are copied to the working directory of the local session.} +} +\value{ +Status of script execution. +} + diff --git a/man/operateDSVM.Rd b/man/operateDSVM.Rd index 67868d2..4b5a6b4 100644 --- a/man/operateDSVM.Rd +++ b/man/operateDSVM.Rd @@ -4,14 +4,14 @@ \alias{operateDSVM} \title{Operations on a data science virtual machine. Available operations are "Check", "Start", "Stop", and "Delete".} \usage{ -operateDSVM(context, resource.group, vmname, operation) +operateDSVM(context, resource.group, name, operation = "Check") } \arguments{ \item{context}{AzureSMR context.} \item{resource.group}{Resource group.} -\item{vmname}{Name of the DSVM.} +\item{name}{Name of the DSVM.} \item{operation}{Operations on the DSVM. Available operations are "Check", "Start", "Stop", "Delete", which check the status of, start running, stop running, and delete a DSVM, respectively.} } diff --git a/vignettes/ComputeOnLinuxDSVM.Rmd b/vignettes/ComputeOnLinuxDSVM.Rmd index fc40c88..5a4642e 100644 --- a/vignettes/ComputeOnLinuxDSVM.Rmd +++ b/vignettes/ComputeOnLinuxDSVM.Rmd @@ -17,50 +17,21 @@ A Linux Data Science Virtual Machine (DSVM) is deployed, the analysis completed, results collected, and the compute resources deleted. 
Azure consumption costs are minimised. -This specific demonstration simply creates a Linux Data Science -Virtual Machine within a resource group, demonstrates it exists, and -then deletes the resource group. - -This script is best run interactively to review its operation and -ensure interaction with Azure completes. +This demonstration shows how an experimental data analysis can be sent +to a Linux DSVM, or to a customized set of Linux DSVMs, and executed in a desired +high-performance computing context. # Setup -We assume there is already a subscription and we have obtained the -credentials required. See -[AzureSMR's Authentication Guide](https://github.com/Microsoft/AzureSMR/blob/master/vignettes/Authentication.Rmd) -for details. We will then ensure a resource group exists and within -that resource group create a Linux DSVM. A public ssh key is used to -access the server. - -To get started we need to load the obtained credentials as well as the -user's ssh public key. Public keys on Linux are typically created on -the users desktop/laptop machine and will be found within -~/.ssh/id_rsa.pub. The content's of the user's credentials file will -be something like: +We assume that the first step of [ConnectToLinuxDSVM](https://github.com/Azure/AzureDSR/blob/master/vignettes/ConnectToLinuxDSVM.Rmd) has been completed, and that at least one Linux DSVM exists in the created resource group. +To begin with, let's check the status of the DSVM and start it if it is deallocated. This is achieved with AzureSMR, and again the credentials for authenticating the app in Active Directory must be provided. ```{r credentials, eval=FALSE} # Credentials come from app creation in Active Directory within Azure. TID <- "72f9....db47" # Tenant ID CID <- "9c52....074a" # Client ID KEY <- "9Efb....4nwV....ASa8=" # User key - -PUBKEY <- readLines("~/.ssh/id_rsa.pub") -``` - -```{r setup} -# Load the required subscription resources: TID, CID, and KEY. -# Also includes the ssh PUBKEY for the user. 
- -USER <- Sys.getenv("USERNAME") - -source(paste0(USER, "_credentials.R")) - -# Install the packages if required. - -devtools::install_github("Microsoft/AzureSMR") -devtools::install_github("Azure/AzureDSR", auth_token=GIT_TOKEN) ``` ```{r packages} @@ -79,107 +50,57 @@ library(rattle) # Use weatherAUS as a "large" dataset. # name the resource group that we will create transiently for the # purposes of this script. -RG <- "my_dsvm_rg_sea" # Create if not already exist then kill. -LOC <- "southeastasia" # Where the resource group (resources) will be hosted. +# RG <- "my_dsvm_rg_sea" # Create if not already exist then kill. +RG <- "dsvm" +LOC <- "southeastasia" # Where the resource group (resources) will be hosted. +VM <- "msvm001" +VM_URL <- paste(VM, LOC, "cloudapp.azure.com", sep=".") ``` -```{r connect} -# Connect to the Azure subscription and use this as the context for -# our activities. +# DSVM Operation + +One can simply operate the created DSVM instance as desired. +```{r dsvm operation} +# authentication. context <- createAzureContext(tenantID=TID, clientID=CID, authKey=KEY) -# Check if the resource group already exists. Take note this script -# will not remove the resource group if it pre-existed. +# get VM list under the resource group. -context %>% - azureListRG() %>% - filter(name == RG) %>% - select(name, location) %T>% - print() %>% - nrow() %>% - equals(0) %>% - not() %T>% - print() -> -rg_pre_exists -``` -# Creation +vm_names <- + AzureSMR::azureListVM(context, RG, LOC) %T>% + print() -Create the resource group within which all resources we create will be -grouped. +# check status of a DSVM. -```{r create resource group} -if (! rg_pre_exists) -{ - # Create a new resource group into which we create the VMs and - # related resources. Resource group name is RG. - - # To create a new resource group, one needs to add access control of Active Directory application at subscription level. 
+operateDSVM(context, RG, VM, operation="Check") - azureCreateResourceGroup(context, RG, LOC) +# start the DSVM if it is not running. -} -``` -Create the actual Linux DSVM with public key based authentication method. Name, username, and size can also be configured. +operateDSVM(context, RG, VM, operation="Start") -```{r deploy} -# Create the required Linux DSVM - generally 4 minutes. +# stop the DSVM -ldsvm <- deployDSVM(context, - resource.group=RG, - location=LOC, - name="mydsvm010", - username=USER, - size="Standard_DS1_v2", - os="Linux", - authen="Key", - pubkey=PUBKEY) - -ldsvm +operateDSVM(context, RG, VM, operation="Stop") ``` -`deployDSVM` also supports deployment of Windows DSVM, which can be -achieved by setting the argument of `vmos` to "Windows". +# Run analytics. -```{r} -wdsvm <- deployDSVM(context, - resource.group=RG, - location=LOC, - vmname="mydsvm002", - vmusername=USER, - vmsize="Standard_D3_v2", - vmos="Windows", - vmpassword=PASSWORD) +The next step is to use the DSVM for data analytics. -wdsvm +There are many ways of interacting with a DSVM. For both Linux and Windows based DSVMs, it is convenient to log in remotely to the machines with a GUI (more detailed information can be found [here](https://docs.microsoft.com/en-us/azure/machine-learning/machine-learning-data-science-provision-vm)). Often, however, remote execution from within an R session is preferred by data scientists, as it can be efficiently automated with R scripts. The following code chunks demonstrate how to use an R interface for remote execution of R scripts under a desired computing context. + +```{r set R interface} + +# create an R interface for handling the remote execution. + +interface <- createRInterface(remote=VM_URL, user=USER) + +# create a script for remote execution. + +newScript(path=".", title="experiment1.R") + +# put analytics into the script. + +updateScript(interface) ``` - - -Prove that the server exists. 
- -```{r prove exists} - -# Send a simple system() command across to the new server to test its -# existence. Expect a single line with an indication of how long the -# server has been up and running. - -cmd <- paste("ssh -q", - "-o StrictHostKeyChecking=no", - "-o UserKnownHostsFile=/dev/null", - ldsvm, "uptime") -cmd -system(cmd) -``` - -# Cleanup - -```{r optionally delete resource group} -# Delete the resource group now that we have proved existence. There -# is probably no need to wait. Only delete if it did not pre-exist -# this script. Deletion seems to take 10 minutes or more. - -if (! rg_pre_exists) - azureDeleteResourceGroup(context, RG) -``` - -Once deleted we are consuming no more. diff --git a/vignettes/ConnectToLinuxDSVM.Rmd b/vignettes/ConnectToLinuxDSVM.Rmd index b448fac..7957aae 100644 --- a/vignettes/ConnectToLinuxDSVM.Rmd +++ b/vignettes/ConnectToLinuxDSVM.Rmd @@ -139,7 +139,7 @@ ldsvm ``` `deployDSVM` also supports deployment of Windows DSVM, which can be -achieved by setting the argument of `vmos` to "Windows". +achieved by setting the argument of `os` to "Windows". ```{r} wdsvm <- deployDSVM(context, diff --git a/vignettes/experiment1.R b/vignettes/experiment1.R new file mode 100644 index 0000000..ae29d40 --- /dev/null +++ b/vignettes/experiment1.R @@ -0,0 +1,4 @@ + +# --------------------------------------------------------------------------- +# Your worker script starts from here ... +# ---------------------------------------------------------------------------