зеркало из https://github.com/Azure/AzureDSVM.git
Upated clusterDSVM with SSH setup; Rewrote executeScript
This commit is contained in:
Родитель
4a5c9c25d9
Коммит
ce122ed6ab
|
@ -25,3 +25,4 @@ importFrom(httr,headers)
|
|||
importFrom(httr,http_status)
|
||||
importFrom(httr,status_code)
|
||||
importFrom(jsonlite,fromJSON)
|
||||
importFrom(stringr,str_c)
|
||||
|
|
|
@ -13,5 +13,6 @@
|
|||
#' @importFrom httr add_headers headers content status_code http_status authenticate
|
||||
#' @importFrom httr GET PUT POST
|
||||
#' @importFrom XML htmlParse xpathApply xpathSApply xmlValue
|
||||
#' @importFrom stringr str_c
|
||||
#'
|
||||
NULL
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#' @param count Number of DSVM instances to be created. Note deploying multiple DSVMs may consume some time.
|
||||
#' @param name Names of the DSVMs. Lowercase characters or numbers only. Special characters are not permitted.
|
||||
#' @param username User name of the DSVM. It should be different from name of the DSVM.
|
||||
#' @param size Size of the DSVM cluster is identical.
|
||||
#' @param pubkey Public key for the DSVM. Only applicable for
|
||||
#' public-key based authentication of Linux based DSVM.
|
||||
#' @param dns DNS label for the VM address. The URL for accessing the deployed DSVM will be "<dns_label>.<location>.cloudapp.azure.com
|
||||
|
@ -76,57 +77,102 @@ deployDSVMCluster <- function(context,
|
|||
|
||||
# TODO: transmitting private key is not good practice! Seeking a better method...
|
||||
|
||||
HOME_DIR <- ifelse(identical(.Platform$OS.type, "windows"),
|
||||
normalizePath(paste0(Sys.getenv("HOME"), "/../"), winslash = "/"),
|
||||
Sys.getenv("HOME"))
|
||||
|
||||
# generate public key from private key.
|
||||
|
||||
shell(paste0("ssh-keygen -y -f ", HOME_DIR, ".ssh/id_rsa > ./id_rsa.pub"))
|
||||
|
||||
# copy private key into local directory. Note local machine may be either Linux or Windows based so it is treated differently.
|
||||
|
||||
ifelse(identical(.Platform$OS.type, "windows"),
|
||||
system(paste0("xcopy /f ", shQuote(paste0(HOME_DIR, ".ssh/id_rsa"), type = "cmd"),
|
||||
" ", shQuote(".", type = "cmd"))),
|
||||
system("cp ~/.ssh/id_rsa ."))
|
||||
# do key gen in each node.
|
||||
# propagate pub keys of each node back to local.
|
||||
# put the pub keys in authorized_keys and distribute onto nodes.
|
||||
|
||||
dns_name_list <- paste(names,
|
||||
location,
|
||||
"cloudapp.azure.com",
|
||||
sep=".")
|
||||
|
||||
# Distribute the key pair to all nodes of the cluster.
|
||||
auth_keys <- character(0)
|
||||
|
||||
for (vm in dns_name_list)
|
||||
{
|
||||
for (vm in dns_name_list) {
|
||||
# add an option to switch off host key checking - for the purpose of avoiding pop up.
|
||||
|
||||
option <- "-q -o StrictHostKeyChecking=no"
|
||||
pubkey_name <- str_c("pubkey", names[which(dns_name_list == vm)])
|
||||
|
||||
# copy the key pairs onto cluster node.
|
||||
# generate key pairs in vm
|
||||
|
||||
system(sprintf("scp %s ./id_rsa %s@%s:.ssh/", option, username, vm))
|
||||
system(sprintf("scp %s ./id_rsa.pub %s@%s:.ssh/", option, username, vm))
|
||||
system(sprintf("ssh %s -l %s %s %s",
|
||||
option,
|
||||
username,
|
||||
vm,
|
||||
"'ssh-keygen -t rsa -N \"\" -f ~/.ssh/id_rsa'"),
|
||||
intern=TRUE,
|
||||
ignore.stdout=FALSE,
|
||||
ignore.stderr=FALSE,
|
||||
wait=FALSE,
|
||||
show.output.on.console=FALSE)
|
||||
|
||||
# create a config file to switch off strick host key checking to enable node-to-node authentication without pop up.
|
||||
# copy the public key and append it into local machine.
|
||||
|
||||
sh <- writeChar(c("cat .ssh/id_rsa.pub > .ssh/authorized_keys\n",
|
||||
paste0("echo Host *.", location, ".cloudapp.azure.com >> ~/.ssh/config\n"),
|
||||
paste0("echo StrictHostKeyChecking no >> ~/.ssh/config\n"),
|
||||
paste0("echo UserKnownHostsFile /dev/null >> ~/.ssh/config\n"),
|
||||
"chmod 600 ~/.ssh/config\n"), con = "./shell_script")
|
||||
system(sprintf("scp %s %s@%s:.ssh/id_rsa.pub %s",
|
||||
option,
|
||||
username,
|
||||
vm,
|
||||
file.path(".", pubkey_name)))
|
||||
|
||||
# upload, change mode of, and run the config script.
|
||||
# append the public keys into authorized_key.
|
||||
|
||||
system(sprintf("scp %s shell_script %s@%s:~", option, username, vm), show.output.on.console = FALSE)
|
||||
system(sprintf("ssh %s -l %s %s 'chmod +x ~/shell_script'", option, username, vm), show.output.on.console = FALSE)
|
||||
system(sprintf("ssh %s -l %s %s '~/shell_script'", option, username, vm), show.output.on.console = FALSE)
|
||||
auth_keys <- paste0(auth_keys,
|
||||
readLines(file.path(".", pubkey_name)),
|
||||
"\n")
|
||||
|
||||
writeLines(auth_keys, file.path(".", "pub_keys"))
|
||||
|
||||
# clean up the temp pub key file
|
||||
|
||||
file.remove(pubkey_name)
|
||||
}
|
||||
|
||||
# Clean up - if you search "remove password" you will get 284,505 records so the following is to clean up confidential information in the working directory to prevent them from leaking anywhere out of your control.
|
||||
# create a config file. To avoid any prompt up when nodes are communicating with each other.
|
||||
|
||||
sh <- writeChar(paste0("cat .ssh/pub_keys >> .ssh/authorized_keys\n",
|
||||
paste0("echo Host *.", location, ".cloudapp.azure.com >> ~/.ssh/config\n"),
|
||||
paste0("echo StrictHostKeyChecking no >> .ssh/config\n"),
|
||||
paste0("echo UserKnownHostsFile /dev/null >> .ssh/config\n"),
|
||||
"chmod 600 .ssh/config",
|
||||
"\n"),
|
||||
con="./shell_script")
|
||||
|
||||
# distribute the public keys and config files to nodes.
|
||||
|
||||
for (vm in dns_name_list) {
|
||||
|
||||
# copy the pub_keys onto node.
|
||||
|
||||
system(sprintf("scp %s ./pub_keys %s@%s:.ssh",
|
||||
option,
|
||||
username,
|
||||
vm))
|
||||
|
||||
# copy the config onto node and run it.
|
||||
|
||||
system(sprintf("scp %s ./shell_script %s@%s:.ssh",
|
||||
option,
|
||||
username,
|
||||
vm))
|
||||
|
||||
system(sprintf("ssh %s -l %s %s 'chmod +x .ssh/shell_script'",
|
||||
option,
|
||||
username,
|
||||
vm),
|
||||
show.output.on.console=TRUE)
|
||||
|
||||
system(sprintf("ssh %s -l %s %s '.ssh/shell_script'",
|
||||
option,
|
||||
username,
|
||||
vm),
|
||||
show.output.on.console=TRUE)
|
||||
}
|
||||
|
||||
# clean up.
|
||||
|
||||
file.remove("./pub_keys", "./shell_script")
|
||||
|
||||
file.remove("./id_rsa", "./id_rsa.pub", "./shell_script")
|
||||
} else {
|
||||
|
||||
# check whether the input arguments are valid for the multi-instance deployment-
|
||||
|
|
|
@ -1,28 +1,31 @@
|
|||
#' @title Remote execution of R script in an R interface object.
|
||||
#' @param context AzureSMR context.
|
||||
#' @param resourceGroup Resource group of Azure resources for computation.
|
||||
#' @param machines Remote DSVMs that will be used for computation.
|
||||
#' @param remote Remote URL for the computation engine. For DSVM, it is either DNS (usually in the format of <dsvm name>.<location>.cloudapp.azure.com) or IP address.
|
||||
#' @param user Username for logging into the remote resource.
|
||||
#' @param script R script to be executed on remote resource.
|
||||
#' @param computeContext Computation context of Microsoft R Server under which the mechanisms of parallelization (e.g., local parallel, cluster based parallel, or Spark) is specified. Accepted computing context include "localParallel", "clusterParallel", "Hadoop", and "Spark".
|
||||
#' @param inputs JSON encoded string of R objects that are loaded into the Remote R session's workspace prior to execution. Only R objects of type: primitives, vectors and dataframes are supported via this parameter. Alternatively the putLocalObject can be used, prior to a call to this function, to move any R object from the local workspace into the remote R session.
|
||||
#' @param outputs Character vector of the names of the objects to retreive. Only primitives, vectors and dataframes can be retrieved using this function. Use getRemoteObject to get any type of R object from the remote session.
|
||||
#' @param checkLibraries if `TRUE`, check whether libraries used in the R script installed on the remote machine.
|
||||
#' @param displayPlots If TRUE, plots generated during execution are displayed in the local plot window. **NOTE** This capability requires that the 'png' package is installed on the local machine.
|
||||
#' @param writePlots If TRUE, plots generated during execution are copied to the working directory of the local session.
|
||||
#' @return Status of scription execution.
|
||||
#' @export
|
||||
executeScript <- function(context,
|
||||
resourceGroup,
|
||||
machines,
|
||||
remote,
|
||||
user,
|
||||
script,
|
||||
computeContext,
|
||||
inputs=NULL,
|
||||
outputs=NULL,
|
||||
checkLibraries=FALSE,
|
||||
displayPlots=FALSE,
|
||||
writePlots=FALSE) {
|
||||
computeContext) {
|
||||
|
||||
# switch on the machines.
|
||||
|
||||
for (vm in machines) {
|
||||
# starting a machine is running in synchronous mode so let's wait for a while patiently until everything is done.
|
||||
|
||||
operateDSVM(context,
|
||||
resource.group=resourceGroup,
|
||||
name=vm,
|
||||
operation="Start")
|
||||
}
|
||||
|
||||
# manage input strings in an interface object.
|
||||
|
||||
|
@ -43,58 +46,48 @@ executeScript <- function(context,
|
|||
|
||||
updateScript(new_interface)
|
||||
|
||||
# authenticate the remote server.
|
||||
# execute script on remote machine(s).
|
||||
|
||||
status <- operateDSVM(context, )
|
||||
option <- "-q -o StrictHostKeyChecking=no"
|
||||
remote_script <- paste0("script", as.character(Sys.time()), ".R")
|
||||
|
||||
# some preconditions of using Microsoft R Server.
|
||||
|
||||
# load mrsdeploy on remote machine.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
mrsdeploy::remoteLogin(deployr_endpoint=object$remote,
|
||||
session=FALSE,
|
||||
commandline=FALSE,
|
||||
username=object$user)
|
||||
|
||||
# need some exception handling?
|
||||
|
||||
# check libraries in the worker script available on R server. If not, install them. To avoid additional code execution, this operation is invoked only on demand, by argument "checkLibraries".
|
||||
|
||||
if (checkLibraries == TRUE) {
|
||||
libs <-
|
||||
# str_extract_all(readLines(object$script), "library\\(.*?\\)") %>%
|
||||
str_extract_all(x, "library\\(.*?\\)") %>%
|
||||
unlist() %>%
|
||||
gsub(".*\\((.*)\\).*", "\\1", .) %>%
|
||||
str_c()
|
||||
|
||||
codes <- paste(paste0("libs <- c(", paste0("'", libs, "'", collapse=","), ")"),
|
||||
"new.packages <- libs[!(libs %in% installed.packages()[,'Package'])]",
|
||||
"if(length(new.packages)) install.packages(new.packages)",
|
||||
sep=";")
|
||||
|
||||
mrsdeploy::remoteExecute(rcode=codes)
|
||||
exe <- system(paste0("scp %s -l %s %s %s:%s",
|
||||
option,
|
||||
object$user,
|
||||
object$script,
|
||||
object$remote,
|
||||
remote_script),
|
||||
show.output.on.console=FALSE)
|
||||
if (is.null(attributes(exe)))
|
||||
{
|
||||
writeLines(sprintf("File %s is successfully uploaded on %s$%s.",
|
||||
object$script, object$user, object$remote))
|
||||
} else {
|
||||
writeLines("Something must be wrong....... See warning message.")
|
||||
}
|
||||
|
||||
# remote execution of script.
|
||||
# Execute the script.
|
||||
|
||||
resp <- mrsdeploy::remoteScript(name=object$script,
|
||||
inputs=inputs,
|
||||
outputs=outputs,
|
||||
displayPlots=displayPlots,
|
||||
writePlots=writePlots)
|
||||
exe <- system(paste("ssh %s -l %s %s Rscript %s %s",
|
||||
option,
|
||||
object$user,
|
||||
object$remote,
|
||||
roptions,
|
||||
remote_script),
|
||||
intern=TRUE,
|
||||
show.output.on.console=TRUE)
|
||||
if (is.null(attributes(exe)))
|
||||
{
|
||||
writeLines(sprintf("File %s is successfully executed on %s$%s.",
|
||||
object$script, object$user, object$remote))
|
||||
} else {
|
||||
writeLines("Something must be wrong....... See warning message.")
|
||||
}
|
||||
|
||||
# need some exception handling?
|
||||
if (!missing(verbose))
|
||||
{
|
||||
if (verbose == TRUE) writeLines(exe)
|
||||
}
|
||||
|
||||
# need post-execution message...
|
||||
}
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/deployDSVM.R
|
||||
% Please edit documentation in R/deployDSVM-MININT-AL1V820.R, R/deployDSVM.R
|
||||
\name{deployDSVM}
|
||||
\alias{deployDSVM}
|
||||
\title{Deploy a new Data Science Virtual Machine (DSVM).}
|
||||
\usage{
|
||||
deployDSVM(context, resource.group, location, name, username,
|
||||
size = "Standard_D1_v2", os, authen = "", pubkey = "", password = "",
|
||||
dns = name, mode = "Sync")
|
||||
|
||||
deployDSVM(context, resource.group, location, name, username,
|
||||
size = "Standard_D1_v2", os, authen = "", pubkey = "", password = "",
|
||||
dns = name, mode = "Sync")
|
||||
|
@ -43,6 +47,42 @@ public-key based authentication of Linux based DSVM.}
|
|||
\item{dns}{DNS label for the VM address. The URL for accessing the
|
||||
deployed DSVM will be "<dns_label>.<location>.cloudapp.azure.com}
|
||||
|
||||
\item{mode}{Mode of virtual machine deployment. Default is "Sync".}
|
||||
|
||||
\item{context}{Authentication context of AzureSMR encapsulating the
|
||||
TID, CID, and key obtained from Azure Actrive Directory.}
|
||||
|
||||
\item{resource.group}{The Azure resource group where the DSVM is
|
||||
created.}
|
||||
|
||||
\item{location}{Location of the data centre to host the DSVM.}
|
||||
|
||||
\item{name}{Name of the DSVM. Lowercase characters or numbers
|
||||
only. Special characters are not permitted.}
|
||||
|
||||
\item{username}{User name of the DSVM. It should be different from
|
||||
`name`.}
|
||||
|
||||
\item{size}{Size of the DSVM. The default is "Standard_D1_v2". All
|
||||
available sizes can be obtained by function `getVMSizes`.}
|
||||
|
||||
\item{os}{Operating system of DSVM. Permitted values are "Linux"
|
||||
and "Windows" for Linux based and Windows based operating
|
||||
systems, respectively.}
|
||||
|
||||
\item{authen}{Either "Key" or "Password", meaning public-key based
|
||||
or password based authentication, respectively. Note Windows DSVM
|
||||
by default uses password based authentication and this argument
|
||||
can be left unset.}
|
||||
|
||||
\item{pubkey}{Public key for the DSVM. Only applicable for
|
||||
public-key based authentication of Linux based DSVM.}
|
||||
|
||||
\item{password}{Pass word for the DSVM.}
|
||||
|
||||
\item{dns}{DNS label for the VM address. The URL for accessing the
|
||||
deployed DSVM will be "<dns_label>.<location>.cloudapp.azure.com}
|
||||
|
||||
\item{mode}{Mode of virtual machine deployment. Default is "Sync".}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
\usage{
|
||||
deployDSVMCluster(context, resource.group, location, count = 1, name,
|
||||
username, size = "Standard_D1_v2", pubkey = "", dns = name,
|
||||
cluster = FALSE, mode = "Sync")
|
||||
cluster = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{context}{AzureSMR active context.}
|
||||
|
@ -21,13 +21,13 @@ deployDSVMCluster(context, resource.group, location, count = 1, name,
|
|||
|
||||
\item{username}{User name of the DSVM. It should be different from name of the DSVM.}
|
||||
|
||||
\item{size}{Size of the DSVM cluster is identical.}
|
||||
|
||||
\item{pubkey}{Public key for the DSVM. Only applicable for
|
||||
public-key based authentication of Linux based DSVM.}
|
||||
|
||||
\item{dns}{DNS label for the VM address. The URL for accessing the deployed DSVM will be "<dns_label>.<location>.cloudapp.azure.com}
|
||||
|
||||
\item{cluster}{A logical value of TRUE or FALSE to indicate whether the deployed DSVMs form a cluster. If not, the deployment will assign the vectors of name, username, and public key as given in the input arguments to the DSVMs - this is usually used for creating multiple DSVMs for a group of data scientists. If it is TRUE, the deployment will use the first element (if it consists more than one elements) of the given DSVM names as base, and append serial number to the base to form a DSVM full name, and then use the SAME username and public key across the cluster - this can be used for creating a HPC engine on top of the deployed DSVMs in which parallel computing context which is availed in Microsoft R Server ScaleR package can be applied for embarassing parallelization.}
|
||||
|
||||
\item{mode}{Mode of virtual machine deployment. Default is "Sync".}
|
||||
}
|
||||
|
||||
|
|
|
@ -150,8 +150,8 @@ ldsvm_cluster <- deployDSVMCluster(context,
|
|||
resource.group=RG,
|
||||
location=LOC,
|
||||
count=COUNT,
|
||||
name="yyy",
|
||||
username="yyyuser",
|
||||
name="zzz",
|
||||
username="zzzuser",
|
||||
pubkey=PUBKEY,
|
||||
cluster=TRUE)
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче