Add extension and mrs one-box config

This commit is contained in:
yueguoguo 2017-08-14 14:38:51 +08:00
Родитель 678703cd85
Коммит fe9a1962b0
9 изменённых файлов: 175 добавлений и 9 удалений

Двоичные данные
data.xdf Normal file

Двоичный файл не отображается.

56
test/extensions.R Normal file
Просмотреть файл

@ -0,0 +1,56 @@
# DSVM extensions.
library(jsonlite)
# Merge the extensions definition into the Ubuntu key-based DSVM template.
# The last three lines of the template are dropped so that the extensions
# fragment can be appended, followed by a closing "]" and "}".
# NOTE(review): assumes those three lines only close the template's last
# array and the top-level object -- confirm against the template file.
x <- readLines("inst/etc/template_linux_key_ubuntu.json")
y <- readLines("inst/etc/extensions.json")
x <- head(x, -3)
# Collapse each file to a single string *before* joining. Passing the line
# vectors straight to paste0() pastes them element-wise (recycling the
# shorter vector), which interleaves the two files and repeats ",", "]" and
# "}" after every template line instead of appending them once.
# NOTE(review): z is not referenced again in this script.
z <- paste0(paste0(x, collapse=""),
            ",",
            paste0(y, collapse=""),
            "]",
            "}")
library(AzureDSVM)
library(AzureSMR)
# End-to-end smoke run: authenticate against Azure, create a fresh resource
# group and deploy one Ubuntu DSVM into it.

# Path of the AzureSMR settings file, taken from the "AzureSMR.config" option.
# It is assigned here but not passed to any call in this script.
settingsfile <- getOption("AzureSMR.config")
# Credentials (tenantID, clientID, authKey are the fields read below).
config <- read.AzureSMR.config()
# Minute-resolution stamp (yymmddHHMM) used to name the resource group.
timestamp <- format(Sys.time(), format="%y%m%d%H%M")
asc <- createAzureContext()
# Evaluate the call inside `config` so its fields can be used by bare name.
with(config,
setAzureContext(asc, tenantID=tenantID, clientID=clientID, authKey=authKey)
)
azureAuthenticate(asc)
# Resource group for this run, e.g. "AzureDSVMtest_1708141438".
resourceGroup_name <- paste0("AzureDSVMtest_", timestamp)
location <- "southeastasia"
res <- azureCreateResourceGroup(asc,
location=location,
resourceGroup=resourceGroup_name)
# Parameters of the virtual machine to deploy.
dsvm_size <- "Standard_D4_v2"
dsvm_os <- "Ubuntu"
# Host name is "dsvm" plus three distinct random lower-case letters.
dsvm_name <- paste0("dsvm",
paste(sample(letters, 3), collapse=""))
# NOTE(review): the password is hard-coded in the source; consider reading it
# from the environment or the config file instead.
dsvm_password <- "AzureDSVM_test123"
dsvm_username <- "dsvmuser"
# Password-based authentication, hence the empty public key.
# NOTE(review): mode="Sync" presumably blocks until the deployment finishes --
# confirm against the deployDSVM documentation.
# `res` is reused, so the resource-group result above is overwritten.
res <- deployDSVM(asc,
resource.group=resourceGroup_name,
location=location,
hostname=dsvm_name,
username=dsvm_username,
size=dsvm_size,
os=dsvm_os,
authen="Password",
pubkey="",
password=dsvm_password,
mode="Sync")

Просмотреть файл

@ -1,6 +1,63 @@
# ---------------------------------------------------------------------------
# THIS IS A HEADER ADDED BY COMPUTE INTERFACE
# ---------------------------------------------------------------------------
# Names of all machines in the cluster; the first entry is the master node.
CI_MACHINES <- c("myqjqs",
                 "myqjqs001",
                 "myqjqs002",
                 "myqjqs003",
                 "myqjqs004")
# Fully qualified DNS names, in the same order as CI_MACHINES.
CI_DNS <- c("myqjqs.southeastasia.cloudapp.azure.com",
            "myqjqs001.southeastasia.cloudapp.azure.com",
            "myqjqs002.southeastasia.cloudapp.azure.com",
            "myqjqs003.southeastasia.cloudapp.azure.com",
            "myqjqs004.southeastasia.cloudapp.azure.com")
# Execution mode and (empty) data location for this run.
CI_CONTEXT <- "clusterParallel"
CI_DATA <- ""
# Login user on every machine.
CI_VMUSER <- "zhle"
# The master is the first DNS entry; every remaining entry is a worker.
CI_MASTER <- CI_DNS[1]
CI_SLAVES <- CI_DNS[-1]
library(RevoScaleR)
# library(readr)
library(doParallel)
# --------- Set compute context
# Create a PSOCK cluster with one worker per host in CI_SLAVES, logging in as
# CI_VMUSER and telling the workers to connect back to CI_MASTER.
# NOTE(review): no stopCluster(cl) is visible in this section -- confirm the
# cluster is shut down elsewhere in the script.
cl <- makePSOCKcluster(names=CI_SLAVES, master=CI_MASTER, user=CI_VMUSER)
# Register the cluster as the backend for foreach/%dopar%.
registerDoParallel(cl)
# Route RevoScaleR distributed calls through the registered foreach backend.
rxSetComputeContext(RxForeachDoPar())
# --------- Load data.
# ciData <- ifelse(CI_DATA != '', read_csv(CI_DATA), data.frame(0))
# ---------------------------------------------------------------------------
# END OF THE HEADER ADDED BY COMPUTE INTERFACE
# ---------------------------------------------------------------------------
# This script presents a learning process that searches for an optimal model for a classification problem. AzureDSR is used to illustrate the convenience of using the cloud to parallelize such a learning process.
# data for use.
# data to use for the ML process.
# Catalogue of the data sets available to the ML process: one row per data
# set with its display name, download URL, label column and colOptions flag.
# The column vectors are built inside local() so that only `data_config`
# is created in the calling environment.
data_config <- local({
  dataset_names <- c("Employee Attrition Prediction",
                     "Adult Income",
                     "Credit Card Transaction",
                     "Australia Weather",
                     "Mushroom",
                     "Hep Mass",
                     "Higgs")

  dataset_urls <- c("https://zhledata.blob.core.windows.net/mldata/employee.xdf",
                    "https://zhledata.blob.core.windows.net/mldata/adult.xdf",
                    "https://zhledata.blob.core.windows.net/mldata/credit.xdf",
                    "https://zhledata.blob.core.windows.net/mldata/weather.xdf",
                    "https://zhledata.blob.core.windows.net/mldata/mushroom.xdf",
                    "https://zhledata.blob.core.windows.net/mldata/hepmass.xdf",
                    "https://zhledata.blob.core.windows.net/mldata/higgs.xdf")

  # Name of the label column in each data set.
  label_columns <- c("Attrition",
                     "X15",
                     "Class",
                     "RainTomorrow",
                     "class",
                     "class",
                     "X1")

  col_option_flags <- c(TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE)

  data.frame(name=dataset_names,
             url=dataset_urls,
             label=label_columns,
             colOptions=col_option_flags,
             stringsAsFactors=FALSE)
})
# algorithms for use.
model_config <- list(name=c("rxLogit", "rxBTrees", "rxDForest"),
@ -94,6 +151,8 @@ mlProcess <- function(formula, data, modelName, modelPara) {
# read data.
data_index <- 3
CI_DATA <- "https://zhledata.blob.core.windows.net/mldata/creditcard.xdf"
download.file(CI_DATA,

Просмотреть файл

@ -1,3 +1,26 @@
# ---------------------------------------------------------------------------
# THIS IS A HEADER ADDED BY COMPUTE INTERFACE
# ---------------------------------------------------------------------------
# Names of all machines in the cluster; the first entry is the master node.
CI_MACHINES <- c("jxss001",
                 "jxss002",
                 "jxss003",
                 "jxss004")
# Fully qualified DNS names, in the same order as CI_MACHINES.
CI_DNS <- c("jxss001.southeastasia.cloudapp.azure.com",
            "jxss002.southeastasia.cloudapp.azure.com",
            "jxss003.southeastasia.cloudapp.azure.com",
            "jxss004.southeastasia.cloudapp.azure.com")
# Execution mode and (empty) data location for this run.
CI_CONTEXT <- "clusterParallel"
CI_DATA <- ""
# Login user on every machine.
CI_VMUSER <- "zhle"
# The master is the first DNS entry; every remaining entry is a worker.
CI_MASTER <- CI_DNS[1]
CI_SLAVES <- CI_DNS[-1]
library(RevoScaleR)
# library(readr)
library(doParallel)
# --------- Set compute context
# Create a PSOCK cluster with one worker per host in CI_SLAVES, logging in as
# CI_VMUSER and telling the workers to connect back to CI_MASTER.
# NOTE(review): no stopCluster(cl) is visible in this section -- confirm the
# cluster is shut down elsewhere in the script.
cl <- makePSOCKcluster(names=CI_SLAVES, master=CI_MASTER, user=CI_VMUSER)
# Register the cluster as the backend for foreach/%dopar%.
registerDoParallel(cl)
# Route RevoScaleR distributed calls through the registered foreach backend.
rxSetComputeContext(RxForeachDoPar())
# --------- Load data.
# ciData <- ifelse(CI_DATA != '', read_csv(CI_DATA), data.frame(0))
# ---------------------------------------------------------------------------
# END OF THE HEADER ADDED BY COMPUTE INTERFACE
# ---------------------------------------------------------------------------
# This is to run parallel work across nodes for clustering analysis.
# get data from remote blob.
@ -49,4 +72,4 @@ results <- rxExec(FUN=clusterAnalysis,
data="data.xdf",
numClusters=rxElemArg(c(2:5)))
save(results, file="./results.RData")
save(results, file="./results.RData")

Просмотреть файл

@ -1,10 +1,10 @@
# ---------------------------------------------------------------------------
# THIS IS A HEADER ADDED BY COMPUTE INTERFACE
# ---------------------------------------------------------------------------
CI_MACHINES <- c( "mynngf" )
CI_DNS <- c( "mynngf.southeastasia.cloudapp.azure.com" )
CI_MACHINES <- c( "mytbmm" )
CI_DNS <- c( "", "" )
CI_VMUSER <- c( "zhle" )
CI_MASTER <- c( "mynngf.southeastasia.cloudapp.azure.com" )
CI_MASTER <- c( "" )
CI_SLAVES <- c( "" )
CI_DATA <- ""
CI_CONTEXT <- "localParallel"

Просмотреть файл

@ -184,6 +184,8 @@ cat("Resource group", RG, "at", LOC,
# Deploy a Linux Data Science Virtual Machine
## DSVM deployment
Create the actual Linux DSVM with public-key based authentication
method. Name, username, and size can also be configured.
@ -258,7 +260,7 @@ cmd
system(cmd, intern=TRUE)
```
# Some Standard Setup --- Optional
## Some Standard Setup --- Optional
We can install some useful tools on a fresh server. Note that the
Ubuntu server will still be running some background scripts as part of
@ -283,6 +285,32 @@ Sys.sleep(20)
system(paste(ssh, "uptime"))
```
An alternative for this post-deployment system configuration is the
`addExtensionDSVM` function, which is detailed in the vignette [11Extend.Rmd](https://github.com/Azure/AzureDSVM/blob/master/vignettes/11Extend.Rmd).
## Configuration for Microsoft R Server.
Since version 9, Microsoft R Server offers functions in the `mrsdeploy` package
for convenient interaction with an R session on a remote instance where MRS is
installed and properly configured.
To enable such interaction, a [one-box configuration](https://docs.microsoft.com/en-us/r-server/install/operationalize-r-server-one-box-config) is needed. One-box configuration on a Linux DSVM with
a key-based authentication method can be achieved via the `mrsOneBoxConfiguration`
function.
```{r}
# Run the MRS one-box configuration on the DSVM named HOST in resource
# group RG, using the Azure context object `context`.
mrsOneBoxConfiguration(context,
resource.group=RG,
location=LOC,
hostname=HOST,
username=USER,
password=PASSWORD)
```
NOTE: the password here refers to the password used for creating a remote session with
`mrsdeploy`. The default user name for `mrsdeploy` is "admin". More details about
how to use `mrsdeploy` for remote interaction can be found [here](https://docs.microsoft.com/en-us/r-server/r/how-to-execute-code-remotely).
# Deploy a Windows Data Science Virtual Machine - Optional
deployDSVM() also supports deployment of Windows DSVM, which can be

Двоичные данные
vignettes/elapsed.RData Normal file

Двоичный файл не отображается.

Двоичные данные
vignettes/results.RData Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -1,11 +1,11 @@
# ---------------------------------------------------------------------------
# THIS IS A HEADER ADDED BY COMPUTE INTERFACE
# ---------------------------------------------------------------------------
CI_MACHINES <- c( "jxss001", "jxss002", "jxss003", "jxss004" )
CI_DNS <- c( "jxss001.southeastasia.cloudapp.azure.com", "jxss002.southeastasia.cloudapp.azure.com", "jxss003.southeastasia.cloudapp.azure.com", "jxss004.southeastasia.cloudapp.azure.com" )
CI_MACHINES <- c( "llxi001", "llxi002", "llxi003", "llxi004" )
CI_DNS <- c( "llxi001.southeastasia.cloudapp.azure.com", "llxi002.southeastasia.cloudapp.azure.com", "llxi003.southeastasia.cloudapp.azure.com", "llxi004.southeastasia.cloudapp.azure.com" )
CI_VMUSER <- c( "zhle" )
CI_MASTER <- c( "jxss001.southeastasia.cloudapp.azure.com" )
CI_SLAVES <- c( "jxss002.southeastasia.cloudapp.azure.com", "jxss003.southeastasia.cloudapp.azure.com", "jxss004.southeastasia.cloudapp.azure.com" )
CI_MASTER <- c( "llxi001.southeastasia.cloudapp.azure.com" )
CI_SLAVES <- c( "llxi002.southeastasia.cloudapp.azure.com", "llxi003.southeastasia.cloudapp.azure.com", "llxi004.southeastasia.cloudapp.azure.com" )
CI_DATA <- ""
CI_CONTEXT <- "clusterParallel"