Fix: Upgrade R Batch SDK to 2018-12-01.8.0 (#354)
* Added resource files
* Removed comments
* Fixed resource files documentation
* Added check on job state
* Fixed jobState
Parent: 93f3fbc6fd
Commit: 96bfc22662
@@ -19,7 +19,7 @@ BatchUtilities <- R6::R6Class(
      accountName <- storageClient$authentication$name

-     resourceFiles <- NULL
+     resourceFiles <- args$resourceFiles

      if (!is.null(argsList)) {
        envFile <- paste0(taskId, ".rds")
        saveRDS(argsList, file = envFile)
@@ -37,8 +37,18 @@ BatchUtilities <- R6::R6Class(
          envFile,
          readToken,
          config$endpointSuffix)

+       environmentResourceFile <-
+         rAzureBatch::createResourceFile(filePath = envFile, httpUrl = envFileUrl)
+
+       if (is.null(resourceFiles))
+       {
          resourceFiles <-
-           list(rAzureBatch::createResourceFile(url = envFileUrl, fileName = envFile))
+           list(environmentResourceFile)
+       }
+       else {
+         resourceFiles <- append(resourceFiles, environmentResourceFile)
+       }
      }

      # Only use the download command if cloudCombine is enabled
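As migration context for the hunk above: with Batch API version 2018-12-01.8.0, rAzureBatch::createResourceFile renames its arguments, url becoming httpUrl and fileName becoming filePath. A minimal before/after sketch using the envFile/envFileUrl names from the diff:

# Old SDK (pre-2018-12-01.8.0): source was `url`, node path was `fileName`.
# rAzureBatch::createResourceFile(url = envFileUrl, fileName = envFile)

# New SDK: `httpUrl` is the download source and `filePath` is the
# destination on the node, relative to the task working directory.
environmentResourceFile <-
  rAzureBatch::createResourceFile(filePath = envFile, httpUrl = envFileUrl)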
@@ -52,17 +62,6 @@ BatchUtilities <- R6::R6Class(
      if (!is.null(cloudCombine)) {
        assign("cloudCombine", cloudCombine, .doAzureBatchGlobals)
-       containerSettings$imageName <- "brianlovedocker/doazureparallel-merge-dockerfile:0.12.1"
-
-       copyCommand <- sprintf(
-         "%s %s %s --download --saskey $BLOBXFER_SASKEY --remoteresource . --include results/*.rds --endpoint %s",
-         accountName,
-         jobId,
-         "$AZ_BATCH_TASK_WORKING_DIR",
-         config$endpointSuffix
-       )
-
-       commands <- c(paste("blobxfer", copyCommand))
      }

      exitConditions <- NULL
@@ -123,7 +123,7 @@ makeCluster <-
    # install docker
    containerConfiguration <- list(
-     type = "docker"
+     type = "dockerCompatible"
    )

    dockerImage <- "rocker/tidyverse:latest"
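The "dockerCompatible" value is the enum newer Batch API versions expect for a pool's container configuration; the older "docker" value is rejected under 2018-12-01.8.0. A hypothetical fragment, for orientation only, showing where this list sits in the Batch pool body (imageReference is assumed to be defined elsewhere):

# Sketch of the pool's virtualMachineConfiguration as serialized into
# the Batch REST request; names outside the hunk above are assumptions.
virtualMachineConfiguration <- list(
  imageReference = imageReference,           # assumed defined elsewhere
  containerConfiguration = list(
    type = "dockerCompatible",               # "docker" fails on 2018-12-01.8.0+
    containerImageNames = list(dockerImage)  # e.g. "rocker/tidyverse:latest"
  )
)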
@@ -474,12 +474,12 @@ setHttpTraffic <- function(value = FALSE) {
        storageEndpointSuffix = config$endpointSuffix)

      requiredJobResourceFiles <- list(
-       rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"),
-       rAzureBatch::createResourceFile(url = mergerScriptUrl, fileName = "merger.R"),
-       rAzureBatch::createResourceFile(url = installGithubScriptUrl, fileName = "install_github.R"),
-       rAzureBatch::createResourceFile(url = installCranScriptUrl, fileName = "install_cran.R"),
-       rAzureBatch::createResourceFile(url = installBioConductorScriptUrl, fileName = "install_bioconductor.R"),
-       rAzureBatch::createResourceFile(url = jobCommonFileUrl, fileName = jobFileName)
+       rAzureBatch::createResourceFile(filePath = "worker.R", httpUrl = workerScriptUrl),
+       rAzureBatch::createResourceFile(filePath = "merger.R", httpUrl = mergerScriptUrl),
+       rAzureBatch::createResourceFile(filePath = "install_github.R", httpUrl = installGithubScriptUrl),
+       rAzureBatch::createResourceFile(filePath = "install_cran.R", httpUrl = installCranScriptUrl),
+       rAzureBatch::createResourceFile(filePath = "install_bioconductor.R", httpUrl = installBioConductorScriptUrl),
+       rAzureBatch::createResourceFile(filePath = jobFileName, httpUrl = jobCommonFileUrl)
      )

      resourceFiles <-
@@ -669,6 +669,21 @@ setHttpTraffic <- function(value = FALSE) {
        )
      )

+     mergeReadSasToken <- storageClient$generateSasToken("rl", "c", id)
+     mergeResourceFileUrl <-
+       rAzureBatch::createBlobUrl(
+         storageAccount = storageClient$authentication$name,
+         containerName = id,
+         sasToken = mergeReadSasToken,
+         storageEndpointSuffix = config$endpointSuffix
+       )
+
+     mergeResources <-
+       list(
+         rAzureBatch::createResourceFile(
+           storageContainerUrl = mergeResourceFileUrl,
+           blobPrefix = "results"))
+
      BatchUtilitiesOperations$addTask(
        jobId = id,
        taskId = "merge",
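This new block is the replacement for the blobxfer download command deleted earlier: instead of shelling out to blobxfer on the node, the merge task declares a storage-container resource file (new in 2018-12-01.8.0), so the Batch service itself downloads every blob under the results/ prefix before the task starts. A minimal standalone sketch, reusing the storageClient, config, and job-container id names from the hunk:

# Read+list ("rl") SAS on the job's container ("c"), as in the diff above.
sasToken <- storageClient$generateSasToken("rl", "c", id)

containerUrl <- rAzureBatch::createBlobUrl(
  storageAccount = storageClient$authentication$name,
  containerName = id,
  sasToken = sasToken,
  storageEndpointSuffix = config$endpointSuffix)

# One resource file that pulls everything under the "results" prefix
# into the task working directory before the merge task launches.
mergeResources <- list(
  rAzureBatch::createResourceFile(
    storageContainerUrl = containerUrl,
    blobPrefix = "results"))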
@@ -684,7 +699,8 @@ setHttpTraffic <- function(value = FALSE) {
        dependsOn = taskDependencies,
        cloudCombine = cloudCombine,
        outputFiles = append(obj$options$azure$outputFiles, mergeOutput),
-       containerImage = data$containerImage
+       containerImage = data$containerImage,
+       resourceFiles = mergeResources
      )

      cat(". . .")
@@ -472,19 +472,14 @@ waitForTasksToComplete <-
        flush.console()

-       validationFlag <-
-         (taskCounts$validationStatus == "Validated" &&
-            totalTasks <= 200000) ||
-         totalTasks > 200000
-
        if (taskCounts$failed > 0 &&
-           errorHandling == "stop" &&
-           validationFlag) {
+           errorHandling == "stop") {
          cat("\n")

          select <- "id, executionInfo"
+         filter <- "executionInfo/result eq 'failure'"
          failedTasks <-
-           batchClient$taskOperations$list(jobId, select = select)
+           batchClient$taskOperations$list(jobId, select = select, filter = filter)

          tasksFailureWarningLabel <-
            sprintf(
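The added filter pushes failure detection to the service with a Batch OData expression, which is why the client-side grepl check disappears in the next hunk; it also drops the validationFlag gate, so errorHandling == "stop" alone now triggers the failure scan. A sketch of the call, using only names that appear in the diff:

# Ask the Batch service for just the failed tasks, projected down to the
# two fields the warning message needs, instead of filtering client-side.
select <- "id, executionInfo"
filter <- "executionInfo/result eq 'failure'"
failedTasks <-
  batchClient$taskOperations$list(jobId, select = select, filter = filter)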
@@ -498,15 +493,10 @@ waitForTasksToComplete <-
            )

          for (i in 1:length(failedTasks$value)) {
-           if (!is.null(failedTasks$value[[i]]$executionInfo$result) &&
-               grepl(failedTasks$value[[i]]$executionInfo$result,
-                     "failure",
-                     ignore.case = TRUE)) {
              tasksFailureWarningLabel <-
                paste0(tasksFailureWarningLabel,
                       sprintf("%s\n", failedTasks$value[[i]]$id))
            }
-         }

          warning(sprintf(tasksFailureWarningLabel,
                          taskCounts$failed))
@@ -533,9 +523,10 @@ waitForTasksToComplete <-
                  jobId)
        }

-       if (taskCounts$completed >= totalTasks &&
-           (taskCounts$validationStatus == "Validated" ||
-              totalTasks >= 200000)) {
+       jobInfo <- getJob(jobId, verbose = FALSE)
+       if (taskCounts$completed >= totalTasks ||
+           jobInfo$jobState == "completed" ||
+           jobInfo$jobState == "terminating") {
          cat("\n")
          break
        }
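The loop's exit condition now consults the job state instead of the old validationStatus / 200000-task heuristic: task counts can lag, but a job the service reports as "completed" or "terminating" is definitively done. A minimal sketch of the polling shape, assuming getJob returns a list with a jobState field as the hunk implies:

repeat {
  # taskCounts is assumed to be refreshed by loop code not shown in this hunk.
  jobInfo <- getJob(jobId, verbose = FALSE)
  if (taskCounts$completed >= totalTasks ||
      jobInfo$jobState %in% c("completed", "terminating")) {
    break
  }
  Sys.sleep(10)  # hypothetical polling interval; the real cadence is not shown
}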
@@ -39,12 +39,12 @@ Here's an example that uses data stored in a public location on Azure Blob Storage
  # define where to download data from
  resource_files = list(
    rAzureBatch::createResourceFile(
-     url = "https://<accountname>.blob.core.windows.net/<container>/2010.csv",
-     fileName = "2010.csv"
+     httpUrl = "https://<accountname>.blob.core.windows.net/<container>/2010.csv",
+     filePath = "2010.csv"
    ),
    rAzureBatch::createResourceFile(
-     url = "https://<accountname>.blob.core.windows.net/<container>/2011.csv",
-     fileName = "2011.csv"
+     httpUrl = "https://<accountname>.blob.core.windows.net/<container>/2011.csv",
+     filePath = "2011.csv"
    )
  )
@@ -34,12 +34,12 @@ doAzureParallel::setCredentials("credentials.json")
  # Using the NYC taxi datasets, http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml
  azureStorageUrl <- "http://playdatastore.blob.core.windows.net/nyc-taxi-dataset"
  resource_files <- list(
-   rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-1.csv"), fileName = "yellow_tripdata_2016-1.csv"),
-   rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-2.csv"), fileName = "yellow_tripdata_2016-2.csv"),
-   rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-3.csv"), fileName = "yellow_tripdata_2016-3.csv"),
-   rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-4.csv"), fileName = "yellow_tripdata_2016-4.csv"),
-   rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-5.csv"), fileName = "yellow_tripdata_2016-5.csv"),
-   rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-6.csv"), fileName = "yellow_tripdata_2016-6.csv")
+   rAzureBatch::createResourceFile(httpUrl = paste0(azureStorageUrl, "/yellow_tripdata_2016-1.csv"), filePath = "yellow_tripdata_2016-1.csv"),
+   rAzureBatch::createResourceFile(httpUrl = paste0(azureStorageUrl, "/yellow_tripdata_2016-2.csv"), filePath = "yellow_tripdata_2016-2.csv"),
+   rAzureBatch::createResourceFile(httpUrl = paste0(azureStorageUrl, "/yellow_tripdata_2016-3.csv"), filePath = "yellow_tripdata_2016-3.csv"),
+   rAzureBatch::createResourceFile(httpUrl = paste0(azureStorageUrl, "/yellow_tripdata_2016-4.csv"), filePath = "yellow_tripdata_2016-4.csv"),
+   rAzureBatch::createResourceFile(httpUrl = paste0(azureStorageUrl, "/yellow_tripdata_2016-5.csv"), filePath = "yellow_tripdata_2016-5.csv"),
+   rAzureBatch::createResourceFile(httpUrl = paste0(azureStorageUrl, "/yellow_tripdata_2016-6.csv"), filePath = "yellow_tripdata_2016-6.csv")
  )

  # add the parameter 'resourceFiles' to download files to nodes
@@ -56,8 +56,8 @@ csvFileUrl2 <- rAzureBatch::createBlobUrl(storageAccount = storageAccountName,
  # Create a list of files to download to the cluster using read-only permissions
  # Place the files in a directory called 'data'
  resource_files = list(
-   rAzureBatch::createResourceFile(url = csvFileUrl1, fileName = "data/1989.csv"),
-   rAzureBatch::createResourceFile(url = csvFileUrl2, fileName = "data/1990.csv")
+   rAzureBatch::createResourceFile(httpUrl = csvFileUrl1, filePath = "data/1989.csv"),
+   rAzureBatch::createResourceFile(httpUrl = csvFileUrl2, filePath = "data/1990.csv")
  )

  # Create the cluster
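Taken together, the documentation hunks show the whole migration for user code: every url = becomes httpUrl = and every fileName = becomes filePath =, and paths may still carry subdirectories such as data/. A hedged end-to-end fragment in the style of the docs above, with placeholder account and container names and assuming the usual makeCluster(config, resourceFiles = ...) call the comments in these docs refer to:

# Placeholder URL, as in the docs above.
csvUrl <- "https://<accountname>.blob.core.windows.net/<container>/2010.csv"

resource_files <- list(
  rAzureBatch::createResourceFile(
    httpUrl = csvUrl,           # was `url` before 2018-12-01.8.0
    filePath = "data/2010.csv"  # was `fileName`; may include a subdirectory
  )
)

# Pass the list when creating the cluster so nodes download the files.
cluster <- doAzureParallel::makeCluster("cluster.json",
                                        resourceFiles = resource_files)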