From 2d57adcf512f92df119f37376f28a5dde1a05ecd Mon Sep 17 00:00:00 2001 From: Hong Ooi Date: Thu, 24 Oct 2019 10:06:31 +0800 Subject: [PATCH] Dev (#13) - Disk type selectable for OS disk and DSVM data disk - Scaleset can be created with data disks - Background pool moved to AzureRMR --- DESCRIPTION | 9 ++-- NAMESPACE | 2 - NEWS.md | 6 +++ R/AzureVM.R | 7 --- R/az_vmss_resource.R | 11 ++-- R/az_vmss_template.R | 5 +- R/build_json.R | 31 +++++++++++ R/pool.R | 60 --------------------- R/vm_config.R | 8 ++- R/vm_resource_config.R | 5 +- R/vm_template_builders.R | 10 ++-- R/vmss_config.R | 92 +++++++++++++++++++++----------- R/vmss_template_builders.R | 26 +++++++++ README.md | 4 +- man/az_vmss_resource.Rd | 4 +- man/az_vmss_template.Rd | 5 +- man/pool.Rd | 33 ------------ man/scaleset_options.Rd | 5 +- man/vm_config.Rd | 14 +++-- man/vm_resource_config.Rd | 3 +- man/vmss_config.Rd | 54 ++++++++++++------- tests/testthat/test14_par.R | 6 +-- tests/testthat/test15_disktype.R | 53 ++++++++++++++++++ vignettes/intro.rmd | 4 +- 24 files changed, 269 insertions(+), 188 deletions(-) delete mode 100644 R/pool.R delete mode 100644 man/pool.Rd create mode 100644 tests/testthat/test15_disktype.R diff --git a/DESCRIPTION b/DESCRIPTION index 05b5805..e44279d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AzureVM Title: Virtual Machines in 'Azure' -Version: 2.0.1 +Version: 2.0.1.9000 Authors@R: c( person("Hong", "Ooi", , "hongooi@microsoft.com", role = c("aut", "cre")), person("Microsoft", role="cph") ) @@ -14,9 +14,8 @@ Depends: R (>= 3.3) Imports: R6, - AzureRMR (>= 2.1.2), - jsonlite, - parallel + AzureRMR (>= 2.2.1), + jsonlite Suggests: knitr, testthat, @@ -24,3 +23,5 @@ Suggests: AzureVMmetadata Roxygen: list(markdown=TRUE) RoxygenNote: 6.1.1 +Remotes: + Azure/AzureRMR diff --git a/NAMESPACE b/NAMESPACE index 53f1fad..39774f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,9 +21,7 @@ export(debian_8_backports) export(debian_8_backports_ss) export(debian_9_backports) export(debian_9_backports_ss) -export(delete_pool) export(image_config) -export(init_pool) export(ip_config) export(is_vm) export(is_vm_resource) diff --git a/NEWS.md b/NEWS.md index eeccdb6..19165ca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# AzureVM 2.0.1.9000 + +* VM scalesets can now be created with data disks. +* Make OS disk type and Linux DSVM data disk type selectable, with a default of "Premium_LRS" for both. +* Background process pool functionality moved into AzureRMR; this removes code duplication and makes the pool available to other packages that can benefit from it. + # AzureVM 2.0.1 * Add methods to retrieve Azure resources used by a VM: `get_disk`, `get_vnet`, `get_nic`, `get_nsg`, `get_public_ip_resource`. These return objects of class `AzureRMR::az_resource`, or `NULL` if not present. 
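In use, the new arguments look like the following sketch, which mirrors the calls exercised in `tests/testthat/test15_disktype.R` further down; the resource group object `rg`, the login details and the resource names are illustrative placeholders.

```r
# minimal sketch of the new disk-type arguments; `rg` is assumed to be an
# existing AzureRMR resource group object, and names/sizes are placeholders
user <- user_config("myname", "~/.ssh/id_rsa.pub")

# VM: choose the OS disk type, and attach a 400GB standard-HDD data disk
vm <- rg$create_vm("myvm", user, size="Standard_D1_v2", config="ubuntu_18.04",
    os_disk_type="StandardSSD_LRS",
    datadisks=list(datadisk_config(400, type="Standard_LRS")))

# scaleset: data disks are now supported, and the OS disk type is set via scaleset_options
vmss <- rg$create_vm_scaleset("myss", user, instances=2, size="Standard_D1_v2",
    config="ubuntu_18.04_ss",
    options=scaleset_options(os_disk_type="StandardSSD_LRS"),
    datadisks=list(datadisk_config(400, type="Standard_LRS")))
```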
diff --git a/R/AzureVM.R b/R/AzureVM.R index 40b9935..28119ce 100644 --- a/R/AzureVM.R +++ b/R/AzureVM.R @@ -9,8 +9,6 @@ AzureRMR::build_template_parameters globalVariables(c("self", "pool"), "AzureVM") -.AzureVM <- new.env() - # adding methods to classes in external package must go in .onLoad .onLoad <- function(libname, pkgname) { @@ -22,8 +20,3 @@ globalVariables(c("self", "pool"), "AzureVM") options(azure_vm_maxpoolsize=10) } -.onUnload <- function(libpath) -{ - if(exists("pool", envir=.AzureVM)) - try(parallel::stopCluster(.AzureVM$pool), silent=TRUE) -} diff --git a/R/az_vmss_resource.R b/R/az_vmss_resource.R index 6863b3a..899a3d9 100644 --- a/R/az_vmss_resource.R +++ b/R/az_vmss_resource.R @@ -36,12 +36,12 @@ #' By wrapping the deployment template used to create these resources, the `az_vmss_template` class allows managing them all as a single entity. #' #' @section Instance operations: -#' AzureVM has the ability to parallelise scaleset instance operations using a pool of background processes. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. For more information, see [init_pool]. +#' AzureVM has the ability to parallelise scaleset instance operations using a background process pool provided by AzureRMR. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. You can control the size of the process pool with the `azure_vm_minpoolsize` and `azure_vm_maxpoolsize` options, which have default values of 2 and 10 respectively. #' #' The `id` argument lets you specify a subset of instances on which to carry out an operation. This can be a character vector of instance IDs; a list of instance objects such as returned by `list_instances`; or a single instance object. The default (NULL) is to carry out the operation on all instances. #' #' @seealso -#' [AzureRMR::az_resource], [get_vm_scaleset_resource], [az_vmss_template], [init_pool] +#' [AzureRMR::az_resource], [get_vm_scaleset_resource], [az_vmss_template], [AzureRMR::init_pool] #' #' [VM scaleset API reference](https://docs.microsoft.com/en-us/rest/api/compute/virtualmachinescalesets) #' @format An R6 object of class `az_vmss_resource`, inheriting from `AzureRMR::az_resource`. @@ -303,7 +303,10 @@ private=list( if(length(vms) < 2 || getOption("azure_vm_maxpoolsize") == 0) return(lapply(vms, f)) - init_pool(length(vms)) - parallel::parLapply(.AzureVM$pool, vms, f) + minsize <- getOption("azure_vm_minpoolsize") + maxsize <- getOption("azure_vm_maxpoolsize") + size <- min(max(length(vms), minsize), maxsize) + init_pool(size) + pool_lapply(vms, f) } )) diff --git a/R/az_vmss_template.R b/R/az_vmss_template.R index 059937d..3e1ad1d 100644 --- a/R/az_vmss_template.R +++ b/R/az_vmss_template.R @@ -45,13 +45,12 @@ #' By wrapping the deployment template used to create these resources, the `az_vmss_template` class allows managing them all as a single entity. #' #' @section Instance operations: -#' -#' AzureVM has the ability to parallelise scaleset instance operations using a pool of background processes. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. For more information, see [init_pool]. 
+#' AzureVM has the ability to parallelise scaleset instance operations using a background process pool provided by AzureRMR. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. You can control the size of the process pool with the `azure_vm_minpoolsize` and `azure_vm_maxpoolsize` options, which have default values of 2 and 10 respectively. #' #' The `id` argument lets you specify a subset of instances on which to carry out an operation. This can be a character vector of instance IDs; a list of instance objects such as returned by `list_instances`; or a single instance object. The default (NULL) is to carry out the operation on all instances. #' #' @seealso -#' [AzureRMR::az_template], [create_vm_scaleset], [get_vm_scaleset], [delete_vm_scaleset], [init_pool] +#' [AzureRMR::az_template], [create_vm_scaleset], [get_vm_scaleset], [delete_vm_scaleset], [AzureRMR::init_pool] #' #' [VM scaleset API reference](https://docs.microsoft.com/en-us/rest/api/compute/virtualmachinescalesets) #' diff --git a/R/build_json.R b/R/build_json.R index 0e22734..1bbca18 100644 --- a/R/build_json.R +++ b/R/build_json.R @@ -145,6 +145,37 @@ build_template_parameters.vmss_config <- function(config, name, login_user, size do.call(add_parameters, config$options$params) + # add datadisks to params + if(!is_empty(config$datadisks)) + { + # fixup datadisk for scaleset + for(i in seq_along(config$datadisks)) + { + config$datadisks[[i]]$vm_spec$lun <- i - 1 + if(config$datadisks[[i]]$vm_spec$createOption == "attach") + { + config$datadisks[[i]]$vm_spec$createOption <- "empty" + config$datadisks[[i]]$vm_spec$diskSizeGB <- config$datadisks[[i]]$res_spec$diskSizeGB + config$datadisks[[i]]$vm_spec$storageAccountType <- config$datadisks[[i]]$res_spec$sku + } + diskname <- config$datadisks[[i]]$vm_spec$name + if(!is.null(diskname)) + { + newdiskname <- paste(name, diskname, i, sep="_") + config$datadisks[[i]]$res_spec$name <- newdiskname + config$datadisks[[i]]$vm_spec$name <- newdiskname + } + } + + disk_res_spec <- lapply(config$datadisks, `[[`, "res_spec") + null <- sapply(disk_res_spec, is.null) + + add_parameters( + dataDisks=lapply(config$datadisks, `[[`, "vm_spec"), + dataDiskResources=disk_res_spec[!null] + ) + } + jsonlite::prettify(jsonlite::toJSON(params, auto_unbox=TRUE, null="null")) } diff --git a/R/pool.R b/R/pool.R deleted file mode 100644 index 5267ac9..0000000 --- a/R/pool.R +++ /dev/null @@ -1,60 +0,0 @@ -#' Parallelise operations on VM scaleset instances -#' -#' @param connections The number of concurrent connections to support, which translates into the number of background R processes to create. Each connection requires a separate R process, so limit this is you are low on memory. -#' @param restart For `init_pool`, whether to terminate an already running pool first. -#' @param ... Other arguments passed on to `parallel::makeCluster`. -#' -#' @details -#' AzureVM can parallelise operations on scaleset instances by utilizing a pool of R processes in the background. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, or it can be (re)created by calling `init_pool` manually. It remains persistent for the session or until terminated by `delete_pool`. -#' -#' If `init_pool` is called and the current pool is smaller than `connections`, it is resized.
The size of the pool can be controlled by the global options `azure_vm_minpoolsize` and `azure_vm_maxpoolsize`, which have default values of 2 and 10 respectively. To disable parallel operations, set `options(azure_vm_maxpoolsize=0)`. -#' -#' Note that the pool size is unrelated to the _scaleset_ size, it only controls how many instances can communicate simultaneously with AzureVM. -#' -#' @seealso -#' [az_vmss_template], [parallel::makeCluster] -#' @rdname pool -#' @aliases azure_vm_minpoolsize azure_vm_maxpoolsize -#' @export -init_pool <- function(connections, restart=FALSE, ...) -{ - if(restart) - delete_pool() - - minsize <- getOption("azure_vm_minpoolsize") - maxsize <- getOption("azure_vm_maxpoolsize") - size <- min(max(connections, minsize), maxsize) - if(size < 1) - stop("Invalid pool size ", size, call.=FALSE) - - if(!exists("pool", envir=.AzureVM) || length(.AzureVM$pool) < size) - { - delete_pool() - message("Creating background pool") - .AzureVM$pool <- parallel::makeCluster(size) - } - else - { - # restore original state, set working directory to master working directory - parallel::clusterCall(.AzureVM$pool, function(wd) - { - setwd(wd) - rm(list=ls(all.names=TRUE), envir=.GlobalEnv) - }, wd=getwd()) - } - - invisible(NULL) -} - - -#' @rdname pool -#' @export -delete_pool <- function() -{ - if(!exists("pool", envir=.AzureVM)) - return() - - message("Deleting background pool") - parallel::stopCluster(.AzureVM$pool) - rm(pool, envir=.AzureVM) -} diff --git a/R/vm_config.R b/R/vm_config.R index 375d05d..d011bc4 100644 --- a/R/vm_config.R +++ b/R/vm_config.R @@ -4,6 +4,8 @@ #' @param keylogin Whether to use an SSH public key to login (TRUE) or a password (FALSE). Note that Windows does not support SSH key logins. #' @param managed Whether to provide a managed system identity for the VM. #' @param datadisks The data disks to attach to the VM. Specify this as either a vector of numeric disk sizes in GB, or a list of `datadisk_config` objects for more control over the specification. +#' @param os_disk_type The type of primary disk for the VM. Can be "Premium_LRS" (the default), "StandardSSD_LRS", or "Standard_LRS". Of these, "Standard_LRS" uses hard disks and the others use SSDs as the underlying hardware. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage. +#' @param dsvm_disk_type The Ubuntu DSVM image comes with one additional datadisk that holds some installed tools. This argument sets what type of disk is used. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage. #' @param nsg The network security group for the VM. Can be a call to `nsg_config` to create a new NSG; an AzureRMR resource object or resource ID to reuse an existing NSG; or NULL to not use an NSG (not recommended). #' @param ip The public IP address for the VM. Can be a call to `ip_config` to create a new IP address; an AzureRMR resource object or resource ID to reuse an existing address resource; or NULL if the VM should not be accessible from outside its subnet. #' @param vnet The virtual network for the VM. Can be a call to `vnet_config` to create a new virtual network, or an AzureRMR resource object or resource ID to reuse an existing virtual network. Note that by default, AzureVM will associate the NSG with the virtual network/subnet, not with the VM's network interface. 
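To make the effect of the two new arguments concrete, here is a hedged sketch of deploying an Ubuntu DSVM on a VM size without premium storage support, mirroring the calls in `tests/testthat/test15_disktype.R`; `rg` and the login details are placeholders.

```r
# Ubuntu DSVM on a size without premium storage: move both the OS disk and
# the tools data disk off Premium_LRS (`rg` is a placeholder resource group)
dsvm <- rg$create_vm("mydsvm", user_config("myname", "~/.ssh/id_rsa.pub"),
    size="Standard_D1_v2", config="ubuntu_dsvm",
    os_disk_type="StandardSSD_LRS",
    dsvm_disk_type="Standard_LRS")
```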
@@ -139,6 +141,7 @@ #' } #' @export vm_config <- function(image, keylogin, managed=TRUE, + os_disk_type=c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS"), datadisks=numeric(0), nsg=nsg_config(), ip=ip_config(), @@ -160,6 +163,7 @@ vm_config <- function(image, keylogin, managed=TRUE, image=image, keylogin=keylogin, managed=managed, + os_disk_type=match.arg(os_disk_type), datadisks=datadisks, nsg=nsg, ip=ip, @@ -196,12 +200,14 @@ vm_fixup_ip <- function(ip) #' @rdname vm_config #' @export ubuntu_dsvm <- function(keylogin=TRUE, managed=TRUE, datadisks=numeric(0), + dsvm_disk_type=c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS"), nsg=nsg_config(list(nsg_rule_allow_ssh, nsg_rule_allow_jupyter, nsg_rule_allow_rstudio)), ...) { if(is.numeric(datadisks)) datadisks <- lapply(datadisks, datadisk_config) - disk0 <- datadisk_config(NULL, NULL, "fromImage", "Premium_LRS") + dsvm_disk_type <- match.arg(dsvm_disk_type) + disk0 <- datadisk_config(NULL, NULL, "fromImage", dsvm_disk_type) vm_config(image_config("microsoft-dsvm", "linux-data-science-vm-ubuntu", "linuxdsvmubuntu"), keylogin=keylogin, managed=managed, datadisks=c(list(disk0), datadisks), nsg=nsg, ...) } diff --git a/R/vm_resource_config.R b/R/vm_resource_config.R index 50ca06b..f75eb3f 100644 --- a/R/vm_resource_config.R +++ b/R/vm_resource_config.R @@ -31,8 +31,11 @@ user_config <- function(username, sshkey=NULL, password=NULL) #' @rdname vm_resource_config #' @export -datadisk_config <- function(size, name="datadisk", create="empty", type="StandardSSD_LRS", write_accelerator=FALSE) +datadisk_config <- function(size, name="datadisk", create="empty", + type=c("StandardSSD_LRS", "Premium_LRS", "Standard_LRS", "UltraSSD_LRS"), + write_accelerator=FALSE) { + type <- match.arg(type) vm_caching <- if(type == "Premium_LRS") "ReadOnly" else "None" vm_create <- if(create == "empty") "attach" else "fromImage" vm_storage <- if(create == "empty") NULL else type diff --git a/R/vm_template_builders.R b/R/vm_template_builders.R index c851eb6..0547164 100644 --- a/R/vm_template_builders.R +++ b/R/vm_template_builders.R @@ -50,15 +50,13 @@ add_template_resources.vm_config <- function(config, ...) else 0 if(n_disks > 0) - { vm$properties$storageProfile$copy <- vm_datadisk - if(n_disk_resources > 0) - vm$dependsOn <- c(vm$dependsOn, "managedDiskResources") - } if(config$managed) vm$identity <- list(type="systemAssigned") + vm$properties$storageProfile$osDisk$managedDisk$storageAccountType <- config$os_disk_type + vm$properties$osProfile <- c(vm$properties$osProfile, if(config$keylogin) vm_key_login else vm_pwd_login) @@ -101,7 +99,11 @@ add_template_resources.vm_config <- function(config, ...) else vm$dependsOn <- NULL if(n_disk_resources > 0) + { resources <- c(resources, list(disk_default)) + if(n_disks > 0) + vm$dependsOn <- c(vm$dependsOn, "managedDiskResources") + } resources <- c(resources, list(vm)) diff --git a/R/vmss_config.R b/R/vmss_config.R index 8ced719..e345313 100644 --- a/R/vmss_config.R +++ b/R/vmss_config.R @@ -2,6 +2,8 @@ #' #' @param image For `vmss_config`, the VM image to deploy. This should be an object of class `image_config`, created by the function of the same name. #' @param options Scaleset options, as obtained via a call to `scaleset_options`. +#' @param datadisks The data disks to attach to the VM. Specify this as either a vector of numeric disk sizes in GB, or a list of `datadisk_config` objects for more control over the specification. 
+#' @param dsvm_disk_type The Ubuntu DSVM image comes with one additional datadisk that holds some installed tools. This argument sets what type of disk is used. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage. #' @param nsg The network security group for the scaleset. Can be a call to `nsg_config` to create a new NSG; an AzureRMR resource object or resource ID to reuse an existing NSG; or NULL to not use an NSG (not recommended). #' @param vnet The virtual network for the scaleset. Can be a call to `vnet_config` to create a new virtual network, or an AzureRMR resource object or resource ID to reuse an existing virtual network. Note that by default, AzureVM will associate the NSG with the virtual network/subnet, not with the VM's network interface. #' @param load_balancer The load balancer for the scaleset. Can be a call to `lb_config` to create a new load balancer; an AzureRMR resource object or resource ID to reuse an existing load balancer; or NULL if load balancing is not required. @@ -111,6 +113,7 @@ #' } #' @export vmss_config <- function(image, options=scaleset_options(), + datadisks=numeric(0), nsg=nsg_config(), vnet=vnet_config(), load_balancer=lb_config(), @@ -120,8 +123,12 @@ vmss_config <- function(image, options=scaleset_options(), variables=list(), ...) { + if(is.numeric(datadisks)) + datadisks <- lapply(datadisks, datadisk_config) + stopifnot(inherits(image, "image_config")) stopifnot(inherits(options, "scaleset_options")) + stopifnot(is.list(datadisks) && all(sapply(datadisks, inherits, "datadisk_config"))) # make IP sku, balancer sku and scaleset size consistent with each other load_balancer <- vmss_fixup_lb(options, load_balancer) @@ -130,6 +137,7 @@ vmss_config <- function(image, options=scaleset_options(), obj <- list( image=image, options=options, + datadisks=datadisks, nsg=nsg, vnet=vnet, lb=load_balancer, @@ -217,18 +225,25 @@ vmss_fixup_ip <- function(options, lb, ip) #' @rdname vmss_config #' @export -ubuntu_dsvm_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh, nsg_rule_allow_jupyter, nsg_rule_allow_rstudio)), +ubuntu_dsvm_ss <- function(datadisks=numeric(0), + dsvm_disk_type=c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS"), + nsg=nsg_config(list(nsg_rule_allow_ssh, nsg_rule_allow_jupyter, nsg_rule_allow_rstudio)), load_balancer=lb_config(rules=list(lb_rule_ssh, lb_rule_jupyter, lb_rule_rstudio), probes=list(lb_probe_ssh, lb_probe_jupyter, lb_probe_rstudio)), ...) { + if(is.numeric(datadisks)) + datadisks <- lapply(datadisks, datadisk_config) + dsvm_disk_type <- match.arg(dsvm_disk_type) + disk0 <- datadisk_config(NULL, NULL, "fromImage", dsvm_disk_type) vmss_config(image_config("microsoft-dsvm", "linux-data-science-vm-ubuntu", "linuxdsvmubuntu"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=c(list(disk0), datadisks), nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -windows_dsvm_ss <- function(nsg=nsg_config(list(nsg_rule_allow_rdp)), +windows_dsvm_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_rdp)), load_balancer=lb_config(rules=list(lb_rule_rdp), probes=list(lb_probe_rdp)), options=scaleset_options(keylogin=FALSE), @@ -236,34 +251,37 @@ windows_dsvm_ss <- function(nsg=nsg_config(list(nsg_rule_allow_rdp)), { options$keylogin <- FALSE vmss_config(image_config("microsoft-dsvm", "dsvm-windows", "server-2016"), - options=options, nsg=nsg, load_balancer=load_balancer, ...) 
+ options=options, datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -ubuntu_16.04_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), - load_balancer=lb_config(rules=list(lb_rule_ssh), - probes=list(lb_probe_ssh)), - ...) +ubuntu_16.04_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), + load_balancer=lb_config(rules=list(lb_rule_ssh), + probes=list(lb_probe_ssh)), + ...) { vmss_config(image_config("Canonical", "UbuntuServer", "16.04-LTS"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -ubuntu_18.04_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), - load_balancer=lb_config(rules=list(lb_rule_ssh), - probes=list(lb_probe_ssh)), - ...) +ubuntu_18.04_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), + load_balancer=lb_config(rules=list(lb_rule_ssh), + probes=list(lb_probe_ssh)), + ...) { vmss_config(image_config("Canonical", "UbuntuServer", "18.04-LTS"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -windows_2016_ss <- function(nsg=nsg_config(list(nsg_rule_allow_rdp)), +windows_2016_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_rdp)), load_balancer=lb_config(rules=list(lb_rule_rdp), probes=list(lb_probe_rdp)), options=scaleset_options(keylogin=FALSE), @@ -271,12 +289,13 @@ windows_2016_ss <- function(nsg=nsg_config(list(nsg_rule_allow_rdp)), { options$keylogin <- FALSE vmss_config(image_config("MicrosoftWindowsServer", "WindowsServer", "2016-Datacenter"), - options=options, nsg=nsg, load_balancer=load_balancer, ...) + options=options, datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -windows_2019_ss <- function(nsg=nsg_config(list(nsg_rule_allow_rdp)), +windows_2019_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_rdp)), load_balancer=lb_config(rules=list(lb_rule_rdp), probes=list(lb_probe_rdp)), options=scaleset_options(keylogin=FALSE), @@ -284,73 +303,79 @@ windows_2019_ss <- function(nsg=nsg_config(list(nsg_rule_allow_rdp)), { options$keylogin <- FALSE vmss_config(image_config("MicrosoftWindowsServer", "WindowsServer", "2019-Datacenter"), - options=options, nsg=nsg, load_balancer=load_balancer, ...) + options=options, datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -rhel_7.6_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), +rhel_7.6_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), load_balancer=lb_config(rules=list(lb_rule_ssh), probes=list(lb_probe_ssh)), ...) { vmss_config(image_config("RedHat", "RHEL", "7-RAW"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -rhel_8_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), +rhel_8_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), load_balancer=lb_config(rules=list(lb_rule_ssh), probes=list(lb_probe_ssh)), ...) { vmss_config(image_config("RedHat", "RHEL", "8"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) 
} #' @rdname vmss_config #' @export -centos_7.5_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), +centos_7.5_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), load_balancer=lb_config(rules=list(lb_rule_ssh), probes=list(lb_probe_ssh)), ...) { vmss_config(image_config("OpenLogic", "CentOS", "7.5"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -centos_7.6_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), +centos_7.6_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), load_balancer=lb_config(rules=list(lb_rule_ssh), probes=list(lb_probe_ssh)), ...) { vmss_config(image_config("OpenLogic", "CentOS", "7.6"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -debian_8_backports_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), +debian_8_backports_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), load_balancer=lb_config(rules=list(lb_rule_ssh), probes=list(lb_probe_ssh)), ...) { vmss_config(image_config("Credativ", "Debian", "8-backports"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } #' @rdname vmss_config #' @export -debian_9_backports_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), +debian_9_backports_ss <- function(datadisks=numeric(0), + nsg=nsg_config(list(nsg_rule_allow_ssh)), load_balancer=lb_config(rules=list(lb_rule_ssh), probes=list(lb_probe_ssh)), ...) { vmss_config(image_config("Credativ", "Debian", "9-backports"), - nsg=nsg, load_balancer=load_balancer, ...) + datadisks=datadisks, nsg=nsg, load_balancer=load_balancer, ...) } @@ -365,12 +390,14 @@ debian_9_backports_ss <- function(nsg=nsg_config(list(nsg_rule_allow_ssh)), #' @param large_scaleset Whether to enable scaleset sizes > 100 instances. #' @param overprovision Whether to overprovision the scaleset on creation. #' @param upgrade_policy A list, giving the VM upgrade policy for the scaleset. +#' @param os_disk_type The type of primary disk for the VM. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage. #' #' @export scaleset_options <- function(keylogin=TRUE, managed=TRUE, public=FALSE, low_priority=FALSE, delete_on_evict=FALSE, network_accel=FALSE, large_scaleset=FALSE, - overprovision=TRUE, upgrade_policy=list(mode="manual")) + overprovision=TRUE, upgrade_policy=list(mode="manual"), + os_disk_type=c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS")) { params <- list( priority=if(low_priority) "low" else "regular", @@ -381,7 +408,8 @@ scaleset_options <- function(keylogin=TRUE, managed=TRUE, public=FALSE, upgradePolicy=upgrade_policy ) - out <- list(keylogin=keylogin, managed=managed, public=public, params=params) + os_disk_type <- match.arg(os_disk_type) + out <- list(keylogin=keylogin, managed=managed, public=public, os_disk_type=os_disk_type, params=params) structure(out, class="scaleset_options") } diff --git a/R/vmss_template_builders.R b/R/vmss_template_builders.R index 518c4db..401466a 100644 --- a/R/vmss_template_builders.R +++ b/R/vmss_template_builders.R @@ -16,6 +16,9 @@ add_template_parameters.vmss_config <- function(config, ...) 
add_param(imagePublisher="string", imageOffer="string", imageSku="string", imageVersion="string") else add_param(imageId="string") + if(length(config$datadisks) > 0) + add_param(dataDisks="array", dataDiskResources="array") + params } @@ -48,6 +51,19 @@ add_template_resources.vmss_config <- function(config, ...) # fixup VM properties vm <- vmss$properties$virtualMachineProfile + n_disks <- length(config$datadisks) + n_disk_resources <- if(n_disks > 0) + sum(sapply(config$datadisks, function(x) !is.null(x$res_spec))) + else 0 + + if(n_disks > 0) + { + vm_datadisk[[1]]$input$managedDisk$id <- NULL + vm$storageProfile$copy <- vm_datadisk + if(n_disk_resources > 0) + vmss$dependsOn <- c(vmss$dependsOn, "managedDiskResources") + } + vm$osProfile <- c(vm$osProfile, if(config$options$keylogin) vm_key_login else vm_pwd_login) @@ -88,6 +104,8 @@ add_template_resources.vmss_config <- function(config, ...) ) } + vm$storageProfile$osDisk$managedDisk$storageAccountType <- config$options$os_disk_type + vmss$properties$virtualMachineProfile <- vm if(!is_empty(config$vmss_fields)) vmss <- utils::modifyList(vmss, config$vmss_fields) @@ -119,8 +137,16 @@ add_template_resources.vmss_config <- function(config, ...) vmss_depends <- c(vmss_depends, "[variables('lbRef')]") if(create["vnet"]) vmss_depends <- c(vmss_depends, "[variables('vnetRef')]") + vmss$dependsOn <- I(vmss_depends) + if(n_disk_resources > 0) + { + resources <- c(resources, list(disk_default)) + if(n_disks > 0) + vmss$dependsOn <- c(vmss$dependsOn, "managedDiskResources") + } + resources <- c(resources, list(vmss)) if(!is_empty(config$other)) diff --git a/README.md b/README.md index 88e51f3..24f8954 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ![Downloads](https://cranlogs.r-pkg.org/badges/AzureVM) Build Status -AzureVM is a package for interacting with [virtual machines](https://azure.microsoft.com/services/virtual-machines/) and [virtual machine scalesets](https://azure.microsoft.com/services/virtual-machine-scale-sets/) in Azure. You can deploy, start up, shut down, run scripts, deallocate and delete VMs and scalesets from the R command line. It uses the tools provided by the [AzureRMR package](https://github.com/Azure/AzureRMR) to manage VM resources and templates. Version 2.0 of AzureVM is a complete rewrite of the package, aiming to make it a truly generic and flexible interface to VMs. +AzureVM is a package for interacting with [virtual machines](https://azure.microsoft.com/services/virtual-machines/) and [virtual machine scalesets](https://azure.microsoft.com/services/virtual-machine-scale-sets/) in Azure. You can deploy, start up, shut down, run scripts, deallocate and delete VMs and scalesets from the R command line. It uses the tools provided by the [AzureRMR package](https://github.com/Azure/AzureRMR) to manage VM resources and templates. The primary repo for this package is at https://github.com/Azure/AzureVM; please submit issues and PRs there. It is also mirrored at the Cloudyr org at https://github.com/cloudyr/AzureVM. You can install the development version of the package with `devtools::install_github("Azure/AzureVM")`. @@ -152,7 +152,7 @@ sub$create_vm_scaleset("mylargess", user_config("myname", "~/.ssh/id_rsa.pub"), location="australiaeast") ``` -Working with scaleset instances can be tedious if you have a large scaleset, since R can only connect to one instance at a time. 
To solve this problem, AzureVM creates a pool of background processes that connect in parallel with the scaleset, leading to significant speedups. The pool is created automatically the first time it is needed, and is deleted at the end of the session. +Working with scaleset instances can be tedious if you have a large scaleset, since R can only connect to one instance at a time. To solve this problem, AzureVM can leverage the process pool functionality provided by AzureRMR to connect in parallel with the scaleset, leading to significant speedups. The pool is created automatically the first time it is needed, and is deleted at the end of the session. ```r # this will create a pool of up to 10 processes that talk to the scaleset diff --git a/man/az_vmss_resource.Rd b/man/az_vmss_resource.Rd index 83565b1..6dbf03c 100644 --- a/man/az_vmss_resource.Rd +++ b/man/az_vmss_resource.Rd @@ -51,13 +51,13 @@ The following methods are available, in addition to those provided by the \link[ \section{Instance operations}{ -AzureVM has the ability to parallelise scaleset instance operations using a pool of background processes. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. For more information, see \link{init_pool}. +AzureVM has the ability to parallelise scaleset instance operations using a background process pool provided by AzureRMR. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. You can control the size of the process pool with the \code{azure_vm_minpoolsize} and \code{azure_vm_maxpoolsize} options, which have default values of 2 and 10 respectively. The \code{id} argument lets you specify a subset of instances on which to carry out an operation. This can be a character vector of instance IDs; a list of instance objects such as returned by \code{list_instances}; or a single instance object. The default (NULL) is to carry out the operation on all instances. } \seealso{ -\link[AzureRMR:az_resource]{AzureRMR::az_resource}, \link{get_vm_scaleset_resource}, \link{az_vmss_template}, \link{init_pool} +\link[AzureRMR:az_resource]{AzureRMR::az_resource}, \link{get_vm_scaleset_resource}, \link{az_vmss_template}, \link[AzureRMR:init_pool]{AzureRMR::init_pool} \href{https://docs.microsoft.com/en-us/rest/api/compute/virtualmachinescalesets}{VM scaleset API reference} } diff --git a/man/az_vmss_template.Rd b/man/az_vmss_template.Rd index 7b5c3ad..06ed777 100644 --- a/man/az_vmss_template.Rd +++ b/man/az_vmss_template.Rd @@ -65,8 +65,7 @@ Many of these methods are actually provided by the \link{az_vmss_resource} class. \section{Instance operations}{ - -AzureVM has the ability to parallelise scaleset instance operations using a pool of background processes. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, and remains persistent for the session. For more information, see \link{init_pool}. +AzureVM has the ability to parallelise scaleset instance operations using a background process pool provided by AzureRMR. This can lead to significant speedups when working with scalesets with high instance counts. 
The pool is created automatically the first time that it is required, and remains persistent for the session. You can control the size of the process pool with the \code{azure_vm_minpoolsize} and \code{azure_vm_maxpoolsize} options, which have default values of 2 and 10 respectively. The \code{id} argument lets you specify a subset of instances on which to carry out an operation. This can be a character vector of instance IDs; a list of instance objects such as returned by \code{list_instances}; or a single instance object. The default (NULL) is to carry out the operation on all instances. } @@ -98,7 +97,7 @@ vmss$sync_vmss_status() } } \seealso{ -\link[AzureRMR:az_template]{AzureRMR::az_template}, \link{create_vm_scaleset}, \link{get_vm_scaleset}, \link{delete_vm_scaleset}, \link{init_pool} +\link[AzureRMR:az_template]{AzureRMR::az_template}, \link{create_vm_scaleset}, \link{get_vm_scaleset}, \link{delete_vm_scaleset}, \link[AzureRMR:init_pool]{AzureRMR::init_pool} \href{https://docs.microsoft.com/en-us/rest/api/compute/virtualmachinescalesets}{VM scaleset API reference} } diff --git a/man/pool.Rd b/man/pool.Rd deleted file mode 100644 index 5488307..0000000 --- a/man/pool.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pool.R -\name{init_pool} -\alias{init_pool} -\alias{azure_vm_minpoolsize} -\alias{azure_vm_maxpoolsize} -\alias{delete_pool} -\title{Parallelise operations on VM scaleset instances} -\usage{ -init_pool(connections, restart = FALSE, ...) - -delete_pool() -} -\arguments{ -\item{connections}{The number of concurrent connections to support, which translates into the number of background R processes to create. Each connection requires a separate R process, so limit this is you are low on memory.} - -\item{restart}{For \code{init_pool}, whether to terminate an already running pool first.} - -\item{...}{Other arguments passed on to \code{parallel::makeCluster}.} -} -\description{ -Parallelise operations on VM scaleset instances -} -\details{ -AzureVM can parallelise operations on scaleset instances by utilizing a pool of R processes in the background. This can lead to significant speedups when working with scalesets with high instance counts. The pool is created automatically the first time that it is required, or it can be (re)created by calling \code{init_pool} manually. It remains persistent for the session or until terminated by \code{delete_pool}. - -If \code{init_pool} is called and the current pool is smaller than \code{connections}, it is resized. The size of the pool can be controlled by the global options \code{azure_vm_minpoolsize} and \code{azure_vm_maxpoolsize}, which have default values of 2 and 10 respectively. To disable parallel operations, set \code{options(azure_vm_maxpoolsize=0)}. - -Note that the pool size is unrelated to the \emph{scaleset} size, it only controls how many instances can communicate simultaneously with AzureVM. 
-} -\seealso{ -\link{az_vmss_template}, \link[parallel:makeCluster]{parallel::makeCluster} -} diff --git a/man/scaleset_options.Rd b/man/scaleset_options.Rd index df1d919..4cd1d10 100644 --- a/man/scaleset_options.Rd +++ b/man/scaleset_options.Rd @@ -7,7 +7,8 @@ scaleset_options(keylogin = TRUE, managed = TRUE, public = FALSE, low_priority = FALSE, delete_on_evict = FALSE, network_accel = FALSE, large_scaleset = FALSE, - overprovision = TRUE, upgrade_policy = list(mode = "manual")) + overprovision = TRUE, upgrade_policy = list(mode = "manual"), + os_disk_type = c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS")) } \arguments{ \item{keylogin}{Whether to use an SSH public key to login (TRUE) or a password (FALSE). Note that Windows does not support SSH key logins.} @@ -27,6 +28,8 @@ scaleset_options(keylogin = TRUE, managed = TRUE, public = FALSE, \item{overprovision}{Whether to overprovision the scaleset on creation.} \item{upgrade_policy}{A list, giving the VM upgrade policy for the scaleset.} + +\item{os_disk_type}{The type of primary disk for the VM. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage.} } \description{ Virtual machine scaleset options diff --git a/man/vm_config.Rd b/man/vm_config.Rd index fa6fb68..174a216 100644 --- a/man/vm_config.Rd +++ b/man/vm_config.Rd @@ -16,12 +16,14 @@ \alias{debian_9_backports} \title{VM configuration functions} \usage{ -vm_config(image, keylogin, managed = TRUE, datadisks = numeric(0), - nsg = nsg_config(), ip = ip_config(), vnet = vnet_config(), - nic = nic_config(), other_resources = list(), variables = list(), - ...) +vm_config(image, keylogin, managed = TRUE, + os_disk_type = c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS"), + datadisks = numeric(0), nsg = nsg_config(), ip = ip_config(), + vnet = vnet_config(), nic = nic_config(), other_resources = list(), + variables = list(), ...) ubuntu_dsvm(keylogin = TRUE, managed = TRUE, datadisks = numeric(0), + dsvm_disk_type = c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS"), nsg = nsg_config(list(nsg_rule_allow_ssh, nsg_rule_allow_jupyter, nsg_rule_allow_rstudio)), ...) @@ -70,6 +72,8 @@ debian_9_backports(keylogin = TRUE, managed = TRUE, \item{managed}{Whether to provide a managed system identity for the VM.} +\item{os_disk_type}{The type of primary disk for the VM. Can be "Premium_LRS" (the default), "StandardSSD_LRS", or "Standard_LRS". Of these, "Standard_LRS" uses hard disks and the others use SSDs as the underlying hardware. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage.} + \item{datadisks}{The data disks to attach to the VM. Specify this as either a vector of numeric disk sizes in GB, or a list of \code{datadisk_config} objects for more control over the specification.} \item{nsg}{The network security group for the VM. Can be a call to \code{nsg_config} to create a new NSG; an AzureRMR resource object or resource ID to reuse an existing NSG; or NULL to not use an NSG (not recommended).} @@ -85,6 +89,8 @@ debian_9_backports(keylogin = TRUE, managed = TRUE, \item{variables}{An optional named list of variables to add to the template.} \item{...}{For the specific VM configurations, other customisation arguments to be passed to \code{vm_config}. For \code{vm_config}, named arguments that will be folded into the VM resource definition in the template.} + +\item{dsvm_disk_type}{The Ubuntu DSVM image comes with one additional datadisk that holds some installed tools. 
This argument sets what type of disk is used. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage.} } \value{ An object of S3 class \code{vm_config}, that can be used by the \code{create_vm} method. diff --git a/man/vm_resource_config.Rd b/man/vm_resource_config.Rd index 9969aa2..e968418 100644 --- a/man/vm_resource_config.Rd +++ b/man/vm_resource_config.Rd @@ -9,7 +9,8 @@ user_config(username, sshkey = NULL, password = NULL) datadisk_config(size, name = "datadisk", create = "empty", - type = "StandardSSD_LRS", write_accelerator = FALSE) + type = c("StandardSSD_LRS", "Premium_LRS", "Standard_LRS", + "UltraSSD_LRS"), write_accelerator = FALSE) image_config(publisher = NULL, offer = NULL, sku = NULL, version = "latest", id = NULL) diff --git a/man/vmss_config.Rd b/man/vmss_config.Rd index ce096c8..b4e6830 100644 --- a/man/vmss_config.Rd +++ b/man/vmss_config.Rd @@ -16,59 +16,71 @@ \alias{debian_9_backports_ss} \title{Virtual machine scaleset configuration functions} \usage{ -vmss_config(image, options = scaleset_options(), nsg = nsg_config(), - vnet = vnet_config(), load_balancer = lb_config(), - load_balancer_address = ip_config(), +vmss_config(image, options = scaleset_options(), + datadisks = numeric(0), nsg = nsg_config(), vnet = vnet_config(), + load_balancer = lb_config(), load_balancer_address = ip_config(), autoscaler = autoscaler_config(), other_resources = list(), variables = list(), ...) -ubuntu_dsvm_ss(nsg = nsg_config(list(nsg_rule_allow_ssh, - nsg_rule_allow_jupyter, nsg_rule_allow_rstudio)), - load_balancer = lb_config(rules = list(lb_rule_ssh, lb_rule_jupyter, - lb_rule_rstudio), probes = list(lb_probe_ssh, lb_probe_jupyter, - lb_probe_rstudio)), ...) +ubuntu_dsvm_ss(datadisks = numeric(0), + dsvm_disk_type = c("Premium_LRS", "StandardSSD_LRS", "Standard_LRS"), + nsg = nsg_config(list(nsg_rule_allow_ssh, nsg_rule_allow_jupyter, + nsg_rule_allow_rstudio)), load_balancer = lb_config(rules = + list(lb_rule_ssh, lb_rule_jupyter, lb_rule_rstudio), probes = + list(lb_probe_ssh, lb_probe_jupyter, lb_probe_rstudio)), ...) -windows_dsvm_ss(nsg = nsg_config(list(nsg_rule_allow_rdp)), +windows_dsvm_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_rdp)), load_balancer = lb_config(rules = list(lb_rule_rdp), probes = list(lb_probe_rdp)), options = scaleset_options(keylogin = FALSE), ...) -ubuntu_16.04_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +ubuntu_16.04_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -ubuntu_18.04_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +ubuntu_18.04_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -windows_2016_ss(nsg = nsg_config(list(nsg_rule_allow_rdp)), +windows_2016_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_rdp)), load_balancer = lb_config(rules = list(lb_rule_rdp), probes = list(lb_probe_rdp)), options = scaleset_options(keylogin = FALSE), ...) -windows_2019_ss(nsg = nsg_config(list(nsg_rule_allow_rdp)), +windows_2019_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_rdp)), load_balancer = lb_config(rules = list(lb_rule_rdp), probes = list(lb_probe_rdp)), options = scaleset_options(keylogin = FALSE), ...) 
-rhel_7.6_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +rhel_7.6_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -rhel_8_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +rhel_8_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -centos_7.5_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +centos_7.5_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -centos_7.6_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +centos_7.6_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -debian_8_backports_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +debian_8_backports_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) -debian_9_backports_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), +debian_9_backports_ss(datadisks = numeric(0), + nsg = nsg_config(list(nsg_rule_allow_ssh)), load_balancer = lb_config(rules = list(lb_rule_ssh), probes = list(lb_probe_ssh)), ...) } @@ -77,6 +89,8 @@ debian_9_backports_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), \item{options}{Scaleset options, as obtained via a call to \code{scaleset_options}.} +\item{datadisks}{The data disks to attach to the VM. Specify this as either a vector of numeric disk sizes in GB, or a list of \code{datadisk_config} objects for more control over the specification.} + \item{nsg}{The network security group for the scaleset. Can be a call to \code{nsg_config} to create a new NSG; an AzureRMR resource object or resource ID to reuse an existing NSG; or NULL to not use an NSG (not recommended).} \item{vnet}{The virtual network for the scaleset. Can be a call to \code{vnet_config} to create a new virtual network, or an AzureRMR resource object or resource ID to reuse an existing virtual network. Note that by default, AzureVM will associate the NSG with the virtual network/subnet, not with the VM's network interface.} @@ -92,6 +106,8 @@ debian_9_backports_ss(nsg = nsg_config(list(nsg_rule_allow_ssh)), \item{variables}{An optional named list of variables to add to the template.} \item{...}{For the specific VM configurations, other customisation arguments to be passed to \code{vm_config}. For \code{vmss_config}, named arguments that will be folded into the scaleset resource definition in the template.} + +\item{dsvm_disk_type}{The Ubuntu DSVM image comes with one additional datadisk that holds some installed tools. This argument sets what type of disk is used. Change this to "StandardSSD_LRS" or "Standard_LRS" if the VM size doesn't support premium storage.} } \value{ An object of S3 class \code{vmss_config}, that can be used by the \code{create_vm_scaleset} method. 
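The scaleset counterparts take the same new arguments; a minimal sketch matching the new test file below (`rg`, `user` and the sizes are placeholders):

```r
# DSVM scaleset: standard-SSD OS disks, standard-HDD tools disk, and one extra
# 400GB data disk per instance; `rg` and `user` are assumed to exist already
vmss <- rg$create_vm_scaleset("mydsvmss", user, instances=2, size="Standard_D1_v2",
    config="ubuntu_dsvm_ss",
    options=scaleset_options(os_disk_type="StandardSSD_LRS"),
    dsvm_disk_type="Standard_LRS",
    datadisks=list(datadisk_config(400, type="Standard_LRS")))
```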
diff --git a/tests/testthat/test14_par.R b/tests/testthat/test14_par.R index 7668297..9149144 100644 --- a/tests/testthat/test14_par.R +++ b/tests/testthat/test14_par.R @@ -32,19 +32,19 @@ test_that("Scaleset connection pool works", expect_length(inst, 5) expect_message(vm$run_script("ls /tmp", id=names(inst)[1:2]), "Creating background pool") - expect_true(exists("pool", AzureVM:::.AzureVM) && length(AzureVM:::.AzureVM$pool) == 2) + expect_identical(pool_size(), 2L) expect_silent(vm$get_vm_private_ip_addresses(names(inst[1:2]))) expect_silent(vm$get_vm_private_ip_addresses(inst[1:2])) expect_message(vm$get_vm_private_ip_addresses(), "Creating background pool") - expect_true(exists("pool", AzureVM:::.AzureVM) && length(AzureVM:::.AzureVM$pool) == 5) + expect_identical(pool_size(), 5L) expect_silent(vm$get_vm_private_ip_addresses(inst)) expect_silent(vm$get_vm_private_ip_addresses(inst[[1]])) delete_pool() - expect_false(exists("pool", AzureVM:::.AzureVM)) + expect_false(pool_exists()) }) rg$delete(confirm=FALSE) diff --git a/tests/testthat/test15_disktype.R b/tests/testthat/test15_disktype.R new file mode 100644 index 0000000..178d6b0 --- /dev/null +++ b/tests/testthat/test15_disktype.R @@ -0,0 +1,53 @@ +context("Disk types") + +tenant <- Sys.getenv("AZ_TEST_TENANT_ID") +app <- Sys.getenv("AZ_TEST_APP_ID") +password <- Sys.getenv("AZ_TEST_PASSWORD") +subscription <- Sys.getenv("AZ_TEST_SUBSCRIPTION") + +if(tenant == "" || app == "" || password == "" || subscription == "") + skip("Tests skipped: ARM credentials not set") + +rgname <- paste0("vm", paste0(sample(letters, 10, TRUE), collapse="")) +location <- "australiaeast" +user <- user_config("username", "../resources/testkey.pub") +size <- "Standard_D1_v2" + +rg <- AzureRMR::az_rm$ + new(tenant=tenant, app=app, password=password)$ + get_subscription(subscription)$ + create_resource_group(rgname, location) + +test_that("OS disk type works", +{ + vmname <- paste0(sample(letters, 10, TRUE), collapse="") + vm <- rg$create_vm(vmname, user, size, config="ubuntu_18.04", + os_disk_type="StandardSSD_LRS") + expect_is(vm, "az_vm_template") + + vmssname <- paste0(sample(letters, 10, TRUE), collapse="") + vmss <- rg$create_vm_scaleset(vmssname, user, instances=2, size=size, config="ubuntu_18.04_ss", + options=scaleset_options(os_disk_type="StandardSSD_LRS"), + nsg=NULL, autoscaler=NULL, load_balancer=NULL) + expect_is(vmss, "az_vmss_template") +}) + +test_that("Data disk type works", +{ + vmname <- paste0(sample(letters, 10, TRUE), collapse="") + vm <- rg$create_vm(vmname, user, size, config="ubuntu_dsvm", + os_disk_type="StandardSSD_LRS", dsvm_disk_type="Standard_LRS", + datadisks=list(datadisk_config(400, type="Standard_LRS"))) + expect_is(vm, "az_vm_template") + + vmssname <- paste0(sample(letters, 10, TRUE), collapse="") + vmss <- rg$create_vm_scaleset(vmssname, user, instances=2, size=size, config="ubuntu_dsvm_ss", + options=scaleset_options(os_disk_type="StandardSSD_LRS"), + dsvm_disk_type="Standard_LRS", + datadisks=list(datadisk_config(400, type="Standard_LRS")), + nsg=NULL, autoscaler=NULL, load_balancer=NULL) + expect_is(vmss, "az_vmss_template") +}) + +rg$delete(confirm=FALSE) + diff --git a/vignettes/intro.rmd b/vignettes/intro.rmd index 200a3c4..cc83de6 100644 --- a/vignettes/intro.rmd +++ b/vignettes/intro.rmd @@ -129,7 +129,7 @@ Each predefined VM configuration has a corresponding scaleset configuration. 
To ```r # Windows Server 2019 sub$create_vm_scaleset("mywinss", user_config("myname", password="Use-strong-passwords!"), instances=5, - config="windows_2019", + config="windows_2019_ss", location="australiaeast") # RHEL scaleset, serving HTTP/HTTPS @@ -153,7 +153,7 @@ sub$create_vm_scaleset("mylargess", user_config("myname", "~/.ssh/id_rsa.pub"), location="australiaeast") ``` -Working with scaleset instances can be tedious if you have a large scaleset, since R can only connect to one instance at a time. To solve this problem, AzureVM creates a pool of background processes that connect in parallel with the scaleset, leading to significant speedups. The pool is created automatically the first time it is needed, and is deleted at the end of the session. +Working with scaleset instances can be tedious if you have a large scaleset, since R can only connect to one instance at a time. To solve this problem, AzureVM can leverage the process pool functionality supplied by AzureRMR to connect in parallel with the scaleset, leading to significant speedups. The pool is created automatically the first time it is needed, and is deleted at the end of the session. ```r # this will create a pool of up to 10 processes that talk to the scaleset