Update all recipes to use YAML (#122)
This commit is contained in:
Родитель
a45f3b9a66
Коммит
2c8f6b299a
|
@ -8,13 +8,13 @@ this recipe.
|
|||
|
||||
### Pool Configuration
|
||||
The pool configuration should enable the following properties:
|
||||
* `vm_size` must be either `STANDARD_A8`, `STANDARD_A9`, `STANDARD_H16R`,
|
||||
`STANDARD_H16MR`
|
||||
* `vm_size` should be a CPU-only RDMA-enabled instance:
|
||||
`STANDARD_A8`, `STANDARD_A9`, `STANDARD_H16R`, `STANDARD_H16MR`
|
||||
* `inter_node_communication_enabled` must be set to `true`
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
* `publisher` should be `OpenLogic` or `SUSE`.
|
||||
* `offer` should be `CentOS-HPC` for `OpenLogic` or `SLES-HPC` for `SUSE`.
|
||||
* `sku` should be `7.3` for `CentOS-HPC` or `12-SP1` for `SLES-HPC`.
|
||||
* `publisher` should be `OpenLogic` or `SUSE`
|
||||
* `offer` should be `CentOS-HPC` for `OpenLogic` or `SLES-HPC` for `SUSE`
|
||||
* `sku` should be `7.3` for `CentOS-HPC` or `12-SP1` for `SLES-HPC`
|
||||
|
||||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
|
@ -46,12 +46,15 @@ application `command` to run would be:
|
|||
* `-w` for the working directory (not required for this example to run)
|
||||
* `--` parameters specified after this are given verbatim to the
|
||||
Python script
|
||||
* `infiniband` must be set to `true`
|
||||
* `infiniband` can be set to `true`; however, it is implicitly enabled by
|
||||
Batch Shipyard when executing on an RDMA-enabled compute pool.
|
||||
* `multi_instance` property must be defined
|
||||
* `num_instances` should be set to `pool_specification_vm_count_dedicated`,
|
||||
`pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or
|
||||
`pool_current_low_priority`
|
||||
* `coordination_command` should be unset or `null`
|
||||
* `coordination_command` should be unset or `null`. For pools with
|
||||
`native` container support, this command should be supplied if
|
||||
a non-standard `sshd` is required.
|
||||
* `resource_files` should be unset or the array can be empty
|
||||
|
||||
## Dockerfile and supplementary files
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/cntk:2.1-cpu-1bitsgd-py36-intelmpi-refdata"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/cntk:2.1-cpu-1bitsgd-py36-intelmpi-refdata
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,18 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"auto_complete": true,
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/cntk:2.1-cpu-1bitsgd-py36-intelmpi-refdata",
|
||||
"command": "/cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py -- -q 1 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output",
|
||||
"infiniband": true,
|
||||
"multi_instance": {
|
||||
"num_instances": "pool_current_dedicated"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
job_specifications:
|
||||
- id: cntkjob
|
||||
auto_complete: true
|
||||
tasks:
|
||||
- image: alfpark/cntk:2.1-cpu-1bitsgd-py36-intelmpi-refdata
|
||||
multi_instance:
|
||||
num_instances: pool_current_dedicated
|
||||
command: /cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py -- -q 1 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "docker-cntk-cpu-rdma",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "OpenLogic",
|
||||
"offer": "CentOS-HPC",
|
||||
"sku": "7.3"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_H16R",
|
||||
"vm_count": {
|
||||
"dedicated": 2
|
||||
},
|
||||
"inter_node_communication_enabled": true,
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
pool_specification:
|
||||
id: docker-cntk-cpu-rdma
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: CentOS-HPC
|
||||
publisher: OpenLogic
|
||||
sku: '7.3'
|
||||
vm_count:
|
||||
dedicated: 2
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_H16R
|
||||
inter_node_communication_enabled: true
|
||||
ssh:
|
||||
username: docker
|
|
@ -55,7 +55,9 @@ application `command` to run would be:
|
|||
* `num_instances` should be set to `pool_specification_vm_count_dedicated`,
|
||||
`pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or
|
||||
`pool_current_low_priority`
|
||||
* `coordination_command` should be unset or `null`
|
||||
* `coordination_command` should be unset or `null`. For pools with
|
||||
`native` container support, this command should be supplied if
|
||||
a non-standard `sshd` is required.
|
||||
* `resource_files` should be unset or the array can be empty
|
||||
|
||||
## Dockerfile and supplementary files
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/cntk:2.1-cpu-py35-refdata"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/cntk:2.1-cpu-py35-refdata
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,17 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"auto_complete": true,
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/cntk:2.1-cpu-py35-refdata",
|
||||
"command": "/cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py -- --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output",
|
||||
"multi_instance": {
|
||||
"num_instances": "pool_current_dedicated"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
job_specifications:
|
||||
- auto_complete: true
|
||||
id: cntkjob
|
||||
tasks:
|
||||
- image: alfpark/cntk:2.1-cpu-py35-refdata
|
||||
multi_instance:
|
||||
num_instances: pool_current_dedicated
|
||||
command: /cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ConvNet/Python/ConvNet_CIFAR10_DataAug_Distributed.py -- --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "cntk-cpu-multinode",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_D1_V2",
|
||||
"vm_count": {
|
||||
"dedicated": 3
|
||||
},
|
||||
"inter_node_communication_enabled": true,
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
pool_specification:
|
||||
id: cntk-cpu-multinode
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 3
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_D1_V2
|
||||
inter_node_communication_enabled: true
|
||||
ssh:
|
||||
username: docker
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"microsoft/cntk:2.1-cpu-python3.5"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- microsoft/cntk:2.1-cpu-python3.5
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,13 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "microsoft/cntk:2.1-cpu-python3.5",
|
||||
"command": "/bin/bash -c \"source /cntk/activate-cntk && cd /cntk/Examples/Image/DataSets/MNIST && python -u install_mnist.py && cd /cntk/Examples/Image/Classification/ConvNet/Python && python -u ConvNet_MNIST.py\""
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
job_specifications:
|
||||
- id: cntkjob
|
||||
tasks:
|
||||
- image: microsoft/cntk:2.1-cpu-python3.5
|
||||
command: /bin/bash -c "source /cntk/activate-cntk && cd /cntk/Examples/Image/DataSets/MNIST && python -u install_mnist.py && cd /cntk/Examples/Image/Classification/ConvNet/Python && python -u ConvNet_MNIST.py"
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "cntk-cpu-singlenode",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_D1_V2",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"inter_node_communication_enabled": true,
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
pool_specification:
|
||||
id: cntk-cpu-singlenode
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_D1_V2
|
||||
inter_node_communication_enabled: true
|
||||
ssh:
|
||||
username: docker
|
|
@ -8,7 +8,7 @@ this recipe.
|
|||
|
||||
### Pool Configuration
|
||||
The pool configuration should enable the following properties:
|
||||
* `vm_size` must be `STANDARD_NC24R`
|
||||
* `vm_size` must be an RDMA-enabled GPU VM size, e.g., `STANDARD_NC24R`
|
||||
* `inter_node_communication_enabled` must be set to `true`
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
* `publisher` should be `OpenLogic`
|
||||
|
@ -46,14 +46,17 @@ Docker image. The application `command` to run would be:
|
|||
* `-w` for the working directory (not required for this example to run)
|
||||
* `--` parameters specified after this are given verbatim to the
|
||||
Python script
|
||||
* `infiniband` must be set to `true`
|
||||
* `gpu` must be set to `true`. This enables invoking the `nvidia-docker`
|
||||
wrapper.
|
||||
* `infiniband` can be set to `true`; however, it is implicitly enabled by
|
||||
Batch Shipyard when executing on an RDMA-enabled compute pool.
|
||||
* `gpu` can be set to `true`; however, it is implicitly enabled by Batch
|
||||
Shipyard when executing on a GPU-enabled compute pool.
|
||||
* `multi_instance` property must be defined
|
||||
* `num_instances` should be set to `pool_specification_vm_count_dedicated`,
|
||||
`pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or
|
||||
`pool_current_low_priority`
|
||||
* `coordination_command` should be unset or `null`
|
||||
* `coordination_command` should be unset or `null`. For pools with
|
||||
`native` container support, this command should be supplied if
|
||||
a non-standard `sshd` is required.
|
||||
* `resource_files` should be unset or the array can be empty
|
||||
|
||||
## Dockerfile and supplementary files
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/cntk:2.1-gpu-1bitsgd-py36-cuda8-cudnn6-intelmpi-refdata"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/cntk:2.1-gpu-1bitsgd-py36-cuda8-cudnn6-intelmpi-refdata
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,19 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"auto_complete": true,
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/cntk:2.1-gpu-1bitsgd-py36-cuda8-cudnn6-intelmpi-refdata",
|
||||
"command": "/cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py -- --network resnet20 -q 1 -a 0 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output",
|
||||
"infiniband": true,
|
||||
"gpu": true,
|
||||
"multi_instance": {
|
||||
"num_instances": "pool_current_dedicated"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
job_specifications:
|
||||
- id: cntkjob
|
||||
auto_complete: true
|
||||
tasks:
|
||||
- image: alfpark/cntk:2.1-gpu-1bitsgd-py36-cuda8-cudnn6-intelmpi-refdata
|
||||
multi_instance:
|
||||
num_instances: pool_current_dedicated
|
||||
command: /cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py -- --network resnet20 -q 1 -a 0 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "docker-cntk-gpu-rdma",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "OpenLogic",
|
||||
"offer": "CentOS-HPC",
|
||||
"sku": "7.3"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_NC24R",
|
||||
"vm_count": {
|
||||
"dedicated": 2
|
||||
},
|
||||
"inter_node_communication_enabled": true,
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
pool_specification:
|
||||
id: docker-cntk-gpu-rdma
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: CentOS-HPC
|
||||
publisher: OpenLogic
|
||||
sku: '7.3'
|
||||
vm_count:
|
||||
dedicated: 2
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_NC24R
|
||||
inter_node_communication_enabled: true
|
||||
ssh:
|
||||
username: docker
|
|
@ -11,11 +11,9 @@ this recipe.
|
|||
|
||||
### Pool Configuration
|
||||
The pool configuration should enable the following properties:
|
||||
* `vm_size` must be one of `STANDARD_NC6`, `STANDARD_NC12`, `STANDARD_NC24`,
|
||||
`STANDARD_NV6`, `STANDARD_NV12`, `STANDARD_NV24`. `NC` VM instances feature
|
||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because CNTK is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `vm_size` must be a GPU-enabled VM size. Because CNTK is a GPU-accelerated
|
||||
compute application, you should choose an `ND`, `NC` or `NCv2` VM instance
|
||||
size.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
|
@ -74,7 +72,9 @@ wrapper.
|
|||
* `num_instances` should be set to `pool_specification_vm_count_dedicated`,
|
||||
`pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or
|
||||
`pool_current_low_priority`
|
||||
* `coordination_command` should be unset or `null`
|
||||
* `coordination_command` should be unset or `null`. For pools with
|
||||
`native` container support, this command should be supplied if
|
||||
a non-standard `sshd` is required.
|
||||
* `resource_files` should be unset or the array can be empty
|
||||
|
||||
## Dockerfile and supplementary files
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,18 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"auto_complete": true,
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata",
|
||||
"command": "/cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py -- --network resnet20 -q 1 -a 0 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output",
|
||||
"multi_instance": {
|
||||
"num_instances": "pool_current_dedicated"
|
||||
},
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
job_specifications:
|
||||
- id: cntkjob
|
||||
auto_complete: true
|
||||
tasks:
|
||||
- image: alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata
|
||||
multi_instance:
|
||||
num_instances: pool_current_dedicated
|
||||
command: /cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py -- --network resnet20 -q 1 -a 0 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "cntk-multinode-multigpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_NC24",
|
||||
"vm_count": {
|
||||
"dedicated": 2
|
||||
},
|
||||
"inter_node_communication_enabled": true,
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
pool_specification:
|
||||
id: cntk-multinode-multigpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 2
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_NC24
|
||||
inter_node_communication_enabled: true
|
||||
ssh:
|
||||
username: docker
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,14 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata",
|
||||
"command": "/cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py -- --network resnet20 -q 1 -a 0 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output",
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
job_specifications:
|
||||
- id: cntkjob
|
||||
tasks:
|
||||
- image: alfpark/cntk:2.1-gpu-1bitsgd-py35-cuda8-cudnn6-refdata
|
||||
command: /cntk/run_cntk.sh -s /cntk/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10_Distributed.py -- --network resnet20 -q 1 -a 0 --datadir /cntk/Examples/Image/DataSets/CIFAR-10 --outputdir $AZ_BATCH_TASK_WORKING_DIR/output
|
|
@ -1,21 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "cntk-singlenode-multigpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_NC24",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
pool_specification:
|
||||
id: cntk-singlenode-multigpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_NC24
|
||||
ssh:
|
||||
username: docker
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"microsoft/cntk:2.1-gpu-python3.5-cuda8.0-cudnn6.0"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- microsoft/cntk:2.1-gpu-python3.5-cuda8.0-cudnn6.0
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,14 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "cntkjob",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "microsoft/cntk:2.1-gpu-python3.5-cuda8.0-cudnn6.0",
|
||||
"command": "/bin/bash -c \"source /cntk/activate-cntk && cd /cntk/Examples/Image/DataSets/MNIST && python -u install_mnist.py && cd /cntk/Examples/Image/Classification/ConvNet/Python && python -u ConvNet_MNIST.py\"",
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
job_specifications:
|
||||
- id: cntkjob
|
||||
tasks:
|
||||
- image: microsoft/cntk:2.1-gpu-python3.5-cuda8.0-cudnn6.0
|
||||
command: /bin/bash -c "source /cntk/activate-cntk && cd /cntk/Examples/Image/DataSets/MNIST && python -u install_mnist.py && cd /cntk/Examples/Image/Classification/ConvNet/Python && python -u ConvNet_MNIST.py"
|
|
@ -1,21 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "cntk-singlenode-singlegpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_NC6",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
pool_specification:
|
||||
id: cntk-singlenode-singlegpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_NC6
|
||||
ssh:
|
||||
username: docker
|
|
@ -16,8 +16,11 @@ Other pool properties such as `publisher`, `offer`, `sku`, `vm_size` and
|
|||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
* `docker_images` array must have a reference to a valid Caffe CPU-enabled
|
||||
Docker image. [alfpark/caffe:cpu](https://hub.docker.com/r/alfpark/caffe/) can
|
||||
be used for this recipe.
|
||||
Docker image. Although you can use the official
|
||||
[BVLC/caffe](https://hub.docker.com/r/bvlc/caffe/) Docker images, for this
|
||||
recipe the [alfpark/caffe:cpu](https://hub.docker.com/r/alfpark/caffe/) Docker image
|
||||
contains all of the required files and scripts to run the MNIST convolutional
|
||||
example.
|
||||
|
||||
### Jobs Configuration
|
||||
The jobs configuration should set the following properties within the `tasks`
|
||||
|
@ -26,7 +29,8 @@ array which should have a task definition containing:
|
|||
e.g., `alfpark/caffe:cpu`
|
||||
* `command` should contain the command to pass to the Docker run invocation.
|
||||
For the `alfpark/caffe:cpu` Docker image and to run the MNIST convolutional
|
||||
example, the `command` would simply be: `"/caffe/run_mnist.sh"`
|
||||
example, we are using a [`run_mnist.sh` helper script](docker/run_mnist.sh).
|
||||
Thus, the `command` would simply be: `"/caffe/run_mnist.sh"`
|
||||
|
||||
## Dockerfile and supplementary files
|
||||
The `Dockerfile` for the Docker image can be found [here](./docker).
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/caffe:cpu"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/caffe:cpu
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,13 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "caffejob",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/caffe:cpu",
|
||||
"command": "/caffe/run_mnist.sh"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
job_specifications:
|
||||
- id: caffejob
|
||||
tasks:
|
||||
- image: alfpark/caffe:cpu
|
||||
command: /caffe/run_mnist.sh
|
|
@ -1,21 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "caffe-cpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_D1_V2",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
pool_specification:
|
||||
id: caffe-cpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_D1_V2
|
||||
ssh:
|
||||
username: docker
|
|
@ -8,11 +8,9 @@ this recipe.
|
|||
|
||||
### Pool Configuration
|
||||
The pool configuration should enable the following properties:
|
||||
* `vm_size` must be one of `STANDARD_NC6`, `STANDARD_NC12`, `STANDARD_NC24`,
|
||||
`STANDARD_NV6`, `STANDARD_NV12`, `STANDARD_NV24`. `NC` VM instances feature
|
||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because Caffe is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `vm_size` must be a GPU-enabled VM size. Because Caffe is a GPU-accelerated
|
||||
compute application, you should choose an `ND`, `NC` or `NCv2` VM instance
|
||||
size.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
|
@ -22,8 +20,11 @@ compute application, it is best to choose `NC` VM instances.
|
|||
### Global Configuration
|
||||
The global configuration should set the following properties:
|
||||
* `docker_images` array must have a reference to a valid Caffe GPU-enabled
|
||||
Docker image. [alfpark/caffe:gpu](https://hub.docker.com/r/alfpark/caffe/) can
|
||||
be used for this recipe.
|
||||
Docker image. Although you can use the official
|
||||
[BVLC/caffe](https://hub.docker.com/r/bvlc/caffe/) Docker images, for this
|
||||
recipe the [alfpark/caffe:gpu](https://hub.docker.com/r/alfpark/caffe/) image
|
||||
contains all of the required files and scripts to run the MNIST convolutional
|
||||
example.
|
||||
|
||||
### Jobs Configuration
|
||||
The jobs configuration should set the following properties within the `tasks`
|
||||
|
@ -32,10 +33,11 @@ array which should have a task definition containing:
|
|||
e.g., `alfpark/caffe:gpu`
|
||||
* `command` should contain the command to pass to the Docker run invocation.
|
||||
For the `alfpark/caffe:gpu` Docker image and to run the MNIST convolutional
|
||||
example on all available GPUs, the `command` would simply be:
|
||||
`"/caffe/run_mnist.sh -gpu all"`
|
||||
* `gpu` must be set to `true`. This enables invoking the `nvidia-docker`
|
||||
wrapper.
|
||||
example on all available GPUs, we are using a
|
||||
[`run_mnist.sh` helper script](docker/run_mnist.sh). Thus, the `command` would
|
||||
simply be: `"/caffe/run_mnist.sh -gpu all"`
|
||||
* `gpu` can be set to `true`; however, it is implicitly enabled by Batch
|
||||
Shipyard when executing on a GPU-enabled compute pool.
|
||||
|
||||
## Dockerfile and supplementary files
|
||||
The `Dockerfile` for the Docker image can be found [here](./docker).
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"alfpark/caffe:gpu"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- alfpark/caffe:gpu
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,14 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "caffejob",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "alfpark/caffe:gpu",
|
||||
"command": "/caffe/run_mnist.sh -gpu all",
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
job_specifications:
|
||||
- id: caffejob
|
||||
tasks:
|
||||
- image: alfpark/caffe:gpu
|
||||
command: /caffe/run_mnist.sh -gpu all
|
|
@ -1,21 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "caffe-gpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_NC6",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
pool_specification:
|
||||
id: caffe-gpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_NC6
|
||||
ssh:
|
||||
username: docker
|
|
@ -23,10 +23,9 @@ $CAFFE_EXAMPLES/mnist/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \
|
|||
--backend=${BACKEND}
|
||||
echo "Done."
|
||||
|
||||
# prep train spec and switch solver mode to default to cpu
|
||||
# prep train spec
|
||||
cp $CAFFE_ROOT/examples/mnist/lenet_solver.prototxt $MNIST_DIR
|
||||
cp $CAFFE_ROOT/examples/mnist/lenet_train_test.prototxt $MNIST_DIR
|
||||
sed -i 's#solver_mode: GPU#solver_mode: CPU##' $MNIST_DIR/lenet_solver.prototxt
|
||||
|
||||
# train
|
||||
$CAFFE_BIN/caffe train --solver=$MNIST_DIR/lenet_solver.prototxt $*
|
||||
|
|
|
@ -6,7 +6,7 @@ Please refer to this [set of sample configuration files](./config) for
|
|||
this recipe.
|
||||
|
||||
### Pool Configuration
|
||||
The pool configuration should enable the following properties:
|
||||
The pool configuration should enable or set the following properties:
|
||||
* `max_tasks_per_node` must be set to 1 or omitted
|
||||
|
||||
Other pool properties such as `publisher`, `offer`, `sku`, `vm_size` and
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"caffe2ai/caffe2:c2v0.8.1.cpu.full.ubuntu14.04"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- caffe2ai/caffe2:c2v0.8.1.cpu.full.ubuntu14.04
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,19 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "caffe2job",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "caffe2ai/caffe2:c2v0.8.1.cpu.full.ubuntu14.04",
|
||||
"resource_files": [
|
||||
{
|
||||
"file_path": "mnist.py",
|
||||
"blob_source": "https://raw.githubusercontent.com/Azure/batch-shipyard/master/recipes/Caffe2-CPU/scripts/mnist.py"
|
||||
}
|
||||
],
|
||||
"command": "python -u mnist.py"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
job_specifications:
|
||||
- id: caffe2job
|
||||
tasks:
|
||||
- image: caffe2ai/caffe2:c2v0.8.1.cpu.full.ubuntu14.04
|
||||
resource_files:
|
||||
- blob_source: https://raw.githubusercontent.com/Azure/batch-shipyard/master/recipes/Caffe2-CPU/scripts/mnist.py
|
||||
file_path: mnist.py
|
||||
command: python -u mnist.py
|
|
@ -1,21 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "caffe2-cpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_D1_V2",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
pool_specification:
|
||||
id: caffe2-cpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_D1_V2
|
||||
ssh:
|
||||
username: docker
|
|
@ -8,16 +8,14 @@ this recipe.
|
|||
|
||||
### Pool Configuration
|
||||
The pool configuration should enable the following properties:
|
||||
* `vm_size` must be one of `STANDARD_NC6`, `STANDARD_NC12`, `STANDARD_NC24`,
|
||||
`STANDARD_NV6`, `STANDARD_NV12`, `STANDARD_NV24`. `NC` VM instances feature
|
||||
K80 GPUs for GPU compute acceleration while `NV` VM instances feature
|
||||
M60 GPUs for visualization workloads. Because Caffe2 is a GPU-accelerated
|
||||
compute application, it is best to choose `NC` VM instances.
|
||||
* `vm_size` must be a GPU-enabled VM size. Because Caffe2 is a GPU-accelerated
|
||||
compute application, you should choose an `ND`, `NC` or `NCv2` VM instance
|
||||
size.
|
||||
* `vm_configuration` is the VM configuration
|
||||
* `platform_image` specifies to use a platform image
|
||||
* `publisher` should be `Canonical` or `OpenLogic`.
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic.
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS.
|
||||
* `publisher` should be `Canonical` or `OpenLogic`
|
||||
* `offer` should be `UbuntuServer` for Canonical or `CentOS` for OpenLogic
|
||||
* `sku` should be `16.04-LTS` for Ubuntu or `7.3` for CentOS
|
||||
|
||||
Other pool properties such as `publisher`, `offer`, `sku`, `vm_size` and
|
||||
`vm_count` should be set to your desired values.
|
||||
|
@ -42,5 +40,5 @@ the download of the training file from the web endpoint:
|
|||
* `command` should contain the command to pass to the Docker run invocation.
|
||||
For the `caffe2ai/caffe2` Docker image and the sample script above, the
|
||||
`command` would be: `python -u mnist.py --gpu`
|
||||
* `gpu` must be set to `true`. This enables invoking the `nvidia-docker`
|
||||
wrapper.
|
||||
* `gpu` can be set to `true`; however, it is implicitly enabled by Batch
|
||||
Shipyard when executing on a GPU-enabled compute pool.
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"caffe2ai/caffe2"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- caffe2ai/caffe2
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
credentials:
|
||||
batch:
|
||||
account_key: <batch account key>
|
||||
account_service_url: <batch account service url>
|
||||
storage:
|
||||
mystorageaccount:
|
||||
account: <storage account name>
|
||||
account_key: <storage account key>
|
||||
endpoint: core.windows.net
|
|
@ -1,20 +0,0 @@
|
|||
{
|
||||
"job_specifications": [
|
||||
{
|
||||
"id": "caffe2job",
|
||||
"tasks": [
|
||||
{
|
||||
"image": "caffe2ai/caffe2",
|
||||
"resource_files": [
|
||||
{
|
||||
"file_path": "mnist.py",
|
||||
"blob_source": "https://raw.githubusercontent.com/Azure/batch-shipyard/master/recipes/Caffe2-CPU/scripts/mnist.py"
|
||||
}
|
||||
],
|
||||
"command": "python -u mnist.py --gpu",
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
job_specifications:
|
||||
- id: caffe2job
|
||||
tasks:
|
||||
- image: caffe2ai/caffe2
|
||||
resource_files:
|
||||
- blob_source: https://raw.githubusercontent.com/Azure/batch-shipyard/master/recipes/Caffe2-CPU/scripts/mnist.py
|
||||
file_path: mnist.py
|
||||
command: python -u mnist.py --gpu
|
|
@ -1,21 +0,0 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "caffe2-gpu",
|
||||
"vm_configuration": {
|
||||
"platform_image": {
|
||||
"publisher": "Canonical",
|
||||
"offer": "UbuntuServer",
|
||||
"sku": "16.04-LTS"
|
||||
}
|
||||
},
|
||||
"vm_size": "STANDARD_NC6",
|
||||
"vm_count": {
|
||||
"dedicated": 1
|
||||
},
|
||||
"ssh": {
|
||||
"username": "docker"
|
||||
},
|
||||
"reboot_on_start_task_failed": false,
|
||||
"block_until_all_global_resources_loaded": true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
pool_specification:
|
||||
id: caffe2-gpu
|
||||
vm_configuration:
|
||||
platform_image:
|
||||
offer: UbuntuServer
|
||||
publisher: Canonical
|
||||
sku: 16.04-LTS
|
||||
vm_count:
|
||||
dedicated: 1
|
||||
low_priority: 0
|
||||
vm_size: STANDARD_NC6
|
||||
ssh:
|
||||
username: docker
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"batch_shipyard": {
|
||||
"storage_account_settings": "mystorageaccount"
|
||||
},
|
||||
"global_resources": {
|
||||
"docker_images": [
|
||||
"chainer/chainer"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
batch_shipyard:
|
||||
storage_account_settings: mystorageaccount
|
||||
global_resources:
|
||||
docker_images:
|
||||
- chainer/chainer
|
|
@ -1,15 +0,0 @@
|
|||
{
|
||||
"credentials": {
|
||||
"batch": {
|
||||
"account_key": "<batch account key>",
|
||||
"account_service_url": "<batch account service url>"
|
||||
},
|
||||
"storage": {
|
||||
"mystorageaccount": {
|
||||
"account": "<storage account name>",
|
||||
"account_key": "<storage account key>",
|
||||
"endpoint": "core.windows.net"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче