From edacb925902c1d044c4570b437dab33c22eaf504 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Wed, 22 Feb 2017 19:23:51 -0800 Subject: [PATCH] Add Chainer recipes --- CHANGELOG.md | 4 +++ docs/02-batch-shipyard-quickstart.md | 4 ++- recipes/Chainer-CPU/README.md | 30 ++++++++++++++++ recipes/Chainer-CPU/config/config.json | 10 ++++++ recipes/Chainer-CPU/config/credentials.json | 16 +++++++++ recipes/Chainer-CPU/config/jobs.json | 14 ++++++++ recipes/Chainer-CPU/config/pool.json | 15 ++++++++ recipes/Chainer-GPU/README.md | 39 +++++++++++++++++++++ recipes/Chainer-GPU/config/config.json | 10 ++++++ recipes/Chainer-GPU/config/credentials.json | 16 +++++++++ recipes/Chainer-GPU/config/jobs.json | 15 ++++++++ recipes/Chainer-GPU/config/pool.json | 15 ++++++++ recipes/README.md | 8 +++++ 13 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 recipes/Chainer-CPU/README.md create mode 100644 recipes/Chainer-CPU/config/config.json create mode 100644 recipes/Chainer-CPU/config/credentials.json create mode 100644 recipes/Chainer-CPU/config/jobs.json create mode 100644 recipes/Chainer-CPU/config/pool.json create mode 100644 recipes/Chainer-GPU/README.md create mode 100644 recipes/Chainer-GPU/config/config.json create mode 100644 recipes/Chainer-GPU/config/credentials.json create mode 100644 recipes/Chainer-GPU/config/jobs.json create mode 100644 recipes/Chainer-GPU/config/pool.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 24ac962..4fc36a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,16 @@ # Change Log ## [Unreleased] +### Added +- Chainer-CPU and Chainer-GPU recipes + ### Changed - Allow NAMD-TCP recipe to be run on a single node ### Fixed - CNTK-GPU-OpenMPI recipe to allow multinode singlegpu executions - TensorFlow recipes fixed for 1.0.0 release +- blobxfer data ingress on Windows (#39) ## [2.5.1] - 2017-02-01 ### Added diff --git a/docs/02-batch-shipyard-quickstart.md b/docs/02-batch-shipyard-quickstart.md index df3e346..5a3a9b2 100644 --- a/docs/02-batch-shipyard-quickstart.md +++ b/docs/02-batch-shipyard-quickstart.md @@ -8,6 +8,7 @@ quickstart example, you may select any of the following Deep Learning frameworks to quickly get started: * [CNTK-CPU-OpenMPI](../recipes/CNTK-CPU-OpenMPI) * [Caffe-CPU](../recipes/Caffe-CPU) +* [Chainer-CPU](../recipes/Chainer-CPU) * [Keras+Theano-CPU](../recipes/Keras+Theano-CPU) * [MXNet-CPU](../recipes/MXNet-CPU) * [TensorFlow-CPU](../recipes/TensorFlow-CPU) @@ -27,6 +28,7 @@ guide, create a directory named `config`. 
of your choice to the `config` directory: * [CNTK-CPU-OpenMPI](../recipes/CNTK-CPU-OpenMPI/config/singlenode/) * [Caffe-CPU](../recipes/Caffe-CPU/config/) + * [Chainer-CPU](../recipes/Chainer-CPU/config/) * [Keras+Theano-CPU](../recipes/Keras+Theano-CPU/config/) * [MXNet-CPU](../recipes/MXNet-CPU/config/singlenode/) * [TensorFlow-CPU](../recipes/TensorFlow-CPU/config/) @@ -62,7 +64,7 @@ SHIPYARD_CONFIGDIR=config ./shipyard pool add # add the training job and tail the output # if CNTK-CPU-OpenMPI or Caffe-CPU ./shipyard jobs add --configdir config --tail stderr.txt -# if Keras+Theano-CPU, MXNet-CPU, TensorFlow-CPU, or Torch-CPU +# if Chainer-CPU, Keras+Theano-CPU, MXNet-CPU, TensorFlow-CPU, or Torch-CPU ./shipyard jobs add --configdir config --tail stdout.txt ``` The `--tail` option of the `jobs add` command will stream the stderr or stdout diff --git a/recipes/Chainer-CPU/README.md b/recipes/Chainer-CPU/README.md new file mode 100644 index 0000000..b766c89 --- /dev/null +++ b/recipes/Chainer-CPU/README.md @@ -0,0 +1,30 @@ +# Chainer-CPU +This recipe shows how to run [Chainer](http://chainer.org/) on +a single node using CPU only. + +## Configuration +Please refer to this [set of sample configuration files](./config) for +this recipe. + +### Pool Configuration +The pool configuration should set the following properties:
* `max_tasks_per_node` must be set to 1 or omitted + +Other pool properties such as `publisher`, `offer`, `sku`, `vm_size` and +`vm_count` should be set to your desired values. + +### Global Configuration
The global configuration should set the following properties: +* `docker_images` array must have a reference to a valid Chainer CPU-enabled
Docker image. The official [chainer](https://hub.docker.com/r/chainer/chainer/) +Docker image can be used for this recipe. + +### Jobs Configuration
The jobs configuration should set the following properties within the `tasks`
array, which should have a task definition containing: +* `image` should be the name of the Docker image for this container invocation,
e.g., `chainer/chainer` +* `command` should contain the command to pass to the Docker run invocation.
+For the `chainer/chainer` Docker image and to run the MNIST MLP example, the +`command` would be: +`"/bin/bash -c \"python -c \\\"import requests; print(requests.get(\\\\\\\"https://raw.githubusercontent.com/pfnet/chainer/master/examples/mnist/train_mnist.py\\\\\\\").text)\\\" > train_mnist.py && python -u train_mnist.py\""` diff --git a/recipes/Chainer-CPU/config/config.json b/recipes/Chainer-CPU/config/config.json new file mode 100644 index 0000000..f7b5870 --- /dev/null +++ b/recipes/Chainer-CPU/config/config.json @@ -0,0 +1,10 @@ +{ + "batch_shipyard": { + "storage_account_settings": "" + }, + "global_resources": { + "docker_images": [ + "chainer/chainer" + ] + } +} diff --git a/recipes/Chainer-CPU/config/credentials.json b/recipes/Chainer-CPU/config/credentials.json new file mode 100644 index 0000000..451e167 --- /dev/null +++ b/recipes/Chainer-CPU/config/credentials.json @@ -0,0 +1,16 @@ +{ + "credentials": { + "batch": { + "account": "", + "account_key": "", + "account_service_url": "" + }, + "storage": { + "mystorageaccount": { + "account": "", + "account_key": "", + "endpoint": "core.windows.net" + } + } + } +} diff --git a/recipes/Chainer-CPU/config/jobs.json b/recipes/Chainer-CPU/config/jobs.json new file mode 100644 index 0000000..e5e0083 --- /dev/null +++ b/recipes/Chainer-CPU/config/jobs.json @@ -0,0 +1,14 @@ +{ + "job_specifications": [ + { + "id": "chainerjob", + "tasks": [ + { + "image": "chainer/chainer", + "remove_container_after_exit": true, + "command": "/bin/bash -c \"python -c \\\"import requests; print(requests.get(\\\\\\\"https://raw.githubusercontent.com/pfnet/chainer/master/examples/mnist/train_mnist.py\\\\\\\").text)\\\" > train_mnist.py && python -u train_mnist.py\"" + } + ] + } + ] +} diff --git a/recipes/Chainer-CPU/config/pool.json b/recipes/Chainer-CPU/config/pool.json new file mode 100644 index 0000000..ab98728 --- /dev/null +++ b/recipes/Chainer-CPU/config/pool.json @@ -0,0 +1,15 @@ +{ + "pool_specification": { + "id": "chainer-cpu", + "vm_size": "STANDARD_D1_V2", + "vm_count": 1, + "publisher": "Canonical", + "offer": "UbuntuServer", + "sku": "16.04.0-LTS", + "ssh": { + "username": "docker" + }, + "reboot_on_start_task_failed": false, + "block_until_all_global_resources_loaded": true + } +} diff --git a/recipes/Chainer-GPU/README.md b/recipes/Chainer-GPU/README.md new file mode 100644 index 0000000..5518bb7 --- /dev/null +++ b/recipes/Chainer-GPU/README.md @@ -0,0 +1,39 @@ +# Chainer-GPU +This recipe shows how to run [Chainer](http://chainer.org/) on +GPUs using N-series Azure VM instances in an Azure Batch compute pool. + +## Configuration +Please refer to this [set of sample configuration files](./config) for +this recipe. + +### Pool Configuration +The pool configuration should set the following properties: +* `vm_size` must be one of `STANDARD_NC6`, `STANDARD_NC12`, `STANDARD_NC24`, +`STANDARD_NV6`, `STANDARD_NV12`, `STANDARD_NV24`. `NC` VM instances feature +K80 GPUs for GPU compute acceleration while `NV` VM instances feature +M60 GPUs for visualization workloads. Because Chainer is a GPU-accelerated +compute application, it is best to choose `NC` VM instances. +* `publisher` should be `Canonical`. Other publishers will be supported +once they are available for N-series VMs. +* `offer` should be `UbuntuServer`. Other offers will be supported once they +are available for N-series VMs. +* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are +available for N-series VMs.
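As an aside on the heavily escaped `command` strings used in the Chainer-CPU jobs configuration above and the Chainer-GPU configuration below: once the JSON and shell quoting layers are unwound, the task is roughly equivalent to running the following two commands inside the `chainer/chainer` container. This is an illustrative sketch only, not part of the recipe files; the GPU variant simply appends `-g 0` to train on the first GPU device.

```shell
# download the MNIST MLP example script from the Chainer repository
python -c 'import requests; print(requests.get("https://raw.githubusercontent.com/pfnet/chainer/master/examples/mnist/train_mnist.py").text)' > train_mnist.py
# run the downloaded training script with unbuffered output
python -u train_mnist.py
```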
+ +### Global Configuration +The global configuration should set the following properties: +* `docker_images` array must have a reference to a valid Chainer GPU-enabled +Docker image. The official [chainer](https://hub.docker.com/r/chainer/chainer/) +Docker image can be used for this recipe. + +### Jobs Configuration +The jobs configuration should set the following properties within the `tasks` +array, which should have a task definition containing: +* `image` should be the name of the Docker image for this container invocation, +e.g., `chainer/chainer` +* `command` should contain the command to pass to the Docker run invocation. +For the `chainer/chainer` Docker image and to run the MNIST MLP example on +a single GPU, the `command` would be: +`"/bin/bash -c \"python -c \\\"import requests; print(requests.get(\\\\\\\"https://raw.githubusercontent.com/pfnet/chainer/master/examples/mnist/train_mnist.py\\\\\\\").text)\\\" > train_mnist.py && python -u train_mnist.py -g 0\""` +* `gpu` must be set to `true`. This enables invoking the `nvidia-docker` +wrapper. diff --git a/recipes/Chainer-GPU/config/config.json b/recipes/Chainer-GPU/config/config.json new file mode 100644 index 0000000..f7b5870 --- /dev/null +++ b/recipes/Chainer-GPU/config/config.json @@ -0,0 +1,10 @@ +{ + "batch_shipyard": { + "storage_account_settings": "" + }, + "global_resources": { + "docker_images": [ + "chainer/chainer" + ] + } +} diff --git a/recipes/Chainer-GPU/config/credentials.json b/recipes/Chainer-GPU/config/credentials.json new file mode 100644 index 0000000..451e167 --- /dev/null +++ b/recipes/Chainer-GPU/config/credentials.json @@ -0,0 +1,16 @@ +{ + "credentials": { + "batch": { + "account": "", + "account_key": "", + "account_service_url": "" + }, + "storage": { + "mystorageaccount": { + "account": "", + "account_key": "", + "endpoint": "core.windows.net" + } + } + } +} diff --git a/recipes/Chainer-GPU/config/jobs.json b/recipes/Chainer-GPU/config/jobs.json new file mode 100644 index 0000000..621cadb --- /dev/null +++ b/recipes/Chainer-GPU/config/jobs.json @@ -0,0 +1,15 @@ +{ + "job_specifications": [ + { + "id": "chainerjob", + "tasks": [ + { + "image": "chainer/chainer", + "remove_container_after_exit": true, + "command": "/bin/bash -c \"python -c \\\"import requests; print(requests.get(\\\\\\\"https://raw.githubusercontent.com/pfnet/chainer/master/examples/mnist/train_mnist.py\\\\\\\").text)\\\" > train_mnist.py && python -u train_mnist.py -g 0\"", + "gpu": true + } + ] + } + ] +} diff --git a/recipes/Chainer-GPU/config/pool.json b/recipes/Chainer-GPU/config/pool.json new file mode 100644 index 0000000..5e0832f --- /dev/null +++ b/recipes/Chainer-GPU/config/pool.json @@ -0,0 +1,15 @@ +{ + "pool_specification": { + "id": "chainer-gpu", + "vm_size": "STANDARD_NC6", + "vm_count": 1, + "publisher": "Canonical", + "offer": "UbuntuServer", + "sku": "16.04.0-LTS", + "ssh": { + "username": "docker" + }, + "reboot_on_start_task_failed": false, + "block_until_all_global_resources_loaded": true + } +} diff --git a/recipes/README.md b/recipes/README.md index 0ab96b4..1d72635 100644 --- a/recipes/README.md +++ b/recipes/README.md @@ -70,6 +70,14 @@ This Caffe-GPU recipe contains information on how to Dockerize [Caffe](http://caffe.berkeleyvision.org/) on GPUs for use with N-Series Azure VMs. +#### [Chainer-CPU](./Chainer-CPU) +This Chainer-CPU recipe contains information on how to Dockerize +[Chainer](http://chainer.org/) for use on Azure Batch compute nodes.
+ +#### [Chainer-GPU](./Chainer-GPU) +This Chainer-GPU recipe contains information on how to Dockerize +[Chainer](http://chainer.org/) on GPUs for use with N-Series Azure VMs. + #### [Keras+Theano-CPU](./Keras+Theano-CPU) This Keras+Theano-CPU recipe contains information on how to Dockerize [Keras](https://keras.io/) with the