Fix CNTK sample configs
This commit is contained in:
Родитель
7254e217ec
Коммит
f16e185520
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"pool_specification": {
|
||||
"id": "<your pool id>",
|
||||
"vm_size": "<vm size>",
|
||||
"vm_size": "STANDARD_F1",
|
||||
"vm_count": 3,
|
||||
"inter_node_communication_enabled": true,
|
||||
"publisher": "Canonical",
|
||||
|
|
|
@ -69,7 +69,8 @@ For this example, we will run the Multigpu Simple2d Example in the
|
|||
`alfpark/cntk:gpu-openmpi` Docker image. The application `command` to run
|
||||
would be:
|
||||
`"mpirun --allow-run-as-root --host $AZ_BATCH_HOST_LIST --mca btl_tcp_if_exclude docker0 /bin/bash -c \"export LD_LIBRARY_PATH=/usr/local/openblas/lib:/usr/local/nvidia/lib64 && cp -r /cntk/Examples/Other/Simple2d/* . && /cntk/build/gpu/release/bin/cntk configFile=Config/Multigpu.cntk RootDir=. parallelTrain=true\""`
|
||||
* **NOTE:** tasks that span multiple compute nodes will need their output
|
||||
* **NOTE:** tasks that span multiple compute nodes
|
||||
(i.e., MultiNode+SingleGPU or MultiNode+MultiGPU) will need their output
|
||||
stored on a shared file system, otherwise CNTK will fail during test
|
||||
as all of the output is written by rank 0 to the specified output
|
||||
directory only on that compute node. To override the output directory for
|
||||
|
|
|
@ -11,11 +11,12 @@
|
|||
"shared_data_volumes": [
|
||||
"<azure file shared volume name>"
|
||||
],
|
||||
"command": "mpirun --allow-run-as-root --host $AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST --mca btl_tcp_if_exclude docker0 /bin/bash -c \"cp -r /cntk/Examples/Other/Simple2d/* . && /cntk/build/gpu/release/bin/cntk configFile=Config/Multigpu.cntk RootDir=. OutputDir=$AZ_BATCH_NODE_SHARED_DIR/azurefileshare/Output parallelTrain=true\"",
|
||||
"command": "mpirun --allow-run-as-root --host $AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST --mca btl_tcp_if_exclude docker0 /bin/bash -c \"export LD_LIBRARY_PATH=/usr/local/openblas/lib:/usr/local/nvidia/lib64 && cp -r /cntk/Examples/Other/Simple2d/* . && /cntk/build/gpu/release/bin/cntk configFile=Config/Multigpu.cntk RootDir=. OutputDir=$AZ_BATCH_NODE_SHARED_DIR/azurefileshare/Output parallelTrain=true\"",
|
||||
"multi_instance": {
|
||||
"num_instances": "pool_specification_vm_count",
|
||||
"coordination_command": null
|
||||
}
|
||||
},
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -8,11 +8,12 @@
|
|||
"image": "alfpark/cntk:gpu-openmpi",
|
||||
"name": "cntk",
|
||||
"remove_container_after_exit": true,
|
||||
"command": "mpirun --allow-run-as-root --host $AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST --mca btl_tcp_if_exclude docker0 /bin/bash -c \"cp -r /cntk/Examples/Other/Simple2d/* . && /cntk/build/gpu/release/bin/cntk configFile=Config/Multigpu.cntk RootDir=. OutputDir=$AZ_BATCH_NODE_SHARED_DIR/azurefileshare/Output parallelTrain=true\"",
|
||||
"command": "mpirun --allow-run-as-root --host $AZ_BATCH_HOST_LIST,$AZ_BATCH_HOST_LIST --mca btl_tcp_if_exclude docker0 /bin/bash -c \"export LD_LIBRARY_PATH=/usr/local/openblas/lib:/usr/local/nvidia/lib64 && cp -r /cntk/Examples/Other/Simple2d/* . && /cntk/build/gpu/release/bin/cntk configFile=Config/Multigpu.cntk RootDir=. parallelTrain=true\"",
|
||||
"multi_instance": {
|
||||
"num_instances": "pool_specification_vm_count",
|
||||
"coordination_command": null
|
||||
}
|
||||
},
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
"image": "alfpark/cntk:gpu-openmpi-mnist-cifar",
|
||||
"remove_container_after_exit": true,
|
||||
"command": "/bin/bash -c \"cp -r /cntk/Examples/Image/MNIST/* . && /cntk/build/gpu/release/bin/cntk configFile=Config/02_Convolution_ndl_deprecated.cntk RootDir=.\""
|
||||
}
|
||||
},
|
||||
"gpu": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
# Batch Shipyard Recipes
|
||||
This directory contains recipes and sample batch-style Docker workloads for
|
||||
use with Batch Shipyard on Azure Batch. Please note that all recipes have
|
||||
sample configurations that you can use to quickly get going. However, as they
|
||||
are samples only, you will need to configure them to your liking for actual
|
||||
sample configurations that you can use to quickly get going. However, some
|
||||
of the config files cannot be used as-is because they need input such as
|
||||
the pool `id` in `pool_specification`. Please review each config file you
|
||||
are planning on using and modify accordingly. As the config samples are bare
|
||||
examples only, you will need to configure them to your liking for actual
|
||||
workloads.
|
||||
|
||||
**NOTE: Not all recipes are populated.**
|
||||
|
|
Загрузка…
Ссылка в новой задаче