dan2,cudamatrix: GPU selection logic is simplified; the --use-gpu-id=N option is replaced by --use-gpu=yes|no|optional:

"yes" -- Select GPU automatically (or get one by exclusive mode) and die if this fails. 
"optionaly" -- Do as above, but if it fails, back off to CPU
"no"  -- Run on CPU.

The recommended GPU setup is compute-exclusive mode; otherwise the GPU is selected automatically, by the largest proportion of free memory.
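
In practice, each CUDA-enabled binary now registers a string option and passes it straight to CuDevice, as the tool diffs below show. A minimal sketch of the new calling convention (the option name and the SelectGpuId() call come from this commit; the main() scaffolding around them is illustrative only):

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "cudamatrix/cu-device.h"

int main(int argc, char *argv[]) {
  using namespace kaldi;
  ParseOptions po("Usage: example [options]");
  std::string use_gpu = "yes";
  po.Register("use-gpu", &use_gpu,
              "yes|no|optional, only has effect if compiled with CUDA");
  po.Read(argc, argv);
#if HAVE_CUDA == 1
  // Must be called once, before the first cudamatrix allocation,
  // or not at all (in which case everything runs on the CPU).
  CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
  // ... computation with CuMatrix/CuVector follows ...
  return 0;
}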



git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3117 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Karel Vesely 2013-10-25 23:09:58 +00:00
Parent b2ab82374b
Commit 2be9da4cbb
29 changed files: 148 additions and 263 deletions

View file

@@ -25,7 +25,7 @@ scoring_opts="--min-lmwt 4 --max-lmwt 15"
num_threads=1 # if >1, will use latgen-faster-parallel
parallel_opts="-pe smp $((num_threads+1))" # use 2 CPUs (1 DNN-forward, 1 decoder)
use_gpu_id=-1 # -1 disable gpu
use_gpu="no" # yes|no|optionaly
# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -104,7 +104,7 @@ fi
# Run the decoding in the queue
if [ $stage -le 0 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet "$feats" ark:- \| \
nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
latgen-faster-mapped$thread_string --max-active=$max_active --max-mem=$max_mem --beam=$beam \
--lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;

View file

@@ -46,7 +46,6 @@ train_opts= # options, passed to the training script
train_tool= # optionally change the training tool
# OTHER
use_gpu_id= # manually select GPU id to run on, (-1 disables GPU)
analyze_alignments=true # run the alignment analysis script
seed=777 # seed value used for training data shuffling and initialization
# End configuration.
@@ -258,7 +257,7 @@ else
feature_transform_old=$feature_transform
feature_transform=${feature_transform%.nnet}_cmvn-g.nnet
echo "Renormalizing MLP input features into $feature_transform"
nnet-forward ${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
nnet-forward --use-gpu=yes \
$feature_transform_old "$(echo $feats_tr | sed 's|train.scp|train.scp.10k|')" \
ark:- 2>$dir/log/nnet-forward-cmvn.log |\
compute-cmvn-stats ark:- - | cmvn-to-nnet - - |\
@@ -315,7 +314,6 @@ steps/train_nnet_scheduler.sh \
${train_opts} \
${train_tool:+ --train-tool "$train_tool"} \
${config:+ --config $config} \
${use_gpu_id:+ --use-gpu-id $use_gpu_id} \
$mlp_init "$feats_tr" "$feats_cv" "$labels_tr" "$labels_cv" $dir || exit 1

View file

@@ -21,7 +21,6 @@ learn_rate=0.00001
halving_factor=1.0 # i.e., disable halving
drop_frames=true
verbose=1
use_gpu_id=
seed=777 # seed value used for training data shuffling
# End configuration section
@@ -168,7 +167,6 @@ while [ $x -le $num_iters ]; do
--learn-rate=$learn_rate \
--drop-frames=$drop_frames \
--verbose=$verbose \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet || exit 1
fi
cur_mdl=$dir/$x.nnet

View file

@@ -21,7 +21,6 @@ halving_factor=1.0 # i.e., disable halving
do_smbr=true
use_silphones=false # if true, pass the list of silence phones to nnet-mpe
verbose=1
use_gpu_id=
seed=777 # seed value used for training data shuffling
# End configuration section
@@ -151,7 +150,6 @@ while [ $x -le $num_iters ]; do
--do-smbr=$do_smbr \
--verbose=$verbose \
$mpe_silphones_arg \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet || exit 1
fi
cur_mdl=$dir/$x.nnet

View file

@@ -25,8 +25,6 @@ end_halving_inc=0.1
halving_factor=0.5
# misc.
verbose=1
# gpu
use_gpu_id=
# tool
train_tool="nnet-train-xent-hardlab-frmshuff"
@@ -73,7 +71,6 @@ mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*}
$train_tool --cross-validate=true \
--bunchsize=$bunch_size --cachesize=$cache_size --verbose=$verbose \
${feature_transform:+ --feature-transform=$feature_transform} \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$mlp_best "$feats_cv" "$labels_cv" \
2> $dir/log/prerun.log || exit 1;
@@ -97,7 +94,6 @@ for iter in $(seq -w $max_iters); do
--learn-rate=$learn_rate --momentum=$momentum --l1-penalty=$l1_penalty --l2-penalty=$l2_penalty \
--bunchsize=$bunch_size --cachesize=$cache_size --randomize=true --verbose=$verbose \
${feature_transform:+ --feature-transform=$feature_transform} \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
${seed:+ --seed=$seed} \
$mlp_best "$feats_tr" "$labels_tr" $mlp_next \
2> $dir/log/iter$iter.log || exit 1;
@@ -110,7 +106,6 @@ for iter in $(seq -w $max_iters); do
$train_tool --cross-validate=true \
--bunchsize=$bunch_size --cachesize=$cache_size --verbose=$verbose \
${feature_transform:+ --feature-transform=$feature_transform} \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$mlp_next "$feats_cv" "$labels_cv" \
2>>$dir/log/iter$iter.log || exit 1;

View file

@@ -102,9 +102,9 @@ int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no");
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes");
#endif
//kaldi::UnitTestCuArray<float>();

View file

@@ -212,9 +212,9 @@ int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection
#endif
kaldi::CuBlockMatrixUnitTest<float>();

View file

@@ -27,6 +27,7 @@
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <string>
#include <vector>
#include <algorithm>
#include <dlfcn.h>
@@ -40,105 +41,113 @@
namespace kaldi {
/**
* SelectGpuId(gpu_id)
* SelectGpuId(use_gpu)
*
* The argument 'gpu_id' meaning: 0..N selects a GPU,
* -1 disables CUDA, -2 performs GPU auto-detection.
* There are 3 'use_gpu' modes for GPU selection:
* "yes" -- Select GPU automatically (or get one by exclusive mode)
* and die if this fails.
* "optional" -- Do as above, but if it fails, back off to CPU.
* "no" -- Run on CPU.
*
* If there is no GPU in the system, and we have GPU auto-detection,
* or GPU is manually disabled the computation will run on CPU.
* In other cases it is an error (manual selection).
* In compute-exclusive mode, the GPU is selected by the OS.
*
* In case of Compute exclusive mode, the GPU is selected by OS,
* this has priority over manual/auto selection of GPU.
* Otherwise the GPU is selected by the largest proportion of free memory.
* This can eventually lead to multiple processes computing on a single GPU,
* which is slow. It is more practical to use "compute exclusive mode".
*
* Since the autoselection of GPU is not perfect, it may still
* happen that two processes compute on single GPU, which is slow.
* The users are advised to use manual selection or exclusive mode.
*
* This method must be called at the very beginning of the program
* (before the cudamatrix objects allocate memory for the data),
* or not at all (when we intentionally want to run on the CPU).
* This method is to be called at the very beginning of the program
* (before the first allocation in cudamatrix), or not at all (defaulting to the CPU).
*
*/
void CuDevice::SelectGpuId(int32 gpu_id, bool abort_on_error) {
void CuDevice::SelectGpuId(std::string use_gpu) {
// Possible modes
if (use_gpu != "yes" && use_gpu != "no" && use_gpu != "optionaly") {
KALDI_ERR << "Please choose : --use-gpu=yes|no|optionaly, passed '" << use_gpu << "'";
}
// Make sure this function is not called twice!
if (Enabled()) {
KALDI_ERR << "There is already an active GPU " << active_gpu_id_
<< ", cannot change it on the fly!";
}
// Allow the GPU to stay disabled
if(!Enabled() && gpu_id == -1) {
KALDI_LOG << "Selected device: " << gpu_id
<< ", we don't even try to get a GPU. We run on CPU.";
active_gpu_id_ = -1;
if(!Enabled() && use_gpu == "no") {
KALDI_LOG << "Manually selected to compute on CPU.";
return;
}
// Check that we have a gpu available
int32 n_gpu = 0;
cudaGetDeviceCount(&n_gpu);
if(n_gpu == 0) {
// If we do automatic selection and no GPU is found, we run on a CPU
if (abort_on_error) {
KALDI_ERR << "No CUDA capable GPU was detected";
} else {
KALDI_WARN << "CUDA will NOT be used!!! No CUDA capable GPU detected...";
active_gpu_id_ = -2;
if (use_gpu == "yes") {
KALDI_ERR << "No CUDA GPU detected!";
}
if (use_gpu == "optional") {
KALDI_WARN << "Running on CPU!!! No CUDA GPU detected...";
return;
}
}
// Now we know that there is a GPU in the system,
// and we don't want to have it disabled.
//
// For the GPU selection there are 3 possibilities,
// with priorities according to the order:
// Create a CUDA context: in compute-exclusive mode the OS selects the gpu_id,
// otherwise the default gpu_id=0 is used. If no GPU is free, the context
// cannot be created (compute-exclusive mode).
//
// 1.) We have compute exclusive mode on (GPU is selected by OS)
// 2.) User did not specify the GPU-id (default value -2),
// we will do automatic selection.
// 3.) User specified the GPU to run on, so we select it.
bool error;
if (IsComputeExclusive(&error)) {
FinalizeActiveGpu();
return;
}
if (error) { // There was some error detecting compute-exclusive status
// (perhaps no GPU available). Sleep a bit and retry.
cudaError_t e;
e = cudaThreadSynchronize(); //<< CUDA context gets created here.
if (e != cudaSuccess) {
// So far we don't have a context; sleep a bit and retry.
int32 sec_sleep = 2;
KALDI_WARN << "Will try again to get a GPU after " << sec_sleep
<< " seconds.";
sleep(sec_sleep);
if (IsComputeExclusive(&error)) {
FinalizeActiveGpu();
return;
} else {
if (abort_on_error) {
KALDI_ERR << "Error acquiring GPU in exclusive mode.";
} else {
KALDI_WARN << "Error selecting GPU. CUDA will NOT be used!!!.";
active_gpu_id_ = -2;
//
e = cudaThreadSynchronize(); //<< 2nd attempt to get a CUDA context.
if (e != cudaSuccess) {
if (use_gpu == "yes") {
KALDI_ERR << "Failed to create CUDA context, no more unused GPUs?";
}
if (use_gpu == "optional") {
KALDI_WARN << "Running on CPU!!! No more unused CUDA GPUs?";
return;
}
}
}
bool ans = (gpu_id == -2 ? SelectGpuIdAuto() : SelectGpuIdManual(gpu_id));
if (ans) {
// Make sure we still have the context
KALDI_ASSERT(cudaSuccess == cudaThreadSynchronize());
// Check if the machine uses compute-exclusive mode
if (IsComputeExclusive()) {
FinalizeActiveGpu();
return;
} else {
if (abort_on_error) {
KALDI_ERR << "Error acquiring GPU.";
} else {
KALDI_WARN << "Error selecting GPU. CUDA will NOT be used!!!.";
active_gpu_id_ = -2;
// Suggest using compute-exclusive mode if there are multiple GPUs
if(n_gpu > 1) {
KALDI_WARN << "Hint: It is practical to set the GPUs into ``compute exclusive mode''."
<< " Selection of free GPUs would be done by OS automatically.";
}
// And select the GPU according to proportion of free memory
if(SelectGpuIdAuto()) {
FinalizeActiveGpu();
return;
} else {
// Could not get GPU, after previously having the CUDA context?
// Strange but not impossible...
if (use_gpu == "yes") {
KALDI_ERR << "Error acquiring GPU.";
}
if (use_gpu == "optional") {
KALDI_WARN << "Running on CPU!!! Error acquiring GPU.";
return;
}
}
}
}
void CuDevice::FinalizeActiveGpu() {
// The device at this point should have active GPU, so we can query its name
// and memory stats and notify user which GPU is finally used.
@@ -171,41 +180,6 @@ void CuDevice::FinalizeActiveGpu() {
return;
}
bool CuDevice::SelectGpuIdManual(int32 gpu_id) {
// The user selected a particular GPU using --use-gpu-id=X; try to select
// that one.
int32 ret = cudaSetDevice(gpu_id);
//handle the possible errors (no recovery!!!)
switch(ret) {
case cudaSuccess : {
//create the GPU context
cudaError_t e;
e = cudaThreadSynchronize(); //deprecated, but for legacy not cudaDeviceSynchronize
if(e != cudaSuccess) {
KALDI_WARN << "Failed to create CUDA context on a GPU.";
return false;
}
//this was okay, so we are done!
KALDI_LOG << "Selected device: " << gpu_id << " (manually)";
return true;
}
case cudaErrorInvalidDevice : {
int32 n_gpu = 0;
cudaGetDeviceCount(&n_gpu);
KALDI_WARN << "cudaSetDevice(" << gpu_id << "):"
<< " '" << gpu_id << "' is not a VALID CUDA device! "
<< " (system has " << n_gpu << " GPUs,"
<< " valid IDs 0.." << n_gpu-1 << ")";
return false;
}
default :
KALDI_WARN << "cudaSetDevice(" << gpu_id << "): "
<< "returned " << ret << ", "
<< cudaGetErrorString((cudaError_t)ret);
return false;
}
}
bool CuDevice::DoublePrecisionSupported() {
if (!Enabled()) return true;
@@ -214,45 +188,20 @@ bool CuDevice::DoublePrecisionSupported() {
}
bool CuDevice::IsComputeExclusive(bool *error) {
// check that we have a gpu
*error = false;
int32 n_gpu = 0;
cudaGetDeviceCount(&n_gpu);
if(n_gpu == 0) {
KALDI_LOG << "No CUDA devices found";
return false;
}
// Create a GPU context
// This will be kept if we detect compute exclusive mode
// or released in the other case.
//
// It does not harm if the function gets called twice,
// and the context is already created.
cudaError_t e;
e = cudaThreadSynchronize(); //deprecated, but for legacy not cudaDeviceSynchronize
if (e != cudaSuccess) {
KALDI_WARN << "Failed to create CUDA context on a GPU. No more unused GPUs "
<< "in compute exclusive mode?";
*error = true;
return false;
}
bool CuDevice::IsComputeExclusive() {
// Assume we already have a CUDA context created
KALDI_ASSERT(cudaSuccess == cudaThreadSynchronize());
// get the device-id and its device-properties
int32 gpu_id = -1;
e = cudaGetDevice(&gpu_id);
cudaError_t e = cudaGetDevice(&gpu_id);
if(e != cudaSuccess) {
KALDI_WARN << "Failed to get current device";
*error = true;
return false;
KALDI_ERR << "Failed to get current device";
}
struct cudaDeviceProp gpu_prop;
e = cudaGetDeviceProperties(&gpu_prop, gpu_id);
if(e != cudaSuccess) {
KALDI_WARN << "Failed to get device properties";
*error = true;
return false;
KALDI_ERR << "Failed to get device properties";
}
// find out whether compute exclusive mode is used
switch (gpu_prop.computeMode) {
@@ -271,9 +220,7 @@ bool CuDevice::IsComputeExclusive(bool *error) {
// in this case we release the GPU context...
e = cudaThreadExit(); //deprecated, but kept for legacy reasons instead of cudaDeviceReset
if(e != cudaSuccess) {
KALDI_WARN << "Failed to release CUDA context on a GPU";
*error = true;
return false;
KALDI_ERR << "Failed to release CUDA context on a GPU";
}
return false;
}
@@ -281,7 +228,7 @@ bool CuDevice::IsComputeExclusive(bool *error) {
bool CuDevice::SelectGpuIdAuto() {
// check that we have at least one gpu
// Check that we have at least one gpu
int32 n_gpu = 0;
cudaGetDeviceCount(&n_gpu);
if(n_gpu == 0) {
@@ -291,7 +238,7 @@ bool CuDevice::SelectGpuIdAuto() {
// The GPU is selected according to maximal free memory ratio
std::vector<float> free_mem_ratio(n_gpu+1, 0.0);
//get ratios of memory use, if possible
// Get ratios of memory use, if possible
KALDI_LOG << "Selecting from " << n_gpu << " GPUs";
for(int32 n = 0; n < n_gpu; n++) {
int32 ret = cudaSetDevice(n);
@@ -337,10 +284,7 @@ bool CuDevice::SelectGpuIdAuto() {
if(free_mem_ratio[n] > free_mem_ratio[max_id]) max_id=n;
}
//the free_mem_ratio should be bigger than zero
if(!free_mem_ratio[max_id] > 0.0) {
KALDI_WARN << "No device could be selected (this should never happen)";
return false;
}
KALDI_ASSERT(free_mem_ratio[max_id] > 0.0);
//finally select the GPU
KALDI_LOG << "Selected device: " << max_id << " (automatically)";
@@ -738,7 +682,7 @@ void* CuDevice::Malloc(size_t size) {
return allocator_->Malloc(size);
}
CuDevice::CuDevice(): active_gpu_id_(-3), verbose_(true),
CuDevice::CuDevice(): active_gpu_id_(-1), verbose_(true),
allocator_(new CuAllocator(CuAllocatorOptions(), this))
{ }
@@ -746,8 +690,8 @@ CuDevice::CuDevice(): active_gpu_id_(-3), verbose_(true),
CuDevice::~CuDevice() {
if (Enabled()) {
CU_SAFE_CALL(cublasShutdown());
} else if (active_gpu_id_ == -2) {
KALDI_WARN << "CUDA was NOT used! No CUDA GPU detected!";
} else if (active_gpu_id_ == -1) {
KALDI_WARN << "CUDA was NOT used....";
}
if (allocator_ != NULL)
delete allocator_;
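
Because the new control flow above is spread across interleaved old and new lines, here is a condensed, compilable sketch of what SelectGpuId() now does. The four stub functions are readability assumptions standing in for the real CUDA runtime calls, and FinalizeActiveGpu() is elided:

#include <cstdlib>
#include <iostream>
#include <string>
#include <unistd.h>  // sleep()

// Stubs standing in for the CUDA calls used in cu-device.cc (assumptions):
static bool AnyGpuPresent()      { return true; }   // cudaGetDeviceCount(&n), n > 0
static bool CreateCudaContext()  { return true; }   // cudaThreadSynchronize() == cudaSuccess
static bool InComputeExclusive() { return false; }  // gpu_prop.computeMode is exclusive
static bool SelectGpuByFreeMem() { return true; }   // SelectGpuIdAuto()

static void Die(const std::string &msg) { std::cerr << msg << "\n"; std::exit(1); }

void SelectGpuIdSketch(const std::string &use_gpu) {
  if (use_gpu != "yes" && use_gpu != "no" && use_gpu != "optional")
    Die("Please choose: --use-gpu=yes|no|optional");
  if (use_gpu == "no") return;                   // run on the CPU by request
  if (!AnyGpuPresent()) {                        // no GPU in the system
    if (use_gpu == "yes") Die("No CUDA GPU detected!");
    return;                                      // "optional": back off to CPU
  }
  if (!CreateCudaContext()) {                    // all GPUs taken (exclusive mode)?
    sleep(2);                                    // wait a moment and retry once
    if (!CreateCudaContext()) {
      if (use_gpu == "yes") Die("Failed to create CUDA context");
      return;                                    // "optional": back off to CPU
    }
  }
  if (InComputeExclusive()) return;              // the OS already picked a free GPU
  // Default compute mode: pick the GPU with the largest free-memory ratio.
  if (!SelectGpuByFreeMem() && use_gpu == "yes") Die("Error acquiring GPU.");
}

int main() { SelectGpuIdSketch("optional"); }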

View file

@@ -55,18 +55,18 @@ class CuDevice {
void Free(void *ptr);
/**********************************/
// Instance interface
/// Check if the CUDA device is selected for use
/// Select a GPU for computation, the 'use_gpu' modes are:
/// "yes" -- Select GPU automatically and die if this fails.
/// "optional" -- Do as above, but if it fails, back off to CPU.
/// "no" -- Run on CPU.
/// (more comments in cu-device.cc)
void SelectGpuId(std::string use_gpu);
/// Check if the CUDA GPU is selected for use
bool Enabled() const {
return (active_gpu_id_ > -1);
}
/// Manually select GPU by id (more comments in cu-device.cc)
void SelectGpuId(int32 gpu_id,
bool abort_on_failure = true);
/// Get the active GPU id
int32 ActiveGpuId() {
return active_gpu_id_;
@@ -105,7 +105,7 @@ class CuDevice {
/// otherwise. Assumes a CUDA context has already been created
/// (see SelectGpuId()); failure to query the device or its
/// properties here is fatal.
bool IsComputeExclusive(bool *error);
bool IsComputeExclusive();
/// Automatically select GPU and get CUDA context. Returns true on success.
bool SelectGpuIdAuto();

View file

@@ -151,9 +151,9 @@ int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection
#endif
srand(time(NULL));
kaldi::CudaMathUnitTest<float>();

View file

@@ -176,7 +176,7 @@ template<typename Real> void CudaMatrixSpeedTest() {
int main() {
//Select the GPU
#if HAVE_CUDA == 1
CuDevice::Instantiate().SelectGpuId(-2); //-2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); //-2 .. automatic selection
#endif
kaldi::CudaMatrixSpeedTest<float>();

View file

@@ -1733,9 +1733,9 @@ int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection
#endif
kaldi::CudaMatrixUnitTest<float>();

View file

@@ -244,8 +244,7 @@ int main() {
using namespace kaldi;
#if HAVE_CUDA == 1
// Select the GPU
kaldi::int32 use_gpu_id = -2;
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId("yes");
#endif
kaldi::CudaPackedMatrixUnitTest<float>();
#if HAVE_CUDA == 1

View file

@@ -167,7 +167,7 @@ template<typename Real> void CuSpMatrixSpeedTest() {
int main() {
//Select the GPU
#if HAVE_CUDA == 1
CuDevice::Instantiate().SelectGpuId(-2); //-2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); //-2 .. automatic selection
#endif
kaldi::CuSpMatrixSpeedTest<float>();

View file

@@ -403,9 +403,9 @@ int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection
#endif
kaldi::CudaSpMatrixUnitTest<float>();

View file

@@ -558,8 +558,7 @@ static void CuMatrixUnitTest() {
int main() {
using namespace kaldi;
#if HAVE_CUDA == 1
kaldi::int32 use_gpu_id = -2; // -2 means automatic selection.
kaldi::CuDevice::Instantiate().SelectGpuId(use_gpu_id);
kaldi::CuDevice::Instantiate().SelectGpuId("yes");
#endif
kaldi::CuMatrixUnitTest<float>();

View file

@@ -191,9 +191,9 @@ int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection
#endif
kaldi::CudaTpMatrixUnitTest<float>();
#if HAVE_CUDA == 1

View file

@@ -151,7 +151,7 @@ template<typename Real> void CudaVectorSpeedTest() {
int main() {
//Select the GPU
#if HAVE_CUDA == 1
CuDevice::Instantiate().SelectGpuId(-2); //-2 .. automatic selection
CuDevice::Instantiate().SelectGpuId("yes"); //-2 .. automatic selection
#endif
kaldi::CudaVectorSpeedTest<float>();

View file

@@ -695,10 +695,8 @@ int main(int argc, char *argv[]) {
const char *usage = "Usage: cu-vector-test [options]";
ParseOptions po(usage);
int32 use_gpu_id = -2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic "
"selection, -1 disable GPU, 0..N select GPU). Only has effect if compiled "
"with CUDA");
std::string use_gpu = "yes";
po.Register("use-gpu", &use_gpu, "yes|no|optional");
po.Read(argc, argv);
if (po.NumArgs() != 0) {
@@ -709,9 +707,9 @@ int main(int argc, char *argv[]) {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif

View file

@@ -47,13 +47,11 @@ int main(int argc, char *argv[]) {
bool binary_write = true;
NnetCombineFastConfig combine_config;
int32 use_gpu_id = -2;
std::string use_gpu = "yes";
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic "
"selection, -1 disable GPU, 0..N select GPU). Only has effect if compiled "
"with CUDA and --num-threads=1");
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
combine_config.Register(&po);
@@ -71,7 +69,7 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
if (combine_config.num_threads == 1)
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif

View file

@@ -45,13 +45,11 @@ int main(int argc, char *argv[]) {
int32 num_segments = 1;
int32 batch_size = 1024;
int32 use_gpu_id = -2;
std::string use_gpu = "yes";
po.Register("num-segments", &num_segments,
"Number of line segments used for computing derivatives");
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic "
"selection, -1 disable GPU, 0..N select GPU). Only has effect if compiled "
"with CUDA");
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -61,7 +59,7 @@ int main(int argc, char *argv[]) {
}
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
std::string nnet1_rxfilename = po.GetArg(1),

View file

@@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
bool binary_write = true;
bool zero_stats = true;
int32 srand_seed = 0;
int32 use_gpu_id = -2;
std::string use_gpu="yes";
NnetSimpleTrainerConfig train_config;
ParseOptions po(usage);
@@ -57,9 +57,7 @@ int main(int argc, char *argv[]) {
po.Register("srand", &srand_seed, "Seed for random number generator "
"(relevant if you have layers of type AffineComponentPreconditioned "
"with l2-penalty != 0.0");
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic "
"selection, -1 disable GPU, 0..N select GPU). Only has effect if compiled "
"with CUDA");
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
train_config.Register(&po);
@@ -72,7 +70,7 @@ int main(int argc, char *argv[]) {
srand(srand_seed);
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
std::string nnet_rxfilename = po.GetArg(1),

View file

@@ -51,13 +51,8 @@ int main(int argc, char *argv[]) {
bool apply_log = false;
po.Register("apply-log", &apply_log, "Transform MLP output to logscale");
#if HAVE_CUDA==1
int32 use_gpu_id=-2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="no";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -76,7 +71,7 @@ int main(int argc, char *argv[]) {
//Select the GPU
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;

View file

@@ -128,16 +128,9 @@ int main(int argc, char *argv[]) {
po.Register("drop-frames", &drop_frames,
"Drop frames, where is zero den-posterior under numerator path "
"(ie. path not in lattice)");
#if HAVE_CUDA == 1
kaldi::int32 use_gpu_id=-2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
"(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="yes";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -162,7 +155,7 @@ int main(int argc, char *argv[]) {
// Select the GPU
#if HAVE_CUDA == 1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;

View file

@@ -129,15 +129,9 @@ int main(int argc, char *argv[]) {
po.Register("do-smbr", &do_smbr, "Use state-level accuracies instead of "
"phone accuracies.");
#if HAVE_CUDA == 1
kaldi::int32 use_gpu_id=-2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
"(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="yes";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
if (po.NumArgs() != 6) {
@@ -164,7 +158,7 @@ int main(int argc, char *argv[]) {
// Select the GPU
#if HAVE_CUDA == 1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;

View file

@@ -61,13 +61,8 @@ int main(int argc, char *argv[]) {
po.Register("cachesize", &cachesize, "Size of cache for frame level shuffling (max 8388479)");
po.Register("seed", &seed, "Seed value for srand, sets fixed order of frame-shuffling");
#if HAVE_CUDA==1
int32 use_gpu_id=-2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="yes";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -94,7 +89,7 @@ int main(int argc, char *argv[]) {
//Select the GPU
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;

View file

@@ -60,13 +60,8 @@ int main(int argc, char *argv[]) {
kaldi::int32 max_frames = 6000; // Allow segments maximum of one minute by default
po.Register("max-frames",&max_frames, "Maximum number of frames a segment can have to be processed");
#if HAVE_CUDA==1
int32 use_gpu_id=-2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="yes";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -93,7 +88,7 @@ int main(int argc, char *argv[]) {
//Select the GPU
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;

View file

@@ -49,13 +49,8 @@ int main(int argc, char *argv[]) {
std::string feature_transform;
po.Register("feature-transform", &feature_transform, "Feature transform in Nnet format");
#if HAVE_CUDA==1
int32 use_gpu_id=-2;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="yes";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -80,7 +75,7 @@ int main(int argc, char *argv[]) {
//Select the GPU
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;

View file

@@ -65,13 +65,8 @@ int main(int argc, char *argv[]) {
BaseFloat drop_data = 0.0;
po.Register("drop-data", &drop_data, "Threshold for random dropping of the data (0 no-drop, 1 drop-all)");
#if HAVE_CUDA==1
int32 use_gpu_id=-2 ;
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#else
int32 use_gpu_id=0;
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif
std::string use_gpu="yes";
po.Register("use-gpu", &use_gpu, "yes|no|optionaly, only has effect if compiled with CUDA");
po.Read(argc, argv);
@@ -93,7 +88,7 @@ int main(int argc, char *argv[]) {
//Select the GPU
#if HAVE_CUDA==1
CuDevice::Instantiate().SelectGpuId(use_gpu_id);
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet rbm_transf;