Roman Grundkiewicz 2018-08-01 16:43:05 +01:00
Parent c3807da737
Commit c73a2cf6fb
48 changed files: 194 additions and 197 deletions

View file

@ -27,7 +27,7 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
set(CMAKE_CXX_FLAGS_RELEASE " -std=c++11 -O3 -Ofast -m64 -pthread -march=native -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC -Wno-unused-result -Wno-deprecated -Wno-deprecated-gpu-targets")
set(CMAKE_CXX_FLAGS_DEBUG " -std=c++11 -g -O0 -pthread -fPIC -Wno-unused-result -Wno-deprecated -Wno-deprecated-gpu-targets")
set(CMAKE_CXX_FLAGS_ST "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg -g")
set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})

View file

@ -132,6 +132,9 @@ static void processPaths(
processPaths(sub.second, TransformPath, PATHS.count(key) > 0);
}
break;
default:
// nothing to do for other node types
break;
}
}
}
@ -1132,8 +1135,7 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
"(--config option)");
auto configDir = boost::filesystem::path{configPaths.front()}.parent_path();
for(const auto& configPath : configPaths)
ABORT_IF(boost::filesystem::path{configPaths.front()}.parent_path()
!= configDir,
ABORT_IF(boost::filesystem::path{configPath}.parent_path() != configDir,
"relative-paths option requires all config files to be in the "
"same directory");
processPaths(config_, [&](const std::string& nodePath) -> std::string {
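The hunk above fixes a classic copy-paste bug: the loop checked boost::filesystem::path{configPaths.front()}.parent_path() instead of the loop variable, so the same-directory check compared the first config file against itself and always passed. A minimal standalone sketch of the corrected check (hypothetical paths, not Marian code, assuming Boost.Filesystem as in the diff):

#include <boost/filesystem.hpp>
#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> configPaths = {"cfg/a.yml", "cfg/b.yml"};
  auto configDir = boost::filesystem::path{configPaths.front()}.parent_path();
  for(const auto& configPath : configPaths) {
    // The buggy version tested configPaths.front() here, which trivially
    // equals configDir; the loop variable is what must be checked.
    assert(boost::filesystem::path{configPath}.parent_path() == configDir);
  }
  return 0;
}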

View file

@ -41,7 +41,7 @@ public:
inline int& dim(int i) {
if(i >= 0) {
ABORT_IF(i >= size(),
ABORT_IF(i >= (int)size(),
"Index {} is out of bounds, shape has {} dimensions",
i,
size());
@ -93,7 +93,7 @@ public:
for(int j = shape_.size() - 2; j >= 0; --j)
stride[j] = stride[j + 1] * shape_[j + 1];
for(int j = 0; j < d.size(); ++j)
for(size_t j = 0; j < d.size(); ++j)
d[j] = (i / stride[j]) % shape_[j];
}
@ -118,7 +118,7 @@ public:
std::string toString() const {
std::stringstream strm;
strm << "shape=" << (*this)[0];
for(int i = 1; i < size(); ++i)
for(size_t i = 1; i < size(); ++i)
strm << "x" << (*this)[i];
strm << " size=" << elements();
return strm.str();
@ -143,7 +143,7 @@ public:
}
static Shape broadcast(const std::vector<Shape>& shapes) {
int maxDims = 0;
size_t maxDims = 0;
for(auto& s : shapes)
if(s.size() > maxDims)
maxDims = s.size();
@ -152,7 +152,7 @@ public:
shape.resize(maxDims);
for(auto& s : shapes) {
for(int i = 0; i < s.size(); ++i) {
for(int i = 0; i < (int)s.size(); ++i) {
ABORT_IF(shape[-i] != s[-i] && shape[-i] != 1 && s[-i] != 1,
"Shapes {} and {} cannot be broadcasted",
(std::string)shape,
@ -170,7 +170,7 @@ public:
template <typename T>
static Shape broadcast(const std::vector<T>& nodes) {
int maxDims = 0;
size_t maxDims = 0;
for(auto& n : nodes)
if(n->shape().size() > maxDims)
maxDims = n->shape().size();
@ -180,7 +180,7 @@ public:
for(auto& node : nodes) {
const Shape& shapen = node->shape();
for(int i = 1; i <= shapen.size(); ++i) {
for(int i = 1; i <= (int)shapen.size(); ++i) {
ABORT_IF(shape[-i] != shapen[-i] && shape[-i] != 1 && shapen[-i] != 1,
"Shapes {} and {} cannot be broadcasted",
(std::string)shape,
@ -193,7 +193,7 @@ public:
size_t hash() const {
size_t seed = boost::hash<int>()(shape_[0]);
for(int i = 1; i < shape_.size(); ++i)
for(size_t i = 1; i < shape_.size(); ++i)
boost::hash_combine(seed, shape_[i]);
return seed;
}
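Most changes in this file, as in the rest of the commit, silence signed/unsigned comparison warnings (-Wsign-compare): Shape::size() and std::vector::size() return unsigned types, so comparing them against a signed int index mixes signednesses. Two idioms appear: switch the index to size_t where it is never negative, or cast the size to int where signed arithmetic (for example negative axis indices) is needed. A minimal sketch of both, not Marian code:

#include <cstddef>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3};
  // for(int i = 0; i < v.size(); ++i)   // warns with -Wsign-compare
  for(size_t i = 0; i < v.size(); ++i)   // idiom 1: unsigned index
    v[i] += 1;
  for(int i = 0; i < (int)v.size(); ++i) // idiom 2: cast, keeps signed index
    v[i] -= 1;
  return 0;
}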

View file

@ -77,8 +77,8 @@ private:
maxiBatch.reset(new sample_queue(cmpNone));
}
int maxBatchSize = options_->get<int>("mini-batch");
int maxSize = maxBatchSize * options_->get<int>("maxi-batch");
size_t maxBatchSize = options_->get<int>("mini-batch");
size_t maxSize = maxBatchSize * options_->get<int>("maxi-batch");
// consume data from corpus into maxi-batch (single sentences)
// sorted into specified order (due to queue)
@ -185,7 +185,7 @@ public:
currentBatch_ = bufferedBatches_.front();
if(loadReady_
&& bufferedBatches_.size()
&& (int)bufferedBatches_.size()
<= std::max(options_->get<int>("maxi-batch") / 5, 1)) {
{
std::unique_lock<std::mutex> lock(loadMutex_);
@ -239,7 +239,7 @@ public:
}
prepare(shuffle);
for(int i = 0; i < state->batchesEpoch; ++i)
for(size_t i = 0; i < state->batchesEpoch; ++i)
next();
return true;

View file

@ -18,7 +18,7 @@ private:
public:
size_t getBatchSize(const std::vector<size_t>& lengths) {
auto it = map_.lower_bound(lengths);
for(int i = 0; i < lengths.size(); ++i)
for(size_t i = 0; i < lengths.size(); ++i)
while(it != map_.end() && it->first[i] < lengths[i])
it++;
@ -28,7 +28,7 @@ public:
void add(Ptr<data::CorpusBatch> batch, size_t multiplier = 1) {
std::vector<size_t> lengths;
for(int i = 0; i < batch->sets(); ++i)
for(size_t i = 0; i < batch->sets(); ++i)
lengths.push_back((*batch)[i]->batchWidth());
size_t batchSize = batch->size() * multiplier;
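getBatchSize builds on ordered lookup in a std::map keyed by per-stream length vectors, which std::map orders lexicographically; lower_bound returns the first recorded length profile not less than the query. A simplified sketch with hypothetical numbers (the real method additionally advances the iterator until every dimension fits):

#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

int main() {
  // Keys: per-stream sentence lengths; values: batch sizes known to fit.
  std::map<std::vector<size_t>, size_t> stats;
  stats[{10, 12}] = 64;
  stats[{20, 24}] = 32;
  auto it = stats.lower_bound({15, 15});
  std::cout << it->second << "\n";  // prints 32, from the {20, 24} entry
  return 0;
}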

View file

@ -57,7 +57,7 @@ public:
std::vector<Ptr<Vocab>>& getVocabs() { return vocabs_; }
batch_ptr toBatch(const std::vector<sample>& batchVector) {
int batchSize = batchVector.size();
size_t batchSize = batchVector.size();
std::vector<size_t> sentenceIds;
@ -73,14 +73,14 @@ public:
}
std::vector<Ptr<SubBatch>> subBatches;
for(int j = 0; j < maxDims.size(); ++j) {
for(size_t j = 0; j < maxDims.size(); ++j) {
subBatches.emplace_back(New<SubBatch>(batchSize, maxDims[j], vocabs_[j]));
}
std::vector<size_t> words(maxDims.size(), 0);
for(int i = 0; i < batchSize; ++i) {
for(int j = 0; j < maxDims.size(); ++j) {
for(int k = 0; k < batchVector[i][j].size(); ++k) {
for(size_t i = 0; i < batchSize; ++i) {
for(size_t j = 0; j < maxDims.size(); ++j) {
for(size_t k = 0; k < batchVector[i][j].size(); ++k) {
subBatches[j]->data()[k * batchSize + i] = batchVector[i][j][k];
subBatches[j]->mask()[k * batchSize + i] = 1.f;
words[j]++;

View file

@ -107,13 +107,14 @@ class SubBatch {
private:
std::vector<Word> indices_;
std::vector<float> mask_;
Ptr<Vocab> vocab_;
// ... TODO: add the length information (remember it)
size_t size_;
size_t width_;
size_t words_;
Ptr<Vocab> vocab_;
// ... TODO: add the length information (remember it)
public:
/**
* @brief Creates an empty subbatch of specified size.
@ -178,15 +179,15 @@ public:
size_t subSize = std::ceil(size_ / (float)n);
size_t restSize = size_;
int pos = 0;
for(int k = 0; k < n; ++k) {
size_t pos = 0;
for(size_t k = 0; k < n; ++k) {
size_t __size__ = std::min(subSize, restSize);
if(__size__ > 0) {
auto sb = New<SubBatch>(__size__, width_, vocab_);
size_t __words__ = 0;
for(int j = 0; j < width_; ++j) {
for(int i = 0; i < __size__; ++i) {
for(size_t j = 0; j < width_; ++j) {
for(size_t i = 0; i < __size__; ++i) {
sb->data()[j * __size__ + i] = indices_[j * size_ + pos + i];
sb->mask()[j * __size__ + i] = mask_[j * size_ + pos + i];
@ -367,7 +368,7 @@ public:
size_t pos = 0;
for(auto split : splits) {
std::vector<size_t> ids;
for(int i = pos; i < pos + split->size(); ++i)
for(size_t i = pos; i < pos + split->size(); ++i)
ids.push_back(sentenceIds_[i]);
split->setSentenceIds(ids);
pos += split->size();
@ -394,8 +395,8 @@ public:
// this needs to be split along the batch dimension
// which here is the innermost dimension.
// Should work for sentence-based weights, too.
for(int j = 0; j < width; ++j) {
for(int i = 0; i < split->size(); ++i) {
for(size_t j = 0; j < width; ++j) {
for(size_t i = 0; i < split->size(); ++i) {
ws[j * split->size() + i] = dataWeights_[j * oldSize + i + pos];
}
}
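The split above distributes size_ sentences over n sub-batches of at most ceil(size_/n) each; the cast in std::ceil(size_ / (float)n) is what keeps integer division from truncating before the ceiling is taken. A small worked sketch with hypothetical sizes:

#include <cmath>
#include <cstddef>
#include <iostream>

int main() {
  size_t size = 10, n = 3;
  size_t truncated = std::ceil(size / n);       // 10/3 is already 3 here
  size_t subSize = std::ceil(size / (float)n);  // 10/3.f = 3.33..., so 4
  std::cout << truncated << " vs " << subSize << "\n";  // prints "3 vs 4"
  return 0;
}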

View file

@ -48,7 +48,7 @@ public:
std::vector<Ptr<Vocab>>& getVocabs() { return vocabs_; }
batch_ptr toBatch(const std::vector<sample>& batchVector) {
int batchSize = batchVector.size();
size_t batchSize = batchVector.size();
std::vector<size_t> sentenceIds;
@ -64,14 +64,14 @@ public:
}
std::vector<Ptr<SubBatch>> subBatches;
for(int j = 0; j < maxDims.size(); ++j) {
for(size_t j = 0; j < maxDims.size(); ++j) {
subBatches.emplace_back(New<SubBatch>(batchSize, maxDims[j], vocabs_[j]));
}
std::vector<size_t> words(maxDims.size(), 0);
for(int i = 0; i < batchSize; ++i) {
for(int j = 0; j < maxDims.size(); ++j) {
for(int k = 0; k < batchVector[i][j].size(); ++k) {
for(size_t i = 0; i < batchSize; ++i) {
for(size_t j = 0; j < maxDims.size(); ++j) {
for(size_t k = 0; k < batchVector[i][j].size(); ++k) {
subBatches[j]->data()[k * batchSize + i] = batchVector[i][j][k];
subBatches[j]->mask()[k * batchSize + i] = 1.f;
words[j]++;

View file

@ -59,7 +59,7 @@ void CorpusSQLite::fillSQLite() {
if(fill) {
std::string createStr = "create table lines (_id integer";
std::string insertStr = "insert into lines values (?";
for(int i = 0; i < files_.size(); ++i) {
for(size_t i = 0; i < files_.size(); ++i) {
createStr += ", line" + std::to_string(i) + " text";
insertStr += ", ?";
}
@ -79,7 +79,7 @@ void CorpusSQLite::fillSQLite() {
ps.bind(1, (int)lines);
std::string line;
for(int i = 0; i < files_.size(); ++i) {
for(size_t i = 0; i < files_.size(); ++i) {
cont = cont && GetLine((std::istream&)*files_[i], line);
if(cont)
ps.bind(i + 2, line);

View file

@ -67,7 +67,7 @@ public:
std::vector<Ptr<Vocab>>& getVocabs() { return vocabs_; }
batch_ptr toBatch(const std::vector<sample>& batchVector) {
int batchSize = batchVector.size();
size_t batchSize = batchVector.size();
std::vector<size_t> sentenceIds;
@ -83,14 +83,14 @@ public:
}
std::vector<Ptr<SubBatch>> subBatches;
for(int j = 0; j < maxDims.size(); ++j) {
for(size_t j = 0; j < maxDims.size(); ++j) {
subBatches.emplace_back(New<SubBatch>(batchSize, maxDims[j], vocabs_[j]));
}
std::vector<size_t> words(maxDims.size(), 0);
for(int i = 0; i < batchSize; ++i) {
for(int j = 0; j < maxDims.size(); ++j) {
for(int k = 0; k < batchVector[i][j].size(); ++k) {
for(size_t i = 0; i < batchSize; ++i) {
for(size_t j = 0; j < maxDims.size(); ++j) {
for(size_t k = 0; k < batchVector[i][j].size(); ++k) {
subBatches[j]->data()[k * batchSize + i] = batchVector[i][j][k];
subBatches[j]->mask()[k * batchSize + i] = 1.f;
words[j]++;

View file

@ -34,10 +34,9 @@ private:
class TextInput : public DatasetBase<SentenceTuple, TextIterator, CorpusBatch> {
private:
Ptr<Config> options_;
std::vector<UPtr<std::istringstream>> files_;
std::vector<Ptr<Vocab>> vocabs_;
Ptr<Config> options_;
size_t pos_{0};
@ -57,7 +56,7 @@ public:
// TODO: There are half a dozen functions called toBatch(), which are very
// similar. Factor them.
batch_ptr toBatch(const std::vector<sample>& batchVector) {
int batchSize = batchVector.size();
size_t batchSize = batchVector.size();
std::vector<size_t> sentenceIds;
@ -73,14 +72,14 @@ public:
}
std::vector<Ptr<SubBatch>> subBatches;
for(int j = 0; j < maxDims.size(); ++j) {
for(size_t j = 0; j < maxDims.size(); ++j) {
subBatches.emplace_back(New<SubBatch>(batchSize, maxDims[j], vocabs_[j]));
}
std::vector<size_t> words(maxDims.size(), 0);
for(int i = 0; i < batchSize; ++i) {
for(int j = 0; j < maxDims.size(); ++j) {
for(int k = 0; k < batchVector[i][j].size(); ++k) {
for(size_t i = 0; i < batchSize; ++i) {
for(size_t j = 0; j < maxDims.size(); ++j) {
for(size_t k = 0; k < batchVector[i][j].size(); ++k) {
subBatches[j]->data()[k * batchSize + i] = batchVector[i][j][k];
subBatches[j]->mask()[k * batchSize + i] = 1.f;
words[j]++;

View file

@ -109,7 +109,7 @@ struct Loop {
float sum = 0;
functional::Array<int, K> acc;
for(int i = 0; i < length[N - n]; ++i) {
for(int j = 0; j < K; ++j) {
for(size_t j = 0; j < K; ++j) {
acc[j] = pAcc[j] + (dim[N - n] + i) * in[j].shape().bstride(N - n);
}
sum += Loop<n - 1, N, K>::result(functor, in, acc, length, dim);
@ -130,7 +130,7 @@ struct Loop<1, N, K> {
float sum = 0;
functional::Array<int, K> acc;
for(int i = 0; i < length[N - 1]; ++i) {
for(int j = 0; j < K; ++j) {
for(size_t j = 0; j < K; ++j) {
acc[j] = pAcc[j] + (dim[N - 1] + i) * in[j].shape().bstride(N - 1);
}
sum += apply<K>(functor, in, acc);

View file

@ -7,9 +7,7 @@
#include "3rd_party/exception.h"
#include "common/definitions.h"
/**
* @brief Parent namespace for the Marian project
*/
// Parent namespace for the Marian project
namespace marian {
#define NodeOp(op) [=]() { op; }
@ -19,8 +17,10 @@ class AutoTunerRecorder;
template <class DataType>
class Chainable;
/** @brief Defines a convenience type to represent a shared pointer to a
* Chainable<Tensor> object. */
/**
* A convenience type to represent a shared pointer to a Chainable<Tensor>
* object.
*/
typedef Ptr<Chainable<Tensor>> Expr;
typedef Weak<Chainable<Tensor>> WExpr;

View file

@ -119,12 +119,11 @@ private:
Ptr<Parameters> params_;
Ptr<Tensors> tensors_;
Ptr<Backend> backend_;
std::unordered_map<size_t, std::vector<Expr>> memoized_;
bool inferenceOnly_{false};
bool optimized_{false};
Ptr<Backend> backend_;
bool reloaded_{false};
std::string namespace_;
@ -439,7 +438,7 @@ public:
shape.set(1, it.second->shape[0]);
} else {
shape.resize(it.second->shape.size());
for(int i = 0; i < it.second->shape.size(); ++i)
for(size_t i = 0; i < it.second->shape.size(); ++i)
shape.set(i, it.second->shape[i]);
}

View file

@ -181,7 +181,7 @@ Expr atleast_nd(Expr a, size_t dims) {
Shape nShape;
nShape.resize(dims);
for(int i = 1; i <= a->shape().size(); ++i)
for(int i = 1; i <= (int)a->shape().size(); ++i)
nShape.set(-i, a->shape()[-i]);
return reshape(a, nShape);
@ -267,7 +267,7 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
// lower precision for shapes, reduces data sparsity
auto sh = [](Shape sh) {
for(int i = 0; i < sh.size(); ++i)
for(size_t i = 0; i < sh.size(); ++i)
sh.set(i, sh[i] / 4);
return sh;
};
@ -353,7 +353,7 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
// swap the last two axes
Expr transpose(Expr a) {
std::vector<int> axes(a->shape().size());
for(int i = 0; i < axes.size(); ++i) {
for(size_t i = 0; i < axes.size(); ++i) {
axes[i] = i;
}
if(axes.size() > 1) {

View file

@ -163,7 +163,7 @@ struct NaryNodeOp : public Node {
Type value_type = Type::float32)
: Node(nodes.front()->graph(), shape, value_type) {
children_.resize(nodes.size());
for(int i = 0; i < nodes.size(); ++i)
for(size_t i = 0; i < nodes.size(); ++i)
children_[i] = nodes[i];
setTrainable(std::any_of(
@ -187,7 +187,7 @@ struct NaryNodeOp : public Node {
if(!hash_) {
std::size_t seed = boost::hash<std::string>()(name());
boost::hash_combine(seed, type());
for(int i = 0; i < children_.size(); ++i)
for(size_t i = 0; i < children_.size(); ++i)
boost::hash_combine(seed, child(i)->hash());
hash_ = seed;
}
@ -201,7 +201,7 @@ struct NaryNodeOp : public Node {
return false;
if(children().size() != node->children().size())
return false;
for(int i = 0; i < children().size(); ++i)
for(size_t i = 0; i < children().size(); ++i)
if(children()[i]->getId() != node->children()[i]->getId())
return false;
return true;

View file

@ -687,14 +687,14 @@ struct ConcatenateNodeOp : public NaryNodeOp {
void forward() {
std::vector<Tensor> concatenees;
for(int i = 0; i < children_.size(); ++i)
for(size_t i = 0; i < children_.size(); ++i)
concatenees.push_back(child(i)->val());
Concatenate(val_, concatenees, ax_);
}
void backward() {
std::vector<Tensor> deconcatenees;
for(int i = 0; i < children_.size(); ++i) {
for(size_t i = 0; i < children_.size(); ++i) {
auto childPtr = child(i);
childPtr
->set_zero_adjoint(); // @TODO: this is a hotfix, do this properly

View file

@ -227,7 +227,7 @@ struct TanhNodeOp : public NaryNodeOp {
child(0)->val(),
child(1)->val(),
child(2)->val());
for(int i = 3; i < children_.size(); ++i)
for(size_t i = 3; i < children_.size(); ++i)
Element(_1 = _1 + _2, val_, child(i)->val());
Element(_1 = tanh(_1), val_);)
};
@ -237,7 +237,7 @@ struct TanhNodeOp : public NaryNodeOp {
NodeOps backwardOps() {
using namespace functional;
NodeOps ops;
for(int i = 0; i < children_.size(); i++) {
for(size_t i = 0; i < children_.size(); i++) {
ops.push_back(
NodeOp(Add(_1 * (1.0f - (_2 * _2)), child(i)->grad(), adj_, val_)));
}
@ -828,7 +828,7 @@ struct TransposeNodeOp : public UnaryNodeOp {
ABORT_IF(shape.size() != axes.size(),
"Shape and transpose axes have different numbers of dimensions");
for(int i = 0; i < shape.size(); ++i)
for(size_t i = 0; i < shape.size(); ++i)
shape.set(i, a->shape()[axes[i]]);
return shape;

View file

@ -117,7 +117,7 @@ public:
else
output = layers_[0]->apply(av);
for(int i = 1; i < layers_.size(); ++i)
for(size_t i = 1; i < layers_.size(); ++i)
output = layers_[i]->apply(output);
return output;

View file

@ -5,6 +5,7 @@ namespace marian {
Ptr<WeightingBase> WeightingFactory(Ptr<Options> options) {
if(options->has("data-weighting"))
return New<DataWeighting>(options->get<std::string>("data-weighting-type"));
return nullptr;
}
Expr DataWeighting::getWeights(Ptr<ExpressionGraph> graph,
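The added return nullptr closes a control path: without it, WeightingFactory falls off the end of a non-void function whenever no data-weighting option is set, which is undefined behavior and what -Wreturn-type warns about. A minimal sketch of the pattern with hypothetical stand-in types:

#include <memory>

struct WeightingBase {};  // stand-in for the real base class

std::shared_ptr<WeightingBase> makeWeighting(bool hasOption) {
  if(hasOption)
    return std::make_shared<WeightingBase>();
  return nullptr;  // without this line, falling off the end is UB
}

int main() {
  auto w = makeWeighting(false);
  return w ? 1 : 0;
}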

View file

@ -49,16 +49,11 @@ public:
auto state = encdec->stepAll(graph, corpusBatch, clearGraph);
float ls = inference_ ? 0.f : options_->get<float>("label-smoothing");
Expr weights;
Expr cost;
bool sentenceWeighting = false;
if(toBeWeighted_) {
if(toBeWeighted_)
weights = weighter_->getWeights(graph, corpusBatch);
}
Expr cost;
cost = loss_->getCost(state->getProbs(),
state->getTargetIndices(),
state->getTargetMask(),

View file

@ -49,7 +49,7 @@ public:
virtual void blacklist(Expr totalCosts, Ptr<data::CorpusBatch> batch) {
auto attentionIdx = getAttentionIndices();
int dimVoc = totalCosts->shape()[-1];
for(int i = 0; i < attentionIdx.size(); i++) {
for(size_t i = 0; i < attentionIdx.size(); i++) {
if(batch->front()->data()[attentionIdx[i]] != 0) {
totalCosts->val()->set(
i * dimVoc + DEFAULT_EOS_ID, // this is checked at vocab-load time
@ -167,7 +167,7 @@ public:
auto attCell = rnn::stacked_cell(graph) //
.push_back(rnn::cell(graph) //
("prefix", prefix_ + "_cell1"));
for(int i = 0; i < state->getEncoderStates().size(); ++i) {
for(size_t i = 0; i < state->getEncoderStates().size(); ++i) {
std::string prefix = prefix_;
if(state->getEncoderStates().size() > 1)
prefix += "_att" + std::to_string(i + 1);
@ -185,7 +185,7 @@ public:
rnn.push_back(rnn::cell(graph)("prefix", prefix_));
}
for(int i = 0; i < decoderLayers - 1; ++i)
for(size_t i = 0; i < decoderLayers - 1; ++i)
rnn.push_back(rnn::cell(graph) //
("prefix", prefix_ + "_l" + std::to_string(i)));
@ -209,7 +209,7 @@ public:
Expr logits;
if(type == "hard-soft-att") {
std::vector<Expr> alignedContexts;
for(int k = 0; k < state->getEncoderStates().size(); ++k) {
for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
// retrieve all the aligned contexts computed by the attention mechanism
auto att = rnn_->at(0)
->as<rnn::StackedCell>()
@ -279,7 +279,7 @@ public:
auto stateHardAtt = std::dynamic_pointer_cast<DecoderStateHardAtt>(state);
int dimSrcWords = state->getEncoderStates()[0]->getContext()->shape()[-3];
size_t dimSrcWords = state->getEncoderStates()[0]->getContext()->shape()[-3];
if(embIdx.empty()) {
stateHardAtt->setAttentionIndices({0});

View file

@ -200,7 +200,7 @@ private:
// setting up conditional (transitional) cell
auto baseCell = rnn::stacked_cell(graph);
for(int i = 1; i <= decoderBaseDepth; ++i) {
for(size_t i = 1; i <= decoderBaseDepth; ++i) {
bool transition = (i > 2);
auto paramPrefix = prefix_ + "_cell" + std::to_string(i);
baseCell.push_back(rnn::cell(graph) //
@ -208,7 +208,7 @@ private:
("final", i > 1) //
("transition", transition));
if(i == 1) {
for(int k = 0; k < state->getEncoderStates().size(); ++k) {
for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
auto attPrefix = prefix_;
if(state->getEncoderStates().size() > 1)
attPrefix += "_att" + std::to_string(k + 1);
@ -224,11 +224,11 @@ private:
rnn.push_back(baseCell);
// Add more cells to RNN (stacked RNN)
for(int i = 2; i <= decoderLayers; ++i) {
for(size_t i = 2; i <= decoderLayers; ++i) {
// deep transition
auto highCell = rnn::stacked_cell(graph);
for(int j = 1; j <= decoderHighDepth; j++) {
for(size_t j = 1; j <= decoderHighDepth; j++) {
auto paramPrefix
= prefix_ + "_l" + std::to_string(i) + "_cell" + std::to_string(j);
highCell.push_back(rnn::cell(graph)("prefix", paramPrefix));
@ -309,7 +309,7 @@ public:
rnn::States decoderStates = rnn_->lastCellStates();
std::vector<Expr> alignedContexts;
for(int k = 0; k < state->getEncoderStates().size(); ++k) {
for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
// retrieve all the aligned contexts computed by the attention mechanism
auto att = rnn_->at(0)
->as<rnn::StackedCell>()

View file

@ -28,16 +28,15 @@ public:
class DecoderState {
protected:
rnn::States states_;
Expr probs_;
std::vector<Ptr<EncoderState>> encStates_;
Ptr<data::CorpusBatch> batch_;
Expr targetEmbeddings_;
Expr targetMask_;
Expr targetIndices_;
Expr probs_;
rnn::States states_;
Ptr<data::CorpusBatch> batch_;
// Keep track of current target token position during translation
size_t position_{0};

View file

@ -699,7 +699,7 @@ public:
int dimSrcWords = encoderContext->shape()[-2];
int dims = encoderMask->shape().size();
//int dims = encoderMask->shape().size();
encoderMask = atleast_nd(encoderMask, 4);
encoderMask = reshape(transposeTimeBatch(encoderMask),
{1, dimBatch, 1, dimSrcWords});
@ -748,7 +748,7 @@ public:
// Iterate over multiple encoders and simply stack the attention blocks
if(encoderContexts.size() > 0) {
// multiple encoders are applied one after another
for(int j = 0; j < encoderContexts.size(); ++j) {
for(size_t j = 0; j < encoderContexts.size(); ++j) {
std::string prefix = prefix_ + "_l" + std::to_string(i) + "_context";
if(j > 0)
prefix += "_enc" + std::to_string(j + 1);
@ -775,7 +775,7 @@ public:
// [-4: beam depth=1, -3: max length, -2: batch size, -1: vocab dim]
Expr logits = output_->apply(decoderContext);
int dimTrgVoc = opt<std::vector<int>>("dim-vocabs")[batchIndex_];
//int dimTrgVoc = opt<std::vector<int>>("dim-vocabs")[batchIndex_];
// return unnormalized(!) probabilities
auto nextState = New<TransformerState>(

View file

@ -68,7 +68,7 @@ public:
models_.resize(graphs_.size());
ThreadPool pool(graphs_.size(), graphs_.size());
for(int i = 0; i < graphs_.size(); ++i) {
for(size_t i = 0; i < graphs_.size(); ++i) {
pool.enqueue(
[=](int j) {
models_[j] = New<Model>(temp);

View file

@ -119,12 +119,14 @@ public:
recState = dropout(recState, dropMaskState_);
auto mappedState = dot(recState, Wa_);
if(layerNorm_)
if(nematusNorm_)
if(layerNorm_) {
if(nematusNorm_) {
mappedState = layerNorm(
mappedState, W_comb_att_lns_, W_comb_att_lnb_, NEMATUS_LN_EPS);
else
} else {
mappedState = layerNorm(mappedState, gammaState_);
}
}
auto attReduce = attOps(va_, mappedContext_, mappedState);
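The braces added around the nested if(nematusNorm_) guard against the dangling-else hazard: an else binds to the nearest unmatched if, regardless of indentation, so the unbraced version can silently change which condition the else belongs to. A minimal sketch with hypothetical flags:

#include <iostream>

int main() {
  bool layerNorm = false, nematusNorm = false;
  // Unbraced, the else would bind to if(nematusNorm), i.e. it would run
  // when layerNorm && !nematusNorm -- not when !layerNorm as the
  // indentation might suggest. Braces make the intent explicit:
  if(layerNorm) {
    if(nematusNorm) {
      std::cout << "nematus layer norm\n";
    } else {
      std::cout << "plain layer norm\n";
    }
  }
  return 0;
}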

View file

@ -14,7 +14,7 @@ struct GRUFastNodeOp : public NaryNodeOp {
NodeOps forwardOps() {
std::vector<Tensor> inputs;
for(int i = 0; i < children_.size(); ++i)
for(size_t i = 0; i < children_.size(); ++i)
inputs.push_back(child(i)->val());
return {NodeOp(GRUFastForward(val_, inputs, final_))};
@ -56,7 +56,7 @@ struct LSTMCellNodeOp : public NaryNodeOp {
NodeOps forwardOps() {
std::vector<Tensor> inputs;
for(int i = 0; i < children_.size(); ++i)
for(size_t i = 0; i < children_.size(); ++i)
inputs.push_back(child(i)->val());
return {NodeOp(LSTMCellForward(val_, inputs))};
@ -92,7 +92,7 @@ struct LSTMOutputNodeOp : public NaryNodeOp {
NodeOps forwardOps() {
std::vector<Tensor> inputs;
for(int i = 0; i < children_.size(); ++i)
for(size_t i = 0; i < children_.size(); ++i)
inputs.push_back(child(i)->val());
return {NodeOp(LSTMOutputForward(val_, inputs))};

View file

@ -98,7 +98,7 @@ public:
int lastDimInput = options_->get<int>("dimInput");
for(int i = 0; i < stackableFactories_.size(); ++i) {
for(size_t i = 0; i < stackableFactories_.size(); ++i) {
auto sf = stackableFactories_[i];
if(sf->is<CellFactory>()) {
@ -142,7 +142,7 @@ public:
Ptr<RNN> construct() {
auto rnn = New<RNN>(graph_, options_);
for(int i = 0; i < layerFactories_.size(); ++i) {
for(size_t i = 0; i < layerFactories_.size(); ++i) {
auto lf = layerFactories_[i];
lf->getOptions()->merge(options_);

View file

@ -161,7 +161,7 @@ public:
Expr output;
Expr layerInput = input;
for(int i = 0; i < rnns_.size(); ++i) {
for(size_t i = 0; i < rnns_.size(); ++i) {
auto lazyInput = layerInput;
auto cell = rnns_[i]->at(0);
@ -188,7 +188,7 @@ public:
Expr output;
Expr layerInput = input;
for(int i = 0; i < rnns_.size(); ++i) {
for(size_t i = 0; i < rnns_.size(); ++i) {
Expr lazyInput;
auto cell = rnns_[i]->at(0);
auto lazyInputs = cell->getLazyInputs(shared_from_this());
@ -217,7 +217,7 @@ public:
Expr output;
Expr layerInput = input;
for(int i = 0; i < rnns_.size(); ++i) {
for(size_t i = 0; i < rnns_.size(); ++i) {
auto lazyInput = layerInput;
auto cell = rnns_[i]->at(0);

View file

@ -221,7 +221,7 @@ public:
= stackables_[0]->as<Cell>()->applyState(mappedInputs, state, mask);
for(int i = 1; i < stackables_.size(); ++i) {
for(size_t i = 1; i < stackables_.size(); ++i) {
if(stackables_[i]->is<Cell>()) {
auto hiddenNext
= stackables_[i]->as<Cell>()->apply(lastInputs_, hidden, mask);

View file

@ -84,6 +84,7 @@ private:
size_t available_{0};
size_t step_{128 * 1024 * 1024};
size_t alignment_{256};
bool throw_{false};
std::set<Gap> gaps_;
@ -161,8 +162,8 @@ public:
size_t step,
size_t alignment = 256)
: device_(DispatchDevice(deviceId, alignment)),
step_(step),
available_(0),
step_(step),
alignment_(alignment) {
reserve(bytes);
}
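Swapping step_(step) and available_(0) makes the initializer list follow the members' declaration order. C++ initializes members in declaration order no matter how the list is written, so a mismatched list triggers -Wreorder and can mask reads of not-yet-initialized members. A minimal sketch:

#include <cstddef>

struct Alloc {
  size_t available_;
  size_t step_;
  // available_ is constructed before step_ regardless of list order;
  // writing the list in declaration order silences -Wreorder.
  Alloc(size_t step) : available_(0), step_(step) {}
};

int main() {
  Alloc a(128);
  return (int)a.available_;
}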

View file

@ -23,12 +23,12 @@ void gAddGeneric(Functor functor,
float scale = 1.0) {
int outLength = out.shape().elements();
bool same = outLength == full.elements();
for(int i = 0; i < K; ++i)
for(size_t i = 0; i < K; ++i)
same = same && outLength == ins[i].shape().elements();
constexpr size_t N = functional::Shape::size();
functional::Array<int, N> len;
for(int i = 0; i < N; ++i)
for(size_t i = 0; i < N; ++i)
len[i] = full[i] / out.shape()[i];
functional::Array<int, N> dims;
@ -75,7 +75,7 @@ void gAddReduce(Functor functor,
int cols = full.back();
bool same = true;
for(int i = 0; i < K; ++i)
for(size_t i = 0; i < K; ++i)
same = same && ins[i].shape().elements() == full.elements();
for(int j = 0; j < rows; ++j) {
@ -88,7 +88,7 @@ void gAddReduce(Functor functor,
for(int id = 0; id < cols; ++id) {
full.dims(j * cols + id, dims);
functional::Array<int, K> indices;
for(int i = 0; i < K; ++i)
for(size_t i = 0; i < K; ++i)
indices[i] = ins[i].shape().bindex(dims);
sum += functional::apply(functor, ins, indices);
}
@ -114,7 +114,7 @@ void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
cpu::gAddReduce(functor, full, gOut, gIns, scale);
} else if(out->shape() == full) {
bool broadcast = false;
for(int i = 0; i < K; ++i)
for(size_t i = 0; i < K; ++i)
broadcast = broadcast || gOut.shape() != gIns[i].shape();
cpu::gAddEqual(functor, gOut, gIns, scale, broadcast);
} else {

View file

@ -34,7 +34,7 @@ struct E {
// increase index for current dimension by stride or 0 if broadcasting.
// bstride(i) is a look-up value: equal to the stride if the
// corresponding dim is larger than 1, or 0 if the dim is 1.
for(int k = 0; k < K; ++k)
for(size_t k = 0; k < K; ++k)
indices[k] += tensors[k].shape().bstride(I);
}
}
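The comment above is the heart of the broadcasting scheme: a dimension of size 1 gets a broadcast stride (bstride) of 0, so the flat index does not advance along that axis and the single element is reused. A toy sketch of the idea with hardcoded strides (hypothetical, not the functional::Shape API):

#include <iostream>

int main() {
  // A 1x3 row broadcast against a 4x3 output: dim 0 has size 1, so its
  // broadcast stride is 0 and every output row reads the same input row.
  float row[3] = {1.f, 2.f, 3.f};
  int bstride0 = 0;  // size-1 dim: stride replaced by 0
  int bstride1 = 1;
  for(int i = 0; i < 4; ++i)
    for(int j = 0; j < 3; ++j)
      std::cout << row[i * bstride0 + j * bstride1] << (j == 2 ? "\n" : " ");
  return 0;
}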

View file

@ -52,10 +52,10 @@ inline void gInsertCols(float* out,
size_t offset_out,
size_t offset_in,
float beta) {
for(int j = 0; j < rows; ++j) {
for(size_t j = 0; j < rows; ++j) {
float* rowOut = out + j * cols_out + offset_out;
const float* rowIn = in + j * cols_in + offset_in;
for(int i = 0; i < cols; ++i) {
for(size_t i = 0; i < cols; ++i) {
rowOut[i] = rowIn[i] + beta * rowOut[i];
}
}
@ -85,7 +85,7 @@ void Concatenate1(Tensor out, const std::vector<Tensor>& inputs) {
}
void Concatenate(Tensor out, const std::vector<Tensor>& inputs, int ax) {
if(ax == out->shape().size() - 1)
if(ax == (int)out->shape().size() - 1)
Concatenate1(out, inputs);
else
ConcatCont(out, inputs, ax);
@ -144,7 +144,7 @@ void SplitCont(std::vector<Tensor>& outputs, const Tensor in, int axis) {
}
void Deconcatenate(std::vector<Tensor>& outputs, const Tensor in, int ax) {
if(ax == in->shape().size() - 1)
if(ax == (int)in->shape().size() - 1)
Split1(outputs, in);
else
SplitCont(outputs, in, ax);
@ -228,8 +228,8 @@ template <bool add>
void TransposeGeneric(Tensor out, Tensor in, const std::vector<int>& vAxis) {
functional::Array<int, functional::Shape::size()> permute;
int diff = functional::Shape::size() - vAxis.size();
for(int i = 0; i < permute.size(); ++i)
if(i < diff)
for(size_t i = 0; i < permute.size(); ++i)
if((int)i < diff)
permute[i] = i;
else
permute[i] = vAxis[i - diff] + diff;
@ -244,7 +244,7 @@ void TransposeGeneric(Tensor out, Tensor in, const std::vector<int>& vAxis) {
for(int index = 0; index < length; ++index) {
gOut.shape().dims(index, oDims);
for(int i = 0; i < N; ++i)
for(size_t i = 0; i < N; ++i)
pDims[permute[i]] = oDims[i];
if(add)
gOut[index] += gIn[pDims];
@ -339,17 +339,17 @@ void SoftmaxGrad(Tensor grad_, Tensor adj_, Tensor val_) {
const float* adj = adj_->data();
const float* val = val_->data();
for(size_t j = 0; j < rows; ++j) {
for(int j = 0; j < rows; ++j) {
float* gradRow = grad + j * cols;
const float* adjRow = adj + j * cols;
const float* valRow = val + j * cols;
float sum = 0.f;
for(size_t i = 0; i < cols; ++i) {
for(int i = 0; i < cols; ++i) {
sum += valRow[i] * adjRow[i];
}
for(size_t i = 0; i < cols; ++i) {
for(int i = 0; i < cols; ++i) {
gradRow[i] += valRow[i] * (adjRow[i] - sum);
}
}
@ -389,7 +389,7 @@ void CopyRows(Tensor out_,
const float* in = in_->data();
#pragma omp parallel for
for(int j = 0; j < rows; ++j) {
for(size_t j = 0; j < rows; ++j) {
size_t dst = j;
size_t src = indices[j];
@ -409,14 +409,14 @@ void PasteRows(Tensor out_,
float* out = out_->data();
const float* in = in_->data();
for(int j = 0; j < rows; ++j) {
for(size_t j = 0; j < rows; ++j) {
size_t dst = indices[j]; // not a permutation - may alias, unlike PasteCols
size_t src = j;
float* rowOut = out + dst * cols;
const float* rowIn = in + src * cols;
for(int i = 0; i < cols; ++i) {
for(size_t i = 0; i < cols; ++i) {
rowOut[i] += rowIn[i];
}
}
@ -433,11 +433,11 @@ void CopyCols(Tensor out_,
const float* in = in_->data();
#pragma omp parallel for
for(int j = 0; j < rows; ++j) {
for(size_t j = 0; j < rows; ++j) {
const float* rowIn = in + j * colsIn;
float* rowOut = out + j * colsOut;
for(int i = 0; i < colsOut; ++i) {
for(size_t i = 0; i < colsOut; ++i) {
rowOut[i] = rowIn[indices[i]];
}
}
@ -456,11 +456,11 @@ void PasteCols(Tensor out_,
/* n.b. Unlike PasteRows, currently appears safe to assume indices[i] is a
* permutation i.e. no racy aliases, and no need to sum vs. just assign.
*/
for(int j = 0; j < rows; ++j) {
for(size_t j = 0; j < rows; ++j) {
const float* rowIn = in + j * colsIn;
float* rowOut = out + j * colsOut;
for(int i = 0; i < colsIn; ++i) {
for(size_t i = 0; i < colsIn; ++i) {
rowOut[indices[i]] += rowIn[i];
}
}
@ -606,18 +606,19 @@ void GRUFastBackward(std::vector<Tensor> outputs,
rowOutXW[l] += dfdxW_x;
if(outSU)
rowOutSU[l] += dfdxW_x * r;
if(outB)
if(outB) {
if(final)
outB[l] += dfdxW_x * r;
else
outB[l] += dfdxW_x;
}
}
}
}
void CrossEntropyPick(Tensor out_, Tensor in_, Tensor pick_) {
float* out = out_->data();
Shape& outShape = out_->shape();
//Shape& outShape = out_->shape();
const float* in = in_->data();
Shape& inShape = in_->shape();
float* pick = pick_->data();
@ -709,14 +710,14 @@ void Att(Tensor out_, Tensor va_, Tensor context_, Tensor state_) {
int cols = k;
#pragma omp parallel for
for(size_t j = 0; j < rows; ++j) {
for(int j = 0; j < rows; ++j) {
const float* vaRow = va;
const float* ctxRow = ctx + (j % (b * t)) * cols;
const float* stateRow = state + ((j / (b * t)) * b + j % b) * cols;
float sum = 0.f;
#pragma omp simd reduction(+ : sum)
for(size_t i = 0; i < cols; ++i) {
for(int i = 0; i < cols; ++i) {
float z = ctxRow[i] + stateRow[i];
sum += std::tanh(z) * vaRow[i];
}
@ -930,7 +931,7 @@ void Shift(Tensor out_,
float padValue,
bool invert) {
int offset = 0;
for(int i = 0; i < shift.size(); ++i)
for(size_t i = 0; i < shift.size(); ++i)
offset += in_->shape().stride(i) * shift[i];
if(invert)
@ -953,7 +954,7 @@ void Shift(Tensor out_,
void ShiftGrad(Tensor out_, Tensor in_, marian::Shape shift, bool invert) {
int offset = 0;
for(int i = 0; i < shift.size(); ++i)
for(size_t i = 0; i < shift.size(); ++i)
offset += in_->shape().stride(i) * shift[i];
if(invert)
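Shift turns a per-dimension shift into a single flat offset by summing stride(i) * shift[i], exactly as the loops above do. A small sketch with a hypothetical row-major 2x3 tensor:

#include <iostream>

int main() {
  // Row-major strides of a 2x3 tensor are {3, 1}; shifting by {1, 0}
  // (one step along dim 0) is a flat offset of 1*3 + 0*1 = 3.
  int stride[2] = {3, 1};
  int shift[2] = {1, 0};
  int offset = 0;
  for(int i = 0; i < 2; ++i)
    offset += stride[i] * shift[i];
  std::cout << offset << "\n";  // prints 3
  return 0;
}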

View file

@ -21,10 +21,9 @@ namespace marian {
class TensorBase : public std::enable_shared_from_this<TensorBase> {
private:
Type type_{Type::float32};
Shape shape_;
Ptr<MemoryPiece> memory_;
Shape shape_;
Type type_{Type::float32};
Ptr<Backend> backend_;
public:
@ -221,7 +220,7 @@ public:
type_);
if(backend_->getDevice().type == DeviceType::cpu) {
for(int i = 0; i < k.size(); ++i)
for(size_t i = 0; i < k.size(); ++i)
data()[k[i]] = v[i];
}
#ifdef CUDA_FOUND
@ -270,18 +269,18 @@ public:
std::vector<T> values(totSize);
get(values);
size_t dispCols = 5;
int dispCols = 5;
if(isFloat(type_))
strm << std::fixed << std::setprecision(8) << std::setfill(' ');
else
strm << std::fixed << std::setprecision(0) << std::setfill(' ');
for(int i = 0; i < values.size(); ++i) {
for(size_t i = 0; i < values.size(); ++i) {
std::vector<int> dims;
shape().dims(i, dims);
bool disp = true;
for(int j = 0; j < dims.size(); ++j)
for(size_t j = 0; j < dims.size(); ++j)
disp = disp && (dims[j] < dispCols || dims[j] >= shape()[j] - dispCols);
if(disp) {
@ -320,14 +319,14 @@ public:
bool prev = true;
for(int j = dims.size() - 1; j >= 0; --j) {
if(j < dims.size() - 1)
if(j < (int)dims.size() - 1)
prev = prev && dims[j + 1] + 1 == shape()[j + 1];
if(prev && dims[j] + 1 == dispCols && shape()[j] > 2 * dispCols) {
if(j < dims.size() - 1)
if(j < (int)dims.size() - 1)
for(int k = 0; k <= j; ++k)
strm << " ";
strm << "... ";
if(j < dims.size() - 1)
if(j < (int)dims.size() - 1)
strm << std::endl;
break;
}

View file

@ -23,7 +23,7 @@ public:
int pos = 0;
std::vector<std::thread> group;
// iterate over all shards
for(int idx = 0; idx < graphs_.size(); ++idx) {
for(size_t idx = 0; idx < graphs_.size(); ++idx) {
int size = std::min(shardSize, totalSize);
group.emplace_back(func, idx, pos);
@ -160,7 +160,7 @@ public:
auto gather = [this, params](size_t idx, int pos) {
// copy parameter shard to each graph, apart from last graph
for(int i = 0; i < graphs_.size() - 1; ++i) {
for(int i = 0; i < (int)graphs_.size() - 1; ++i) {
auto subParam
= graphs_[i]->params()->vals()->subtensor(pos, params[idx]->size());
subParam->copyFrom(params[idx]);

View file

@ -21,7 +21,7 @@ void AsyncGraphGroup::fetchParams(Tensor oldParams,
int pos = 0;
std::vector<std::thread> threads;
for(int idx = 0; idx < devices_.size(); idx++) {
for(size_t idx = 0; idx < devices_.size(); idx++) {
threads.emplace_back(std::thread(
[&](int idx, int pos) {
// individual mutex per-shard
@ -44,7 +44,7 @@ void AsyncGraphGroup::pushGradients(Tensor newGrads,
// add instead of copy?
std::vector<std::thread> threads;
int pos = 0;
for(int idx = 0; idx < devices_.size(); idx++) {
for(size_t idx = 0; idx < devices_.size(); idx++) {
threads.emplace_back(std::thread(
[&](int idx, int pos) {
// individual mutex per-shard

View file

@ -132,8 +132,8 @@ public:
}
void save(Ptr<ExpressionGraph> graph, bool final = false) {
int idx = 0;
for(int i = 0; i < graphs_.size(); ++i) {
size_t idx = 0;
for(size_t i = 0; i < graphs_.size(); ++i) {
if(graph == graphs_[i]) {
idx = i;
break;

View file

@ -99,7 +99,7 @@ void MultiNodeGraphGroup::setupClients(Ptr<data::Batch> batch) {
* batch.
*/
void MultiNodeGraphGroup::runBatchThroughClientGraphs(Ptr<data::Batch> batch) {
for(int i = 0; i < devices_.size(); i++) {
for(size_t i = 0; i < devices_.size(); i++) {
THREAD_GUARD(clientBuilders_[i]->build(clientGraphs_[i], batch);
clientGraphs_[i]->forward();
clientGraphs_[i]->getBackend()->synchronize(););
@ -130,7 +130,7 @@ void MultiNodeGraphGroup::calculateNodeSizes() {
void MultiNodeGraphGroup::initClientCpuBuffers() {
// Initialize CPU buffers used to send GPU data through MPI (can't send
// directly from GPUs)
for(int i = 0; i < devices_.size(); i++) {
for(size_t i = 0; i < devices_.size(); i++) {
// @TODO Optimization: Use full size to copy in one go, then send gradients
// and receive parameters in parallel
size_t size = nodeSizes_[mpi_my_rank_];
@ -163,7 +163,7 @@ void MultiNodeGraphGroup::initClientCommOverlapVars() {
*/
void MultiNodeGraphGroup::initClientCommOverlapGpuTensors() {
size_t modelSize = clientGraphs_[0]->params()->vals()->size();
for(int client = 0; client < devices_.size(); client++) {
for(size_t client = 0; client < devices_.size(); client++) {
// Communication overlap buffer (for grads + params)
Tensor commOverlapBuffer
= newTensor(modelSize, clientGraphs_[client]->getBackend());
@ -193,7 +193,7 @@ void MultiNodeGraphGroup::setupServerShards() {
// CPU buffer for receiving/sending grads/params
serverShardBufferCPU_ = std::vector<float>(nodeSizes_[mpi_my_rank_]);
// Shard optimizers
for(int shard = 0; shard < devices_.size(); shard++) {
for(size_t shard = 0; shard < devices_.size(); shard++) {
shardOptimizers_.push_back(Optimizer(options_));
}
// Mutexes to prevent simultaneous access to tensors and/or optimizers
@ -208,7 +208,7 @@ void MultiNodeGraphGroup::setupServerShards() {
void MultiNodeGraphGroup::calculateShardSizes() {
size_t nodeSize = nodeSizes_[mpi_my_rank_];
size_t shardSize = ceilf(((float)nodeSize) / devices_.size());
for(int shard = 0; shard < devices_.size(); shard++) {
for(size_t shard = 0; shard < devices_.size(); shard++) {
size_t remainingNodeSize = nodeSize - (shardSize * shard);
// Takes care of edge case where last shard is smaller than the others
shardSizes_.push_back(std::min(shardSize, remainingNodeSize));
@ -224,7 +224,7 @@ void MultiNodeGraphGroup::initShardGpuTensors() {
for(int i = 0; i < mpi_my_rank_; i++) {
offset += nodeSizes_[i];
}
for(int shard = 0; shard < devices_.size(); shard++) {
for(size_t shard = 0; shard < devices_.size(); shard++) {
Tensor gpuParams
= newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend());
gpuParams->copyFrom(clientGraphs_[0]->params()->vals()->subtensor(
@ -379,7 +379,7 @@ void MultiNodeGraphGroup::launchCommOverlapThreads() {
*/
void MultiNodeGraphGroup::shutDownCommOverlapThreads() {
stopClientCommThreads_ = true;
for(int gpu = 0; gpu < devices_.size(); gpu++) {
for(size_t gpu = 0; gpu < devices_.size(); gpu++) {
clientCommOverlapBuffersFilled_[gpu] = true;
cvClientCommOverlapBuffersFilled_[gpu]
.notify_one(); // Unblock thread from lock, then join it

View file

@ -382,7 +382,9 @@ protected:
* number of GPUs on the other nodes.
*/
void loadDeviceConfig(std::vector<size_t> deviceConfig) {
size_t index = 0, node = 0, nClientsSeen = 0;
size_t index = 0;
int node = 0;
int nClientsSeen = 0;
numberClientsOfNodes_ = std::vector<int>(mpi_comm_world_size_, 0);
while(index < deviceConfig.size()) {
if(numberClientsOfNodes_[node] == 0) {
@ -407,9 +409,8 @@ public:
*/
MultiNodeGraphGroup(Ptr<Config> options)
: GraphGroup(options),
tau_{options_->get<size_t>("optimizer-delay")},
// useLocalOpt_{options_->get<bool>("multi-node-local-optimizers")},
clientCommOverlap{options_->get<bool>("multi-node-overlap")} {
clientCommOverlap{options_->get<bool>("multi-node-overlap")},
tau_{options_->get<size_t>("optimizer-delay")} {
// Set up devices for this node
setupMPI(); // Setup MPI before creating device vectors
std::vector<size_t> devices;
@ -448,8 +449,8 @@ public:
*/
void update(Ptr<data::Batch> batch) {
ABORT_IF(finalized_, "Training has already finished.");
if(batchIter_ % mpi_comm_world_size_
== mpi_my_rank_) { // Only take batch assigned to this node
// Only take batch assigned to this node
if(batchIter_ % mpi_comm_world_size_ == (size_t)mpi_my_rank_) {
execute(batch);
}
batchIter_++;
@ -489,8 +490,8 @@ public:
* Save model of given graph to disk.
*/
void save(Ptr<ExpressionGraph> graph, bool final = false) {
int idx = 0;
for(int i = 0; i < clientGraphs_.size(); ++i) {
size_t idx = 0;
for(size_t i = 0; i < clientGraphs_.size(); ++i) {
if(graph == clientGraphs_[i]) {
idx = i;
break;
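The rewritten condition in update keeps the same round-robin scheme, now with the MPI rank cast to size_t to match batchIter_: of W nodes, node r processes exactly the batches whose counter is congruent to r modulo W. A minimal sketch with hypothetical values:

#include <cstddef>
#include <iostream>

int main() {
  size_t worldSize = 3;  // number of MPI nodes (hypothetical)
  int myRank = 1;        // MPI ranks are signed ints, hence the cast below
  for(size_t batchIter = 0; batchIter < 9; ++batchIter)
    if(batchIter % worldSize == (size_t)myRank)  // takes batches 1, 4, 7
      std::cout << "node " << myRank << " takes batch " << batchIter << "\n";
  return 0;
}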

View file

@ -101,10 +101,10 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
std::vector<std::vector<Ptr<data::Batch>>> delayedBatches;
for(int i = 0; i < delay_; ++i) {
for(size_t i = 0; i < delay_; ++i) {
if(i * devs < batches.size()) {
delayedBatches.emplace_back();
for(int j = 0; j < devs; ++j) {
for(size_t j = 0; j < devs; ++j) {
size_t index = i * devs + j;
if(index < batches.size())
delayedBatches.back().push_back(batches[i * devs + j]);
@ -249,8 +249,8 @@ void SyncGraphGroup::save(bool final) {
}
void SyncGraphGroup::save(Ptr<ExpressionGraph> graph, bool final) {
int idx = 0;
for(int i = 0; i < graphs_.size(); ++i) {
size_t idx = 0;
for(size_t i = 0; i < graphs_.size(); ++i) {
if(graph == graphs_[i]) {
idx = i;
break;

View file

@ -52,17 +52,17 @@ public:
bool keepGoing() {
// stop if it reached the maximum number of epochs
int stopAfterEpochs = options_->get<size_t>("after-epochs");
size_t stopAfterEpochs = options_->get<size_t>("after-epochs");
if(stopAfterEpochs > 0 && state_->epochs > stopAfterEpochs)
return false;
// stop if it reached the maximum number of batch updates
int stopAfterBatches = options_->get<size_t>("after-batches");
size_t stopAfterBatches = options_->get<size_t>("after-batches");
if(stopAfterBatches > 0 && state_->batches >= stopAfterBatches)
return false;
// stop if the first validator did not improve for a given number of checks
int stopAfterStalled = options_->get<size_t>("early-stopping");
size_t stopAfterStalled = options_->get<size_t>("early-stopping");
if(stopAfterStalled > 0 && !validators_.empty()
&& stalled() >= stopAfterStalled)
return false;
@ -313,20 +313,20 @@ public:
if(strategy == "epoch" || strategy == "epoch+batches"
|| strategy == "epoch+stalled") {
int startEpoch
size_t startEpoch
= options_->get<std::vector<size_t>>("lr-decay-start").front();
if(startEpoch && state.epochs >= startEpoch)
decay = true;
}
if(strategy == "epoch+batches") {
int startBatches
size_t startBatches
= options_->get<std::vector<size_t>>("lr-decay-start")[1];
if(startBatches && state.batches >= startBatches)
decay = true;
}
if(strategy == "epoch+stalled") {
int startStalled
size_t startStalled
= options_->get<std::vector<size_t>>("lr-decay-start")[1];
if(startStalled && state.maxStalled >= startStalled)
decay = true;
@ -361,7 +361,7 @@ public:
if(factor > 0.0) {
if("batches" == options_->get<std::string>("lr-decay-strategy")) {
int start
size_t start
= options_->get<std::vector<size_t>>("lr-decay-start").front();
int freq = options_->get<size_t>("lr-decay-freq");

View file

@ -92,7 +92,7 @@ public:
observer->actAfterBatches(*this);
}
void newStalled(int num) {
void newStalled(size_t num) {
stalled = num;
if(num > maxStalled)
++maxStalled;

View file

@ -58,7 +58,7 @@ public:
Validator(std::vector<Ptr<Vocab>> vocabs,
Ptr<Config> options,
bool lowerIsBetter = true)
: ValidatorBase(lowerIsBetter), options_(options), vocabs_(vocabs) {}
: ValidatorBase(lowerIsBetter), vocabs_(vocabs), options_(options) {}
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs) {
using namespace data;
@ -508,7 +508,7 @@ protected:
size_t width = subBatch->batchWidth();
Words ref; // fill ref
for(int i = 0; i < width; ++i) {
for(size_t i = 0; i < width; ++i) {
Word w = subBatch->data()[i * size + no];
if(w == eos)
break;

View file

@ -46,7 +46,7 @@ public:
// Use alignments from the first scorer, even if ensemble
alignments = scorers_[0]->getAlignment();
for(int i = 0; i < keys.size(); ++i) {
for(size_t i = 0; i < keys.size(); ++i) {
// Keys contains indices to vocab items in the entire beam.
// Values can be between 0 and beamSize * vocabSize.
int embIdx = keys[i] % vocabSize;
@ -72,7 +72,7 @@ public:
hypIdxTrans = hypIdx;
int beamHypIdx = hypIdx % beamSize;
if(beamHypIdx >= beam.size())
if(beamHypIdx >= (int)beam.size())
beamHypIdx = beamHypIdx % beam.size();
if(first)
@ -84,7 +84,7 @@ public:
if(options_->get<bool>("n-best")) {
std::vector<float> breakDown(states.size(), 0);
beam[beamHypIdx]->GetCostBreakdown().resize(states.size(), 0);
for(int j = 0; j < states.size(); ++j) {
for(size_t j = 0; j < states.size(); ++j) {
int key = embIdx + hypIdxTrans * vocabSize;
breakDown[j] = states[j]->breakDown(key)
+ beam[beamHypIdx]->GetCostBreakdown()[j];
@ -213,8 +213,8 @@ public:
int dimBatch = batch->size();
for(int i = 0; i < localBeamSize; ++i) {
for(int j = 0; j < beams.size(); ++j) {
for(size_t i = 0; i < localBeamSize; ++i) {
for(size_t j = 0; j < beams.size(); ++j) {
auto& beam = beams[j];
if(i < beam.size()) {
auto hyp = beam[i];
@ -238,7 +238,7 @@ public:
auto totalCosts = prevCosts;
// BUGBUG: it's not cost but score (higher=better)
for(int i = 0; i < scorers_.size(); ++i) {
for(size_t i = 0; i < scorers_.size(); ++i) {
states[i] = scorers_[i]->step(
graph, states[i], hypIndices, embIndices, dimBatch, localBeamSize);

View file

@ -63,14 +63,12 @@ public:
}
protected:
UPtr<OutputFileStream> outStrm_;
boost::mutex mutex_;
long nextId_;
typedef std::map<long, std::pair<std::string, std::string>> Outputs;
Outputs outputs_;
long nextId_;
UPtr<OutputFileStream> outStrm_;
Ptr<PrintingStrategy> printing_;
boost::mutex mutex_;
};
class StringCollector {

View file

@ -31,13 +31,12 @@ std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options) {
std::vector<Ptr<Scorer>> scorers;
auto models = options->get<std::vector<std::string>>("models");
int dimVocab = options->get<std::vector<int>>("dim-vocabs").back();
std::vector<float> weights(models.size(), 1.f);
if(options->has("weights"))
weights = options->get<std::vector<float>>("weights");
int i = 0;
size_t i = 0;
for(auto model : models) {
std::string fname = "F" + std::to_string(i);
auto modelOptions = New<Config>(*options);