Mirror of https://github.com/mozilla/marian.git
fixed all warnings discovered by Visual Studio
Parent: 2bf44365ff
Commit: 14631160db
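
The changes below apply a small set of recurring idioms to silence MSVC warnings without changing behavior. A minimal, self-contained sketch of the three most common ones (the names and the example function are illustrative, not from the repository):

#include <cassert>
#include <string>

// C4267/C4244: implicit size_t -> int narrowing; the fix is an explicit
// cast that marks the narrowing as intentional.
int descrOffset(const std::string& header) {
  return (int)header.find("descr") + 9;
}

// C4100: unreferenced formal parameter; commenting out the parameter name
// keeps the signature (and any override) intact.
size_t words(int /*which*/ = 0) { return 0; }

// C4189: a variable referenced only inside assert() becomes unused in
// release builds where NDEBUG strips the assert; naming it in a no-op
// expression statement keeps both configurations quiet.
void checkEndianness(bool littleEndian) {
  assert(littleEndian); littleEndian;
}

int main() {
  checkEndianness(true);
  return descrOffset("{'descr': '<f4'}") > 0 ? 0 : 1;
}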
@@ -70,19 +70,19 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh
     int loc1, loc2;

     //fortran order
-    loc1 = header.find("fortran_order")+16;
+    loc1 = (int)header.find("fortran_order")+16;
     fortran_order = (header.substr(loc1,5) == "True" ? true : false);

     //shape
-    loc1 = header.find("(");
-    loc2 = header.find(")");
+    loc1 = (int)header.find("(");
+    loc2 = (int)header.find(")");
     std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
     if(str_shape.length() == 0) ndims = 0;
     else if(str_shape[str_shape.size()-1] == ',') ndims = 1;
-    else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
+    else ndims = (unsigned int)std::count(str_shape.begin(),str_shape.end(),',')+1;
     shape = new unsigned int[ndims];
     for(unsigned int i = 0;i < ndims;i++) {
-        loc1 = str_shape.find(",");
+        loc1 = (int)str_shape.find(",");
         shape[i] = atoi(str_shape.substr(0,loc1).c_str());
         str_shape = str_shape.substr(loc1+1);
     }

@@ -90,15 +90,15 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh
     //endian, word size, data type
     //byte order code | stands for not applicable.
     //not sure when this applies except for byte array
-    loc1 = header.find("descr")+9;
+    loc1 = (int)header.find("descr")+9;
     bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
-    assert(littleEndian);
+    assert(littleEndian); littleEndian;

     //char type = header[loc1+1];
     //assert(type == map_type(T));

     std::string str_ws = header.substr(loc1+2);
-    loc2 = str_ws.find("'");
+    loc2 = (int)str_ws.find("'");
     word_size = atoi(str_ws.substr(0,loc2).c_str());
 }

@@ -70,7 +70,7 @@ namespace cnpy {
     template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);


-    template<typename T> std::string tostring(T i, int pad = 0, char padval = ' ') {
+    template<typename T> std::string tostring(T i, int /*pad*/ = 0, char /*padval*/ = ' ') {
         std::stringstream s;
         s << i;
         return s.str();

@@ -162,7 +162,7 @@ namespace cnpy {

         unsigned long nels = 1;
         for (int m=0; m<ndims; m++ ) nels *= shape[m];
-        int nbytes = nels*sizeof(T) + npy_header.size();
+        auto nbytes = nels*sizeof(T) + npy_header.size();

         //get the CRC of the data to be added
         unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());

@@ -250,7 +250,7 @@ namespace cnpy {
         name(name), type(type_)
         {
             shape = dataShape;
-            word_size = word_size_;
+            word_size = (unsigned int)word_size_;
             bytes.resize(data.size());
             std::copy(data.begin(), data.end(), bytes.begin());
         }

@@ -278,15 +278,15 @@ namespace cnpy {
         const auto* shape = item.shape.data();
         const auto type = item.type;
         const auto word_size = item.word_size;
-        const unsigned int ndims = item.shape.size();
+        const unsigned int ndims = (unsigned int)item.shape.size();
         std::vector<char> npy_header = create_npy_header(type,word_size,shape,ndims);

         unsigned long nels = 1;
-        for (int m=0; m<ndims; m++ ) nels *= shape[m];
-        int nbytes = nels*word_size + npy_header.size();
+        for (size_t m=0; m<ndims; m++ ) nels *= shape[m];
+        auto nbytes = nels*word_size + npy_header.size();

         //get the CRC of the data to be added
-        unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());
+        unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],(uInt)npy_header.size());
         crc = crc32(crc,(unsigned char*)data,nels*word_size);

         //build the local header

@@ -330,7 +330,7 @@ namespace cnpy {
         fwrite(&global_header[0],sizeof(char),global_header.size(),fp);

         //build footer
-        unsigned short nrecs = items.size();
+        auto nrecs = items.size();
         std::vector<char> footer;
         footer += "PK"; //first part of sig
         footer += (unsigned short) 0x0605; //second part of sig

@@ -347,7 +347,7 @@ namespace cnpy {

         //close up
         fflush(fp);
-        bool bad = ferror(fp);
+        bool bad = ferror(fp) != 0;
         fclose(fp);

         // move to final location (atomically)

@@ -370,7 +370,7 @@ namespace cnpy {
         dict += tostring(word_size);
         dict += "', 'fortran_order': False, 'shape': (";
         dict += tostring(shape[0]);
-        for(int i = 1;i < ndims;i++) {
+        for(size_t i = 1;i < ndims;i++) {
             dict += ", ";
             dict += tostring(shape[i]);
         }

@@ -382,7 +382,7 @@ namespace cnpy {
         dict.back() = '\n';

         std::vector<char> header;
-        header += (char) 0x93;
+        header += (char) (0x93 - 0x100);
         header += "NUMPY";
         header += (char) 0x01; //major version of numpy format
         header += (char) 0x00; //minor version of numpy format

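A note on the header += (char) 0x93 change above: where char is signed and 8 bits wide, the constant 0x93 is out of range and MSVC reports the truncation as C4309. 0x93 - 0x100 is -109, an in-range value with the same bit pattern. A hypothetical check:

#include <cassert>

int main() {
  char magic = (char)(0x93 - 0x100);    // -109, same bits as 0x93
  assert((unsigned char)magic == 0x93); // still the NumPy magic byte
  return 0;
}
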
@@ -33,7 +33,7 @@ static double PYTHAG(double a, double b)

 int dsvd(float *a, int m, int n, float *w, float *v)
 {
-    int flag, i, its, j, jj, k, l, nm;
+    int flag, i, its, j, jj, k, l = 0, nm = 0; // (initializing to keep compiler happy)
     double c, f, h, s, x, y, z;
     double anorm = 0.0, g = 0.0, scale = 0.0;
     double *rv1;

@@ -79,11 +79,11 @@ std::vector<unsigned char> DecodeBase64(const std::string &input) {

     value = (value << 6) | d;
     if (i % 4 == 3) {
-      *out++ = value >> 16;
+      *out++ = (unsigned char)(value >> 16);
       if (i > 0 && input[i - 1] != '=')
-        *out++ = value >> 8;
+        *out++ = (unsigned char)(value >> 8);
       if (input[i] != '=')
-        *out++ = value;
+        *out++ = (unsigned char)value;
     }
   }

@@ -27,7 +27,7 @@ class CollectionStack {
     collectionStack.push(type);
   }
   void PopCollectionType(CollectionType::value type) {
-    assert(type == GetCurCollectionType());
+    assert(type == GetCurCollectionType()); type;
     collectionStack.pop();
   }

@@ -98,10 +98,6 @@ EmitterNodeType::value EmitterState::NextGroupType(
     else
       return EmitterNodeType::FlowMap;
   }
-
-  // can't happen
-  assert(false);
-  return EmitterNodeType::NoType;
 }

 void EmitterState::StartedDoc() {

@@ -167,10 +167,6 @@ class EmitterState {
     else
       return EmitterNodeType::BlockMap;
   }
-
-  // can't get here
-  assert(false);
-  return EmitterNodeType::NoType;
 }
};

@@ -126,6 +126,8 @@ struct convert<_Null> {
   } \
   }

+#pragma warning(push)
+#pragma warning(disable: 4127) // conditional expression is constant (the std::numeric_limits constants in macro above)
 #define YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(type) \
   YAML_DEFINE_CONVERT_STREAMABLE(type, -)

@@ -148,6 +150,7 @@ YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned char);
 YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(float);
 YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(double);
 YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long double);
+#pragma warning(pop)

 #undef YAML_DEFINE_CONVERT_STREAMABLE_SIGNED
 #undef YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED

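The two hunks above bracket the streamable-conversion macros in #pragma warning(push) / disable / pop, so the suppression of C4127 is scoped to exactly this region; the same pattern reappears below around the boost gzip include. A minimal sketch of the idiom, assuming a constant condition like the std::numeric_limits one named in the comment (the function here is illustrative):

#include <limits>

#pragma warning(push)
#pragma warning(disable: 4127) // conditional expression is constant

template <typename T>
bool isSignedType() {
  // std::numeric_limits<T>::is_signed is a compile-time constant; testing
  // it in a runtime if() is what draws C4127 on older MSVC.
  if (std::numeric_limits<T>::is_signed)
    return true;
  return false;
}

#pragma warning(pop)

int main() { return isSignedType<int>() ? 0 : 1; }
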
@@ -91,7 +91,6 @@ std::size_t node_data::size() const {
     default:
       return 0;
   }
-  return 0;
 }

 void node_data::compute_seq_size() const {

@@ -166,10 +166,10 @@ void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {

     // check for null
     if (!m_scanner.empty()) {
-      const Token& token = m_scanner.peek();
-      if (token.type == Token::BLOCK_ENTRY ||
-          token.type == Token::BLOCK_SEQ_END) {
-        eventHandler.OnNull(token.mark, NullAnchor);
+      const Token& token1 = m_scanner.peek();
+      if (token1.type == Token::BLOCK_ENTRY ||
+          token1.type == Token::BLOCK_SEQ_END) {
+        eventHandler.OnNull(token1.mark, NullAnchor);
         continue;
       }
     }

@@ -68,6 +68,8 @@ bool configureMPI(int argc, char** argv, bool sync) {
     "Your version of MPI does not support multi-threaded communication.");

   enable = true;
+#else
+  argc; argv; sync; // (unused)
 #endif
   return enable;
 }

@@ -10,7 +10,7 @@ namespace cli {

 // helper to replace environment-variable expressions of the form ${VARNAME} in
 // a string
-static std::string InterpolateEnvVars(std::string str) {
+static inline std::string InterpolateEnvVars(std::string str) {
   // temporary workaround for MS-internal PhillyOnAzure cluster: warm storage
   // presently has the form /hdfs/VC instead of /{gfs,hdfs}/CLUSTER/VC
 #if 1

@@ -74,7 +74,7 @@ constexpr uint32_t crc32(const char* str) {

 // This is the stop-recursion function
 template <>
-constexpr uint32_t crc32<size_t(-1)>(const char* str) {
+constexpr uint32_t crc32<size_t(-1)>(const char*) {
   return 0xFFFFFFFF;
 }

@@ -12,7 +12,7 @@ namespace marian {
 size_t Config::seed = (size_t)time(0);

 bool Config::has(const std::string& key) const {
-  return config_[key];
+  return !!config_[key];
 }

 YAML::Node Config::get(const std::string& key) const {

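The !! in Config::has above makes the YAML::Node-to-bool conversion explicit; returning the node directly relies on an implicit user-defined conversion that MSVC flags as C4800 ("forcing value to bool"). A stand-alone sketch with a stand-in type (Node here is hypothetical, not yaml-cpp):

#include <cassert>

// Stand-in for a class convertible to a non-bool integer type, similar in
// spirit to YAML::Node.
struct Node {
  int refs;
  operator int() const { return refs; }
};

bool has(const Node& n) {
  // return n;   // int -> bool truncation, MSVC warns C4800
  return !!n;    // double negation produces a genuine bool
}

int main() {
  assert(has(Node{2}));
  assert(!has(Node{0}));
  return 0;
}
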
@@ -30,7 +30,7 @@ public:
                bool validate = false) {
     std::vector<std::string> sargv;
     utils::Split(options, sargv, " ");
-    int argc = sargv.size();
+    int argc = (int)sargv.size();

     std::vector<char*> argv(argc);
     for(int i = 0; i < argc; ++i)

@@ -67,7 +67,7 @@ public:
     try {
       if(!get<bool>("ignore-model-config"))
         loadModelParameters(get<std::string>("model"));
-    } catch(std::runtime_error& e) {
+    } catch(std::runtime_error&) {
       LOG(info, "[config] No model configuration found in model file");
     }
   }

@@ -76,7 +76,7 @@ public:
     try {
       if(!get<bool>("ignore-model-config"))
         loadModelParameters(model);
-    } catch(std::runtime_error& e) {
+    } catch(std::runtime_error&) {
       LOG(info, "[config] No model configuration found in model file");
     }
   }

@@ -56,7 +56,7 @@ uint16_t guess_terminal_width(uint16_t max_width) {
 #endif
   // couldn't determine terminal width
   if(cols == 0)
-    cols = po::options_description::m_default_line_length;
+    cols = (uint16_t)po::options_description::m_default_line_length;
   return max_width ? std::min(cols, max_width) : cols;
 }

@@ -73,7 +73,7 @@ const std::set<std::string> PATHS = {"model",


 bool ConfigParser::has(const std::string& key) const {
-  return config_[key];
+  return !!config_[key];
 }

 void ConfigParser::validateOptions() const {

@@ -288,6 +288,8 @@ void ConfigParser::addOptionsModel(po::options_description& desc) {
      "Tie all embedding layers and output layer")
     ("transformer-heads", po::value<int>()->default_value(8),
      "Number of heads in multi-head attention (transformer)")
+    ("transformer-no-projection", po::value<bool>()->zero_tokens()->default_value(false),
+     "Omit linear projection after multi-head attention (transformer)")
     ("transformer-dim-ffn", po::value<int>()->default_value(2048),
      "Size of position-wise feed-forward network (transformer)")

@@ -332,6 +334,20 @@ void ConfigParser::addOptionsModel(po::options_description& desc) {
      ->multitoken(),
      "Convolution window widths in char-s2s model")
 #endif
+    // Frank's experiments
+    // Note: Don't forget to add these also in encoder_decoder.cpp, EncoderDecoder().
+    ("use-direct-sent-end-prob", po::value<bool>()->zero_tokens()->default_value(false),
+     "Enable Frank's direct sentence-end model (experimental) (transformer, requires --transformer-heads-top)")
+    ("transformer-heads-top", po::value<int>(), //->default_value(8),
+     "Number of heads in top layer, multi-head attention (transformer)")
+    ("transformer-coverage", po::value<bool>()->zero_tokens()->default_value(false),
+     "Enable Frank's coverage model, top layer only (experimental) (transformer)")
+    ("transformer-coverage-all", po::value<bool>()->zero_tokens()->default_value(false),
+     "Enable Frank's coverage model, all layers (experimental) (transformer)")
+    ("transformer-alignment-weight-heads", po::value<bool>()->zero_tokens()->default_value(false),
+     "If deriving alignment and/or coverage from multi-head, learn interpolation weights (experimental) (transformer)")
+    ("transformer-offset-embedding-range", po::value<int>()->default_value(0),
+     "Clipping range of offset embedding, 0 to disable (transformer)")
     ;

   if(mode_ == ConfigMode::training) {

@@ -488,7 +504,7 @@ void ConfigParser::addOptionsTraining(po::options_description& desc) {
      "Epsilon for label smoothing (0 to disable)")
     ("clip-norm", po::value<double>()->default_value(1.f),
      "Clip gradient norm to arg (0 to disable)")
-    ("exponential-smoothing", po::value<float>()->default_value(0.f)->implicit_value(1e-4, "1e-4"),
+    ("exponential-smoothing", po::value<float>()->default_value(0.f)->implicit_value(1e-4f, "1e-4"),
      "Maintain smoothed version of parameters for validation and saving with smoothing factor arg. "
      " 0 to disable.")
     ("guided-alignment", po::value<std::string>(),

@@ -754,7 +770,7 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
     return str;
   };

-  bool loadConfig = vm_.count("config");
+  bool loadConfig = vm_.count("config") != 0;
   bool reloadConfig
       = (mode_ == ConfigMode::training)
         && boost::filesystem::exists(InterpolateEnvVarsIfRequested(

@@ -832,6 +848,14 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
   SET_OPTION("transformer-tied-layers", std::vector<size_t>);
   SET_OPTION("transformer-guided-alignment-layer", std::string);

+  // Frank's experiments:
+  SET_OPTION("use-direct-sent-end-prob", bool);
+  SET_OPTION_NONDEFAULT("transformer-heads-top", int);
+  SET_OPTION("transformer-coverage", bool);
+  SET_OPTION("transformer-coverage-all", bool);
+  SET_OPTION("transformer-alignment-weight-heads", bool);
+  SET_OPTION("transformer-offset-embedding-range", int);
+
 #ifdef CUDNN
   SET_OPTION("char-stride", int);
   SET_OPTION("char-highway", int);

@@ -108,7 +108,7 @@ KEY(axis, int);
 KEY(shape, Shape);
 KEY(value, float);
 KEY(fixed, bool);
-KEY(prefix, std::string);
+//KEY(prefix, std::string); // (conflicts with local variables named prefix)
 KEY(final, bool);
 KEY(output_last, bool);
 KEY(mask, Expr);

@@ -132,5 +132,5 @@ KEY(valid, Ptr<RunBase>);
 KEY(lex_probs, Ptr<LexProbs>);
 } // namespace keywords

-const float NEMATUS_LN_EPS = 1e-5;
+const float NEMATUS_LN_EPS = 1e-5f;
 } // namespace marian

@@ -3,7 +3,12 @@
 #include <boost/filesystem.hpp>
 #include <boost/filesystem/fstream.hpp>
 #include <boost/iostreams/device/file_descriptor.hpp>
+#pragma warning(push)
+#pragma warning(disable: 4458) // declaration of 'traits_type' hides class member
+#pragma warning(disable: 4456) // declaration of 'c' hides previous local declaration
+#pragma warning(disable: 4244) // conversion from 'int' to 'char', possible loss of data
 #include <boost/iostreams/filter/gzip.hpp>
+#pragma warning(pop)
 #include <boost/iostreams/filtering_stream.hpp>
 #include <iostream>
 #include "3rd_party/exception.h"

@@ -83,11 +83,11 @@ void loadItemsFromNpz(const std::string& fileName, std::vector<Item>& items) {
     if(it.second->shape.size() == 1) {
       shape.resize(2);
       shape.set(0, 1);
-      shape.set(1, it.second->shape[0]);
+      shape.set(1, (size_t)it.second->shape[0]);
     } else {
       shape.resize(it.second->shape.size());
-      for(size_t i = 0; i < it.second->shape.size(); ++i)
-        shape.set(i, it.second->shape[i]);
+      for(int i = 0; i < it.second->shape.size(); ++i)
+        shape.set(i, (size_t)it.second->shape[i]);
     }

     Item item;

@@ -71,7 +71,7 @@ public:
    *
    * @arg value The value to store in this object
    */
-  Keyword(Value value) : value_(value) {}
+  Keyword(Value val) : value_(val) {}

   /**
    * @brief Constructs a <code>Keyword</code> with no specified value.

@@ -90,8 +90,8 @@ public:
    *
    * @return a new <code>Keyword</code> object containing the specified value
    */
-  Keyword<key, Value> operator=(Value value) const {
-    return Keyword<key, Value>(value);
+  Keyword<key, Value> operator=(Value val) const {
+    return Keyword<key, Value>(val);
   }

   /**

@@ -141,22 +141,22 @@ struct True {};
 struct False {};

 template <typename Match, typename... Args>
-typename Match::value_type opt(True foo,
-                               typename Match::value_type dflt,
+typename Match::value_type opt(True /*foo*/,
+                               typename Match::value_type /*dflt*/,
                                Args... args) {
   std::tuple<Args...> t(args...);
   return std::get<Index<Match, std::tuple<Args...>>::value>(t)();
 }

 template <typename Match, typename... Args>
-typename Match::value_type opt(False foo,
+typename Match::value_type opt(False /*foo*/,
                                typename Match::value_type dflt,
-                               Args... args) {
+                               Args... /*args*/) {
   return dflt;
 }

 template <typename Match, typename... Args>
-typename Match::value_type Get(Match key,
+typename Match::value_type Get(Match /*key*/,
                                typename Match::value_type dflt,
                                Args... args) {
   constexpr bool match = is_one_of<Match, Args...>::value;

@@ -165,7 +165,7 @@ typename Match::value_type Get(Match key,
 }

 template <typename Match, typename... Args>
-constexpr bool Has(Match key, Args... args) {
+constexpr bool Has(Match /*key*/, Args... args) {
   return is_one_of<Match, Args...>::value;
 }

@@ -74,6 +74,6 @@ public:
     return defaultValue;
   }

-  bool has(const std::string& key) const { return options_[key]; }
+  bool has(const std::string& key) const { return !!options_[key]; }
 };
 } // namespace marian

@@ -38,7 +38,10 @@ public:
   const int* data() const { return shape_.data(); }
   int* data() { return shape_.data(); }

   inline void set(int i, int val) { dim(i) = val; }
+  inline void set(size_t i, int val) { dim(i) = val; }
+  inline void set(int i, size_t val) { dim(i) = (int)val; }
+  inline void set(size_t i, size_t val) { dim(i) = (int)val; }

   inline int& dim(int i) {
     if(i >= 0) {

@@ -55,20 +58,24 @@ public:
       return shape_[size() + i];
     }
   }

   inline const int& dim(int i) const {
     return const_cast<Shape&>(*this).dim(i);
   }

+  inline int& dim(size_t i) { return dim(int(i)); }
+  inline const int& dim(size_t i) const { return dim(int(i)); }
+
   inline int operator[](int i) const { return dim(i); }
   inline int operator[](int i) { return dim(i); }
+  inline int operator[](size_t i) const { return dim(i); }
+  inline int operator[](size_t i) { return dim(i); }

   inline int back() const { return shape_.back(); }
   inline int& back() { return shape_.back(); }

   inline int stride(int i) const {
     std::vector<int> stride(shape_.size(), 1);
-    for(int j = shape_.size() - 2; j >= 0; --j)
+    for(int j = (int)shape_.size() - 2; j >= 0; --j)
       stride[j] = stride[j + 1] * shape_[j + 1];

     if(i >= 0)

@@ -88,7 +95,7 @@ public:
     d.resize(shape_.size());

     std::vector<int> stride(shape_.size(), 1);
-    for(int j = shape_.size() - 2; j >= 0; --j)
+    for(int j = (int)shape_.size() - 2; j >= 0; --j)
       stride[j] = stride[j + 1] * shape_[j + 1];

     for(size_t j = 0; j < d.size(); ++j)

@@ -116,7 +123,7 @@ public:
   std::string toString() const {
     std::stringstream strm;
     strm << "shape=" << (*this)[0];
-    for(size_t i = 1; i < size(); ++i)
+    for(int i = 1; i < size(); ++i)
       strm << "x" << (*this)[i];
     strm << " size=" << elements();
     return strm.str();

@@ -135,7 +142,7 @@ public:

   int axis(int ax) const {
     if(ax < 0)
-      return size() + ax;
+      return (int)size() + ax;
     else
       return ax;
   }

@@ -40,11 +40,11 @@ static inline size_t sizeOf(Type type) {
 }

 static inline bool isSignedInt(Type type) {
-  return TypeClass::signed_type & type;
+  return (TypeClass::signed_type & type) != 0;
 }

 static inline bool isUnsignedInt(Type type) {
-  return TypeClass::unsigned_type & type;
+  return (TypeClass::unsigned_type & type) != 0;
 }

 static inline bool isInt(Type type) {

@@ -52,7 +52,7 @@ static inline bool isInt(Type type) {
 }

 static inline bool isFloat(Type type) {
-  return TypeClass::float_type & type;
+  return (TypeClass::float_type & type) != 0;
 }

 template <typename T>

@@ -10,7 +10,7 @@ namespace data {
 class Batch {
 public:
   virtual size_t size() const = 0;
-  virtual size_t words(int which = 0) const { return 0; };
+  virtual size_t words(int /*which*/ = 0) const { return 0; };
   virtual size_t width() const { return 0; };

   virtual size_t sizeTrg() const { return 0; };

@@ -110,7 +110,7 @@ private:
     while(!maxiBatch->empty()) {
       // push item onto batch
       batchVector.push_back(maxiBatch->top());
-      currentWords += batchVector.back()[0].size();
+      currentWords += (int)batchVector.back()[0].size();
       maxiBatch->pop();

       // Batch size based on sentences

@@ -67,7 +67,7 @@ public:
         maxDims.resize(ex.size(), 0);
       for(size_t i = 0; i < ex.size(); ++i) {
         if(ex[i].size() > (size_t)maxDims[i])
-          maxDims[i] = ex[i].size();
+          maxDims[i] = (int)ex[i].size();
       }
       sentenceIds.push_back(ex.getId());
     }

@@ -216,9 +216,9 @@ void CorpusBase::addWeightsToSentenceTuple(const std::string& line,

 void CorpusBase::addAlignmentsToBatch(Ptr<CorpusBatch> batch,
                                       const std::vector<sample>& batchVector) {
-  int srcWords = batch->front()->batchWidth();
-  int trgWords = batch->back()->batchWidth();
-  int dimBatch = batch->getSentenceIds().size();
+  int srcWords = (int)batch->front()->batchWidth();
+  int trgWords = (int)batch->back()->batchWidth();
+  int dimBatch = (int)batch->getSentenceIds().size();

   std::vector<float> aligns(srcWords * dimBatch * trgWords, 0.f);

@@ -235,8 +235,8 @@ void CorpusBase::addAlignmentsToBatch(Ptr<CorpusBatch> batch,

 void CorpusBase::addWeightsToBatch(Ptr<CorpusBatch> batch,
                                    const std::vector<sample>& batchVector) {
-  int dimBatch = batch->size();
-  int trgWords = batch->back()->batchWidth();
+  int dimBatch = (int)batch->size();
+  int trgWords = (int)batch->back()->batchWidth();

   auto sentenceLevel
       = options_->get<std::string>("data-weighting-type") == "sentence";

@@ -122,7 +122,7 @@ public:
    * @param size Number of sentences
    * @param width Number of words in the longest sentence
    */
-  SubBatch(int size, int width, const Ptr<Vocab>& vocab)
+  SubBatch(size_t size, size_t width, const Ptr<Vocab>& vocab)
       : indices_(size * width, 0),
         mask_(size * width, 0),
         size_(size),

@@ -176,31 +176,31 @@ public:
     ABORT_IF(size_ == 0, "Encoutered sub-batch size of 0");

     std::vector<Ptr<SubBatch>> splits;
-    size_t subSize = std::ceil(size_ / (float)n);
+    size_t subSize = size_t(std::ceil(size_ / (float)n));

     size_t restSize = size_;
     size_t pos = 0;
     for(size_t k = 0; k < n; ++k) {
-      size_t __size__ = std::min(subSize, restSize);
-      if(__size__ > 0) {
-        auto sb = New<SubBatch>(__size__, width_, vocab_);
+      size_t size = std::min(subSize, restSize);
+      if(size > 0) {
+        auto sb = New<SubBatch>(size, width_, vocab_);

-        size_t __words__ = 0;
+        size_t words = 0;
         for(size_t j = 0; j < width_; ++j) {
-          for(size_t i = 0; i < __size__; ++i) {
-            sb->data()[j * __size__ + i] = indices_[j * size_ + pos + i];
-            sb->mask()[j * __size__ + i] = mask_[j * size_ + pos + i];
+          for(size_t i = 0; i < size; ++i) {
+            sb->data()[j * size + i] = indices_[j * size_ + pos + i];
+            sb->mask()[j * size + i] = mask_[j * size_ + pos + i];

             if(mask_[j * size_ + pos + i] != 0)
-              __words__++;
+              words++;
           }
         }

-        sb->setWords(__words__);
+        sb->setWords(words);
         splits.push_back(sb);

-        restSize -= __size__;
-        pos += __size__;
+        restSize -= size;
+        pos += size;
       }
     }
     return splits;

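(A side note on the rename above: identifiers containing a double underscore, such as __size__ and __words__, are reserved for the implementation in C++, so switching to plain size and words is a conformance fix as much as a cosmetic one.)
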
@@ -309,7 +309,7 @@ public:
       // set word indices to different values to avoid same hashes
       std::fill(sb->data().begin(), sb->data().end(), idx++);
       // mask: no items ask being masked out
-      std::fill(sb->mask().begin(), sb->mask().end(), 1);
+      std::fill(sb->mask().begin(), sb->mask().end(), 1.f);

       batches.push_back(sb);
     }

@@ -326,7 +326,7 @@ public:
     }

     if(options->has("data-weighting")) {
-      int weightsSize = batchSize;
+      auto weightsSize = batchSize;
       if(options->get<std::string>("data-weighting-type") != "sentence")
         weightsSize *= lengths.back();
       std::vector<float> weights(weightsSize, 1.f);

@@ -58,7 +58,7 @@ public:
         maxDims.resize(ex.size(), 0);
       for(size_t i = 0; i < ex.size(); ++i) {
         if(ex[i].size() > (size_t)maxDims[i])
-          maxDims[i] = ex[i].size();
+          maxDims[i] = (int)ex[i].size();
       }
       sentenceIds.push_back(ex.getId());
     }

@@ -82,7 +82,7 @@ public:

   void push_back(Input input) { inputs_.push_back(input); }

-  virtual std::vector<Ptr<Batch>> split(size_t n) override { ABORT("Not implemented"); }
+  virtual std::vector<Ptr<Batch>> split(size_t /*n*/) override { ABORT("Not implemented"); }

   Data& features() { return inputs_[0].data(); }

@@ -115,7 +115,7 @@ public:
   void shuffle() override { std::shuffle(examples_.begin(), examples_.end(), eng_); }

   batch_ptr toBatch(const Examples& batchVector) override {
-    int batchSize = batchVector.size();
+    int batchSize = (int)batchVector.size();

     std::vector<int> maxDims;
     for(auto& ex : batchVector) {

@@ -123,7 +123,7 @@ public:
         maxDims.resize(ex.size(), 0);
       for(size_t i = 0; i < ex.size(); ++i) {
         if(ex[i].size() > (size_t)maxDims[i])
-          maxDims[i] = ex[i].size();
+          maxDims[i] = (int)ex[i].size();
       }
     }

@@ -16,8 +16,8 @@ protected:
   std::mt19937 eng_;

 public:
-  RNGEngine() : eng_(Config::seed) {}
-  RNGEngine(size_t eng) : eng_(eng) {}
+  RNGEngine() : eng_((unsigned int)Config::seed) {}
+  RNGEngine(size_t eng) : eng_((unsigned int)eng) {}

   std::string getRNGState() {
     std::ostringstream oss;

@@ -79,7 +79,7 @@ public:
     for(auto i : srcBatch->data())
       idxSet.insert(i);

-    std::uniform_int_distribution<> dis(firstNum_, maxVocab_);
+    std::uniform_int_distribution<> dis((int)firstNum_, (int)maxVocab_);
     while(idxSet.size() < total_ && idxSet.size() < maxVocab_)
       idxSet.insert(dis(gen_));

@@ -66,7 +66,7 @@ public:
         maxDims.resize(ex.size(), 0);
       for(size_t i = 0; i < ex.size(); ++i) {
         if(ex[i].size() > (size_t)maxDims[i])
-          maxDims[i] = ex[i].size();
+          maxDims[i] = (int)ex[i].size();
       }
       sentenceIds.push_back(ex.getId());
     }

@@ -51,8 +51,8 @@ private:
   typedef std::vector<std::string> Id2Str;
   Id2Str id2str_;

-  Word eosId_ = -1;
-  Word unkId_ = -1;
+  Word eosId_ = (Word)-1;
+  Word unkId_ = (Word)-1;

   class VocabFreqOrderer;
 };

@@ -59,7 +59,7 @@ struct Approx {
     if(x <= -radius)
       return 0;
     if(x < radius) // +1 because 0 holds value for x < -radius
-      return (x + radius - offset) / ((2.f * radius) / pieces) + 1;
+      return int((x + radius - offset) / ((2.f * radius) / pieces) + 1);
     return pieces + 1;
   }

@@ -13,7 +13,7 @@ using IsClass = typename std::enable_if<std::is_class<C>::value, C>::type;
 template <int N>
 struct Select {
   template <typename T, typename... Args>
-  __HDI__ static auto apply(T&& arg, Args&&... args)
+  __HDI__ static auto apply(T&& /*arg*/, Args&&... args)
       -> decltype(Select<N - 1>::apply(args...)) {
     return Select<N - 1>::apply(args...);
   }

@@ -22,7 +22,7 @@ struct Select {
 template <>
 struct Select<0> {
   template <typename T, typename... Args>
-  __HDI__ static T apply(T&& arg, Args&&... args) {
+  __HDI__ static T apply(T&& arg, Args&&... /*args*/) {
     return arg;
   }
 };

@@ -49,7 +49,7 @@ struct Capture {
   Capture(float val) : value(val){};

   template <typename... Args>
-  __HDI__ float operator()(Args&&... args) {
+  __HDI__ float operator()(Args&&... /*args*/) {
     return value;
   }

@@ -51,10 +51,10 @@ struct BinaryFunctor {
   template <class X> \
   using name = UnaryFunctor<elem::name, X>; \
   template <typename X> \
-  name<IsClass<X>> name2(X x) { \
+  static inline name<IsClass<X>> name2(X x) { \
     return name<X>(x); \
   } \
-  static name<Capture> name2(Capture x) { return name<Capture>(x); }
+  static inline name<Capture> name2(Capture x) { return name<Capture>(x); }

 #define BINARY(name, name2, func) \
   namespace elem { \

@@ -120,7 +120,7 @@ BINARY(Or, operator||, x || y);

 template <typename T>
 __HDI__ T sgn(T val) {
-  return (float(0) < val) - (val < float(0));
+  return T((0 < val) - (val < 0));
 }

 UNARY(Sgn, sgn, sgn(x));

@@ -89,7 +89,7 @@ struct ConstantShape {

   __HDI__ static constexpr size_t size() { return N; }

-  __HDI__ int elements() const { return elements_; }
+  __HDI__ int elements() const { return (int)elements_; }

   __HDI__ int index(const Array<int, N>& d) const {
     int i = 0;

@@ -51,7 +51,8 @@ class ExpressionGraph;
  * or formally \f$\bar{w}_i = \frac{\partial y}{\partial w_i}\f$
  */
 template <class DataType>
-struct Chainable {
+class Chainable {
+public:
   Chainable() {}
   virtual ~Chainable(){};

@@ -203,7 +203,7 @@ public:
     tensors_->throwAtReallocation(true);
     backprop();
     tensors_->throwAtReallocation(false);
-  } catch(AllocationException& e) {
+  } catch(AllocationException&) {
     tensors_->throwAtReallocation(false);
     return false;
   }

@@ -353,12 +353,12 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
 // swap the last two axes
 Expr transpose(Expr a) {
   std::vector<int> axes(a->shape().size());
-  for(size_t i = 0; i < axes.size(); ++i) {
+  for(int i = 0; i < axes.size(); ++i) {
     axes[i] = i;
   }
   if(axes.size() > 1) {
-    axes[axes.size() - 1] = axes.size() - 2;
-    axes[axes.size() - 2] = axes.size() - 1;
+    axes[axes.size() - 1] = (int)axes.size() - 2;
+    axes[axes.size() - 2] = (int)axes.size() - 1;
   }
   return Expression<TransposeNodeOp>(a, axes);
 }

@@ -405,7 +405,7 @@ Expr leakyrelu(const std::vector<Expr>&) {
   ABORT("Not implemented");
 }

-Expr prelu(const std::vector<Expr>&, float alpha) {
+Expr prelu(const std::vector<Expr>&, float /*alpha*/) {
   ABORT("Not implemented");
 }

@@ -23,7 +23,7 @@ float xor128() {
   y = z;
   z = w;
   w = (w ^ (w >> 19) ^ t ^ (t >> 8)) % 1000;
-  return 0.1 * ((w % 1000) / 1000.f) - 0.05;
+  return 0.1f * ((w % 1000) / 1000.f) - 0.05f;
 }

 void zeros(Tensor t) {

@@ -50,7 +50,7 @@ NodeInitializer diag(float val) {
   };
 }

-NodeInitializer normal(float scale, bool ortho /*= true*/) {
+NodeInitializer normal(float scale, bool /*ortho*/ /*= true*/) {
   return [scale](Tensor t) {
     distribution<std::normal_distribution<float>>(t, 0, scale);
   };

@@ -113,7 +113,10 @@ NodeInitializer from_vector(const std::vector<float>& v) {
 }

 NodeInitializer from_vector(const std::vector<size_t>& v) {
-  std::vector<float> vf(v.begin(), v.end());
+  auto n = v.size();
+  std::vector<float> vf(n);
+  for (size_t i = 0; i < n; i++)
+    vf[i] = (float)v[i];
   return from_vector(vf);
 }

@@ -28,7 +28,7 @@ NodeInitializer diag(float val);

 template <class Distribution, class Iterator>
 void distribution(Iterator begin, Iterator end, float a, float b) {
-  std::default_random_engine engine(Config::seed++);
+  std::default_random_engine engine((unsigned int)Config::seed++);
   Distribution dist(a, b);
   auto gen = std::bind(dist, engine);
   std::generate(begin, end, gen);

@@ -119,12 +119,10 @@ public:

   virtual Tensor vals() override {
     ABORT("Not implemented for memory-mapped parameters");
-    return nullptr;
   }

   virtual Tensor grads() override {
     ABORT("Not implemented for memory-mapped parameters");
-    return nullptr;
   }

   virtual void clear() override {

@@ -155,7 +155,7 @@ public:
     return affine(input, W_, b_, false, transposeW_);
   }

-  virtual Expr apply(const std::vector<Expr>& inputs) override {
+  virtual Expr apply(const std::vector<Expr>& /*inputs*/) override {
     ABORT("Not implemented");
   };
 };

@@ -38,13 +38,13 @@ static inline Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph,
   }

   Expr alnCost;
-  float eps = 1e-6;
+  float epsilon = 1e-6f;
   if(guidedCostType == "mse") {
-    alnCost = sum(flatten(square(att - aln))) / (2 * div);
+    alnCost = sum(flatten(square(att - aln))) / (float)(2 * div);
   } else if(guidedCostType == "mult") {
-    alnCost = -log(sum(flatten(att * aln)) + eps) / div;
+    alnCost = -log(sum(flatten(att * aln)) + epsilon) / (float)div;
   } else if(guidedCostType == "ce") {
-    alnCost = -sum(flatten(aln * log(att + eps))) / div;
+    alnCost = -sum(flatten(aln * log(att + epsilon))) / (float)div;
   } else {
     ABORT("Unknown alignment cost type");
   }

@@ -24,13 +24,11 @@ Expr LossBase::getCrossEntropy(Expr logits,
                                Expr indices,
                                Expr mask,
                                Expr weights) {
-  using namespace keywords;
-
   auto ce = cross_entropy(logits, indices);

   if(smoothing_ > 0) {
     // @TODO: add this to CE kernels instead
-    auto ceq = mean(logsoftmax(logits), axis = -1);
+    auto ceq = mean(logsoftmax(logits), /*axis=*/ -1);
     ce = (1 - smoothing_) * ce - smoothing_ * ceq;
   }

@@ -13,8 +13,8 @@ Expr DataWeighting::getWeights(Ptr<ExpressionGraph> graph,
   ABORT_IF(batch->getDataWeights().empty(),
            "Vector of weights is unexpectedly empty!");
   bool sentenceWeighting = weightingType_ == "sentence";
-  int dimBatch = batch->size();
-  int dimWords = sentenceWeighting ? 1 : batch->back()->batchWidth();
+  int dimBatch = (int)batch->size();
+  int dimWords = sentenceWeighting ? 1 : (int)batch->back()->batchWidth();
   auto weights = graph->constant({1, dimWords, dimBatch, 1},
                                  inits::from_vector(batch->getDataWeights()));
   return weights;

@@ -37,9 +37,7 @@ public:

   void load(Ptr<ExpressionGraph> graph,
             const std::string& name,
-            bool markedReloaded = true) override {
-    using namespace keywords;
-
+            bool /*markedReloaded*/ = true) override {
     std::map<std::string, std::string> nameMap
         = {{"decoder_U", "decoder_cell1_U"},
            {"decoder_Ux", "decoder_cell1_Ux"},

@@ -175,9 +175,9 @@ public:
     return cost_->apply(nextState);
   }

-  virtual Expr build(Ptr<ExpressionGraph> graph,
-                     Ptr<data::CorpusBatch> batch,
-                     bool clearGraph = true) override {
+  virtual Expr build(Ptr<ExpressionGraph> /*graph*/,
+                     Ptr<data::CorpusBatch> /*batch*/,
+                     bool /*clearGraph*/ = true) override {
     ABORT("Wrong wrapper. Use models::Trainer or models::Scorer");
     return nullptr;
   }

@@ -60,8 +60,8 @@ public:
     auto yEmb = yEmbFactory.construct();

     auto subBatch = (*batch)[batchIndex_];
-    int dimBatch = subBatch->batchSize();
-    int dimWords = subBatch->batchWidth();
+    int dimBatch = (int)subBatch->batchSize();
+    int dimWords = (int)subBatch->batchWidth();

     auto chosenEmbeddings = rows(yEmb, subBatch->data());

@@ -119,7 +119,7 @@ public:
     state->setTargetEmbeddings(selectedEmbs);
   }

-  virtual const std::vector<Expr> getAlignments(int i = 0) { return {}; };
+  virtual const std::vector<Expr> getAlignments(int /*i*/ = 0) { return {}; };

   virtual Ptr<data::Shortlist> getShortlist() { return shortlist_; }
   virtual void setShortlist(Ptr<data::Shortlist> shortlist) {

@@ -21,9 +21,9 @@ protected:

     auto subBatch = (*batch)[batchIndex_];

-    int dimBatch = subBatch->batchSize();
+    int dimBatch = (int)subBatch->batchSize();
     int dimEmb = srcEmbeddings->shape()[-1];
-    int dimWords = subBatch->batchWidth();
+    int dimWords = (int)subBatch->batchWidth();

     auto chosenEmbeddings = rows(srcEmbeddings, subBatch->data());

@@ -215,7 +215,7 @@ public:
     Expr logits;
     if(type == "hard-soft-att") {
       std::vector<Expr> alignedContexts;
-      for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
+      for(int k = 0; k < state->getEncoderStates().size(); ++k) {
         // retrieve all the aligned contexts computed by the attention mechanism
         auto att = rnn_->at(0)
                        ->as<rnn::StackedCell>()

@@ -257,8 +257,8 @@ public:
     DecoderBase::embeddingsFromBatch(graph, state, batch);

     auto subBatch = (*batch)[batchIndex_];
-    int dimBatch = subBatch->batchSize();
-    int dimWords = subBatch->batchWidth();
+    int dimBatch = (int)subBatch->batchSize();
+    int dimWords = (int)subBatch->batchWidth();

     std::vector<size_t> attentionIndices(dimBatch, 0);
     std::vector<size_t> currentPos(dimBatch, 0);

@@ -29,7 +29,7 @@ public:

   void load(Ptr<ExpressionGraph> graph,
             const std::string& name,
-            bool markedReloaded = true) override {
+            bool /*markedReloaded*/ = true) override {
     graph->load(name, nameMap_);
   }

@@ -274,7 +274,7 @@ public:

       start = mlp->apply(meanContexts);
     } else {
-      int dimBatch = batch->size();
+      int dimBatch = (int)batch->size();
       int dimRnn = opt<int>("dim-rnn");

       start = graph->constant({dimBatch, dimRnn}, inits::zeros);

@@ -309,7 +309,7 @@ public:
     rnn::States decoderStates = rnn_->lastCellStates();

     std::vector<Expr> alignedContexts;
-    for(size_t k = 0; k < state->getEncoderStates().size(); ++k) {
+    for(int k = 0; k < state->getEncoderStates().size(); ++k) {
       // retrieve all the aligned contexts computed by the attention mechanism
       auto att = rnn_->at(0)
                      ->as<rnn::StackedCell>()

@@ -337,7 +337,7 @@ public:

     int dimTrgVoc = opt<std::vector<int>>("dim-vocabs")[batchIndex_];

-    auto final = mlp::output(graph) //
+    auto last = mlp::output(graph) //
         ("prefix", prefix_ + "_ff_logit_l2") //
         ("dim", dimTrgVoc);

@@ -345,17 +345,17 @@ public:
       std::string tiedPrefix = prefix_ + "_Wemb";
       if(opt<bool>("tied-embeddings-all") || opt<bool>("tied-embeddings-src"))
         tiedPrefix = "Wemb";
-      final.tie_transposed("W", tiedPrefix);
+      last.tie_transposed("W", tiedPrefix);
     }

     if(shortlist_)
-      final.set_shortlist(shortlist_);
+      last.set_shortlist(shortlist_);

     // assemble layers into MLP and apply to embeddings, decoder context and
     // aligned source context
     output_ = mlp::mlp(graph) //
                   .push_back(hidden) //
-                  .push_back(final)
+                  .push_back(last)
                   .construct();
   }

@@ -51,7 +51,7 @@ public:
     int dimEmb = input->shape()[-1];
     int dimWords = input->shape()[-3];

-    float num_timescales = dimEmb / 2;
+    float num_timescales = (float)dimEmb / 2;
     float log_timescale_increment = std::log(10000.f) / (num_timescales - 1.f);

     std::vector<float> vPos(dimEmb * dimWords, 0);

@@ -59,7 +59,7 @@ public:
       for(int i = 0; i < num_timescales; ++i) {
         float v = p * std::exp(i * -log_timescale_increment);
         vPos[(p - start) * dimEmb + i] = std::sin(v);
-        vPos[(p - start) * dimEmb + num_timescales + i] = std::cos(v);
+        vPos[(p - start) * dimEmb + (int)num_timescales + i] = std::cos(v); // @TODO: is int vs. float correct for num_timescales?
       }
     }

@@ -134,7 +134,7 @@ public:
     int dimModel = x->shape()[-1];
     auto scale = graph_->param(prefix + "_ln_scale" + suffix, { 1, dimModel }, inits::ones);
     auto bias = graph_->param(prefix + "_ln_bias" + suffix, { 1, dimModel }, inits::zeros);
-    return marian::layerNorm(x, scale, bias, 1e-6);
+    return marian::layerNorm(x, scale, bias, 1e-6f);
   }

   Expr preProcess(std::string prefix, std::string ops, Expr input, float dropProb = 0.0f) const {

@@ -212,7 +212,7 @@ public:
     // time steps and batch entries), also add mask for illegal connections

     // multiplicative attention with flattened softmax
-    float scale = 1.0 / std::sqrt((float)dk); // scaling to avoid extreme values due to matrix multiplication
+    float scale = 1.0f / std::sqrt((float)dk); // scaling to avoid extreme values due to matrix multiplication
     auto z = bdot(q, k, false, true, scale); // [-4: beam depth * batch size, -3: num heads, -2: max tgt length, -1: max src length]

     // mask out garbage beyond end of sequences

@@ -425,7 +425,7 @@ public:
     auto output = input;
     if(startPos > 0) {
       // we are decoding at a position after 0
-      output = (prevDecoderState.output * startPos + input) / (startPos + 1);
+      output = (prevDecoderState.output * (float)startPos + input) / float(startPos + 1);
     }
     else if(startPos == 0 && output->shape()[-2] > 1) {
       // we are training or scoring, because there is no history and

@@ -444,7 +444,7 @@ public:
                          std::string prefix,
                          Expr input,
                          Expr selfMask,
-                         int startPos) const {
+                         int /*startPos*/) const {
     float dropoutRnn = inference_ ? 0.f : opt<float>("dropout-rnn");

     auto rnn = rnn::rnn(graph_) //

@@ -479,7 +479,7 @@ public:

   // returns the embedding matrix based on options
   // and based on batchIndex_.
-  Expr wordEmbeddings(int subBatchIndex) const {
+  Expr wordEmbeddings(size_t subBatchIndex) const {
     // standard encoder word embeddings

     int dimVoc = opt<std::vector<int>>("dim-vocabs")[subBatchIndex];

@@ -513,8 +513,8 @@ public:

   Ptr<EncoderState> apply(Ptr<data::CorpusBatch> batch) {
     int dimEmb = opt<int>("dim-emb");
-    int dimBatch = batch->size();
-    int dimSrcWords = (*batch)[batchIndex_]->batchWidth();
+    int dimBatch = (int)batch->size();
+    int dimSrcWords = (int)(*batch)[batchIndex_]->batchWidth();

     auto embeddings = wordEmbeddings(batchIndex_); // embedding matrix, considering tying and some other options

@@ -531,7 +531,7 @@ public:
     }

     // according to paper embeddings are scaled up by \sqrt(d_m)
-    auto scaledEmbeddings = std::sqrt(dimEmb) * batchEmbeddings;
+    auto scaledEmbeddings = std::sqrt((float)dimEmb) * batchEmbeddings;

     scaledEmbeddings = addPositionalEmbeddings(scaledEmbeddings);

@@ -637,7 +637,7 @@ public:

     std::string layerType = opt<std::string>("transformer-decoder-autoreg", "self-attention");
     if (layerType == "rnn") {
-      int dimBatch = batch->size();
+      int dimBatch = (int)batch->size();
       int dim = opt<int>("dim-emb");

       auto start = graph->constant({1, 1, dimBatch, dim}, inits::zeros);

@@ -678,12 +678,12 @@ public:
       dimBeam = embeddings->shape()[-4];

     // according to paper embeddings are scaled by \sqrt(d_m)
-    auto scaledEmbeddings = std::sqrt(dimEmb) * embeddings;
+    auto scaledEmbeddings = std::sqrt((float)dimEmb) * embeddings;

     // set current target token position during decoding or training. At training
     // this should be 0. During translation the current length of the translation.
     // Used for position embeddings and creating new decoder states.
-    int startPos = state->getPosition();
+    int startPos = (int)state->getPosition();

     scaledEmbeddings
         = addPositionalEmbeddings(scaledEmbeddings, startPos);

@@ -828,7 +828,7 @@ public:

   // helper function for guided alignment
   // @TODO: const vector<> seems wrong. Either make it non-const or a const& (more efficient but dangerous)
-  virtual const std::vector<Expr> getAlignments(int i = 0) override {
+  virtual const std::vector<Expr> getAlignments(int /*i*/ = 0) override {
     return alignments_;
   }

@@ -19,7 +19,7 @@ void Adagrad::updateImpl(Tensor params, Tensor grads) {
     alloc_ = New<TensorAllocator>(params->getBackend());

   if(!gt_) {
-    int elements = params->size();
+    int elements = (int)params->size();
     alloc_->reserveExact(params->memory()->size());
     alloc_->allocate(gt_, {1, elements});
     gt_->set(0.f);

@@ -68,13 +68,13 @@ void Adagrad::load(const std::string& name,
   }

   // get the size of params which should go
-  size_t shardSize = ceil(totalSize / (float)backends.size());
+  size_t shardSize = size_t(ceil(totalSize / (float)backends.size()));

   size_t id = 0;
   for(auto optBase : opts) {
     auto opt = std::dynamic_pointer_cast<Adagrad>(optBase);

-    int size = std::min(shardSize, totalSize);
+    int size = (int)std::min(shardSize, totalSize);
     totalSize -= size;

     if(!opt->alloc_)

@@ -95,7 +95,7 @@ void Adagrad::load(const std::string& name,

 void Adagrad::save(const std::string& name,
                    std::vector<Ptr<OptimizerBase>> opts,
-                   size_t totalSize) {
+                   size_t /*totalSize*/) {
   LOG(info, "Saving Adagrad parameters to {}", name);

   std::vector<float> vGt;

@@ -130,7 +130,7 @@ void Adam::updateImpl(Tensor params, Tensor grads) {
     alloc_ = New<TensorAllocator>(params->getBackend());

   if(!mt_) {
-    int elements = params->size();
+    int elements = (int)params->size();
     alloc_->reserveExact(2 * params->memory()->size());
     alloc_->allocate(mt_, {1, elements});
     mt_->set(0.f);

@@ -140,8 +140,8 @@ void Adam::updateImpl(Tensor params, Tensor grads) {
   }

   t_++;
-  float denom1 = 1 - std::pow(beta1_, t_);
-  float denom2 = 1 - std::pow(beta2_, t_);
+  float denom1 = 1 - (float)std::pow(beta1_, t_);
+  float denom2 = 1 - (float)std::pow(beta2_, t_);

   using namespace functional;

@@ -193,13 +193,13 @@ void Adam::load(const std::string& name,
   }

   // get the size of params which should go
-  size_t shardSize = ceil(totalSize / (float)backends.size());
+  size_t shardSize = size_t(ceil(totalSize / (float)backends.size()));

   size_t id = 0;
   for(auto optBase : opts) {
     auto opt = std::dynamic_pointer_cast<Adam>(optBase);

-    int size = std::min(shardSize, totalSize);
+    int size = (int)std::min(shardSize, totalSize);
     totalSize -= size;

     if(!opt->alloc_)

@@ -223,7 +223,7 @@ void Adam::load(const std::string& name,

 void Adam::save(const std::string& name,
                 std::vector<Ptr<OptimizerBase>> opts,
-                size_t totalSize) {
+                size_t /*totalSize*/) {
   LOG(info, "Saving Adam parameters to {}", name);

   std::vector<float> vMt;

@@ -267,13 +267,13 @@ void Adam::resetStats() {
 }

 Ptr<OptimizerBase> Optimizer(Ptr<Config> options) {
-  float lrate = options->get<double>("learn-rate");
+  float lrate = (float)options->get<double>("learn-rate"); // @TODO: should this be <float>?
   auto params = options->has("optimizer-params")
                     ? options->get<std::vector<float>>("optimizer-params")
                     : std::vector<float>({});

   Ptr<ClipperBase> clipper = nullptr;
-  float clipNorm = options->get<double>("clip-norm");
+  float clipNorm = (float)options->get<double>("clip-norm"); // @TODO: should this be <float>?
   if(clipNorm > 0)
     clipper = Clipper<Norm>(clipNorm);

@@ -63,12 +63,12 @@ public:

   void setParams(const std::vector<float>& params) { parseParams(params); }

-  virtual void load(const std::string& name,
-                    std::vector<Ptr<OptimizerBase>> opts,
-                    std::vector<Ptr<Backend>> backends) {}
-  virtual void save(const std::string& name,
-                    std::vector<Ptr<OptimizerBase>> opts,
-                    size_t totalSize) {}
+  virtual void load(const std::string& /*name*/,
+                    std::vector<Ptr<OptimizerBase>> /*opts*/,
+                    std::vector<Ptr<Backend>> /*backends*/) {}
+  virtual void save(const std::string& /*name*/,
+                    std::vector<Ptr<OptimizerBase>> /*opts*/,
+                    size_t /*totalSize*/) {}

 protected:
   virtual void updateImpl(Tensor params, Tensor grads) = 0;

@@ -94,7 +94,7 @@ public:
 private:
   void updateImpl(Tensor params, Tensor grads) override;

-  virtual void parseParams(const std::vector<float>& params) override {}
+  virtual void parseParams(const std::vector<float>& /*params*/) override {}
   virtual void resetStats() override {}
 };

@@ -124,7 +124,7 @@ private:
     eps_ = params[0];
   }

-  float eps_ = 1e-8;
+  float eps_ = 1e-8f;
   Ptr<TensorAllocator> alloc_;
   Tensor gt_;
 };

@@ -159,9 +159,9 @@ private:
     eps_ = params[2];
   }

-  float beta1_ = 0.9;
-  float beta2_ = 0.999;
-  float eps_ = 1e-8;
+  float beta1_ = 0.9f;
+  float beta2_ = 0.999f;
+  float eps_ = 1e-8f;
   size_t t_;

   Ptr<TensorAllocator> alloc_;

@@ -64,10 +64,10 @@ private:

     auto xWs = cell_->applyInput({input});

-    size_t timeSteps = input->shape()[-3];
+    auto timeSteps = input->shape()[-3];

     States outputs;
-    for(size_t i = 0; i < timeSteps; ++i) {
+    for(int i = 0; i < timeSteps; ++i) {
       int j = i;

       if(direction_ == dir::backward)

@@ -28,7 +28,7 @@ private:

     sel = atleast_4d(sel);

-    int dimBatch = selIdx.size() / beamSize;
+    int dimBatch = (int)selIdx.size() / beamSize;
     int dimDepth = sel->shape()[-1];
     int dimTime = isBatchMajor ? sel->shape()[-2] : sel->shape()[-3];

@@ -93,7 +93,7 @@ public:
 };

 class Cell;
-struct CellInput;
+class CellInput;

 class Stackable : public std::enable_shared_from_this<Stackable> {
 protected:

@@ -91,7 +91,7 @@ private:
   std::unordered_map<uint8_t*, Ptr<MemoryPiece>> allocated_;

   size_t align(size_t size) {
-    return ceil(size / (float)alignment_) * alignment_;
+    return size_t(ceil(size / (float)alignment_) * alignment_);
   }

   void grow(size_t add) {

@@ -168,7 +168,7 @@ public:
     reserve(bytes);
   }

-  Allocator(DeviceId deviceId,
+  Allocator(DeviceId /*deviceId*/,
             Ptr<Device> device,
             size_t bytes,
             size_t step,

@@ -28,7 +28,7 @@ void gAddGeneric(Functor functor,

   constexpr size_t N = functional::Shape::size();
   functional::Array<int, N> len;
-  for(size_t i = 0; i < N; ++i)
+  for(int i = 0; i < N; ++i)
     len[i] = full[i] / out.shape()[i];

   functional::Array<int, N> dims;

@@ -101,7 +101,7 @@ template <class Functor, class... Tensors>
 void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
   auto full = marian::Shape::broadcast({out, tensors...});

-  int length = out->shape().elements();
+  //int length = out->shape().elements();

   constexpr size_t K = sizeof...(Tensors);

@@ -109,8 +109,8 @@ void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
   functional::Array<functional::Tensor<float>, K> gIns = {tensors...};

   if(full.back() != 1 && out->shape().back() == 1) {
-    size_t m = full.elements() / length;
-    size_t k = full.back();
+    //size_t m = full.elements() / length;
+    //size_t k = full.back();
     cpu::gAddReduce(functor, full, gOut, gIns, scale);
   } else if(out->shape() == full) {
     bool broadcast = false;

@@ -15,7 +15,7 @@ private:

 public:
   Backend(DeviceId deviceId, size_t seed)
-      : marian::Backend(deviceId, seed), gen_(seed_) {}
+      : marian::Backend(deviceId, seed), gen_((unsigned int)seed_) {}

   void setDevice() override {}

@@ -131,21 +131,21 @@ void ProdBatched(marian::Tensor C,
   auto strideA = batchA == 1 ? 0 : m * k;
   auto strideC = n * m;

-  int batchC = std::max(batchA, batchB);
-  for(int i = 0; i < batchC; ++i) {
+  auto batchC = std::max(batchA, batchB);
+  for(size_t i = 0; i < batchC; ++i) {
     sgemm(transA,
           transB,
-          m,
-          n,
-          k,
+          (int)m,
+          (int)n,
+          (int)k,
           alpha,
           A->data() + (i % batchA) * strideA,
-          lda,
+          (int)lda,
           B->data() + (i % batchB) * strideB,
-          ldb,
+          (int)ldb,
           beta,
           C->data() + i * strideC,
-          ldc);
+          (int)ldc);
   }
 #else
   ABORT("Not implemented!");

@@ -56,8 +56,8 @@ void SSE_MatrixMult16(const __m128i* A,
 static inline void Quantize16(marian::Tensor out,
                               const marian::Tensor in,
-                              float clipValue) {
-  float quant_mult = pow(2.0, (float)BITS);
+                              float /*clipValue*/) {
+  float quant_mult = (float)pow(2.0, BITS);
 #ifdef __AVX512F__
   AVX_Quantize16(
       in->data(), out->data<int16_t>(), quant_mult, in->shape().elements());

@@ -76,6 +76,7 @@ static inline void Quantize8(marian::Tensor out,
   AVX_Quantize8(
       in->data(), out->data<int8_t>(), quant_mult, in->shape().elements());
 #else
+  out; in; clipValue;
   ABORT("8-bit is currently only AVX512");
 #endif
 }

@@ -118,19 +119,19 @@ static void AddBias(marian::Tensor C, const marian::Tensor Bias) {
   }
 }
 
-static void ProdInt16(marian::Tensor C,
-                      const marian::Tensor A,
-                      const marian::Tensor B,
+static inline void ProdInt16(marian::Tensor C,
+                             const marian::Tensor A,
+                             const marian::Tensor B,
                       float scale) {
   ABORT_IF(scale != 1, "Scale other than 1 not supported");
 
   // @TODO: make this a parameter
-  float quant_mult = pow(2.0, (float)BITS);
+  float quant_mult = (float)pow(2.0, BITS);
 
   // If we quantize to n bits and then multiple the values together, the result
   // will be quantized to n^2 bits. So we must divide by 1.0/(n^2) to get back
   // the original value.
-  float unquant_mult = 1.0 / (quant_mult * quant_mult);
+  float unquant_mult = 1.0f / (quant_mult * quant_mult);
 
   float* fC = C->data();
   int num_A_rows = A->shape().elements() / A->shape()[-1];

@@ -155,11 +156,11 @@ static void ProdInt16(marian::Tensor C,
 #endif
 }
 
-static void ProdInt8(marian::Tensor C,
-                     const marian::Tensor A,
-                     const marian::Tensor B,
-                     float scale,
-                     float clipValue) {
+static inline void ProdInt8(marian::Tensor C,
+                            const marian::Tensor A,
+                            const marian::Tensor B,
+                            float scale,
+                            float clipValue) {
 #ifdef __AVX512F__
   // This would be easy...
   ABORT_IF(scale != 1, "Scale other than 1 not supported");

@@ -178,6 +179,7 @@ static void ProdInt8(marian::Tensor C,
           num_B_rows,
           width);
 #else
+  C; A; B; scale; clipValue;
   ABORT("8-bit is currently only AVX512");
 #endif
 }
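Note: on the non-AVX512 branch the parameters are never read, so /W4 reports C4100 (unreferenced formal parameter). Naming each one in a do-nothing expression statement, as the added "C; A; B; scale; clipValue;" line does, marks them as referenced. A (void) cast is the more conventional spelling of the same idiom; a minimal sketch with a hypothetical stub:

void prodStub(int C, int A, float scale) {
#if defined(SOME_ISA_MACRO)  // assumption: the real kernel is compiled out here
#else
  (void)C; (void)A; (void)scale;  // touch parameters so C4100 stays quiet
  // ABORT("Not implemented");    // real code would abort here
#endif
}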
@@ -17,10 +17,10 @@ namespace cpu {
 inline float stableSigmoid(float x) {
   if(x >= 0) {
     float z = expf(-x);
-    return 1.0 / (1.0 + z);
+    return 1.0f / (1.0f + z);
   } else {
     float z = expf(x);
-    return z / (1.0 + z);
+    return z / (1.0f + z);
   }
 }
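Note: 1.0 is a double literal, so "1.0 / (1.0 + z)" promotes the division to double and the return then narrows back to float, which MSVC flags as C4244. The f suffix keeps the whole expression in single precision. Tiny sketch:

float scaled(float x) {
  // return 0.5 * x;  // double math + narrowing return -> warning C4244
  return 0.5f * x;    // float literal: no promotion, no warning
}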
@@ -228,9 +228,9 @@ void Transpose10(Tensor out, const Tensor in) {
 template <bool add>
 void TransposeGeneric(Tensor out, Tensor in, const std::vector<int>& vAxis) {
   functional::Array<int, functional::Shape::size()> permute;
-  int diff = functional::Shape::size() - vAxis.size();
-  for(size_t i = 0; i < permute.size(); ++i)
-    if((int)i < diff)
+  int diff = int(functional::Shape::size() - vAxis.size());
+  for(int i = 0; i < permute.size(); ++i)
+    if(i < diff)
       permute[i] = i;
     else
       permute[i] = vAxis[i - diff] + diff;

@@ -483,7 +483,7 @@ void Select(Tensor out,
   for(int index = 0; index < length; ++index) {
     outShape.dims(index, dims);
-    dims[axis] = indices[dims[axis]];
+    dims[axis] = (int)indices[dims[axis]];
     int inIndex = inShape.index(dims);
     out->data()[index] = in->data()[inIndex];
   }

@@ -505,7 +505,7 @@ void Insert(Tensor out,
   for(int index = 0; index < length; ++index) {
     inShape.dims(index, dims);
-    dims[axis] = indices[dims[axis]];
+    dims[axis] = (int)indices[dims[axis]];
     int outIndex = outShape.index(dims);
     out->data()[outIndex] += in->data()[index];
   }

@@ -547,8 +547,8 @@ void GRUFastForward(Tensor out_, std::vector<Tensor> inputs, bool final) {
       else
         h = std::tanh(xWrow[l] + sUrow[l] * r + b[l]);
 
-      float out = (1.0f - z) * h + z * rowState[i];
-      rowOut[i] = m * out + (1 - m) * rowState[i];
+      float o = (1.0f - z) * h + z * rowState[i];
+      rowOut[i] = m * o + (1 - m) * rowState[i];
     }
   }
 }

@@ -599,16 +599,16 @@ void GRUFastBackward(std::vector<Tensor> outputs,
       else
         h = std::tanh(rowXW[l] + rowSU[l] * r + b[l]);
 
-      float adj = rowAdj[i];
+      float a = rowAdj[i];
 
      float t = (1 - z) * (1 - h * h);
 
       // df/ds
       if(outState)
-        rowOutState[i] += (m * z - m + 1) * adj;
+        rowOutState[i] += (m * z - m + 1) * a;
 
       // df/d(xW_r) ...
-      float dfdxW_r = m * r * (1 - r) * t * adj;
+      float dfdxW_r = m * r * (1 - r) * t * a;
       if(final)
         dfdxW_r *= rowSU[l] + b[l];
       else

@@ -621,7 +621,7 @@ void GRUFastBackward(std::vector<Tensor> outputs,
         outB[i] += dfdxW_r;
 
       // df/d(xW_z) ...
-      float dfdxW_z = m * (1 - z) * z * (rowState[i] - h) * adj;
+      float dfdxW_z = m * (1 - z) * z * (rowState[i] - h) * a;
       if(outXW)
         rowOutXW[k] += dfdxW_z;
       if(outSU)

@@ -630,7 +630,7 @@ void GRUFastBackward(std::vector<Tensor> outputs,
         outB[k] += dfdxW_z;
 
       // df/d(xW_x) ...
-      float dfdxW_x = m * t * adj;
+      float dfdxW_x = m * t * a;
       if(outXW)
         rowOutXW[l] += dfdxW_x;
       if(outSU)

@@ -671,7 +671,7 @@ void CrossEntropyPick(Tensor out_, Tensor in_, Tensor pick_) {
     }
 
     // cross-entropy
-    int i = pick[j];
+    int i = (int)pick[j];
     // This appears to be safe i.e. that i >= 0 && i < cols is known
     out[j] = std::log(sum) - sp[i] + max;
   }

@@ -960,7 +960,7 @@ void Shift(Tensor out_,
            float padValue,
            bool invert) {
   int offset = 0;
-  for(size_t i = 0; i < shift.size(); ++i)
+  for(int i = 0; i < shift.size(); ++i)
     offset += in_->shape().stride(i) * shift[i];
 
   if(invert)

@@ -983,7 +983,7 @@ void Shift(Tensor out_,
 void ShiftGrad(Tensor out_, Tensor in_, marian::Shape shift, bool invert) {
   int offset = 0;
-  for(size_t i = 0; i < shift.size(); ++i)
+  for(int i = 0; i < shift.size(); ++i)
     offset += in_->shape().stride(i) * shift[i];
 
   if(invert)

@@ -1004,7 +1004,7 @@ void ShiftGrad(Tensor out_, Tensor in_, marian::Shape shift, bool invert) {
 void SetSparse(float* out,
                const std::vector<size_t>& indices,
                const std::vector<float>& values) {
-  int length = indices.size();
+  int length = (int)indices.size();
   for(int index = 0; index < length; ++index) {
     out[indices[index]] = values[index];
   }

@@ -1112,15 +1112,15 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
       int l = i + 2 * cols;
       float gc = std::tanh(xWrow[l] + sUrow[l] + b[l]);
 
-      float adj = rowAdj[i];
+      float a = rowAdj[i];
 
       // dc/dx_{t-1}
       if(outCell) {
-        rowOutCell[i] += (m * gf - m + 1) * adj;
+        rowOutCell[i] += (m * gf - m + 1) * a;
       }
 
       // dc/d(b_f) = dc/d(xW_f) ...
-      float dcdxf = m * rowCell[i] * gf * (1 - gf) * adj;
+      float dcdxf = m * rowCell[i] * gf * (1 - gf) * a;
       if(outXW) {
         rowOutXW[i] += dcdxf;
       }

@@ -1132,7 +1132,7 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
       }
 
       // dc/d(b_i) ...
-      float dcdb_i = m * gc * gi * (1 - gi) * adj;
+      float dcdb_i = m * gc * gi * (1 - gi) * a;
       if(outXW) {
         rowOutXW[k] += dcdb_i;
       }

@@ -1144,7 +1144,7 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
       }
 
       // dc/d(b_c) ...
-      float dcdxc = m * gi * (1 - gc * gc) * adj;
+      float dcdxc = m * gi * (1 - gc * gc) * a;
       if(outXW) {
         rowOutXW[l] += dcdxc;
       }

@@ -1193,15 +1193,15 @@ void LSTMOutputBackward(std::vector<Tensor> outputs,
       float t = std::tanh(rowCell[i]);
 
-      float adj = rowAdj[i];
+      float a = rowAdj[i];
 
       // dc/dc_{t-1}
       if(outCell) {
-        rowOutCell[i] += go * (1 - t * t) * adj;
+        rowOutCell[i] += go * (1 - t * t) * a;
       }
 
       // dc/d(b_o) = dc/d(xW_f) ...
-      float dcdxo = t * go * (1 - go) * adj;
+      float dcdxo = t * go * (1 - go) * a;
       if(outXW) {
         rowOutXW[k] += dcdxo;
       }
@@ -1240,30 +1240,30 @@ void HighwayForward(Tensor out,
   }
 }
 
-void HighwayBackward(Tensor out1,
-                     Tensor out2,
-                     Tensor outt,
-                     const Tensor in1,
-                     const Tensor in2,
-                     const Tensor t,
-                     const Tensor adj) {
+void HighwayBackward(Tensor /*out1*/,
+                     Tensor /*out2*/,
+                     Tensor /*outt*/,
+                     const Tensor /*in1*/,
+                     const Tensor /*in2*/,
+                     const Tensor /*t*/,
+                     const Tensor /*adj*/) {
   ABORT("Not implemented!");
 }
 
-void PoolingWithMaskingForward(Tensor out,
-                               Tensor in,
-                               Tensor mask,
-                               int width,
-                               bool isEven) {
+void PoolingWithMaskingForward(Tensor /*out*/,
+                               Tensor /*in*/,
+                               Tensor /*mask*/,
+                               int /*width*/,
+                               bool /*isEven*/) {
   ABORT("Not implemented!");
 }
 
-void PoolingWithMaskingBackward(Tensor adj,
-                                Tensor adjIn,
-                                Tensor in,
-                                Tensor mask,
-                                int width,
-                                bool isEven) {
+void PoolingWithMaskingBackward(Tensor /*adj*/,
+                                Tensor /*adjIn*/,
+                                Tensor /*in*/,
+                                Tensor /*mask*/,
+                                int /*width*/,
+                                bool /*isEven*/) {
   ABORT("Not implemented!");
 }
 } // namespace cpu
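Note: for stubs whose body only aborts, commenting out the parameter names while keeping the types is the idiomatic fix for C4100: the signature still documents what the function would take, and nothing is left unreferenced. A minimal sketch with a hypothetical stub:

#include <stdexcept>

void poolingStub(int /*width*/, bool /*isEven*/) {
  throw std::logic_error("Not implemented!");  // stand-in for ABORT(...)
}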
@@ -16,7 +16,7 @@ protected:
   size_t alignment_;
 
   size_t align(size_t size) {
-    return ceil(size / (float)alignment_) * alignment_;
+    return size_t(ceil(size / (float)alignment_) * alignment_);
   }
 
 public:

@@ -91,7 +91,7 @@ public:
             request<float>(),
             type_);
 
-    float temp;
+    float temp = 0; // (initialize to keep compiler happy)
     if(backend_->getDeviceId().type == DeviceType::cpu) {
       std::copy(data() + i, data() + i + 1, &temp);
    }
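Note: temp was only assigned on the CPU branch, so MSVC's flow analysis reports C4701 ("potentially uninitialized local variable used") at the later read; initializing at the declaration is the cheap, warning-free fix, as the added comment says. Sketch (hypothetical names):

#include <cstdio>

float readValue() { return 42.f; }

int main() {
  bool isCpu = true;
  float temp = 0;        // initialized on every path; without "= 0",
  if(isCpu)              // MSVC warns C4701 at the use below
    temp = readValue();
  std::printf("%f\n", temp);
}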
@@ -183,16 +183,16 @@ public:
   void set(T value) {
     if(!matchType<T>(type_)) {
       switch(type_) {
-        case Type::float32: set<float>(value); break;
-        case Type::float64: set<double>(value); break;
-        case Type::int8: set<int8_t>(value); break;
-        case Type::int16: set<int16_t>(value); break;
-        case Type::int32: set<int32_t>(value); break;
-        case Type::int64: set<int64_t>(value); break;
-        case Type::uint8: set<uint8_t>(value); break;
-        case Type::uint16: set<uint16_t>(value); break;
-        case Type::uint32: set<uint32_t>(value); break;
-        case Type::uint64: set<uint64_t>(value); break;
+        case Type::float32: set<float   >((float   )value); break;
+        case Type::float64: set<double  >((double  )value); break;
+        case Type::int8:    set<int8_t  >((int8_t  )value); break;
+        case Type::int16:   set<int16_t >((int16_t )value); break;
+        case Type::int32:   set<int32_t >((int32_t )value); break;
+        case Type::int64:   set<int64_t >((int64_t )value); break;
+        case Type::uint8:   set<uint8_t >((uint8_t )value); break;
+        case Type::uint16:  set<uint16_t>((uint16_t)value); break;
+        case Type::uint32:  set<uint32_t>((uint32_t)value); break;
+        case Type::uint64:  set<uint64_t>((uint64_t)value); break;
         default:
           ABORT(
               "Requested type ({}) cannot be converted to underlying type ({})",

@@ -273,19 +273,19 @@ public:
     else
       strm << std::fixed << std::setprecision(0) << std::setfill(' ');
 
-    for(size_t i = 0; i < values.size(); ++i) {
+    for(int i = 0; i < values.size(); ++i) {
       std::vector<int> dims;
       shape().dims(i, dims);
 
       bool disp = true;
-      for(size_t j = 0; j < dims.size(); ++j)
+      for(int j = 0; j < dims.size(); ++j)
         disp = disp && (dims[j] < dispCols || dims[j] >= shape()[j] - dispCols);
 
       if(disp) {
         if(dims.back() == 0) {
           bool par = true;
           std::vector<std::string> p;
-          for(int j = dims.size() - 1; j >= 0; --j) {
+          for(int j = (int)dims.size() - 1; j >= 0; --j) {
             if(dims[j] != 0)
               par = false;

@@ -307,7 +307,7 @@ public:
           strm << " ";
 
         if(dims.back() + 1 == shape().back()) {
-          for(int j = dims.size() - 1; j >= 0; --j) {
+          for(int j = (int)dims.size() - 1; j >= 0; --j) {
             if(dims[j] + 1 != shape()[j])
               break;
             strm << "]";

@@ -316,7 +316,7 @@
         }
 
         bool prev = true;
-        for(int j = dims.size() - 1; j >= 0; --j) {
+        for(int j = (int)dims.size() - 1; j >= 0; --j) {
           if(j < (int)dims.size() - 1)
             prev = prev && dims[j + 1] + 1 == shape()[j + 1];
           if(prev && dims[j] + 1 == dispCols && shape()[j] > 2 * dispCols) {
@@ -35,7 +35,7 @@ public:
   }
 
   void reserve(size_t bytes = 0) {
-    float mult = bytes / GROW + 1;
+    auto mult = bytes / GROW + 1;
     LOG(info,
         "[memory] Extending reserved space to {} MB (device {})",
         mult * CHUNK,

@@ -78,8 +78,8 @@ public:
   Tensor asTensor() {
     auto mem = allocator_->memory();
-    int size = mem->size() / sizeof(float);
-    return Tensor(new TensorBase(mem, {1, size}, backend_));
+    auto size = mem->size() / sizeof(float);
+    return Tensor(new TensorBase(mem, {1, (int)size}, backend_));
   }
 
   size_t size() { return allocator_->size() / sizeof(float); }
@@ -7,7 +7,7 @@ namespace marian {
 #ifndef CUDA_FOUND
 Ptr<Communicator> createCommunicator(
     const std::vector<Ptr<ExpressionGraph>>& graphs,
-    bool noNccl) {
+    bool /*noNccl*/) {
   return New<DefaultCommunicator>(graphs);
 }
 #endif

@@ -17,8 +17,8 @@ public:
   virtual ~Communicator() {}
 
   virtual void foreach(const std::function<void(size_t, int)>& func) {
-    int totalSize = graphs_[0]->params()->vals()->size();
-    int shardSize = ceil(totalSize / (float)graphs_.size());
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
+    int shardSize = (int)ceil(totalSize / (float)graphs_.size());
 
     int pos = 0;
     std::vector<std::thread> group;

@@ -50,8 +50,8 @@ private:
   void init() {
     if(tmpTensors_.size() == 0) {
-      int totalSize = graphs_[0]->params()->vals()->size();
-      int shardSize = ceil(totalSize / (float)graphs_.size());
+      int totalSize = (int)graphs_[0]->params()->vals()->size();
+      int shardSize = (int)ceil(totalSize / (float)graphs_.size());
 
       int pos = 0;
       for(auto graph : graphs_) {

@@ -83,8 +83,8 @@ public:
   void scatterReduce() override {
     init();
 
-    int totalSize = graphs_[0]->params()->vals()->size();
-    int shardSize = ceil(totalSize / (float)graphs_.size());
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
+    int shardSize = (int)ceil(totalSize / (float)graphs_.size());
 
     // Gather gradients from different devices into current gradient shards
     auto scatter = [this, shardSize](size_t idx, int pos) {

@@ -107,8 +107,8 @@ public:
   }
 
   void allGather() override {
-    int totalSize = graphs_[0]->params()->vals()->size();
-    int shardSize = ceil(totalSize / (float)graphs_.size());
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
+    int shardSize = (int)ceil(totalSize / (float)graphs_.size());
 
     // Update all graphs with parameter shard
     auto gather = [this, shardSize](size_t idx, int pos) {

@@ -133,7 +133,7 @@ public:
     auto copy = [this, params](size_t idx, int pos) {
       // copy parameter shard to each graph
       auto subParam
-          = graphs_[idx]->params()->vals()->subtensor(pos, params[idx]->size());
+          = graphs_[idx]->params()->vals()->subtensor(pos, (int)params[idx]->size());
       params[idx]->copyFrom(subParam);
     };

@@ -147,7 +147,7 @@ public:
      // copy parameter shard to each graph
      for(auto graph : graphs_) {
        auto subParam
-            = graph->params()->vals()->subtensor(pos, params[idx]->size());
+            = graph->params()->vals()->subtensor(pos, (int)params[idx]->size());
        subParam->copyFrom(params[idx]);
      }
    };

@@ -162,17 +162,17 @@ public:
       // copy parameter shard to each graph, apart from last graph
       for(int i = 0; i < (int)graphs_.size() - 1; ++i) {
         auto subParam
-            = graphs_[i]->params()->vals()->subtensor(pos, params[idx]->size());
+            = graphs_[i]->params()->vals()->subtensor(pos, (int)params[idx]->size());
         subParam->copyFrom(params[idx]);
       }
 
       // back-up shard from last graph
-      auto subParamLast = graphs_.back()->params()->vals()->subtensor(
-          pos, params[idx]->size());
+      auto subParamLast =
+          graphs_.back()->params()->vals()->subtensor(pos, (int)params[idx]->size());
       params[idx]->copyFrom(subParamLast);
 
       auto subParamFirst
-          = graphs_[0]->params()->vals()->subtensor(pos, params[idx]->size());
+          = graphs_[0]->params()->vals()->subtensor(pos, (int)params[idx]->size());
       subParamLast->copyFrom(subParamFirst);
     };
     // execute for each shard
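Note: the pair totalSize / shardSize is recomputed in several methods, and every occurrence now needs the same two casts because the tensor size is a size_t while the shard arithmetic is done in int. A small helper could centralize the casts and replace the float ceil with exact integer round-up division — a sketch under the assumption that parameter counts fit in int (hypothetical helper, not part of this commit):

#include <cstddef>

// One place for the size_t -> int narrowing and the ceil-division used
// to split totalSize across numShards workers.
inline int shardSizeFor(size_t totalSize, size_t numShards) {
  return (int)((totalSize + numShards - 1) / numShards);
}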
@@ -102,6 +102,8 @@ public:
       gpu::copy(backend_, ndata, ndata + nsize, data());
       gpu::copy(backend_, nindices, nindices + nsize, indices());
     }
+#else
+    ndata; nindices; // (unused)
 #endif
   }

@@ -137,6 +139,8 @@ public:
     else {
       gpu::scatterAdd(t, data(), indices(), size(), offset);
     }
+#else
+    t; offset; // (unused)
 #endif
   }

@@ -149,6 +153,8 @@ public:
     else {
       gpu::scatterUpdate(t, data(), indices(), size(), offset);
     }
+#else
+    t; offset; // (unused)
 #endif
   }

@@ -161,6 +167,8 @@ public:
     else {
       gpu::gather(t, data(), indices(), size(), offset);
     }
+#else
+    t; offset; // (unused)
 #endif
   }
@@ -63,7 +63,7 @@ public:
     size_t step = options_->get<size_t>("mini-batch-fit-step");
 
     size_t maxLength = options_->get<size_t>("max-length");
-    maxLength = std::ceil(maxLength / (float)step) * step;
+    maxLength = (size_t)(std::ceil(maxLength / (float)step) * step);
 
     // @TODO: ugly
     auto toptions = New<Options>();

@@ -85,7 +85,7 @@ public:
       size_t end = maxBatch;
 
       std::vector<size_t> lengths(numFiles, i);
-      bool fits = true;
+      fits = true;
 
       do {
         size_t current = (start + end) / 2;
@@ -36,7 +36,7 @@ void AsyncGraphGroup::setScheduler(Ptr<Scheduler> scheduler) {
 void AsyncGraphGroup::fetchParams(Tensor oldParams,
                                   const std::vector<Tensor>& params,
-                                  int device_id) {
+                                  int /*device_id*/) {
   // @TODO read guard on parameters
   int pos = 0;

@@ -46,7 +46,7 @@ void AsyncGraphGroup::fetchParams(Tensor oldParams,
         [&](int idx, int pos) {
           // individual mutex per-shard
           std::lock_guard<std::mutex> guard(shardSync_[idx]);
-          oldParams->subtensor(pos, params[idx]->size())->copyFrom(params[idx]);
+          oldParams->subtensor((int)pos, (int)params[idx]->size())->copyFrom(params[idx]);
         },
         idx,
         pos));

@@ -60,7 +60,7 @@ void AsyncGraphGroup::fetchParams(Tensor oldParams,
 void AsyncGraphGroup::pushGradients(Tensor newGrads,
                                     size_t batch_words,
-                                    int device_id) {
+                                    int /*device_id*/) {
   // add instead of copy?
   std::vector<std::thread> threads;
   int pos = 0;

@@ -69,7 +69,7 @@ void AsyncGraphGroup::pushGradients(Tensor newGrads,
         [&](int idx, int pos) {
           // individual mutex per-shard
           std::lock_guard<std::mutex> guard(shardSync_[idx]);
-          grads_[idx]->copyFrom(newGrads->subtensor(pos, grads_[idx]->size()));
+          grads_[idx]->copyFrom(newGrads->subtensor(pos, (int)grads_[idx]->size()));
 
           if(scaleLearningRate_) {
             shardOpt_[idx]->update(

@@ -105,8 +105,8 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
   }
 
   if(params_.empty()) {
-    int totalSize = graphs_[0]->params()->vals()->size();
-    shardSize_ = ceil(totalSize / (float)devices_.size());
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
+    shardSize_ = (int)ceil(totalSize / (float)devices_.size());
 
     int pos = 0;
     // parameter sharding

@@ -128,7 +128,7 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
     }
   }
   if(grads_.empty()) {
-    int totalSize = graphs_[0]->params()->vals()->size();
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
 
     for(auto graph : graphs_) {
       int __size__ = std::min(shardSize_, totalSize);

@@ -154,7 +154,7 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
       graphAvg->forward();
     }
 
-    int totalSize = graphs_[0]->params()->vals()->size();
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
 
     int i = 0;
     for(auto graph : graphs_) {

@@ -203,7 +203,7 @@ void AsyncGraphGroup::execute(Ptr<data::Batch> batch) {
     if(!graph) {
       std::lock_guard<std::mutex> lock(sync_);
-      t_id = i;
+      t_id = (int)i;
       graph = graphs_[i];
       builder = builders_[i++];
     }
@@ -31,7 +31,7 @@ void AsyncGraphGroupDrop::fetchParams(Tensor oldParams,
           sparseShard->gather(params[idx]);
           sparseGrad->copyFrom(sparseShard);
           sparseGrad->scatterUpdate(
-              oldParams->subtensor(pos, params[idx]->size()));
+              oldParams->subtensor((int)pos, (int)params[idx]->size()));
         },
         idx,
         pos));

@@ -59,7 +59,7 @@ void AsyncGraphGroupDrop::pushGradients(Tensor newGrads,
           auto dropper = droppers_[device_id][idx];
           auto sparseGrad = sparseGrads_[device_id][idx];
           auto sparseShard = sparseShards_[device_id][idx];
-          auto tensor = newGrads->subtensor(pos, grads_[idx]->size());
+          auto tensor = newGrads->subtensor((int)pos, (int)grads_[idx]->size());
           // individual mutex per-shard
           std::lock_guard<std::mutex> guard(shardSync_[idx]);

@@ -107,8 +107,8 @@ void AsyncGraphGroupDrop::init(Ptr<data::Batch> batch) {
     fetch_ready.push_back(false);
 
   // Size of the sparse tensor
-  int totalSize = graphs_[0]->params()->vals()->size();
-  int sparseCap = totalSize * 1.2 * (1.0 - droping_rate);
+  int totalSize = (int)graphs_[0]->params()->vals()->size();
+  int sparseCap = (int)(totalSize * 1.2 * (1.0 - droping_rate));
 
   // prepare droppers
   std::vector<GradientDrop> tmpDropper;

@@ -120,13 +120,13 @@ void AsyncGraphGroupDrop::init(Ptr<data::Batch> batch) {
     std::vector<SparseTensor> tmp;
     for(int j = 0; j < devices_.size(); j++)
       tmp.push_back(SparseTensor(new SparseTensorBase(
-          sparseCap / devices_.size(), graphs_[i]->getBackend())));
+          sparseCap / (int)devices_.size(), graphs_[i]->getBackend())));
     sparseGrads_.push_back(tmp);
 
     std::vector<SparseTensor> tmp2;
     for(int j = 0; j < devices_.size(); j++)
       tmp2.push_back(SparseTensor(new SparseTensorBase(
-          sparseCap / devices_.size(), graphs_[j]->getBackend())));
+          sparseCap / (int)devices_.size(), graphs_[j]->getBackend())));
     sparseShards_.push_back(tmp2);
   }
   drop_first = false;
@@ -60,8 +60,8 @@ void MultiNodeGraphGroup::init(Ptr<data::Batch> batch) {
   for(int i = 0; i < mpi_comm_world_size_; i++) {
     // Shard buffers across GPUs
     auto backend = clientGraphs_[i % devices_.size()]->getBackend();
-    Tensor accGrad = newTensor(nodeSizes_[i], backend);
-    Tensor accGradBuff = newTensor(nodeSizes_[i], backend);
+    Tensor accGrad = newTensor((int)nodeSizes_[i], backend);
+    Tensor accGradBuff = newTensor((int)nodeSizes_[i], backend);
     accGradients.push_back(accGrad);
     accGradientBuffer.push_back(accGradBuff);
   }

@@ -113,7 +113,7 @@ void MultiNodeGraphGroup::runBatchThroughClientGraphs(Ptr<data::Batch> batch) {
 */
 void MultiNodeGraphGroup::calculateNodeSizes() {
   size_t modelSize = clientGraphs_[0]->params()->vals()->size();
-  size_t nodeSize = ceilf(((float)modelSize) / mpi_comm_world_size_);
+  size_t nodeSize = (size_t)ceilf(((float)modelSize) / mpi_comm_world_size_);
   for(int node = 0; node < mpi_comm_world_size_; node++) {
     size_t remainingModelSize = modelSize - (nodeSize * node);
     // Takes care of edge case where last node is smaller than the others

@@ -166,11 +166,11 @@ void MultiNodeGraphGroup::initClientCommOverlapGpuTensors() {
   for(size_t client = 0; client < devices_.size(); client++) {
     // Communication overlap buffer (for grads + params)
     Tensor commOverlapBuffer
-        = newTensor(modelSize, clientGraphs_[client]->getBackend());
+        = newTensor((int)modelSize, clientGraphs_[client]->getBackend());
     commOverlapBuffer->copyFrom(clientGraphs_[0]->params()->vals());
     clientCommOverlapBuffersGPU_.push_back(commOverlapBuffer);
     // Gradients local sum buffer
-    Tensor sumGrads = newTensor(modelSize, clientGraphs_[client]->getBackend());
+    Tensor sumGrads = newTensor((int)modelSize, clientGraphs_[client]->getBackend());
     sumGrads->set(0);
     clientSummedGradsGPU.push_back(sumGrads);
     // Local optimizer to apply summed gradients

@@ -207,7 +207,7 @@ void MultiNodeGraphGroup::setupServerShards() {
 */
 void MultiNodeGraphGroup::calculateShardSizes() {
   size_t nodeSize = nodeSizes_[mpi_my_rank_];
-  size_t shardSize = ceilf(((float)nodeSize) / devices_.size());
+  size_t shardSize = (size_t)ceilf(((float)nodeSize) / devices_.size());
   for(size_t shard = 0; shard < devices_.size(); shard++) {
     size_t remainingNodeSize = nodeSize - (shardSize * shard);
     // Takes care of edge case where last shard is smaller than the others

@@ -226,12 +226,12 @@ void MultiNodeGraphGroup::initShardGpuTensors() {
   }
   for(size_t shard = 0; shard < devices_.size(); shard++) {
     Tensor gpuParams
-        = newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend());
+        = newTensor((int)shardSizes_[shard], clientGraphs_[shard]->getBackend());
     gpuParams->copyFrom(clientGraphs_[0]->params()->vals()->subtensor(
-        offset, shardSizes_[shard]));
+        (int)offset, (int)shardSizes_[shard]));
     shardParams_.push_back(gpuParams);
     shardGrads_.push_back(
-        newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend()));
+        newTensor((int)shardSizes_[shard], clientGraphs_[shard]->getBackend()));
     offset += shardSizes_[shard];
   }
 }

@@ -519,6 +519,8 @@ void MultiNodeGraphGroup::synchronizeWithServerShards(Tensor newGrads,
     offset += nodeSize;
   }
+#else
+  newGrads; oldParams; gpu; batchWords; // (unused)
 #endif
 }

@@ -578,7 +580,7 @@ void MultiNodeGraphGroup::execute(Ptr<data::Batch> batch) {
     if(!clientCommOverlap) {
      synchronizeWithServerShards(graph->params()->grads(),
                                  graph->params()->vals(),
-                                  my_id,
+                                  (int)my_id,
                                  batch->wordsTrg());
     }
@@ -388,7 +388,7 @@ protected:
     numberClientsOfNodes_ = std::vector<int>(mpi_comm_world_size_, 0);
     while(index < deviceConfig.size()) {
       if(numberClientsOfNodes_[node] == 0) {
-        numberClientsOfNodes_[node] = deviceConfig[index];
+        numberClientsOfNodes_[node] = (int)deviceConfig[index];
         nClientsSeen = 0;
       } else if(nClientsSeen < numberClientsOfNodes_[node]) {
         if(node == mpi_my_rank_) {

@@ -45,7 +45,7 @@ Tensor MultiNodeGraphGroupSync::newTensor(int size, Ptr<Backend> backend) {
 void MultiNodeGraphGroupSync::init(Ptr<data::Batch> batch) {
   // Setup clients and shards
   setupClients(batch);
-  int network_size = clientGraphs_[0]->params()->vals()->size();
+  int network_size = (int)clientGraphs_[0]->params()->vals()->size();
   LOG(info, "model size = {} float params", network_size);
   if(movingAvg_)
     paramsAvg_ = newTensor(network_size, clientGraphs_.back()->getBackend());

@@ -166,7 +166,7 @@ protected:
     numberClientsOfNodes_ = std::vector<int>(mpi_comm_world_size_, 0);
     while(index < deviceConfig.size()) {
       if(numberClientsOfNodes_[node] == 0) {
-        numberClientsOfNodes_[node] = deviceConfig[index];
+        numberClientsOfNodes_[node] = (int)deviceConfig[index];
         nClientsSeen = 0;
       } else if(nClientsSeen < numberClientsOfNodes_[node]) {
         if(node == mpi_my_rank_) {
@@ -64,8 +64,8 @@ void SyncGraphGroup::initializeAvg() {
     graphAvg->forward();
   }
 
-  int totalSize = graphs_[0]->params()->vals()->size();
-  shardSize_ = ceil(totalSize / (float)devices_.size());
+  int totalSize = (int)graphs_[0]->params()->vals()->size();
+  shardSize_ = (int)ceil(totalSize / (float)devices_.size());
 
   int pos = 0;
   for(auto graph : graphs_) {

@@ -99,7 +99,7 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
   size_t devs = devices_.size();
   auto batches = batch->split(delay_ * devs);
 
-  float div = batches.size();  // no. of batches
+  float div = (float)batches.size();  // no. of batches
   // do not average gradients if cost type is sum.
   if(options_->get<std::string>("cost-type") == "ce-sum")
     div = 1;

@@ -131,7 +131,7 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
   }
 
   // Execute single forward/backward step
-  auto forwardBackward = [this, &costs, curBatches, t](size_t idx, int pos) {
+  auto forwardBackward = [this, &costs, curBatches, t](size_t idx, int /*pos*/) {
     auto graph = graphs_[idx];
     auto batch = curBatches[idx];

@@ -152,8 +152,8 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
   // Update parameter shard with gradient shard
   auto update = [this, div](size_t idx, int pos) {
-    int totalSize = graphs_[0]->params()->vals()->size();
-    int shardSize = ceil(totalSize / (float)devices_.size());
+    int totalSize = (int)graphs_[0]->params()->vals()->size();
+    int shardSize = (int)ceil(totalSize / (float)devices_.size());
 
     int size = std::min(totalSize - pos, shardSize);
@@ -20,12 +20,12 @@ private:
   float getLearningRate(TrainingState& state) {
     float baselr = options_->get<float>("learn-rate");
 
-    float bno = state.batches - state.warmupStart;
+    auto bno = state.batches - state.warmupStart;
 
     size_t warmup = options_->get<size_t>("lr-warmup");
     float mult1 = 1.f;
     if(warmup > 0) {
-      mult1 = std::min(1.f, bno / (float)warmup);
+      mult1 = std::min(1.f, (float)bno / (float)warmup);
     }
 
     size_t decayGoogle = options_->get<size_t>("lr-decay-inv-sqrt");

@@ -164,8 +164,8 @@ public:
   void update(float cost, const std::vector<Ptr<data::Batch>>& batches) {
     state_->validated = false;
 
-    auto batchSize = 0;    // number of sentences in batch
-    auto batchLabels = 0;  // number of target words in batch
+    size_t batchSize = 0;    // number of sentences in batch
+    size_t batchLabels = 0;  // number of target words in batch
 
     for(const auto& batch : batches) {
       batchSize += batch->size();

@@ -303,7 +303,7 @@ public:
   }
 
   void actAfterEpoch(TrainingState& state) override {
-    float factor = options_->get<double>("lr-decay");
+    float factor = (float)options_->get<double>("lr-decay"); // @TODO: <float>?
 
     float baselr = getLearningRate(state);
     state.eta = baselr * state.factor;

@@ -355,7 +355,7 @@ public:
   }
 
   void actAfterBatches(TrainingState& state) override {
-    float factor = options_->get<double>("lr-decay");
+    float factor = (float)options_->get<double>("lr-decay"); // @TODO: <float>?
     state.reset = false;
 
     float baselr = getLearningRate(state);

@@ -365,7 +365,7 @@ public:
     if("batches" == options_->get<std::string>("lr-decay-strategy")) {
       size_t start
           = options_->get<std::vector<size_t>>("lr-decay-start").front();
-      int freq = options_->get<size_t>("lr-decay-freq");
+      size_t freq = options_->get<size_t>("lr-decay-freq");
 
       if(start > 0 && freq > 0 && state.batches >= start
          && ((state.batches - start) % freq == 0)) {

@@ -403,7 +403,7 @@ public:
   }
 
   void actAfterStalled(TrainingState& state) override {
-    float factor = options_->get<double>("lr-decay");
+    float factor = (float)options_->get<double>("lr-decay"); // @TODO: <float>?
     state.reset = false;
 
     float baselr = getLearningRate(state);

@@ -411,7 +411,7 @@ public:
     if(factor > 0.0) {
       if(options_->get<std::string>("lr-decay-strategy") == "stalled") {
-        int startStalled
+        size_t startStalled
             = options_->get<std::vector<size_t>>("lr-decay-start").front();
         if(startStalled && state.stalled && state.stalled % startStalled == 0) {
           state.factor *= factor;
@@ -11,11 +11,11 @@ class TrainingState;
 class TrainingObserver {
 public:
-  virtual void init(TrainingState& state) {}
-  virtual void actAfterEpoch(TrainingState& state) {}
-  virtual void actAfterBatches(TrainingState& state) {}
-  virtual void actAfterStalled(TrainingState& state) {}
-  virtual void actAfterLoaded(TrainingState& state) {}
+  virtual void init(TrainingState&) {}
+  virtual void actAfterEpoch(TrainingState&) {}
+  virtual void actAfterBatches(TrainingState&) {}
+  virtual void actAfterStalled(TrainingState&) {}
+  virtual void actAfterLoaded(TrainingState&) {}
 };
 
 class TrainingState {
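Note: for the no-op default implementations above, simply dropping the parameter name is the cleanest C++ fix for C4100: the type stays in the signature for documentation, and an unnamed parameter cannot be "unreferenced". Sketch (hypothetical observer):

struct Observer {
  // Named parameter: fine when the implementation actually uses it.
  virtual void onEvent(int eventId) { lastEvent_ = eventId; }
  // Unnamed parameter: the no-op default cannot trigger C4100.
  virtual void onIdle(int) {}
  virtual ~Observer() = default;
private:
  int lastEvent_ = 0;
};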
@@ -209,7 +209,7 @@ public:
     auto command = options_->get<std::string>("valid-script-path");
     auto valStr = utils::Exec(command);
-    float val = std::atof(valStr.c_str());
+    float val = (float)std::atof(valStr.c_str());
     updateStalled(graphs, val);
 
     return val;

@@ -219,8 +219,8 @@ public:
 protected:
   virtual float validateBG(
-      const std::vector<Ptr<ExpressionGraph>>& graphs,
-      Ptr<data::BatchGenerator<data::Corpus>> batchGenerator) override {
+      const std::vector<Ptr<ExpressionGraph>>& /*graphs*/,
+      Ptr<data::BatchGenerator<data::Corpus>> /*batchGenerator*/) override {
     return 0;
   }
 };

@@ -332,7 +332,7 @@ public:
       std::stringstream best1;
       std::stringstream bestn;
       printer->print(history, best1, bestn);
-      collector->Write(history->GetLineNum(),
+      collector->Write((long)history->GetLineNum(),
                        best1.str(),
                        bestn.str(),
                        options_->get<bool>("n-best"));

@@ -357,7 +357,7 @@ public:
       auto command
           = options_->get<std::string>("valid-script-path") + " " + fileName;
       auto valStr = utils::Exec(command);
-      val = std::atof(valStr.c_str());
+      val = (float)std::atof(valStr.c_str());
       updateStalled(graphs, val);
     }

@@ -370,8 +370,8 @@ protected:
   bool quiet_{false};
 
   virtual float validateBG(
-      const std::vector<Ptr<ExpressionGraph>>& graphs,
-      Ptr<data::BatchGenerator<data::Corpus>> batchGenerator) override {
+      const std::vector<Ptr<ExpressionGraph>>& /*graphs*/,
+      Ptr<data::BatchGenerator<data::Corpus>> /*batchGenerator*/) override {
     return 0;
   }
 };

@@ -562,8 +562,8 @@ protected:
   }
 
   virtual float validateBG(
-      const std::vector<Ptr<ExpressionGraph>>& graphs,
-      Ptr<data::BatchGenerator<data::Corpus>> batchGenerator) override {
+      const std::vector<Ptr<ExpressionGraph>>& /*graphs*/,
+      Ptr<data::BatchGenerator<data::Corpus>> /*batchGenerator*/) override {
     return 0;
   }
 };
@@ -15,8 +15,8 @@ private:
   Ptr<Options> options_;
   std::vector<Ptr<Scorer>> scorers_;
   size_t beamSize_;
-  Word trgEosId_ = -1;
-  Word trgUnkId_ = -1;
+  Word trgEosId_ = (Word)-1;
+  Word trgUnkId_ = (Word)-1;
 
 public:
   BeamSearch(Ptr<Options> options,
|
|||
for(size_t i = 0; i < keys.size(); ++i) {
|
||||
// Keys contains indices to vocab items in the entire beam.
|
||||
// Values can be between 0 and beamSize * vocabSize.
|
||||
int embIdx = keys[i] % vocabSize;
|
||||
int beamIdx = i / beamSize;
|
||||
size_t embIdx = keys[i] % vocabSize;
|
||||
auto beamIdx = i / beamSize;
|
||||
|
||||
// Retrieve short list for final softmax (based on words aligned
|
||||
// to source sentences). If short list has been set, map the indices
|
||||
|
@ -63,15 +63,15 @@ public:
|
|||
auto& beam = beams[beamIdx];
|
||||
auto& newBeam = newBeams[beamIdx];
|
||||
|
||||
int hypIdx = keys[i] / vocabSize;
|
||||
size_t hypIdx = keys[i] / vocabSize;
|
||||
float pathScore = pathScores[i];
|
||||
|
||||
int hypIdxTrans
|
||||
size_t hypIdxTrans
|
||||
= (hypIdx / beamSize) + (hypIdx % beamSize) * beams.size();
|
||||
if(first)
|
||||
hypIdxTrans = hypIdx;
|
||||
|
||||
int beamHypIdx = hypIdx % beamSize;
|
||||
size_t beamHypIdx = hypIdx % beamSize;
|
||||
if(beamHypIdx >= (int)beam.size())
|
||||
beamHypIdx = beamHypIdx % beam.size();
|
||||
|
||||
|
@ -85,7 +85,7 @@ public:
|
|||
std::vector<float> breakDown(states.size(), 0);
|
||||
beam[beamHypIdx]->GetScoreBreakdown().resize(states.size(), 0);
|
||||
for(size_t j = 0; j < states.size(); ++j) {
|
||||
int key = embIdx + hypIdxTrans * vocabSize;
|
||||
size_t key = embIdx + hypIdxTrans * vocabSize;
|
||||
breakDown[j] = states[j]->breakDown(key)
|
||||
+ beam[beamHypIdx]->GetScoreBreakdown()[j];
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ public:
|
|||
// Set alignments
|
||||
if(!align.empty()) {
|
||||
hyp->SetAlignment(
|
||||
getAlignmentsForHypothesis(align, batch, beamHypIdx, beamIdx));
|
||||
getAlignmentsForHypothesis(align, batch, (int)beamHypIdx, (int)beamIdx));
|
||||
}
|
||||
|
||||
newBeam.push_back(hyp);
|
||||
|
@ -156,7 +156,7 @@ public:
|
|||
|
||||
// main decoding function
|
||||
Histories search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
|
||||
int dimBatch = batch->size();
|
||||
int dimBatch = (int)batch->size();
|
||||
|
||||
Histories histories;
|
||||
for(int i = 0; i < dimBatch; ++i) {
|
||||
|
@ -212,7 +212,7 @@ public:
|
|||
} else {
|
||||
std::vector<float> beamScores;
|
||||
|
||||
int dimBatch = batch->size();
|
||||
dimBatch = (int)batch->size();
|
||||
|
||||
for(size_t i = 0; i < localBeamSize; ++i) {
|
||||
for(size_t j = 0; j < beams.size(); ++j) { // loop over batch entries (active sentences)
|
||||
|
@ -240,7 +240,7 @@ public:
|
|||
|
||||
for(size_t i = 0; i < scorers_.size(); ++i) {
|
||||
states[i] = scorers_[i]->step(
|
||||
graph, states[i], hypIndices, embIndices, dimBatch, localBeamSize);
|
||||
graph, states[i], hypIndices, embIndices, dimBatch, (int)localBeamSize);
|
||||
|
||||
if(scorers_[i]->getWeight() != 1.f)
|
||||
pathScores = pathScores + scorers_[i]->getWeight() * states[i]->getLogProbs();
|
||||
|
|
|
@ -19,7 +19,7 @@ void SetColumn(Tensor in_, size_t col, float value) {
|
|||
|
||||
float* in = in_->data();
|
||||
for(int rowNumber = 0; rowNumber < nRows; ++rowNumber) {
|
||||
int index = col + rowNumber * nColumns;
|
||||
auto index = col + rowNumber * nColumns;
|
||||
in[index] = value;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,8 +30,8 @@ void NthElementCPU::getNBestList(float* scores,
|
|||
std::vector<int> idxs(numProbs);
|
||||
std::iota(idxs.begin(), idxs.end(), 0);
|
||||
|
||||
int numBatches = batchFirstElementIdxs.size() - 1;
|
||||
for(int batchIdx = 0; batchIdx < numBatches; ++batchIdx) {
|
||||
size_t numBatches = batchFirstElementIdxs.size() - 1;
|
||||
for(size_t batchIdx = 0; batchIdx < numBatches; ++batchIdx) {
|
||||
int pos = cumulativeBeamSizes[batchIdx];
|
||||
int beamSize = cumulativeBeamSizes[batchIdx + 1] - pos;
|
||||
|
||||
|
@ -61,9 +61,9 @@ void NthElementCPU::getNBestList(const std::vector<size_t>& beamSizes,
|
|||
std::vector<int> cumulativeBeamSizes(beamSizes.size() + 1, 0);
|
||||
std::vector<int> batchFirstElementIdxs(beamSizes.size() + 1, 0);
|
||||
|
||||
size_t vocabSize = scores->shape()[-1];
|
||||
for(size_t i = 0; i < beamSizes.size(); ++i) {
|
||||
cumulativeBeamSizes[i + 1] = cumulativeBeamSizes[i] + beamSizes[i];
|
||||
auto vocabSize = scores->shape()[-1];
|
||||
for(int i = 0; i < beamSizes.size(); ++i) {
|
||||
cumulativeBeamSizes[i + 1] = cumulativeBeamSizes[i] + (int)beamSizes[i];
|
||||
batchFirstElementIdxs[i + 1]
|
||||
+= (isFirst ? i + 1 : cumulativeBeamSizes[i + 1]) * vocabSize;
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ class GeometricPrinting : public PrintingStrategy {
|
|||
public:
|
||||
bool shouldBePrinted(long id) override {
|
||||
if(id == 0)
|
||||
next_ = start_;
|
||||
next_ = (long)start_;
|
||||
if(id <= 5)
|
||||
return true;
|
||||
if(next_ == id) {
|
||||
|
|
|
@@ -24,8 +24,6 @@ std::string OutputPrinter::getAlignment(const Ptr<Hypothesis>& hyp) {
   } else {
     ABORT("Unrecognized word alignment type");
   }
-
-  return "";
 }
 
 } // namespace marian
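Note: ABORT never returns, so the deleted "return \"\";" could not execute; MSVC reports such code as C4702 (unreachable code) once optimizations are enabled. Occurrences that cannot be fixed locally, e.g. inside expanded templates or third-party headers, are presumably why 4702 joins the project's DisableSpecificWarnings list in the build-settings hunks further below.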
@@ -69,7 +69,7 @@ std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options) {
   try {
     if(!options->get<bool>("ignore-model-config"))
       modelOptions->loadModelParameters(model);
-  } catch(std::runtime_error& e) {
+  } catch(std::runtime_error&) {
     LOG(warn, "No model settings found in model file");
   }

@@ -96,7 +96,7 @@ std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options,
   try {
     if(!options->get<bool>("ignore-model-config"))
       modelOptions->loadModelParameters(ptr);
-  } catch(std::runtime_error& e) {
+  } catch(std::runtime_error&) {
     LOG(warn, "No model settings found in model file");
   }
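Note: catching by type without naming the exception object is the standard fix for C4101 (unreferenced local variable) when the handler only logs and moves on. Sketch:

#include <stdexcept>
#include <cstdio>

int main() {
  try {
    throw std::runtime_error("no model settings");
  } catch(const std::runtime_error&) {  // unnamed: nothing left unreferenced
    std::puts("warning: No model settings found in model file");
  }
}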
@@ -111,7 +111,7 @@ public:
       std::stringstream best1;
       std::stringstream bestn;
       printer->print(history, best1, bestn);
-      collector->Write(history->GetLineNum(),
+      collector->Write((long)history->GetLineNum(),
                        best1.str(),
                        bestn.str(),
                        options_->get<bool>("n-best"));
@@ -63,14 +63,14 @@
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>MKL_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
-      <TreatWarningAsError>false</TreatWarningAsError>
+      <TreatWarningAsError>true</TreatWarningAsError>
       <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
       <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MultiThreadedDebugDLL</RuntimeLibrary>
-      <DisableSpecificWarnings>4996;4244</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4996; 4702</DisableSpecificWarnings>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
       <MinimalRebuild>false</MinimalRebuild>
     </ClCompile>

@@ -79,6 +79,7 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalDependencies>zlib.lib; mkl_intel_ilp64.lib; mkl_sequential.lib; mkl_core.lib; kernel32.lib; user32.lib; gdi32.lib; winspool.lib; comdlg32.lib; advapi32.lib; shell32.lib; ole32.lib; oleaut32.lib; uuid.lib; odbc32.lib; odbccp32.lib; %(AdditionalDependencies)</AdditionalDependencies>
       <StackReserveSize>100000000</StackReserveSize>
+      <TreatLinkerWarningAsErrors>true</TreatLinkerWarningAsErrors>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">

@@ -93,12 +94,12 @@
       <SDLCheck>true</SDLCheck>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
-      <TreatWarningAsError>false</TreatWarningAsError>
+      <TreatWarningAsError>true</TreatWarningAsError>
       <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MultiThreadedDLL</RuntimeLibrary>
       <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release_NoOpt|x64'">MultiThreaded</RuntimeLibrary>
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <OmitFramePointers>true</OmitFramePointers>
-      <DisableSpecificWarnings>4996</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4996; 4702</DisableSpecificWarnings>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>

@@ -108,6 +109,7 @@
       <OptimizeReferences>true</OptimizeReferences>
       <AdditionalDependencies>zlib.lib; mkl_intel_ilp64.lib; mkl_sequential.lib; mkl_core.lib; kernel32.lib; user32.lib; gdi32.lib; winspool.lib; comdlg32.lib; advapi32.lib; shell32.lib; ole32.lib; oleaut32.lib; uuid.lib; odbc32.lib; odbccp32.lib; %(AdditionalDependencies)</AdditionalDependencies>
       <StackReserveSize>100000000</StackReserveSize>
+      <TreatLinkerWarningAsErrors>true</TreatLinkerWarningAsErrors>
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
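Note: these project-file changes are what drive every source edit above. The compiler moves from /W1 to /W4 (Level4) with TreatWarningAsError on, so each conversion, unreferenced-parameter, and unreachable-code diagnostic becomes a build break. The Debug suppression list drops 4244 (implicit narrowing, now fixed in code with explicit casts), both configurations add 4702 (unreachable code, which can still fire in template expansions and third-party headers), and TreatLinkerWarningAsErrors extends the zero-warning policy to the linker.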
@@ -220,9 +220,6 @@
     <ClCompile Include="..\src\common\binary.cpp">
       <Filter>common</Filter>
     </ClCompile>
-    <ClCompile Include="..\src\command\marian_conv.cpp">
-      <Filter>command</Filter>
-    </ClCompile>
     <ClCompile Include="..\src\data\alignment.cpp">
       <Filter>data</Filter>
     </ClCompile>

@@ -241,9 +238,6 @@
     <ClCompile Include="..\src\3rd_party\yaml-cpp\binary_renamed.cpp">
       <Filter>3rd_party\yaml-cpp</Filter>
     </ClCompile>
-    <ClCompile Include="..\src\command\marian.cpp">
-      <Filter>command</Filter>
-    </ClCompile>
     <ClCompile Include="..\src\training\graph_group_multinode_sync.cpp">
       <Filter>training</Filter>
     </ClCompile>

@@ -1033,6 +1027,13 @@
     <ClInclude Include="..\src\command\marian_vocab.cpp">
       <Filter>command</Filter>
     </ClInclude>
+    <ClInclude Include="..\src\command\marian.cpp">
+      <Filter>command</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\command\marian_decoder.cpp">
+      <Filter>command</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\command\marian_conv.cpp" />
   </ItemGroup>
   <ItemGroup>
     <Filter Include="3rd_party">