Parameterloader optimization
This commit is contained in:
Родитель
c146688429
Коммит
30a756e6fe
|
@ -20,7 +20,8 @@ namespace multiverso
|
|||
class DataBlock : public multiverso::DataBlockBase
|
||||
{
|
||||
public:
|
||||
std::vector <int> input_nodes, output_nodes;
|
||||
std::unordered_set <int> input_nodes, output_nodes;
|
||||
std::unordered_set <int> negativesample_pools;
|
||||
DataBlock(){}
|
||||
~DataBlock();
|
||||
|
||||
|
|
|
@ -280,6 +280,16 @@ namespace multiverso
|
|||
|
||||
}
|
||||
multiverso::Multiverso::EndClock();
|
||||
|
||||
//Dump input-embedding weight
|
||||
multiverso::Multiverso::BeginClock();
|
||||
++data_block_count;
|
||||
DataBlock *data_block = new (std::nothrow)DataBlock();
|
||||
assert(data_block != nullptr);
|
||||
data_block->SetType(DataBlockType::Test);
|
||||
PushDataBlock(datablock_queue, data_block);
|
||||
multiverso::Multiverso::EndClock();
|
||||
|
||||
}
|
||||
|
||||
//Dump input-embedding weight
|
||||
|
|
|
@ -38,19 +38,16 @@ namespace multiverso
|
|||
//input_nodes,output_nodes
|
||||
multiverso::Log::Debug("Rank %d ParameterLoader parse begin %d\n",
|
||||
multiverso::Multiverso::ProcessRank(), parse_and_request_count_);
|
||||
WordEmbedding_->PrepareParameter(data, input_nodes, output_nodes);
|
||||
WordEmbedding_->PrepareParameter(data);
|
||||
multiverso::Log::Debug("Rank %d ParameterLoader parse end %d\n",
|
||||
multiverso::Multiverso::ProcessRank(), parse_and_request_count_);
|
||||
//Step 2, Request the parameter
|
||||
multiverso::Log::Debug("Rank %d ParameterLoader request begin %d\n",
|
||||
multiverso::Multiverso::ProcessRank(), parse_and_request_count_);
|
||||
RequestParameter(data, input_nodes, output_nodes);
|
||||
RequestParameter(data);
|
||||
multiverso::Log::Debug("Rank %d ParameterLoader request end %d\n",
|
||||
multiverso::Multiverso::ProcessRank(), parse_and_request_count_);
|
||||
//Step 3, store the needed parameters in data_block
|
||||
//it will be used to copy parameter from multiverso in trainer
|
||||
data->input_nodes = std::move(input_nodes);
|
||||
data->output_nodes = std::move(output_nodes);
|
||||
|
||||
multiverso::Log::Info("Rank %d ParameterLoader finish %d\n",
|
||||
multiverso::Multiverso::ProcessRank(), parse_and_request_count_ - 1);
|
||||
|
@ -58,9 +55,7 @@ namespace multiverso
|
|||
fflush(log_file_);
|
||||
}
|
||||
|
||||
void ParameterLoader::RequestParameter(DataBlock *data_block,
|
||||
std::vector<int>& input_nodes,
|
||||
std::vector<int>& output_nodes)
|
||||
void ParameterLoader::RequestParameter(DataBlock *data_block)
|
||||
{
|
||||
//If the data_block is the last one, we need to dump
|
||||
//the input-embedding weights
|
||||
|
@ -68,16 +63,17 @@ namespace multiverso
|
|||
RequestTable(kInputEmbeddingTableId);
|
||||
|
||||
RequestRow(kWordCountActualTableId, 0);
|
||||
for (int i = 0; i < input_nodes.size(); ++i)
|
||||
RequestRow(kInputEmbeddingTableId, input_nodes[i]);
|
||||
for (int i = 0; i < output_nodes.size(); ++i)
|
||||
RequestRow(kEmbeddingOutputTableId, output_nodes[i]);
|
||||
for (auto node : data_block->input_nodes)
|
||||
RequestRow(kInputEmbeddingTableId, node);
|
||||
for (auto node : data_block->output_nodes)
|
||||
RequestRow(kEmbeddingOutputTableId, node);
|
||||
|
||||
if (option_->use_adagrad)
|
||||
{
|
||||
for (int i = 0; i < input_nodes.size(); ++i)
|
||||
RequestRow(kSumGradient2IETableId, input_nodes[i]);
|
||||
for (int i = 0; i < output_nodes.size(); ++i)
|
||||
RequestRow(kSumGradient2EOTableId, output_nodes[i]);
|
||||
for (auto node : data_block->input_nodes)
|
||||
RequestRow(kSumGradient2IETableId, node);
|
||||
for (auto node : data_block->output_nodes)
|
||||
RequestRow(kSumGradient2EOTableId, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,9 +43,7 @@ namespace multiverso
|
|||
* \param input_nodes stores the input words' indices
|
||||
* \param output_nodes stores the output words' indices
|
||||
*/
|
||||
void RequestParameter(DataBlock *data_block,
|
||||
std::vector<int>& input_nodes,
|
||||
std::vector<int>& output_nodes);
|
||||
void RequestParameter(DataBlock *data_block);
|
||||
//No copying allowed
|
||||
ParameterLoader(const ParameterLoader&);
|
||||
void operator=(const ParameterLoader&);
|
||||
|
|
|
@ -48,16 +48,16 @@ namespace multiverso
|
|||
//Compute the total number of processes
|
||||
if (process_count_ == -1)
|
||||
process_count_ = multiverso::Multiverso::TotalProcessCount();
|
||||
//Get the input_nodes and output_nodes from data_block
|
||||
//The input_nodes and output_nodes are stored by ParameterLoader
|
||||
|
||||
DataBlock *data = reinterpret_cast<DataBlock*>(data_block);
|
||||
std::vector<int>& input_nodes = data->input_nodes;
|
||||
std::vector<int>& output_nodes = data->output_nodes;
|
||||
std::vector<int> input_nodes(data->input_nodes.begin(), data->input_nodes.end());
|
||||
std::vector<int> output_nodes(data->output_nodes.begin(), data->output_nodes.end());
|
||||
//A trainer only copies or adds a part of the parameters
|
||||
//This trainer should copy or add the parameters according to
|
||||
//local_input_nodes and local_output_nodes
|
||||
std::vector<int> local_input_nodes;
|
||||
std::vector<int> local_output_nodes;
|
||||
|
||||
for (int i = trainer_id_; i < input_nodes.size(); i += option_->thread_cnt)
|
||||
local_input_nodes.push_back(input_nodes[i]);
|
||||
for (int i = trainer_id_; i < output_nodes.size(); i += option_->thread_cnt)
|
||||
|
@ -121,6 +121,7 @@ namespace multiverso
|
|||
{
|
||||
SaveEmbedding(option_->output_file, option_->output_binary);
|
||||
}
|
||||
|
||||
if (trainer_id_ == 0)
|
||||
{
|
||||
fprintf(log_file_, "%lf\n",
|
||||
|
|
|
@ -49,6 +49,8 @@ namespace multiverso
|
|||
void WordEmbedding::Train(DataBlock *data_block, int index_start, int interval,
|
||||
int64& word_count, real* hidden_act, real* hidden_err)
|
||||
{
|
||||
std::vector <int> negativesample(data_block->negativesample_pools.begin(),
|
||||
data_block->negativesample_pools.end());
|
||||
for (int i = index_start; i < data_block->Size(); i += interval)
|
||||
{
|
||||
int sentence_length;
|
||||
|
@ -59,7 +61,7 @@ namespace multiverso
|
|||
word_count_deta, next_random);
|
||||
|
||||
this->Train(sentence, sentence_length,
|
||||
next_random, hidden_act, hidden_err);
|
||||
next_random, hidden_act, hidden_err, negativesample);
|
||||
|
||||
word_count += word_count_deta;
|
||||
}
|
||||
|
@ -77,10 +79,10 @@ namespace multiverso
|
|||
}
|
||||
|
||||
void WordEmbedding::Train(int* sentence, int sentence_length,
|
||||
uint64 next_random, real* hidden_act, real* hidden_err)
|
||||
uint64 next_random, real* hidden_act, real* hidden_err, std::vector <int> &negativesample_pools)
|
||||
{
|
||||
ParseSentence(sentence, sentence_length,
|
||||
next_random, hidden_act, hidden_err, &WordEmbedding::TrainSample);
|
||||
next_random, hidden_act, hidden_err, &WordEmbedding::TrainSample, negativesample_pools);
|
||||
}
|
||||
//Train with forward direction and get the input-hidden layer vector
|
||||
void WordEmbedding::FeedForward(std::vector<int>& input_nodes, real* hidden_act)
|
||||
|
@ -190,37 +192,52 @@ namespace multiverso
|
|||
}
|
||||
}
|
||||
//Prepare the parameters for the data block
|
||||
void WordEmbedding::PrepareParameter(DataBlock* data_block,
|
||||
std::vector<int>& input_nodes,
|
||||
std::vector<int>& output_nodes)
|
||||
void WordEmbedding::PrepareParameter(DataBlock* data_block)
|
||||
{
|
||||
input_nodes_.clear();
|
||||
output_nodes_.clear();
|
||||
|
||||
int sentence_length;
|
||||
int64 word_count_deta;
|
||||
int64 word_count_delta;
|
||||
int *sentence;
|
||||
uint64 next_random;
|
||||
if (option_->hs)
|
||||
{
|
||||
for (int i = 0; i < data_block->Size(); ++i)
|
||||
{
|
||||
data_block->GetSentence(i, sentence, sentence_length, word_count_deta,
|
||||
next_random);
|
||||
ParseSentence(sentence, sentence_length, next_random,
|
||||
nullptr, nullptr, &WordEmbedding::DealPrepareParameter);
|
||||
}
|
||||
data_block->GetSentence(i, sentence, sentence_length, word_count_delta, next_random);
|
||||
|
||||
for (auto it = input_nodes_.begin(); it != input_nodes_.end(); it++)
|
||||
for (int sentence_position = 0; sentence_position < sentence_length; ++sentence_position)
|
||||
{
|
||||
input_nodes.push_back(*it);
|
||||
assert((*it) >= 0);
|
||||
assert((*it) < dictionary_size_);
|
||||
data_block->input_nodes.insert(sentence[sentence_position]);
|
||||
}
|
||||
|
||||
for (auto it = output_nodes_.begin(); it != output_nodes_.end(); it++)
|
||||
}
|
||||
for (auto input_node : data_block->input_nodes)
|
||||
{
|
||||
output_nodes.push_back(*it);
|
||||
assert((*it) >= 0);
|
||||
assert((*it) < dictionary_size_);
|
||||
auto info = huffmanEncoder_->GetLabelInfo(input_node);
|
||||
for (int d = 0; d < info->codelen; d++)
|
||||
data_block->output_nodes.insert(info->point[d]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < data_block->Size(); ++i)
|
||||
{
|
||||
data_block->GetSentence(i, sentence, sentence_length, word_count_delta, next_random);
|
||||
|
||||
for (int sentence_position = 0; sentence_position < sentence_length; ++sentence_position)
|
||||
{
|
||||
data_block->input_nodes.insert(sentence[sentence_position]);
|
||||
}
|
||||
}
|
||||
for (auto input_node : data_block->input_nodes)
|
||||
{
|
||||
data_block->output_nodes.insert(input_node);
|
||||
}
|
||||
for (int d = 0; d < option_->negative_num * data_block->input_nodes.size(); d++)
|
||||
{
|
||||
next_random = sampler_->GetNextRandom(next_random);
|
||||
int target = sampler_->NegativeSampling(next_random);
|
||||
data_block->output_nodes.insert(target);
|
||||
data_block->negativesample_pools.insert(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
//Copy the input & output nodes
|
||||
|
@ -236,7 +253,7 @@ namespace multiverso
|
|||
//Parse the sentence and deepen into two branches
|
||||
void WordEmbedding::ParseSentence(int* sentence, int sentence_length,
|
||||
uint64 next_random, real* hidden_act, real* hidden_err,
|
||||
FunctionType function)
|
||||
FunctionType function, std::vector <int> &negativesample_pools)
|
||||
{
|
||||
if (sentence_length == 0)
|
||||
return;
|
||||
|
@ -263,7 +280,7 @@ namespace multiverso
|
|||
input_nodes.clear();
|
||||
output_nodes.clear();
|
||||
Parse(feat + feat_size - 1, 1, sentence[sentence_position],
|
||||
next_random, input_nodes, output_nodes);
|
||||
next_random, input_nodes, output_nodes, negativesample_pools);
|
||||
(this->*function)(input_nodes, output_nodes, hidden_act, hidden_err);
|
||||
}
|
||||
}
|
||||
|
@ -273,7 +290,8 @@ namespace multiverso
|
|||
input_nodes.clear();
|
||||
output_nodes.clear();
|
||||
Parse(feat, feat_size, sentence[sentence_position],
|
||||
next_random, input_nodes, output_nodes);
|
||||
next_random, input_nodes, output_nodes, negativesample_pools);
|
||||
|
||||
(this->*function)(input_nodes, output_nodes, hidden_act, hidden_err);
|
||||
}
|
||||
}
|
||||
|
@ -281,7 +299,7 @@ namespace multiverso
|
|||
//Parse the window's input & output nodes
|
||||
inline void WordEmbedding::Parse(int *feat, int feat_cnt, int word_idx,
|
||||
uint64 &next_random, std::vector<int>& input_nodes,
|
||||
std::vector<std::pair<int, int> >& output_nodes)
|
||||
std::vector<std::pair<int, int> >& output_nodes, std::vector <int> &negativesample_pools)
|
||||
{
|
||||
for (int i = 0; i < feat_cnt; ++i)
|
||||
{
|
||||
|
@ -301,7 +319,8 @@ namespace multiverso
|
|||
for (int d = 0; d < option_->negative_num; d++)
|
||||
{
|
||||
next_random = sampler_->GetNextRandom(next_random);
|
||||
int target = sampler_->NegativeSampling(next_random);
|
||||
int index = (next_random >> 8) % negativesample_pools.size();
|
||||
int target = negativesample_pools[index];
|
||||
if (target == word_idx) continue;
|
||||
output_nodes.push_back(std::make_pair(target, 0));
|
||||
}
|
||||
|
|
|
@ -48,8 +48,7 @@ namespace multiverso
|
|||
* \param input_nodes input_nodes represent the parameter which input_layer includes
|
||||
* \param output_nodes output_nodes represent the parameter which output_layer includes
|
||||
*/
|
||||
void PrepareParameter(DataBlock *data_block,
|
||||
std::vector<int>& input_nodes, std::vector<int>& output_nodes);
|
||||
void PrepareParameter(DataBlock *data_block);
|
||||
/*!
|
||||
* \brief Update the learning rate
|
||||
*/
|
||||
|
@ -92,7 +91,7 @@ namespace multiverso
|
|||
*/
|
||||
void Parse(int *feat, int feat_cnt, int word_idx, uint64 &next_random,
|
||||
std::vector<int>& input_nodes,
|
||||
std::vector<std::pair<int, int> >& output_nodes);
|
||||
std::vector<std::pair<int, int> >& output_nodes, std::vector <int> &negativesample_pools);
|
||||
/*!
|
||||
* \brief Parse a sentence and deepen into two branches
|
||||
* One is for TrainNN, the other one is for Parameter_parse & request
|
||||
|
@ -100,7 +99,7 @@ namespace multiverso
|
|||
void ParseSentence(int* sentence, int sentence_length,
|
||||
uint64 next_random,
|
||||
real* hidden_act, real* hidden_err,
|
||||
FunctionType function);
|
||||
FunctionType function, std::vector <int> &negativesample_pools);
|
||||
/*!
|
||||
* \brief Get the hidden layer vector
|
||||
* \param input_nodes represent the input nodes
|
||||
|
@ -139,7 +138,7 @@ namespace multiverso
|
|||
* \brief Train the sentence actually
|
||||
*/
|
||||
void Train(int* sentence, int sentence_length,
|
||||
uint64 next_random, real* hidden_act, real* hidden_err);
|
||||
uint64 next_random, real* hidden_act, real* hidden_err, std::vector <int> &negativesample_pools);
|
||||
|
||||
//No copying allowed
|
||||
WordEmbedding(const WordEmbedding&);
|
||||
|
|
Загрузка…
Ссылка в новой задаче