Rename variables and add a run script.
1. Change some variable names. 2. Add a Python script to run the program.
Parent: 44f0633180
Commit: 23477e87c5
@@ -0,0 +1,55 @@
+import os
+import copy
+import time
+import random
+import sys
+import shutil
+import subprocess
+
+from subprocess import STDOUT
+
+def execute(command):
+    popen = subprocess.Popen(command, stdout=subprocess.PIPE)
+    lines_iterator = iter(popen.stdout.readline, b"")
+    for line in lines_iterator:
+        print(line)  # yield line
+
+# parameters w.r.t. MPI
+work_dir = 'D:\\Your Directory'
+port = '5719'
+machinefile = 'host.txt'
+
+# parameters w.r.t. SG-Mixture training
+size = 50
+train = 'Your Training File'
+read_vocab = 'Your Vocab File'
+sense_file = 'Your Sense File, see sense_file.txt as an example'
+binary = 2
+init_learning_rate = 0.025
+epoch = 1
+window = 5
+threads = 8
+mincount = 5
+EM_iteration = 1
+momentum = 0.05
+top_n = 0
+top_ratio = 0
+default_sense = 1
+sense_num_multi = 5
+binary_embedding_file = 'emb.bin'
+text_embedding_file = 'emb.txt'
+huff_tree_file = 'huff.txt'
+outputlayer_binary_file = 'emb_out.bin'
+outputlayer_text_file = 'emb_out.txt'
+preload_cnt = 5
+data_block_size = 50000
+pipline = '0'
+multinomial = '0'
+
+mpi_args = '-port {0} -wdir {1} -machinefile {2} '.format(port, work_dir, machinefile)
+sg_mixture_args = ' -train_file {0} -binary_embedding_file {1} -text_embedding_file {2} -threads {3} -size {4} -binary {5} -epoch {6} -init_learning_rate {7} -min_count {8} -window {9} -momentum {10} -EM_iteration {11} -top_n {12} -top_ratio {13} -default_sense {14} -sense_num_multi {15} -huff_tree_file {16} -vocab_file {17} -outputlayer_binary_file {18} -outputlayer_text_file {19} -read_sense {20} -data_block_size {21} -is_pipline {22} -store_multinomial {23} -max_preload_size {24}'.format(train, binary_embedding_file, text_embedding_file, threads, size, binary, epoch, init_learning_rate, mincount, window, momentum, EM_iteration, top_n, top_ratio, default_sense, sense_num_multi, huff_tree_file, read_vocab, outputlayer_binary_file, outputlayer_text_file, sense_file, data_block_size, pipline, multinomial, preload_cnt)
+
+print(mpi_args)
+print(sg_mixture_args)
+
+proc = execute("mpiexec " + mpi_args + 'distributed_skipgram_mixture ' + sg_mixture_args)
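The execute helper above streams the child's stdout line by line instead of buffering until exit, so mpiexec's progress output appears while training runs. A self-contained sketch of the same pattern, runnable on its own (the python -c child is a stand-in for the real mpiexec command line, not part of the commit):

    import subprocess
    import sys

    def execute(command):
        # Stream the child's stdout line by line, as the script above does for mpiexec.
        popen = subprocess.Popen(command, stdout=subprocess.PIPE)
        for line in iter(popen.stdout.readline, b""):
            print(line.decode(errors="replace").rstrip())
        return popen.wait()  # propagate the child's exit code

    code = execute([sys.executable, "-c", "print('hello from child')"])
    print("child exited with", code)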
File diff suppressed because it is too large. Load diff.
@@ -1,4 +1,4 @@
-#include "multiverso_skipgram_mixture.h"
+#include "MultiversoSkipGramMixture.h"
 #include <algorithm>
 
 MultiversoSkipGramMixture::MultiversoSkipGramMixture(Option *option, Dictionary *dictionary, HuffmanEncoder *huffman_encoder, Reader *reader)
@@ -35,7 +35,7 @@ void MultiversoSkipGramMixture::InitSenseCntInfo()
 		m_word_sense_info.word_sense_cnts_info[wordlist[i].first] = 1;
 
 	//Then, read words #sense info from the sense file
 	if (m_option->sense_file)
 	{
 		FILE* fid = fopen(m_option->sense_file, "r");
 		char word[1000];
@@ -58,7 +58,7 @@ void MultiversoSkipGramMixture::InitSenseCntInfo()
 	int cnt = 0;
 	m_word_sense_info.multi_senses_words_cnt = 0;
 
 	for (int i = 0; i < m_dictionary->Size(); ++i)
 	{
 		m_word_sense_info.p_input_embedding[i] = cnt;
 		if (m_word_sense_info.word_sense_cnts_info[i] > 1)
@@ -107,7 +107,7 @@ void MultiversoSkipGramMixture::Train(int argc, char *argv[])
 	multiverso::Log::ResetLogFile("log.txt");
 	m_process_id = multiverso::Multiverso::ProcessRank();
 	PrepareMultiversoParameterTables(m_option, m_dictionary);
 
 	printf("Start to train ...\n");
 	TrainNeuralNetwork();
 	printf("Rank %d Finish training\n", m_process_id);
@@ -156,7 +156,7 @@ void MultiversoSkipGramMixture::PrepareMultiversoParameterTables(Option *opt, Di
 	{
 		for (int col = 0; col < opt->sense_num_multi; ++col)
 		{
 			multiverso::Multiverso::AddToServer<real>(kWordSensePriorTableId, row, col,
 				static_cast<real>(m_option->store_multinomial ? 1.0 / m_option->sense_num_multi : log(1.0 / m_option->sense_num_multi)));
 		}
 	}
@@ -198,7 +198,7 @@ void MultiversoSkipGramMixture::PushDataBlock(
 	{
 		std::chrono::milliseconds dura(200);
 		std::this_thread::sleep_for(dura);
 
 		RemoveDoneDataBlock(datablock_queue);
 	}
 }
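The PrepareMultiversoParameterTables hunk above seeds every sense-prior cell uniformly: as a probability when store_multinomial is set, and as its log otherwise. A minimal Python sketch of that ternary (the function name is illustrative, not from the repo):

    import math

    def init_sense_prior_row(sense_num_multi, store_multinomial):
        # Uniform prior over senses, stored as probabilities or log-probabilities.
        p = 1.0 / sense_num_multi
        return [p if store_multinomial else math.log(p)] * sense_num_multi

    print(init_sense_prior_row(5, True))   # [0.2, 0.2, 0.2, 0.2, 0.2]
    print(init_sense_prior_row(5, False))  # five copies of log(0.2)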
@@ -1,4 +1,4 @@
-#include "param_loader.h"
+#include "ParamLoader.h"
 
 template<typename T>
 ParameterLoader<T>::ParameterLoader(Option *option, void** word2vector_neural_networks, WordSenseInfo* word_sense_info)
@@ -21,12 +21,12 @@ void ParameterLoader<T>::ParseAndRequest(multiverso::DataBlockBase *data_block)
 	fprintf(m_log_file, "%lf\n", (clock() - m_start_time) / (double)CLOCKS_PER_SEC);
 	multiverso::Log::Info("Rank %d ParameterLoader begin %d\n", multiverso::Multiverso::ProcessRank(), m_parse_and_request_count);
 	DataBlock *data = reinterpret_cast<DataBlock*>(data_block);
 
 	SkipGramMixtureNeuralNetwork<T>* sg_mixture_neural_network = reinterpret_cast<SkipGramMixtureNeuralNetwork<T>*>(m_sgmixture_neural_networks[m_parse_and_request_count % 2]);
 	++m_parse_and_request_count;
 	data->UpdateNextRandom();
 	sg_mixture_neural_network->PrepareParmeter(data);
 
 	std::vector<int>& input_layer_nodes = sg_mixture_neural_network->GetInputLayerNodes();
 	std::vector<int>& output_layer_nodes = sg_mixture_neural_network->GetOutputLayerNodes();
 	assert(sg_mixture_neural_network->status == 0);
@@ -62,4 +62,4 @@ void ParameterLoader<T>::ParseAndRequest(multiverso::DataBlockBase *data_block)
 }
 
 template class ParameterLoader<float>;
 template class ParameterLoader<double>;
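Note the m_parse_and_request_count % 2 index in ParseAndRequest: the loader alternates between two preallocated networks, so one data block's parameters can be parsed and requested while the previous block is still being trained. A toy sketch of that double-buffering pattern (all names illustrative):

    networks = [{"id": 0}, {"id": 1}]  # stand-ins for the two preallocated networks
    parse_and_request_count = 0

    def parse_and_request(block):
        global parse_and_request_count
        net = networks[parse_and_request_count % 2]  # same alternation as the C++ index
        parse_and_request_count += 1
        print("block", block, "-> network", net["id"])

    for block in ["A", "B", "C"]:
        parse_and_request(block)  # A -> 0, B -> 1, C -> 0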
@@ -1,7 +1,7 @@
-#include "skipgram_mixture_neural_network.h"
+#include "SkipGramMixtureNeuralNetwork.h"
 
 template<typename T>
 SkipGramMixtureNeuralNetwork<T>::SkipGramMixtureNeuralNetwork(Option* option, HuffmanEncoder* huffmanEncoder, WordSenseInfo* word_sense_info, Dictionary* dic, int dicSize)
 {
 	status = 0;
 	m_option = option;
@@ -37,18 +37,18 @@ SkipGramMixtureNeuralNetwork<T>::~SkipGramMixtureNeuralNetwork()
 }
 
 template<typename T>
-void SkipGramMixtureNeuralNetwork<T>::Train(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup)
+void SkipGramMixtureNeuralNetwork<T>::Train(int* sentence, int sentence_length, T* gamma, T* f_table, T* input_backup)
 {
-	ParseSentence(sentence, sentence_length, gamma, fTable, input_backup, &SkipGramMixtureNeuralNetwork<T>::TrainSample);
+	ParseSentence(sentence, sentence_length, gamma, f_table, input_backup, &SkipGramMixtureNeuralNetwork<T>::TrainSample);
 }
 
 template<typename T>
 //The E-step, estimate the posterior multinomial probabilities
-T SkipGramMixtureNeuralNetwork<T>::Estimate_Gamma_m(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* posterior_ll, T* estimation, T* sense_prior, T* f_m)
+T SkipGramMixtureNeuralNetwork<T>::EstimateGamma(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* posterior_ll, T* estimation, T* sense_prior, T* f_m)
 {
-	T* inputEmbedding = m_input_embedding_weights_ptr[word_input];
+	T* input_embedding = m_input_embedding_weights_ptr[word_input];
 	T f, log_likelihood = 0;
-	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, inputEmbedding += m_option->embeding_size)
+	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, input_embedding += m_option->embeding_size)
 	{
 		posterior_ll[sense_idx] = sense_prior[sense_idx] < eps ? MIN_LOG : log(sense_prior[sense_idx]); //posterior likelihood for each sense
 
@@ -56,7 +56,7 @@ T SkipGramMixtureNeuralNetwork<T>::Estimate_Gamma_m(int word_input, std::vector<
 
 	for (int d = 0; d < output_nodes.size(); ++d, fidx++)
 	{
-		f = Util::InnerProduct(inputEmbedding, m_output_embedding_weights_ptr[output_nodes[d].first], m_option->embeding_size);
+		f = Util::InnerProduct(input_embedding, m_output_embedding_weights_ptr[output_nodes[d].first], m_option->embeding_size);
 		f = Util::Sigmoid(f);
 		f_m[fidx] = f;
 		if (output_nodes[d].second) //huffman code, 0 or 1
@@ -78,7 +78,7 @@ T SkipGramMixtureNeuralNetwork<T>::Estimate_Gamma_m(int word_input, std::vector<
 
 template<typename T>
 //The M-step: update the sense prior probabilities to maximize the Q function
-void SkipGramMixtureNeuralNetwork<T>::Maximize_Pi(int word_input, T* log_likelihood)
+void SkipGramMixtureNeuralNetwork<T>::MaximizeSensePriors(int word_input, T* log_likelihood)
 {
 	if (m_word_sense_info->word_sense_cnts_info[word_input] == 1)
 	{
@@ -101,11 +101,11 @@ void SkipGramMixtureNeuralNetwork<T>::UpdateEmbeddings(int word_input, std::vect
 {
 	T g;
 	T* output_embedding;
-	T* inputEmbedding;
+	T* input_embedding;
 	if (direction == UpdateDirection::UPDATE_INPUT)
-		inputEmbedding = m_input_embedding_weights_ptr[word_input];
-	else inputEmbedding = input_backup;
-	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, inputEmbedding += m_option->embeding_size)
+		input_embedding = m_input_embedding_weights_ptr[word_input];
+	else input_embedding = input_backup;
+	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, input_embedding += m_option->embeding_size)
 	{
 		int64_t fidx = sense_idx * MAX_CODE_LENGTH;
 		for (int d = 0; d < output_nodes.size(); ++d, ++fidx)
@@ -115,12 +115,12 @@ void SkipGramMixtureNeuralNetwork<T>::UpdateEmbeddings(int word_input, std::vect
 			if (direction == UpdateDirection::UPDATE_INPUT) //Update Input
 			{
 				for (int j = 0; j < m_option->embeding_size; ++j)
-					inputEmbedding[j] += g * output_embedding[j];
+					input_embedding[j] += g * output_embedding[j];
 			}
 			else // Update Output
 			{
 				for (int j = 0; j < m_option->embeding_size; ++j)
-					output_embedding[j] += g * inputEmbedding[j];
+					output_embedding[j] += g * input_embedding[j];
 			}
 		}
 	}
@@ -132,7 +132,7 @@ template<typename T>
 void SkipGramMixtureNeuralNetwork<T>::TrainSample(int input_node, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup)
 {
 	T* gamma = (T*)v_gamma; //stores the posterior probabilities
-	T* fTable = (T*)v_fTable; //stores the inner product values of input and output embeddings
+	T* f_table = (T*)v_fTable; //stores the inner product values of input and output embeddings
 	T* input_backup = (T*)v_input_backup;
 
 	T posterior_ll[MAX_SENSE_CNT]; //stores the posterior log likelihood
@@ -149,16 +149,16 @@ void SkipGramMixtureNeuralNetwork<T>::TrainSample(int input_node, std::vector<st
 		log_likelihood = 0;
 
 		// E-Step
-		log_likelihood += Estimate_Gamma_m(input_node, output_nodes, posterior_ll, gamma, sense_prior, fTable);
+		log_likelihood += EstimateGamma(input_node, output_nodes, posterior_ll, gamma, sense_prior, f_table);
 
 		// M-Step
 		if (m_option->store_multinomial)
-			Maximize_Pi(input_node, gamma);
+			MaximizeSensePriors(input_node, gamma);
 		else
-			Maximize_Pi(input_node, posterior_ll);
+			MaximizeSensePriors(input_node, posterior_ll);
 
-		UpdateEmbeddings(input_node, output_nodes, gamma, fTable, input_backup, UpdateDirection::UPDATE_INPUT);
-		UpdateEmbeddings(input_node, output_nodes, gamma, fTable, input_backup, UpdateDirection::UPDATE_OUTPUT);
+		UpdateEmbeddings(input_node, output_nodes, gamma, f_table, input_backup, UpdateDirection::UPDATE_INPUT);
+		UpdateEmbeddings(input_node, output_nodes, gamma, f_table, input_backup, UpdateDirection::UPDATE_OUTPUT);
 
 	}
 }
@@ -205,10 +205,10 @@ void SkipGramMixtureNeuralNetwork<T>::DealPrepareParameter(int input_node, std::
 
 template<typename T>
 /*
 Parse a sentence and deepen into two branches:
 one for TrainNN, the other one is for Parameter_parse&request
 */
-void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup, FunctionType function)
+void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_length, T* gamma, T* f_table, T* input_backup, FunctionType function)
 {
 	if (sentence_length == 0)
 		return;
@@ -220,7 +220,7 @@ void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_
 	{
 		if (sentence[sentence_position] == -1) continue;
 		int feat_size = 0;
 
 		for (int i = 0; i < m_option->window_size * 2 + 1; ++i)
 			if (i != m_option->window_size)
 			{
@@ -233,7 +233,7 @@ void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_
 			input_node = feat[feat_size - 1];
 			output_nodes.clear();
 			Parse(input_node, sentence[sentence_position], output_nodes);
-			(this->*function)(input_node, output_nodes, gamma, fTable, input_backup);
+			(this->*function)(input_node, output_nodes, gamma, f_table, input_backup);
 		}
 	}
 }
@@ -282,7 +282,7 @@ std::vector<int>& SkipGramMixtureNeuralNetwork<T>::GetOutputLayerNodes()
 }
 
 template<typename T>
 void SkipGramMixtureNeuralNetwork<T>::SetInputEmbeddingWeights(int input_node_id, T* ptr)
 {
 	m_input_embedding_weights_ptr[input_node_id] = ptr;
 }
@@ -306,7 +306,7 @@ void SkipGramMixtureNeuralNetwork<T>::SetSensePriorParaWeights(int input_node_id
 }
 
 template<typename T>
 T* SkipGramMixtureNeuralNetwork<T>::GetInputEmbeddingWeights(int input_node_id)
 {
 	return m_input_embedding_weights_ptr[input_node_id];
 }
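Only fragments of EstimateGamma's body are visible in these hunks, but the E-step's outline is clear: each sense's posterior log likelihood starts from log(sense prior), clamped through the eps/MIN_LOG guard, accumulates log-sigmoid terms over the Huffman path, and the gammas are the softmax of those log likelihoods. A self-contained Python sketch under those assumptions (the path-label convention, sigma(f) for label 0 and 1-sigma(f) for label 1, follows word2vec-style hierarchical softmax and is inferred, not confirmed by the visible lines):

    import math

    def sigmoid(x):
        return 1.0 / (1.0 + math.exp(-x))

    def estimate_gamma(sense_priors, sense_embeddings, node_embeddings, path_labels):
        posterior_ll = []
        for prior, x in zip(sense_priors, sense_embeddings):
            ll = math.log(max(prior, 1e-12))  # eps / MIN_LOG guard
            for y, label in zip(node_embeddings, path_labels):
                f = sigmoid(sum(a * b for a, b in zip(x, y)))  # InnerProduct + Sigmoid
                ll += math.log(max(1.0 - f if label else f, 1e-12))
            posterior_ll.append(ll)
        m = max(posterior_ll)
        total = sum(math.exp(v - m) for v in posterior_ll)
        gamma = [math.exp(v - m) / total for v in posterior_ll]  # stable softmax
        return posterior_ll, gamma

    # Toy example: a two-sense word with two Huffman nodes on its output path.
    print(estimate_gamma([0.6, 0.4], [[0.1, 0.2], [-0.3, 0.5]],
                         [[0.2, -0.1], [0.4, 0.3]], [0, 1]))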
@@ -17,6 +17,146 @@ enum class UpdateDirection
 template<typename T>
 class SkipGramMixtureNeuralNetwork
 {
+#pragma once
+
+#include <vector>
+
+#include "Util.h"
+#include <multiverso.h>
+#include "HuffmanEncoder.h"
+#include "MultiversoSkipGramMixture.h"
+#include <cstring>
+
+enum class UpdateDirection
+{
+	UPDATE_INPUT,
+	UPDATE_OUTPUT
+};
+
+template<typename T>
+class SkipGramMixtureNeuralNetwork
+{
+public:
+	T learning_rate;
+	T sense_prior_momentum;
+
+	int status;
+	SkipGramMixtureNeuralNetwork(Option* option, HuffmanEncoder* huffmanEncoder, WordSenseInfo* word_sense_info, Dictionary* dic, int dicSize);
+	~SkipGramMixtureNeuralNetwork();
+
+	void Train(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup);
+
+	/*!
+	* \brief Collect all the input words and output nodes in the data block
+	*/
+	void PrepareParmeter(DataBlock *data_block);
+
+	std::vector<int>& GetInputLayerNodes();
+	std::vector<int>& GetOutputLayerNodes();
+
+	/*!
+	* \brief Set the pointers to those local parameters
+	*/
+	void SetInputEmbeddingWeights(int input_node_id, T* ptr);
+	void SetOutputEmbeddingWeights(int output_node_id, T* ptr);
+	void SetSensePriorWeights(int input_node_id, T* ptr);
+	void SetSensePriorParaWeights(int input_node_id, T* ptr);
+
+	/*!
+	* \brief Get the pointers to those locally updated parameters
+	*/
+	T* GetInputEmbeddingWeights(int input_node_id);
+	T* GetEmbeddingOutputWeights(int output_node_id);
+	T* GetSensePriorWeights(int input_node_id);
+	T* GetSensePriorParaWeights(int input_node_id);
+
+private:
+	Option *m_option;
+	Dictionary *m_dictionary;
+	HuffmanEncoder *m_huffman_encoder;
+	int m_dictionary_size;
+
+	WordSenseInfo* m_word_sense_info;
+
+	T** m_input_embedding_weights_ptr; //Points to every word's input embedding vector
+	bool *m_seleted_input_embedding_weights;
+	T** m_output_embedding_weights_ptr; //Points to every huffman node's embedding vector
+	bool *m_selected_output_embedding_weights;
+
+	T** m_sense_priors_ptr; //Points to the multinomial parameters, if store_multinomial is set to zero.
+	T** m_sense_priors_paras_ptr; //Points to the sense prior parameters. If store_multinomial is zero, it points to the log of the multinomial; otherwise it points to the multinomial parameters themselves.
+
+	std::vector<int> m_input_layer_nodes;
+	std::vector<int> m_output_layer_nodes;
+
+	typedef void(SkipGramMixtureNeuralNetwork<T>::*FunctionType)(int input_node, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup);
+
+	/*!
+	* \brief Parse the needed parameter in a window
+	*/
+	void Parse(int feat, int word_idx, std::vector<std::pair<int, int> >& output_nodes);
+
+	/*!
+	* \brief Parse a sentence and deepen into two branches:
+	* one for TrainNN, the other one is for Parameter_parse&request
+	*/
+	void ParseSentence(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup, FunctionType function);
+
+	/*!
+	* \brief Copy the input_nodes&output_nodes to WordEmbedding private set
+	*/
+	void DealPrepareParameter(int input_nodes, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup);
+
+	/*!
+	* \brief Train a window sample and update the
+	* input-embedding and output-embedding vectors
+	* \param word_input represents the input word
+	* \param output_nodes represent the output nodes on the huffman tree, including the node index and path label
+	* \param v_gamma is the temp memory to store the posterior probabilities of each sense
+	* \param v_fTable is the temp memory to store the sigmoid values of inner products of input and output embeddings
+	* \param v_input_backup stores the input embedding vectors as backup
+	*/
+	void TrainSample(int word_input, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup);
+
+	/*!
+	* \brief The E-step, estimate the posterior multinomial probabilities
+	* \param word_input represents the input word
+	* \param output_nodes represent the output nodes on the huffman tree, including the node index and path label
+	* \param posterior represents the calculated posterior log likelihood
+	* \param estimation represents the calculated gammas (see the paper), that is, the softmax terms of the posterior
+	* \param sense_prior represents the parameters of the sense prior probabilities for each polysemous word
+	* \param f_m is the temp memory to store the sigmoid values of inner products of input and output embeddings
+	*/
+	T EstimateGamma(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* posterior, T* estimation, T* sense_prior, T* f_m);
+
+	/*!
+	* \brief The M-step: update the embedding vectors to maximize the Q function
+	* \param word_input represents the input word
+	* \param output_nodes represent the output nodes on the huffman tree, including the node index and path label
+	* \param estimation represents the calculated gammas (see the paper), that is, the softmax terms of the posterior
+	* \param f_m is the temp memory to store the sigmoid values of inner products of input and output embeddings
+	* \param input_backup stores the input embedding vectors as backup
+	* \param direction: update input vectors or output vectors
+	*/
+	void UpdateEmbeddings(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* estimation, T* f_m, T* input_backup, UpdateDirection direction);
+
+	/*!
+	* \brief The M-step: update the sense prior probabilities to maximize the Q function
+	* \param word_input represents the input word
+	* \param curr_priors are the closed-form values of the sense priors in this iteration
+	*/
+	void MaximizeSensePriors(int word_input, T* curr_priors);
+
+	/*
+	* \brief Record the input word so that parameter loading can be performed
+	*/
+	void AddInputLayerNode(int node_id);
+
+	/*
+	* \brief Record the huffman tree node so that parameter loading can be performed
+	*/
+	void AddOutputLayerNode(int node_id);
+};
 public:
 	T learning_rate;
 	T sense_prior_momentum;
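The header documents MaximizeSensePriors as consuming the closed-form sense priors of the current iteration, and the class carries a sense_prior_momentum field; together they suggest a momentum-smoothed update of roughly this shape (a reading of the declarations, not code that appears in the diff):

    def maximize_sense_priors(priors, closed_form, momentum):
        # Blend the stored priors toward the closed-form estimate; as the
        # momentum anneals toward 1 (see the trainer below), the priors freeze.
        return [momentum * p + (1.0 - momentum) * c
                for p, c in zip(priors, closed_form)]

    print(maximize_sense_priors([0.5, 0.5], [0.9, 0.1], 0.05))  # early training: mostly the new estimate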
@@ -1,4 +1,4 @@
-#include "trainer.h"
+#include "Trainer.h"
 
 template<typename T>
 Trainer<T>::Trainer(int trainer_id, Option *option, void** word2vector_neural_networks, multiverso::Barrier *barrier, Dictionary* dictionary, WordSenseInfo* word_sense_info, HuffmanEncoder* huff_encoder)
@@ -12,8 +12,8 @@ Trainer<T>::Trainer(int trainer_id, Option *option, void** word2vector_neural_ne
 	m_word_sense_info = word_sense_info;
 	m_huffman_encoder = huff_encoder;
 
-	gamma = (T*)calloc(m_option-> window_size * MAX_SENSE_CNT, sizeof(T));
-	fTable = (T*)calloc(m_option-> window_size * MAX_CODE_LENGTH * MAX_SENSE_CNT, sizeof(T));
+	gamma = (T*)calloc(m_option->window_size * MAX_SENSE_CNT, sizeof(T));
+	fTable = (T*)calloc(m_option->window_size * MAX_CODE_LENGTH * MAX_SENSE_CNT, sizeof(T));
 	input_backup = (T*)calloc(m_option->embeding_size * MAX_SENSE_CNT, sizeof(T));
 
 	m_start_time = 0;
@@ -62,7 +62,7 @@ void Trainer<T>::TrainIteration(multiverso::DataBlockBase *data_block)
 	{
 		local_output_layer_nodes.push_back(output_layer_nodes[i]);
 	}
 
 	CopyParameterFromMultiverso(local_input_layer_nodes, local_output_layer_nodes, word2vector_neural_network);
 
 	multiverso::Row<int64_t>& word_count_actual_row = GetRow<int64_t>(kWordCountActualTableId, 0);
@@ -72,11 +72,11 @@ void Trainer<T>::TrainIteration(multiverso::DataBlockBase *data_block)
 	word2vector_neural_network->learning_rate = learning_rate;
 
 	//Linearly increase the momentum from init_sense_prior_momentum to 1
 	word2vector_neural_network->sense_prior_momentum = m_option->init_sense_prior_momentum +
 		(1 - m_option->init_sense_prior_momentum) * word_count_actual_row.At(0) / (T)(m_option->total_words * m_option->epoch + 1);
 
 	m_barrier->Wait();
 
 	for (int i = m_trainer_id; i < data->Size(); i += m_option->thread_cnt) //i iterates over all sentences
 	{
 		int sentence_length;
@@ -86,7 +86,7 @@ void Trainer<T>::TrainIteration(multiverso::DataBlockBase *data_block)
 		data->Get(i, sentence, sentence_length, word_count_deta, next_random);
 
 		word2vector_neural_network->Train(sentence, sentence_length, gamma, fTable, input_backup);
 
 		m_word_count += word_count_deta;
 		if (m_word_count - m_last_word_count > 10000)
 		{
@@ -94,7 +94,7 @@ void Trainer<T>::TrainIteration(multiverso::DataBlockBase *data_block)
 			Add<int64_t>(kWordCountActualTableId, 0, 0, m_word_count - m_last_word_count);
 			m_last_word_count = m_word_count;
 			m_now_time = clock();
 
 			if (m_trainer_id % 3 == 0)
 			{
 				multiverso::Log::Info("Rank %d Trainer %d lr: %.5f Mom: %.4f Progress: %.2f%% Words/thread/sec(total): %.2fk W/t/sec(executive): %.2fk\n",
@@ -115,12 +115,12 @@ void Trainer<T>::TrainIteration(multiverso::DataBlockBase *data_block)
 			word2vector_neural_network->sense_prior_momentum = m_option->init_sense_prior_momentum + (1 - m_option->init_sense_prior_momentum) * word_count_actual_row.At(0) / (T)(m_option->total_words * m_option->epoch + 1);
 		}
 	}
 
 	m_barrier->Wait();
 	AddParameterToMultiverso(local_input_layer_nodes, local_output_layer_nodes, word2vector_neural_network);
 
 	m_executive_time += clock() - train_interation_start;
 
 	multiverso::Log::Info("Rank %d Train %d end at %lfs, cost %lfs, total cost %lfs\n",
 		m_process_id,
 		m_trainer_id, clock() / (double)CLOCKS_PER_SEC,
@@ -159,7 +159,7 @@ template<typename T>
 int Trainer<T>::CopyParameterFromMultiverso(std::vector<int>& input_layer_nodes, std::vector<int>& output_layer_nodes, void* local_word2vector_neural_network)
 {
 	SkipGramMixtureNeuralNetwork<T>* word2vector_neural_network = (SkipGramMixtureNeuralNetwork<T>*)local_word2vector_neural_network;
 
 	//Copy input embedding
 	for (int i = 0; i < input_layer_nodes.size(); ++i)
 	{
@@ -169,7 +169,7 @@ int Trainer<T>::CopyParameterFromMultiverso(std::vector<int>& input_layer_nodes,
 		CopyMemory(ptr + j * m_option->embeding_size, GetRow<T>(kInputEmbeddingTableId, row_id), m_option->embeding_size);
 		word2vector_neural_network->SetInputEmbeddingWeights(input_layer_nodes[i], ptr);
 	}
 
 	//Copy output embedding
 	for (int i = 0; i < output_layer_nodes.size(); ++i)
 	{
@@ -184,7 +184,7 @@ int Trainer<T>::CopyParameterFromMultiverso(std::vector<int>& input_layer_nodes,
 		}
 		word2vector_neural_network->SetOutputEmbeddingWeights(output_layer_nodes[i], ptr);
 	}
 
 	//Copy sense prior
 	for (int i = 0; i < input_layer_nodes.size(); ++i)
 	{
@@ -285,7 +285,7 @@ void Trainer<T>::SaveMultiInputEmbedding(const int epoch_id)
 
 	fid = fopen(outfile, "wb");
 
 	fprintf(fid, "%d %d %d\n", m_dictionary->Size(), m_word_sense_info->total_senses_cnt, m_option->embeding_size);
 	for (int i = 0; i < m_dictionary->Size(); ++i)
 	{
 		fprintf(fid, "%s %d ", m_dictionary->GetWordInfo(i)->word.c_str(), m_word_sense_info->word_sense_cnts_info[i]);
@@ -297,7 +297,7 @@ void Trainer<T>::SaveMultiInputEmbedding(const int epoch_id)
 		CopyMemory(sense_priors_ptr, GetRow<T>(kWordSensePriorTableId, m_word_sense_info->p_wordidx2sense_idx[i]), m_option->sense_num_multi);
 		if (!m_option->store_multinomial)
 			Util::SoftMax(sense_priors_ptr, sense_priors_ptr, m_option->sense_num_multi);
 
 		for (int j = 0; j < m_option->sense_num_multi; ++j)
 		{
 			fwrite(sense_priors_ptr + j, sizeof(real), 1, fid);
@@ -317,7 +317,7 @@ void Trainer<T>::SaveMultiInputEmbedding(const int epoch_id)
 		fwrite(&prob, sizeof(real), 1, fid);
 		emb_row_id = m_word_sense_info->p_input_embedding[i];
 		multiverso::Row<real>& embedding = GetRow<real>(kInputEmbeddingTableId, emb_row_id);
 
 		for (int k = 0; k < m_option->embeding_size; ++k)
 		{
 			emb_tmp = embedding.At(k);
@@ -442,4 +442,4 @@ void Trainer<T>::SaveHuffEncoder()
 }
 
 template class Trainer<float>;
 template class Trainer<double>;
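The momentum schedule visible in TrainIteration is worth spelling out: sense_prior_momentum climbs linearly from init_sense_prior_momentum toward 1 as the global trained-word counter approaches total_words * epoch, so sense priors adapt freely early and stabilize late. The same schedule as a standalone Python function (argument names are descriptive, not the repo's):

    def sense_prior_momentum(words_trained, total_words, epochs, init_momentum):
        return init_momentum + (1.0 - init_momentum) * words_trained / (total_words * epochs + 1)

    print(sense_prior_momentum(0, 10**6, 1, 0.05))      # 0.05 at the start
    print(sense_prior_momentum(10**6, 10**6, 1, 0.05))  # ~1.0 near the end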