Name change and add scripts.

1. Change some variable names. 2. Add a Python script to run the program.

Parent: 44f0633180
Commit: 23477e87c5
@@ -0,0 +1,55 @@
+import os
+import copy
+import time
+import random
+import sys
+import shutil
+import subprocess
+
+from subprocess import STDOUT
+
+def execute(command):
+    popen = subprocess.Popen(command, stdout=subprocess.PIPE)
+    lines_iterator = iter(popen.stdout.readline, b"")
+    for line in lines_iterator:
+        print(line)  # yield line
+
+# parameters w.r.t. MPI
+work_dir = 'D:\\Your Directory'
+port = '5719'
+machinefile = 'host.txt'
+
+# parameters w.r.t. SG-Mixture training
+size = 50
+train = 'Your Training File'
+read_vocab = 'Your Vocab File'
+sense_file = 'Your Sense File, see sense_file.txt as an example'
+binary = 2
+init_learning_rate = 0.025
+epoch = 1
+window = 5
+threads = 8
+mincount = 5
+EM_iteration = 1
+momentum = 0.05
+top_n = 0
+top_ratio = 0
+default_sense = 1
+sense_num_multi = 5
+binary_embedding_file = 'emb.bin'
+text_embedding_file = 'emb.txt'
+huff_tree_file = 'huff.txt'
+outputlayer_binary_file = 'emb_out.bin'
+outputlayer_text_file = 'emb_out.txt'
+preload_cnt = 5
+data_block_size = 50000
+pipline = '0'
+multinomial = '0'
+
+mpi_args = '-port {0} -wdir {1} -machinefile {2} '.format(port, work_dir, machinefile)
+sg_mixture_args = ' -train_file {0} -binary_embedding_file {1} -text_embedding_file {2} -threads {3} -size {4} -binary {5} -epoch {6} -init_learning_rate {7} -min_count {8} -window {9} -momentum {10} -EM_iteration {11} -top_n {12} -top_ratio {13} -default_sense {14} -sense_num_multi {15} -huff_tree_file {16} -vocab_file {17} -outputlayer_binary_file {18} -outputlayer_text_file {19} -read_sense {20} -data_block_size {21} -is_pipline {22} -store_multinomial {23} -max_preload_size {24}'.format(train, binary_embedding_file, text_embedding_file, threads, size, binary, epoch, init_learning_rate, mincount, window, momentum, EM_iteration, top_n, top_ratio, default_sense, sense_num_multi, huff_tree_file, read_vocab, outputlayer_binary_file, outputlayer_text_file, sense_file, data_block_size, pipline, multinomial, preload_cnt)
+
+print(mpi_args)
+print(sg_mixture_args)
+
+proc = execute("mpiexec " + mpi_args + 'distributed_skipgram_mixture ' + sg_mixture_args)
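For context, a sketch of how the new launcher would be used once the placeholder paths above are replaced with real files. The script's file name is not shown in this diff, so run.py below is a hypothetical name; the flag values are the defaults it sets:

    python run.py
    # composes and spawns, via execute(), a command of this shape:
    # mpiexec -port 5719 -wdir "D:\Your Directory" -machinefile host.txt
    #     distributed_skipgram_mixture -train_file <your training file>
    #     -binary_embedding_file emb.bin -text_embedding_file emb.txt -threads 8
    #     -size 50 -binary 2 -epoch 1 -init_learning_rate 0.025 -min_count 5
    #     -window 5 -momentum 0.05 ... -store_multinomial 0 -max_preload_size 5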
File diff not shown because of its large size.
@@ -1,4 +1,4 @@
-#include "multiverso_skipgram_mixture.h"
+#include "MultiversoSkipGramMixture.h"
 #include <algorithm>
 
 MultiversoSkipGramMixture::MultiversoSkipGramMixture(Option *option, Dictionary *dictionary, HuffmanEncoder *huffman_encoder, Reader *reader)
@@ -1,4 +1,4 @@
-#include "param_loader.h"
+#include "ParamLoader.h"
 
 template<typename T>
 ParameterLoader<T>::ParameterLoader(Option *option, void** word2vector_neural_networks, WordSenseInfo* word_sense_info)
@@ -1,4 +1,4 @@
-#include "skipgram_mixture_neural_network.h"
+#include "SkipGramMixtureNeuralNetwork.h"
 
 template<typename T>
 SkipGramMixtureNeuralNetwork<T>::SkipGramMixtureNeuralNetwork(Option* option, HuffmanEncoder* huffmanEncoder, WordSenseInfo* word_sense_info, Dictionary* dic, int dicSize)
@@ -37,18 +37,18 @@ SkipGramMixtureNeuralNetwork<T>::~SkipGramMixtureNeuralNetwork()
 }
 
 template<typename T>
-void SkipGramMixtureNeuralNetwork<T>::Train(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup)
+void SkipGramMixtureNeuralNetwork<T>::Train(int* sentence, int sentence_length, T* gamma, T* f_table, T* input_backup)
 {
-	ParseSentence(sentence, sentence_length, gamma, fTable, input_backup, &SkipGramMixtureNeuralNetwork<T>::TrainSample);
+	ParseSentence(sentence, sentence_length, gamma, f_table, input_backup, &SkipGramMixtureNeuralNetwork<T>::TrainSample);
 }
 
 template<typename T>
 //The E-step, estimate the posterior multinomial probabilities
-T SkipGramMixtureNeuralNetwork<T>::Estimate_Gamma_m(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* posterior_ll, T* estimation, T* sense_prior, T* f_m)
+T SkipGramMixtureNeuralNetwork<T>::EstimateGamma(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* posterior_ll, T* estimation, T* sense_prior, T* f_m)
 {
-	T* inputEmbedding = m_input_embedding_weights_ptr[word_input];
+	T* input_embedding = m_input_embedding_weights_ptr[word_input];
 	T f, log_likelihood = 0;
-	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, inputEmbedding += m_option->embeding_size)
+	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, input_embedding += m_option->embeding_size)
 	{
 		posterior_ll[sense_idx] = sense_prior[sense_idx] < eps ? MIN_LOG : log(sense_prior[sense_idx]); //posterior likelihood for each sense
 
@@ -56,7 +56,7 @@ T SkipGramMixtureNeuralNetwork<T>::Estimate_Gamma_m(int word_input, std::vector<
 
 	for (int d = 0; d < output_nodes.size(); ++d, fidx++)
 	{
-		f = Util::InnerProduct(inputEmbedding, m_output_embedding_weights_ptr[output_nodes[d].first], m_option->embeding_size);
+		f = Util::InnerProduct(input_embedding, m_output_embedding_weights_ptr[output_nodes[d].first], m_option->embeding_size);
 		f = Util::Sigmoid(f);
 		f_m[fidx] = f;
 		if (output_nodes[d].second) //huffman code, 0 or 1
@@ -78,7 +78,7 @@ T SkipGramMixtureNeuralNetwork<T>::Estimate_Gamma_m(int word_input, std::vector<
 
 template<typename T>
 //The M-step: update the sense prior probabilities to maximize the Q function
-void SkipGramMixtureNeuralNetwork<T>::Maximize_Pi(int word_input, T* log_likelihood)
+void SkipGramMixtureNeuralNetwork<T>::MaximizeSensePriors(int word_input, T* log_likelihood)
 {
 	if (m_word_sense_info->word_sense_cnts_info[word_input] == 1)
 	{
@@ -101,11 +101,11 @@ void SkipGramMixtureNeuralNetwork<T>::UpdateEmbeddings(int word_input, std::vect
 {
 	T g;
 	T* output_embedding;
-	T* inputEmbedding;
+	T* input_embedding;
 	if (direction == UpdateDirection::UPDATE_INPUT)
-		inputEmbedding = m_input_embedding_weights_ptr[word_input];
-	else inputEmbedding = input_backup;
-	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, inputEmbedding += m_option->embeding_size)
+		input_embedding = m_input_embedding_weights_ptr[word_input];
+	else input_embedding = input_backup;
+	for (int sense_idx = 0; sense_idx < m_word_sense_info->word_sense_cnts_info[word_input]; ++sense_idx, input_embedding += m_option->embeding_size)
 	{
 		int64_t fidx = sense_idx * MAX_CODE_LENGTH;
 		for (int d = 0; d < output_nodes.size(); ++d, ++fidx)
@@ -115,12 +115,12 @@ void SkipGramMixtureNeuralNetwork<T>::UpdateEmbeddings(int word_input, std::vect
 		if (direction == UpdateDirection::UPDATE_INPUT) //Update Input
 		{
 			for (int j = 0; j < m_option->embeding_size; ++j)
-				inputEmbedding[j] += g * output_embedding[j];
+				input_embedding[j] += g * output_embedding[j];
 		}
 		else // Update Output
 		{
 			for (int j = 0; j < m_option->embeding_size; ++j)
-				output_embedding[j] += g * inputEmbedding[j];
+				output_embedding[j] += g * input_embedding[j];
 		}
 	}
 }
@@ -132,7 +132,7 @@ template<typename T>
 void SkipGramMixtureNeuralNetwork<T>::TrainSample(int input_node, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup)
 {
 	T* gamma = (T*)v_gamma; //stores the posterior probabilities
-	T* fTable = (T*)v_fTable; //stores the inner product values of input and output embeddings
+	T* f_table = (T*)v_fTable; //stores the inner product values of input and output embeddings
 	T* input_backup = (T*)v_input_backup;
 
 	T posterior_ll[MAX_SENSE_CNT]; //stores the posterior log likelihood
@@ -149,16 +149,16 @@ void SkipGramMixtureNeuralNetwork<T>::TrainSample(int input_node, std::vector<st
 		log_likelihood = 0;
 
 		// E-Step
-		log_likelihood += Estimate_Gamma_m(input_node, output_nodes, posterior_ll, gamma, sense_prior, fTable);
+		log_likelihood += EstimateGamma(input_node, output_nodes, posterior_ll, gamma, sense_prior, f_table);
 
 		// M-Step
 		if (m_option->store_multinomial)
-			Maximize_Pi(input_node, gamma);
+			MaximizeSensePriors(input_node, gamma);
 		else
-			Maximize_Pi(input_node, posterior_ll);
+			MaximizeSensePriors(input_node, posterior_ll);
 
-		UpdateEmbeddings(input_node, output_nodes, gamma, fTable, input_backup, UpdateDirection::UPDATE_INPUT);
-		UpdateEmbeddings(input_node, output_nodes, gamma, fTable, input_backup, UpdateDirection::UPDATE_OUTPUT);
+		UpdateEmbeddings(input_node, output_nodes, gamma, f_table, input_backup, UpdateDirection::UPDATE_INPUT);
+		UpdateEmbeddings(input_node, output_nodes, gamma, f_table, input_backup, UpdateDirection::UPDATE_OUTPUT);
 
 	}
 }
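The hunk above is the heart of the per-sample EM loop: EstimateGamma is the E-step, MaximizeSensePriors and UpdateEmbeddings are the M-step. As a reading aid, here is a hedged NumPy sketch (illustrative pseudocode, not this repository's code) of one such pass for a word with K senses and L Huffman output nodes; the path-label sign convention and the gamma-weighted gradients are assumptions based on standard hierarchical softmax:

    import numpy as np

    def em_train_sample(input_emb, output_emb, codes, sense_prior, lr):
        # input_emb: (K, d), one vector per sense; output_emb: (L, d) Huffman
        # node vectors; codes: (L,) path labels in {0, 1}; sense_prior: (K,).
        # E-step (cf. EstimateGamma): posterior log likelihood of each sense.
        f = 1.0 / (1.0 + np.exp(-input_emb @ output_emb.T))  # (K, L), cf. f_table
        log_prior = np.log(np.maximum(sense_prior, 1e-10))   # guard, cf. eps/MIN_LOG
        ll = log_prior + np.where(codes == 0, np.log(f), np.log(1.0 - f)).sum(axis=1)
        gamma = np.exp(ll - ll.max())
        gamma /= gamma.sum()                                  # posterior over senses
        # M-step for the priors (cf. MaximizeSensePriors): closed-form update.
        new_prior = gamma
        # M-step for the embeddings (cf. UpdateEmbeddings): gamma-weighted
        # gradient steps, backing up the input vectors first (cf. input_backup).
        backup = input_emb.copy()
        grad = lr * gamma[:, None] * ((1.0 - codes) - f)      # (K, L)
        input_emb += grad @ output_emb                        # UPDATE_INPUT
        output_emb += grad.T @ backup                         # UPDATE_OUTPUT
        return new_prior, ll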
@@ -208,7 +208,7 @@ template<typename T>
 Parse a sentence and deepen into two branches:
 one for TrainNN, the other one is for Parameter_parse&request
 */
-void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup, FunctionType function)
+void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_length, T* gamma, T* f_table, T* input_backup, FunctionType function)
 {
 	if (sentence_length == 0)
 		return;
@@ -233,7 +233,7 @@ void SkipGramMixtureNeuralNetwork<T>::ParseSentence(int* sentence, int sentence_
 			input_node = feat[feat_size - 1];
 			output_nodes.clear();
 			Parse(input_node, sentence[sentence_position], output_nodes);
-			(this->*function)(input_node, output_nodes, gamma, fTable, input_backup);
+			(this->*function)(input_node, output_nodes, gamma, f_table, input_backup);
 		}
 	}
 }
@@ -282,7 +282,7 @@ std::vector<int>& SkipGramMixtureNeuralNetwork<T>::GetOutputLayerNodes()
 }
 
 template<typename T>
-void SkipGramMixtureNeuralNetwork<T>::SetInputEmbeddingWeights(int input_node_id, T* ptr)
+void SkipGramMixtureNeuralNetwork<T>::SetinputEmbeddingWeights(int input_node_id, T* ptr)
 {
 	m_input_embedding_weights_ptr[input_node_id] = ptr;
 }
@@ -306,7 +306,7 @@ void SkipGramMixtureNeuralNetwork<T>::SetSensePriorParaWeights(int input_node_id
 }
 
 template<typename T>
-T* SkipGramMixtureNeuralNetwork<T>::GetInputEmbeddingWeights(int input_node_id)
+T* SkipGramMixtureNeuralNetwork<T>::GetinputEmbeddingWeights(int input_node_id)
 {
 	return m_input_embedding_weights_ptr[input_node_id];
 }
@@ -17,6 +17,146 @@ enum class UpdateDirection
-template<typename T>
-class SkipGramMixtureNeuralNetwork
-{
-public:
-	T learning_rate;
-	T sense_prior_momentum;
+#pragma once
+
+#include <vector>
+
+#include "Util.h"
+#include <multiverso.h>
+#include "HuffmanEncoder.h"
+#include "MultiversoSkipGramMixture.h"
+#include "cstring"
+
+enum class UpdateDirection
+{
+	UPDATE_INPUT,
+	UPDATE_OUTPUT
+};
+
+template<typename T>
+class SkipGramMixtureNeuralNetwork
+{
+public:
+	T learning_rate;
+	T sense_prior_momentum;
+
+	int status;
+	SkipGramMixtureNeuralNetwork(Option* option, HuffmanEncoder* huffmanEncoder, WordSenseInfo* word_sense_info, Dictionary* dic, int dicSize);
+	~SkipGramMixtureNeuralNetwork();
+
+	void Train(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup);
+
+	/*!
+	 * \brief Collect all the input words and output nodes in the data block
+	 */
+	void PrepareParmeter(DataBlock *data_block);
+
+	std::vector<int>& GetInputLayerNodes();
+	std::vector<int>& GetOutputLayerNodes();
+
+	/*!
+	 * \brief Set the pointers to those local parameters
+	 */
+	void SetInputEmbeddingWeights(int input_node_id, T* ptr);
+	void SetOutputEmbeddingWeights(int output_node_id, T* ptr);
+	void SetSensePriorWeights(int input_node_id, T* ptr);
+	void SetSensePriorParaWeights(int input_node_id, T* ptr);
+
+	/*!
+	 * \brief Get the pointers to those locally updated parameters
+	 */
+	T* GetInputEmbeddingWeights(int input_node_id);
+	T* GetEmbeddingOutputWeights(int output_node_id);
+	T* GetSensePriorWeights(int input_node_id);
+	T* GetSensePriorParaWeights(int input_node_id);
+
+private:
+	Option *m_option;
+	Dictionary *m_dictionary;
+	HuffmanEncoder *m_huffman_encoder;
+	int m_dictionary_size;
+
+	WordSenseInfo* m_word_sense_info;
+
+	T** m_input_embedding_weights_ptr; //Points to every word's input embedding vector
+	bool *m_seleted_input_embedding_weights;
+	T** m_output_embedding_weights_ptr; //Points to every huffman node's embedding vector
+	bool *m_selected_output_embedding_weights;
+
+	T** m_sense_priors_ptr; //Points to the multinomial parameters, if store_multinomial is set to zero.
+	T** m_sense_priors_paras_ptr; //Points to the sense prior parameters. If store_multinomial is zero, it points to the log of the multinomial; otherwise it points to the multinomial parameters themselves.
+
+	std::vector<int> m_input_layer_nodes;
+	std::vector<int> m_output_layer_nodes;
+
+	typedef void(SkipGramMixtureNeuralNetwork<T>::*FunctionType)(int input_node, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup);
+
+	/*!
+	 * \brief Parse the needed parameters in a window
+	 */
+	void Parse(int feat, int word_idx, std::vector<std::pair<int, int> >& output_nodes);
+
+	/*!
+	 * \brief Parse a sentence and deepen into two branches:
+	 * one for TrainNN, the other one is for Parameter_parse&request
+	 */
+	void ParseSentence(int* sentence, int sentence_length, T* gamma, T* fTable, T* input_backup, FunctionType function);
+
+	/*!
+	 * \brief Copy the input_nodes & output_nodes to the WordEmbedding private set
+	 */
+	void DealPrepareParameter(int input_nodes, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup);
+
+	/*!
+	 * \brief Train a window sample and update the
+	 * input-embedding & output-embedding vectors
+	 * \param word_input represents the input word
+	 * \param output_nodes represent the output nodes on the huffman tree, including the node index and path label
+	 * \param v_gamma is the temp memory to store the posterior probabilities of each sense
+	 * \param v_fTable is the temp memory to store the sigmoid values of the inner products of input and output embeddings
+	 * \param v_input_backup stores the input embedding vectors as backup
+	 */
+	void TrainSample(int word_input, std::vector<std::pair<int, int> >& output_nodes, void* v_gamma, void* v_fTable, void* v_input_backup);
+
+	/*!
+	 * \brief The E-step, estimate the posterior multinomial probabilities
+	 * \param word_input represents the input word
+	 * \param output_nodes represent the output nodes on the huffman tree, including the node index and path label
+	 * \param posterior represents the calculated posterior log likelihood
+	 * \param estimation represents the calculated gammas (see the paper), that is, the softmax terms of the posterior
+	 * \param sense_prior represents the parameters of the sense prior probabilities for each polysemous word
+	 * \param f_m is the temp memory to store the sigmoid values of the inner products of input and output embeddings
+	 */
+	T EstimateGamma(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* posterior, T* estimation, T* sense_prior, T* f_m);
+
+	/*!
+	 * \brief The M-step: update the embedding vectors to maximize the Q function
+	 * \param word_input represents the input word
+	 * \param output_nodes represent the output nodes on the huffman tree, including the node index and path label
+	 * \param estimation represents the calculated gammas (see the paper), that is, the softmax terms of the posterior
+	 * \param f_m is the temp memory to store the sigmoid values of the inner products of input and output embeddings
+	 * \param input_backup stores the input embedding vectors as backup
+	 * \param direction: update input vectors or output vectors
+	 */
+	void UpdateEmbeddings(int word_input, std::vector<std::pair<int, int> >& output_nodes, T* estimation, T* f_m, T* input_backup, UpdateDirection direction);
+
+	/*!
+	 * \brief The M-step: update the sense prior probabilities to maximize the Q function
+	 * \param word_input represents the input word
+	 * \param curr_priors are the closed-form values of the sense priors in this iteration
+	 */
+	void MaximizeSensePriors(int word_input, T* curr_priors);
+
+	/*
+	 * \brief Record the input word so that parameter loading can be performed
+	 */
+	void AddInputLayerNode(int node_id);
+
+	/*
+	 * \brief Record the huffman tree node so that parameter loading can be performed
+	 */
+	void AddOutputLayerNode(int node_id);
+};
@@ -1,4 +1,4 @@
-#include "trainer.h"
+#include "Trainer.h"
 
 template<typename T>
 Trainer<T>::Trainer(int trainer_id, Option *option, void** word2vector_neural_networks, multiverso::Barrier *barrier, Dictionary* dictionary, WordSenseInfo* word_sense_info, HuffmanEncoder* huff_encoder)