Fix bugs and add DLL compile method

This commit is contained in:
StillKeepTry 2017-07-13 22:13:30 -07:00
Parent 6f95ba2f7d
Commit 7b7c1abb2d
14 changed files with 533 additions and 291 deletions

View file

@@ -0,0 +1,28 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DLL", "DLL\DLL.vcxproj", "{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Debug|x64.ActiveCfg = Debug|x64
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Debug|x64.Build.0 = Debug|x64
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Debug|x86.ActiveCfg = Debug|Win32
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Debug|x86.Build.0 = Debug|Win32
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Release|x64.ActiveCfg = Release|x64
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Release|x64.Build.0 = Release|x64
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Release|x86.ActiveCfg = Release|Win32
{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View file

@@ -0,0 +1,158 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{E5924A3A-7A86-402D-A7A3-49EE12A5F1B7}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>DLL</RootNamespace>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;DLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<OutputFile>$(OutDir)libpyreallocate$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;_WINDOWS;_USRDLL;DLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;DLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<OutputFile>$(OutDir)libpyreallocate$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;_WINDOWS;_USRDLL;DLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<OutputFile>$(OutDir)libpyreallocate$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="pyreallocate.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View file

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="pyreallocate.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View file

@@ -0,0 +1,232 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// pyreallocate.cpp : allocate table algorithm, time complexity = O(V * sqrt(V) * log(sqrt(V)))
//
//
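// The bound comes from the greedy assignment below: each of the V words is pushed into
// the priority queue at most sqrt(V) times per phase (once per candidate row or column),
// and every push/pop costs O(log V) = O(log sqrt(V)).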
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#include <utility>
#include <queue>
#include <cassert>
#include <string>
#include <ios>
#include <ctime>
#include <iomanip>
#define loss first
#define id second
typedef std::vector< std::pair<double, int> > DIVector;
typedef std::vector< int > IVector;
int g_vocab_size;
int g_vocab_sqrt;
struct SortNode {
int sort_id, word_id;
double value;
SortNode(int sort_id, int word_id, double value) : sort_id(sort_id), word_id(word_id), value(value) {
}
bool operator < (const SortNode &next) const {
return value < next.value;
}
};
struct InsertNode {
/*
* The word node.
* It holds the sorted loss vectors over rows and columns, and tracks the next-best
* untried row/column position for the current word. next_row()/next_col() return a
* SortNode whose value is the average loss over the positions that remain, which
* serves as the priority key for the greedy assignment below.
* */
DIVector prob_row;
DIVector prob_col;
int word_id;
int row_id;
int col_id;
double row_loss_sum;
double col_loss_sum;
InsertNode(DIVector prob_row, DIVector prob_col, int word_id) :
prob_row(prob_row), prob_col(prob_col), word_id(word_id), row_id(0), col_id(0),
row_loss_sum(0.0), col_loss_sum(0.0) {
for (int i = 0; i < g_vocab_sqrt; i++) {
row_loss_sum += prob_row[i].loss;
col_loss_sum += prob_col[i].loss;
}
}
SortNode next_row() {
int sort_id = prob_row[row_id].id;
row_loss_sum -= prob_row[row_id].loss;
row_id++;
double value = row_id == g_vocab_sqrt - 1 ? 0 : row_loss_sum / (g_vocab_sqrt - row_id - 1);
return SortNode(sort_id, word_id, value);
}
SortNode next_col() {
int sort_id = prob_col[col_id].id;
col_loss_sum -= prob_col[col_id].loss;
col_id++;
double value = col_id == g_vocab_sqrt - 1 ? 0 : col_loss_sum / (g_vocab_sqrt - col_id - 1);
return SortNode(sort_id, word_id, value);
}
};
std::vector < InsertNode > g_prob_table;
std::vector < IVector > g_table;
std::priority_queue < SortNode > search_Queue;
std::vector < std::string > index_word;
/*
* read vocab from file
* */
void get_word_location(std::string word_path) {
std::fstream input_file(word_path, std::ios::in);
std::string word;
while (input_file >> word) {
index_word.push_back(word);
}
input_file.close();
}
/*
* The function of saving the reallocated word table
* */
void save_allocate_word_location(std::string save_path) {
std::fstream output_file(save_path, std::ios::out);
std::fstream output_string_file(save_path + ".string", std::ios::out);
for (int i = 0; i < g_vocab_sqrt; i++) {
for (int j = 0; j < g_vocab_sqrt; j++) {
if (g_table[i][j] == -1) {
output_string_file << "<null>" << " ";
}
else {
output_string_file << index_word[g_table[i][j]] << " ";
}
output_file << g_table[i][j] << " ";
}
output_file << "\n";
output_string_file << "\n";
}
output_file.close();
output_string_file.close();
}
/*
* content_row : the loss values over rows (vocabsize x vocabbase, row major)
* content_col : the loss values over columns (vocabsize x vocabbase, row major)
* vocabsize : the size of the vocabulary
* vocabbase : the square root of the vocabulary size (the table dimension)
* save_location_path : the path where the reallocated word-location table is saved
* word_path : the path of the word table
* */
void allocate(double *content_row, double *content_col,
int vocabsize, int vocabbase,
char* save_location_path, char* word_path) {
clock_t start = clock();
std::cout << "Wait for initial ... \n";
// initial
std::vector<InsertNode>().swap(g_prob_table);
std::vector<IVector>().swap(g_table);
std::priority_queue<SortNode>().swap(search_Queue);
std::vector<std::string>().swap(index_word);
g_vocab_size = vocabsize;
g_vocab_sqrt = vocabbase;
int freq = std::max(1, g_vocab_size / 20);  // guard against freq == 0 for tiny vocabularies
// sort every node's position probability by loss
for (int i = 0; i < g_vocab_size; i++) {
DIVector current_row, current_col;
for (int j = 0; j < g_vocab_sqrt; j++) {
current_row.push_back(std::make_pair(content_row[i * g_vocab_sqrt + j], j));
current_col.push_back(std::make_pair(content_col[i * g_vocab_sqrt + j], j));
}
sort(current_row.begin(), current_row.end());
sort(current_col.begin(), current_col.end());
g_prob_table.push_back(InsertNode(current_row, current_col, i));
if (i % freq == 0) {
std::cout << "\t\t\tFinish " << std::setw(8) << i << " / " << std::setw(8) << g_vocab_size << " Line\n";
}
}
for (int i = 0; i < g_vocab_sqrt; i++) {
g_table.push_back(IVector());
}
std::cout << "Ready ... \n";
std::cout << "Start to assign row for every word\n";
for (int i = 0; i < g_vocab_size; i++) {
SortNode row_node = g_prob_table[i].next_row();
search_Queue.push(row_node);
}
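// Greedy row assignment: pop the word whose remaining candidate rows have the highest
// average loss (the most to lose by waiting), place it in its current best row if that
// row still has free slots, otherwise re-queue it with its next-best row.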
while (!search_Queue.empty()) {
SortNode top_node = search_Queue.top();
search_Queue.pop();
int word_id = top_node.word_id;
int row_id = top_node.sort_id;
if (static_cast<int>(g_table[row_id].size()) == g_vocab_sqrt) {
search_Queue.push(g_prob_table[word_id].next_row());
}
else {
g_table[row_id].push_back(word_id);
}
}
std::cout << "Finish assigning row\n";
std::cout << "Start to assign col for every word\n";
std::cout << "Finish assigning col\n";
for (int i = 0; i < g_vocab_sqrt; i++) {
for (auto &word_id : g_table[i]) {
SortNode col_node = g_prob_table[word_id].next_col();
search_Queue.push(col_node);
word_id = -1;
}
for (int j = static_cast<int>(g_table[i].size()); j < g_vocab_sqrt; j++) {
g_table[i].push_back(-1);
}
while (!search_Queue.empty()) {
SortNode top_node = search_Queue.top();
search_Queue.pop();
int word_id = top_node.word_id;
int col_id = top_node.sort_id;
if (g_table[i][col_id] == -1) {
g_table[i][col_id] = word_id;
}
else {
search_Queue.push(g_prob_table[word_id].next_col());
}
}
}
std::cout << "Finish assigning col\n";
get_word_location(word_path);
save_allocate_word_location(save_location_path);
clock_t end = clock();
double cost_time = static_cast<double>(end - start) / CLOCKS_PER_SEC;
std::cout << "Reallocate word location cost " << cost_time << " seconds\n";
}
extern "C" {
// When building with the MSVC runtime (_MT is defined), export the symbol with __declspec(dllexport)
#ifdef _MT
__declspec(dllexport) void allocate_table(double *content_row, double *content_col,
int vocabsize, int vocabbase,
char* save_location_path, char* word_path) {
allocate(content_row, content_col, vocabsize, vocabbase, save_location_path, word_path);
}
#else
void allocate_table(double *content_row, double *content_col,
int vocabsize, int vocabbase,
char* save_location_path, char* word_path) {
allocate(content_row, content_col, vocabsize, vocabbase, save_location_path, word_path);
}
#endif
}

Binary file not shown.

View file

@@ -1,214 +0,0 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// pyreallocate.cpp : allocate table algorithm, time complexity = O(V * sqrt(V) * log(sqrt(V)))
//
//
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#include <utility>
#include <queue>
#include <cassert>
#include <string>
#include <ios>
#include <ctime>
#include <iomanip>
#define loss first
#define id second
typedef std::vector< std::pair<double, int> > DIVector;
typedef std::vector< int > IVector;
int g_vocab_size;
int g_vocab_sqrt;
struct SortNode {
int sort_id, word_id;
double value;
SortNode(int sort_id, int word_id, double value) : sort_id(sort_id), word_id(word_id), value(value) {
}
bool operator < (const SortNode &next) const {
return value < next.value;
}
};
struct InsertNode {
/*
* The Word Node,
* It include the sorted loss vector of row and col,
* and the next great postion row or col for the curr word.
* */
DIVector prob_row;
DIVector prob_col;
int word_id;
int row_id;
int col_id;
double row_loss_sum;
double col_loss_sum;
InsertNode(DIVector prob_row, DIVector prob_col, int word_id) :
prob_row(prob_row), prob_col(prob_col), word_id(word_id), row_id(0), col_id(0) {
for (int i = 0; i < g_vocab_sqrt; i ++) {
row_loss_sum += prob_row[i].loss;
col_loss_sum += prob_col[i].loss;
}
}
SortNode next_row() {
int sort_id = prob_row[row_id].id;
row_loss_sum -= prob_row[row_id].loss;
row_id++;
double value = row_id == g_vocab_sqrt - 1 ? 0 : row_loss_sum / (g_vocab_sqrt - row_id - 1);
return SortNode(sort_id, word_id, value);
}
SortNode next_col() {
int sort_id = prob_col[col_id].id;
col_loss_sum -= prob_col[col_id].loss;
col_id++;
double value = col_id == g_vocab_sqrt - 1 ? 0 : col_loss_sum / (g_vocab_sqrt - col_id - 1);
return SortNode(sort_id, word_id, value);
}
};
std::vector < InsertNode > g_prob_table;
std::vector < IVector > g_table;
std::priority_queue < SortNode > search_Queue;
std::vector < std::string > index_word;
/*
* read vocab from file
* */
void get_word_location(std::string word_path) {
std::fstream input_file(word_path, std::ios::in);
std::string word;
while (input_file >> word) {
index_word.push_back(word);
}
input_file.close();
}
/*
* The function of saving the reallocated word table
* */
void save_allocate_word_location(std::string save_path) {
std::fstream output_file(save_path, std::ios::out);
std::fstream output_string_file(save_path + ".string", std::ios::out);
for (int i = 0; i < g_vocab_sqrt; i ++) {
for (int j = 0; j < g_vocab_sqrt; j ++) {
if (g_table[i][j] == -1) {
output_string_file << "<null>" << " ";
} else {
output_string_file << index_word[g_table[i][j]] << " ";
}
output_file << g_table[i][j] << " ";
}
output_file << "\n";
output_string_file << "\n";
}
output_file.close();
output_string_file.close();
}
extern "C" {
/*
* content_row : the loss vector of row
* content_col : the loss vector of col
* vocabsize : the size of vocabulary
* vocabbase : the sqrt of vocabuary size
* save_location_path : the path of next word location, the reallocated table will be saved
* into this path
* word_path : the path of word table
* */
void allocate_table(double *content_row, double *content_col,
int vocabsize, int vocabbase,
char* save_location_path, char* word_path) {
clock_t start = clock();
std::cout << "Wait for initial ... \n";
// initial
std::vector<InsertNode>().swap(g_prob_table);
std::vector<IVector>().swap(g_table);
std::priority_queue<SortNode>().swap(search_Queue);
std::vector<std::string>().swap(index_word);
g_vocab_size = vocabsize;
g_vocab_sqrt = vocabbase;
int freq = g_vocab_size / 20;
// sort every node's position probability by loss
for (int i = 0; i < g_vocab_size; i ++) {
DIVector current_row, current_col;
for (int j = 0; j < g_vocab_sqrt; j ++) {
current_row.push_back(std::make_pair(content_row[i * g_vocab_sqrt + j], j));
current_col.push_back(std::make_pair(content_col[i * g_vocab_sqrt + j], j));
}
sort(current_row.begin(), current_row.end());
sort(current_col.begin(), current_col.end());
g_prob_table.push_back(InsertNode(current_row, current_col, i));
if (i % freq == 0) {
std::cout << "\t\t\tFinish " << std::setw(8) << i << " / " << std::setw(8) << g_vocab_size << " Line\n";
}
}
for (int i = 0; i < g_vocab_sqrt; i ++) {
g_table.push_back(IVector());
}
std::cout << "Ready ... \n";
std::cout << "Start to assign row for every word\n";
for (int i = 0; i < g_vocab_size; i ++) {
SortNode row_node = g_prob_table[i].next_row();
search_Queue.push(row_node);
}
while (!search_Queue.empty()) {
SortNode top_node = search_Queue.top();
search_Queue.pop();
int word_id = top_node.word_id;
int row_id = top_node.sort_id;
if (static_cast<int>(g_table[row_id].size()) == g_vocab_sqrt) {
search_Queue.push(g_prob_table[word_id].next_row());
} else {
g_table[row_id].push_back(word_id);
}
}
std::cout << "Finish assigning row\n";
std::cout << "Start to assign col for every word\n";
std::cout << "Finish assigning col\n";
for (int i = 0; i < g_vocab_sqrt; i ++) {
for (auto &word_id : g_table[i]) {
SortNode col_node = g_prob_table[word_id].next_col();
search_Queue.push(col_node);
word_id = -1;
}
for (int j = static_cast<int>(g_table[i].size()); j < g_vocab_sqrt; j ++) {
g_table[i].push_back(-1);
}
while (!search_Queue.empty()) {
SortNode top_node = search_Queue.top();
search_Queue.pop();
int word_id = top_node.word_id;
int col_id = top_node.sort_id;
if (g_table[i][col_id] == -1) {
g_table[i][col_id] = word_id;
} else {
search_Queue.push(g_prob_table[word_id].next_col());
}
}
}
get_word_location(word_path);
save_allocate_word_location(save_location_path);
clock_t end = clock();
double cost_time = static_cast<double>((end - start) / CLOCKS_PER_SEC);
std::cout << "Reallocate word location cost " << cost_time << " seconds\n";
}
}

View file

@@ -99,30 +99,6 @@ def get_k_round_location_path(k):
return os.path.join(opt.vocabdir, 'word-%d.location' % (k))
####################################
# Generate the c++ dynamic library #
####################################
def generate_dll():
if platform.system() == 'Linux':
dll_name = 'libpyreallocate.so'
else:
dll_name = 'libpyreallocate.dll'
path_dir = os.path.split(os.path.realpath(__file__))[0]
dll_path = os.path.join(path_dir, dll_name)
if os.path.exists(dll_path):
return
command = ['g++', '-o', dll_name, '-shared',
'-fPIC', 'pyreallocate.cpp', '-std=c++11']
try:
command = ' '.join(command)
os.system(command)
print('Successfully generated the dynamic library')
except:
print('Fail to generate the dynamic library, falling back to a slower implementation')
###########################
# Word allocate algorithm #
###########################
@@ -379,7 +355,6 @@ def train(network, location_path, id):
#################
def main():
generate_dll() # Generate the CPP dynamic library
prepare_dir() # create the vocab dir and model dir
network = create_model(vocab_sqrt)

View file

@@ -0,0 +1,22 @@
# Builds LightRNN/libpyreallocate.so from the C++ word-allocation source in DLL/DLL/.
CXX = g++
CXXFLAGS := -Wall -O3 -std=c++11
OBJS = pyreallocate.o
LIB = LightRNN/libpyreallocate.so
DIR_SRC = DLL/DLL/

all : $(LIB)

%.o : ${DIR_SRC}%.cpp
	$(CXX) $(CXXFLAGS) -fpic -c $< -o $@

$(LIB) : $(OBJS)
	rm -f $@
	$(CXX) -shared -o $@ $(OBJS)
	rm -f $(OBJS)

tags:
	ctags -R *

clean:
	rm -f $(OBJS) $(LIB)

View file

@@ -0,0 +1,6 @@
import os
dir_path = os.path.dirname(os.path.abspath(__file__))
preprocess_file = os.path.join(dir_path, '..', '..', 'LightRNN', 'preprocess.py')
assert os.path.exists(os.path.join(dir_path, '..', 'Data'))
os.system('python {} -datadir ../Data -outputdir . -vocab_file vocab.txt -alloc_file word-0.location -vocabsize 10000 -seed 0'.format(preprocess_file))

View file

@@ -1,3 +0,0 @@
cd `dirname $0`
cd ../../LightRNN/
python preprocess.py -datadir ../PTB/Data -outputdir ../PTB/Allocation -vocab_file vocab.txt -alloc_file word-0.location -vocabsize 10000 -seed 0

View file

@@ -4,46 +4,38 @@
# for full license information.
# ==============================================================================
from six.moves.urllib import request
# from urllib import request
try:
from urllib.request import urlretrieve
except:
from urllib import urlretrieve
import os
import tarfile
import shutil
url = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz"
download_url = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz"
save_file = 'temp.tgz'
tmptgz = "tmp.tgz"
tmpdir = './tmp/'
def get_filename(name):
return './simple-examples/data/ptb.{}'.format(name)
tar_path_test = './simple-examples/data/ptb.test.txt'
tar_path_train = './simple-examples/data/ptb.train.txt'
tar_path_valid = './simple-examples/data/ptb.valid.txt'
def append_eos(input_path, output_path):
def add_eos(input_path, output_path):
with open(input_path, 'r') as input_file, \
open(output_path, 'w') as output_file:
for line in input_file:
line = line.strip()
output_file.write(line + " <eos>\n")
output_file.write(line + " </s>\n")
if __name__=='__main__':
if not os.path.isfile(save_file):
urlretrieve(download_url, save_file)
if not os.path.isfile(tmptgz):
request.urlretrieve(url, tmptgz)
# extracting the files we need from the tarfile
fileReader = tarfile.open(tmptgz, 'r')
fileReader.extract(tar_path_test, path=tmpdir)
fileReader.extract(tar_path_train, path=tmpdir)
fileReader.extract(tar_path_valid, path=tmpdir)
append_eos(os.path.join(tmpdir, tar_path_test), 'test.txt')
append_eos(os.path.join(tmpdir, tar_path_train), 'train.txt')
append_eos(os.path.join(tmpdir, tar_path_valid), 'valid.txt')
fileReader = tarfile.open(save_file, 'r')
for name in ['train.txt', 'test.txt', 'valid.txt']:
filename = get_filename(name)
fileReader.extract(filename, path='.')
add_eos(filename, name)
fileReader.close()
os.remove(tmptgz)
shutil.rmtree(tmpdir)
os.remove(save_file)
shutil.rmtree('./simple-examples')

View file

@@ -1,20 +1,21 @@
# LightRNN
LightRNN
This is the official implementation for [LightRNN: Memory and Computation-Efficient Recurrent Neural Networks](https://arxiv.org/abs/1610.09893) in CNTK.
## LightRNN: Memory and Computation-Efficient Recurrent Neural Networks
To address the both model size and running time, especially for text corpora with large vocabularies, the author proposed to use 2-Component (2C) shared embedding for word representations. They allocate every word in the vocabulary into a table, each row of which is associated with a vector, and each column associated with another vector. Depending on its position in the table, a word is jointly represented by two components: a row vector and a column vector. Since the words in the same row share the row vector and the words in the same column share the column vector, we only need 2 \sqrt(V) vectors to represent a vocabulary of |V| unique words, which are far less than the |V| vectors required by existing approaches. The LightRNN algorithm significantly reduces the model size and speeds up the training process, without sacrifice of accuracy (it achieves similar, if not better, perplexity as compared to state-of-the-art language models).
More details please refer the [LightRNN paper](https://arxiv.org/abs/1610.09893)
## LightRNN: Memory and Computation-Efficient Recurrent Neural Networks
Recurrent neural networks (RNNs) have achieved state-of-the-art performance in many natural language processing tasks, such as language modeling and machine translation. However, when the vocabulary is large, the RNN model becomes very big (possibly beyond the memory capacity of a GPU device) and its training/inference becomes very inefficient. LightRNN addresses this challenge with a 2-Component (2C) shared embedding for word representations. It allocates every word in the vocabulary into a table, each row of which is associated with a vector, and each column with another vector. Depending on its position in the table, a word is jointly represented by two components: a row vector and a column vector. Since the words in the same row share the row vector and the words in the same column share the column vector, we only need $2 \sqrt{|V|}$ vectors to represent a vocabulary of $|V|$ unique words, far fewer than the $|V|$ vectors required by existing approaches. The LightRNN algorithm significantly reduces the model size and speeds up training/inference for corpora with large vocabularies.
#### For more details, please refer to the NIPS 2016 paper (https://arxiv.org/abs/1610.09893)
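As a quick sanity check on the numbers (a sketch, not part of the repository): with the default `-vocabsize 10000`, the word table is 100 x 100, so only 200 shared row/column vectors are needed instead of 10000 per-word vectors.

```python
import math

def shared_embedding_count(vocab_size):
    """Return (table side length, number of 2C shared vectors) for a vocabulary."""
    base = int(math.ceil(math.sqrt(vocab_size)))  # smallest n with n * n >= vocab_size
    return base, 2 * base

print(shared_embedding_count(10000))    # (100, 200)  vs. 10000 word vectors
print(shared_embedding_count(136912))   # (371, 742)  ACLW-French vocabulary
```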
## Requirements
- CNTK binary: set up CNTK following this guide
- CNTK
- Python 2.7 or later.
- g++ 4.8 or later
- g++
__For the multi-GPU version__
- openmpi
- openmpi or another MPI implementation
- mpi4py
## Details
@@ -28,8 +29,8 @@ The folder [LightRNN](LightRNN/) contains the main structure of LightRNN.
An overridden UserMinibatchSource that maps text to streams.
- __[lightrnn.py](LightRNN/lightrnn.py)__
The computation graph of LightRNN
- __[pyreallocate.cpp](LightRNN/pyreallocate.cpp)__
   Word reallocation.
- __[reallocate.py](LightRNN/reallocate.py)__
Word reallocation implemented in Python.
- __[preprocess.py](LightRNN/preprocess.py)__
   The preprocessing procedure of LightRNN
- Options
@@ -57,7 +58,7 @@ The folder [LightRNN](LightRNN/) contains the main structure of LightRNN.
- `-layer <int> (default: 2)`, Number of layers.
- `-dropout <float> (default: 0.2)`, Dropout rate.
- `-lr <float> (default: 0.15)`, Learning rate.
- `-optim <string> (accepted: sgd, adam, adagrad, default: sgd)`, The optim method which provides sgd, adam and adagrad.
- `-optim <string> (accepted: sgd, adam, adagrad, default: sgd)`, The optimization method which provides sgd, adam and adagrad.
- `-seqlength <int> (default: 32)`, number of timesteps to unroll for.
- `-vocabsize <int> (default: 10000)`, Vocabulary size.
- `-batchsize <int> (default: 20)`, Minibatch size.
@@ -85,17 +86,32 @@ __Multi-GPU__
`mpiexec -n 2 python train.py -datadir ../PTB/Data -vocab_file ../PTB/Allocation/vocab.txt -vocabdir ../PTB/Allocation -vocabsize 10000 -epochs 12 13 -nhid 1000 -embed 1000 -optim adam -lr 0.1 -batchsize 20 -layer 2 -dropout 0.5`
This command will train a LightRNN model on two GPU, you can specify the gpu number by using `mpiexec -n [gpus]`.
This command trains a LightRNN model on two GPUs; you can specify the number of GPUs with `mpiexec -n [gpus]`.
### [PTB/](PTB/)
This folder contains an example of PTB dataset. You can use [download_data.py](PTB/Data/download_data.py) under [Data/](PTB/Data) to download the data and [generate.sh](PTB/Allocation/generate.sh) under [Allocation/](PTB/Allocation) to generate a vocabulary file and random table.
This folder contains an example based on the PTB dataset. You can use [download_data.py](PTB/Data/download_data.py) under [Data/](PTB/Data) to download the data and [generate.py](PTB/Allocation/generate.py) under [Allocation/](PTB/Allocation) to generate the vocabulary file and a random word table.
### [Test/](Test/)
Includes a test program. Run it as follows:
`python test_train.py`
### Generate the C++ dynamic library
__For Linux users__
Run the [Makefile](Makefile) in the root directory with `make`.
__For Windows users__
Open the solution under [DLL](https://github.com/Microsoft/LightRNN/tree/master/DLL) with Visual Studio, build it, and copy the resulting DLL into [LightRNN](LightRNN/).
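Once built (`libpyreallocate.so` on Linux, `libpyreallocate.dll` on Windows), the library is loaded from Python at runtime. Below is a minimal sketch of calling the exported `allocate_table` function through `ctypes`, assuming the library sits next to the calling script; the repository's own training code handles this wiring internally.

```python
import ctypes
import os
import platform

import numpy as np

# Load the shared library produced by the Makefile (Linux) or the DLL project (Windows).
lib_name = 'libpyreallocate.so' if platform.system() == 'Linux' else 'libpyreallocate.dll'
lib = ctypes.CDLL(os.path.join(os.path.dirname(os.path.abspath(__file__)), lib_name))

def allocate_table(row_loss, col_loss, vocab_size, vocab_base, save_path, word_path):
    # row_loss / col_loss: float64 arrays of shape (vocab_size, vocab_base).
    row = np.ascontiguousarray(row_loss, dtype=np.float64)
    col = np.ascontiguousarray(col_loss, dtype=np.float64)
    lib.allocate_table(row.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       col.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                       ctypes.c_int(vocab_size), ctypes.c_int(vocab_base),
                       save_path.encode('utf-8'), word_path.encode('utf-8'))
```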
## Experiment
### [ACL-French](https://www.dropbox.com/s/m83wwnlz3dw5zhk/large.zip?dl=0)
The ACLW French corpus contains about 56M tokens, with a vocabulary of 136912 words. The parameters used in the experiment are listed below.
|Paramters Name|Value|
|Parameter Name|Value|
|:---|:---|
|Vocabulary size|136912|
|Hidden dim|1000|
@@ -138,7 +154,7 @@ We can achieve 122 perplexity (on the test set) after one epoch of training with
The ClueWeb09 dataset contains over 177 billion tokens. We select the top 10240000 most frequent words as the vocabulary, covering 99.057% of the tokens. We randomly sampled 1 GB each for the validation and test sets.
The model parameters include:
|Paramters Name|Value|
|Parameter Name|Value|
|:---|:---|
|Vocabulary size|10240000|
|Hidden dim|512|
@@ -152,7 +168,7 @@ The model parameters include:
|GPU Type|GeForce GTX Titan x|
|GPU Number|4|
We achieve a training speed of 77873 tokens/s with 4 GPUs. It takes 630 hours (26.7 days) to finish a epoch.
We achieve a training speed of 77873 tokens/s with 4 GPUs. It takes 630 hours (26.7 days) to finish an epoch.
__Train-Valid loss__
@@ -168,7 +184,6 @@ If you find LightRNN useful in your work, you can cite the paper as below:
Year = {2016}
}
### Release Notes
1. You need to ensure the version of openmpi correspond to the mpi which mpi4py uses. We recommend you to build mpi4py from source.
2. We provide two word allocation alogrithm code which implemented by python and c++. If you don't use a c++ dynamix library, the program will use a python implentation. Besides, we has compared the perfermance between the c++ and python. The python version will be 5 times or more slower than c++. We recommend you use a c++ implemenation.
3. We provide two dynamic libraries under [LightRNN](LightRNN/) for Ununtu and Windows. If you find these library are not useful(eg. MAC User), we suggest you install g++, and the program can generate it by self.
### Notes
1. It is recommended to build mpi4py from source to avoid version conflicts between mpi4py and the installed MPI.
2. We provide two implementations of the word allocation algorithm, one in Python and one in C++. If the C++ dynamic library is not available, the Python implementation is used instead. The Python version is 5 times or more slower, so the C++ version is preferred.
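As a rough illustration of that fallback (a sketch only; `python_allocate_table` is a hypothetical stand-in for the pure-Python implementation):

```python
import ctypes
import os
import platform

def python_allocate_table(*args):
    # Stand-in for the pure-Python word allocation (the slower fallback path).
    raise NotImplementedError('pure-Python reallocation goes here')

def load_allocator():
    # Use the compiled C++ library when it is present, otherwise fall back to Python.
    lib_name = 'libpyreallocate.so' if platform.system() == 'Linux' else 'libpyreallocate.dll'
    lib_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), lib_name)
    if os.path.exists(lib_path):
        return ctypes.CDLL(lib_path).allocate_table
    return python_allocate_table
```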

View file

@@ -31,10 +31,10 @@ def run_command(**kwargs):
return command
def lightrnn_test(device_id):
expected_valid_error = 7.251514
expected_test_error = 7.305801
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU')
expected_valid_error = 7.251514
expected_test_error = 7.305801
command = run_command(datadir=os.path.join(example_dir, '..', 'test'),
outputdir=os.path.join(example_dir, '..', 'LightRNN'),

View file

@@ -485,6 +485,15 @@
"type": ["Recipe"],
"dateadded": "4/14/2017"
},
{
"category": ["Text"],
"name": "LightRNN",
"url": "https://github.com/Microsoft/CNTK/tree/master/Examples/Text/LightRNN",
"description": "LightRNN: Memory and Computation-Efficient Recurrent Neural Networks",
"language": ["Python"],
"type": ["Recipe"],
"dateadded": "07/14/2017"
},
{
"category": ["Reinforcement Learning"],
"name": "Deep Q Neural Network",