Conflicts:
	DataReader/HTKMLFReader/HTKMLFReader.cpp
	DataReader/HTKMLFReader/HTKMLFReader.h
This commit is contained in:
Mike Seltzer 2015-02-06 16:14:47 -08:00
Родитель 26d9e66b87 f3dfe81034
Коммит b4f465b8c4
34 изменённых файлов: 21951 добавлений и 21411 удалений

330
.gitignore поставляемый
Просмотреть файл

@@ -1,162 +1,168 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.sln.docstates
*.orig
# Build results
[Dd]ebug/
[Rr]elease/
x64/
build/
[Bb]in/
[Oo]bj/
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
!packages/*/build/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
*_i.c
*_p.c
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.log
*.scc
*.dep
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
*.cachefile
# Visual Studio profiler
*.psess
*.vsp
*.vspx
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
*.ncrunch*
.*crunch*.local.xml
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.Publish.xml
# NuGet Packages Directory
## TODO: If you have NuGet Package Restore enabled, uncomment the next line
#packages/
# Windows Azure Build Output
csx
*.build.csdef
# Windows Store app package directory
AppPackages/
# Others
sql/
*.Cache
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.[Pp]ublish.xml
*.pfx
*.publishsettings
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file to a newer
# Visual Studio version. Backup files are not needed, because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
App_Data/*.mdf
App_Data/*.ldf
#LightSwitch generated files
GeneratedArtifacts/
_Pvt_Extensions/
ModelManifest.xml
# =========================
# Windows detritus
# =========================
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Mac desktop service store files
.DS_Store
*.lyx~
*.bak
*.lyx#
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.sln.docstates
*.orig
# Build results
[Dd]ebug/
[Rr]elease/
x64/
build/
[Bb]in/
[Oo]bj/
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
!packages/*/build/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
*_i.c
*_p.c
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.log
*.scc
*.dep
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
*.cachefile
# Visual Studio profiler
*.psess
*.vsp
*.vspx
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
*.ncrunch*
.*crunch*.local.xml
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.Publish.xml
# NuGet Packages Directory
## TODO: If you have NuGet Package Restore enabled, uncomment the next line
#packages/
# Windows Azure Build Output
csx
*.build.csdef
# Windows Store app package directory
AppPackages/
# Others
sql/
*.Cache
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.[Pp]ublish.xml
*.pfx
*.publishsettings
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file to a newer
# Visual Studio version. Backup files are not needed, because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
App_Data/*.mdf
App_Data/*.ldf
#LightSwitch generated files
GeneratedArtifacts/
_Pvt_Extensions/
ModelManifest.xml
# =========================
# Windows detritus
# =========================
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Mac desktop service store files
.DS_Store
*.lyx~
*.bak
*.lyx#
# =========================
# prebuild file
# =========================
MachineLearning/cn/buildinfo.h

Просмотреть файл

@@ -1,279 +1,280 @@
//
// <copyright file="ConfigFile.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// ConfigFile.cpp : Defines the configuration file loader.
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include "File.h"
#include "commandArgUtil.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// ParseCommandLine - parse the command line parameters
// argc - count of arguments
// argv - array of argument parameters
// config - config to return
std::string ConfigParameters::ParseCommandLine(int argc, wchar_t* argv[], ConfigParameters& config)
{
config.SetName(std::string("global"));
// This vector keeps track of the config files we have already read
std::vector<std::string> resolvedConfigFiles;
std::string configString;
// start at 1, because 0 is the name of the EXE
for (int i=1; i < argc; ++i)
{
wstring str = argv[i];
// see if they are loading a config file
wstring configDescriptor = L"configFile=";
int compare = _wcsnicmp(configDescriptor.c_str(), str.c_str(), configDescriptor.length());
// no config file, parse as regular argument
if (compare)
{
configString += (msra::strfun::utf8(str) + "\n");
}
else // One or more config file paths specified in a "+"-separated list.
{
const std::string filePaths = msra::strfun::utf8(str.substr(configDescriptor.length()));
std::vector<std::string> filePathsVec = msra::strfun::split(filePaths, "+");
for (auto filePath : filePathsVec)
{
if (std::find(resolvedConfigFiles.begin(), resolvedConfigFiles.end(), filePath) == resolvedConfigFiles.end())
{
// if haven't already read this file, read it
resolvedConfigFiles.push_back(filePath);
configString += config.ReadConfigFile(filePath);
}
else
RuntimeError("Cannot specify same config file multiple times at the command line.");
}
}
}
configString = config.ResolveIncludeStatements(configString, resolvedConfigFiles);
config.FileParse(configString);
return configString;
}
// ResolveIncludeStatements - this function takes a config string, and looks for all lines of the
// form "include=configPaths", where 'configPaths' is a "+" separated list of paths to config files.
// If it encounters one of these lines, it reads the config files listed in 'configPaths' (in the specified order),
// and includes the body of each file in the string which is eventually returned by this function. If the included
// config file includes other config files, this function will recursively include those files as well.
// configString - the config string within which to look for "include" statements
// resolvedConfigFiles - the paths to all the config files that have already been resolved. This vector is used to prevent include loops,
// and to prevent files from being included multiple times.
// returns: The config string, with all the "include" statements replaced with the bodies of the specified config files.
std::string ConfigParser::ResolveIncludeStatements(const std::string &configString, std::vector<std::string> &resolvedConfigFiles)
{
std::vector<std::string> lines = msra::strfun::split(configString, "\n");
std::string includeKeyword = "include=";
std::size_t includeKeywordSize = includeKeyword.size();
std::string newConfigString;
for (std::string line : lines)
{
if (line.compare(0, includeKeywordSize, includeKeyword) == 0)
{
std::string filePaths = line.substr(includeKeywordSize, line.size() - includeKeywordSize);
if (filePaths.find(openBraceVar) != std::string::npos)
{
RuntimeError("Variable usage (eg, \"$varName$\") not supported in \"include\" statements. Explicit path to config file must be provided");
}
std::vector<std::string> filePathVec = msra::strfun::split (filePaths, "+");
for (auto filePath : filePathVec)
{
// if file hasn't already been resolved (the resolvedPaths vector doesn't contain it), resolve it.
if (std::find(resolvedConfigFiles.begin(), resolvedConfigFiles.end(), filePath) == resolvedConfigFiles.end())
{
// Recursively resolve the include statements in the included config files.
// Ensure that the same config file isn't included twice, by keeping track of the config
// files that have already been resolved in the resolvedPaths vector.
resolvedConfigFiles.push_back(filePath);
newConfigString += ResolveIncludeStatements(
ReadConfigFile(filePath),
resolvedConfigFiles
);
}
else
{
// We already resolved this path. Write a warning so that user is aware of this.
// TODO: This message is written to stderr before stderr gets redirected to the specified file. Fix this.
fprintf(stderr, "Warning: Config file included multiple times. Not including config file again: %s", filePath.c_str());
}
}
}
else
{
newConfigString += (line + "\n");
}
}
return newConfigString;
}
// LoadConfigFiles - load multiple configuration file, and adds to config parameters
// filePaths - A "+" delimited list of file paths, corresponding to config files to load
// configStringToAppend - A config string which should be processed together with the config files
void ConfigParser::LoadConfigFiles(const std::wstring &filePaths, const std::string *configStringToAppend)
{
std::string configString = ReadConfigFiles(filePaths);
if(configStringToAppend != nullptr)
{
configString += *configStringToAppend;
}
FileParse(configString);
}
// LoadConfigFileAndResolveVariables - load a configuration file, and add to config parameters.
// If the config file contains references to variables, which are defined in the 'config' ConfigParameters,
// then this method will resolve those variables. This method is meant for the processing of NDL/MEL config files,
// in order to allow them to access variables defined in the primary config file via $varName$ syntax.
// filePath - filePath to the file to load
// config - These ConfigParameters are used in order to resolve the $varName$ instances in the config file.
void ConfigParser::LoadConfigFileAndResolveVariables(const std::wstring &filePath, const ConfigParameters& config)
{
// read file, resolve variables, and then parse.
std::string fileContents = ReadConfigFile(filePath);
fileContents = config.ResolveVariables(fileContents);
FileParse(fileContents);
}
// LoadConfigFile - load a configuration file, and add to config parameters
// filePath - filePath to the file to read
void ConfigParser::LoadConfigFile(const std::wstring &filePath)
{
// read and then parse
FileParse(ReadConfigFile(filePath));
}
// Same as "ReadConfigFiles" function below, but takes as input string instead of wstring
std::string ConfigParser::ReadConfigFiles(const std::string &filePaths)
{
return ReadConfigFiles(msra::strfun::utf16(filePaths));
}
// ReadConfigFiles - reads multiple config files, concatenates the content from each file, and returns a string
// filePaths - A "+" delimited list of file paths, corresponding to config files to read
// returns: a string with the concatenated file contents
std::string ConfigParser::ReadConfigFiles(const std::wstring &filePaths)
{
std::string configString;
std::vector<std::wstring> filePathVec = msra::strfun::split (filePaths, L"+");
for (auto filePath : filePathVec)
{
configString += ReadConfigFile(filePath);
}
return configString;
}
// Same as "ReadConfigFile" function below, but takes as input string instead of wstring
std::string ConfigParser::ReadConfigFile(const std::string &filePath)
{
return ReadConfigFile(msra::strfun::utf16(filePath));
}
// ReadConfigFile - read a configuration file, and return as a string
// filePath - the path to the config file to read
// returns: a string with the concatenated file contents
std::string ConfigParser::ReadConfigFile(const std::wstring &filePath)
{
File file(filePath, fileOptionsRead);
// initialize with file name
std::string path = msra::strfun::utf8(filePath);
auto location = path.find_last_of("/\\");
if (location != npos)
path = path.substr(location+1);
m_configName = move(path);
// read the entire file into a string
// CONSIDER: should the File API support this, instead of line by line?
size_t fileLength = file.Size();
string str;
string configFile;
configFile.reserve(fileLength);
while (!file.IsEOF())
{
file.GetLine(str);
str = PreprocessConfigLine(str);
if (str != "")
{
configFile.append(str);
configFile.append("\n");
}
}
return configFile;
}
// GetFileConfigNames - determine the names of the features and labels sections in the config file
// features - [in,out] a vector of feature name strings
// labels - [in,out] a vector of label name strings
void GetFileConfigNames(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels)
{
for (auto iter = readerConfig.begin(); iter != readerConfig.end(); ++iter)
{
auto pair = *iter;
ConfigParameters temp (iter->second);
// see if we have a config parameters that contains a "dim" element, it's a sub key, use it
if (temp.ExistsCurrent("dim"))
{
if (temp.ExistsCurrent("labelMappingFile")
|| temp.ExistsCurrent("labelDim")
|| temp.ExistsCurrent("labelType")
|| (temp.ExistsCurrent("sectionType") && temp("sectionType") == "labels"))
{
labels.push_back(msra::strfun::utf16(iter->first));
}
else
{
features.push_back(msra::strfun::utf16(iter->first));
}
}
}
}
// FindConfigNames - determine the names of the hierarchy of sections in the config file that contain a particular key
// config - configuration to search
// key - string we are searching for in each config section
// names - [in,out] a vector of section names in "path" format (i.e. base\subsection)
void FindConfigNames(const ConfigParameters& config, std::string key, std::vector<std::wstring>& names)
{
for (auto iter = config.begin(); iter != config.end(); ++iter)
{
auto pair = *iter;
ConfigParameters temp (iter->second);
// see if we have a config parameters that contains a "key" element, if so use it
if (temp.ExistsCurrent(key))
{
names.push_back(msra::strfun::utf16(iter->first));
}
}
}
// Trim - trim white space off the start and end of the string
// str - string to trim
// NOTE: if the entire string is empty, then the string will be set to an empty string
void Trim(std::string& str)
{
auto found = str.find_first_not_of(" \t");
if (found == npos)
{
str.erase(0);
return;
}
str.erase(0, found);
found = str.find_last_not_of(" \t");
if (found != npos)
str.erase(found+1);
}
//
// <copyright file="ConfigFile.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// ConfigFile.cpp : Defines the configuration file loader.
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#include "File.h"
#include "commandArgUtil.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// ParseCommandLine - parse the command line parameters
// argc - count of arguments
// argv - array of argument parameters
// config - config to return
std::string ConfigParameters::ParseCommandLine(int argc, wchar_t* argv[], ConfigParameters& config)
{
config.SetName(std::string("global"));
// This vector keeps track of the config files we have already read
std::vector<std::string> resolvedConfigFiles;
std::string configString;
// start at 1, because 0 is the name of the EXE
for (int i=1; i < argc; ++i)
{
wstring str = argv[i];
// see if they are loading a config file
wstring configDescriptor = L"configFile=";
// case-insensitive prefix comparison: 0 (false) means the argument starts with "configFile="
int compare = _wcsnicmp(configDescriptor.c_str(), str.c_str(), configDescriptor.length());
// no config file, parse as regular argument
if (compare)
{
// plain "name=value" argument: convert to UTF-8 and accumulate, one per line
configString += (msra::strfun::utf8(str) + "\n");
}
else // One or more config file paths specified in a "+"-separated list.
{
const std::string filePaths = msra::strfun::utf8(str.substr(configDescriptor.length()));
std::vector<std::string> filePathsVec = msra::strfun::split(filePaths, "+");
for (auto filePath : filePathsVec)
{
if (std::find(resolvedConfigFiles.begin(), resolvedConfigFiles.end(), filePath) == resolvedConfigFiles.end())
{
// if haven't already read this file, read it
resolvedConfigFiles.push_back(filePath);
configString += config.ReadConfigFile(filePath);
}
else
// a duplicate on the command line is a hard error (duplicate "include"
// statements inside files only produce a warning, by contrast)
RuntimeError("Cannot specify same config file multiple times at the command line.");
}
}
}
// expand any "include=..." lines pulled in from the files read above
configString = config.ResolveIncludeStatements(configString, resolvedConfigFiles);
config.FileParse(configString);
return configString;
}
// ResolveIncludeStatements - this function takes a config string, and looks for all lines of the
// form "include=configPaths", where 'configPaths' is a "+" separated list of paths to config files.
// If it encounters one of these lines, it reads the config files listed in 'configPaths' (in the specified order),
// and includes the body of each file in the string which is eventually returned by this function. If the included
// config file includes other config files, this function will recursively include those files as well.
// configString - the config string within which to look for "include" statements
// resolvedConfigFiles - the paths to all the config files that have already been resolved. This vector is used to prevent include loops,
// and to prevent files from being included multiple times.
// returns: The config string, with all the "include" statements replaced with the bodies of the specified config files.
std::string ConfigParser::ResolveIncludeStatements(const std::string &configString, std::vector<std::string> &resolvedConfigFiles)
{
std::vector<std::string> lines = msra::strfun::split(configString, "\n");
std::string includeKeyword = "include=";
std::size_t includeKeywordSize = includeKeyword.size();
std::string newConfigString;
for (std::string line : lines)
{
// only a line that begins exactly with "include=" (no leading whitespace) is treated as a directive
if (line.compare(0, includeKeywordSize, includeKeyword) == 0)
{
std::string filePaths = line.substr(includeKeywordSize, line.size() - includeKeywordSize);
// variable references cannot be expanded this early in parsing, so reject them outright
if (filePaths.find(openBraceVar) != std::string::npos)
{
RuntimeError("Variable usage (eg, \"$varName$\") not supported in \"include\" statements. Explicit path to config file must be provided");
}
std::vector<std::string> filePathVec = msra::strfun::split (filePaths, "+");
for (auto filePath : filePathVec)
{
// if file hasn't already been resolved (the resolvedPaths vector doesn't contain it), resolve it.
if (std::find(resolvedConfigFiles.begin(), resolvedConfigFiles.end(), filePath) == resolvedConfigFiles.end())
{
// Recursively resolve the include statements in the included config files.
// Ensure that the same config file isn't included twice, by keeping track of the config
// files that have already been resolved in the resolvedPaths vector.
resolvedConfigFiles.push_back(filePath);
newConfigString += ResolveIncludeStatements(
ReadConfigFile(filePath),
resolvedConfigFiles
);
}
else
{
// We already resolved this path. Write a warning so that user is aware of this.
// TODO: This message is written to stderr before stderr gets redirected to the specified file. Fix this.
fprintf(stderr, "Warning: Config file included multiple times. Not including config file again: %s", filePath.c_str());
}
}
}
else
{
// ordinary config line: pass through unchanged
newConfigString += (line + "\n");
}
}
return newConfigString;
}
// LoadConfigFiles - load multiple configuration file, and adds to config parameters
// filePaths - A "+" delimited list of file paths, corresponding to config files to load
// configStringToAppend - A config string which should be processed together with the config files
void ConfigParser::LoadConfigFiles(const std::wstring &filePaths, const std::string *configStringToAppend)
{
    // Concatenate the contents of every "+"-separated config file, then
    // optionally append the caller-supplied config fragment before parsing.
    std::string combined = ReadConfigFiles(filePaths);
    if (configStringToAppend != nullptr)
        combined += *configStringToAppend;
    FileParse(combined);
}
// LoadConfigFileAndResolveVariables - load a configuration file, and add to config parameters.
// If the config file contains references to variables, which are defined in the 'config' ConfigParameters,
// then this method will resolve those variables. This method is meant for the processing of NDL/MEL config files,
// in order to allow them to access variables defined in the primary config file via $varName$ syntax.
// filePath - filePath to the file to load
// config - These ConfigParameters are used in order to resolve the $varName$ instances in the config file.
void ConfigParser::LoadConfigFileAndResolveVariables(const std::wstring &filePath, const ConfigParameters& config)
{
    // Read the raw file, substitute the $varName$ references using 'config',
    // and only then hand the resolved text to the parser.
    FileParse(config.ResolveVariables(ReadConfigFile(filePath)));
}
// LoadConfigFile - load a configuration file, and add to config parameters
// filePath - filePath to the file to read
void ConfigParser::LoadConfigFile(const std::wstring &filePath)
{
    // Slurp the file's contents, then parse them.
    std::string contents = ReadConfigFile(filePath);
    FileParse(contents);
}
// Same as "ReadConfigFiles" function below, but takes as input string instead of wstring
std::string ConfigParser::ReadConfigFiles(const std::string &filePaths)
{
return ReadConfigFiles(msra::strfun::utf16(filePaths));
}
// ReadConfigFiles - reads multiple config files, concatenates the content from each file, and returns a string
// filePaths - A "+" delimited list of file paths, corresponding to config files to read
// returns: a string with the concatenated file contents
std::string ConfigParser::ReadConfigFiles(const std::wstring &filePaths)
{
    // Split the "+"-delimited path list and concatenate each file's contents
    // in the order the paths were given.
    std::string combined;
    for (const auto& onePath : msra::strfun::split (filePaths, L"+"))
    {
        combined += ReadConfigFile(onePath);
    }
    return combined;
}
// Same as "ReadConfigFile" function below, but takes as input string instead of wstring
std::string ConfigParser::ReadConfigFile(const std::string &filePath)
{
return ReadConfigFile(msra::strfun::utf16(filePath));
}
// ReadConfigFile - read a configuration file, and return as a string
// filePath - the path to the config file to read
// returns: a string with the concatenated file contents
std::string ConfigParser::ReadConfigFile(const std::wstring &filePath)
{
File file(filePath, fileOptionsRead);
// initialize with file name
std::string path = msra::strfun::utf8(filePath);
auto location = path.find_last_of("/\\");
if (location != npos)
path = path.substr(location+1);
// remember the bare file name (directories stripped) as this parser's config name
m_configName = move(path);
// read the entire file into a string
// CONSIDER: should the File API support this, instead of line by line?
size_t fileLength = file.Size();
string str;
string configFile;
configFile.reserve(fileLength);
while (!file.IsEOF())
{
file.GetLine(str);
// PreprocessConfigLine may normalize or strip a line; lines that come back empty are dropped
str = PreprocessConfigLine(str);
if (str != "")
{
configFile.append(str);
configFile.append("\n");
}
}
return configFile;
}
// GetFileConfigNames - determine the names of the features and labels sections in the config file
// features - [in,out] a vector of feature name strings
// labels - [in,out] a vector of label name strings
void GetFileConfigNames(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels)
{
for (auto iter = readerConfig.begin(); iter != readerConfig.end(); ++iter)
{
// (removed dead local "auto pair = *iter;" which copied the pair every iteration and was never used)
ConfigParameters temp (iter->second);
// see if we have a config parameters that contains a "dim" element, it's a sub key, use it
if (temp.ExistsCurrent("dim"))
{
// any label-specific key (or an explicit sectionType=labels) marks a label section;
// every other section with a "dim" is assumed to describe features
if (temp.ExistsCurrent("labelMappingFile")
|| temp.ExistsCurrent("labelDim")
|| temp.ExistsCurrent("labelType")
|| (temp.ExistsCurrent("sectionType") && temp("sectionType") == "labels"))
{
labels.push_back(msra::strfun::utf16(iter->first));
}
else
{
features.push_back(msra::strfun::utf16(iter->first));
}
}
}
}
// FindConfigNames - determine the names of the hierarchy of sections in the config file that contain a particular key
// config - configuration to search
// key - string we are searching for in each config section
// names - [in,out] a vector of section names in "path" format (i.e. base\subsection)
void FindConfigNames(const ConfigParameters& config, std::string key, std::vector<std::wstring>& names)
{
for (auto iter = config.begin(); iter != config.end(); ++iter)
{
// (removed dead local "auto pair = *iter;" which copied the pair every iteration and was never used)
ConfigParameters temp (iter->second);
// see if we have a config parameters that contains a "key" element, if so use it
if (temp.ExistsCurrent(key))
{
names.push_back(msra::strfun::utf16(iter->first));
}
}
}
// Trim - trim white space off the start and end of the string
// str - string to trim
// NOTE: if the entire string is empty, then the string will be set to an empty string
// Trim - remove leading and trailing spaces/tabs from 'str' in place.
// A string consisting solely of whitespace collapses to the empty string.
void Trim(std::string& str)
{
    const auto first = str.find_first_not_of(" \t");
    if (first == std::string::npos)
    {
        // nothing but whitespace (or already empty) -- result is ""
        str.clear();
        return;
    }
    str.erase(0, first);
    const auto last = str.find_last_not_of(" \t");
    if (last != std::string::npos)
        str.erase(last + 1);
}
}}}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@@ -0,0 +1,13 @@
#pragma once
#define MS_PER_SEC 1000
namespace Microsoft{namespace MSR {namespace CNTK {
// Timer - tiny utility exposing wall-clock time in milliseconds.
// The user-written empty constructor/destructor (with stray trailing
// semicolons) were removed: the class holds no state, so the
// compiler-generated defaults are identical (Rule of Zero).
class Timer
{
public:
    // Milliseconds elapsed since the Unix epoch (see TimerUtility.cpp).
    static unsigned long long MilliSecondElapsed();
};
}}}

39
Common/TimerUtility.cpp Normal file
Просмотреть файл

@@ -0,0 +1,39 @@
#include "TimerUtility.h"
#ifdef WIN32
#include <Windows.h>
#else
#include <time.h>
#endif
namespace Microsoft{
namespace MSR {
namespace CNTK {
//Returns the amount of milliseconds elapsed
// MilliSecondElapsed - wall-clock milliseconds since the Unix epoch (1970-01-01).
// Both branches are aligned to the same epoch so values are comparable across platforms.
// returns: milliseconds as an unsigned 64-bit count
unsigned long long Timer::MilliSecondElapsed()
{
#ifdef WIN32
    FILETIME ft;
    LARGE_INTEGER li;
    GetSystemTimeAsFileTime(&ft); //ideally we should use GetSystemTimePreciseAsFileTime. But it's only avaiable with Win8+ and Win Server 2012+
    li.LowPart = ft.dwLowDateTime;
    li.HighPart = ft.dwHighDateTime;
    unsigned long long ret = li.QuadPart;
    ret -= 116444736000000000LL; // shift epoch from 1601-01-01 (FILETIME) to 1970-01-01, consistent with Linux.
    ret /= 10000;                // From 100 nano seconds (10^-7) to 1 millisecond (10^-3)
    return ret;
#else
    timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts); // Works on Linux
    // Use unsigned long long rather than the Windows-only UINT64 typedef so this
    // POSIX branch compiles without any Windows basetypes header.
    unsigned long long ret = static_cast<unsigned long long>(ts.tv_sec) * 1000 + ts.tv_nsec / 1000000;
    return ret;
#endif
}
}
}
}

Просмотреть файл

@@ -4,7 +4,10 @@
// </copyright>
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#pragma warning (disable: 4996) // ^^ this does not seem to work--TODO: make it work
#define _FILE_OFFSET_BITS 64 // to force fseeko() and ftello() 64 bit in Linux

Просмотреть файл

@@ -49,17 +49,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_truncated = readerConfig("Truncated", "false");
m_convertLabelsToTargets = false;
m_numberOfuttsPerMinibatch = readerConfig("nbruttsineachrecurrentiter", "1");
ConfigArray numberOfuttsPerMinibatchForAllEpochs = readerConfig("nbruttsineachrecurrentiter", "1");
m_numberOfuttsPerMinibatchForAllEpochs = numberOfuttsPerMinibatchForAllEpochs;
if (m_numberOfuttsPerMinibatch < 1)
for (int i = 0; i < m_numberOfuttsPerMinibatchForAllEpochs.size(); i++)
{
LogicError("nbrUttsInEachRecurrentIter cannot be less than 1.");
m_numberOfuttsPerMinibatch = m_numberOfuttsPerMinibatchForAllEpochs[i];
if (m_numberOfuttsPerMinibatch < 1)
{
LogicError("nbrUttsInEachRecurrentIter cannot be less than 1.");
}
if (!m_truncated && m_numberOfuttsPerMinibatch != 1)
{
LogicError("nbrUttsInEachRecurrentIter has to be 1 if Truncated is set to false.");
}
}
if (!m_truncated && m_numberOfuttsPerMinibatch != 1)
{
LogicError("nbrUttsInEachRecurrentIter has to be 1 if Truncated is set to false.");
}
m_numberOfuttsPerMinibatch = m_numberOfuttsPerMinibatchForAllEpochs[0];
m_actualnumberOfuttsPerMinibatch = m_numberOfuttsPerMinibatch;
m_sentenceEnd.assign(m_numberOfuttsPerMinibatch, true);
@@ -264,6 +271,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// get the read method, defaults to "blockRandomize" other option is "rollingWindow"
std::string readMethod(readerConfig("readMethod","blockRandomize"));
if (readMethod == "blockRandomize" && randomize == randomizeNone)
{
fprintf(stderr, "WARNING: Randomize cannot be set to None when readMethod is set to blockRandomize. Change it Auto");
randomize = randomizeAuto;
}
// see if they want to use readAhead
m_readAhead = readerConfig("readAhead", "false");
@@ -352,6 +365,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// now get the frame source. This has better randomization and doesn't create temp files
m_frameSource = new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims, numContextLeft, numContextRight, randomize, *m_lattices, m_latticeMap, framemode);
m_frameSource->setverbosity(verbosity);
//m_frameSource = new msra::dbn::minibatchutterancesource(infilesmulti[0], labelsmulti[0], m_featDims[0], m_labelDims[0], numContextLeft[0], numContextRight[0], randomize, *m_lattices, m_latticeMap, framemode);
}
@@ -562,6 +576,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
m_mbSize = mbSize;
m_numberOfuttsPerMinibatch = m_numberOfuttsPerMinibatchForAllEpochs[epoch];
m_actualnumberOfuttsPerMinibatch = m_numberOfuttsPerMinibatch;
m_sentenceEnd.assign(m_numberOfuttsPerMinibatch, true);
m_processedFrame.assign(m_numberOfuttsPerMinibatch, 0);
m_toProcess.assign(m_numberOfuttsPerMinibatch, 0);
m_switchFrame.assign(m_numberOfuttsPerMinibatch, 0);
if (m_trainOrTest)
{
StartMinibatchLoopToTrainOrTest(mbSize,epoch,requestedEpochSamples);

Просмотреть файл

@@ -1,3 +1,4 @@
<<<<<<< HEAD
//
// <copyright file="HTKMLFReader.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
@@ -111,4 +112,117 @@ public:
void SetSentenceEnd(int /*actualMbSize*/){};
};
=======
//
// <copyright file="HTKMLFReader.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// HTKMLFReader.h - Include file for the MTK and MLF format of features and samples
#pragma once
#include "DataReader.h"
#include "commandArgUtil.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class HTKMLFReader : public IDataReader<ElemType>
{
private:
msra::dbn::minibatchiterator* m_mbiter;
msra::dbn::minibatchsource* m_frameSource;
msra::dbn::minibatchreadaheadsource* m_readAheadSource;
msra::dbn::FileEvalSource* m_fileEvalSource;
msra::dbn::latticesource* m_lattices;
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
vector<bool> m_sentenceEnd;
bool m_readAhead;
bool m_truncated;
vector<size_t> m_processedFrame;
intargvector m_numberOfuttsPerMinibatchForAllEpochs;
size_t m_numberOfuttsPerMinibatch;
size_t m_actualnumberOfuttsPerMinibatch;
size_t m_mbSize;
vector<size_t> m_toProcess;
vector<size_t> m_switchFrame;
bool m_noData;
bool m_trainOrTest; // if false, in file writing mode
std::map<LabelIdType, LabelType> m_idToLabelMap;
bool m_partialMinibatch; // allow partial minibatches?
std::vector<ElemType*> m_featuresBufferMultiUtt;
std::vector<size_t> m_featuresBufferAllocatedMultiUtt;
std::vector<ElemType*> m_labelsBufferMultiUtt;
std::vector<size_t> m_labelsBufferAllocatedMultiUtt;
std::vector<size_t> m_featuresStartIndexMultiUtt;
std::vector<size_t> m_labelsStartIndexMultiUtt;
std::vector<ElemType*> m_featuresBufferMultiIO;
std::vector<size_t> m_featuresBufferAllocatedMultiIO;
std::vector<ElemType*> m_labelsBufferMultiIO;
std::vector<size_t> m_labelsBufferAllocatedMultiIO;
std::map<std::wstring,size_t> m_featureNameToIdMap;
std::map<std::wstring,size_t> m_labelNameToIdMap;
std::map<std::wstring,size_t> m_nameToTypeMap;
std::map<std::wstring,size_t> m_featureNameToDimMap;
std::map<std::wstring,size_t> m_labelNameToDimMap;
// for writing outputs to files (standard single input/output network) - deprecate eventually
bool m_checkDictionaryKeys;
bool m_convertLabelsToTargets;
std::vector <bool> m_convertLabelsToTargetsMultiIO;
std::vector<std::vector<std::wstring>> m_inputFilesMultiIO;
size_t m_inputFileIndex;
std::vector<size_t> m_featDims;
std::vector<size_t> m_labelDims;
std::vector<std::vector<std::vector<ElemType>>>m_labelToTargetMapMultiIO;
void PrepareForTrainingOrTesting(const ConfigParameters& config);
void PrepareForWriting(const ConfigParameters& config);
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
void StartMinibatchLoopToWrite(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
bool ReNewBufferForMultiIO(size_t i);
size_t NumberSlicesInEachRecurrentIter() { return m_numberOfuttsPerMinibatch ;}
void SetNbrSlicesEachRecurrentIter(const size_t) { };
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels);
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
enum InputOutputTypes
{
real,
category,
};
public:
virtual void Init(const ConfigParameters& config);
virtual void Destroy() {delete this;}
virtual ~HTKMLFReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<unsigned, LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool DataEnd(EndDataType endDataType);
void SetSentenceEndInBatch(vector<size_t> &/*sentenceEnd*/);
void SetSentenceEnd(int /*actualMbSize*/){};
};
>>>>>>> bd4866bec82772b2e984f7e897b1e64cd0855d7d
}}}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -768,6 +768,7 @@ private:
if (chunkdata.isinram())
return false;
if (verbosity)
fprintf (stderr, "requirerandomizedchunk: paging in randomized chunk %d (frame range [%d..%d]), %d resident in RAM\n", chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
msra::util::attempt (5, [&]() // (reading from network)
{
@ -858,6 +859,7 @@ public:
transcripts.clear();
// return these utterances
if (verbosity > 0)
fprintf (stderr, "getbatch: getting utterances %d..%d (%d frames out of %d requested) in sweep %d\n", spos, epos -1, mbframes, framesrequested, sweep);
size_t tspos = 0; // relative start of utterance 'pos' within the returned minibatch
for (size_t pos = spos; pos < epos; pos++)
@ -922,6 +924,7 @@ public:
const size_t lastchunk = chunkforframepos (globalte-1);
const size_t windowbegin = randomizedchunks[firstchunk].windowbegin;
const size_t windowend = randomizedchunks[lastchunk].windowend;
if (verbosity > 0)
fprintf (stderr, "getbatch: getting randomized frames [%d..%d] (%d frames out of %d requested) in sweep %d; chunks [%d..%d] -> chunk window [%d..%d)\n",
globalts, globalte, mbframes, framesrequested, sweep, firstchunk, lastchunk, windowbegin, windowend);
// release all data outside, and page in all data inside

Просмотреть файл

@ -102,7 +102,7 @@ class minibatchutterancesourcemulti : public minibatchsource
bool isinram() const { return !frames.empty(); }
// page in data for this chunk
// We pass in the feature info variables by ref which will be filled lazily upon first read
void requiredata (string & featkind, size_t & featdim, unsigned int & sampperiod, const latticesource & latticesource) const
void requiredata (string & featkind, size_t & featdim, unsigned int & sampperiod, const latticesource & latticesource, int verbosity=0) const
{
if (numutterances() == 0)
throw std::logic_error ("requiredata: cannot page in virgin block");
@ -132,6 +132,7 @@ class minibatchutterancesourcemulti : public minibatchsource
latticesource.getlattices (utteranceset[i].key(), lattices[i], uttframes.cols());
}
//fprintf (stderr, "\n");
if (verbosity)
fprintf (stderr, "requiredata: %d utterances read\n", utteranceset.size());
}
catch (...)
@ -568,6 +569,7 @@ private:
return sweep;
currentsweep = sweep;
if (verbosity>0)
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %d in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
const size_t sweepts = sweep * _totalframes; // first global frame index for this sweep
@ -919,10 +921,11 @@ private:
{
auto & chunk = randomizedchunks[m][chunkindex];
auto & chunkdata = chunk.getchunkdata();
if (verbosity)
fprintf (stderr, "feature set %d: requirerandomizedchunk: paging in randomized chunk %d (frame range [%d..%d]), %d resident in RAM\n", m, chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
msra::util::attempt (5, [&]() // (reading from network)
{
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices);
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices, verbosity);
});
}
chunksinram++;
@ -1029,7 +1032,8 @@ public:
}
}
// return these utterances
fprintf (stderr, "getbatch: getting utterances %d..%d (%d frames out of %d requested) in sweep %d\n", spos, epos -1, mbframes, framesrequested, sweep);
if (verbosity > 0)
fprintf (stderr, "getbatch: getting utterances %d..%d (%d frames out of %d requested) in sweep %d\n", spos, epos -1, mbframes, framesrequested, sweep);
size_t tspos = 0; // relative start of utterance 'pos' within the returned minibatch
for (size_t pos = spos; pos < epos; pos++)
{
@ -1107,6 +1111,7 @@ public:
const size_t lastchunk = chunkforframepos (globalte-1);
const size_t windowbegin = randomizedchunks[0][firstchunk].windowbegin;
const size_t windowend = randomizedchunks[0][lastchunk].windowend;
if (verbosity)
fprintf (stderr, "getbatch: getting randomized frames [%d..%d] (%d frames out of %d requested) in sweep %d; chunks [%d..%d] -> chunk window [%d..%d)\n",
globalts, globalte, mbframes, framesrequested, sweep, firstchunk, lastchunk, windowbegin, windowend);
// release all data outside, and page in all data inside
@ -1230,3 +1235,4 @@ public:
};
};};

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -60,7 +60,7 @@
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>EVALDLL;WIN32;_DEBUG;_WINDOWS;_USRDLL;UCIREADER_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
@ -79,7 +79,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>Use</PrecompiledHeader>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
@ -107,6 +107,7 @@
<ClInclude Include="..\..\Common\Include\Eval.h" />
<ClInclude Include="..\..\Common\Include\File.h" />
<ClInclude Include="..\..\Common\Include\fileutil.h" />
<ClInclude Include="..\..\Common\Include\TimerUtility.h" />
<ClInclude Include="EvalReader.h" />
<ClInclude Include="EvalWriter.h" />
<ClInclude Include="stdafx.h" />
@ -127,6 +128,7 @@
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\TimerUtility.cpp" />
<ClCompile Include="..\cn\ComputationNode.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>

Просмотреть файл

@ -1,50 +1,56 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\Common\BestGpu.cpp" />
<ClCompile Include="..\cn\ComputationNode.cpp" />
<ClCompile Include="..\cn\PTaskGraphBuilder.cpp" />
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="stdafx.cpp" />
<ClCompile Include="CNTKEval.cpp" />
<ClCompile Include="..\..\Common\ConfigFile.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\Eval.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="EvalReader.h" />
<ClInclude Include="EvalWriter.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="CNTKEval.h" />
<ClInclude Include="..\..\Common\Include\Eval.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\basetypes.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\File.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\fileutil.h">
<Filter>Common\Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Common">
<UniqueIdentifier>{bed53b47-70b1-494c-824d-0748362003b2}</UniqueIdentifier>
</Filter>
<Filter Include="Common\Include">
<UniqueIdentifier>{f3bf0104-8a08-40c9-a4d9-af8411c49669}</UniqueIdentifier>
</Filter>
</ItemGroup>
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\Common\BestGpu.cpp" />
<ClCompile Include="..\cn\ComputationNode.cpp" />
<ClCompile Include="..\cn\PTaskGraphBuilder.cpp" />
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="stdafx.cpp" />
<ClCompile Include="CNTKEval.cpp" />
<ClCompile Include="..\..\Common\ConfigFile.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\Eval.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\TimerUtility.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="EvalReader.h" />
<ClInclude Include="EvalWriter.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="CNTKEval.h" />
<ClInclude Include="..\..\Common\Include\Eval.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\basetypes.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\File.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\fileutil.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\TimerUtility.h">
<Filter>Common\Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Common">
<UniqueIdentifier>{bed53b47-70b1-494c-824d-0748362003b2}</UniqueIdentifier>
</Filter>
<Filter Include="Common\Include">
<UniqueIdentifier>{f3bf0104-8a08-40c9-a4d9-af8411c49669}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

Просмотреть файл

@ -3111,6 +3111,9 @@ protected: \
inputGradientValues.Print("child Gradient-in/out");
inputFunctionValues.Print("child Function values");
#endif
//currently we only support one combination when the input is sparse.
if (inputFunctionValues.GetMatrixType() == SPARSE && inputGradientValues.GetMatrixType() == DENSE && gradientValues.GetMatrixType() == DENSE)
inputGradientValues.SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol);
Matrix<ElemType>::MultiplyAndAdd(gradientValues, false, inputFunctionValues, true, inputGradientValues);
#if DUMPOUTPUT

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,350 +1,349 @@
//
// <copyright file="SimpleEvaluator.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "ComputationNetwork.h"
#include "ComputationNetworkHelper.h"
#include "DataReader.h"
#include <vector>
#include <string>
#include <stdexcept>
#include "basetypes.h"
#include "fileutil.h"
#include "commandArgUtil.h"
#include <fstream>
using namespace std;
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class SimpleEvaluator : ComputationNetworkHelper<ElemType>
{
typedef ComputationNetworkHelper<ElemType> B;
using B::UpdateEvalTimeStamps;
protected:
typedef ComputationNode<ElemType>* ComputationNodePtr;
typedef ClassBasedCrossEntropyWithSoftmaxNode<ElemType>* ClassBasedCrossEntropyWithSoftmaxNodePtr;
public:
SimpleEvaluator(ComputationNetwork<ElemType>& net, const size_t numMBsToShowResult=100, const int traceLevel=0)
: m_net(net), m_numMBsToShowResult(numMBsToShowResult), m_traceLevel(traceLevel)
{
}
//returns evaluation node values per sample determined by evalNodeNames (which can include both training and eval criterion nodes)
vector<ElemType> Evaluate(IDataReader<ElemType>& dataReader, const vector<wstring>& evalNodeNames, const size_t mbSize, const size_t testSize=requestDataSize)
{
//specify evaluation nodes
std::vector<ComputationNodePtr> evalNodes;
if (evalNodeNames.size() == 0)
{
fprintf (stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n");
if (m_net.EvaluationNodes().size() == 0 && m_net.FinalCriterionNodes().size() == 0)
throw std::logic_error("There is no default evalnodes or training criterion node specified in the network.");
for (int i=0; i< m_net.EvaluationNodes().size(); i++)
evalNodes.push_back(m_net.EvaluationNodes()[i]);
for (int i=0; i< m_net.FinalCriterionNodes().size(); i++)
evalNodes.push_back(m_net.FinalCriterionNodes()[i]);
}
else
{
for (int i=0; i<evalNodeNames.size(); i++)
{
ComputationNodePtr node = m_net.GetNodeFromName(evalNodeNames[i]);
m_net.BuildAndValidateNetwork(node);
if (!node->FunctionValues().GetNumElements() == 1)
{
throw std::logic_error("The nodes passed to SimpleEvaluator::Evaluate function must be either eval or training criterion nodes (which evalues to 1x1 value).");
}
evalNodes.push_back(node);
}
}
//initialize eval results
std::vector<ElemType> evalResults;
for (int i=0; i< evalNodes.size(); i++)
{
evalResults.push_back((ElemType)0);
evalNodes[i]->Reset();
}
//prepare features and labels
std::vector<ComputationNodePtr> & FeatureNodes = m_net.FeatureNodes();
std::vector<ComputationNodePtr> & labelNodes = m_net.LabelNodes();
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i=0; i<FeatureNodes.size(); i++)
{
inputMatrices[FeatureNodes[i]->NodeName()] = &FeatureNodes[i]->FunctionValues();
}
for (size_t i=0; i<labelNodes.size(); i++)
{
inputMatrices[labelNodes[i]->NodeName()] = &labelNodes[i]->FunctionValues();
}
//evaluate through minibatches
size_t totalEpochSamples = 0;
size_t numMBsRun = 0;
size_t actualMBSize = 0;
size_t numSamplesLastMBs = 0;
size_t lastMBsRun = 0; //MBs run before this display
std::vector<ElemType> evalResultsLastMBs;
for (int i=0; i< evalResults.size(); i++)
evalResultsLastMBs.push_back((ElemType)0);
dataReader.StartMinibatchLoop(mbSize, 0, testSize);
dataReader.SetNbrSlicesEachRecurrentIter(1);
for (int i=0; i<evalNodes.size(); i++)
{
if (evalNodes[i]->OperationName() == L"ClassBasedCrossEntropyWithSoftmax")
{
size_t vSz = FeatureNodes[0]->FunctionValues().GetNumRows();
if(inputMatrices.find(L"classinfo") == inputMatrices.end())
{
inputMatrices[L"idx2cls"] = new Matrix<ElemType>(vSz, 1, m_net.GetDeviceID());
inputMatrices[L"classinfo"] = new Matrix<ElemType>(vSz, 1, m_net.GetDeviceID());
}
ClassBasedCrossEntropyWithSoftmaxNodePtr crtNode = (ClassBasedCrossEntropyWithSoftmaxNodePtr) evalNodes[i];
crtNode->AddClassInfo(inputMatrices[L"classinfo"], inputMatrices[L"idx2cls"]);
}
}
while (dataReader.GetMinibatch(inputMatrices))
{
UpdateEvalTimeStamps(FeatureNodes);
UpdateEvalTimeStamps(labelNodes);
actualMBSize = m_net.GetActualMBSize();
m_net.SetActualMiniBatchSize(actualMBSize);
m_net.SetActualNbrSlicesInEachRecIter(dataReader.NumberSlicesInEachRecurrentIter());
dataReader.SetSentenceEndInBatch(m_net.m_sentenceEnd);
for (int i=0; i<evalNodes.size(); i++)
{
m_net.Evaluate(evalNodes[i]);
evalResults[i] += evalNodes[i]->FunctionValues().Get00Element(); //criterionNode should be a scalar
}
totalEpochSamples += actualMBSize;
numMBsRun++;
if (m_traceLevel > 0)
{
numSamplesLastMBs += actualMBSize;
if (numMBsRun % m_numMBsToShowResult == 0)
{
DisplayEvalStatistics(lastMBsRun+1, numMBsRun, numSamplesLastMBs, evalNodes, evalResults, evalResultsLastMBs);
for (int i=0; i<evalResults.size(); i++)
{
evalResultsLastMBs[i] = evalResults[i];
}
numSamplesLastMBs = 0;
lastMBsRun = numMBsRun;
}
}
/// call DataEnd to check if end of sentence is reached
/// datareader will do its necessary/specific process for sentence ending
dataReader.DataEnd(endDataSentence);
}
// show last batch of results
if (m_traceLevel > 0 && numSamplesLastMBs > 0)
{
DisplayEvalStatistics(lastMBsRun+1, numMBsRun, numSamplesLastMBs, evalNodes, evalResults, evalResultsLastMBs);
}
//final statistics
for (int i=0; i<evalResultsLastMBs.size(); i++)
{
evalResultsLastMBs[i] = 0;
}
fprintf(stderr,"Final Results: ");
DisplayEvalStatistics(1, numMBsRun, totalEpochSamples, evalNodes, evalResults, evalResultsLastMBs);
for (int i=0; i<evalResults.size(); i++)
{
evalResults[i] /= totalEpochSamples;
}
if (inputMatrices[L"classinfo"])
{
delete inputMatrices[L"classinfo"];
inputMatrices.erase(L"classinfo");
}
if (inputMatrices[L"idx2cls"])
{
delete inputMatrices[L"idx2cls"];
inputMatrices.erase(L"idx2cls");
}
return evalResults;
}
//returns error rate
ElemType EvaluateUnroll(IDataReader<ElemType>& dataReader, const size_t mbSize, ElemType &evalSetCrossEntropy, const wchar_t* output = nullptr, const size_t testSize = requestDataSize)
{
std::vector<ComputationNodePtr> FeatureNodes = m_net.FeatureNodes();
std::vector<ComputationNodePtr> labelNodes = m_net.LabelNodes();
std::vector<ComputationNodePtr> criterionNodes = m_net.FinalCriterionNodes();
std::vector<ComputationNodePtr> evaluationNodes = m_net.EvaluationNodes();
if (criterionNodes.size()==0)
{
throw std::runtime_error("No CrossEntropyWithSoftmax node found\n");
}
if (evaluationNodes.size()==0)
{
throw std::runtime_error("No Evaluation node found\n");
}
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i=0; i<FeatureNodes.size(); i++)
{
inputMatrices[FeatureNodes[i]->NodeName()] = &FeatureNodes[i]->FunctionValues();
}
for (size_t i=0; i<labelNodes.size(); i++)
{
inputMatrices[labelNodes[i]->NodeName()] = &labelNodes[i]->FunctionValues();
}
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1,1, m_net.GetDeviceID());
dataReader.StartMinibatchLoop(mbSize, 0, testSize);
ElemType epochEvalError = 0;
ElemType epochCrossEntropy = 0;
size_t totalEpochSamples = 0;
ElemType prevEpochEvalError = 0;
ElemType prevEpochCrossEntropy = 0;
size_t prevTotalEpochSamples = 0;
size_t prevStart = 1;
size_t numSamples = 0;
ElemType crossEntropy = 0;
ElemType evalError = 0;
ofstream outputStream;
if (output)
{
#ifdef _MSC_VER
outputStream.open(output);
#else
outputStream.open(charpath(output)); // GCC does not implement wide-char pathnames here
#endif
}
size_t numMBsRun = 0;
size_t actualMBSize = 0;
while (dataReader.GetMinibatch(inputMatrices))
{
size_t nbrSamples = (size_t)(*inputMatrices[L"numberobs"])(0, 0);
actualMBSize = nbrSamples;
for (int npos = 0; npos < nbrSamples ; npos++)
{
FeatureNodes[npos]->UpdateEvalTimeStamp();
labelNodes[npos]->UpdateEvalTimeStamp();
m_net.Evaluate(criterionNodes[npos]); //use only the first criterion. Is there any possibility to use more?
m_net.Evaluate(evaluationNodes[npos]);
ElemType mbCrossEntropy = criterionNodes[npos]->FunctionValues().Get00Element(); // criterionNode should be a scalar
epochCrossEntropy += mbCrossEntropy;
ElemType mbEvalError = evaluationNodes[npos]->FunctionValues().Get00Element(); //criterionNode should be a scalar
epochEvalError += mbEvalError;
}
totalEpochSamples += actualMBSize;
if (outputStream.is_open())
{
//TODO: add support to dump multiple outputs
ComputationNodePtr outputNode = m_net.OutputNodes()[0];
foreach_column(j, outputNode->FunctionValues())
{
foreach_row(i,outputNode->FunctionValues())
{
outputStream<<outputNode->FunctionValues()(i,j)<<" ";
}
outputStream<<endl;
}
}
numMBsRun++;
if (numMBsRun % m_numMBsToShowResult == 0)
{
numSamples = (totalEpochSamples - prevTotalEpochSamples);
crossEntropy = epochCrossEntropy - prevEpochCrossEntropy;
evalError = epochEvalError - prevEpochEvalError;
fprintf(stderr, "Minibatch[%lu-%lu]: Samples Evaluated = %lu EvalErr Per Sample = %.8g Loss Per Sample = %.8g\n",
prevStart, numMBsRun, numSamples, evalError / numSamples, crossEntropy / numSamples);
prevTotalEpochSamples = totalEpochSamples;
prevEpochCrossEntropy = epochCrossEntropy;
prevEpochEvalError = epochEvalError;
prevStart = numMBsRun + 1;
}
}
// show final grouping of output
numSamples = totalEpochSamples - prevTotalEpochSamples;
if (numSamples > 0)
{
crossEntropy = epochCrossEntropy - prevEpochCrossEntropy;
evalError = epochEvalError - prevEpochEvalError;
fprintf(stderr, "Minibatch[%lu-%lu]: Samples Evaluated = %lu EvalErr Per Sample = %.8g Loss Per Sample = %.8g\n",
prevStart, numMBsRun, numSamples, evalError / numSamples, crossEntropy / numSamples);
}
//final statistics
epochEvalError /= (ElemType)totalEpochSamples;
epochCrossEntropy /= (ElemType)totalEpochSamples;
fprintf(stderr, "Overall: Samples Evaluated = %lu EvalErr Per Sample = %.8g Loss Per Sample = %.8g\n", totalEpochSamples, epochEvalError, epochCrossEntropy);
if (outputStream.is_open())
{
outputStream.close();
}
evalSetCrossEntropy = epochCrossEntropy;
return epochEvalError;
}
protected:
void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastMBs, const vector<ComputationNodePtr>& evalNodes,
const vector<ElemType> & evalResults, const vector<ElemType> & evalResultsLastMBs)
{
fprintf(stderr,"Minibatch[%lu-%lu]: Samples Seen = %lu ", startMBNum, endMBNum, numSamplesLastMBs);
for (size_t i=0; i<evalResults.size(); i++)
{
fprintf(stderr, "%ls/Sample = %.8g ", evalNodes[i]->NodeName().c_str(), (evalResults[i]-evalResultsLastMBs[i])/numSamplesLastMBs);
}
fprintf(stderr, "\n");
}
protected:
ComputationNetwork<ElemType>& m_net;
size_t m_numMBsToShowResult;
int m_traceLevel;
void operator=(const SimpleEvaluator&); // (not assignable)
};
}}}
//
// <copyright file="SimpleEvaluator.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "ComputationNetwork.h"
#include "ComputationNetworkHelper.h"
#include "DataReader.h"
#include <vector>
#include <string>
#include <stdexcept>
#include "basetypes.h"
#include "fileutil.h"
#include "commandArgUtil.h"
#include <fstream>
using namespace std;
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class SimpleEvaluator : ComputationNetworkHelper<ElemType>
{
typedef ComputationNetworkHelper<ElemType> B;
using B::UpdateEvalTimeStamps;
protected:
typedef ComputationNode<ElemType>* ComputationNodePtr;
typedef ClassBasedCrossEntropyWithSoftmaxNode<ElemType>* ClassBasedCrossEntropyWithSoftmaxNodePtr;
public:
SimpleEvaluator(ComputationNetwork<ElemType>& net, const size_t numMBsToShowResult=100, const int traceLevel=0)
: m_net(net), m_numMBsToShowResult(numMBsToShowResult), m_traceLevel(traceLevel)
{
}
//returns evaluation node values per sample determined by evalNodeNames (which can include both training and eval criterion nodes)
vector<ElemType> Evaluate(IDataReader<ElemType>& dataReader, const vector<wstring>& evalNodeNames, const size_t mbSize, const size_t testSize=requestDataSize)
{
//specify evaluation nodes
std::vector<ComputationNodePtr> evalNodes;
if (evalNodeNames.size() == 0)
{
fprintf (stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n");
if (m_net.EvaluationNodes().size() == 0 && m_net.FinalCriterionNodes().size() == 0)
throw std::logic_error("There is no default evalnodes or training criterion node specified in the network.");
for (int i=0; i< m_net.EvaluationNodes().size(); i++)
evalNodes.push_back(m_net.EvaluationNodes()[i]);
for (int i=0; i< m_net.FinalCriterionNodes().size(); i++)
evalNodes.push_back(m_net.FinalCriterionNodes()[i]);
}
else
{
for (int i=0; i<evalNodeNames.size(); i++)
{
ComputationNodePtr node = m_net.GetNodeFromName(evalNodeNames[i]);
m_net.BuildAndValidateNetwork(node);
if (!node->FunctionValues().GetNumElements() == 1)
{
throw std::logic_error("The nodes passed to SimpleEvaluator::Evaluate function must be either eval or training criterion nodes (which evalues to 1x1 value).");
}
evalNodes.push_back(node);
}
}
//initialize eval results
std::vector<ElemType> evalResults;
for (int i=0; i< evalNodes.size(); i++)
{
evalResults.push_back((ElemType)0);
evalNodes[i]->Reset();
}
//prepare features and labels
std::vector<ComputationNodePtr> & FeatureNodes = m_net.FeatureNodes();
std::vector<ComputationNodePtr> & labelNodes = m_net.LabelNodes();
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i=0; i<FeatureNodes.size(); i++)
{
inputMatrices[FeatureNodes[i]->NodeName()] = &FeatureNodes[i]->FunctionValues();
}
for (size_t i=0; i<labelNodes.size(); i++)
{
inputMatrices[labelNodes[i]->NodeName()] = &labelNodes[i]->FunctionValues();
}
//evaluate through minibatches
size_t totalEpochSamples = 0;
size_t numMBsRun = 0;
size_t actualMBSize = 0;
size_t numSamplesLastMBs = 0;
size_t lastMBsRun = 0; //MBs run before this display
std::vector<ElemType> evalResultsLastMBs;
for (int i=0; i< evalResults.size(); i++)
evalResultsLastMBs.push_back((ElemType)0);
dataReader.StartMinibatchLoop(mbSize, 0, testSize);
for (int i=0; i<evalNodes.size(); i++)
{
if (evalNodes[i]->OperationName() == L"ClassBasedCrossEntropyWithSoftmax")
{
size_t vSz = FeatureNodes[0]->FunctionValues().GetNumRows();
if(inputMatrices.find(L"classinfo") == inputMatrices.end())
{
inputMatrices[L"idx2cls"] = new Matrix<ElemType>(vSz, 1, m_net.GetDeviceID());
inputMatrices[L"classinfo"] = new Matrix<ElemType>(vSz, 1, m_net.GetDeviceID());
}
ClassBasedCrossEntropyWithSoftmaxNodePtr crtNode = (ClassBasedCrossEntropyWithSoftmaxNodePtr) evalNodes[i];
crtNode->AddClassInfo(inputMatrices[L"classinfo"], inputMatrices[L"idx2cls"]);
}
}
while (dataReader.GetMinibatch(inputMatrices))
{
UpdateEvalTimeStamps(FeatureNodes);
UpdateEvalTimeStamps(labelNodes);
actualMBSize = m_net.GetActualMBSize();
m_net.SetActualMiniBatchSize(actualMBSize);
m_net.SetActualNbrSlicesInEachRecIter(dataReader.NumberSlicesInEachRecurrentIter());
dataReader.SetSentenceEndInBatch(m_net.m_sentenceEnd);
for (int i=0; i<evalNodes.size(); i++)
{
m_net.Evaluate(evalNodes[i]);
evalResults[i] += evalNodes[i]->FunctionValues().Get00Element(); //criterionNode should be a scalar
}
totalEpochSamples += actualMBSize;
numMBsRun++;
if (m_traceLevel > 0)
{
numSamplesLastMBs += actualMBSize;
if (numMBsRun % m_numMBsToShowResult == 0)
{
DisplayEvalStatistics(lastMBsRun+1, numMBsRun, numSamplesLastMBs, evalNodes, evalResults, evalResultsLastMBs);
for (int i=0; i<evalResults.size(); i++)
{
evalResultsLastMBs[i] = evalResults[i];
}
numSamplesLastMBs = 0;
lastMBsRun = numMBsRun;
}
}
/// call DataEnd to check if end of sentence is reached
/// datareader will do its necessary/specific process for sentence ending
dataReader.DataEnd(endDataSentence);
}
// show last batch of results
if (m_traceLevel > 0 && numSamplesLastMBs > 0)
{
DisplayEvalStatistics(lastMBsRun+1, numMBsRun, numSamplesLastMBs, evalNodes, evalResults, evalResultsLastMBs);
}
//final statistics
for (int i=0; i<evalResultsLastMBs.size(); i++)
{
evalResultsLastMBs[i] = 0;
}
fprintf(stderr,"Final Results: ");
DisplayEvalStatistics(1, numMBsRun, totalEpochSamples, evalNodes, evalResults, evalResultsLastMBs);
for (int i=0; i<evalResults.size(); i++)
{
evalResults[i] /= totalEpochSamples;
}
if (inputMatrices[L"classinfo"])
{
delete inputMatrices[L"classinfo"];
inputMatrices.erase(L"classinfo");
}
if (inputMatrices[L"idx2cls"])
{
delete inputMatrices[L"idx2cls"];
inputMatrices.erase(L"idx2cls");
}
return evalResults;
}
//returns error rate
ElemType EvaluateUnroll(IDataReader<ElemType>& dataReader, const size_t mbSize, ElemType &evalSetCrossEntropy, const wchar_t* output = nullptr, const size_t testSize = requestDataSize)
{
std::vector<ComputationNodePtr> FeatureNodes = m_net.FeatureNodes();
std::vector<ComputationNodePtr> labelNodes = m_net.LabelNodes();
std::vector<ComputationNodePtr> criterionNodes = m_net.FinalCriterionNodes();
std::vector<ComputationNodePtr> evaluationNodes = m_net.EvaluationNodes();
if (criterionNodes.size()==0)
{
throw std::runtime_error("No CrossEntropyWithSoftmax node found\n");
}
if (evaluationNodes.size()==0)
{
throw std::runtime_error("No Evaluation node found\n");
}
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i=0; i<FeatureNodes.size(); i++)
{
inputMatrices[FeatureNodes[i]->NodeName()] = &FeatureNodes[i]->FunctionValues();
}
for (size_t i=0; i<labelNodes.size(); i++)
{
inputMatrices[labelNodes[i]->NodeName()] = &labelNodes[i]->FunctionValues();
}
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1,1, m_net.GetDeviceID());
dataReader.StartMinibatchLoop(mbSize, 0, testSize);
ElemType epochEvalError = 0;
ElemType epochCrossEntropy = 0;
size_t totalEpochSamples = 0;
ElemType prevEpochEvalError = 0;
ElemType prevEpochCrossEntropy = 0;
size_t prevTotalEpochSamples = 0;
size_t prevStart = 1;
size_t numSamples = 0;
ElemType crossEntropy = 0;
ElemType evalError = 0;
ofstream outputStream;
if (output)
{
#ifdef _MSC_VER
outputStream.open(output);
#else
outputStream.open(charpath(output)); // GCC does not implement wide-char pathnames here
#endif
}
size_t numMBsRun = 0;
size_t actualMBSize = 0;
while (dataReader.GetMinibatch(inputMatrices))
{
size_t nbrSamples = (size_t)(*inputMatrices[L"numberobs"])(0, 0);
actualMBSize = nbrSamples;
for (int npos = 0; npos < nbrSamples ; npos++)
{
FeatureNodes[npos]->UpdateEvalTimeStamp();
labelNodes[npos]->UpdateEvalTimeStamp();
m_net.Evaluate(criterionNodes[npos]); //use only the first criterion. Is there any possibility to use more?
m_net.Evaluate(evaluationNodes[npos]);
ElemType mbCrossEntropy = criterionNodes[npos]->FunctionValues().Get00Element(); // criterionNode should be a scalar
epochCrossEntropy += mbCrossEntropy;
ElemType mbEvalError = evaluationNodes[npos]->FunctionValues().Get00Element(); //criterionNode should be a scalar
epochEvalError += mbEvalError;
}
totalEpochSamples += actualMBSize;
if (outputStream.is_open())
{
//TODO: add support to dump multiple outputs
ComputationNodePtr outputNode = m_net.OutputNodes()[0];
foreach_column(j, outputNode->FunctionValues())
{
foreach_row(i,outputNode->FunctionValues())
{
outputStream<<outputNode->FunctionValues()(i,j)<<" ";
}
outputStream<<endl;
}
}
numMBsRun++;
if (numMBsRun % m_numMBsToShowResult == 0)
{
numSamples = (totalEpochSamples - prevTotalEpochSamples);
crossEntropy = epochCrossEntropy - prevEpochCrossEntropy;
evalError = epochEvalError - prevEpochEvalError;
fprintf(stderr, "Minibatch[%lu-%lu]: Samples Evaluated = %lu EvalErr Per Sample = %.8g Loss Per Sample = %.8g\n",
prevStart, numMBsRun, numSamples, evalError / numSamples, crossEntropy / numSamples);
prevTotalEpochSamples = totalEpochSamples;
prevEpochCrossEntropy = epochCrossEntropy;
prevEpochEvalError = epochEvalError;
prevStart = numMBsRun + 1;
}
}
// show final grouping of output
numSamples = totalEpochSamples - prevTotalEpochSamples;
if (numSamples > 0)
{
crossEntropy = epochCrossEntropy - prevEpochCrossEntropy;
evalError = epochEvalError - prevEpochEvalError;
fprintf(stderr, "Minibatch[%lu-%lu]: Samples Evaluated = %lu EvalErr Per Sample = %.8g Loss Per Sample = %.8g\n",
prevStart, numMBsRun, numSamples, evalError / numSamples, crossEntropy / numSamples);
}
//final statistics
epochEvalError /= (ElemType)totalEpochSamples;
epochCrossEntropy /= (ElemType)totalEpochSamples;
fprintf(stderr, "Overall: Samples Evaluated = %lu EvalErr Per Sample = %.8g Loss Per Sample = %.8g\n", totalEpochSamples, epochEvalError, epochCrossEntropy);
if (outputStream.is_open())
{
outputStream.close();
}
evalSetCrossEntropy = epochCrossEntropy;
return epochEvalError;
}
protected:
// Prints per-node average evaluation results for minibatches [startMBNum..endMBNum] to stderr.
// startMBNum/endMBNum:  minibatch range covered by this report.
// numSamplesLastMBs:    number of samples seen in that range (divisor for the per-sample averages).
// evalNodes:            evaluation nodes whose names label each printed result.
// evalResults:          cumulative results up to and including this range.
// evalResultsLastMBs:   cumulative results up to the previous report; the difference is this range's total.
void DisplayEvalStatistics(const size_t startMBNum, const size_t endMBNum, const size_t numSamplesLastMBs, const vector<ComputationNodePtr>& evalNodes,
    const vector<ElemType> & evalResults, const vector<ElemType> & evalResultsLastMBs)
{
    // Cast size_t explicitly: passing a raw size_t for %lu is undefined behavior on LLP64
    // platforms (64-bit Windows), where size_t is 64 bits but unsigned long is only 32.
    fprintf(stderr, "Minibatch[%lu-%lu]: Samples Seen = %lu ",
        (unsigned long)startMBNum, (unsigned long)endMBNum, (unsigned long)numSamplesLastMBs);
    for (size_t i = 0; i < evalResults.size(); i++)
    {
        // Report the delta since the previous report, averaged per sample.
        fprintf(stderr, "%ls/Sample = %.8g ", evalNodes[i]->NodeName().c_str(), (evalResults[i] - evalResultsLastMBs[i]) / numSamplesLastMBs);
    }
    fprintf(stderr, "\n");
}
protected:
// The network being evaluated; held by reference — must outlive this evaluator.
ComputationNetwork<ElemType>& m_net;
// A progress line is printed after every m_numMBsToShowResult minibatches.
size_t m_numMBsToShowResult;
// Verbosity level for diagnostic output.
int m_traceLevel;
void operator=(const SimpleEvaluator&); // declared but never defined: pre-C++11 idiom making the class non-assignable (equivalent of "= delete")
};
}}}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -139,6 +139,9 @@
<TreatOutputAsContent>true</TreatOutputAsContent>
<Message>Copy content files to target directory</Message>
</CustomBuildStep>
<PreBuildEvent>
<Command>prebuild.bat</Command>
</PreBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
@ -199,6 +202,9 @@
<Message>
</Message>
</CustomBuildStep>
<PreBuildEvent>
<Command>prebuild.bat</Command>
</PreBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
<Text Include="DefaultMacros.txt" />
@ -216,6 +222,7 @@
<ClInclude Include="..\..\Common\Include\fileutil.h" />
<ClInclude Include="..\..\Common\Include\hostname.h" />
<ClInclude Include="..\..\Common\Include\nvml.h" />
<ClInclude Include="..\..\Common\Include\TimerUtility.h" />
<ClInclude Include="CompositeComputationNode.h" />
<ClInclude Include="ComputationNetwork.h" />
<ClInclude Include="ComputationNetworkHelper.h" />
@ -249,6 +256,7 @@
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\TimerUtility.cpp" />
<ClCompile Include="cn.cpp" />
<ClCompile Include="ComputationNode.cpp" />
<ClCompile Include="ModelEditLanguage.cpp" />

Просмотреть файл

@ -43,6 +43,9 @@
<ClCompile Include="NetworkDescriptionLanguage.cpp">
<Filter>Network</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\TimerUtility.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h">
@ -138,6 +141,9 @@
<ClInclude Include="..\..\Common\Include\hostname.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\TimerUtility.h">
<Filter>Common\Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="modelEditor.txt">

Просмотреть файл

@ -0,0 +1,30 @@
@echo off
:: Generates buildinfo.h with build metadata: git branch/commit (when git is
:: available), builder account, build machine, and the build path.
echo #ifndef _BUILDINFO_H > buildinfo.h
echo #define _BUILDINFO_H >> buildinfo.h
FOR /F "usebackq" %%i IN (`hostname`) DO SET HOST=%%i
:: assuming hostname always exists
:: Probe whether git is on the PATH, suppressing all output.
:: (Fix: the original "git --version 2 > nul" passed "2" as an argument to git
:: and redirected stdout instead of stderr.)
git --version >nul 2>&1
:: 9009 is cmd.exe's "command is not recognized" exit code (original tested 9909).
if not %ERRORLEVEL% == 9009 (
    echo #define _GIT_EXIST >> buildinfo.h
    :: Echo directly from the FOR loop variable. The original did
    :: SET BRANCH=... and then used %BRANCH% inside this parenthesized block,
    :: but %VAR% is expanded when the whole block is parsed, so the defines
    :: came out empty/stale unless delayed expansion was enabled.
    FOR /F "usebackq" %%i IN (`git rev-parse --abbrev-ref HEAD`) DO echo #define _BUILDBRANCH_ "%%i" >> buildinfo.h
    FOR /F "usebackq" %%i IN (`git rev-parse HEAD`) DO echo #define _BUILDSHA1_ "%%i" >> buildinfo.h
)
echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h
echo #define _BUILDMACHINE_ "%HOST%" >> buildinfo.h
:: Double the backslashes in the script directory so the path is a valid C string literal.
set a=%~dp0
set buildpath="%a:\=\\%"
echo #define _BUILDPATH_ %buildpath% >> buildinfo.h
echo #endif >> buildinfo.h

Просмотреть файл

@ -1,217 +1,264 @@
//
// <copyright file="MatrixUnitTests.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#include "CppUnitTest.h"
#include "..\Math\Matrix.h"
#pragma warning (disable: 4244 4245 4305) // conversions and truncations; we don't care in this test project
#define epsilon 0.000001
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing
using namespace Microsoft::MSR::CNTK;
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
namespace CNTKMathTest
{
TEST_CLASS(MatrixUnitTest)
{
public:
//This test should fail if you don't have CUDA GPU (or working under remote desktop)
TEST_METHOD(MatrixChangeModesBetweenDenseAndSparseTests_Simple)
{
Matrix<float> A;
A.AssignTruncateBottomOf(Matrix<float>::RandomUniform(4096,2048,-3,0.1,0),0);
long n0 = A.MatrixNorm0();
Assert::IsTrue(MatrixType::DENSE==A.GetMatrixType());
A.SwitchToMatrixType(MatrixType::SPARSE);
Assert::IsTrue(MatrixType::SPARSE==A.GetMatrixType());
long n1 = A.MatrixNorm0();
Assert::AreEqual<long>(n0,n1);
A.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(MatrixType::DENSE==A.GetMatrixType());
}
TEST_METHOD(MatrixSparseTimesDense)
{
Matrix<float> Ad; //DENSE
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(4096,2048,-3,0.1,0),0);//DENSE
Matrix<float> As(Ad);//DENSE
As.SwitchToMatrixType(MatrixType::SPARSE); //!!! MATRIX As becomes sparse
Matrix<float> B = Matrix<float>::RandomGaussian(2048,128,1,4); //DENSE
Matrix<float> C = Matrix<float>::RandomGaussian(4096,128,1,2); //DENSE
Matrix<float> C1(C); //DENSE
float alpha = 0.3, beta = 2;
bool transposeA=false, transposeB=false;
Matrix<float>::MultiplyAndWeightedAdd(alpha,Ad,transposeA,B,transposeB,beta,C); // DENSE*DENSE
Matrix<float>::MultiplyAndWeightedAdd(alpha,As,transposeA,B,transposeB,beta,C1);// SPARSE*DENSE
Assert::IsTrue(C1.IsEqualTo(C,0.00001));
}
TEST_METHOD(MatrixDenseTimesSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
As.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
Matrix<float> B = Matrix<float>::RandomGaussian(2048,1024,1,4);
Matrix<float> C = Matrix<float>::RandomGaussian(2048,2048,1,2);
Matrix<float> C1(C);
float alpha = 0.3, beta = 0;
bool transposeA=false, transposeB=false;
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,Ad,transposeB,beta,C);
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,As,transposeB,beta,C1);
Assert::IsTrue(C1.IsEqualTo(C,0.0001));
alpha = 3.3, beta = 1.3;
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,Ad,transposeB,beta,C);
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,As,transposeB,beta,C1);
Assert::IsTrue(C1.IsEqualTo(C,0.00005)); //Seems like bad precision
}
TEST_METHOD(MatrixSparseTimesSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(2048,1024,-5,0.4,0),0);
Matrix<float> Bs(Bd);
Matrix<float> Cd;
Cd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,1024,-4,0.2,0),0);
Matrix<float> Cs(Cd);
float alpha = 2.4, beta=0;
bool transposeA = false, transposeB=false;
Matrix<float>::MultiplyAndWeightedAdd(alpha,Ad,transposeA,Bd,transposeB,beta,Cd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Cs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::MultiplyAndWeightedAdd(alpha,As,transposeA,Bs,transposeB,beta,Cs);
Cs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Cs.IsEqualTo(Cd,0.00001));
alpha = 2.4, beta=3.4;
Cs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::MultiplyAndWeightedAdd(alpha,Ad,transposeA,Bd,transposeB,beta,Cd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Cs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::MultiplyAndWeightedAdd(alpha,As,transposeA,Bs,transposeB,beta,Cs);
Cs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Cs.IsEqualTo(Cd,0.00001));
}
TEST_METHOD(MatrixSparsePlusSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bs(Bd);
float alpha = 1.0*rand() / RAND_MAX;
Matrix<float>::ScaleAndAdd(alpha,Ad,Bd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::ScaleAndAdd(alpha,As,Bs);
Bs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Bs.IsEqualTo(Bd,0.00001));
}
TEST_METHOD(MatrixDensePlusSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bs(Bd);
float alpha = 1.0*rand() / RAND_MAX;
Matrix<float>::ScaleAndAdd(alpha,Ad,Bd);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::ScaleAndAdd(alpha,Ad,Bs);
Bs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Bs.IsEqualTo(Bd,0.00001));
}
TEST_METHOD(MatrixSparsePlusDense)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bd1(Bd);
float alpha = 1.0*rand() / RAND_MAX;
Matrix<float>::ScaleAndAdd(alpha,Ad,Bd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::ScaleAndAdd(alpha,As,Bd1);
Assert::IsTrue(Bd1.IsEqualTo(Bd,0.00001));
}
TEST_METHOD(MatrixSparseElementWisePower)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
As.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bs(Bd);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Ad^=2.3; //DENSE
As^=2.3; //SPARSE
Assert::IsTrue(As.IsEqualTo(Ad,0.00001));
Assert::IsTrue(Ad.IsEqualTo(As,0.00001));
Bd.AssignElementPowerOf(Ad,3.2);
Bs.AssignElementPowerOf(As,3.2);
#ifdef CHECK
Bs.SwitchToMatrixType(DENSE);
Bd.TransferFromDeviceToDevice(0,CPUDEVICE);
Bs.TransferFromDeviceToDevice(0,CPUDEVICE);
for (int r = 0; r < Bd.GetNumRows(); ++r)
for (int c = 0; c < Bd.GetNumCols(); ++c)
{
float dVal = Bd(r,c);
float sVal = Bs(r,c);
float diff = sVal - dVal;
if (fabs(diff) > 0.001)
cout << "[" << r << ", " << c << "]: " << sVal << " and " << dVal;
}
#endif
Assert::IsTrue(Bs.IsEqualTo(Bd,0.0001));
Assert::IsTrue(Bd.IsEqualTo(Bs,0.0001));
}
};
//
// <copyright file="MatrixUnitTests.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#include "CppUnitTest.h"
#include "..\Math\Matrix.h"
#pragma warning (disable: 4244 4245 4305) // conversions and truncations; we don't care in this test project
#define epsilon 0.000001
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing
using namespace Microsoft::MSR::CNTK;
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
namespace CNTKMathTest
{
// Unit tests for Matrix<ElemType> sparse/dense interoperability: mode switching,
// sparse*dense / dense*sparse / sparse*sparse products, mixed-mode additions, and
// element-wise power. The common pattern is to compute the same operation on a
// dense copy and a sparse copy of identical data and assert (near-)equality
// within a tolerance. AssignTruncateBottomOf(RandomUniform(..., -3, 0.1, ...), 0)
// produces a mostly-zero matrix, i.e. a natural sparse test input.
TEST_CLASS(MatrixUnitTest)
{
public:
//This test should fail if you don't have CUDA GPU (or working under remote desktop)
// Round-trips a matrix DENSE -> SPARSE -> DENSE and checks that the zero-norm
// (count of non-zero elements) survives the conversion.
TEST_METHOD(MatrixChangeModesBetweenDenseAndSparseTests_Simple)
{
Matrix<float> A;
A.AssignTruncateBottomOf(Matrix<float>::RandomUniform(4096,2048,-3,0.1,0),0);
long n0 = A.MatrixNorm0();
Assert::IsTrue(MatrixType::DENSE==A.GetMatrixType());
A.SwitchToMatrixType(MatrixType::SPARSE);
Assert::IsTrue(MatrixType::SPARSE==A.GetMatrixType());
long n1 = A.MatrixNorm0();
Assert::AreEqual<long>(n0,n1);
A.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(MatrixType::DENSE==A.GetMatrixType());
}
// C = alpha*A*B + beta*C must give the same result whether A is dense or sparse.
TEST_METHOD(MatrixSparseTimesDense)
{
Matrix<float> Ad; //DENSE
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(4096,2048,-3,0.1,0),0);//DENSE
Matrix<float> As(Ad);//DENSE
As.SwitchToMatrixType(MatrixType::SPARSE); //!!! MATRIX As becomes sparse
Matrix<float> B = Matrix<float>::RandomGaussian(2048,128,1,4); //DENSE
Matrix<float> C = Matrix<float>::RandomGaussian(4096,128,1,2); //DENSE
Matrix<float> C1(C); //DENSE
float alpha = 0.3, beta = 2;
bool transposeA=false, transposeB=false;
Matrix<float>::MultiplyAndWeightedAdd(alpha,Ad,transposeA,B,transposeB,beta,C); // DENSE*DENSE
Matrix<float>::MultiplyAndWeightedAdd(alpha,As,transposeA,B,transposeB,beta,C1);// SPARSE*DENSE
Assert::IsTrue(C1.IsEqualTo(C,0.00001));
}
// Dense B times (dense vs CSC-sparse) A, with beta = 0 and then beta != 0.
TEST_METHOD(MatrixDenseTimesSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
As.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
Matrix<float> B = Matrix<float>::RandomGaussian(2048,1024,1,4);
Matrix<float> C = Matrix<float>::RandomGaussian(2048,2048,1,2);
Matrix<float> C1(C);
float alpha = 0.3, beta = 0;
bool transposeA=false, transposeB=false;
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,Ad,transposeB,beta,C);
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,As,transposeB,beta,C1);
Assert::IsTrue(C1.IsEqualTo(C,0.0001));
alpha = 3.3, beta = 1.3;
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,Ad,transposeB,beta,C);
Matrix<float>::MultiplyAndWeightedAdd(alpha,B,transposeA,As,transposeB,beta,C1);
Assert::IsTrue(C1.IsEqualTo(C,0.00005)); //Seems like bad precision
}
// Same dense-times-sparse check as above, but explicitly on the CPU device.
// The second comparison is disabled because CPU-side IsEqualTo is not implemented yet.
TEST_METHOD(CPUMatrixDenseTimesSparse)
{
Matrix<float> Ad(CPUDEVICE);
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024, 2048, -3, 0.1, 0), 0);
Matrix<float> As(Ad);
As.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
Matrix<float> B = Matrix<float>::RandomGaussian(2048, 1024, 1, 4, USE_TIME_BASED_SEED, CPUDEVICE);
Matrix<float> C = Matrix<float>::RandomGaussian(2048, 2048, 1, 2, USE_TIME_BASED_SEED, CPUDEVICE);
Matrix<float> C1(C);
float alpha = 0.3, beta = 0;
bool transposeA = false, transposeB = false;
Matrix<float>::MultiplyAndWeightedAdd(alpha, B, transposeA, Ad, transposeB, beta, C);
Matrix<float>::MultiplyAndWeightedAdd(alpha, B, transposeA, As, transposeB, beta, C1);
Assert::IsTrue(C1.IsEqualTo(C, 0.0001));
alpha = 3.3, beta = 1.3;
Matrix<float>::MultiplyAndWeightedAdd(alpha, B, transposeA, Ad, transposeB, beta, C);
Matrix<float>::MultiplyAndWeightedAdd(alpha, B, transposeA, As, transposeB, beta, C1);
// TODO IsEqualTo NYI
// Assert::IsTrue(C1.IsEqualTo(C, 0.00005));
}
// CPU dense * (sparse, transposed) accumulated into two different sparse output
// formats (block-column vs CSC). Currently a smoke test only: the equality
// assert is disabled until IsEqualTo is implemented for these formats.
TEST_METHOD(CPUMatrixDenseTimesSparseAsSparse)
{
Matrix<float> Ad(CPUDEVICE);
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(2048, 1024, -3, 0.1, 0), 0);
Matrix<float> As(Ad);
As.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
Matrix<float> B = Matrix<float>::RandomGaussian(2048, 1024, 1, 4, USE_TIME_BASED_SEED, CPUDEVICE);
Matrix<float> AsCsc = Matrix<float>::RandomGaussian(2048, 2048, 1, 2, USE_TIME_BASED_SEED, CPUDEVICE);
Matrix<float> AsBlock(CPUDEVICE);
AsBlock.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseBlockCol);
float alpha = 0.3, beta = 0;
bool transposeA = false, transposeB = true;
Matrix<float>::MultiplyAndWeightedAdd(alpha, B, transposeA, As, transposeB, beta, AsBlock);
Matrix<float>::MultiplyAndWeightedAdd(alpha, B, transposeA, As, transposeB, beta, AsCsc);
// TODO IsEqualTo NYI
// Assert::IsTrue(AsBlock.IsEqualTo(AsCsc, 0.0001));
}
// Sparse * sparse product compared against the dense * dense reference,
// for beta = 0 and for an accumulating beta != 0.
TEST_METHOD(MatrixSparseTimesSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(2048,1024,-5,0.4,0),0);
Matrix<float> Bs(Bd);
Matrix<float> Cd;
Cd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,1024,-4,0.2,0),0);
Matrix<float> Cs(Cd);
float alpha = 2.4, beta=0;
bool transposeA = false, transposeB=false;
Matrix<float>::MultiplyAndWeightedAdd(alpha,Ad,transposeA,Bd,transposeB,beta,Cd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Cs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::MultiplyAndWeightedAdd(alpha,As,transposeA,Bs,transposeB,beta,Cs);
Cs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Cs.IsEqualTo(Cd,0.00001));
alpha = 2.4, beta=3.4;
Cs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::MultiplyAndWeightedAdd(alpha,Ad,transposeA,Bd,transposeB,beta,Cd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Cs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::MultiplyAndWeightedAdd(alpha,As,transposeA,Bs,transposeB,beta,Cs);
Cs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Cs.IsEqualTo(Cd,0.00001));
}
// B += alpha*A with both operands sparse, checked against the dense reference.
TEST_METHOD(MatrixSparsePlusSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bs(Bd);
float alpha = 1.0*rand() / RAND_MAX;
Matrix<float>::ScaleAndAdd(alpha,Ad,Bd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::ScaleAndAdd(alpha,As,Bs);
Bs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Bs.IsEqualTo(Bd,0.00001));
}
// Sparse B += alpha * dense A, checked against the all-dense reference.
TEST_METHOD(MatrixDensePlusSparse)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bs(Bd);
float alpha = 1.0*rand() / RAND_MAX;
Matrix<float>::ScaleAndAdd(alpha,Ad,Bd);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::ScaleAndAdd(alpha,Ad,Bs);
Bs.SwitchToMatrixType(MatrixType::DENSE);
Assert::IsTrue(Bs.IsEqualTo(Bd,0.00001));
}
// Dense B += alpha * sparse A, checked against the all-dense reference.
TEST_METHOD(MatrixSparsePlusDense)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bd1(Bd);
float alpha = 1.0*rand() / RAND_MAX;
Matrix<float>::ScaleAndAdd(alpha,Ad,Bd);
As.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float>::ScaleAndAdd(alpha,As,Bd1);
Assert::IsTrue(Bd1.IsEqualTo(Bd,0.00001));
}
// Element-wise power (operator^= and AssignElementPowerOf) on sparse vs dense.
// The #ifdef CHECK section is an optional manual diagnostic that dumps the
// element-wise differences on the CPU; it is compiled out by default.
TEST_METHOD(MatrixSparseElementWisePower)
{
Matrix<float> Ad;
Ad.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-3,0.1,0),0);
Matrix<float> As(Ad);
As.SwitchToMatrixType(MatrixType::SPARSE);
Matrix<float> Bd;
Bd.AssignTruncateBottomOf(Matrix<float>::RandomUniform(1024,2048,-5,0.4,0),0);
Matrix<float> Bs(Bd);
Bs.SwitchToMatrixType(MatrixType::SPARSE);
Ad^=2.3; //DENSE
As^=2.3; //SPARSE
Assert::IsTrue(As.IsEqualTo(Ad,0.00001));
Assert::IsTrue(Ad.IsEqualTo(As,0.00001));
Bd.AssignElementPowerOf(Ad,3.2);
Bs.AssignElementPowerOf(As,3.2);
#ifdef CHECK
Bs.SwitchToMatrixType(DENSE);
Bd.TransferFromDeviceToDevice(0,CPUDEVICE);
Bs.TransferFromDeviceToDevice(0,CPUDEVICE);
for (int r = 0; r < Bd.GetNumRows(); ++r)
for (int c = 0; c < Bd.GetNumCols(); ++c)
{
float dVal = Bd(r,c);
float sVal = Bs(r,c);
float diff = sVal - dVal;
if (fabs(diff) > 0.001)
cout << "[" << r << ", " << c << "]: " << sVal << " and " << dVal;
}
#endif
Assert::IsTrue(Bs.IsEqualTo(Bd,0.0001));
Assert::IsTrue(Bd.IsEqualTo(Bs,0.0001));
}
};
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -33,7 +33,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
CPUSparseMatrix(const MatrixFormat format);
CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size);
~CPUSparseMatrix();
public:
@ -76,6 +77,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static void ScaleAndAdd(const ElemType alpha, const CPUSparseMatrix<ElemType>& lhs, CPUMatrix<ElemType>& c);
static bool AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
/// sum(vec(a).*vec(b))
static ElemType InnerProductOfMatrices(const CPUSparseMatrix<ElemType>& /*a*/, const CPUMatrix<ElemType>& /*b*/) { NOT_IMPLEMENTED; }
@ -89,6 +92,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void Resize(const size_t numRows, const size_t numCols, size_t numNZElemToReserve = 0, const bool growOnly = true, const bool keepExistingValues = true);
void Reset();
inline ElemType defaultElem()
{
ElemType default;
memset(&default, 0, sizeof(ElemType));
return default;
}
const ElemType& operator() (const size_t row, const size_t col) const
{
if (col >= m_numCols || row >= m_numRows)
{
throw std::runtime_error("Position outside matrix dimensions");
}
if (m_format == MatrixFormat::matrixFormatSparseCSC)
{
size_t start = m_compIndex[col];
size_t end = m_compIndex[col + 1];
for (size_t p = start; p < end; p++)
{
size_t i = m_unCompIndex[p];
if (i == row)
{
return m_pArray[p];
}
}
return m_default;
}
else
{
NOT_IMPLEMENTED;
}
}
public:
void NormalGrad(CPUMatrix<ElemType>& c, const ElemType momentum);
void Adagrad(CPUMatrix<ElemType>& c);
@ -103,7 +141,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
const ElemType* NzValues() const { return m_pArray; }
ElemType* NzValues() { return m_pArray; }
inline ElemType* NzValues() { return m_pArray; }
size_t NzSize() const { return sizeof(ElemType)*m_nz; } // actual number of element bytes in use
CPUSPARSE_INDEX_TYPE* MajorIndexLocation() const { return m_unCompIndex; } //this is the major index, row/col ids in CSC/CSR format
@ -139,9 +177,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUSPARSE_INDEX_TYPE *m_unCompIndex; //row/col ids in CSC/CSR format
CPUSPARSE_INDEX_TYPE *m_compIndex; //begin ids of col/row in CSC/CSR format
size_t m_blockSize; //block size
ElemType *m_blockVal; //block values
size_t m_blockSize; //block size
size_t *m_blockIds; //block ids
ElemType m_default;
};
typedef CPUSparseMatrix<float> CPUSingleSparseMatrix;

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,266 +1,265 @@
//
// <copyright file="GPUSparseMatrix.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "GPUMatrix.h"
#include "CPUSparseMatrix.h"
#include <functional>
namespace Microsoft { namespace MSR { namespace CNTK {
//GPU Sparse Matrix, using cuSPARSE library.
//By default we are assuming CSR representation
// NOTE m_elemSizeAllocated (in base matrix) means the number of non-zero elements we have allocated space
// We are packing the CSR format (pointed to by m_pArray) as follows:
// ElemType elements[m_elemSizeAllocated]
// int colIdx[m_elemSizeAllocated]
// int rowIdxStart[m_numRows+1]
template<class ElemType>
class MATH_API GPUSparseMatrix : public BaseMatrix<ElemType>
{
typedef BaseMatrix<ElemType> B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require to use thi-> in GCC
public:
GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR, const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
GPUSparseMatrix(const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR,
const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
GPUSparseMatrix(const GPUSparseMatrix<ElemType>&);
GPUSparseMatrix(const GPUMatrix<ElemType>&, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR);
#ifndef LINUX
GPUSparseMatrix(GPUSparseMatrix<ElemType>&&);
#endif /* LINUX */
~GPUSparseMatrix();
public:
void Reset();
public:
// return col pointer, which is immediately following the non-zero element
// in memory format is always in the following order:
// Non-zero data elements, Full index locations, compressed index locations
// In CSR row data is compressed, in CSC col data is compressed
const ElemType* NzValues() const {return m_pArray;}
ElemType* NzValues() {return m_pArray;}
size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use
GPUSPARSE_INDEX_TYPE* MajorIndexLocation() const { return (GPUSPARSE_INDEX_TYPE*)(m_pArray + m_elemSizeAllocated); } //this is the major index, row/col ids in CSC/CSR format
size_t MajorIndexCount() const { return m_nz; }
size_t MajorIndexSize() const { return sizeof(GPUSPARSE_INDEX_TYPE)*MajorIndexCount(); } // actual number of major index bytes in use
GPUSPARSE_INDEX_TYPE* SecondaryIndexLocation() const { return MajorIndexLocation() + m_elemSizeAllocated; } //this is the compressed index, col/row in CSC/CSR format
size_t SecondaryIndexCount(const size_t numNZ) const
{
if (m_format&matrixFormatCompressed)
{
size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols;
if (cnt > 0) cnt++; // add an extra element on the end for the "max" value
return cnt;
}
else
return numNZ; // COO format
}
size_t SecondaryIndexCount() const
{
return SecondaryIndexCount(m_nz);
}
// get size for compressed index
size_t SecondaryIndexSize() const { return (SecondaryIndexCount())*sizeof(GPUSPARSE_INDEX_TYPE); }
size_t BufferSizeNeeded() const { return NzSize() + MajorIndexSize() + SecondaryIndexSize(); }
size_t BufferSizeNeeded(const size_t numNZ) const
{ return sizeof(ElemType)*numNZ + sizeof(GPUSPARSE_INDEX_TYPE)*(numNZ + SecondaryIndexCount(numNZ)); }
size_t BufferSizeAllocated() const { return m_totalBufferSizeAllocated; }
ElemType* BufferPointer() const;
// the column and row locations will swap based on what format we are in. Full index always follows the data array
GPUSPARSE_INDEX_TYPE* RowLocation() const { return (m_format&matrixFormatRowMajor) ? SecondaryIndexLocation() : MajorIndexLocation(); }
size_t RowSize() const {return (m_format&matrixFormatRowMajor)?SecondaryIndexSize():MajorIndexSize();}
GPUSPARSE_INDEX_TYPE* ColLocation() const { return (m_format&matrixFormatRowMajor) ? MajorIndexLocation() : SecondaryIndexLocation(); }
size_t ColSize() const {return (m_format&matrixFormatRowMajor)?MajorIndexSize():SecondaryIndexSize();} // actual number of bytes in use
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true); //matrix format will affect the size to allocate
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly = true);
GPUSparseMatrix<ElemType> Transpose() const;
void InplaceTranspose();
GPUSparseMatrix<ElemType>& AssignTransposeOf(const GPUSparseMatrix<ElemType>& a);
GPUMatrix<ElemType> CopyToDenseMatrix() const;
void CopyToDenseMatrix(GPUMatrix<ElemType> &denseMatrix) const;
void CopyToCPUSparseMatrix(CPUSparseMatrix<ElemType> &cpuSparseMatrix) const;
void ChangeDeviceTo(DEVICEID_TYPE toId);
GPUSparseMatrix<ElemType>& operator=(const GPUSparseMatrix<ElemType>& deepCopy);
#ifndef LINUX
GPUSparseMatrix<ElemType>& operator=(GPUSparseMatrix<ElemType>&& moveFrom);
#endif /* LINUX */
GPUSparseMatrix<ElemType> operator+ (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType> operator- (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType>& operator^= (const ElemType alpha); //element-wise power
GPUSparseMatrix<ElemType> operator^ (const ElemType alpha) const; //element-wise power
GPUSparseMatrix<ElemType>& operator*= (const ElemType alpha);
GPUSparseMatrix<ElemType> operator*(const ElemType alpha) const;
GPUSparseMatrix<ElemType>& AssignElementPowerOf(const GPUSparseMatrix<ElemType>& a, const ElemType power);
bool IsEqualTo(const GPUSparseMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
bool IsEqualTo(const GPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
public:
virtual DEVICEID_TYPE GetComputeDeviceId(void) const;
size_t GetNumNZElements() const {return m_nz;}
//Sets sparse matrix in CSR format. this acts as deep copy
void SetMatrixFromCSRFormat(const GPUSPARSE_INDEX_TYPE *h_CSRRow, const GPUSPARSE_INDEX_TYPE *h_Col, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
void SetMatrixFromCSCFormat(const GPUSPARSE_INDEX_TYPE *h_CSCCol, const GPUSPARSE_INDEX_TYPE *h_Row, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
void SetMatrixFromLabelAndClass(CPUSPARSE_INDEX_TYPE *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize);
//Gets sparse matrix in CSR format. this acts as deep copy. All passed pointers must be NULL. the function will allocate memory itself.
void GetMatrixFromCSRFormat(GPUSPARSE_INDEX_TYPE*& h_CSRRow, GPUSPARSE_INDEX_TYPE*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
// Gets sparse matrix in CSC format as a deep copy; the function allocates the output arrays itself.
void GetMatrixFromCSCFormat(GPUSPARSE_INDEX_TYPE*& h_CSCCol, GPUSPARSE_INDEX_TYPE*& h_Row, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
void ConvertToSparseFormat(MatrixFormat newFormat);
void ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix<ElemType>& outMatrix) const;
public:
// ---- element-wise math ----
// By naming convention, Inplace*/operator forms modify *this, while Assign*Of
// forms overwrite *this with f(a) — TODO confirm against the .cu implementations.
GPUSparseMatrix<ElemType>& ElementInverse ();
GPUSparseMatrix<ElemType>& AssignElementInverseOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceLinearRectifierDerivative();
GPUSparseMatrix<ElemType>& AssignLinearRectifierDerivativeOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceSigmoid ();
GPUSparseMatrix<ElemType>& AssignSigmoidOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceTanh ();
GPUSparseMatrix<ElemType>& AssignTanhOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceSqrt ();
GPUSparseMatrix<ElemType>& AssignSqrtOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceExp ();
GPUSparseMatrix<ElemType>& AssignExpOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceLog ();
GPUSparseMatrix<ElemType>& AssignLogOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceAbs ();
GPUSparseMatrix<ElemType>& AssignAbsOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateBottomOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateTopOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
GPUSparseMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
// ---- reductions / norms ----
ElemType SumOfElements () const; //sum of all elements
ElemType SumOfAbsElements () const; //sum of all abs(elements)
ElemType FrobeniusNorm() const;
ElemType MatrixNormInf() const;
ElemType MatrixNorm1() const;
// L0 "norm": number of stored non-zero elements (cast to ElemType).
ElemType MatrixNorm0() const { return (ElemType)GetNumNZElements(); };
public:
// ---- static BLAS-style operations mixing sparse and dense operands ----
//Performs C = alpha op ( S ) D + beta C; Where S is sparse and D and C are dense
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b,
const bool transposeB, ElemType beta, GPUMatrix<ElemType>& c);
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix<ElemType>& S, const bool transposeS, const GPUMatrix<ElemType>& D,
const bool transposeD, ElemType beta, GPUMatrix<ElemType>& C);
static void MultiplyAndAdd(ElemType alpha, const GPUMatrix<ElemType>& lhs, const bool transposeA, const GPUSparseMatrix<ElemType>& rhs,
const bool transposeB, GPUSparseMatrix<ElemType>& c);
static void ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix<ElemType>& lhs, GPUMatrix<ElemType>& c);
// Class-based cross-entropy support (class-factored softmax for language models — TODO confirm).
static void ClassEntropy(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& weight,
const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& etp, GPUMatrix<ElemType>& entropyScore);
static void ClassEntropyError(GPUSparseMatrix<ElemType>& a);
static void ClassEntropyGradientOfInput(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& weight, GPUMatrix<ElemType>& grd);
static void ClassEntropyGradientOfWeight(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& input, const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& grd);
void NormalGrad(GPUMatrix<ElemType>& c, const ElemType momentum);
static void Multiply(const GPUSparseMatrix<ElemType>& S, const GPUMatrix<ElemType>& D, GPUMatrix<ElemType>& C);
static void Multiply(const GPUMatrix<ElemType>& D, const GPUSparseMatrix<ElemType>& S, GPUMatrix<ElemType>& C);
static void Multiply(const GPUSparseMatrix<ElemType>& S1, bool transposeS1, const GPUSparseMatrix<ElemType>& S2, bool transposeS2, GPUSparseMatrix<ElemType> &C);
GPUSparseMatrix<ElemType>& AssignProductOf(const GPUSparseMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b, const bool transposeB);
static ElemType InnerProductOfMatrices(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
static ElemType InnerProductOfMatrices(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUSparseMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha,const GPUMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
static void Scale(ElemType alpha, GPUSparseMatrix<ElemType>& a);
static void ElementWisePower (ElemType alpha, const GPUSparseMatrix<ElemType>& a, GPUSparseMatrix<ElemType>& c);
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool AreEqual(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
//For these two, I should also add a version which would return GPUSparseMatrix, since Dense.*Sparse =Sparse.*Dense=Sparse
static GPUMatrix<ElemType> ElementProductOf (const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
static GPUMatrix<ElemType> ElementProductOf (const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
public:
// ---- serialization ----
// See: http://stackoverflow.com/questions/4660123/overloading-friend-operator-for-template-class/4661372#4661372
template <class ElemTypeDummy>
friend MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemTypeDummy>& us);
template <class ElemTypeDummy>
friend MATH_API File& operator<<(File& stream, const GPUSparseMatrix<ElemTypeDummy>& us);
private:
// ---- internal helpers ----
void* ReserveTempHostBuffer(const size_t sizeInByte) const;
template <class OutType, class InType>
static void CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size);
private:
void ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE deviceId);
private:
void performInplaceFunction(const int kind);
void DeepCopy(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void Clear();
void PrepareBuffer(const size_t numRows, const size_t numCols, const bool canReuseBuffer, std::function<size_t(GPUSPARSE_INDEX_TYPE* csrRowPtrC)> func);
size_t ElemCountFromBufferSize(const size_t totalBufferSize) const;
size_t ElemCountFromBufferSize() const;
DEVICEID_TYPE PrepareDevice(const DEVICEID_TYPE deviceId = -1) const;
private:
// ---- data members ----
size_t m_totalBufferSizeAllocated;
size_t m_blockSize; //block size
ElemType *m_blockVal; //block values
size_t *m_blockIds; //block ids
size_t *m_rowToId; //the id showing the order row number is observed in the nnz values.
size_t m_expandedSize; // expanded label size
size_t* m_block2Id; // label block id to first word location
size_t* m_block2UniqId; // label block id to unique first word location
mutable void* m_tempHostBuffer; //used to copy values.
mutable size_t m_tempHostBufferSize;
// When true, kernels are followed by a device sync (shared by all instances).
static bool do_sync;
};
}}}
//
// <copyright file="GPUSparseMatrix.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "GPUMatrix.h"
#include "CPUSparseMatrix.h"
#include <functional>
namespace Microsoft { namespace MSR { namespace CNTK {
//GPU Sparse Matrix, using cuSPARSE library.
//By default we are assuming CSR representation
// NOTE m_elemSizeAllocated (in base matrix) means the number of non-zero elements we have allocated space
// We are packing the CSR format (pointed to by m_pArray) as follows:
// ElemType elements[m_elemSizeAllocated]
// int colIdx[m_elemSizeAllocated]
// int rowIdxStart[m_numRows+1]
template<class ElemType>
class MATH_API GPUSparseMatrix : public BaseMatrix<ElemType>
{
typedef BaseMatrix<ElemType> B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require use of this-> in GCC (dependent-base name lookup)
public:
// ---- construction / destruction ----
GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR, const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
GPUSparseMatrix(const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR,
const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
GPUSparseMatrix(const GPUSparseMatrix<ElemType>&);
GPUSparseMatrix(const GPUMatrix<ElemType>&, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR); // dense -> sparse conversion
#ifndef LINUX
GPUSparseMatrix(GPUSparseMatrix<ElemType>&&); // move ctor; compiled out when LINUX is defined
#endif /* LINUX */
~GPUSparseMatrix();
public:
void Reset();
public:
// ---- raw buffer layout accessors ----
// return col pointer, which is immediately following the non-zero element
// in memory format is always in the following order:
// Non-zero data elements, Full index locations, compressed index locations
// In CSR row data is compressed, in CSC col data is compressed
inline const ElemType* NzValues() const {return m_pArray;}
inline ElemType* NzValues() {return m_pArray;}
inline size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use
GPUSPARSE_INDEX_TYPE* MajorIndexLocation() const { return (GPUSPARSE_INDEX_TYPE*)(m_pArray + m_elemSizeAllocated); } //this is the major index, row/col ids in CSC/CSR format
size_t MajorIndexCount() const { return m_nz; }
size_t MajorIndexSize() const { return sizeof(GPUSPARSE_INDEX_TYPE)*MajorIndexCount(); } // actual number of major index bytes in use
GPUSPARSE_INDEX_TYPE* SecondaryIndexLocation() const { return MajorIndexLocation() + m_elemSizeAllocated; } //this is the compressed index, col/row in CSC/CSR format
// Number of entries in the compressed index for a matrix holding numNZ non-zeros:
// rows+1 (CSR) / cols+1 (CSC) for compressed formats, numNZ for COO.
size_t SecondaryIndexCount(const size_t numNZ) const
{
if (m_format&matrixFormatCompressed)
{
size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols;
if (cnt > 0) cnt++; // add an extra element on the end for the "max" value
return cnt;
}
else
return numNZ; // COO format
}
size_t SecondaryIndexCount() const
{
return SecondaryIndexCount(m_nz);
}
// get size for compressed index
size_t SecondaryIndexSize() const { return (SecondaryIndexCount())*sizeof(GPUSPARSE_INDEX_TYPE); }
// Total bytes needed for values + major index + secondary index.
size_t BufferSizeNeeded() const { return NzSize() + MajorIndexSize() + SecondaryIndexSize(); }
size_t BufferSizeNeeded(const size_t numNZ) const
{ return sizeof(ElemType)*numNZ + sizeof(GPUSPARSE_INDEX_TYPE)*(numNZ + SecondaryIndexCount(numNZ)); }
inline size_t BufferSizeAllocated() const { return m_totalBufferSizeAllocated; }
inline ElemType* BufferPointer() const { return m_pArray; }
// the column and row locations will swap based on what format we are in. Full index always follows the data array
GPUSPARSE_INDEX_TYPE* RowLocation() const { return (m_format&matrixFormatRowMajor) ? SecondaryIndexLocation() : MajorIndexLocation(); }
size_t RowSize() const {return (m_format&matrixFormatRowMajor)?SecondaryIndexSize():MajorIndexSize();}
GPUSPARSE_INDEX_TYPE* ColLocation() const { return (m_format&matrixFormatRowMajor) ? MajorIndexLocation() : SecondaryIndexLocation(); }
size_t ColSize() const {return (m_format&matrixFormatRowMajor)?MajorIndexSize():SecondaryIndexSize();} // actual number of bytes in use
// ---- assignment / conversion / device movement ----
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true); //matrix format will affect the size to allocate
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly = true);
GPUSparseMatrix<ElemType> Transpose() const;
void InplaceTranspose();
GPUSparseMatrix<ElemType>& AssignTransposeOf(const GPUSparseMatrix<ElemType>& a);
GPUMatrix<ElemType> CopyToDenseMatrix() const;
void CopyToDenseMatrix(GPUMatrix<ElemType> &denseMatrix) const;
void CopyToCPUSparseMatrix(CPUSparseMatrix<ElemType> &cpuSparseMatrix) const;
void ChangeDeviceTo(DEVICEID_TYPE toId);
GPUSparseMatrix<ElemType>& operator=(const GPUSparseMatrix<ElemType>& deepCopy);
#ifndef LINUX
GPUSparseMatrix<ElemType>& operator=(GPUSparseMatrix<ElemType>&& moveFrom); // move assignment; compiled out when LINUX is defined
#endif /* LINUX */
// ---- arithmetic operators ----
GPUSparseMatrix<ElemType> operator+ (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType> operator- (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType>& operator^= (const ElemType alpha); //element-wise power
GPUSparseMatrix<ElemType> operator^ (const ElemType alpha) const; //element-wise power
GPUSparseMatrix<ElemType>& operator*= (const ElemType alpha);
GPUSparseMatrix<ElemType> operator*(const ElemType alpha) const;
GPUSparseMatrix<ElemType>& AssignElementPowerOf(const GPUSparseMatrix<ElemType>& a, const ElemType power);
bool IsEqualTo(const GPUSparseMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
bool IsEqualTo(const GPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
public:
virtual DEVICEID_TYPE GetComputeDeviceId(void) const;
inline size_t GetNumNZElements() const {return m_nz;}
//Sets sparse matrix in CSR format. This acts as a deep copy.
void SetMatrixFromCSRFormat(const GPUSPARSE_INDEX_TYPE *h_CSRRow, const GPUSPARSE_INDEX_TYPE *h_Col, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
void SetMatrixFromCSCFormat(const GPUSPARSE_INDEX_TYPE *h_CSCCol, const GPUSPARSE_INDEX_TYPE *h_Row, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
void SetMatrixFromLabelAndClass(CPUSPARSE_INDEX_TYPE *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize);
//Gets sparse matrix in CSR format. this acts as deep copy. All passed pointers must be NULL. the function will allocate memory itself.
void GetMatrixFromCSRFormat(GPUSPARSE_INDEX_TYPE*& h_CSRRow, GPUSPARSE_INDEX_TYPE*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
void GetMatrixFromCSCFormat(GPUSPARSE_INDEX_TYPE*& h_CSCCol, GPUSPARSE_INDEX_TYPE*& h_Row, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
void ConvertToSparseFormat(MatrixFormat newFormat);
void ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix<ElemType>& outMatrix) const;
public:
// ---- element-wise math ----
// By naming convention, Inplace* forms modify *this, while Assign*Of forms
// overwrite *this with f(a) — TODO confirm against the .cu implementations.
GPUSparseMatrix<ElemType>& ElementInverse ();
GPUSparseMatrix<ElemType>& AssignElementInverseOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceLinearRectifierDerivative();
GPUSparseMatrix<ElemType>& AssignLinearRectifierDerivativeOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceSigmoid ();
GPUSparseMatrix<ElemType>& AssignSigmoidOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceTanh ();
GPUSparseMatrix<ElemType>& AssignTanhOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceSqrt ();
GPUSparseMatrix<ElemType>& AssignSqrtOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceExp ();
GPUSparseMatrix<ElemType>& AssignExpOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceLog ();
GPUSparseMatrix<ElemType>& AssignLogOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceAbs ();
GPUSparseMatrix<ElemType>& AssignAbsOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateBottomOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateTopOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
GPUSparseMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
// ---- reductions / norms ----
ElemType SumOfElements () const; //sum of all elements
ElemType SumOfAbsElements () const; //sum of all abs(elements)
ElemType FrobeniusNorm() const;
ElemType MatrixNormInf() const;
ElemType MatrixNorm1() const;
// L0 "norm": number of stored non-zero elements (cast to ElemType).
ElemType MatrixNorm0() const { return (ElemType)GetNumNZElements(); };
public:
// ---- static BLAS-style operations mixing sparse and dense operands ----
//Performs C = alpha op ( S ) D + beta C; Where S is sparse and D and C are dense
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b,
const bool transposeB, ElemType beta, GPUMatrix<ElemType>& c);
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix<ElemType>& S, const bool transposeS, const GPUMatrix<ElemType>& D,
const bool transposeD, ElemType beta, GPUMatrix<ElemType>& C);
static void MultiplyAndAdd(ElemType alpha, const GPUMatrix<ElemType>& lhs, const bool transposeA, const GPUSparseMatrix<ElemType>& rhs,
const bool transposeB, GPUSparseMatrix<ElemType>& c);
static void ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix<ElemType>& lhs, GPUMatrix<ElemType>& c);
// Class-based cross-entropy support (class-factored softmax for language models — TODO confirm).
static void ClassEntropy(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& weight,
const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& etp, GPUMatrix<ElemType>& entropyScore);
static void ClassEntropyError(GPUSparseMatrix<ElemType>& a);
static void ClassEntropyGradientOfInput(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& weight, GPUMatrix<ElemType>& grd);
static void ClassEntropyGradientOfWeight(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& input, const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& grd);
void NormalGrad(GPUMatrix<ElemType>& c, const ElemType momentum);
static void Multiply(const GPUSparseMatrix<ElemType>& S, const GPUMatrix<ElemType>& D, GPUMatrix<ElemType>& C);
static void Multiply(const GPUMatrix<ElemType>& D, const GPUSparseMatrix<ElemType>& S, GPUMatrix<ElemType>& C);
static void Multiply(const GPUSparseMatrix<ElemType>& S1, bool transposeS1, const GPUSparseMatrix<ElemType>& S2, bool transposeS2, GPUSparseMatrix<ElemType> &C);
GPUSparseMatrix<ElemType>& AssignProductOf(const GPUSparseMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b, const bool transposeB);
static ElemType InnerProductOfMatrices(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
static ElemType InnerProductOfMatrices(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUSparseMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha,const GPUMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
static void Scale(ElemType alpha, GPUSparseMatrix<ElemType>& a);
static void ElementWisePower (ElemType alpha, const GPUSparseMatrix<ElemType>& a, GPUSparseMatrix<ElemType>& c);
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool AreEqual(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
//For these two, I should also add a version which would return GPUSparseMatrix, since Dense.*Sparse =Sparse.*Dense=Sparse
static GPUMatrix<ElemType> ElementProductOf (const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
static GPUMatrix<ElemType> ElementProductOf (const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
public:
// ---- serialization ----
// See: http://stackoverflow.com/questions/4660123/overloading-friend-operator-for-template-class/4661372#4661372
template <class ElemTypeDummy>
friend MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemTypeDummy>& us);
template <class ElemTypeDummy>
friend MATH_API File& operator<<(File& stream, const GPUSparseMatrix<ElemTypeDummy>& us);
private:
// ---- internal helpers ----
void* ReserveTempHostBuffer(const size_t sizeInByte) const;
template <class OutType, class InType>
static void CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size);
private:
void ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE deviceId);
private:
void performInplaceFunction(const int kind);
void DeepCopy(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void Clear();
void PrepareBuffer(const size_t numRows, const size_t numCols, const bool canReuseBuffer, std::function<size_t(GPUSPARSE_INDEX_TYPE* csrRowPtrC)> func);
size_t ElemCountFromBufferSize(const size_t totalBufferSize) const;
size_t ElemCountFromBufferSize() const;
DEVICEID_TYPE PrepareDevice(const DEVICEID_TYPE deviceId = -1) const;
private:
// ---- data members ----
size_t m_totalBufferSizeAllocated;
size_t m_blockSize; //block size
size_t *m_blockIds; //block ids
size_t *m_rowToId; //the id showing the order row number is observed in the nnz values.
size_t m_expandedSize; // expanded label size
size_t* m_block2Id; // label block id to first word location
size_t* m_block2UniqId; // label block id to unique first word location
mutable void* m_tempHostBuffer; //used to copy values.
mutable size_t m_tempHostBufferSize;
// When true, kernels are followed by a device sync (shared by all instances).
static bool do_sync;
};
}}}

Просмотреть файл

@ -733,6 +733,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_CPUSparseMatrix == nullptr)
{
m_CPUSparseMatrix = new CPUSparseMatrix<ElemType>(newMatrixFormat);
if (GetMatrixType() == MatrixType::DENSE && m_CPUMatrix != nullptr)
{
m_CPUSparseMatrix->Resize(GetNumRows(), GetNumCols());
CopyElementsFromDenseToSparse(*m_CPUMatrix, *m_CPUSparseMatrix);
}
else
{
// TODO: Assign Sparse from Sparse!
}
delete m_CPUMatrix;
m_CPUMatrix = nullptr;
}
@ -801,6 +812,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// Copies every element of a dense CPU matrix into a CPU sparse matrix, one
// SetValue call per coordinate. foreach_coord is a project macro that iterates
// all (row, col) pairs of 'from'.
// NOTE(review): zero elements are passed to SetValue as well — presumably
// CPUSparseMatrix::SetValue drops them; confirm, since storing explicit zeros
// would defeat the purpose of the sparse representation.
template<class ElemType>
void Matrix<ElemType>::CopyElementsFromDenseToSparse(CPUMatrix<ElemType>& from, CPUSparseMatrix<ElemType>& dest)
{
foreach_coord(row, col, from)
{
auto val = from(row, col);
dest.SetValue(row, col, val);
}
}
template<class ElemType>
ElemType Matrix<ElemType>::Get00Element() const
@ -3992,7 +4012,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
nullptr,
return CPUMatrix<ElemType>::AreEqual(*a.m_CPUMatrix, *b.m_CPUMatrix, threshold),
return GPUMatrix<ElemType>::AreEqual(*a.m_GPUMatrix, *b.m_GPUMatrix, threshold),
NOT_IMPLEMENTED; return false ,
return CPUSparseMatrix<ElemType>::AreEqual(*a.m_CPUSparseMatrix, *b.m_CPUSparseMatrix, threshold),
return GPUSparseMatrix<ElemType>::AreEqual(*a.m_GPUSparseMatrix, *b.m_GPUSparseMatrix, threshold)
);
}

Просмотреть файл

@ -1,439 +1,440 @@
//
// <copyright file="Matrix.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUMatrix.h"
#include "GPUSparseMatrix.h"
// This class is exported from the Math.dll
namespace Microsoft { namespace MSR { namespace CNTK {
// Which backing store currently holds the data of a Matrix.
// BOTH presumably means the CPU and GPU copies are both up to date, and NONE
// that nothing has been materialized yet — confirm against SetDataLocation usage.
enum CurrentDataLocation
{
NONE, CPU, GPU, BOTH
};
// Storage class of a Matrix: dense wrapper, sparse wrapper, or not yet decided
// (UNDETERMINED until the first operation fixes the representation).
enum MatrixType
{
UNDETERMINED, DENSE, SPARSE
};
//To comply with BLAS libraries, matrices are stored in ColMajor. However, by default C/C++/C# use RowMajor
//conversion is needed when passing data between Matrix and C++ matrices
//For the best performance compile CNTKMath project with NO_SYNC preprocessor directive
//!!!WARNING!!! This class is NOT THREAD SAFE. Test and add necessary modifications if using in multi-threaded environment
template<class ElemType>
class MATH_API Matrix
{
private:
mutable BaseMatrix<ElemType> *m_baseMatrix;
mutable GPUMatrix<ElemType> *m_GPUMatrix;
mutable CPUMatrix<ElemType> *m_CPUMatrix;
mutable GPUSparseMatrix<ElemType> *m_GPUSparseMatrix;
mutable CPUSparseMatrix<ElemType> *m_CPUSparseMatrix;
mutable MatrixType m_matrixType;
mutable CurrentDataLocation m_currentDataLocation; //Indicates which matrix is current
mutable DEVICEID_TYPE m_preferredDeviceId;
//Moves matrix from device id_from to device with id_to. This method doesn't change preferred device Id
void _transferFromDeviceToDevice(int id_from, int id_to, bool ismoved=true,bool emptyTransfer=false) const;
//Moves matrix from current device to device with id_to. This method doesn't change preferred device Id
void _transferToDevice(int id_to, bool ismoved=true, bool emptyTransfer=false) const;
static void DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
static void DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c);
public:
//Constructors, destructors and other static matrix builders
//Each constructor can take deviceId as parameter.
//If deviceId<0 then the matrix will be based in RAM (CPUMatrix)
//Elseif deviceId>=0 and <AUTOPLACEMATRIX, then the matrix will be based on GPU with specified deviceId
//Else (default) if deviceId=AUTOPLACEMATRIX, the class will try to place itself on the best GPU, if fails it will go to CPU
//The default behaviour should be deviceId=AUTOPLACEMATRIX
Matrix(DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
Matrix(BaseMatrix<ElemType>* baseMatrix, ElemType *pArray, DEVICEID_TYPE deviceId); // constructor for setting Matrix from a base matrix (externally managed butter pArray)
Matrix(FILE* f, const char * matrixName, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const MatrixType matrixType = DENSE); //matrixName is used to verify that correct matrix is read.
Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const MatrixType matrixType = DENSE);
Matrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const size_t nnz=0);
Matrix(const Matrix<ElemType>& deepCopyFrom, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX); //copy constructor, deep copy
Matrix<ElemType>& operator=(const Matrix<ElemType>& deepCopyFrom); //assignment operator, deep copy
Matrix(Matrix<ElemType>&& moveFrom); //move constructor, shallow copy
Matrix<ElemType>& operator=(Matrix<ElemType>&& moveFrom); //move coment operator, shallow copy
static Matrix<ElemType> Ones(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
static Matrix<ElemType> Zeros(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
static Matrix<ElemType> Eye(const size_t rows, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
static Matrix<ElemType> RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed=USE_TIME_BASED_SEED, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
static Matrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed=USE_TIME_BASED_SEED, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
void Clear();
~Matrix();
private:
Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
Matrix(const MatrixFlags matrixFlags, DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
void Init(DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
void SetDataLocation(CurrentDataLocation location, MatrixType type=UNDETERMINED) const;
public:
MatrixType GetMatrixType() const {return m_matrixType;};
bool OwnBuffer() const {return m_baseMatrix->OwnBuffer();}
int GetDeviceId() const; //-1 if CPU, otherwise GPU CUDA device id
DEVICEID_TYPE GetPreferredDeviceId() const { return m_preferredDeviceId; }; //-1 if CPU, otherwise GPU CUDA device id
void SetPreferredDeviceId(DEVICEID_TYPE preferredDeviceId){ if (m_preferredDeviceId != preferredDeviceId) m_preferredDeviceId = preferredDeviceId; }
//Moves matrix from device id_from to device with id_to.
//If emptyTransfer=true, then no data is ever moved, just corresponding GPU/CPU matrices are deleted and then created using empty constructor
void TransferFromDeviceToDevice(int id_from, int id_to, bool ismoved=false, bool emptyTransfer=false, bool updatePreferredDevice=true) const;
CurrentDataLocation GetCurrentMatrixLocation() const { return m_currentDataLocation; };
void SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat newMatrixFormat = matrixFormatSparseCSR); //sets matrix type between dense and sparse
size_t GetNumRows() const;
size_t GetNumCols() const;
size_t GetNumElements() const;
wchar_t* GetMatrixName() const;
void SetMatrixName(const wchar_t* s);
bool IsEmpty() const;
size_t BufferSize() const;
ElemType* BufferPointer() const;
size_t NzCount() const;
ElemType* CopyToArray() const; //allocated by the callee but need to be deleted by the caller
size_t CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const; //allocated by the callee but need to be deleted by the caller
Matrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
Matrix<ElemType>& AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
void ShiftBy(int numShift) ;
void NormalGrad(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum);
//--- in-place optimizer-state updates (this matrix holds the accumulator/smoothed state) ---
void Adagrad(Matrix<ElemType>& gradients);
void RmsProp(Matrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN);
//--- shape manipulation ---
void Reshape(const size_t numRows, const size_t numCols);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 0, bool growOnly = true); //by default we only reallocate if need to grow
size_t GetAllocatedSize() const;
void Reset(); //reset for sparse matrix
//--- element access; const overload returns by value, non-const by reference ---
const ElemType operator() (const size_t row, const size_t col) const;
ElemType& operator() (const size_t row, const size_t col);
ElemType Get00Element() const;
//--- value setters; overloads cover scalar fill, deep copy, external array, and sparse element set ---
void SetValue(const ElemType v);
void SetValue(const DeviceBoundNumber<ElemType>& db_number);
void SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format=matrixFormatSparseCSR);
void SetValue(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal, int deviceId=MANAGEDEXTERN);
void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); // set matrix sparsely
void SetMatrixFromCSCFormat(const GPUSPARSE_INDEX_TYPE *h_CSCCol, const GPUSPARSE_INDEX_TYPE *h_Row, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols);
void SetMatrixFromLabelAndClass(CPUSPARSE_INDEX_TYPE *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize);
void SetColumn(const ElemType* colPointer, size_t colInd);
void SetColumn(const ElemType val, size_t colInd);
void SetColumn(const Matrix<ElemType>& valMat, size_t colInd);
void SetDiagonalValue(const ElemType v);
void SetDiagonalValue(Matrix<ElemType>& vector);
//--- random initialization; seed defaults to time-based unless given explicitly ---
void SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed=USE_TIME_BASED_SEED);
void SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed=USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed=USE_TIME_BASED_SEED);
void AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed=USE_TIME_BASED_SEED);
//--- transpose; Assign*Of variants write the result of the op on `a` into *this and return *this ---
Matrix<ElemType> Transpose();
Matrix<ElemType>& AssignTransposeOf (const Matrix<ElemType>& a);
//--- arithmetic operators (scalar and matrix operands) ---
Matrix<ElemType>& operator+= (const ElemType alpha);
Matrix<ElemType> operator+ (const ElemType alpha) const;
Matrix<ElemType>& AssignSumOf(const ElemType alpha, const Matrix<ElemType>& a);
Matrix<ElemType>& operator+= (const Matrix<ElemType>& a);
Matrix<ElemType> operator+ (const Matrix<ElemType>& a) const;
Matrix<ElemType>& AssignSumOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& operator-= (const ElemType alpha);
Matrix<ElemType> operator- (const ElemType alpha) const;
Matrix<ElemType>& AssignDifferenceOf(const ElemType alpha, const Matrix<ElemType>& a);
Matrix<ElemType>& AssignDifferenceOf(const Matrix<ElemType>& a, const ElemType alpha);
Matrix<ElemType>& operator-= (const Matrix<ElemType>& a);
Matrix<ElemType> operator- (const Matrix<ElemType>& a) const;
Matrix<ElemType>& AssignDifferenceOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& operator*= (const ElemType alpha);
Matrix<ElemType> operator* (const ElemType alpha) const;
Matrix<ElemType>& AssignProductOf(const ElemType alpha, const Matrix<ElemType>& a);
Matrix<ElemType> operator* (const Matrix<ElemType>& a) const;
Matrix<ElemType>& AssignProductOf (const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB);
Matrix<ElemType>& operator/= (ElemType alpha);
Matrix<ElemType> operator/ (ElemType alpha) const;
Matrix<ElemType>& operator^= (ElemType alpha); //element-wise power
Matrix<ElemType> operator^ (ElemType alpha) const; //element-wise power
Matrix<ElemType>& AssignElementPowerOf(const Matrix<ElemType>& a, const ElemType power);
//--- element-wise (Hadamard) products, divisions, and row/column-broadcast variants ---
Matrix<ElemType>& ElementMultiplyWith (const Matrix<ElemType>& a);
Matrix<ElemType>& AssignElementProductOf (const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AddElementProductOf (const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AssignElementDivisionOf (const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& ElementDivideBy(const Matrix<ElemType>& a);
Matrix<ElemType>& ColumnElementMultiplyWith(const Matrix<ElemType>& a);
Matrix<ElemType>& RowElementMultiplyWith(const Matrix<ElemType>& a);
Matrix<ElemType>& ColumnElementDivideBy(const Matrix<ElemType>& a);
Matrix<ElemType>& RowElementDivideBy(const Matrix<ElemType>& a);
Matrix<ElemType>& ElementInverse ();
Matrix<ElemType>& AssignElementInverseOf (const Matrix<ElemType>& a);
//--- element-wise nonlinearities; Inplace* mutate *this, Assign*Of write f(a) into *this ---
Matrix<ElemType>& InplaceLinearRectifierDerivative();
Matrix<ElemType>& AssignLinearRectifierDerivativeOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceSigmoidDerivative();
Matrix<ElemType>& AssignSigmoidDerivativeOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceSigmoid ();
Matrix<ElemType>& AssignSigmoidOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceTanh ();
Matrix<ElemType>& AssignTanhOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLogSoftmax (const bool isColWise);
Matrix<ElemType>& AssignLogSoftmaxOf (const Matrix<ElemType>& a, const bool isColWise);
Matrix<ElemType>& InplaceSqrt ();
Matrix<ElemType>& AssignSqrtOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceExp ();
Matrix<ElemType>& AssignExpOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLog ();
Matrix<ElemType>& AssignLogOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceCosine ();
Matrix<ElemType>& AssignCosineOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceNegativeSine ();
Matrix<ElemType>& AssignNegativeSineOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLog10 ();
Matrix<ElemType>& AssignLog10Of (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceAbs ();
Matrix<ElemType>& AssignAbsOf (const Matrix<ElemType>& a);
//--- clipping/truncation against a threshold ---
Matrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
Matrix<ElemType>& AssignTruncateBottomOf (const Matrix<ElemType>& a, const ElemType threshold);
Matrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
Matrix<ElemType>& AssignTruncateTopOf (const Matrix<ElemType>& a, const ElemType threshold);
Matrix<ElemType>& InplaceTruncate (const ElemType threshold);
Matrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
//--- reductions and norms ---
DeviceBoundNumber<ElemType> Sum_AsDeviceBoundNum() const;
ElemType SumOfAbsElements () const; //sum of all abs(elements)
ElemType SumOfElements () const; //sum of all elements
Matrix<ElemType>& AssignSumOfElements(const Matrix<ElemType>& a);
//--- row-slice and tiling helpers ---
Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
bool IsEqualTo(const Matrix<ElemType>& a, const ElemType threshold = 1e-8) const;
//--- per-vector (row-wise or column-wise) norms; isColWise selects the reduction axis ---
void VectorNorm1(Matrix<ElemType>& c, const bool isColWise) const;
Matrix<ElemType>& AssignVectorNorm1Of(Matrix<ElemType>& a, const bool isColWise);
void VectorNorm2(Matrix<ElemType>& c, const bool isColWise) const;
Matrix<ElemType>& AssignVectorNorm2Of(Matrix<ElemType>& a, const bool isColWise);
void VectorNormInf(Matrix<ElemType>& c, const bool isColWise) const;
Matrix<ElemType>& AssignVectorNormInfOf(Matrix<ElemType>& a, const bool isColWise);
Matrix<ElemType>& AssignInnerProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise);
Matrix<ElemType>& AssignKhatriRaoProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AddColumnReshapeProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool transposeAColumn);
Matrix<ElemType>& AddWithScaleOf(ElemType alpha, const Matrix<ElemType>& a);
//--- whole-matrix norms ---
ElemType FrobeniusNorm() const;
Matrix<ElemType>& AssignFrobeniusNormOf(const Matrix<ElemType>& a);
ElemType MatrixNormInf() const;
ElemType MatrixNorm1() const;
ElemType MatrixNorm0() const; //number of non-zero elements
Matrix<ElemType>& AssignSignOf(const Matrix<ElemType>& a);
Matrix<ElemType>& AddSignOf(const Matrix<ElemType>& a);
//--- per-vector argmax/argmin: indexes and values returned in separate matrices ---
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMin(Matrix<ElemType>& mainndexes, Matrix<ElemType>& minValues, const bool isColWise) const;
Matrix<ElemType>& AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AssignInnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b); //this method will resize(1,1) first
//--- diagnostics ---
bool HasNan (const char * name) const;
size_t CountNanInf() const;
void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const;
void Print(const char* matrixName = nullptr) const; //print whole matrix. can be expensive
//--- convolution support: pack/unpack image batches into the matrix layout used by the conv kernels ---
Matrix<ElemType>& AssignPackedConvolutionInput(const Matrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false);
Matrix<ElemType>& UnpackConvolutionInput(Matrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false) const;
//--- pooling forward (Assign*Result) and backward (Add*Gradient) operations ---
Matrix<ElemType>& AssignMaxPoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
Matrix<ElemType>& AddMaxPoolingGradient(const Matrix<ElemType>& outputGradientBatch, const Matrix<ElemType>& inputBatch, const Matrix<ElemType>& outputBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
Matrix<ElemType>& AssignAveragePoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
Matrix<ElemType>& AddAveragePoolingGradient(const Matrix<ElemType>& outputGradientBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
public:
//--- scalar math helpers ---
ElemType Exp10(ElemType num);
ElemType Mod(ElemType x , ElemType y);
ElemType LogAdd(ElemType x, ElemType y); // log(exp(x)+exp(y)), presumably computed in a numerically stable way -- confirm in implementation
public:
static DEVICEID_TYPE GetBestGPUDeviceId(); //{ return GPUMatrix<ElemType>::GetBestGPUDeviceId();}
//static BLAS functions
// singular value decomposition of A as A = U*SIGMA*VT
static void SVD(const Matrix<ElemType>& A, Matrix<ElemType>& SIGMA, Matrix<ElemType>& U, Matrix<ElemType>& VT);
//GEMM-style product: c = alpha * op(a) * op(b) + beta * c, where op is optional transpose
static void MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c);
static void MultiplyAndAdd(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB, Matrix<ElemType>& c);
static void Multiply(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB, Matrix<ElemType>& c);
static void Multiply(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
//AXPY-style scale-and-add into c
static void ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, ElemType beta, Matrix<ElemType>& c);
//c (+)= alpha * (a - b); the Matrix-alpha overloads take the scalar as a 1x1 matrix
static void AddScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AssignScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AddScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AssignScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AddElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj);
//static void AddLogElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj);
static void AssignElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj);
static void Scale(ElemType alpha, Matrix<ElemType>& a);
static void Scale(Matrix<ElemType>& alpha, Matrix<ElemType>& a); //In this case Matrix alpha must be 1x1
static void Scale(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c);
static void InnerProduct (const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise);
static ElemType InnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
static void ElementWisePower (ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c);
static bool AreEqual(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const ElemType threshold = 1e-8);
public:
// Deserialize a matrix from a File stream. The stream begins with a one-character
// marker: 'd' for dense, 's' for sparse. The destination matrix's device id decides
// whether a CPU- or GPU-side representation is filled in (reading a sparse matrix
// into a CPU-resident matrix is not implemented). Any other marker is a logic error.
friend File& operator>>(File& stream, Matrix<ElemType>& us)
{
    char marker;
    stream >> marker;
    const bool onCPU = us.GetDeviceId() < 0;
    switch (marker)
    {
    case 'd': // dense payload
        if (onCPU)
        {
            if (us.m_CPUMatrix == NULL)
                us.m_CPUMatrix = new CPUMatrix<ElemType>();
            stream >> (*us.m_CPUMatrix);
            us.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (us.m_GPUMatrix == NULL)
                us.m_GPUMatrix = new GPUMatrix<ElemType>();
            stream >> (*us.m_GPUMatrix);
            us.SetDataLocation(GPU, DENSE);
        }
        break;
    case 's': // sparse payload
        if (onCPU)
        {
            NOT_IMPLEMENTED;//You might want to tranfer your matrix to GPU
        }
        else
        {
            if (us.m_GPUSparseMatrix == NULL)
                us.m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>();
            stream >> (*us.m_GPUSparseMatrix);
            us.SetDataLocation(GPU, SPARSE);
        }
        break;
    default:
        LogicError("wrong matrix type!");
    }
    return stream;
}
// Serialize a matrix to a File stream, mirroring operator>>: first a one-character
// marker ('d' dense / 's' sparse), then the payload of whichever underlying
// representation (CPU or GPU) is active. CPU-resident sparse serialization is not
// implemented.
friend File& operator<<(File& stream, const Matrix<ElemType>& us)
{
    const bool isDense = (us.GetMatrixType() == MatrixType::DENSE);
    const bool onCPU = us.GetDeviceId() < 0;
    stream << (isDense ? 'd' : 's');
    if (isDense)
    {
        if (onCPU)
            stream << (*us.m_CPUMatrix);
        else
            stream << (*us.m_GPUMatrix);
    }
    else
    {
        if (onCPU)
        {
            NOT_IMPLEMENTED;
            //stream<<(*M.m_CPUMatrix);
        }
        else
            stream << (*us.m_GPUSparseMatrix);
    }
    return stream;
}
public:
//--- class-based (hierarchical softmax style) cross-entropy helpers; cls/idx2cls map words to classes ---
static void ClassEntropy(const Matrix<ElemType>& a, const Matrix<ElemType>& weight,
const Matrix<ElemType> & label, const Matrix<ElemType>* cls,
const Matrix<ElemType>* idx2cls, Matrix<ElemType>& etp, Matrix<ElemType>& entropyScore);
static void ClassEntropyError(const Matrix<ElemType>& a);
static void ClassEntropyGradientOfInput(const Matrix<ElemType>& error, const Matrix<ElemType>& weight, Matrix<ElemType>& grd);
static void ClassEntropyGradientOfWeight(
const Matrix<ElemType>& error,
const Matrix<ElemType>& input,
const Matrix<ElemType>& weight,
const Matrix<ElemType> & label,
const Matrix<ElemType>* cls,
const Matrix<ElemType>* idx2cls,
Matrix<ElemType>& grd);
};
//convenience aliases for the two supported element types
typedef Matrix<float> SingleMatrix;
typedef Matrix<double> DoubleMatrix;
}}}
//
// <copyright file="Matrix.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUMatrix.h"
#include "GPUSparseMatrix.h"
// This class is exported from the Math.dll
namespace Microsoft { namespace MSR { namespace CNTK {
//Where the matrix's data currently resides: nowhere yet, CPU RAM, GPU memory, or both (in sync)
enum CurrentDataLocation
{
    NONE, CPU, GPU, BOTH
};
//Storage format of the matrix; UNDETERMINED until the first operation fixes it
enum MatrixType
{
    UNDETERMINED, DENSE, SPARSE
};
//To comply with BLAS libraries, matrices are stored in ColMajor. However, by default C/C++/C# use RowMajor
//conversion is needed when passing data between Matrix and C++ matrices
//For the best performance compile CNTKMath project with NO_SYNC preprocessor directive
//!!!WARNING!!! This class is NOT THREAD SAFE. Test and add necessary modifications if using in multi-threaded environment
// NOTE(review): this file appears to contain two concatenated copies of Matrix.h
// (merge-conflict artifact; the header comment and #pragma once reappear mid-file) --
// confirm and deduplicate.
//Facade over CPU/GPU, dense/sparse matrix implementations; dispatches each operation
//to whichever underlying representation is current.
template<class ElemType>
class MATH_API Matrix
{
private:
    //pointers to the concrete representations; at most one is "current" at a time
    mutable BaseMatrix<ElemType> *m_baseMatrix;
    mutable GPUMatrix<ElemType> *m_GPUMatrix;
    mutable CPUMatrix<ElemType> *m_CPUMatrix;
    mutable GPUSparseMatrix<ElemType> *m_GPUSparseMatrix;
    mutable CPUSparseMatrix<ElemType> *m_CPUSparseMatrix;
    mutable MatrixType m_matrixType;
    mutable CurrentDataLocation m_currentDataLocation; //Indicates which matrix is current
    mutable DEVICEID_TYPE m_preferredDeviceId;
    //Moves matrix from device id_from to device with id_to. This method doesn't change preferred device Id
    void _transferFromDeviceToDevice(int id_from, int id_to, bool ismoved=true,bool emptyTransfer=false) const;
    //Moves matrix from current device to device with id_to. This method doesn't change preferred device Id
    void _transferToDevice(int id_to, bool ismoved=true, bool emptyTransfer=false) const;
    static void DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
    static void DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c);
    static void CopyElementsFromDenseToSparse(CPUMatrix<ElemType>& from, CPUSparseMatrix<ElemType>& dest);
public:
    //Constructors, destructors and other static matrix builders
    //Each constructor can take deviceId as parameter.
    //If deviceId<0 then the matrix will be based in RAM (CPUMatrix)
    //Elseif deviceId>=0 and <AUTOPLACEMATRIX, then the matrix will be based on GPU with specified deviceId
    //Else (default) if deviceId=AUTOPLACEMATRIX, the class will try to place itself on the best GPU, if fails it will go to CPU
    //The default behaviour should be deviceId=AUTOPLACEMATRIX
    Matrix(DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
    Matrix(BaseMatrix<ElemType>* baseMatrix, ElemType *pArray, DEVICEID_TYPE deviceId); // constructor for setting Matrix from a base matrix (externally managed buffer pArray)
    Matrix(FILE* f, const char * matrixName, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const MatrixType matrixType = DENSE); //matrixName is used to verify that correct matrix is read.
    Matrix(const size_t numRows, const size_t numCols, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const MatrixType matrixType = DENSE);
    Matrix(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const size_t nnz=0);
    Matrix(const Matrix<ElemType>& deepCopyFrom, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX); //copy constructor, deep copy
    Matrix<ElemType>& operator=(const Matrix<ElemType>& deepCopyFrom); //assignment operator, deep copy
    Matrix(Matrix<ElemType>&& moveFrom); //move constructor, shallow copy
    Matrix<ElemType>& operator=(Matrix<ElemType>&& moveFrom); //move assignment operator, shallow copy
    //factory builders for common matrices
    static Matrix<ElemType> Ones(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
    static Matrix<ElemType> Zeros(const size_t rows, const size_t cols, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
    static Matrix<ElemType> Eye(const size_t rows, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
    static Matrix<ElemType> RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed=USE_TIME_BASED_SEED, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
    static Matrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed=USE_TIME_BASED_SEED, DEVICEID_TYPE deviceId=AUTOPLACEMATRIX);
    void Clear();
    ~Matrix();
private:
    Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
    Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
    Matrix(const MatrixFlags matrixFlags, DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
    void Init(DEVICEID_TYPE deviceID); //only used internally to initialize a blank matrix
    void SetDataLocation(CurrentDataLocation location, MatrixType type=UNDETERMINED) const;
public:
    //--- simple accessors ---
    MatrixType GetMatrixType() const {return m_matrixType;};
    bool OwnBuffer() const {return m_baseMatrix->OwnBuffer();}
    int GetDeviceId() const; //-1 if CPU, otherwise GPU CUDA device id
    DEVICEID_TYPE GetPreferredDeviceId() const { return m_preferredDeviceId; }; //-1 if CPU, otherwise GPU CUDA device id
    void SetPreferredDeviceId(DEVICEID_TYPE preferredDeviceId){ if (m_preferredDeviceId != preferredDeviceId) m_preferredDeviceId = preferredDeviceId; }
    //Moves matrix from device id_from to device with id_to.
    //If emptyTransfer=true, then no data is ever moved, just corresponding GPU/CPU matrices are deleted and then created using empty constructor
    void TransferFromDeviceToDevice(int id_from, int id_to, bool ismoved=false, bool emptyTransfer=false, bool updatePreferredDevice=true) const;
    CurrentDataLocation GetCurrentMatrixLocation() const { return m_currentDataLocation; };
    void SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat newMatrixFormat = matrixFormatSparseCSR); //sets matrix type between dense and sparse
    size_t GetNumRows() const;
    size_t GetNumCols() const;
    size_t GetNumElements() const;
    wchar_t* GetMatrixName() const;
    void SetMatrixName(const wchar_t* s);
    bool IsEmpty() const;
    size_t BufferSize() const;
    ElemType* BufferPointer() const;
    size_t NzCount() const;
    ElemType* CopyToArray() const; //allocated by the callee but need to be deleted by the caller
    size_t CopyToArray(ElemType*& arrayCopyTo, size_t& currentArraySize) const; //allocated by the callee but need to be deleted by the caller
    Matrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
    Matrix<ElemType>& AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);
    void ShiftBy(int numShift) ;
    //SGD update with momentum applied to *this (the parameter matrix)
    void NormalGrad(Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum);
void Adagrad(Matrix<ElemType>& gradients);
void RmsProp(Matrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN);
void Reshape(const size_t numRows, const size_t numCols);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 0, bool growOnly = true); //by default we only reallocate if need to grow
size_t GetAllocatedSize() const;
void Reset(); //reset for sparse matrix
const ElemType operator() (const size_t row, const size_t col) const;
ElemType& operator() (const size_t row, const size_t col);
ElemType Get00Element() const;
void SetValue(const ElemType v);
void SetValue(const DeviceBoundNumber<ElemType>& db_number);
void SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format=matrixFormatSparseCSR);
void SetValue(const size_t numRows, const size_t numCols, ElemType *pArray, const size_t matrixFlags=matrixFlagNormal, int deviceId=MANAGEDEXTERN);
void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); // set matrix sparsely
void SetMatrixFromCSCFormat(const GPUSPARSE_INDEX_TYPE *h_CSCCol, const GPUSPARSE_INDEX_TYPE *h_Row, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols);
void SetMatrixFromLabelAndClass(CPUSPARSE_INDEX_TYPE *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize);
void SetColumn(const ElemType* colPointer, size_t colInd);
void SetColumn(const ElemType val, size_t colInd);
void SetColumn(const Matrix<ElemType>& valMat, size_t colInd);
void SetDiagonalValue(const ElemType v);
void SetDiagonalValue(Matrix<ElemType>& vector);
void SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed=USE_TIME_BASED_SEED);
void SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed=USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed=USE_TIME_BASED_SEED);
void AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed=USE_TIME_BASED_SEED);
Matrix<ElemType> Transpose();
Matrix<ElemType>& AssignTransposeOf (const Matrix<ElemType>& a);
Matrix<ElemType>& operator+= (const ElemType alpha);
Matrix<ElemType> operator+ (const ElemType alpha) const;
Matrix<ElemType>& AssignSumOf(const ElemType alpha, const Matrix<ElemType>& a);
Matrix<ElemType>& operator+= (const Matrix<ElemType>& a);
Matrix<ElemType> operator+ (const Matrix<ElemType>& a) const;
Matrix<ElemType>& AssignSumOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& operator-= (const ElemType alpha);
Matrix<ElemType> operator- (const ElemType alpha) const;
Matrix<ElemType>& AssignDifferenceOf(const ElemType alpha, const Matrix<ElemType>& a);
Matrix<ElemType>& AssignDifferenceOf(const Matrix<ElemType>& a, const ElemType alpha);
Matrix<ElemType>& operator-= (const Matrix<ElemType>& a);
Matrix<ElemType> operator- (const Matrix<ElemType>& a) const;
Matrix<ElemType>& AssignDifferenceOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& operator*= (const ElemType alpha);
Matrix<ElemType> operator* (const ElemType alpha) const;
Matrix<ElemType>& AssignProductOf(const ElemType alpha, const Matrix<ElemType>& a);
Matrix<ElemType> operator* (const Matrix<ElemType>& a) const;
Matrix<ElemType>& AssignProductOf (const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB);
Matrix<ElemType>& operator/= (ElemType alpha);
Matrix<ElemType> operator/ (ElemType alpha) const;
Matrix<ElemType>& operator^= (ElemType alpha); //element-wise power
Matrix<ElemType> operator^ (ElemType alpha) const; //element-wise power
Matrix<ElemType>& AssignElementPowerOf(const Matrix<ElemType>& a, const ElemType power);
Matrix<ElemType>& ElementMultiplyWith (const Matrix<ElemType>& a);
Matrix<ElemType>& AssignElementProductOf (const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AddElementProductOf (const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AssignElementDivisionOf (const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& ElementDivideBy(const Matrix<ElemType>& a);
Matrix<ElemType>& ColumnElementMultiplyWith(const Matrix<ElemType>& a);
Matrix<ElemType>& RowElementMultiplyWith(const Matrix<ElemType>& a);
Matrix<ElemType>& ColumnElementDivideBy(const Matrix<ElemType>& a);
Matrix<ElemType>& RowElementDivideBy(const Matrix<ElemType>& a);
Matrix<ElemType>& ElementInverse ();
Matrix<ElemType>& AssignElementInverseOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLinearRectifierDerivative();
Matrix<ElemType>& AssignLinearRectifierDerivativeOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceSigmoidDerivative();
Matrix<ElemType>& AssignSigmoidDerivativeOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceSigmoid ();
Matrix<ElemType>& AssignSigmoidOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceTanh ();
Matrix<ElemType>& AssignTanhOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLogSoftmax (const bool isColWise);
Matrix<ElemType>& AssignLogSoftmaxOf (const Matrix<ElemType>& a, const bool isColWise);
Matrix<ElemType>& InplaceSqrt ();
Matrix<ElemType>& AssignSqrtOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceExp ();
Matrix<ElemType>& AssignExpOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLog ();
Matrix<ElemType>& AssignLogOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceCosine ();
Matrix<ElemType>& AssignCosineOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceNegativeSine ();
Matrix<ElemType>& AssignNegativeSineOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceLog10 ();
Matrix<ElemType>& AssignLog10Of (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceAbs ();
Matrix<ElemType>& AssignAbsOf (const Matrix<ElemType>& a);
Matrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
Matrix<ElemType>& AssignTruncateBottomOf (const Matrix<ElemType>& a, const ElemType threshold);
Matrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
Matrix<ElemType>& AssignTruncateTopOf (const Matrix<ElemType>& a, const ElemType threshold);
Matrix<ElemType>& InplaceTruncate (const ElemType threshold);
Matrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
DeviceBoundNumber<ElemType> Sum_AsDeviceBoundNum() const;
ElemType SumOfAbsElements () const; //sum of all abs(elements)
ElemType SumOfElements () const; //sum of all elements
Matrix<ElemType>& AssignSumOfElements(const Matrix<ElemType>& a);
Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
bool IsEqualTo(const Matrix<ElemType>& a, const ElemType threshold = 1e-8) const;
void VectorNorm1(Matrix<ElemType>& c, const bool isColWise) const;
Matrix<ElemType>& AssignVectorNorm1Of(Matrix<ElemType>& a, const bool isColWise);
void VectorNorm2(Matrix<ElemType>& c, const bool isColWise) const;
Matrix<ElemType>& AssignVectorNorm2Of(Matrix<ElemType>& a, const bool isColWise);
void VectorNormInf(Matrix<ElemType>& c, const bool isColWise) const;
Matrix<ElemType>& AssignVectorNormInfOf(Matrix<ElemType>& a, const bool isColWise);
Matrix<ElemType>& AssignInnerProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool isColWise);
Matrix<ElemType>& AssignKhatriRaoProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AddColumnReshapeProductOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const bool transposeAColumn);
Matrix<ElemType>& AddWithScaleOf(ElemType alpha, const Matrix<ElemType>& a);
ElemType FrobeniusNorm() const;
Matrix<ElemType>& AssignFrobeniusNormOf(const Matrix<ElemType>& a);
ElemType MatrixNormInf() const;
ElemType MatrixNorm1() const;
ElemType MatrixNorm0() const; //number of non-zero elemets
Matrix<ElemType>& AssignSignOf(const Matrix<ElemType>& a);
Matrix<ElemType>& AddSignOf(const Matrix<ElemType>& a);
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMin(Matrix<ElemType>& mainndexes, Matrix<ElemType>& minValues, const bool isColWise) const;
Matrix<ElemType>& AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AssignInnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b); //this method will resize(1,1) first
bool HasNan (const char * name) const;
size_t CountNanInf() const;
void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const;
void Print(const char* matrixName = nullptr) const; //print whole matrix. can be expensive

// --- Convolution / pooling support -------------------------------------------------------
// These routines pack/unpack image mini-batches so that convolution can be computed as a
// matrix product, and implement forward/backward passes for max- and average-pooling.
// The geometry arguments describe the per-sample image layout (width x height x channels);
// horizontalSubsample/verticalSubsample are the kernel/window strides.
// NOTE(review): exact packed memory layout is defined by the implementation -- confirm there.

// Unrolls inputSubBatch into 'this' so a convolution becomes a single matrix multiply
// (im2col-style packing -- presumably; verify against the implementation).
// zeroPadding: if true, positions falling outside the input are treated as zeros.
Matrix<ElemType>& AssignPackedConvolutionInput(const Matrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false);
// Inverse of AssignPackedConvolutionInput: scatters the packed representation in 'this'
// back into inputSubBatch (used on the backward pass). Same geometry arguments apply.
Matrix<ElemType>& UnpackConvolutionInput(Matrix<ElemType>& inputSubBatch,
const size_t inputWidth, const size_t inputHeight, const size_t inputChannels,
const size_t outputWidth, const size_t outputHeight, const size_t outputChannels,
const size_t kernelWidth, const size_t kernelHeight, const size_t horizontalSubsample, const size_t verticalSubsample,
const bool zeroPadding = false) const;
// Max-pooling forward: 'this' receives, per window, the maximum of inputBatch.
Matrix<ElemType>& AssignMaxPoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
// Max-pooling backward: accumulates (note: Add*, not Assign*) the gradient into 'this';
// needs inputBatch and outputBatch to locate which input position held each window's max.
Matrix<ElemType>& AddMaxPoolingGradient(const Matrix<ElemType>& outputGradientBatch, const Matrix<ElemType>& inputBatch, const Matrix<ElemType>& outputBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
// Average-pooling forward: 'this' receives, per window, the mean of inputBatch.
Matrix<ElemType>& AssignAveragePoolingResult(const Matrix<ElemType>& inputBatch, const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
// Average-pooling backward: accumulates the gradient into 'this'. Unlike max-pooling,
// no forward-pass activations are needed (the gradient spreads uniformly over the window).
Matrix<ElemType>& AddAveragePoolingGradient(const Matrix<ElemType>& outputGradientBatch,
const size_t channels,
const size_t inputWidth, const size_t inputHeight, const size_t inputSizePerSample,
const size_t outputWidth, const size_t outputHeight, const size_t outputSizePerSample,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
public:
// Scalar (element-type) math helpers.
ElemType Exp10(ElemType num);                  // presumably 10^num -- confirm in implementation
ElemType Mod(ElemType x , ElemType y);         // floating-point modulus of x by y -- sign convention defined by implementation
ElemType LogAdd(ElemType x, ElemType y);       // presumably stable log(exp(x)+exp(y)) (log-sum-exp) -- confirm
public:
static DEVICEID_TYPE GetBestGPUDeviceId(); //{ return GPUMatrix<ElemType>::GetBestGPUDeviceId();}
//static BLAS functions
// All static routines below write their result into the output argument 'c' (or 'a' for
// the in-place Scale overloads) and dispatch to the CPU or GPU implementation based on
// where the operands live.
// singular value decomposition of A as A = U*SIGMA*VT
static void SVD(const Matrix<ElemType>& A, Matrix<ElemType>& SIGMA, Matrix<ElemType>& U, Matrix<ElemType>& VT);
// GEMM-style: c = alpha * op(a) * op(b) + beta * c, where op transposes when the flag is set.
static void MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB,
ElemType beta, Matrix<ElemType>& c);
// c += op(a) * op(b)   (alpha = beta = 1 case)
static void MultiplyAndAdd(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB, Matrix<ElemType>& c);
// c = op(a) * op(b)
static void Multiply(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB, Matrix<ElemType>& c);
// c = a * b  (no transposition)
static void Multiply(const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
// c += alpha * a   (AXPY-style)
static void ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c);
// c = alpha * a + beta * c
static void ScaleAndAdd(ElemType alpha, const Matrix<ElemType>& a, ElemType beta, Matrix<ElemType>& c);
// c += alpha * (a - b)  /  c = alpha * (a - b); the Matrix-alpha overloads presumably
// require alpha to be a 1x1 matrix (cf. the Scale overload below) -- confirm.
static void AddScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AssignScaledDifference(const ElemType alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AddScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
static void AssignScaledDifference(const Matrix<ElemType>& alpha, const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c);
// Single-element ops: c(ci,cj) += a(ai,aj)  /  c(ci,cj) = a(ai,aj).
static void AddElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj);
//static void AddLogElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj);
static void AssignElementToElement(const Matrix<ElemType>& a, const size_t ai, const size_t aj, Matrix<ElemType>& c, const size_t ci, const size_t cj);
// In-place and out-of-place scaling: a *= alpha  /  c = alpha * a.
static void Scale(ElemType alpha, Matrix<ElemType>& a);
static void Scale(Matrix<ElemType>& alpha, Matrix<ElemType>& a); //In this case Matrix alpha must be 1x1
static void Scale(ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c);
// Column-wise (or row-wise, per isColWise) inner products of a and b into c.
static void InnerProduct (const Matrix<ElemType>& a, const Matrix<ElemType>& b, Matrix<ElemType>& c, const bool isColWise);
// Frobenius-style inner product reduced to a single scalar.
static ElemType InnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
// c = a .^ alpha  (element-wise power)
static void ElementWisePower (ElemType alpha, const Matrix<ElemType>& a, Matrix<ElemType>& c);
// Element-wise comparison within the given tolerance.
static bool AreEqual(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const ElemType threshold = 1e-8);
public:
// Deserializes a matrix from 'stream'. The on-disk record begins with a one-character
// format tag: 'd' = dense, 's' = sparse. The payload is read into a CPU or GPU
// representation depending on where M currently lives; lazily allocates the backing
// storage if it does not exist yet. CPU sparse deserialization is not implemented.
friend File& operator>>(File& stream, Matrix<ElemType>& M)
{
    char formatTag;
    stream >> formatTag;
    switch (formatTag)
    {
    case 'd': // dense payload
        if (M.GetDeviceId() < 0)
        {
            if (M.m_CPUMatrix == NULL)
                M.m_CPUMatrix = new CPUMatrix<ElemType>();
            stream >> (*M.m_CPUMatrix);
            M.SetDataLocation(CPU, DENSE);
        }
        else
        {
            if (M.m_GPUMatrix == NULL)
                M.m_GPUMatrix = new GPUMatrix<ElemType>();
            stream >> (*M.m_GPUMatrix);
            M.SetDataLocation(GPU, DENSE);
        }
        break;
    case 's': // sparse payload
        if (M.GetDeviceId() < 0)
        {
            NOT_IMPLEMENTED; // no CPU sparse reader; transfer the matrix to GPU first
        }
        else
        {
            if (M.m_GPUSparseMatrix == NULL)
                M.m_GPUSparseMatrix = new GPUSparseMatrix<ElemType>();
            stream >> (*M.m_GPUSparseMatrix);
            M.SetDataLocation(GPU, SPARSE);
        }
        break;
    default:
        LogicError("wrong matrix type!");
    }
    return stream;
}
// Serializes M to 'stream' in the format read back by operator>>: a one-character tag
// ('d' dense, 's' sparse) followed by the payload of whichever representation (CPU/GPU)
// currently holds the data. CPU sparse serialization is not implemented.
friend File& operator<<(File& stream, const Matrix<ElemType>& M)
{
    const bool isDense = (M.GetMatrixType() == MatrixType::DENSE);
    const bool onCPU = (M.GetDeviceId() < 0);

    stream << (isDense ? 'd' : 's');
    if (isDense)
    {
        if (onCPU)
            stream << (*M.m_CPUMatrix);
        else
            stream << (*M.m_GPUMatrix);
    }
    else
    {
        if (onCPU)
            NOT_IMPLEMENTED; // no CPU sparse writer (would be *M.m_CPUMatrix equivalent)
        else
            stream << (*M.m_GPUSparseMatrix);
    }
    return stream;
}
public:
// --- Class-based cross-entropy -----------------------------------------------------------
// Static helpers for a class-factored softmax/cross-entropy criterion (presumably for
// class-based language models -- confirm against the implementation). 'cls' and 'idx2cls'
// appear to map between words and their classes; passed as pointers, so NOTE(review):
// check whether NULL is a legal value before relying on it.
static void ClassEntropy(const Matrix<ElemType>& a, const Matrix<ElemType>& weight,
const Matrix<ElemType> & label, const Matrix<ElemType>* cls,
const Matrix<ElemType>* idx2cls, Matrix<ElemType>& etp, Matrix<ElemType>& entropyScore);
// Error computation for the criterion above (in-place on 'a' -- confirm).
static void ClassEntropyError(const Matrix<ElemType>& a);
// Gradient of the criterion w.r.t. the input activations, written into 'grd'.
static void ClassEntropyGradientOfInput(const Matrix<ElemType>& error, const Matrix<ElemType>& weight, Matrix<ElemType>& grd);
// Gradient of the criterion w.r.t. the weight matrix, written into 'grd'.
static void ClassEntropyGradientOfWeight(
const Matrix<ElemType>& error,
const Matrix<ElemType>& input,
const Matrix<ElemType>& weight,
const Matrix<ElemType> & label,
const Matrix<ElemType>* cls,
const Matrix<ElemType>* idx2cls,
Matrix<ElemType>& grd);
};
// Convenience aliases for the two supported element types.
typedef Matrix<float> SingleMatrix;
typedef Matrix<double> DoubleMatrix;
}}}