This commit is contained in:
Yu 2015-02-08 02:55:40 -05:00
Parent db9b222b4e
Commit b37fc78688
11 changed files with 38 additions and 12251 deletions

View File

@@ -1,4 +1,3 @@
<<<<<<< HEAD
#include "TimerUtility.h"
#ifdef WIN32
@@ -38,44 +37,3 @@ namespace Microsoft{
}
}
}
=======
#include "TimerUtility.h"
#ifdef WIN32
#include <Windows.h>
#else
#include <time.h>
#endif
namespace Microsoft{
namespace MSR {
namespace CNTK {
// Returns the number of milliseconds elapsed
unsigned long long Timer::MilliSecondElapsed()
{
#ifdef WIN32
FILETIME ft;
LARGE_INTEGER li;
GetSystemTimeAsFileTime(&ft); // ideally we would use GetSystemTimePreciseAsFileTime, but it's only available on Win8+ and Win Server 2012+
li.LowPart = ft.dwLowDateTime;
li.HighPart = ft.dwHighDateTime;
unsigned long long ret = li.QuadPart;
ret -= 116444736000000000LL; // Make the values consistent with Linux.
ret /= 10000; // From 100 nano seconds (10^-7) to 1 millisecond (10^-3)
return ret;
#else
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts); // Works on Linux
UINT64 ret = (UINT64)ts.tv_sec * 1000 + ts.tv_nsec / 1000000; // widen tv_sec before multiplying so a 32-bit time_t cannot overflow
return ret;
#endif
}
}
}
}
>>>>>>> origin/master
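The 116444736000000000 constant above is the number of 100 ns FILETIME ticks between the Windows epoch (1601-01-01) and the Unix epoch (1970-01-01). A minimal standalone sketch (not part of this commit) that re-derives it from the 134774 days separating the two epochs:

#include <stdint.h>
#include <stdio.h>

int main()
{
    // 1601-01-01 .. 1970-01-01 is 134774 days (365 * 369 years + 89 leap days)
    const uint64_t seconds = 134774ULL * 86400ULL;  // 11644473600 seconds
    const uint64_t ticks   = seconds * 10000000ULL; // in 100 ns FILETIME ticks
    printf("%llu\n", (unsigned long long)ticks);    // prints 116444736000000000
    return ticks == 116444736000000000ULL ? 0 : 1;
}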

View File

@@ -498,17 +498,13 @@ public:
// I.e. our chunks are a little larger than wanted (on average, by half the average utterance length).
if (thisallchunks.empty() || thisallchunks.back().totalframes > chunkframes || thisallchunks.back().numutterances() >= frameref::maxutterancesperchunk)
{
//fprintf(stderr, "hahahahahaahhaha %d %d %d %d %d\n", chunkframes,_totalframes, i, thisallchunks.back().totalframes,thisallchunks.back().numutterances());
//fprintf(stderr, "hahahahahaahahah %d\n", thisallchunks.size());
thisallchunks.push_back (utterancechunkdata());
fprintf(stderr, "after hahahahahaahhaha %d %d %d %d %d %d\n", chunkframes,_totalframes, i, thisallchunks.back().totalframes,thisallchunks.back().numutterances(),utteranceset.size());
}
// append utterance to last chunk
utterancechunkdata & currentchunk = thisallchunks.back();
//std::move(utteranceset[i]);
//fprintf(stderr, "after hahahahahaahhaha %d %d %d %d %d %d\n", chunkframes,_totalframes, i, thisallchunks.back().totalframes,thisallchunks.back().numutterances(),utteranceset.size());
currentchunk.push_back (std::move (utteranceset[i])); // move it out from our temp array into the chunk
// TODO: above push_back does not actually 'move' because the internal push_back does not accept that
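As the comment above notes, a chunk is only closed after it has already crossed the chunkframes budget, which is why chunks overshoot by about half an average utterance. A hedged sketch of that policy in isolation (Chunk and appendUtterance are invented names, not CNTK API):

#include <stddef.h>
#include <vector>

struct Chunk { size_t totalframes = 0; size_t numutterances = 0; };

// Open a new chunk only once the current one is already over budget,
// mirroring the thisallchunks.push_back condition above.
void appendUtterance(std::vector<Chunk> &chunks, size_t utteranceFrames,
                     size_t chunkframes, size_t maxUtterancesPerChunk)
{
    if (chunks.empty() || chunks.back().totalframes > chunkframes
        || chunks.back().numutterances >= maxUtterancesPerChunk)
        chunks.push_back(Chunk());
    chunks.back().totalframes += utteranceFrames;
    chunks.back().numutterances++;
}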

View File

@@ -1,4 +1,3 @@
<<<<<<< HEAD
// SequenceParser.h : Parses the UCI format using a custom state machine (for speed)
//
//
@@ -636,621 +635,3 @@ public:
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos);
};
=======
// SequenceParser.h : Parses the UCI format using a custom state machine (for speed)
//
//
// <copyright file="SequenceParser.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include <string>
#include <vector>
#include <assert.h>
#include <fstream>
#include <map>
#include <stdint.h>
using namespace std;
#define MAXSTRING 500000
// UCI label location types
enum LabelMode
{
LabelNone = 0,
LabelFirst = 1,
LabelLast = 2,
};
enum ParseMode
{
ParseNormal = 0,
ParseLineCount = 1
};
enum SequenceFlags
{
seqFlagNull = 0,
seqFlagLineBreak = 1, // line break on the parsed line
seqFlagEmptyLine = 2, // empty line
seqFlagStartLabel = 4,
seqFlagStopLabel = 8
};
// SequencePosition, save the ending indexes into the array for a sequence
struct SequencePosition
{
size_t numberPos; // max position in the number array for this sequence
size_t labelPos; // max position in the label array for this sequence
unsigned flags; // flags that apply to this sequence
SequencePosition(size_t numPos, size_t labelPos, unsigned flags):
numberPos(numPos), labelPos(labelPos), flags(flags)
{}
};
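// A short aside on combining SequenceFlags (illustrative sketch, not CNTK
// code): the conditional operator binds more loosely than '|', so each
// conditional flag must be parenthesized before OR-ing:
//     unsigned flags = (beginSeq ? seqFlagStartLabel : 0)
//                    | (endSeq   ? seqFlagStopLabel  : 0)
//                    | seqFlagLineBreak;
// Without the parentheses, "beginSeq ? seqFlagStartLabel : 0 | rest" parses
// as "beginSeq ? seqFlagStartLabel : (0 | rest)" and can silently drop flags.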
// SequenceParser - the parser for the UCI format files
// for ultimate speed, this class implements a state machine to read these format files
template <typename NumType, typename LabelType=int>
class SequenceParser
{
protected:
enum ParseState
{
WholeNumber = 0,
Remainder = 1,
Exponent = 2,
Whitespace = 3,
Sign = 4,
ExponentSign = 5,
Period = 6,
TheLetterE = 7,
EndOfLine = 8,
Label = 9, // any non-number things we run into
ParseStateMax = 10, // number of parse states
LineCountEOL = 10,
LineCountOther = 11,
AllStateMax = 12
};
// type of label processing
ParseMode m_parseMode;
// definition of label and feature dimensions
size_t m_dimFeatures;
size_t m_dimLabelsIn;
std::string m_beginSequenceIn; // starting sequence string (i.e. <s>)
std::string m_endSequenceIn; // ending sequence string (i.e. </s>)
size_t m_dimLabelsOut;
std::string m_beginSequenceOut; // starting sequence string (i.e. 'O')
std::string m_endSequenceOut; // ending sequence string (i.e. 'O')
// level of screen output
int m_traceLevel;
// current state of the state machine
ParseState m_current_state;
// state tables
DWORD *m_stateTable;
// numeric state machine variables
double m_partialResult;
double m_builtUpNumber;
double m_divider;
double m_wholeNumberMultiplier;
double m_exponentMultiplier;
// label state machine variables
size_t m_spaceDelimitedStart;
size_t m_spaceDelimitedMax; // start of the next whitespace sequence (one past the end of the last word)
int m_numbersConvertedThisLine;
int m_labelsConvertedThisLine;
int m_elementsConvertedThisLine;
// sequence state machine variables
bool m_beginSequence;
bool m_endSequence;
std::string m_beginTag;
std::string m_endTag;
// global stats
int m_totalNumbersConverted;
int m_totalLabelsConverted;
// file positions/buffer
FILE * m_pFile;
int64_t m_byteCounter;
int64_t m_fileSize;
BYTE * m_fileBuffer;
size_t m_bufferStart;
size_t m_bufferSize;
// last label was a string (for last label processing)
bool m_lastLabelIsString;
// vectors to append to
std::vector<NumType>* m_numbers; // pointer to vectors to append with numbers
std::vector<LabelType>* m_labels; // pointer to vector to append with labels (may be numeric)
// FUTURE: do we want a vector to collect string labels in the non string label case? (signifies an error)
// SetState for a particular value
void SetState(int value, ParseState m_current_state, ParseState next_state);
// SetStateRange - set states transitions for a range of values
void SetStateRange(int value1, int value2, ParseState m_current_state, ParseState next_state);
// SetupStateTables - setup state transition tables for each state
// each state has a block of 256 states indexed by the incoming character
void SetupStateTables();
// reset all line state variables
void PrepareStartLine();
// reset all number accumulation variables
void PrepareStartNumber();
// reset all state variables to start reading at a new position
void PrepareStartPosition(size_t position);
// UpdateBuffer - load the next buffer full of data
// returns - number of records read
size_t UpdateBuffer();
public:
// SequenceParser constructor
SequenceParser();
// setup all the state variables and state tables for state machine
void Init();
// Parser destructor
~SequenceParser();
private:
// DoneWithLabel - Called when a string label is found
void DoneWithLabel();
// Called when a number is complete
void DoneWithValue();
// store label is specialized by LabelType
void StoreLabel(NumType value);
// StoreLastLabel - store the last label (for numeric types), transfers to label vector
// string label types handled in specialization
void StoreLastLabel();
public:
// SetParseMode - Set the parsing mode
// mode - set mode to either ParseLineCount, or ParseNormal
void SetParseMode(ParseMode mode);
// SetTraceLevel - Set the level of screen output
// traceLevel - traceLevel, zero means no output, 1 epoch related output, > 1 all output
void SetTraceLevel(int traceLevel);
// ParseInit - Initialize a parse of a file
// fileName - path to the file to open
// dimFeatures - number of features for precomputed features
// dimLabelsIn - number of labels possible on input
// dimLabelsOut - number of labels possible on output
// beginSequenceIn - beginSequence input label
// endSequenceIn - endSequence input label
// beginSequenceOut - beginSequence output label
// endSequenceOut - endSequence output label
// bufferSize - size of temporary buffer to store reads
// startPosition - file position on which we should start
void ParseInit(LPCWSTR fileName, size_t dimFeatures, size_t dimLabelsIn, size_t dimLabelsOut, std::string beginSequenceIn="<s>", std::string endSequenceIn="</s>", std::string beginSequenceOut="O", std::string endSequenceOut="O", size_t bufferSize=1024*256, size_t startPosition=0)
{
assert(fileName != NULL);
m_dimFeatures = dimFeatures;
m_dimLabelsIn = dimLabelsIn;
m_beginSequenceIn = beginSequenceIn;
m_endSequenceIn = endSequenceIn;
m_dimLabelsOut = dimLabelsOut;
m_beginSequenceOut = beginSequenceOut;
m_endSequenceOut = endSequenceOut;
m_parseMode = ParseNormal;
m_traceLevel = 0;
m_bufferSize = bufferSize;
m_bufferStart = startPosition;
m_beginTag = m_beginSequenceIn;
m_endTag = m_endSequenceIn;
// if we have a file already open, cleanup
if (m_pFile != NULL)
SequenceParser<NumType, LabelType>::~SequenceParser();
errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" );
if (err)
RuntimeError("SequenceParser::ParseInit - error opening file");
int rc = _fseeki64(m_pFile, 0, SEEK_END);
if (rc)
RuntimeError("SequenceParser::ParseInit - error seeking in file");
m_fileSize = GetFilePosition();
m_fileBuffer = new BYTE[m_bufferSize];
SetFilePosition(startPosition);
}
// Parse - Parse the data
// recordsRequested - number of records requested
// labels - pointer to vector to return the labels
// numbers - pointer to vector to return the numbers
// seqPos - pointers to the other two arrays showing positions of each sequence
// returns - number of records actually read, if the end of file is reached the return value will be < requested records
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos)
{
assert(numbers != NULL || m_dimFeatures == 0 || m_parseMode == ParseLineCount);
assert(labels != NULL || (m_dimLabelsIn == 0 && m_dimLabelsOut == 0) || m_parseMode == ParseLineCount);
// transfer to member variables
m_numbers = numbers;
m_labels = labels;
long TickStart = GetTickCount( );
long recordCount = 0;
long lineCount = 0;
size_t bufferIndex = m_byteCounter-m_bufferStart;
SequencePosition sequencePositionLast(0,0,seqFlagNull);
while (m_byteCounter < m_fileSize && recordCount < recordsRequested)
{
// check to see if we need to update the buffer
if (bufferIndex >= m_bufferSize)
{
UpdateBuffer();
bufferIndex = m_byteCounter-m_bufferStart;
}
char ch = m_fileBuffer[bufferIndex];
ParseState nextState = (ParseState)m_stateTable[(m_current_state<<8)+ch];
if( nextState <= Exponent )
{
m_builtUpNumber = m_builtUpNumber * 10 + (ch - '0');
// if we are in the decimal portion of a number increase the divider
if (nextState == Remainder)
m_divider *= 10;
}
// only do a test on a state transition
if (m_current_state != nextState)
{
// System.Diagnostics.Debug.WriteLine("Current state = " + m_current_state + ", next state = " + nextState);
// if the nextState is a label, we don't want to do any number processing, it's a number prefixed string
if (nextState != Label)
{
// do the numeric processing
switch (m_current_state)
{
case TheLetterE:
if (m_divider != 0) // decimal number
m_partialResult += m_builtUpNumber / m_divider;
else // integer
m_partialResult = m_builtUpNumber;
m_builtUpNumber = 0;
break;
case WholeNumber:
// could be followed by a remainder, or an exponent
if (nextState != TheLetterE)
if( nextState != Period)
DoneWithValue();
if (nextState == Period)
{
m_partialResult = m_builtUpNumber;
m_divider = 1;
m_builtUpNumber = 0;
}
break;
case Remainder:
// can only be followed by a exponent
if (nextState != TheLetterE)
DoneWithValue();
break;
case Exponent:
DoneWithValue();
break;
}
}
// label handling
switch (m_current_state)
{
case Label:
DoneWithLabel();
break;
case EndOfLine:
if (seqPos)
{
SequencePosition sequencePos(numbers->size(), labels->size(),
(m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak);
// add a sequence element to the list
seqPos->push_back(sequencePos);
sequencePositionLast = sequencePos;
}
// end of sequence determines record separation
if (m_endSequence)
recordCount = (long)labels->size();
PrepareStartLine();
break;
case Whitespace:
// this is the start of the next space delimited entity
if (nextState != EndOfLine)
m_spaceDelimitedStart = m_byteCounter;
break;
}
// label handling for next state
switch (nextState)
{
// do sign processing on nextState, since we still have the character handy
case Sign:
if (ch == '-')
m_wholeNumberMultiplier = -1;
break;
case ExponentSign:
if (ch == '-')
m_exponentMultiplier = -1;
break;
// going into whitespace or endOfLine, so end of space delimited entity
case Whitespace:
m_spaceDelimitedMax = m_byteCounter;
// hit whitespace and nobody processed anything, so add as label
//if (m_elementsConvertedThisLine == elementsProcessed)
// DoneWithLabel();
break;
case EndOfLine:
if (m_current_state != Whitespace)
{
m_spaceDelimitedMax = m_byteCounter;
// hit whitespace and nobody processed anything, so add as label
//if (m_elementsConvertedThisLine == elementsProcessed)
// DoneWithLabel();
}
// process the label at the end of a line
//if (m_labelMode == LabelLast && m_labels != NULL)
//{
// StoreLastLabel();
//}
// intentional fall-through
case LineCountEOL:
lineCount++; // done with another record
if (m_traceLevel > 1)
{
// print progress dots
if (recordCount % 100 == 0)
{
if (recordCount % 1000 == 0)
{
if (recordCount % 10000 == 0)
{
fprintf(stderr, "#");
}
else
{
fprintf(stderr, "+");
}
}
else
{
fprintf(stderr, ".");
}
}
}
break;
case LineCountOther:
m_spaceDelimitedStart = m_byteCounter;
break;
}
}
m_current_state = nextState;
// move to next character
m_byteCounter++;
bufferIndex++;
} // while
// at the end of the file we may need to add an additional sequencePosition push
// this could probably be fixed by taking another pass through the loop above, but this is easier
if (seqPos)
{
SequencePosition sequencePos(numbers->size(), labels->size(),
(m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak);
// add the final sequence element if needed
if (!(sequencePos.labelPos == sequencePositionLast.labelPos && sequencePos.numberPos == sequencePositionLast.numberPos))
{
seqPos->push_back(sequencePos);
}
}
long TickStop = GetTickCount( );
long TickDelta = TickStop - TickStart;
if (m_traceLevel > 2)
fprintf(stderr, "\n%d ms, %d numbers parsed\n\n", TickDelta, m_totalNumbersConverted );
return lineCount;
}
int64_t GetFilePosition();
void SetFilePosition(int64_t position);
// HasMoreData - test if the current dataset have more data
// returns - true if it does, false if not
bool HasMoreData();
};
// StoreLabel - string version gets last space delimited string and stores in labels vector
template <>
void SequenceParser<float, std::string>::StoreLabel(float finalResult);
// DoneWithLabel - string version stores string label
template <>
void SequenceParser<float, std::string>::DoneWithLabel();
// StoreLastLabel - string version
template <>
void SequenceParser<float, std::string>::StoreLastLabel();
// NOTE: This code is identical to the float version; we don't know how to specialize on a template parameter that covers only one of the two parameters
// StoreLabel - string version gets last space delimited string and stores in labels vector
template <>
void SequenceParser<double, std::string>::StoreLabel(double finalResult);
// DoneWithLabel - string version stores string label
template <>
void SequenceParser<double, std::string>::DoneWithLabel();
// StoreLastLabel - string version
template <>
void SequenceParser<double, std::string>::StoreLastLabel();
/// language model sequence parser
template <typename NumType, typename LabelType>
class LMSequenceParser : public SequenceParser<NumType, LabelType>
{
protected:
FILE * mFile;
std::wstring mFileName;
public:
LMSequenceParser() {
mFile = nullptr;
};
~LMSequenceParser() {
if (mFile) fclose(mFile);
}
void ParseInit(LPCWSTR fileName, size_t dimFeatures, size_t dimLabelsIn, size_t dimLabelsOut, std::string beginSequenceIn="<s>", std::string endSequenceIn="</s>", std::string beginSequenceOut="O", std::string endSequenceOut="O")
{
assert(fileName != NULL);
mFileName = fileName;
m_dimFeatures = dimFeatures;
m_dimLabelsIn = dimLabelsIn;
m_beginSequenceIn = beginSequenceIn;
m_endSequenceIn = endSequenceIn;
m_dimLabelsOut = dimLabelsOut;
m_beginSequenceOut = beginSequenceOut;
m_endSequenceOut = endSequenceOut;
m_parseMode = ParseNormal;
m_traceLevel = 0;
m_bufferSize = 0;
m_bufferStart = 0;
m_beginTag = m_beginSequenceIn;
m_endTag = m_endSequenceIn;
m_fileSize = -1;
m_fileBuffer = NULL;
if (mFile) fclose(mFile);
if (_wfopen_s(&mFile, fileName, L"rt") != 0)
RuntimeError("cannot open file %s", fileName);
}
void ParseReset()
{
if (mFile) fseek(mFile, 0, SEEK_SET);
}
// Parse - Parse the data
// recordsRequested - number of records requested
// labels - pointer to vector to return the labels
// numbers - pointer to vector to return the numbers
// seqPos - pointers to the other two arrays showing positions of each sequence
// returns - number of records actually read, if the end of file is reached the return value will be < requested records
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos)
{
assert(numbers != NULL || m_dimFeatures == 0 || m_parseMode == ParseLineCount);
assert(labels != NULL || (m_dimLabelsIn == 0 && m_dimLabelsOut == 0) || m_parseMode == ParseLineCount);
// transfer to member variables
m_numbers = numbers;
m_labels = labels;
long TickStart = GetTickCount( );
long recordCount = 0;
long orgRecordCount = (long)labels->size();
long lineCount = 0;
SequencePosition sequencePositionLast(0,0,seqFlagNull);
/// get line
char ch2[MAXSTRING];
while (recordCount < recordsRequested && fgets(ch2, MAXSTRING, mFile) != nullptr)
{
string ch = ch2;
std::vector<string> vstr;
vstr = sep_string(ch, " ");
if (vstr.size() < 3)
continue;
for (size_t i = 0; i < vstr.size(); i++)
{
labels->push_back(vstr[i]);
}
SequencePosition sequencePos(numbers->size(), labels->size(),
(m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak);
// add a sequence element to the list
seqPos->push_back(sequencePos);
sequencePositionLast = sequencePos;
recordCount = (long)labels->size() - orgRecordCount;
lineCount ++;
} // while
long TickStop = GetTickCount( );
long TickDelta = TickStop - TickStart;
if (m_traceLevel > 2)
fprintf(stderr, "\n%d ms, %d numbers parsed\n\n", TickDelta, m_totalNumbersConverted );
return lineCount;
}
};
typedef struct{
size_t sLen;
size_t sBegin;
size_t sEnd;
} stSentenceInfo;
/// language model sequence parser
template <typename NumType, typename LabelType>
class LMBatchSequenceParser: public LMSequenceParser<NumType, LabelType>
{
public:
vector<stSentenceInfo> mSentenceIndex2SentenceInfo;
public:
LMBatchSequenceParser() { };
~LMBatchSequenceParser() { }
void ParseInit(LPCWSTR fileName, size_t dimFeatures, size_t dimLabelsIn, size_t dimLabelsOut, std::string beginSequenceIn="<s>", std::string endSequenceIn="</s>", std::string beginSequenceOut="O", std::string endSequenceOut="O");
// Parse - Parse the data
// recordsRequested - number of records requested
// labels - pointer to vector to return the labels
// numbers - pointer to vector to return the numbers
// seqPos - pointers to the other two arrays showing positions of each sequence
// returns - number of records actually read, if the end of file is reached the return value will be < requested records
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos);
};
>>>>>>> origin/master
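SetupStateTables gives every ParseState a block of 256 entries indexed by the incoming byte, so Parse advances with a single lookup, m_stateTable[(m_current_state << 8) + ch]. A minimal self-contained sketch of that dispatch scheme (two toy states, not the real CNTK tables):

#include <stdio.h>

enum ToyState { TS_Whitespace = 0, TS_Digit = 1, TS_Max = 2 };

int main()
{
    static unsigned char table[TS_Max << 8] = { 0 };  // default: everything -> TS_Whitespace
    for (int s = 0; s < TS_Max; s++)
        for (int ch = '0'; ch <= '9'; ch++)
            table[(s << 8) + ch] = TS_Digit;          // digits -> TS_Digit from any state
    int state = TS_Whitespace;
    for (const char *p = "ab12"; *p; p++)
    {
        state = table[(state << 8) + (unsigned char)*p]; // one lookup per byte
        printf("'%c' -> state %d\n", *p, state);
    }
    return 0;
}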

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@ command=Simple_Demo:Simple_Demo_Output
# deviceId=-1 for CPU, >=0 for GPU devices
DeviceNumber=0
stderr=Demo
#stderr=Demo
precision=float

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,3 @@
<<<<<<< HEAD
//
// <copyright file="cn.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
@@ -792,777 +791,3 @@ int main(int argc, char* argv[])
return ret;
}
#endif
=======
//
// <copyright file="cn.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// cn.cpp : Defines the entry point for the console application.
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include <string>
#include "commandArgUtil.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include <chrono>
#include <algorithm>
#if defined(_WIN32)
#include "io.h"
#endif
#include "hostname.h"
#include "buildinfo.h"
#ifdef LEAKDETECT
#include "vld.h" // for memory leak detection
#endif
#include <vector>
#include "BestGpu.h"
// MPI builds on windows require the following installed to "c:\program files\Microsoft MPI\"
// HPC Pack 2012 R2 MS-MPI Redistributable Package
// http://www.microsoft.com/en-us/download/details.aspx?id=41634
#ifdef MPI_SUPPORT
#include "mpi.h"
#pragma comment(lib, "msmpi.lib")
#endif
int numProcs;
int myRank;
using namespace std;
using namespace Microsoft::MSR::CNTK;
// internal test routine forward declaration
template <typename ElemType>
void TestCn(const ConfigParameters& config);
void RedirectStdErr(wstring logpath)
{
fprintf (stderr, "Redirecting stderr to file %S\n", logpath.c_str());
msra::files::make_intermediate_dirs (logpath);
auto_file_ptr f (logpath.c_str(), "wb");
if (dup2 (fileno (f), 2) == -1)
RuntimeError ("unexpected failure to redirect stderr to log file");
setvbuf (stderr, NULL, _IONBF, 16384); // unbuffer it
}
std::string WCharToString(const wchar_t* wst)
{
std::wstring ws(wst);
std::string s(ws.begin(), ws.end());
s.assign(ws.begin(), ws.end());
return s;
}
template <typename ElemType>
void DumpNodeInfo(const ConfigParameters& config)
{
wstring modelPath = config("modelPath");
wstring nodeName = config("nodeName",L"__AllNodes__");
wstring defOutFilePath = modelPath + L"." + nodeName + L".txt";
wstring outputFile = config("outputFile", WCharToString(defOutFilePath.c_str()).c_str());
bool printValues = config("printValues", "true");
ComputationNetwork<ElemType> net(-1); //always use CPU
net.LoadFromFile(modelPath);
net.DumpNodeInfoToFile(nodeName, printValues, outputFile);
}
template <typename ElemType>
void DoEvalBase(const ConfigParameters& config, IDataReader<ElemType>& reader)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config("minibatchSize", "40960");
size_t epochSize = config("epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config("modelPath");
intargvector mbSize = minibatchSize;
int traceLevel = config("traceLevel", "0");
size_t numMBsToShowResult = config("numMBsToShowResult", "100");
ConfigArray evalNodeNames = config("evalNodeNames","");
vector<wstring> evalNodeNamesVector;
for (int i=0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
ComputationNetwork<ElemType> net(deviceId);
net.LoadFromFile(modelPath);
net.ResetEvalTimeStamp();
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
eval.Evaluate(reader, evalNodeNamesVector, mbSize[0], epochSize);
}
template <typename ElemType>
void DoEval(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig (config("reader"));
readerConfig.Insert("traceLevel",config("traceLevel","0"));
DataReader<ElemType> testDataReader(readerConfig);
DoEvalBase(config, testDataReader);
}
template <typename ElemType>
void DoEvalUnroll(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig (config("reader"));
readerConfig.Insert("traceLevel",config("traceLevel","0"));
DataReader<ElemType> testDataReader(readerConfig);
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config("minibatchSize", "40960");
size_t epochSize = config("epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config("modelPath");
intargvector mbSize = minibatchSize;
wstring path2EvalResults = config("path2EvalResults", L"");
ComputationNetwork<ElemType> net(deviceId);
net.LoadFromFile(modelPath);
net.ResetEvalTimeStamp();
SimpleEvaluator<ElemType> eval(net);
ElemType evalEntropy;
eval.EvaluateUnroll(testDataReader, mbSize[0], evalEntropy, path2EvalResults == L""? nullptr : path2EvalResults.c_str(), epochSize);
}
template <typename ElemType>
void DoCrossValidate(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig (config("reader"));
readerConfig.Insert("traceLevel",config("traceLevel","0"));
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config("minibatchSize", "40960");
size_t epochSize = config("epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config("modelPath");
intargvector mbSize = minibatchSize;
ConfigArray cvIntervalConfig = config("crossValidationInterval");
intargvector cvInterval = cvIntervalConfig;
size_t sleepSecondsBetweenRuns = config("sleepTimeBetweenRuns", "0");
int traceLevel = config("traceLevel", "0");
size_t numMBsToShowResult = config("numMBsToShowResult", "100");
ConfigArray evalNodeNames = config("evalNodeNames","");
vector<wstring> evalNodeNamesVector;
for (int i=0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
std::vector<std::vector<ElemType>> cvErrorResults;
std::vector<std::wstring> cvModels;
DataReader<ElemType> cvDataReader(readerConfig);
bool finalModelEvaluated = false;
for (size_t i=cvInterval[0]; i<=cvInterval[2]; i+=cvInterval[1])
{
wstring cvModelPath = msra::strfun::wstrprintf (L"%ls.%lld", modelPath.c_str(), i);
if (!fexists (cvModelPath))
{
fprintf(stderr, "model %ls does not exist.\n", cvModelPath.c_str());
if (finalModelEvaluated || !fexists (modelPath))
continue; // file missing
else
{
cvModelPath = modelPath;
finalModelEvaluated = true;
}
}
cvModels.push_back(cvModelPath);
ComputationNetwork<ElemType> net(deviceId);
net.LoadFromFile(cvModelPath);
net.ResetEvalTimeStamp();
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
fprintf(stderr, "model %ls --> \n",cvModelPath.c_str());
std::vector<ElemType> evalErrors;
evalErrors = eval.Evaluate(cvDataReader, evalNodeNamesVector, mbSize[0], epochSize);
cvErrorResults.push_back(evalErrors);
::Sleep(1000*sleepSecondsBetweenRuns);
}
//find best model
if (cvErrorResults.size() == 0)
throw std::logic_error("No model is evaluated.");
std::vector<ElemType> minErrors;
std::vector<int> minErrIds;
std::vector<ElemType> evalErrors = cvErrorResults[0];
for (int i=0; i < evalErrors.size(); ++i)
{
minErrors.push_back(evalErrors[i]);
minErrIds.push_back(0);
}
for (int i=0; i<cvErrorResults.size(); i++)
{
evalErrors = cvErrorResults[i];
for (int j=0; j<evalErrors.size(); j++)
{
if (evalErrors[j] < minErrors[j])
{
minErrors[j] = evalErrors[j];
minErrIds[j] = i;
}
}
}
fprintf(stderr, "Best models:\n");
fprintf(stderr,"------------\n");
for (int i=0; i < minErrors.size(); ++i)
{
fprintf(stderr,"Based on Err[%d]: Best model = %ls with min err %.8g\n", i, cvModels[minErrIds[i]].c_str(), minErrors[i]);
}
}
template <typename ElemType>
void DoWriteOutput(const ConfigParameters& config)
{
ConfigParameters readerConfig (config("reader"));
readerConfig.Insert("traceLevel",config("traceLevel","0"));
readerConfig.Insert("randomize","None"); //we don't want randomization when output results
DataReader<ElemType> testDataReader(readerConfig);
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config("minibatchSize", "2048");
wstring modelPath = config("modelPath");
intargvector mbSize = minibatchSize;
size_t epochSize = config("epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
ConfigArray outputNodeNames = config("outputNodeNames","");
vector<wstring> outputNodeNamesVector;
for (int i=0; i < outputNodeNames.size(); ++i)
{
outputNodeNamesVector.push_back(outputNodeNames[i]);
}
ComputationNetwork<ElemType> net(deviceId);
net.LoadFromFile(modelPath);
net.ResetEvalTimeStamp();
SimpleOutputWriter<ElemType> writer(net, 1);
if (config.Exists("writer"))
{
ConfigParameters writerConfig (config("writer"));
bool bWriterUnittest = writerConfig("unittest","false");
DataWriter<ElemType> testDataWriter(writerConfig);
writer.WriteOutput(testDataReader,mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, bWriterUnittest);
}
else if (config.Exists("outputPath"))
{
wstring outputPath = config("outputPath"); // crashes if no default given?
writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, epochSize);
}
//writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize);
}
namespace Microsoft { namespace MSR { namespace CNTK {
TrainingCriterion ParseTrainingCriterionString(wstring s)
{
msra::strfun::tolower_ascii(s);
if (s==L"crossentropywithsoftmax")
return TrainingCriterion::CrossEntropyWithSoftmax;
else if (s==L"squareerror")
return TrainingCriterion::SquareError;
else if (s!=L"classcrossentropywithsoftmax") // (twisted logic to keep compiler happy w.r.t. not returning from LogicError)
LogicError("trainingCriterion: Invalid trainingCriterion value. Valid values are (CrossEntropyWithSoftmax | SquareError | ClassCrossEntropyWithSoftmax)");
return TrainingCriterion::ClassCrossEntropyWithSoftmax;
}
EvalCriterion ParseEvalCriterionString(wstring s)
{
msra::strfun::tolower_ascii(s);
if (s==L"errorprediction")
return EvalCriterion::ErrorPrediction;
else if (s==L"crossentropywithsoftmax")
return EvalCriterion::CrossEntropyWithSoftmax;
else if (s==L"classcrossentropywithsoftmax")
return EvalCriterion::ClassCrossEntropyWithSoftmax;
else if (s!=L"squareerror")
LogicError("evalCriterion: Invalid trainingCriterion value. Valid values are (ErrorPrediction | CrossEntropyWithSoftmax | SquareError)");
return EvalCriterion::SquareError;
}
}}};
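// Hedged aside (not part of this commit): the "twisted logic" above exists
// because the compiler cannot see that LogicError never returns. With a
// C++11 compiler, declaring it noreturn would let each branch throw directly,
// with no fall-through return needed to silence the
// "not all control paths return a value" warning:
//     [[noreturn]] void LogicError(const char* format, ...);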
template <typename ElemType>
void DoCreateLabelMap(const ConfigParameters& config)
{
// this gets the section name we are interested in
std::string section = config("section");
// get that section (probably a peer config section, which works thanks to hierarchical symbol resolution)
ConfigParameters configSection (config(section));
ConfigParameters readerConfig (configSection("reader"));
readerConfig.Insert("allowMapCreation","true");
DEVICEID_TYPE deviceId = CPUDEVICE;
size_t minibatchSize = config("minibatchSize", "2048");
int traceLevel = config("traceLevel","0");
std::vector<std::wstring> featureNames;
std::vector<std::wstring> labelNames;
GetFileConfigNames(readerConfig, featureNames, labelNames);
// setup minibatch matrices
Matrix<ElemType> featuresMatrix(deviceId);
Matrix<ElemType> labelsMatrix(deviceId);
std::map<std::wstring, Matrix<ElemType>*> matrices;
matrices[featureNames[0]] = &featuresMatrix;
if (labelNames.size() == 0)
RuntimeError("CreateLabelMap: no labels found to process");
// now create the reader and loop through the entire dataset to get all the labels
auto start = std::chrono::system_clock::now();
for (const std::wstring& labelsName: labelNames)
{
// take the last label file defined (the other one might be input)
matrices[labelsName] = &labelsMatrix;
// get the label mapping file name
ConfigParameters labelConfig (readerConfig(labelsName));
std::string labelMappingFile;
if (labelConfig.ExistsCurrent("labelMappingFile"))
labelMappingFile = labelConfig("labelMappingFile");
else if (readerConfig.ExistsCurrent("labelMappingFile"))
labelMappingFile = labelConfig("labelMappingFile");
else
RuntimeError("CreateLabelMap: No labelMappingFile defined");
if (fexists(labelMappingFile))
{
fprintf(stderr,"CreateLabelMap: the label mapping file '%s' already exists, no work to do.\n", labelMappingFile.c_str());
return;
}
fprintf(stderr,"CreateLabelMap: Creating the mapping file '%s' \n", labelMappingFile.c_str());
DataReader<ElemType> dataReader(readerConfig);
dataReader.StartMinibatchLoop(minibatchSize, 0, requestDataSize);
int count = 0;
while (dataReader.GetMinibatch(matrices))
{
Matrix<ElemType>& features = *matrices[featureNames[0]];
count += features.GetNumCols();
if (traceLevel > 1)
fprintf(stderr,"."); // progress meter
}
dataReader.StartMinibatchLoop(minibatchSize, 1, requestDataSize);
// print the results
if (traceLevel > 0)
fprintf(stderr,"\nread %d labels and produced %s\n", count, labelMappingFile.c_str());
}
auto end = std::chrono::system_clock::now();
auto elapsed = end-start;
if (traceLevel > 1)
fprintf(stderr, "%f seconds elapsed\n", (float)(std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count())/1000);
}
template <typename ElemType>
void DoTrain(const ConfigParameters& config)
{
ConfigParameters configSGD (config("SGD"));
bool makeMode = config("makeMode", "true");
ConfigParameters readerConfig (config("reader"));
readerConfig.Insert("traceLevel",config("traceLevel","0"));
IComputationNetBuilder<ElemType>* netBuilder = NULL;
if (config.Exists("NDLNetworkBuilder"))
{
ConfigParameters configNDL (config("NDLNetworkBuilder"));
netBuilder = (IComputationNetBuilder<ElemType>*)new NDLBuilder<ElemType>(configNDL);
}
else if (config.Exists("SimpleNetworkBuilder"))
{
ConfigParameters configSNB (config("SimpleNetworkBuilder"));
netBuilder = (IComputationNetBuilder<ElemType>*)new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
RuntimeError("No network builder found in the config file. NDLNetworkBuilder or SimpleNetworkBuilde must be specified" );
}
DataReader<ElemType>* dataReader = new DataReader<ElemType>(readerConfig);
DataReader<ElemType>* cvDataReader = nullptr;
ConfigParameters cvReaderConfig (config("cvReader", L""));
if (cvReaderConfig.size() != 0)
{
cvReaderConfig.Insert("traceLevel",config("traceLevel","0"));
cvDataReader = new DataReader<ElemType>(cvReaderConfig);
}
SGD<ElemType> sgd(configSGD);
sgd.Train(netBuilder, dataReader, cvDataReader, makeMode);
delete netBuilder;
delete dataReader;
delete cvDataReader;
}
template <typename ElemType>
void DoAdapt(const ConfigParameters& config)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigParameters configSGD (config("SGD"));
bool makeMode = config("makeMode", "true");
ConfigParameters readerConfig (config("reader"));
readerConfig.Insert("traceLevel",config("traceLevel","0"));
DataReader<ElemType>* dataReader = new DataReader<ElemType>(readerConfig);
DataReader<ElemType>* cvDataReader = nullptr;
ConfigParameters cvReaderConfig (config("cvReader", L""));
if (cvReaderConfig.size() != 0)
{
cvReaderConfig.Insert("traceLevel",config("traceLevel","0"));
cvDataReader = new DataReader<ElemType>(cvReaderConfig);
}
wstring origModelFileName = config("origModelFileName", L"");
wstring refNodeName = config("refNodeName", L"");
SGD<ElemType> sgd(configSGD);
sgd.Adapt(origModelFileName, refNodeName, dataReader, cvDataReader, deviceId, makeMode);
delete dataReader;
delete cvDataReader;
}
template <typename ElemType>
void DoEdit(const ConfigParameters& config)
{
wstring editPath = config("editPath");
wstring ndlMacros = config("ndlMacros","");
NDLScript<ElemType> ndlScript;
if (!ndlMacros.empty())
ndlScript.LoadConfigFile(ndlMacros);
MELScript<ElemType> melScript;
melScript.LoadConfigFileAndResolveVariables(editPath, config);
}
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config)
{
//config.Insert("deviceId","-1"); //force using CPU
wstring modelPath = config("modelPath");
wstring dbnModelPath = config("dbnModelPath");
IComputationNetBuilder<ElemType>* netBuilder = (IComputationNetBuilder<ElemType>*)new SimpleNetworkBuilder<ElemType>(config);
ComputationNetwork<ElemType>& net = netBuilder->LoadNetworkFromFile(dbnModelPath);
net.SaveToFile(modelPath);
delete (netBuilder);
}
// process the command
template <typename ElemType>
void DoCommand(const ConfigParameters& config)
{
ConfigArray command = config("command", "train");
for (int i=0; i < command.size(); i++)
{
//get the configuration parameters that match the command
ConfigParameters commandParams (config(command[i]));
ConfigArray action = commandParams("action","train");
// determine the action to perform, and do it
for (int j=0; j < action.size(); j++)
{
if (action[j] == "train" || action[j] == "trainRNN")
DoTrain<ElemType>(commandParams);
else if (action[j] == "adapt")
DoAdapt<ElemType>(commandParams);
else if (action[j] == "test" || action[j] == "eval")
DoEval<ElemType>(commandParams);
else if (action[j] == "testunroll")
DoEvalUnroll<ElemType>(commandParams);
else if (action[j] == "edit")
DoEdit<ElemType>(commandParams);
else if (action[j] == "cv")
DoCrossValidate<ElemType>(commandParams);
else if (action[j] == "write")
DoWriteOutput<ElemType>(commandParams);
else if (action[j] == "devtest")
TestCn<ElemType>(config); // for "devtest" action pass the root config instead
else if (action[j] == "dumpnode")
DumpNodeInfo<ElemType>(commandParams);
else if (action[j] == "convertdbn")
DoConvertFromDbn<ElemType>(commandParams);
else if (action[j] == "createLabelMap")
DoCreateLabelMap<ElemType>(commandParams);
else
RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());
NDLScript<ElemType> ndlScript;
ndlScript.ClearGlobal(); // clear global macros between commands
}
}
}
std::string TimeDateStamp()
{
#if 0 // "safe" version for Windows, not needed it seems
__time64_t localtime;
_time64 (&localtime);// get current time and date
struct tm now;
_localtime64_s (&now, &localtime); // convert
#else
time_t t = time(NULL);
struct tm now = *localtime(&t);
#endif
char buf[30];
sprintf (buf, "%04d/%02d/%02d %02d:%02d:%02d", now.tm_year + 1900, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
return buf;
}
#ifdef MPI_SUPPORT
// Oh, my gosh, this is going to be ugly. MPI_INIT needs a char* argv[], so let's interface.
int MPIAPI MPI_Init(_In_opt_ int *argc, _Inout_count_(*argc) wchar_t*** argv)
{
// this maps from the strings
std::map<std::string, wchar_t*> recover_wstring;
// do the mapping to 8-bit encoding for MPI_Init()
vector<vector<char>> argv_string_vector;
transform(*argv, *argv + *argc, std::back_inserter(argv_string_vector),
[&recover_wstring](wchar_t*pws)->vector<char>
{
std::string tmp = msra::strfun::utf8(std::wstring(pws));
recover_wstring[tmp] = pws;
vector<char> rv(tmp.begin(), tmp.end());
rv.push_back('\0');
return rv;
}
);
vector<char*> argv_charptr_vector;
transform(argv_string_vector.begin(), argv_string_vector.end(), std::back_inserter(argv_charptr_vector),
[](std::vector<char>&cs)->char*{ return &(cs[0]); }
);
char** argv_char = &(argv_charptr_vector[0]);
// Do the initialization
int rv = MPI_Init(argc, &argv_char);
// try and reconstruct how MPI_Init changed the argv
transform(argv_char, argv_char + *argc, stdext::checked_array_iterator<wchar_t**>(*argv, *argc),
[&recover_wstring](char*pc)->wchar_t*
{
auto it = recover_wstring.find(std::string(pc));
if (it == recover_wstring.end())
RuntimeError("Unexpected interaction between MPI_Init and command line parameters");
return it->second;
}
);
// pass through return value from internal call to MPI_Init()
return rv;
}
#endif
void PrintBuiltInfo()
{
fprintf(stderr, "-------------------------------------------------------------------\n");
fprintf(stderr, "Build info: \n\n");
fprintf(stderr, "\t\tBuilt time: %s %s\n", __DATE__, __TIME__);
fprintf(stderr, "\t\tLast modified date: %s\n", __TIMESTAMP__);
fprintf(stderr, "\t\tBuilt by %s on %s\n", _BUILDER_, _BUILDMACHINE_);
fprintf(stderr, "\t\tBuild Path: %s\n", _BUILDPATH_);
#ifdef _GIT_EXIST
fprintf(stderr, "\t\tBuild Branch: %s\n", _BUILDBRANCH_);
fprintf(stderr, "\t\tBuild SHA1: %s\n", _BUILDSHA1_);
#endif
fprintf(stderr, "-------------------------------------------------------------------\n");
}
int wmain(int argc, wchar_t* argv[])
{
try
{
#ifdef MPI_SUPPORT
{
int rc;
rc = MPI_Init(&argc, &argv);
if (rc != MPI_SUCCESS)
{
MPI_Abort(MPI_COMM_WORLD, rc);
RuntimeError("Failure in MPI_Init: %d", rc);
}
MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
fprintf(stderr, "MPI: RUNNING ON (%s), process %d/%d\n", getenv("COMPUTERNAME"), myRank, numProcs);
fflush(stderr);
}
#else
numProcs = 1;
myRank = 0;
#endif
ConfigParameters config;
std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config);
// get the command param set they want
wstring logpath = config("stderr", L"");
// [1/26/2015 erw, add done file so that it can be used on HPC]
wstring DoneFile = config("DoneFile", L"");
ConfigArray command = config("command", "train");
if (logpath != L"")
{
for (int i=0; i < command.size(); i++)
{
logpath += L"_";
logpath += (wstring)command[i];
}
logpath += L".log";
if (numProcs > 1)
{
std::wostringstream oss;
oss << myRank;
logpath += L"rank" + oss.str();
}
RedirectStdErr(logpath);
}
PrintBuiltInfo();
std::string timestamp = TimeDateStamp();
if (myRank == 0) // main process
{
//dump config info
fprintf(stderr, "running on %s at %s\n", GetHostName().c_str(), timestamp.c_str());
fprintf(stderr, "command line options: \n");
for (int i = 1; i < argc; i++)
fprintf(stderr, "%s ", WCharToString(argv[i]).c_str());
// This simply merges all the different config parameters specified (eg, via config files or via command line directly),
// and prints it.
fprintf(stderr, "\n\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n");
fprintf(stderr, "%s\n", rawConfigString.c_str());
fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<\n");
// Same as above, but all variables are resolved. If a parameter is set multiple times (eg, set in config, overriden at command line),
// All of these assignments will appear, even though only the last assignment matters.
fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
fprintf(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
// This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
// value it is set to will appear).
fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
config.dumpWithResolvedVariables();
fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
fprintf(stderr, "command: ");
for (int i = 0; i < command.size(); i++)
{
fprintf(stderr, "%s ", command[i].c_str());
}
}
//run commands
std::string type = config("precision", "float");
// accept old precision key for backward compatibility
if (config.Exists("type"))
type = config("type", "float");
if ( myRank == 0 )
fprintf(stderr, "\nprecision = %s\n", type.c_str());
if (type == "float")
DoCommand<float>(config);
else if (type == "double")
DoCommand<double>(config);
else
RuntimeError("invalid precision specified: %s", type.c_str());
// still here , write a DoneFile if necessary
if (!DoneFile.empty()){
FILE* fp = fopenOrDie(DoneFile.c_str(), L"w");
fprintf(fp, "successfully finished at %s on %s\n", TimeDateStamp().c_str(),GetHostName().c_str());
fcloseOrDie(fp);
}
}
catch (const std::exception &err)
{
fprintf(stderr, "EXCEPTION occurred: %s", err.what());
#ifdef _DEBUG
DebugBreak();
#endif
return EXIT_FAILURE;
}
catch(...)
{
fprintf(stderr, "Unknown ERROR occurred");
#ifdef _DEBUG
DebugBreak();
#endif
return EXIT_FAILURE;
}
#ifdef MPI_SUPPORT
MPI_Finalize();
#endif
return EXIT_SUCCESS;
}
>>>>>>> origin/master
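The MPI_Init shim above round-trips each wide argument through msra::strfun::utf8 so the narrow MPI_Init can consume it. A simplified standalone sketch of the same wide-to-narrow argv conversion (using std::wcstombs instead of the utf8 helper; NarrowArgv is an invented name):

#include <cstdlib>
#include <vector>

std::vector<char*> NarrowArgv(int argc, wchar_t **argv,
                              std::vector<std::vector<char>> &storage)
{
    for (int i = 0; i < argc; i++)
    {
        size_t n = std::wcstombs(nullptr, argv[i], 0); // bytes needed, excluding the NUL
        std::vector<char> buf(n + 1, '\0');
        std::wcstombs(buf.data(), argv[i], n + 1);
        storage.push_back(std::move(buf));             // caller-owned storage keeps the bytes alive
    }
    std::vector<char*> out;
    for (auto &buf : storage)
        out.push_back(buf.data());                     // pointers into 'storage'
    return out;
}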

View File

@@ -1,4 +1,3 @@
<<<<<<< HEAD
//
// <copyright file="CPUSparseMatrix.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
@@ -961,967 +960,3 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class CPUSparseMatrix<double>;
}}}
=======
//
// <copyright file="CPUSparseMatrix.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// Math.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#include <assert.h>
#include <stdexcept>
#include <omp.h>
#include <math.h>
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include <random>
#include <chrono>
#ifdef _WIN32
#include <Windows.h>
#endif
#ifdef LEAKDETECT
#include <vld.h>
#endif
#include "basetypes.h"
#include "fileutil.h"
#ifndef USE_MKL
// use ACML as default.
// Download ACML 5.3.0 (e.g., acml5.3.0-ifort64.exe) or above
// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
// Install the ifort64 variant of the library (compiled with the Intel compiler).
// Set the environment variable ACML_PATH to C:\AMD\acml5.3.0\ifort64_mp (or whichever
// folder you installed ACML to) so the build can find the include file and link library.
#include <acml.h> // requires ACML 5.3.0 and above
#else
// requires MKL 10.0 and above
#endif
// This is an example of an exported variable
//MATH_API int nMath=0;
// This is an example of an exported function.
//MATH_API int fnMath(void)
//{
// return 42;
//}
#ifndef USE_MKL //MKL has one additional parameter for different matrix order
#define BLAS_COLMAJOR
#else
#define BLAS_COLMAJOR (int)MatrixOrder::ColMajor,
#endif
#define SWAP(a,b) {(a) ^= (b); (b) ^= (a); (a) ^= (b);}
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing
namespace Microsoft { namespace MSR { namespace CNTK {
#pragma region Helpful Enum Definitions
enum class MatrixOrder
{
RowMajor = 101, // row-major arrays
ColMajor = 102 // column-major arrays
};
enum class MatrixTranspose : char
{
NoTrans = 'N', // trans='N'
Trans = 'T', // trans='T'
ConjTrans = 'C' // trans='C'
};
enum class SymMatrixType : char
{
Up = 'U', // symmetric matrix is stored in the upper part
Low = 'L', // symmetric matrix is stored in the lower part
Full = 'F', // fully populated
NotSymmetric = 'N' //not a symmetric matrix
};
enum class MatrixOpSide : char
{
Left = 'L', // left multiply
Right = 'R', // right multiply
};
#pragma endregion Helpful Enum Definitions
#pragma region Constructors and Destructor
//should only be used by constructors.
template<class ElemType>
void CPUSparseMatrix<ElemType>::ZeroInit()
{
m_numRows = 0;
m_numCols = 0;
m_elemSizeAllocated = 0;
m_compIndexSize = 0;
m_externalBuffer = false;
m_computeDevice = CPUDEVICE;
m_nz = 0;
m_matrixName = NULL;
//if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
{
m_colIdx = -1;
m_pArray = NULL;
m_unCompIndex = NULL;
m_compIndex = NULL;
}
//else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
m_blockSize = 0;
m_pArray = NULL;
m_blockIds = NULL;
}
}
//should only be used by constructors.
template<class ElemType>
void CPUSparseMatrix<ElemType>::CheckInit(const MatrixFormat format)
{
if (format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow)
{
throw std::logic_error("CPUSparseMatrix: unsupported sparse matrix format");
}
m_format = format;
m_default = defaultElem();
ZeroInit();
}
template<class ElemType>
CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format)
{
CheckInit(format);
}
template<class ElemType>
CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size)
{
CheckInit(format);
Resize(numRows, numCols, size);
}
template<class ElemType>
CPUSparseMatrix<ElemType>::~CPUSparseMatrix()
{
if (m_matrixName!=NULL)
{
delete[] m_matrixName;
m_matrixName = nullptr;
}
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
{
if(m_pArray != NULL)
delete[] m_pArray;
if(m_unCompIndex != NULL)
delete[] m_unCompIndex;
if(m_compIndex != NULL)
delete[] m_compIndex;
}
else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
if (m_pArray != NULL)
delete[] m_pArray;
if(m_blockIds != NULL)
delete[] m_blockIds;
}
}
#pragma endregion Constructors and Destructor
#pragma region Basic Operators
// make sure values are set in column order for CSC and row order for CSR
template<class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const size_t row, const size_t col, const ElemType v)
{
if(m_format != MatrixFormat::matrixFormatSparseCSC && m_format != MatrixFormat::matrixFormatSparseCSR)
{
throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call.");
}
if(m_elemSizeAllocated < m_nz + 1) //automatic resize
{
Resize(m_numRows, m_numCols, m_nz + 100); //allocate 100 more elements and keep existing values
}
}
if(row >= m_numRows) // row and col are unsigned, so only the upper bound can fail
{
throw std::logic_error("CPUSparseMatrix: SetValue() invalid row id");
}
if(col >= m_numCols)
{
throw std::logic_error("CPUSparseMatrix: SetValue() invalid column id");
}
size_t r = (m_format == matrixFormatSparseCSC) ? row: col;
size_t c = (m_format == matrixFormatSparseCSC) ? col: row;
m_pArray[m_nz] = v;
m_unCompIndex[m_nz] = (CPUSPARSE_INDEX_TYPE)r;
//consistency check
if(c == m_colIdx && r <= m_unCompIndex[m_nz-1])
{
throw std::logic_error("CPUSparseMatrix: SetValue is not called properly");
}
if (c != m_colIdx)
{
m_compIndex[c] = CPUSPARSE_INDEX_TYPE(m_nz);
m_colIdx = (int) c;
}
m_compIndex[c + 1] = CPUSPARSE_INDEX_TYPE(m_nz + 1);
m_nz++;
}
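// Hedged usage sketch for SetValue() (not in the commit): values must be
// appended in column order for CSC (row order for CSR), or the consistency
// check above throws. Filling the 2x2 matrix [[1,0],[0,2]]:
//     CPUSparseMatrix<float> m(matrixFormatSparseCSC, 2 /*rows*/, 2 /*cols*/, 2 /*nz*/);
//     m.SetValue(0, 0, 1.0f); // finish column 0 first
//     m.SetValue(1, 1, 2.0f); // then column 1; going back to column 0 would throw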
template<class ElemType>
ElemType* CPUSparseMatrix<ElemType>::BufferPointer() const
{
return m_pArray;
}
template<class ElemType>
void CPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, size_t numNZElemToReserve, const bool growOnly, const bool keepExistingValues)
{
size_t newCompIndexSize = (numCols > numRows ? numCols : numRows) + 1;
bool reallocate = (m_elemSizeAllocated < numNZElemToReserve || (m_elemSizeAllocated > numNZElemToReserve && !growOnly) || m_compIndexSize < newCompIndexSize);
m_numRows = numRows;
m_numCols = numCols;
if (reallocate)
{
if (m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
{
ElemType *pArray = new ElemType[numNZElemToReserve];
CPUSPARSE_INDEX_TYPE *unCompIndex = new CPUSPARSE_INDEX_TYPE[numNZElemToReserve];
CPUSPARSE_INDEX_TYPE *compIndex = new CPUSPARSE_INDEX_TYPE[newCompIndexSize];
if (keepExistingValues && (m_nz > numNZElemToReserve || m_compIndexSize > newCompIndexSize))
throw std::logic_error("Resize: To keep values m_nz should <= numNZElemToReserve and m_compIndexSize <= newCompIndexSize");
if (keepExistingValues && m_nz > 0)
{
assert(m_compIndexSize > 0 && m_nz < numNZElemToReserve);
memcpy(pArray, m_pArray, NzSize());
memcpy(unCompIndex, m_unCompIndex, MajorIndexSize());
memcpy(compIndex, m_compIndex, SecondaryIndexSize());
}
if (m_pArray != NULL)
delete[] m_pArray;
if (m_unCompIndex != NULL)
delete[] m_unCompIndex;
if (m_compIndex != NULL)
delete[] m_compIndex;
m_pArray = pArray;
m_unCompIndex = unCompIndex;
m_compIndex = compIndex;
}
else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
ElemType *blockVal = new ElemType[numNZElemToReserve];
size_t *blockIds = new size_t[newCompIndexSize];
if (keepExistingValues && (m_nz > numNZElemToReserve || m_compIndexSize > newCompIndexSize))
throw std::logic_error("Resize: To keep values m_nz should <= numNZElemToReserve and m_compIndexSize <= newCompIndexSize");
if (keepExistingValues && m_elemSizeAllocated > 0)
{
assert(m_compIndexSize > 0 && m_elemSizeAllocated < numNZElemToReserve);
memcpy(blockVal, m_pArray, NzSize());
memcpy(blockIds, m_blockIds, sizeof(size_t)*m_compIndexSize);
}
if (m_pArray != NULL)
delete[] m_pArray;
if(m_blockIds != NULL)
delete[] m_blockIds;
m_pArray = blockVal;
m_blockIds = blockIds;
}
m_elemSizeAllocated = numNZElemToReserve;
m_compIndexSize = newCompIndexSize;
}
}
//Reset matrix so it can be reused
template<class ElemType>
void CPUSparseMatrix<ElemType>::Reset()
{
m_nz = 0;
m_colIdx = -1;
m_blockSize = 0;
}
//c = alpha*op(lhs) * op(rhs) + beta*c
template<class ElemType>
void CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, ElemType beta, CPUMatrix<ElemType>& c)
{
if (lhs.IsEmpty() || rhs.IsEmpty())
throw std::logic_error("MultiplyAndWeightedAdd: one of the input matrix is empty.");
int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows();
int k = transposeA? (int)lhs.GetNumRows(): (int)lhs.GetNumCols();
int l = transposeB? (int)rhs.GetNumCols(): (int)rhs.GetNumRows();
int n = transposeB? (int)rhs.GetNumRows(): (int)rhs.GetNumCols();
assert (m>0 && k>0 && l>0 && n>0); //converting from size_t to int may cause overflow
assert (k == l);
if (k != l)
{
throw std::invalid_argument("CPUSparseMatrix::MultiplyAndWeightedAdd: The inner dimensions of a and b must match.");
}
if (c.GetNumRows() != m || c.GetNumCols() != n)
{
c.Resize(m,n);
}
if (beta == 0)
{
memset(c.GetArray(), 0, sizeof(ElemType) * c.GetNumElements());
}
else if (beta != 1)
{
#pragma omp parallel for
foreach_coord(i,j,c)
{
c(i,j) = beta * c(i,j);
}
}
if (rhs.GetFormat() != matrixFormatSparseCSC)
NOT_IMPLEMENTED;
if (!transposeA && !transposeB)
{
for(size_t j = 0; j < rhs.GetNumCols(); j++)
{
size_t start = rhs.m_compIndex[j]; //ColLocation
size_t end = rhs.m_compIndex[j+1];
for(size_t p = start; p < end; p++)
{
size_t i = rhs.m_unCompIndex[p]; //RowLocation
ElemType val = rhs.m_pArray[p];
for(size_t h = 0; h < lhs.GetNumRows(); h++)
{
c(h,j) += alpha * lhs(h, i)*val;
}
}
}
}
else if (!transposeA && transposeB)
{
for(size_t j = 0; j < rhs.GetNumCols(); j++)
{
size_t start = rhs.m_compIndex[j];
size_t end = rhs.m_compIndex[j + 1];
for(size_t p = start; p < end; p++)
{
size_t i = rhs.m_unCompIndex[p];
ElemType val = rhs.m_pArray[p];
for(size_t h = 0; h < lhs.GetNumRows(); h++)
{
c(h, i) += alpha * lhs(h, j)*val;
}
}
}
}
else if (transposeA && !transposeB)
{
NOT_IMPLEMENTED;
}
else
{
NOT_IMPLEMENTED;
}
}
//c = alpha * op(lhs) * op(rhs)
template<class ElemType>
void CPUSparseMatrix<ElemType>::MultiplyAndAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, CPUSparseMatrix<ElemType>& c)
{
if (lhs.IsEmpty() || rhs.IsEmpty())
throw std::logic_error("LeftMultiplyAndAdd: one of the input matrix is empty.");
int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows();
int k = transposeA? (int)lhs.GetNumRows(): (int)lhs.GetNumCols();
int l = transposeB? (int)rhs.GetNumCols(): (int)rhs.GetNumRows();
int n = transposeB? (int)rhs.GetNumRows(): (int)rhs.GetNumCols();
assert (m>0 && k>0 && l>0 && n>0); m; n; //converting from size_t to int may cause overflow
assert (k == l);
if (k != l)
{
throw std::invalid_argument("CPUSparseMatrix::MultiplyAndAdd: The inner dimensions of a and b must match.");
}
c.Reset();
if (!transposeA && !transposeB)
{
NOT_IMPLEMENTED;
}
else if (!transposeA && transposeB)
{
if (rhs.GetFormat() != matrixFormatSparseCSC)
NOT_IMPLEMENTED;
//allocate enough memory
c.SetFormat(matrixFormatSparseBlockCol);
c.Resize(m, n, m*min(n, rhs.m_nz));
map<size_t, size_t> w2Id;
for(size_t j = 0; j < rhs.GetNumCols(); j++)
{ // j ranges over batches
size_t start = rhs.m_compIndex[j];
size_t end = rhs.m_compIndex[j+1];
for(size_t p = start; p < end; p++)
{
size_t i = rhs.m_unCompIndex[p]; //i ranges over words
ElemType val = rhs.m_pArray[p]; //1 for(i, j)
bool first = true;
if(w2Id.find(i) == w2Id.end())
{
w2Id[i] = w2Id.size();
c.m_blockIds[c.m_blockSize]=i;
c.m_blockSize++;
}
else
{
first = false;
}
size_t pos = w2Id[i] * lhs.GetNumRows();
for(size_t h = 0; h < lhs.GetNumRows(); h++)
{ // h range over hidden layer
if(first == true)
{
c.m_pArray[pos] = alpha*lhs(h, j)*val;
} else
{
c.m_pArray[pos] += alpha*lhs(h, j)*val;
}
pos++;
}
}
}
c.m_nz = c.m_blockSize * m;
if(c.m_nz > c.GetSizeAllocated())
{
throw std::logic_error("sparse matrix out of range.");
}
//c.SetFormat(matrixFormatSparseBlockCol);
}
else if (transposeA && !transposeB)
{
NOT_IMPLEMENTED;
}
else
{
NOT_IMPLEMENTED;
}
}
template<class ElemType>
void CPUSparseMatrix<ElemType>::ScaleAndAdd(const ElemType alpha, const CPUSparseMatrix<ElemType>& lhs, CPUMatrix<ElemType>& rhs)
{
if (lhs.IsEmpty() || rhs.IsEmpty())
{
throw std::logic_error("ScaleAndAdd: one of the input matrix is empty.");
}
if (lhs.GetNumRows() != rhs.GetNumRows() || lhs.GetNumCols() != rhs.GetNumCols())
{
throw std::invalid_argument("CPUSparseMatrix::ScaleAndAdd: The dimensions of a and b must match.");
}
if(lhs.GetFormat() == MatrixFormat::matrixFormatSparseCSC || lhs.GetFormat() == MatrixFormat::matrixFormatSparseCSR)
{
size_t col_num = (lhs.m_format == MatrixFormat::matrixFormatSparseCSC) ? lhs.GetNumCols(): lhs.GetNumRows();
for(size_t j = 0; j < col_num; j++)
{
size_t start = lhs.m_compIndex[j];
size_t end = lhs.m_compIndex[j + 1];
for(size_t p = start; p < end; p++)
{
size_t i = lhs.m_unCompIndex[p];
ElemType val = lhs.m_pArray[p];
size_t r = (lhs.m_format == MatrixFormat::matrixFormatSparseCSC) ? i : j;
size_t c = (lhs.m_format == MatrixFormat::matrixFormatSparseCSC) ? j : i;
rhs(r, c) += alpha * val;
}
}
}
else if (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol || lhs.m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
for(size_t j = 0; j < lhs.m_blockSize; j++)
{
size_t i = lhs.m_blockIds[j];
size_t len = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? lhs.GetNumRows() : lhs.GetNumCols();
size_t start = j * len;
for(size_t p = start; p < start+len; p++)
{
ElemType val = lhs.m_pArray[p];
size_t r = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
size_t c = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
rhs(r, c) += alpha * val;
}
}
}
else
{
throw std::runtime_error("CPUSparseMatrix:: ScaleAndAdd() Not implemented");
}
}
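// Index-mapping note (illustrative): in block-col format the value at offset p
// within block j lives at dense position (row = p - start, col = m_blockIds[j]);
// block-row format swaps the two, which is why one loop serves both layouts.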
template<class ElemType>
bool CPUSparseMatrix<ElemType>::AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold)
{
if (a.IsEmpty() || b.IsEmpty())
throw std::logic_error("AreEqual: one of the input matrices is empty.");
if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
return false;
bool result = true;
#pragma omp parallel for
foreach_coord(i, j, a)
{
if (abs(a(i, j) - b(i, j)) > threshold)
{
result = false; // every thread only ever writes 'false', so no synchronization is needed;
                // note that 'break' must not be used to leave an OpenMP-parallelized loop early
}
}
return result;
}
// a: H x No: H is the hidden layer size and No is the mini-batch size
// weight: (V+Nc) x H, V is the vocab size (class outputs share the weight matrix with word outputs)
// label: (V+Nc) x No, word and class targets
// cls: 2 x Nc, Nc is the number of classes; each col holds the start and end word ids of a class
// idx2cls: V x 1, mapping from word to class id
// etp: (V+Nc) x No, stores the predicted values
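// Worked example with illustrative numbers: with V = 4 words and Nc = 2 classes,
// label has V + Nc = 6 rows (rows 0..3 are word targets, rows 4..5 class targets).
// If cls = [ 0 2 ; 2 4 ] (class 0 owns words [0,2), class 1 owns [2,4)) and
// idx2cls = [ 0 0 1 1 ]', then a word target i = 3 restricts the softmax to the
// sub-vocabulary [2,4), while a class target i >= V restricts it to [4,6).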
template<class ElemType>
void CPUSparseMatrix<ElemType>::ClassEntropy(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& weight,
const CPUSparseMatrix<ElemType> & label, const CPUMatrix<ElemType>& cls,
const CPUMatrix<ElemType>& idx2cls, CPUSparseMatrix<ElemType>& etp, CPUMatrix<ElemType>& entropyScore)
{
if (a.IsEmpty() || cls.IsEmpty() || label.IsEmpty() || idx2cls.IsEmpty())
throw std::logic_error("AssignSoftmaxOf: Matrix a, class, idx2cls or label is empty.");
if(etp.GetFormat() != MatrixFormat::matrixFormatSparseCSC)
throw std::runtime_error("CPUSparseMatrix:: ClassEntropy() only support CSC");
size_t nC = cls.GetNumCols();
size_t nV = label.GetNumRows() - nC;
if (nV != idx2cls.GetNumRows() || idx2cls.GetNumCols() != 1 || cls.GetNumCols() + idx2cls.GetNumRows() != label.GetNumRows())
throw std::logic_error("ClassEntropy: check matrix dimension");
//allocate enough memory
if(etp.m_elemSizeAllocated < etp.GetNumElements())
{
etp.Resize(etp.GetNumRows(), etp.GetNumCols(), etp.GetNumElements(), true, false);
}
etp.Reset();
entropyScore(0, 0) = 0;
for(size_t j = 0; j < label.GetNumCols(); j++)
{
size_t start = label.m_compIndex[j];
size_t end = label.m_compIndex[j + 1];
for (size_t p = start; p < end; p++)
{
size_t i = label.m_unCompIndex[p];
size_t iStt, iEnd;
if (i < nV)
{
size_t clsid = (size_t)idx2cls(i, 0);
iStt = (size_t) cls(0, clsid); //class start word id
iEnd = (size_t) cls(1, clsid); //class end word id
}
else
{
iStt = nV;
iEnd = nV + nC;
}
size_t b = etp.m_nz;
for(size_t ii = iStt; ii < iEnd; ii++) //ii ranges over sub-vocab or class ids
{
ElemType val = 0.0;
foreach_row(rw, a) //rw ranges over hidden units
{
val += weight(ii,rw) * a(rw,j);
}
etp.SetValue(ii, j, val);
}
ElemType maxV = LZERO;
for(size_t ii = b; ii < etp.m_nz; ii++)
{
maxV = (ElemType) logadd(maxV, etp.m_pArray[ii]);
}
for(size_t ii = b; ii < etp.m_nz; ii++)
{
etp.m_pArray[ii] = etp.m_pArray[ii] - maxV;
}
entropyScore(0, 0) -= etp.m_pArray[b+i-iStt];
//negate positive data points
etp.m_pArray[b+i-iStt] *=-1;
}
}
}
template<class ElemType>
void CPUSparseMatrix<ElemType>::ClassEntropyError(CPUSparseMatrix<ElemType>& a)
{
for(size_t i = 0; i < a.m_nz; i++) // size_t avoids a signed/unsigned comparison with m_nz
{
if(a.m_pArray[i] < 0)
{
a.m_pArray[i] = exp(a.m_pArray[i]); //negative;
}
else
{
a.m_pArray[i] = exp(-a.m_pArray[i])-1; //positive
}
}
}
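// Derivation note (illustrative): ClassEntropy stores log-softmax values and
// negates the target entry, so here v = log p <= 0 for non-targets and
// v = -log p >= 0 for the target. The cross-entropy gradient w.r.t. a logit is
// p - [is target], i.e. exp(v) for non-targets and exp(-v) - 1 for the target,
// which is exactly what this loop computes in place.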
template<class ElemType>
void CPUSparseMatrix<ElemType>::ClassEntropyGradientOfInput(
const CPUSparseMatrix<ElemType>& error,
const CPUMatrix<ElemType>& weight,
CPUMatrix<ElemType>& grd)
{
grd.SetValue(0);
for(size_t j = 0; j < error.GetNumCols(); j++)
{
size_t start = error.m_compIndex[j];
size_t end = error.m_compIndex[j+1];
for(size_t p = start; p < end; p++)
{
size_t i = error.m_unCompIndex[p];
for(size_t h = 0; h < grd.GetNumRows(); h++)
{ // h ranges over hidden units
grd(h,j) += weight(i, h) * error.m_pArray[p];
}
}
}
}
template<class ElemType>
void CPUSparseMatrix<ElemType>::ClassEntropyGradientOfWeight(
const CPUSparseMatrix<ElemType>& error,
const CPUMatrix<ElemType>& input,
const CPUSparseMatrix<ElemType> & /*label*/,
const CPUMatrix<ElemType>& /*cls*/,
const CPUMatrix<ElemType>& /*idx2cls*/,
CPUSparseMatrix<ElemType>& grd)
{
grd.SetFormat(matrixFormatSparseBlockRow);
//allocate enough memory
grd.Resize(grd.GetNumRows(), grd.GetNumCols(), error.m_nz*input.GetNumRows(), true, false);
grd.Reset();
map<size_t, size_t> w2Id;
for(size_t j = 0; j < error.GetNumCols(); j++)
{
size_t start = error.m_compIndex[j];
size_t end = error.m_compIndex[j+1];
for(size_t p = start; p < end; p++)
{
size_t i = error.m_unCompIndex[p]; // i ranges over words
bool first = true;
if(w2Id.find(i) == w2Id.end())
{
size_t newId = w2Id.size(); // take the size before operator[] inserts the new key, so ids stay 0-based and dense
w2Id[i] = newId;
grd.m_blockIds[grd.m_blockSize]=i;
grd.m_blockSize++;
}
else
{
first = false;
}
size_t pos = w2Id[i]*input.GetNumRows();
for(size_t h = 0; h < input.GetNumRows(); h++)
{ // h range over hidden layer
if(first == true)
{
grd.m_pArray[pos] = input(h, j)*error.m_pArray[p];
}
else
{
grd.m_pArray[pos] += input(h, j)*error.m_pArray[p];
}
pos++;
}
}
}
grd.m_nz = grd.m_blockSize * input.GetNumRows();
if(grd.m_nz > grd.GetSizeAllocated())
{
throw std::logic_error("sparse matrix out of range.");
}
//grd.SetFormat(matrixFormatSparseBlockRow);
}
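// Layout note (illustrative): grd matches the weight matrix shape and is stored
// in block-row format: each distinct output row i seen in 'error' (a word or a
// class unit) gets one dense H-element block holding d(loss)/d(weight(i,:)), so
// memory scales with the number of distinct targets rather than the vocab size.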
// normal update for smoothed gradients c and current gradients (this)
template<class ElemType>
void CPUSparseMatrix<ElemType>::NormalGrad(CPUMatrix<ElemType>& c, const ElemType momentum)
{
if (c.IsEmpty())
{
c.Resize(GetNumRows(), GetNumCols());
c.SetValue(0.0);
}
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
for(size_t j = 0; j < m_blockSize; j++)
{
size_t i = m_blockIds[j];
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
size_t start = j* len;
for(size_t p = start; p < start+len; p++)
{
ElemType val = m_pArray[p];
size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
c(row, col) = (1-momentum)*val + momentum*c(row, col);
m_pArray[p] = c(row, col);
}
}
}
else
{
throw std::runtime_error("CPUSparseMatrix:: NormalGrad() only support block sparse format");
}
}
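// Update sketch: per stored element, c <- momentum * c + (1 - momentum) * g,
// and the sparse gradient is overwritten with the smoothed value, so the caller
// can apply the same learning-rate step it would apply to a plain gradient.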
// update smoothed gradients c and current gradients (this)
template<class ElemType>
void CPUSparseMatrix<ElemType>::Adagrad(CPUMatrix<ElemType>& c)
{
if (c.IsEmpty())
{
c.Resize(GetNumRows(), GetNumCols());
c.SetValue(0.0);
}
const ElemType floor = 1e-16f;
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
{
size_t col_num = (m_format == MatrixFormat::matrixFormatSparseCSC) ? GetNumCols() : GetNumRows();
for(size_t j = 0; j < col_num; j++)
{
size_t start = m_compIndex[j];
size_t end = m_compIndex[j+1];
for(size_t p = start; p < end; p++)
{
size_t i = m_unCompIndex[p];
ElemType val = m_pArray[p];
size_t row = (m_format == MatrixFormat::matrixFormatSparseCSC) ? i : j;
size_t col = (m_format == MatrixFormat::matrixFormatSparseCSC) ? j : i;
ElemType adenorm = c(row, col);
adenorm += val * val;
val = val / (floor + sqrt(adenorm));
m_pArray[p] = val;
c(row, col) = adenorm;
}
}
} else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
for(size_t j = 0; j < m_blockSize; j++)
{
size_t i = m_blockIds[j];
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
size_t start = j* len;
for(size_t p = start; p < start+len; p++)
{
ElemType val = m_pArray[p];
size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
ElemType adenorm = c(row, col);
adenorm += val * val;
val = val / (floor + sqrt(adenorm));
m_pArray[p] = val;
c(row, col) = adenorm;
}
}
}
}
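// Adagrad sketch: per stored element, accumulate a <- a + g^2 in c and rescale
// g <- g / (floor + sqrt(a)); c keeps the accumulator across calls so a single
// global learning rate can be applied afterwards. Note that formats other than
// CSC/CSR and block col/row fall through without an update.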
template<class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
{
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
ElemType locThresholdPos = abs(threshold);
ElemType locThresholdNeg = -locThresholdPos;
for(size_t j = 0; j < m_blockSize; j++)
{
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
size_t start = j* len;
for (size_t p = start; p < start+len; p++)
{
if (m_pArray[p] > locThresholdPos)
{
m_pArray[p] = locThresholdPos;
}
else if (m_pArray[p] < locThresholdNeg)
{
m_pArray[p] = locThresholdNeg;
}
}
}
}
else
{
throw std::runtime_error("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix");
}
return *this;
}
template <class ElemType>
MATH_API File& operator>>(File& stream, CPUSparseMatrix<ElemType>& us)
{
stream.GetMarker(fileMarkerBeginSection, std::wstring(L"BMAT"));
size_t elsize;
stream >> elsize;
if (sizeof(ElemType) != elsize)
throw std::runtime_error("Template argument size doesn't match those in file");
std::wstring matrixName;
// now prepare this header to receive the data being read
size_t nz, colnum, rownum;
int format;
// read in the header information
stream >> matrixName >> format >> nz >> colnum >> rownum;
us.SetFormat((MatrixFormat)format);
if (us.GetFormat() != matrixFormatSparseCSC && us.GetFormat() != matrixFormatSparseCSR)
NOT_IMPLEMENTED;
us.Resize(rownum, colnum, nz);
if (nz > 0)
{
size_t compressedSize = (us.GetFormat() == matrixFormatSparseCSC) ? colnum + 1 : rownum + 1;
ElemType* dataBuffer = us.NzValues();
CPUSPARSE_INDEX_TYPE* unCompressedIndex = us.MajorIndexLocation();
CPUSPARSE_INDEX_TYPE* compressedIndex = us.SecondaryIndexLocation();
// read in the sparse matrix info
for (size_t i = 0; i < nz; ++i)
{
stream >> dataBuffer[i];
}
for (size_t i = 0; i < nz; ++i)
{
stream >> unCompressedIndex[i];
}
for (size_t i = 0; i < compressedSize; ++i)
{
stream >> compressedIndex[i];
}
}
stream.GetMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
us.SetMatrixName(matrixName.c_str());
return stream;
}
template MATH_API File& operator>>(File& stream, CPUSparseMatrix<float>& us);
template MATH_API File& operator>>(File& stream, CPUSparseMatrix<double>& us);
template <class ElemType>
MATH_API File& operator<<(File& stream, const CPUSparseMatrix<ElemType>& us)
{
if (us.GetFormat() != matrixFormatSparseCSC && us.GetFormat() != matrixFormatSparseCSR)
NOT_IMPLEMENTED;
stream.PutMarker(fileMarkerBeginSection, std::wstring(L"BMAT"));
stream << sizeof(ElemType);
if (us.GetMatrixName() == nullptr)
{
std::wstring s(L"nnmatrix");
stream << s;
}
else
{
stream << us.GetMatrixName();
}
size_t nz = us.MajorIndexCount(), numRows = us.GetNumRows(), numCols = us.GetNumCols(); // MajorIndexCount() returns the number of stored non-zeros; these must be initialized before being written below
size_t compressedSize = us.SecondaryIndexCount();
int format = us.GetFormat();
stream << format << nz << numCols << numRows;
if (nz > 0)
{
ElemType* dataBuffer = us.NzValues();
CPUSPARSE_INDEX_TYPE* unCompressedIndex = us.MajorIndexLocation();
CPUSPARSE_INDEX_TYPE* compressedIndex = us.SecondaryIndexLocation();
for (size_t i = 0; i < nz; ++i)
{
stream << dataBuffer[i];
}
for (size_t i = 0; i < nz; ++i)
{
stream << unCompressedIndex[i];
}
for (size_t i = 0; i < compressedSize; ++i)
{
stream << compressedIndex[i];
}
}
stream.PutMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
return stream;
}
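// On-disk layout as read/written above: a "BMAT" marker, sizeof(ElemType), the
// matrix name, format, nz, numCols, numRows, then nz values, nz major indices,
// SecondaryIndexCount() compressed indices, and a closing "EMAT" marker.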
template class CPUSparseMatrix<float>;
template class CPUSparseMatrix<double>;
}}}
>>>>>>> origin/master

File diff suppressed because it is too large. Load Diff

File diff suppressed because it is too large. Load Diff

View File

@@ -1,5 +1,4 @@
<<<<<<< HEAD
//
//
// <copyright file="GPUSparseMatrix.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
@@ -24,8 +23,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class MATH_API GPUSparseMatrix : public BaseMatrix<ElemType>
{
typedef BaseMatrix<ElemType> B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require the use of this-> in GCC
public:
typedef BaseMatrix<ElemType> B;
using B::m_numRows;
using B::m_numCols;
using B::m_pArray;
using B::m_elemSizeAllocated;
using B::m_nz;
using B::m_format;
using B::m_computeDevice;
using B::m_externalBuffer;
using B::m_matrixName;
using B::OwnBuffer;
using B::GetFormat;
using B::SetFormat;
using B::GetNumRows;
using B::GetNumCols;
using B::IsEmpty;
using B::SetComputeDeviceId;
using B::SetMatrixName;
using B::SetNzCount;
// without this, base members would require the use of this-> in GCC
public:
GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR, const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
@@ -264,270 +282,3 @@ };
};
}}}
=======
//
// <copyright file="GPUSparseMatrix.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "GPUMatrix.h"
#include "CPUSparseMatrix.h"
#include <functional>
namespace Microsoft { namespace MSR { namespace CNTK {
//GPU Sparse Matrix, using cuSPARSE library.
//By default we are assuming CSR representation
// NOTE m_elemSizeAllocated (in base matrix) means the number of non-zero elements we have allocated space
// We are packing the CSR format (pointed to by m_pArray) as follows:
// ElemType elements[m_elemSizeAllocated]
// int colIdx[m_elemSizeAllocated]
// int rowIdxStart[m_numRows+1]
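// Packed-buffer example (illustrative): for a 2x3 CSR matrix with nz = 3 and
// m_elemSizeAllocated = 4, m_pArray holds
//   [ 4 ElemType values | 4 int column ids | 3 int row offsets (numRows+1) ]
// where the unused tail of the value/column regions is reserve capacity.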
template<class ElemType>
class MATH_API GPUSparseMatrix : public BaseMatrix<ElemType>
{
typedef BaseMatrix<ElemType> B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require the use of this-> in GCC
public:
GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR, const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
GPUSparseMatrix(const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR,
const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
GPUSparseMatrix(const GPUSparseMatrix<ElemType>&);
GPUSparseMatrix(const GPUMatrix<ElemType>&, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR);
#ifndef LINUX
GPUSparseMatrix(GPUSparseMatrix<ElemType>&&);
#endif /* LINUX */
~GPUSparseMatrix();
public:
void Reset();
public:
// returns the non-zero values; the column pointer immediately follows them.
// The in-memory layout is always in the following order:
// non-zero data elements, full index locations, compressed index locations.
// In CSR the row data is compressed; in CSC the col data is compressed.
inline const ElemType* NzValues() const {return m_pArray;}
inline ElemType* NzValues() {return m_pArray;}
inline size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use
GPUSPARSE_INDEX_TYPE* MajorIndexLocation() const { return (GPUSPARSE_INDEX_TYPE*)(m_pArray + m_elemSizeAllocated); } //this is the major index, row/col ids in CSC/CSR format
size_t MajorIndexCount() const { return m_nz; }
size_t MajorIndexSize() const { return sizeof(GPUSPARSE_INDEX_TYPE)*MajorIndexCount(); } // actual number of major index bytes in use
GPUSPARSE_INDEX_TYPE* SecondaryIndexLocation() const { return MajorIndexLocation() + m_elemSizeAllocated; } //this is the compressed index, col/row in CSC/CSR format
size_t SecondaryIndexCount(const size_t numNZ) const
{
if (m_format&matrixFormatCompressed)
{
size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols;
if (cnt > 0) cnt++; // add an extra element on the end for the "max" value
return cnt;
}
else
return numNZ; // COO format
}
size_t SecondaryIndexCount() const
{
return SecondaryIndexCount(m_nz);
}
// get size for compressed index
size_t SecondaryIndexSize() const { return (SecondaryIndexCount())*sizeof(GPUSPARSE_INDEX_TYPE); }
size_t BufferSizeNeeded() const { return NzSize() + MajorIndexSize() + SecondaryIndexSize(); }
size_t BufferSizeNeeded(const size_t numNZ) const
{ return sizeof(ElemType)*numNZ + sizeof(GPUSPARSE_INDEX_TYPE)*(numNZ + SecondaryIndexCount(numNZ)); }
inline size_t BufferSizeAllocated() const { return m_totalBufferSizeAllocated; }
inline ElemType* BufferPointer() const { return m_pArray; }
// the column and row locations will swap based on what format we are in. Full index always follows the data array
GPUSPARSE_INDEX_TYPE* RowLocation() const { return (m_format&matrixFormatRowMajor) ? SecondaryIndexLocation() : MajorIndexLocation(); }
size_t RowSize() const {return (m_format&matrixFormatRowMajor)?SecondaryIndexSize():MajorIndexSize();}
GPUSPARSE_INDEX_TYPE* ColLocation() const { return (m_format&matrixFormatRowMajor) ? MajorIndexLocation() : SecondaryIndexLocation(); }
size_t ColSize() const {return (m_format&matrixFormatRowMajor)?MajorIndexSize():SecondaryIndexSize();} // actual number of bytes in use
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true); //matrix format will affect the size to allocate
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly = true);
GPUSparseMatrix<ElemType> Transpose() const;
void InplaceTranspose();
GPUSparseMatrix<ElemType>& AssignTransposeOf(const GPUSparseMatrix<ElemType>& a);
GPUMatrix<ElemType> CopyToDenseMatrix() const;
void CopyToDenseMatrix(GPUMatrix<ElemType> &denseMatrix) const;
void CopyToCPUSparseMatrix(CPUSparseMatrix<ElemType> &cpuSparseMatrix) const;
void ChangeDeviceTo(DEVICEID_TYPE toId);
GPUSparseMatrix<ElemType>& operator=(const GPUSparseMatrix<ElemType>& deepCopy);
#ifndef LINUX
GPUSparseMatrix<ElemType>& operator=(GPUSparseMatrix<ElemType>&& moveFrom);
#endif /* LINUX */
GPUSparseMatrix<ElemType> operator+ (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType> operator- (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType>& operator^= (const ElemType alpha); //element-wise power
GPUSparseMatrix<ElemType> operator^ (const ElemType alpha) const; //element-wise power
GPUSparseMatrix<ElemType>& operator*= (const ElemType alpha);
GPUSparseMatrix<ElemType> operator*(const ElemType alpha) const;
GPUSparseMatrix<ElemType>& AssignElementPowerOf(const GPUSparseMatrix<ElemType>& a, const ElemType power);
bool IsEqualTo(const GPUSparseMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
bool IsEqualTo(const GPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
public:
virtual DEVICEID_TYPE GetComputeDeviceId(void) const;
inline size_t GetNumNZElements() const {return m_nz;}
// Sets the sparse matrix in CSR format; this acts as a deep copy
void SetMatrixFromCSRFormat(const GPUSPARSE_INDEX_TYPE *h_CSRRow, const GPUSPARSE_INDEX_TYPE *h_Col, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
void SetMatrixFromCSCFormat(const GPUSPARSE_INDEX_TYPE *h_CSCCol, const GPUSPARSE_INDEX_TYPE *h_Row, const ElemType *h_Val,
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
void SetMatrixFromLabelAndClass(CPUSPARSE_INDEX_TYPE *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize);
// Gets the sparse matrix in CSR format; this acts as a deep copy. All passed pointers must be NULL; the function allocates the memory itself.
void GetMatrixFromCSRFormat(GPUSPARSE_INDEX_TYPE*& h_CSRRow, GPUSPARSE_INDEX_TYPE*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
void GetMatrixFromCSCFormat(GPUSPARSE_INDEX_TYPE*& h_CSCCol, GPUSPARSE_INDEX_TYPE*& h_Row, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
void ConvertToSparseFormat(MatrixFormat newFormat);
void ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix<ElemType>& outMatrix) const;
public:
GPUSparseMatrix<ElemType>& ElementInverse ();
GPUSparseMatrix<ElemType>& AssignElementInverseOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceLinearRectifierDerivative();
GPUSparseMatrix<ElemType>& AssignLinearRectifierDerivativeOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceSigmoid ();
GPUSparseMatrix<ElemType>& AssignSigmoidOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceTanh ();
GPUSparseMatrix<ElemType>& AssignTanhOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceSqrt ();
GPUSparseMatrix<ElemType>& AssignSqrtOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceExp ();
GPUSparseMatrix<ElemType>& AssignExpOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceLog ();
GPUSparseMatrix<ElemType>& AssignLogOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceAbs ();
GPUSparseMatrix<ElemType>& AssignAbsOf (const GPUSparseMatrix<ElemType>& a);
GPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateBottomOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
GPUSparseMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignTruncateTopOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
GPUSparseMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
ElemType SumOfElements () const; //sum of all elements
ElemType SumOfAbsElements () const; //sum of all abs(elements)
ElemType FrobeniusNorm() const;
ElemType MatrixNormInf() const;
ElemType MatrixNorm1() const;
ElemType MatrixNorm0() const { return (ElemType)GetNumNZElements(); };
public:
//Performs C = alpha op ( S ) D + beta C; Where S is sparse and D and C are dense
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b,
const bool transposeB, ElemType beta, GPUMatrix<ElemType>& c);
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix<ElemType>& S, const bool transposeS, const GPUMatrix<ElemType>& D,
const bool transposeD, ElemType beta, GPUMatrix<ElemType>& C);
static void MultiplyAndAdd(ElemType alpha, const GPUMatrix<ElemType>& lhs, const bool transposeA, const GPUSparseMatrix<ElemType>& rhs,
const bool transposeB, GPUSparseMatrix<ElemType>& c);
static void ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix<ElemType>& lhs, GPUMatrix<ElemType>& c);
static void ClassEntropy(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& weight,
const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& etp, GPUMatrix<ElemType>& entropyScore);
static void ClassEntropyError(GPUSparseMatrix<ElemType>& a);
static void ClassEntropyGradientOfInput(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& weight, GPUMatrix<ElemType>& grd);
static void ClassEntropyGradientOfWeight(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& input, const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& grd);
void NormalGrad(GPUMatrix<ElemType>& c, const ElemType momentum);
static void Multiply(const GPUSparseMatrix<ElemType>& S, const GPUMatrix<ElemType>& D, GPUMatrix<ElemType>& C);
static void Multiply(const GPUMatrix<ElemType>& D, const GPUSparseMatrix<ElemType>& S, GPUMatrix<ElemType>& C);
static void Multiply(const GPUSparseMatrix<ElemType>& S1, bool transposeS1, const GPUSparseMatrix<ElemType>& S2, bool transposeS2, GPUSparseMatrix<ElemType> &C);
GPUSparseMatrix<ElemType>& AssignProductOf(const GPUSparseMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b, const bool transposeB);
static ElemType InnerProductOfMatrices(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
static ElemType InnerProductOfMatrices(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUSparseMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
static void ScaleAndAdd(ElemType alpha,const GPUMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
static void Scale(ElemType alpha, GPUSparseMatrix<ElemType>& a);
static void ElementWisePower (ElemType alpha, const GPUSparseMatrix<ElemType>& a, GPUSparseMatrix<ElemType>& c);
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool AreEqual(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
// For these two, I should also add a version which returns a GPUSparseMatrix, since Dense .* Sparse = Sparse .* Dense = Sparse
static GPUMatrix<ElemType> ElementProductOf (const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
static GPUMatrix<ElemType> ElementProductOf (const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
public:
// See: http://stackoverflow.com/questions/4660123/overloading-friend-operator-for-template-class/4661372#4661372
template <class ElemTypeDummy>
friend MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemTypeDummy>& us);
template <class ElemTypeDummy>
friend MATH_API File& operator<<(File& stream, const GPUSparseMatrix<ElemTypeDummy>& us);
private:
void* ReserveTempHostBuffer(const size_t sizeInByte) const;
template <class OutType, class InType>
static void CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size);
private:
void ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE deviceId);
private:
void performInplaceFunction(const int kind);
void DeepCopy(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void Clear();
void PrepareBuffer(const size_t numRows, const size_t numCols, const bool canReuseBuffer, std::function<size_t(GPUSPARSE_INDEX_TYPE* csrRowPtrC)> func);
size_t ElemCountFromBufferSize(const size_t totalBufferSize) const;
size_t ElemCountFromBufferSize() const;
DEVICEID_TYPE PrepareDevice(const DEVICEID_TYPE deviceId = -1) const;
private:
size_t m_totalBufferSizeAllocated;
size_t m_blockSize; //block size
size_t *m_blockIds; //block ids
size_t *m_rowToId; //the id showing the order row number is observed in the nnz values.
size_t m_expandedSize; // expanded label size
size_t* m_block2Id; // label block id to first word location
size_t* m_block2UniqId; // label block id to unique first word location
mutable void* m_tempHostBuffer; //used to copy values.
mutable size_t m_tempHostBufferSize;
static bool do_sync;
};
}}}
>>>>>>> origin/master