Fixed the bug by merging
This commit is contained in:
Родитель
db9b222b4e
Коммит
b37fc78688
|
@ -1,4 +1,3 @@
|
|||
<<<<<<< HEAD
|
||||
#include "TimerUtility.h"
|
||||
|
||||
#ifdef WIN32
|
||||
|
@ -38,44 +37,3 @@ namespace Microsoft{
|
|||
}
|
||||
}
|
||||
}
|
||||
=======
|
||||
#include "TimerUtility.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
namespace Microsoft{
|
||||
namespace MSR {
|
||||
namespace CNTK {
|
||||
|
||||
//Returns the amount of milliseconds elapsed
|
||||
unsigned long long Timer::MilliSecondElapsed()
|
||||
{
|
||||
#ifdef WIN32
|
||||
FILETIME ft;
|
||||
LARGE_INTEGER li;
|
||||
|
||||
GetSystemTimeAsFileTime(&ft); //ideally we should use GetSystemTimePreciseAsFileTime. But it's only avaiable with Win8+ and Win Server 2012+
|
||||
li.LowPart = ft.dwLowDateTime;
|
||||
li.HighPart = ft.dwHighDateTime;
|
||||
|
||||
unsigned long long ret = li.QuadPart;
|
||||
ret -= 116444736000000000LL; // Make the values consistent with Linux.
|
||||
ret /= 10000; // From 100 nano seconds (10^-7) to 1 millisecond (10^-3)
|
||||
|
||||
return ret;
|
||||
#else
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_REALTIME, &ts); // Works on Linux
|
||||
|
||||
UINT64 ret = ts.tv_sec * 1000 + ts.tv_nsec/1000000;
|
||||
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
>>>>>>> origin/master
|
||||
|
|
|
@ -498,17 +498,13 @@ public:
|
|||
// I.e. our chunks are a little larger than wanted (on av. half the av. utterance length).
|
||||
if (thisallchunks.empty() || thisallchunks.back().totalframes > chunkframes || thisallchunks.back().numutterances() >= frameref::maxutterancesperchunk)
|
||||
{
|
||||
//fprintf(stderr, "hahahahahaahhaha %d %d %d %d %d\n", chunkframes,_totalframes, i, thisallchunks.back().totalframes,thisallchunks.back().numutterances());
|
||||
//fprintf(stderr, "hahahahahaahahah %d\n", thisallchunks.size());
|
||||
thisallchunks.push_back (utterancechunkdata());
|
||||
|
||||
fprintf(stderr, "after hahahahahaahhaha %d %d %d %d %d %d\n", chunkframes,_totalframes, i, thisallchunks.back().totalframes,thisallchunks.back().numutterances(),utteranceset.size());
|
||||
|
||||
}
|
||||
// append utterance to last chunk
|
||||
utterancechunkdata & currentchunk = thisallchunks.back();
|
||||
//std::move(utteranceset[i]);
|
||||
//fprintf(stderr, "after hahahahahaahhaha %d %d %d %d %d %d\n", chunkframes,_totalframes, i, thisallchunks.back().totalframes,thisallchunks.back().numutterances(),utteranceset.size());
|
||||
|
||||
currentchunk.push_back (std::move (utteranceset[i])); // move it out from our temp array into the chunk
|
||||
// TODO: above push_back does not actually 'move' because the internal push_back does not accept that
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
<<<<<<< HEAD
|
||||
// SequenceParser.h : Parses the UCI format using a custom state machine (for speed)
|
||||
//
|
||||
//
|
||||
|
@ -636,621 +635,3 @@ public:
|
|||
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos);
|
||||
|
||||
};
|
||||
=======
|
||||
// SequenceParser.h : Parses the UCI format using a custom state machine (for speed)
|
||||
//
|
||||
//
|
||||
// <copyright file="SequenceParser.h" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define MAXSTRING 500000
|
||||
// UCI label location types
|
||||
enum LabelMode
|
||||
{
|
||||
LabelNone = 0,
|
||||
LabelFirst = 1,
|
||||
LabelLast = 2,
|
||||
};
|
||||
|
||||
enum ParseMode
|
||||
{
|
||||
ParseNormal = 0,
|
||||
ParseLineCount = 1
|
||||
};
|
||||
|
||||
enum SequenceFlags
|
||||
{
|
||||
seqFlagNull = 0,
|
||||
seqFlagLineBreak = 1, // line break on the parsed line
|
||||
seqFlagEmptyLine = 2, // empty line
|
||||
seqFlagStartLabel = 4,
|
||||
seqFlagStopLabel = 8
|
||||
};
|
||||
|
||||
// SequencePosition, save the ending indexes into the array for a sequence
|
||||
struct SequencePosition
|
||||
{
|
||||
size_t numberPos; // max position in the number array for this sequence
|
||||
size_t labelPos; // max position in the label array for this sequence
|
||||
unsigned flags; // flags that apply to this sequence
|
||||
SequencePosition(size_t numPos, size_t labelPos, unsigned flags):
|
||||
numberPos(numPos), labelPos(labelPos), flags(flags)
|
||||
{}
|
||||
};
|
||||
|
||||
// SequenceParser - the parser for the UCI format files
|
||||
// for ultimate speed, this class implements a state machine to read these format files
|
||||
template <typename NumType, typename LabelType=int>
|
||||
class SequenceParser
|
||||
{
|
||||
protected:
|
||||
enum ParseState
|
||||
{
|
||||
WholeNumber = 0,
|
||||
Remainder = 1,
|
||||
Exponent = 2,
|
||||
Whitespace = 3,
|
||||
Sign = 4,
|
||||
ExponentSign = 5,
|
||||
Period = 6,
|
||||
TheLetterE = 7,
|
||||
EndOfLine = 8,
|
||||
Label = 9, // any non-number things we run into
|
||||
ParseStateMax = 10, // number of parse states
|
||||
LineCountEOL = 10,
|
||||
LineCountOther = 11,
|
||||
AllStateMax = 12
|
||||
};
|
||||
|
||||
// type of label processing
|
||||
ParseMode m_parseMode;
|
||||
|
||||
// definition of label and feature dimensions
|
||||
size_t m_dimFeatures;
|
||||
|
||||
size_t m_dimLabelsIn;
|
||||
std::string m_beginSequenceIn; // starting sequence string (i.e. <s>)
|
||||
std::string m_endSequenceIn; // ending sequence string (i.e. </s>)
|
||||
|
||||
size_t m_dimLabelsOut;
|
||||
std::string m_beginSequenceOut; // starting sequence string (i.e. 'O')
|
||||
std::string m_endSequenceOut; // ending sequence string (i.e. 'O')
|
||||
|
||||
// level of screen output
|
||||
int m_traceLevel;
|
||||
|
||||
// current state of the state machine
|
||||
ParseState m_current_state;
|
||||
|
||||
// state tables
|
||||
DWORD *m_stateTable;
|
||||
|
||||
// numeric state machine variables
|
||||
double m_partialResult;
|
||||
double m_builtUpNumber;
|
||||
double m_divider;
|
||||
double m_wholeNumberMultiplier;
|
||||
double m_exponentMultiplier;
|
||||
|
||||
// label state machine variables
|
||||
size_t m_spaceDelimitedStart;
|
||||
size_t m_spaceDelimitedMax; // start of the next whitespace sequence (one past the end of the last word)
|
||||
int m_numbersConvertedThisLine;
|
||||
int m_labelsConvertedThisLine;
|
||||
int m_elementsConvertedThisLine;
|
||||
|
||||
// sequence state machine variables
|
||||
bool m_beginSequence;
|
||||
bool m_endSequence;
|
||||
std::string m_beginTag;
|
||||
std::string m_endTag;
|
||||
|
||||
// global stats
|
||||
int m_totalNumbersConverted;
|
||||
int m_totalLabelsConverted;
|
||||
|
||||
// file positions/buffer
|
||||
FILE * m_pFile;
|
||||
int64_t m_byteCounter;
|
||||
int64_t m_fileSize;
|
||||
|
||||
BYTE * m_fileBuffer;
|
||||
size_t m_bufferStart;
|
||||
size_t m_bufferSize;
|
||||
|
||||
// last label was a string (for last label processing)
|
||||
bool m_lastLabelIsString;
|
||||
|
||||
// vectors to append to
|
||||
std::vector<NumType>* m_numbers; // pointer to vectors to append with numbers
|
||||
std::vector<LabelType>* m_labels; // pointer to vector to append with labels (may be numeric)
|
||||
// FUTURE: do we want a vector to collect string labels in the non string label case? (signifies an error)
|
||||
|
||||
// SetState for a particular value
|
||||
void SetState(int value, ParseState m_current_state, ParseState next_state);
|
||||
|
||||
// SetStateRange - set states transitions for a range of values
|
||||
void SetStateRange(int value1, int value2, ParseState m_current_state, ParseState next_state);
|
||||
|
||||
// SetupStateTables - setup state transition tables for each state
|
||||
// each state has a block of 256 states indexed by the incoming character
|
||||
void SetupStateTables();
|
||||
|
||||
// reset all line state variables
|
||||
void PrepareStartLine();
|
||||
|
||||
// reset all number accumulation variables
|
||||
void PrepareStartNumber();
|
||||
|
||||
// reset all state variables to start reading at a new position
|
||||
void PrepareStartPosition(size_t position);
|
||||
|
||||
// UpdateBuffer - load the next buffer full of data
|
||||
// returns - number of records read
|
||||
size_t UpdateBuffer();
|
||||
|
||||
public:
|
||||
|
||||
// SequenceParser constructor
|
||||
SequenceParser();
|
||||
// setup all the state variables and state tables for state machine
|
||||
void Init();
|
||||
|
||||
// Parser destructor
|
||||
~SequenceParser();
|
||||
|
||||
private:
|
||||
// DoneWithLabel - Called when a string label is found
|
||||
void DoneWithLabel();
|
||||
|
||||
// Called when a number is complete
|
||||
void DoneWithValue();
|
||||
|
||||
// store label is specialized by LabelType
|
||||
void StoreLabel(NumType value);
|
||||
|
||||
// StoreLastLabel - store the last label (for numeric types), tranfers to label vector
|
||||
// string label types handled in specialization
|
||||
void StoreLastLabel();
|
||||
|
||||
public:
|
||||
// SetParseMode - Set the parsing mode
|
||||
// mode - set mode to either ParseLineCount, or ParseNormal
|
||||
void SetParseMode(ParseMode mode);
|
||||
|
||||
// SetTraceLevel - Set the level of screen output
|
||||
// traceLevel - traceLevel, zero means no output, 1 epoch related output, > 1 all output
|
||||
void SetTraceLevel(int traceLevel);
|
||||
|
||||
|
||||
// ParseInit - Initialize a parse of a file
|
||||
// fileName - path to the file to open
|
||||
// dimFeatures - number of features for precomputed features
|
||||
// dimLabelsIn - number of lables possible on input
|
||||
// dimLabelsOut - number of labels possible on output
|
||||
// beginSequenceIn - beginSequence input label
|
||||
// endSequenceIn - endSequence input label
|
||||
// beginSequenceOut - beginSequence output label
|
||||
// endSequenceOut - endSequence output label
|
||||
// bufferSize - size of temporary buffer to store reads
|
||||
// startPosition - file position on which we should start
|
||||
void ParseInit(LPCWSTR fileName, size_t dimFeatures, size_t dimLabelsIn, size_t dimLabelsOut, std::string beginSequenceIn="<s>", std::string endSequenceIn="</s>", std::string beginSequenceOut="O", std::string endSequenceOut="O", size_t bufferSize=1024*256, size_t startPosition=0)
|
||||
{
|
||||
assert(fileName != NULL);
|
||||
m_dimFeatures = dimFeatures;
|
||||
m_dimLabelsIn = dimLabelsIn;
|
||||
m_beginSequenceIn = beginSequenceIn;
|
||||
m_endSequenceIn = endSequenceIn;
|
||||
m_dimLabelsOut = dimLabelsOut;
|
||||
m_beginSequenceOut = beginSequenceOut;
|
||||
m_endSequenceOut = endSequenceOut;
|
||||
|
||||
m_parseMode = ParseNormal;
|
||||
m_traceLevel = 0;
|
||||
m_bufferSize = bufferSize;
|
||||
m_bufferStart = startPosition;
|
||||
|
||||
m_beginTag = m_beginSequenceIn;
|
||||
m_endTag = m_endSequenceIn;
|
||||
|
||||
// if we have a file already open, cleanup
|
||||
if (m_pFile != NULL)
|
||||
SequenceParser<NumType, LabelType>::~SequenceParser();
|
||||
|
||||
errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" );
|
||||
if (err)
|
||||
RuntimeError("SequenceParser::ParseInit - error opening file");
|
||||
int rc = _fseeki64(m_pFile, 0, SEEK_END);
|
||||
if (rc)
|
||||
RuntimeError("SequenceParser::ParseInit - error seeking in file");
|
||||
|
||||
m_fileSize = GetFilePosition();
|
||||
m_fileBuffer = new BYTE[m_bufferSize];
|
||||
SetFilePosition(startPosition);
|
||||
}
|
||||
|
||||
// Parse - Parse the data
|
||||
// recordsRequested - number of records requested
|
||||
// labels - pointer to vector to return the labels
|
||||
// numbers - pointer to vector to return the numbers
|
||||
// seqPos - pointers to the other two arrays showing positions of each sequence
|
||||
// returns - number of records actually read, if the end of file is reached the return value will be < requested records
|
||||
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos)
|
||||
{
|
||||
assert(numbers != NULL || m_dimFeatures == 0 || m_parseMode == ParseLineCount);
|
||||
assert(labels != NULL || m_dimLabelsIn == 0 && m_dimLabelsOut == 0|| m_parseMode == ParseLineCount);
|
||||
|
||||
// transfer to member variables
|
||||
m_numbers = numbers;
|
||||
m_labels = labels;
|
||||
|
||||
long TickStart = GetTickCount( );
|
||||
long recordCount = 0;
|
||||
long lineCount = 0;
|
||||
size_t bufferIndex = m_byteCounter-m_bufferStart;
|
||||
SequencePosition sequencePositionLast(0,0,seqFlagNull);
|
||||
while (m_byteCounter < m_fileSize && recordCount < recordsRequested)
|
||||
{
|
||||
// check to see if we need to update the buffer
|
||||
if (bufferIndex >= m_bufferSize)
|
||||
{
|
||||
UpdateBuffer();
|
||||
bufferIndex = m_byteCounter-m_bufferStart;
|
||||
}
|
||||
|
||||
char ch = m_fileBuffer[bufferIndex];
|
||||
|
||||
ParseState nextState = (ParseState)m_stateTable[(m_current_state<<8)+ch];
|
||||
|
||||
if( nextState <= Exponent )
|
||||
{
|
||||
m_builtUpNumber = m_builtUpNumber * 10 + (ch - '0');
|
||||
// if we are in the decimal portion of a number increase the divider
|
||||
if (nextState == Remainder)
|
||||
m_divider *= 10;
|
||||
}
|
||||
|
||||
// only do a test on a state transition
|
||||
if (m_current_state != nextState)
|
||||
{
|
||||
// System.Diagnostics.Debug.WriteLine("Current state = " + m_current_state + ", next state = " + nextState);
|
||||
|
||||
// if the nextState is a label, we don't want to do any number processing, it's a number prefixed string
|
||||
if (nextState != Label)
|
||||
{
|
||||
// do the numeric processing
|
||||
switch (m_current_state)
|
||||
{
|
||||
case TheLetterE:
|
||||
if (m_divider != 0) // decimal number
|
||||
m_partialResult += m_builtUpNumber / m_divider;
|
||||
else // integer
|
||||
m_partialResult = m_builtUpNumber;
|
||||
m_builtUpNumber = 0;
|
||||
break;
|
||||
case WholeNumber:
|
||||
// could be followed by a remainder, or an exponent
|
||||
if (nextState != TheLetterE)
|
||||
if( nextState != Period)
|
||||
DoneWithValue();
|
||||
if (nextState == Period)
|
||||
{
|
||||
m_partialResult = m_builtUpNumber;
|
||||
m_divider = 1;
|
||||
m_builtUpNumber = 0;
|
||||
}
|
||||
break;
|
||||
case Remainder:
|
||||
// can only be followed by a exponent
|
||||
if (nextState != TheLetterE)
|
||||
DoneWithValue();
|
||||
break;
|
||||
case Exponent:
|
||||
DoneWithValue();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// label handling
|
||||
switch (m_current_state)
|
||||
{
|
||||
case Label:
|
||||
DoneWithLabel();
|
||||
break;
|
||||
case EndOfLine:
|
||||
if (seqPos)
|
||||
{
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(),
|
||||
m_beginSequence?seqFlagStartLabel:0 | m_endSequence?seqFlagStopLabel:0 | seqFlagLineBreak);
|
||||
// add a sequence element to the list
|
||||
seqPos->push_back(sequencePos);
|
||||
sequencePositionLast = sequencePos;
|
||||
}
|
||||
|
||||
// end of sequence determines record separation
|
||||
if (m_endSequence)
|
||||
recordCount = (long)labels->size();
|
||||
|
||||
PrepareStartLine();
|
||||
break;
|
||||
case Whitespace:
|
||||
// this is the start of the next space delimited entity
|
||||
if (nextState != EndOfLine)
|
||||
m_spaceDelimitedStart = m_byteCounter;
|
||||
break;
|
||||
}
|
||||
|
||||
// label handling for next state
|
||||
switch (nextState)
|
||||
{
|
||||
// do sign processing on nextState, since we still have the character handy
|
||||
case Sign:
|
||||
if (ch == '-')
|
||||
m_wholeNumberMultiplier = -1;
|
||||
break;
|
||||
case ExponentSign:
|
||||
if (ch == '-')
|
||||
m_exponentMultiplier = -1;
|
||||
break;
|
||||
// going into whitespace or endOfLine, so end of space delimited entity
|
||||
case Whitespace:
|
||||
m_spaceDelimitedMax = m_byteCounter;
|
||||
// hit whitespace and nobody processed anything, so add as label
|
||||
//if (m_elementsConvertedThisLine == elementsProcessed)
|
||||
// DoneWithLabel();
|
||||
break;
|
||||
case EndOfLine:
|
||||
if (m_current_state != Whitespace)
|
||||
{
|
||||
m_spaceDelimitedMax = m_byteCounter;
|
||||
// hit whitespace and nobody processed anything, so add as label
|
||||
//if (m_elementsConvertedThisLine == elementsProcessed)
|
||||
// DoneWithLabel();
|
||||
}
|
||||
// process the label at the end of a line
|
||||
//if (m_labelMode == LabelLast && m_labels != NULL)
|
||||
//{
|
||||
// StoreLastLabel();
|
||||
//}
|
||||
// intentional fall-through
|
||||
case LineCountEOL:
|
||||
lineCount++; // done with another record
|
||||
if (m_traceLevel > 1)
|
||||
{
|
||||
// print progress dots
|
||||
if (recordCount % 100 == 0)
|
||||
{
|
||||
if (recordCount % 1000 == 0)
|
||||
{
|
||||
if (recordCount % 10000 == 0)
|
||||
{
|
||||
fprintf(stderr, "#");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "+");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, ".");
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case LineCountOther:
|
||||
m_spaceDelimitedStart = m_byteCounter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_current_state = nextState;
|
||||
|
||||
// move to next character
|
||||
m_byteCounter++;
|
||||
bufferIndex++;
|
||||
} // while
|
||||
|
||||
// at the end of the file we may need to add an additional sequencePosition push
|
||||
// this could probably be fixed by taking another pass through the loop above, but this is easier
|
||||
if (seqPos)
|
||||
{
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(),
|
||||
m_beginSequence?seqFlagStartLabel:0 | m_endSequence?seqFlagStopLabel:0 | seqFlagLineBreak);
|
||||
// add the final sequence element if needed
|
||||
if (!(sequencePos.labelPos == sequencePositionLast.labelPos && sequencePos.numberPos == sequencePositionLast.numberPos))
|
||||
{
|
||||
seqPos->push_back(sequencePos);
|
||||
}
|
||||
}
|
||||
|
||||
long TickStop = GetTickCount( );
|
||||
|
||||
long TickDelta = TickStop - TickStart;
|
||||
|
||||
if (m_traceLevel > 2)
|
||||
fprintf(stderr, "\n%d ms, %d numbers parsed\n\n", TickDelta, m_totalNumbersConverted );
|
||||
return lineCount;
|
||||
}
|
||||
|
||||
|
||||
int64_t GetFilePosition();
|
||||
void SetFilePosition(int64_t position);
|
||||
|
||||
// HasMoreData - test if the current dataset have more data
|
||||
// returns - true if it does, false if not
|
||||
bool HasMoreData();
|
||||
};
|
||||
|
||||
// StoreLabel - string version gets last space delimited string and stores in labels vector
|
||||
template <>
|
||||
void SequenceParser<float, std::string>::StoreLabel(float finalResult);
|
||||
|
||||
// DoneWithLabel - string version stores string label
|
||||
template <>
|
||||
void SequenceParser<float, std::string>::DoneWithLabel();
|
||||
|
||||
// StoreLastLabel - string version
|
||||
template <>
|
||||
void SequenceParser<float, std::string>::StoreLastLabel();
|
||||
|
||||
// NOTE: Current code is identical to float, don't know how to specialize with template parameter that only covers one parameter
|
||||
|
||||
// StoreLabel - string version gets last space delimited string and stores in labels vector
|
||||
template <>
|
||||
void SequenceParser<double, std::string>::StoreLabel(double finalResult);
|
||||
|
||||
// DoneWithLabel - string version stores string label
|
||||
template <>
|
||||
void SequenceParser<double, std::string>::DoneWithLabel();
|
||||
|
||||
// StoreLastLabel - string version
|
||||
template <>
|
||||
void SequenceParser<double, std::string>::StoreLastLabel();
|
||||
|
||||
/// language model sequence parser
|
||||
template <typename NumType, typename LabelType>
|
||||
class LMSequenceParser : public SequenceParser<NumType, LabelType>
|
||||
{
|
||||
protected:
|
||||
FILE * mFile;
|
||||
std::wstring mFileName;
|
||||
|
||||
public:
|
||||
LMSequenceParser() {
|
||||
mFile = nullptr;
|
||||
};
|
||||
~LMSequenceParser() {
|
||||
if (mFile) fclose(mFile);
|
||||
}
|
||||
|
||||
void ParseInit(LPCWSTR fileName, size_t dimFeatures, size_t dimLabelsIn, size_t dimLabelsOut, std::string beginSequenceIn="<s>", std::string endSequenceIn="</s>", std::string beginSequenceOut="O", std::string endSequenceOut="O")
|
||||
{
|
||||
assert(fileName != NULL);
|
||||
mFileName = fileName;
|
||||
m_dimFeatures = dimFeatures;
|
||||
m_dimLabelsIn = dimLabelsIn;
|
||||
m_beginSequenceIn = beginSequenceIn;
|
||||
m_endSequenceIn = endSequenceIn;
|
||||
m_dimLabelsOut = dimLabelsOut;
|
||||
m_beginSequenceOut = beginSequenceOut;
|
||||
m_endSequenceOut = endSequenceOut;
|
||||
|
||||
m_parseMode = ParseNormal;
|
||||
m_traceLevel = 0;
|
||||
m_bufferSize = 0;
|
||||
m_bufferStart = 0;
|
||||
|
||||
m_beginTag = m_beginSequenceIn;
|
||||
m_endTag = m_endSequenceIn;
|
||||
|
||||
m_fileSize = -1;
|
||||
m_fileBuffer = NULL;
|
||||
|
||||
if (mFile) fclose(mFile);
|
||||
|
||||
if (_wfopen_s(&mFile, fileName, L"rt") != 0)
|
||||
RuntimeError("cannot open file %s", fileName);
|
||||
}
|
||||
|
||||
void ParseReset()
|
||||
{
|
||||
if (mFile) fseek(mFile, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
// Parse - Parse the data
|
||||
// recordsRequested - number of records requested
|
||||
// labels - pointer to vector to return the labels
|
||||
// numbers - pointer to vector to return the numbers
|
||||
// seqPos - pointers to the other two arrays showing positions of each sequence
|
||||
// returns - number of records actually read, if the end of file is reached the return value will be < requested records
|
||||
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos)
|
||||
{
|
||||
assert(numbers != NULL || m_dimFeatures == 0 || m_parseMode == ParseLineCount);
|
||||
assert(labels != NULL || m_dimLabelsIn == 0 && m_dimLabelsOut == 0|| m_parseMode == ParseLineCount);
|
||||
|
||||
// transfer to member variables
|
||||
m_numbers = numbers;
|
||||
m_labels = labels;
|
||||
|
||||
long TickStart = GetTickCount( );
|
||||
long recordCount = 0;
|
||||
long orgRecordCount = (long)labels->size();
|
||||
long lineCount = 0;
|
||||
SequencePosition sequencePositionLast(0,0,seqFlagNull);
|
||||
/// get line
|
||||
char ch2[MAXSTRING];
|
||||
while (recordCount < recordsRequested && fgets(ch2, MAXSTRING, mFile) != nullptr)
|
||||
{
|
||||
|
||||
string ch = ch2;
|
||||
std::vector<string> vstr;
|
||||
vstr = sep_string(ch, " ");
|
||||
if (vstr.size() < 3)
|
||||
continue;
|
||||
|
||||
for (size_t i = 0; i < vstr.size(); i++)
|
||||
{
|
||||
labels->push_back(vstr[i]);
|
||||
}
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(),
|
||||
m_beginSequence?seqFlagStartLabel:0 | m_endSequence?seqFlagStopLabel:0 | seqFlagLineBreak);
|
||||
// add a sequence element to the list
|
||||
seqPos->push_back(sequencePos);
|
||||
sequencePositionLast = sequencePos;
|
||||
|
||||
recordCount = (long)labels->size() - orgRecordCount;
|
||||
|
||||
lineCount ++;
|
||||
} // while
|
||||
|
||||
long TickStop = GetTickCount( );
|
||||
|
||||
long TickDelta = TickStop - TickStart;
|
||||
|
||||
if (m_traceLevel > 2)
|
||||
fprintf(stderr, "\n%d ms, %d numbers parsed\n\n", TickDelta, m_totalNumbersConverted );
|
||||
return lineCount;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
typedef struct{
|
||||
size_t sLen;
|
||||
size_t sBegin;
|
||||
size_t sEnd;
|
||||
} stSentenceInfo;
|
||||
/// language model sequence parser
|
||||
template <typename NumType, typename LabelType>
|
||||
class LMBatchSequenceParser: public LMSequenceParser<NumType, LabelType>
|
||||
{
|
||||
public:
|
||||
vector<stSentenceInfo> mSentenceIndex2SentenceInfo;
|
||||
|
||||
public:
|
||||
LMBatchSequenceParser() { };
|
||||
~LMBatchSequenceParser() { }
|
||||
|
||||
void ParseInit(LPCWSTR fileName, size_t dimFeatures, size_t dimLabelsIn, size_t dimLabelsOut, std::string beginSequenceIn="<s>", std::string endSequenceIn="</s>", std::string beginSequenceOut="O", std::string endSequenceOut="O");
|
||||
|
||||
// Parse - Parse the data
|
||||
// recordsRequested - number of records requested
|
||||
// labels - pointer to vector to return the labels
|
||||
// numbers - pointer to vector to return the numbers
|
||||
// seqPos - pointers to the other two arrays showing positions of each sequence
|
||||
// returns - number of records actually read, if the end of file is reached the return value will be < requested records
|
||||
long Parse(size_t recordsRequested, std::vector<LabelType> *labels, std::vector<NumType> *numbers, std::vector<SequencePosition> *seqPos);
|
||||
|
||||
};
|
||||
>>>>>>> origin/master
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -3,7 +3,7 @@ command=Simple_Demo:Simple_Demo_Output
|
|||
|
||||
# deviceId=-1 for CPU, >=0 for GPU devices
|
||||
DeviceNumber=0
|
||||
stderr=Demo
|
||||
#stderr=Demo
|
||||
|
||||
precision=float
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,4 +1,3 @@
|
|||
<<<<<<< HEAD
|
||||
//
|
||||
// <copyright file="cn.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
|
@ -792,777 +791,3 @@ int main(int argc, char* argv[])
|
|||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
=======
|
||||
//
|
||||
// <copyright file="cn.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
// cn.cpp : Defines the entry point for the console application.
|
||||
//
|
||||
|
||||
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "ComputationNetwork.h"
|
||||
#include "ComputationNode.h"
|
||||
#include "DataReader.h"
|
||||
#include "DataWriter.h"
|
||||
#include "SimpleNetworkBuilder.h"
|
||||
#include "NDLNetworkBuilder.h"
|
||||
#include "SynchronousExecutionEngine.h"
|
||||
#include "ModelEditLanguage.h"
|
||||
#include "SGD.h"
|
||||
#include <string>
|
||||
#include "commandArgUtil.h"
|
||||
#include "SimpleEvaluator.h"
|
||||
#include "SimpleOutputWriter.h"
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#if defined(_WIN32)
|
||||
#include "io.h"
|
||||
#endif
|
||||
#include "hostname.h"
|
||||
#include "buildinfo.h"
|
||||
#ifdef LEAKDETECT
|
||||
#include "vld.h" // for memory leak detection
|
||||
#endif
|
||||
#include <vector>
|
||||
#include "BestGpu.h"
|
||||
|
||||
// MPI builds on windows require the following installed to "c:\program files\Microsoft MPI\"
|
||||
// HPC Pack 2012 R2 MS-MPI Redistributable Package
|
||||
// http://www.microsoft.com/en-us/download/details.aspx?id=41634
|
||||
|
||||
#ifdef MPI_SUPPORT
|
||||
#include "mpi.h"
|
||||
#pragma comment(lib, "msmpi.lib")
|
||||
#endif
|
||||
int numProcs;
|
||||
int myRank;
|
||||
|
||||
using namespace std;
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
||||
// internal test routine forward declaration
|
||||
template <typename ElemType>
|
||||
void TestCn(const ConfigParameters& config);
|
||||
|
||||
void RedirectStdErr(wstring logpath)
|
||||
{
|
||||
fprintf (stderr, "Redirecting stderr to file %S\n", logpath.c_str());
|
||||
msra::files::make_intermediate_dirs (logpath);
|
||||
auto_file_ptr f (logpath.c_str(), "wb");
|
||||
if (dup2 (fileno (f), 2) == -1)
|
||||
RuntimeError ("unexpected failure to redirect stderr to log file");
|
||||
setvbuf (stderr, NULL, _IONBF, 16384); // unbuffer it
|
||||
}
|
||||
|
||||
std::string WCharToString(const wchar_t* wst)
|
||||
{
|
||||
std::wstring ws(wst);
|
||||
std::string s(ws.begin(), ws.end());
|
||||
s.assign(ws.begin(), ws.end());
|
||||
return s;
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
void DumpNodeInfo(const ConfigParameters& config)
|
||||
{
|
||||
wstring modelPath = config("modelPath");
|
||||
wstring nodeName = config("nodeName",L"__AllNodes__");
|
||||
wstring defOutFilePath = modelPath + L"." + nodeName + L".txt";
|
||||
wstring outputFile = config("outputFile", WCharToString(defOutFilePath.c_str()).c_str());
|
||||
bool printValues = config("printValues", "true");
|
||||
|
||||
ComputationNetwork<ElemType> net(-1); //always use CPU
|
||||
net.LoadFromFile(modelPath);
|
||||
net.DumpNodeInfoToFile(nodeName, printValues, outputFile);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
void DoEvalBase(const ConfigParameters& config, IDataReader<ElemType>& reader)
|
||||
{
|
||||
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
|
||||
ConfigArray minibatchSize = config("minibatchSize", "40960");
|
||||
size_t epochSize = config("epochSize", "0");
|
||||
if (epochSize == 0)
|
||||
{
|
||||
epochSize = requestDataSize;
|
||||
}
|
||||
wstring modelPath = config("modelPath");
|
||||
intargvector mbSize = minibatchSize;
|
||||
|
||||
int traceLevel = config("traceLevel", "0");
|
||||
size_t numMBsToShowResult = config("numMBsToShowResult", "100");
|
||||
|
||||
ConfigArray evalNodeNames = config("evalNodeNames","");
|
||||
vector<wstring> evalNodeNamesVector;
|
||||
for (int i=0; i < evalNodeNames.size(); ++i)
|
||||
{
|
||||
evalNodeNamesVector.push_back(evalNodeNames[i]);
|
||||
}
|
||||
|
||||
ComputationNetwork<ElemType> net(deviceId);
|
||||
net.LoadFromFile(modelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
|
||||
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
|
||||
eval.Evaluate(reader, evalNodeNamesVector, mbSize[0], epochSize);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
void DoEval(const ConfigParameters& config)
|
||||
{
|
||||
//test
|
||||
ConfigParameters readerConfig (config("reader"));
|
||||
readerConfig.Insert("traceLevel",config("traceLevel","0"));
|
||||
|
||||
DataReader<ElemType> testDataReader(readerConfig);
|
||||
|
||||
DoEvalBase(config, testDataReader);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
void DoEvalUnroll(const ConfigParameters& config)
|
||||
{
|
||||
//test
|
||||
ConfigParameters readerConfig (config("reader"));
|
||||
readerConfig.Insert("traceLevel",config("traceLevel","0"));
|
||||
|
||||
DataReader<ElemType> testDataReader(readerConfig);
|
||||
|
||||
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
|
||||
ConfigArray minibatchSize = config("minibatchSize", "40960");
|
||||
size_t epochSize = config("epochSize", "0");
|
||||
if (epochSize == 0)
|
||||
{
|
||||
epochSize = requestDataSize;
|
||||
}
|
||||
wstring modelPath = config("modelPath");
|
||||
intargvector mbSize = minibatchSize;
|
||||
wstring path2EvalResults = config("path2EvalResults", L"");
|
||||
|
||||
ComputationNetwork<ElemType> net(deviceId);
|
||||
net.LoadFromFile(modelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
|
||||
SimpleEvaluator<ElemType> eval(net);
|
||||
ElemType evalEntropy;
|
||||
eval.EvaluateUnroll(testDataReader, mbSize[0], evalEntropy, path2EvalResults == L""? nullptr : path2EvalResults.c_str(), epochSize);
|
||||
}
|
||||
|
||||
// Cross-validation driver ("cv" action): evaluates a sequence of intermediate
// model checkpoints named "<modelPath>.<i>" on the same data set and reports,
// for each evaluation criterion, which checkpoint achieved the smallest error.
template <typename ElemType>
void DoCrossValidate(const ConfigParameters& config)
{
    // Build the evaluation reader; propagate the global traceLevel into it.
    ConfigParameters readerConfig (config("reader"));
    readerConfig.Insert("traceLevel",config("traceLevel","0"));

    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
    ConfigArray minibatchSize = config("minibatchSize", "40960");
    size_t epochSize = config("epochSize", "0");
    if (epochSize == 0)
    {
        // epochSize 0 means "evaluate the whole data set"
        epochSize = requestDataSize;
    }
    wstring modelPath = config("modelPath");
    intargvector mbSize = minibatchSize;

    // crossValidationInterval is a triple: first checkpoint index, step, last index.
    ConfigArray cvIntervalConfig = config("crossValidationInterval");
    intargvector cvInterval = cvIntervalConfig;

    // Optional pause between runs (e.g. to let checkpoints be written by a concurrent trainer).
    size_t sleepSecondsBetweenRuns = config("sleepTimeBetweenRuns", "0");

    int traceLevel = config("traceLevel", "0");
    size_t numMBsToShowResult = config("numMBsToShowResult", "100");

    // Optional explicit list of evaluation nodes; empty means the network's defaults.
    ConfigArray evalNodeNames = config("evalNodeNames","");
    vector<wstring> evalNodeNamesVector;
    for (int i=0; i < evalNodeNames.size(); ++i)
    {
        evalNodeNamesVector.push_back(evalNodeNames[i]);
    }

    std::vector<std::vector<ElemType>> cvErrorResults; // one error vector per evaluated model
    std::vector<std::wstring> cvModels;                // paths of the models actually evaluated

    DataReader<ElemType> cvDataReader(readerConfig);

    bool finalModelEvaluated = false;
    for (size_t i=cvInterval[0]; i<=cvInterval[2]; i+=cvInterval[1])
    {
        wstring cvModelPath = msra::strfun::wstrprintf (L"%ls.%lld", modelPath.c_str(), i);

        if (!fexists (cvModelPath))
        {
            fprintf(stderr, "model %ls does not exist.\n", cvModelPath.c_str());
            // Fall back to the final (unsuffixed) model once, if it exists.
            if (finalModelEvaluated || !fexists (modelPath))
                continue; // file missing
            else
            {
                cvModelPath = modelPath;
                finalModelEvaluated = true;
            }
        }

        cvModels.push_back(cvModelPath);
        ComputationNetwork<ElemType> net(deviceId);
        net.LoadFromFile(cvModelPath);
        net.ResetEvalTimeStamp();

        SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);

        fprintf(stderr, "model %ls --> \n",cvModelPath.c_str());
        std::vector<ElemType> evalErrors;
        evalErrors = eval.Evaluate(cvDataReader, evalNodeNamesVector, mbSize[0], epochSize);
        cvErrorResults.push_back(evalErrors);

        ::Sleep(1000*sleepSecondsBetweenRuns);
    }

    //find best model
    if (cvErrorResults.size() == 0)
        throw std::logic_error("No model is evaluated.");

    // Seed the per-criterion minima with the first model's results.
    std::vector<ElemType> minErrors;
    std::vector<int> minErrIds;
    std::vector<ElemType> evalErrors = cvErrorResults[0];
    for (int i=0; i < evalErrors.size(); ++i)
    {
        minErrors.push_back(evalErrors[i]);
        minErrIds.push_back(0);
    }

    // For every criterion j, track the model index i with the smallest error.
    for (int i=0; i<cvErrorResults.size(); i++)
    {
        evalErrors = cvErrorResults[i];
        for (int j=0; j<evalErrors.size(); j++)
        {
            if (evalErrors[j] < minErrors[j])
            {
                minErrors[j] = evalErrors[j];
                minErrIds[j] = i;
            }
        }
    }

    fprintf(stderr, "Best models:\n");
    fprintf(stderr,"------------\n");
    for (int i=0; i < minErrors.size(); ++i)
    {
        fprintf(stderr,"Based on Err[%d]: Best model = %ls with min err %.8g\n", i, cvModels[minErrIds[i]].c_str(), minErrors[i]);
    }
}
|
||||
|
||||
// "write" action: runs a saved model forward over the test data and writes the
// outputs of the requested nodes, either through a DataWriter (config "writer")
// or to plain files under config("outputPath").
template <typename ElemType>
void DoWriteOutput(const ConfigParameters& config)
{
    ConfigParameters readerConfig (config("reader"));
    readerConfig.Insert("traceLevel",config("traceLevel","0"));
    readerConfig.Insert("randomize","None"); //we don't want randomization when output results

    DataReader<ElemType> testDataReader(readerConfig);

    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
    ConfigArray minibatchSize = config("minibatchSize", "2048");
    wstring modelPath = config("modelPath");
    intargvector mbSize = minibatchSize;

    size_t epochSize = config("epochSize", "0");
    if (epochSize == 0)
    {
        // epochSize 0 means "process the whole data set"
        epochSize = requestDataSize;
    }

    // Optional explicit list of output nodes; empty means the network's defaults.
    ConfigArray outputNodeNames = config("outputNodeNames","");
    vector<wstring> outputNodeNamesVector;
    for (int i=0; i < outputNodeNames.size(); ++i)
    {
        outputNodeNamesVector.push_back(outputNodeNames[i]);
    }

    ComputationNetwork<ElemType> net(deviceId);
    net.LoadFromFile(modelPath);
    net.ResetEvalTimeStamp();

    SimpleOutputWriter<ElemType> writer(net, 1);

    // A "writer" sub-config takes precedence over a plain "outputPath".
    if (config.Exists("writer"))
    {
        ConfigParameters writerConfig (config("writer"));
        bool bWriterUnittest = writerConfig("unittest","false");
        DataWriter<ElemType> testDataWriter(writerConfig);
        writer.WriteOutput(testDataReader,mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, bWriterUnittest);
    }
    else if (config.Exists("outputPath"))
    {
        wstring outputPath = config("outputPath"); // crashes if no default given?
        writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, epochSize);
    }
    // NOTE(review): if neither "writer" nor "outputPath" is present this silently does nothing.
    //writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize);
}
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
TrainingCriterion ParseTrainingCriterionString(wstring s)
|
||||
{
|
||||
msra::strfun::tolower_ascii(s);
|
||||
if (s==L"crossentropywithsoftmax")
|
||||
return TrainingCriterion::CrossEntropyWithSoftmax;
|
||||
else if (s==L"squareerror")
|
||||
return TrainingCriterion::SquareError;
|
||||
else if (s!=L"classcrossentropywithsoftmax") // (twisted logic to keep compiler happy w.r.t. not returning from LogicError)
|
||||
LogicError("trainingCriterion: Invalid trainingCriterion value. Valid values are (CrossEntropyWithSoftmax | SquareError | ClassCrossEntropyWithSoftmax)");
|
||||
return TrainingCriterion::ClassCrossEntropyWithSoftmax;
|
||||
}
|
||||
|
||||
EvalCriterion ParseEvalCriterionString(wstring s)
|
||||
{
|
||||
msra::strfun::tolower_ascii(s);
|
||||
if (s==L"errorprediction")
|
||||
return EvalCriterion::ErrorPrediction;
|
||||
else if (s==L"crossentropywithsoftmax")
|
||||
return EvalCriterion::CrossEntropyWithSoftmax;
|
||||
else if (s==L"classcrossentropywithsoftmax")
|
||||
return EvalCriterion::ClassCrossEntropyWithSoftmax;
|
||||
else if (s!=L"squareerror")
|
||||
LogicError("evalCriterion: Invalid trainingCriterion value. Valid values are (ErrorPrediction | CrossEntropyWithSoftmax | SquareError)");
|
||||
return EvalCriterion::SquareError;
|
||||
}
|
||||
|
||||
}}};
|
||||
|
||||
template <typename ElemType>
|
||||
void DoCreateLabelMap(const ConfigParameters& config)
|
||||
{
|
||||
// this gets the section name we are interested in
|
||||
std::string section = config("section");
|
||||
// get that section (probably a peer config section, which works thanks to heirarchal symbol resolution)
|
||||
ConfigParameters configSection (config(section));
|
||||
ConfigParameters readerConfig (configSection("reader"));
|
||||
readerConfig.Insert("allowMapCreation","true");
|
||||
DEVICEID_TYPE deviceId = CPUDEVICE;
|
||||
size_t minibatchSize = config("minibatchSize", "2048");
|
||||
int traceLevel = config("traceLevel","0");
|
||||
std::vector<std::wstring> featureNames;
|
||||
std::vector<std::wstring> labelNames;
|
||||
GetFileConfigNames(readerConfig, featureNames, labelNames);
|
||||
|
||||
// setup minibatch matrices
|
||||
Matrix<ElemType> featuresMatrix(deviceId);
|
||||
Matrix<ElemType> labelsMatrix(deviceId);
|
||||
std::map<std::wstring, Matrix<ElemType>*> matrices;
|
||||
matrices[featureNames[0]] = &featuresMatrix;
|
||||
if (labelNames.size() == 0)
|
||||
RuntimeError("CreateLabelMap: no labels found to process");
|
||||
|
||||
// now create the reader and loop through the entire dataset to get all the labels
|
||||
auto start = std::chrono::system_clock::now();
|
||||
for (const std::wstring& labelsName: labelNames)
|
||||
{
|
||||
// take the last label file defined (the other one might be input)
|
||||
matrices[labelsName] = &labelsMatrix;
|
||||
|
||||
// get the label mapping file name
|
||||
ConfigParameters labelConfig (readerConfig(labelsName));
|
||||
std::string labelMappingFile;
|
||||
if (labelConfig.ExistsCurrent("labelMappingFile"))
|
||||
labelMappingFile = labelConfig("labelMappingFile");
|
||||
else if (readerConfig.ExistsCurrent("labelMappingFile"))
|
||||
labelMappingFile = labelConfig("labelMappingFile");
|
||||
else
|
||||
RuntimeError("CreateLabelMap: No labelMappingFile defined");
|
||||
|
||||
if (fexists(labelMappingFile))
|
||||
{
|
||||
fprintf(stderr,"CreateLabelMap: the label mapping file '%s' already exists, no work to do.\n", labelMappingFile.c_str());
|
||||
return;
|
||||
}
|
||||
fprintf(stderr,"CreateLabelMap: Creating the mapping file '%s' \n", labelMappingFile.c_str());
|
||||
|
||||
DataReader<ElemType> dataReader(readerConfig);
|
||||
|
||||
dataReader.StartMinibatchLoop(minibatchSize, 0, requestDataSize);
|
||||
int count = 0;
|
||||
while (dataReader.GetMinibatch(matrices))
|
||||
{
|
||||
Matrix<ElemType>& features = *matrices[featureNames[0]];
|
||||
count += features.GetNumCols();
|
||||
if (traceLevel > 1)
|
||||
fprintf(stderr,"."); // progress meter
|
||||
}
|
||||
dataReader.StartMinibatchLoop(minibatchSize, 1, requestDataSize);
|
||||
|
||||
// print the results
|
||||
if (traceLevel > 0)
|
||||
fprintf(stderr,"\nread %d labels and produced %s\n", count, labelMappingFile.c_str());
|
||||
}
|
||||
auto end = std::chrono::system_clock::now();
|
||||
auto elapsed = end-start;
|
||||
if (traceLevel > 1)
|
||||
fprintf(stderr, "%f seconds elapsed\n", (float)(std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count())/1000);
|
||||
}
|
||||
|
||||
|
||||
template <typename ElemType>
|
||||
void DoTrain(const ConfigParameters& config)
|
||||
{
|
||||
ConfigParameters configSGD (config("SGD"));
|
||||
bool makeMode = config("makeMode", "true");
|
||||
|
||||
ConfigParameters readerConfig (config("reader"));
|
||||
readerConfig.Insert("traceLevel",config("traceLevel","0"));
|
||||
|
||||
IComputationNetBuilder<ElemType>* netBuilder = NULL;
|
||||
|
||||
if (config.Exists("NDLNetworkBuilder"))
|
||||
{
|
||||
ConfigParameters configNDL (config("NDLNetworkBuilder"));
|
||||
netBuilder = (IComputationNetBuilder<ElemType>*)new NDLBuilder<ElemType>(configNDL);
|
||||
}
|
||||
else if (config.Exists("SimpleNetworkBuilder"))
|
||||
{
|
||||
ConfigParameters configSNB (config("SimpleNetworkBuilder"));
|
||||
netBuilder = (IComputationNetBuilder<ElemType>*)new SimpleNetworkBuilder<ElemType>(configSNB);
|
||||
}
|
||||
else
|
||||
{
|
||||
RuntimeError("No network builder found in the config file. NDLNetworkBuilder or SimpleNetworkBuilde must be specified" );
|
||||
}
|
||||
|
||||
DataReader<ElemType>* dataReader = new DataReader<ElemType>(readerConfig);
|
||||
|
||||
DataReader<ElemType>* cvDataReader = nullptr;
|
||||
ConfigParameters cvReaderConfig (config("cvReader", L""));
|
||||
|
||||
if (cvReaderConfig.size() != 0)
|
||||
{
|
||||
cvReaderConfig.Insert("traceLevel",config("traceLevel","0"));
|
||||
cvDataReader = new DataReader<ElemType>(cvReaderConfig);
|
||||
}
|
||||
|
||||
SGD<ElemType> sgd(configSGD);
|
||||
|
||||
sgd.Train(netBuilder, dataReader, cvDataReader, makeMode);
|
||||
|
||||
delete netBuilder;
|
||||
delete dataReader;
|
||||
delete cvDataReader;
|
||||
}
|
||||
|
||||
// "adapt" action: adapts an existing model (origModelFileName) with SGD,
// optionally regularizing against a reference node (refNodeName), using a
// training reader and an optional cross-validation reader.
template <typename ElemType>
void DoAdapt(const ConfigParameters& config)
{
    DEVICEID_TYPE deviceId = DeviceFromConfig(config);

    ConfigParameters configSGD (config("SGD"));
    bool makeMode = config("makeMode", "true"); // true: resume from existing checkpoints

    ConfigParameters readerConfig (config("reader"));
    readerConfig.Insert("traceLevel",config("traceLevel","0"));

    DataReader<ElemType>* dataReader = new DataReader<ElemType>(readerConfig);

    // The cross-validation reader is optional; an empty "cvReader" section means none.
    DataReader<ElemType>* cvDataReader = nullptr;
    ConfigParameters cvReaderConfig (config("cvReader", L""));

    if (cvReaderConfig.size() != 0)
    {
        cvReaderConfig.Insert("traceLevel",config("traceLevel","0"));
        cvDataReader = new DataReader<ElemType>(cvReaderConfig);
    }

    wstring origModelFileName = config("origModelFileName", L"");
    wstring refNodeName = config("refNodeName", L"");

    SGD<ElemType> sgd(configSGD);

    sgd.Adapt(origModelFileName, refNodeName, dataReader, cvDataReader, deviceId, makeMode);

    delete dataReader;
    delete cvDataReader; // deleting nullptr is a no-op
}
|
||||
|
||||
template <typename ElemType>
|
||||
void DoEdit(const ConfigParameters& config)
|
||||
{
|
||||
wstring editPath = config("editPath");
|
||||
wstring ndlMacros = config("ndlMacros","");
|
||||
NDLScript<ElemType> ndlScript;
|
||||
if (!ndlMacros.empty())
|
||||
ndlScript.LoadConfigFile(ndlMacros);
|
||||
MELScript<ElemType> melScript;
|
||||
melScript.LoadConfigFileAndResolveVariables(editPath, config);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
void DoConvertFromDbn(const ConfigParameters& config)
|
||||
{
|
||||
//config.Insert("deviceId","-1"); //force using CPU
|
||||
|
||||
wstring modelPath = config("modelPath");
|
||||
wstring dbnModelPath = config("dbnModelPath");
|
||||
|
||||
IComputationNetBuilder<ElemType>* netBuilder = (IComputationNetBuilder<ElemType>*)new SimpleNetworkBuilder<ElemType>(config);
|
||||
ComputationNetwork<ElemType>& net = netBuilder->LoadNetworkFromFile(dbnModelPath);
|
||||
net.SaveToFile(modelPath);
|
||||
delete (netBuilder);
|
||||
}
|
||||
// Top-level command dispatcher: iterates over the "command" list from the
// config, and for each command runs every entry of its "action" list by
// forwarding the command's own config section to the matching Do* handler.
template <typename ElemType>
void DoCommand(const ConfigParameters& config)
{
    ConfigArray command = config("command", "train");
    for (int i=0; i < command.size(); i++)
    {
        //get the configuration parameters that match the command
        ConfigParameters commandParams (config(command[i]));
        ConfigArray action = commandParams("action","train");

        // determine the action to perform, and do it
        for (int j=0; j < action.size(); j++)
        {
            if (action[j] == "train" || action[j] == "trainRNN")
                DoTrain<ElemType>(commandParams);
            else if (action[j] == "adapt")
                DoAdapt<ElemType>(commandParams);
            else if (action[j] == "test" || action[j] == "eval")
                DoEval<ElemType>(commandParams);
            else if (action[j] == "testunroll")
                DoEvalUnroll<ElemType>(commandParams);
            else if (action[j] == "edit")
                DoEdit<ElemType>(commandParams);
            else if (action[j] == "cv")
                DoCrossValidate<ElemType>(commandParams);
            else if (action[j] == "write")
                DoWriteOutput<ElemType>(commandParams);
            else if (action[j] == "devtest")
                TestCn<ElemType>(config); // for "devtest" action pass the root config instead
            else if (action[j] == "dumpnode")
                DumpNodeInfo<ElemType>(commandParams);
            else if (action[j] == "convertdbn")
                DoConvertFromDbn<ElemType>(commandParams);
            else if (action[j] == "createLabelMap")
                DoCreateLabelMap<ElemType>(commandParams);
            else
                RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());

            NDLScript<ElemType> ndlScript;
            ndlScript.ClearGlobal(); // clear global macros between commands
        }
    }
}
|
||||
|
||||
// Returns the current local time formatted as "YYYY/MM/DD hh:mm:ss" (19 chars).
std::string TimeDateStamp()
{
#if 0 // "safe" version for Windows, not needed it seems
    __time64_t localtime;

    _time64 (&localtime);            // get current time and date
    struct tm now;
    _localtime64_s (&now, &localtime);  // convert
#else
    time_t t = time(NULL);
    struct tm now = *localtime(&t);
#endif
    char buf[30];
    // snprintf instead of sprintf: guarantees no buffer overrun even for
    // out-of-range tm values (sprintf had no bound at all).
    snprintf (buf, sizeof(buf), "%04d/%02d/%02d %02d:%02d:%02d", now.tm_year + 1900, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
    return buf;
}
|
||||
|
||||
#ifdef MPI_SUPPORT
|
||||
// Oh, my gosh, this is going to be ugly. MPI_INIT needs a char* argv[], so let's interface.
|
||||
int MPIAPI MPI_Init(_In_opt_ int *argc, _Inout_count_(*argc) wchar_t*** argv)
|
||||
{
|
||||
// this maps from the strings
|
||||
std::map<std::string, wchar_t*> recover_wstring;
|
||||
|
||||
// do the mapping to 8-bit encoding for MPI_Init()
|
||||
vector<vector<char>> argv_string_vector;
|
||||
transform(*argv, *argv + *argc, std::back_inserter(argv_string_vector),
|
||||
[&recover_wstring](wchar_t*pws)->vector<char>
|
||||
{
|
||||
std::string tmp = msra::strfun::utf8(std::wstring(pws));
|
||||
recover_wstring[tmp] = pws;
|
||||
vector<char> rv(tmp.begin(), tmp.end());
|
||||
rv.push_back('\0');
|
||||
return rv;
|
||||
}
|
||||
);
|
||||
vector<char*> argv_charptr_vector;
|
||||
transform(argv_string_vector.begin(), argv_string_vector.end(), std::back_inserter(argv_charptr_vector),
|
||||
[](std::vector<char>&cs)->char*{ return &(cs[0]); }
|
||||
);
|
||||
char** argv_char = &(argv_charptr_vector[0]);
|
||||
|
||||
// Do the initialization
|
||||
int rv = MPI_Init(argc, &argv_char);
|
||||
|
||||
// try and reconstruct how MPI_Init changed the argv
|
||||
transform(argv_char, argv_char + *argc, stdext::checked_array_iterator<wchar_t**>(*argv, *argc),
|
||||
[&recover_wstring](char*pc)->wchar_t*
|
||||
{
|
||||
auto it = recover_wstring.find(std::string(pc));
|
||||
if (it == recover_wstring.end())
|
||||
RuntimeError("Unexpected interaction between MPI_Init and command line parameters");
|
||||
return it->second;
|
||||
}
|
||||
);
|
||||
|
||||
// pass through return value from internal call to MPI_Init()
|
||||
return rv;
|
||||
}
|
||||
#endif
|
||||
|
||||
void PrintBuiltInfo()
|
||||
{
|
||||
fprintf(stderr, "-------------------------------------------------------------------\n");
|
||||
fprintf(stderr, "Build info: \n\n");
|
||||
fprintf(stderr, "\t\tBuilt time: %s %s\n", __DATE__, __TIME__);
|
||||
fprintf(stderr, "\t\tLast modified date: %s\n", __TIMESTAMP__);
|
||||
fprintf(stderr, "\t\tBuilt by %s on %s\n", _BUILDER_, _BUILDMACHINE_);
|
||||
fprintf(stderr, "\t\tBuild Path: %s\n", _BUILDPATH_);
|
||||
#ifdef _GIT_EXIST
|
||||
fprintf(stderr, "\t\tBuild Branch: %s\n", _BUILDBRANCH_);
|
||||
fprintf(stderr, "\t\tBuild SHA1: %s\n", _BUILDSHA1_);
|
||||
#endif
|
||||
fprintf(stderr, "-------------------------------------------------------------------\n");
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Program entry point: optionally initializes MPI, parses the command line
// into a ConfigParameters tree, redirects stderr to a log file when requested,
// dumps the effective configuration, dispatches to DoCommand<float|double>,
// and finally writes an optional "DoneFile" marker on success.
int wmain(int argc, wchar_t* argv[])
{

    try
    {
#ifdef MPI_SUPPORT
        {
            int rc;
            rc = MPI_Init(&argc, &argv);
            if (rc != MPI_SUCCESS)
            {
                MPI_Abort(MPI_COMM_WORLD, rc);
                RuntimeError("Failure in MPI_Init: %d", rc);
            }
            MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
            MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
            fprintf(stderr, "MPI: RUNNING ON (%s), process %d/%d\n", getenv("COMPUTERNAME"), myRank, numProcs);
            fflush(stderr);
        }
#else
        // Single-process defaults when built without MPI.
        numProcs = 1;
        myRank = 0;
#endif

        ConfigParameters config;
        std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config);

        // get the command param set they want
        wstring logpath = config("stderr", L"");
        // [1/26/2015 erw, add done file so that it can be used on HPC]
        wstring DoneFile = config("DoneFile", L"");
        ConfigArray command = config("command", "train");

        // Build a log file name from the command names (plus rank under MPI)
        // and redirect stderr there.
        if (logpath != L"")
        {
            for (int i=0; i < command.size(); i++)
            {
                logpath += L"_";
                logpath += (wstring)command[i];
            }
            logpath += L".log";
            if (numProcs > 1)
            {
                std::wostringstream oss;
                oss << myRank;
                logpath += L"rank" + oss.str();
            }

            RedirectStdErr(logpath);
        }


        PrintBuiltInfo();


        std::string timestamp = TimeDateStamp();

        if (myRank == 0) // main process
        {
            //dump config info
            fprintf(stderr, "running on %s at %s\n", GetHostName().c_str(), timestamp.c_str());
            fprintf(stderr, "command line options: \n");
            for (int i = 1; i < argc; i++)
                fprintf(stderr, "%s ", WCharToString(argv[i]).c_str());

            // This simply merges all the different config parameters specified (eg, via config files or via command line directly),
            // and prints it.
            fprintf(stderr, "\n\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n");
            fprintf(stderr, "%s\n", rawConfigString.c_str());
            fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<\n");

            // Same as above, but all variables are resolved. If a parameter is set multiple times (eg, set in config, overriden at command line),
            // All of these assignments will appear, even though only the last assignment matters.
            fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
            fprintf(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
            fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");

            // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
            // value it is set to will appear).
            fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
            config.dumpWithResolvedVariables();
            fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");

            fprintf(stderr, "command: ");
            for (int i = 0; i < command.size(); i++)
            {
                fprintf(stderr, "%s ", command[i].c_str());
            }
        }

        //run commands
        std::string type = config("precision", "float");
        // accept old precision key for backward compatibility
        if (config.Exists("type"))
            type = config("type", "float");
        if ( myRank == 0 )
            fprintf(stderr, "\nprecision = %s\n", type.c_str());
        if (type == "float")
            DoCommand<float>(config);
        else if (type == "double")
            DoCommand<double>(config);
        else
            RuntimeError("invalid precision specified: %s", type.c_str());

        // still here , write a DoneFile if necessary
        if (!DoneFile.empty()){
            FILE* fp = fopenOrDie(DoneFile.c_str(), L"w");
            fprintf(fp, "successfully finished at %s on %s\n", TimeDateStamp().c_str(),GetHostName().c_str());
            fcloseOrDie(fp);
        }
    }
    catch (const std::exception &err)
    {
        fprintf(stderr, "EXCEPTION occurred: %s", err.what());
#ifdef _DEBUG
        DebugBreak();
#endif
        return EXIT_FAILURE;
    }
    catch(...)
    {
        fprintf(stderr, "Unknown ERROR occurred");
#ifdef _DEBUG
        DebugBreak();
#endif
        return EXIT_FAILURE;
    }
#ifdef MPI_SUPPORT
    MPI_Finalize();
#endif
    return EXIT_SUCCESS;
}
|
||||
>>>>>>> origin/master
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
<<<<<<< HEAD
|
||||
//
|
||||
// <copyright file="CPUSparseMatrix.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
|
@ -961,967 +960,3 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template class CPUSparseMatrix<double>;
|
||||
|
||||
}}}
|
||||
=======
|
||||
//
|
||||
// <copyright file="CPUSparseMatrix.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
// Math.cpp : Defines the exported functions for the DLL application.
|
||||
//
|
||||
|
||||
#include "stdafx.h"
|
||||
#include <assert.h>
|
||||
#include <stdexcept>
|
||||
#include <omp.h>
|
||||
#include <math.h>
|
||||
#include "CPUMatrix.h"
|
||||
#include "CPUSparseMatrix.h"
|
||||
#include <random>
|
||||
#include <chrono>
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
#ifdef LEAKDETECT
|
||||
#include <vld.h>
|
||||
#endif
|
||||
|
||||
#include "basetypes.h"
|
||||
#include "fileutil.h"
|
||||
|
||||
|
||||
#ifndef USE_MKL
|
||||
// use ACML as default.
|
||||
// Download ACML 5.3.0 (e.g., acml5.3.0-ifort64.exe) or above
|
||||
// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
|
||||
// Install the ifort64 variant (compiled with intel compiler) of the library
|
||||
// Set Environment variable ACML_PATH to C:\AMD\acml5.3.0\ifort64_mp or the folder you installed acml
|
||||
// to point to your folder for the include file and link library
|
||||
#include <acml.h> // requires ACML 5.3.0 and above
|
||||
#else
|
||||
// requires MKL 10.0 and above
|
||||
#endif
|
||||
|
||||
// This is an example of an exported variable
|
||||
//MATH_API int nMath=0;
|
||||
|
||||
// This is an example of an exported function.
|
||||
//MATH_API int fnMath(void)
|
||||
//{
|
||||
// return 42;
|
||||
//}
|
||||
|
||||
#ifndef USE_MKL //MKL has one additional parameter for different matrix order
|
||||
#define BLAS_COLMAJOR
|
||||
#else
|
||||
#define BLAS_COLMAJOR (int)MatrixOrder::ColMajor,
|
||||
#endif
|
||||
|
||||
#define SWAP(a,b) {(a) ^= (b); (b) ^= (a); (a) ^= (b);}
|
||||
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) // 0 based indexing
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
#pragma region Helpful Enum Definitions
    // Storage order for dense matrices; values match the CBLAS layout constants.
    enum class MatrixOrder
    {
        RowMajor = 101,  // row-major arrays
        ColMajor = 102   // column-major arrays
    };

    // Transposition flags; values are the character codes BLAS routines expect.
    enum class MatrixTranspose : char
    {
        NoTrans = 'N',   // trans='N'
        Trans = 'T',     // trans='T'
        ConjTrans = 'C'  // trans='C'
    };

    // How a symmetric matrix is stored; values are the BLAS 'uplo'-style codes.
    enum class SymMatrixType : char
    {
        Up = 'U',           // symmetric matrix is stored in the upper part
        Low = 'L',          // symmetric matrix is stored in the lower part
        Full = 'F',         // fully populated
        NotSymmetric = 'N'  // not a symmetric matrix
    };

    // Which side a matrix is applied from in a multiplication (BLAS 'side' codes).
    enum class MatrixOpSide : char
    {
        Left = 'L',   // left multiply
        Right = 'R',  // right multiply
    };
#pragma endregion Helpful Enum Definitions
|
||||
|
||||
#pragma region Constructors and Destructor
|
||||
|
||||
//should only be used by constructors.
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ZeroInit()
|
||||
{
|
||||
m_numRows = 0;
|
||||
m_numCols = 0;
|
||||
m_elemSizeAllocated = 0;
|
||||
m_compIndexSize = 0;
|
||||
m_externalBuffer = false;
|
||||
m_computeDevice = CPUDEVICE;
|
||||
m_nz = 0;
|
||||
m_matrixName = NULL;
|
||||
|
||||
//if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
m_colIdx = -1;
|
||||
m_pArray = NULL;
|
||||
m_unCompIndex = NULL;
|
||||
m_compIndex = NULL;
|
||||
}
|
||||
//else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
m_blockSize = 0;
|
||||
m_pArray = NULL;
|
||||
m_blockIds = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
//should only be used by constructors.
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::CheckInit(const MatrixFormat format)
|
||||
{
|
||||
if (format != MatrixFormat::matrixFormatSparseCSC && format != MatrixFormat::matrixFormatSparseCSR && format != MatrixFormat::matrixFormatSparseBlockCol && format != MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
throw std::logic_error("CPUSparseMatrix: unsupported sparse matrix format");
|
||||
}
|
||||
m_format = format;
|
||||
m_default = defaultElem();
|
||||
ZeroInit();
|
||||
}
|
||||
|
||||
    // Constructs an empty sparse matrix of the given format (CSC, CSR,
    // block-column, or block-row); throws std::logic_error for other formats.
    template<class ElemType>
    CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format)
    {
        CheckInit(format);
    }

    // Constructs a numRows x numCols sparse matrix of the given format with
    // storage reserved for 'size' non-zero elements.
    template<class ElemType>
    CPUSparseMatrix<ElemType>::CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size)
    {
        CheckInit(format);
        Resize(numRows, numCols, size);
    }
|
||||
|
||||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>::~CPUSparseMatrix()
|
||||
{
|
||||
if (m_matrixName!=NULL)
|
||||
{
|
||||
delete[] m_matrixName;
|
||||
m_matrixName = nullptr;
|
||||
}
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
if(m_pArray != NULL)
|
||||
delete[] m_pArray;
|
||||
if(m_unCompIndex != NULL)
|
||||
delete[] m_unCompIndex;
|
||||
if(m_compIndex != NULL)
|
||||
delete[] m_compIndex;
|
||||
}
|
||||
else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
if (m_pArray != NULL)
|
||||
delete[] m_pArray;
|
||||
if(m_blockIds != NULL)
|
||||
delete[] m_blockIds;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#pragma endregion Constructors and Destructor
|
||||
|
||||
#pragma region Basic Operators
|
||||
|
||||
    // Appends one non-zero value. Callers must insert in column order for CSC
    // and in row order for CSR; within a column/row, major indices must be
    // strictly increasing. Grows the storage automatically when full.
    template<class ElemType>
    void CPUSparseMatrix<ElemType>::SetValue(const size_t row, const size_t col, const ElemType v)
    {
        if(m_format != MatrixFormat::matrixFormatSparseCSC && m_format != MatrixFormat::matrixFormatSparseCSR)
        {
            throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call.");
        }

        if(m_elemSizeAllocated < m_nz +1) //automatic resize
        {
            Resize(m_numRows, m_numCols, m_nz + 100);  //allocate 100 more elements and keep existing values
        }

        // NOTE(review): row/col are size_t, so the "< 0" halves of these checks
        // are always false; only the upper-bound checks are effective.
        if(row < 0 || row >= m_numRows)
        {
            throw std::logic_error("CPUSparseMatrix: SetValue() invalid row id");
        }

        if(col < 0 || col >= m_numCols) {
            throw std::logic_error("CPUSparseMatrix: SetValue() invalid column id");
        }

        // r = major (within-column/row) index, c = secondary (column/row) index.
        size_t r = (m_format == matrixFormatSparseCSC) ? row: col;
        size_t c = (m_format == matrixFormatSparseCSC) ? col: row;

        m_pArray[m_nz] = v;
        m_unCompIndex[m_nz] = (CPUSPARSE_INDEX_TYPE)r;

        // consistency check: within the same column/row, major indices must strictly increase
        if(c == m_colIdx && r <= m_unCompIndex[m_nz-1])
        {
            throw std::logic_error("CPUSparseMatrix: SetValue is not called properly");
        }

        // When a new column/row starts, record where its entries begin.
        if (c != m_colIdx)
        {
            m_compIndex[c] = CPUSPARSE_INDEX_TYPE(m_nz);
            m_colIdx = (int) c;
        }
        // Keep the end marker of the current column/row up to date.
        m_compIndex[c + 1] = CPUSPARSE_INDEX_TYPE(m_nz + 1);
        m_nz++;
    }
|
||||
|
||||
    // Returns the raw, non-owning pointer to the non-zero value array.
    // The pointer is invalidated by Resize() and by the destructor.
    template<class ElemType>
    ElemType* CPUSparseMatrix<ElemType>::BufferPointer() const
    {
        return m_pArray;
    }
|
||||
|
||||
    // Sets the logical dimensions and (re)allocates storage for up to
    // numNZElemToReserve non-zeros. With growOnly=true the allocation never
    // shrinks; with keepExistingValues=true the current contents are copied
    // into the new buffers (requires they fit).
    template<class ElemType>
    void CPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, size_t numNZElemToReserve, const bool growOnly, const bool keepExistingValues)
    {
        // The compressed index needs one slot per column (CSC) or row (CSR) plus one;
        // sizing by max(rows, cols)+1 covers either format.
        size_t newCompIndexSize = (numCols > numRows ? numCols : numRows) + 1;
        bool reallocate = (m_elemSizeAllocated < numNZElemToReserve || (m_elemSizeAllocated > numNZElemToReserve && !growOnly) || m_compIndexSize < newCompIndexSize);

        m_numRows = numRows;
        m_numCols = numCols;

        if (reallocate)
        {
            if (m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
            {
                // Allocate new buffers first so the old data can be copied across.
                ElemType *pArray = new ElemType[numNZElemToReserve];
                CPUSPARSE_INDEX_TYPE *unCompIndex = new CPUSPARSE_INDEX_TYPE[numNZElemToReserve];
                CPUSPARSE_INDEX_TYPE *compIndex = new CPUSPARSE_INDEX_TYPE[newCompIndexSize];

                if (keepExistingValues && (m_nz > numNZElemToReserve || m_compIndexSize > newCompIndexSize))
                    throw std::logic_error("Resize: To keep values m_nz should <= numNZElemToReserve and m_compIndexSize <= newCompIndexSize");

                if (keepExistingValues && m_nz > 0)
                {
                    assert(m_compIndexSize > 0 && m_nz < numNZElemToReserve);
                    memcpy(pArray, m_pArray, NzSize());
                    memcpy(unCompIndex, m_unCompIndex, MajorIndexSize());
                    memcpy(compIndex, m_compIndex, SecondaryIndexSize());
                }

                if (m_pArray != NULL)
                    delete[] m_pArray;
                if (m_unCompIndex != NULL)
                    delete[] m_unCompIndex;
                if (m_compIndex != NULL)
                    delete[] m_compIndex;

                m_pArray = pArray;
                m_unCompIndex = unCompIndex;
                m_compIndex = compIndex;
            }
            else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
            {
                // Block formats store a value buffer plus one block id per column/row.
                ElemType *blockVal = new ElemType[numNZElemToReserve];
                size_t *blockIds = new size_t[newCompIndexSize];

                if (keepExistingValues && (m_nz > numNZElemToReserve || m_compIndexSize > newCompIndexSize))
                    throw std::logic_error("Resize: To keep values m_nz should <= numNZElemToReserve and m_compIndexSize <= newCompIndexSize");

                if (keepExistingValues && m_elemSizeAllocated > 0)
                {
                    assert(m_compIndexSize > 0 && m_elemSizeAllocated < numNZElemToReserve);
                    memcpy(blockVal, m_pArray, NzSize());
                    memcpy(blockIds, m_blockIds, sizeof(size_t)*m_compIndexSize);
                }

                if (m_pArray != NULL)
                    delete[] m_pArray;
                if(m_blockIds != NULL)
                    delete[] m_blockIds;

                m_pArray = blockVal;
                m_blockIds = blockIds;
            }

            m_elemSizeAllocated = numNZElemToReserve;
            m_compIndexSize = newCompIndexSize;
        }
    }
|
||||
|
||||
//Reset matrix so it can be reused
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::Reset()
|
||||
{
|
||||
m_nz = 0;
|
||||
m_colIdx = -1;
|
||||
m_blockSize = 0;
|
||||
}
|
||||
|
||||
//c = alpha*op(lhs) * op(rhs) + beta*c
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
|
||||
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, ElemType beta, CPUMatrix<ElemType>& c)
|
||||
|
||||
{
|
||||
if (lhs.IsEmpty() || rhs.IsEmpty())
|
||||
throw std::logic_error("MultiplyAndWeightedAdd: one of the input matrix is empty.");
|
||||
|
||||
int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows();
|
||||
int k = transposeA? (int)lhs.GetNumRows(): (int)lhs.GetNumCols();
|
||||
int l = transposeB? (int)rhs.GetNumCols(): (int)rhs.GetNumRows();
|
||||
int n = transposeB? (int)rhs.GetNumRows(): (int)rhs.GetNumCols();
|
||||
|
||||
assert (m>0 && k>0 && l>0 && n>0); //converting from size_t to int may cause overflow
|
||||
assert (k == l);
|
||||
if (k != l)
|
||||
{
|
||||
throw std::invalid_argument("CPUSparseMatrix::MultiplyAndWeightedAdd: The inner dimensions of a and b must match.");
|
||||
}
|
||||
|
||||
if (c.GetNumRows() != m || c.GetNumCols() != n)
|
||||
{
|
||||
c.Resize(m,n);
|
||||
}
|
||||
|
||||
if (beta == 0)
|
||||
{
|
||||
memset(c.GetArray(), 0, sizeof(ElemType) * c.GetNumElements());
|
||||
}
|
||||
else if (beta != 1)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
foreach_coord(i,j,c)
|
||||
{
|
||||
c(i,j) = beta * c(i,j);
|
||||
}
|
||||
}
|
||||
|
||||
if (rhs.GetFormat() != matrixFormatSparseCSC)
|
||||
NOT_IMPLEMENTED;
|
||||
|
||||
if (!transposeA && !transposeB)
|
||||
{
|
||||
for(size_t j = 0; j < rhs.GetNumCols(); j++)
|
||||
{
|
||||
size_t start = rhs.m_compIndex[j]; //ColLocation
|
||||
size_t end = rhs.m_compIndex[j+1];
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = rhs.m_unCompIndex[p]; //RowLocation
|
||||
ElemType val = rhs.m_pArray[p];
|
||||
|
||||
for(size_t h = 0; h < lhs.GetNumRows(); h++)
|
||||
{
|
||||
c(h,j) += alpha * lhs(h, i)*val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!transposeA && transposeB)
|
||||
{
|
||||
for(size_t j = 0; j < rhs.GetNumCols(); j++)
|
||||
{
|
||||
size_t start = rhs.m_compIndex[j];
|
||||
size_t end = rhs.m_compIndex[j + 1];
|
||||
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = rhs.m_unCompIndex[p];
|
||||
ElemType val = rhs.m_pArray[p];
|
||||
for(size_t h = 0; h < lhs.GetNumRows(); h++)
|
||||
{
|
||||
c(h, i) += alpha * lhs(h, j)*val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (transposeA && !transposeB)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
else
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
|
||||
//c = alpha * op(lhs) * op(rhs)
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::MultiplyAndAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
|
||||
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, CPUSparseMatrix<ElemType>& c)
|
||||
{
|
||||
if (lhs.IsEmpty() || rhs.IsEmpty())
|
||||
throw std::logic_error("LeftMultiplyAndAdd: one of the input matrix is empty.");
|
||||
|
||||
int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows();
|
||||
int k = transposeA? (int)lhs.GetNumRows(): (int)lhs.GetNumCols();
|
||||
int l = transposeB? (int)rhs.GetNumCols(): (int)rhs.GetNumRows();
|
||||
int n = transposeB? (int)rhs.GetNumRows(): (int)rhs.GetNumCols();
|
||||
|
||||
assert (m>0 && k>0 && l>0 && n>0); m; n; //converting from size_t to int may cause overflow
|
||||
assert (k == l);
|
||||
if (k != l)
|
||||
{
|
||||
throw std::invalid_argument("CPUSparseMatrix::MultiplyAndAdd: The inner dimensions of a and b must match.");
|
||||
}
|
||||
|
||||
c.Reset();
|
||||
|
||||
if (!transposeA && !transposeB)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
else if (!transposeA && transposeB)
|
||||
{
|
||||
if (rhs.GetFormat() != matrixFormatSparseCSC)
|
||||
NOT_IMPLEMENTED;
|
||||
|
||||
//allocate enough memory
|
||||
c.SetFormat(matrixFormatSparseBlockCol);
|
||||
c.Resize(m, n, m*min(n, rhs.m_nz));
|
||||
|
||||
map<size_t, size_t> w2Id;
|
||||
for(size_t j = 0; j < rhs.GetNumCols(); j++)
|
||||
{ // j ranges over batches
|
||||
size_t start = rhs.m_compIndex[j];
|
||||
size_t end = rhs.m_compIndex[j+1];
|
||||
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = rhs.m_unCompIndex[p]; //i ranges over words
|
||||
ElemType val = rhs.m_pArray[p]; //1 for(i, j)
|
||||
|
||||
bool first = true;
|
||||
if(w2Id.find(i) == w2Id.end())
|
||||
{
|
||||
w2Id[i] = w2Id.size();
|
||||
c.m_blockIds[c.m_blockSize]=i;
|
||||
c.m_blockSize++;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = false;
|
||||
}
|
||||
size_t pos = w2Id[i] * lhs.GetNumRows();
|
||||
for(size_t h = 0; h < lhs.GetNumRows(); h++)
|
||||
{ // h range over hidden layer
|
||||
if(first == true)
|
||||
{
|
||||
c.m_pArray[pos] = alpha*lhs(h, j)*val;
|
||||
} else
|
||||
{
|
||||
c.m_pArray[pos] += alpha*lhs(h, j)*val;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
c.m_nz = c.m_blockSize * m;
|
||||
if(c.m_nz > c.GetSizeAllocated())
|
||||
{
|
||||
throw std::logic_error("sparse matrix out of range.");
|
||||
}
|
||||
//c.SetFormat(matrixFormatSparseBlockCol);
|
||||
}
|
||||
else if (transposeA && !transposeB)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
else
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ScaleAndAdd(const ElemType alpha, const CPUSparseMatrix<ElemType>& lhs, CPUMatrix<ElemType>& rhs)
|
||||
{
|
||||
if (lhs.IsEmpty() || rhs.IsEmpty())
|
||||
{
|
||||
throw std::logic_error("ScaleAndAdd: one of the input matrix is empty.");
|
||||
}
|
||||
|
||||
if (lhs.GetNumRows() != rhs.GetNumRows() || lhs.GetNumCols() != rhs.GetNumCols())
|
||||
{
|
||||
throw std::invalid_argument("CPUSparseMatrix::ScaleAndAdd: The dimensions of a and b must match.");
|
||||
}
|
||||
|
||||
if(lhs.GetFormat() == MatrixFormat::matrixFormatSparseCSC || lhs.GetFormat() == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
size_t col_num = (lhs.m_format == MatrixFormat::matrixFormatSparseCSC) ? lhs.GetNumCols(): lhs.GetNumRows();
|
||||
for(size_t j = 0; j < col_num; j++)
|
||||
{
|
||||
size_t start = lhs.m_compIndex[j];
|
||||
size_t end = lhs.m_compIndex[j + 1];
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = lhs.m_unCompIndex[p];
|
||||
ElemType val = lhs.m_pArray[p];
|
||||
size_t r = (lhs.m_format == MatrixFormat::matrixFormatSparseCSC) ? i : j;
|
||||
size_t c = (lhs.m_format == MatrixFormat::matrixFormatSparseCSC) ? j : i;
|
||||
rhs(r, c) += alpha * val;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol || lhs.m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
for(size_t j = 0; j < lhs.m_blockSize; j++)
|
||||
{
|
||||
size_t i = lhs.m_blockIds[j];
|
||||
size_t len = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? lhs.GetNumRows() : lhs.GetNumCols();
|
||||
size_t start = j * len;
|
||||
for(size_t p = start; p < start+len; p++)
|
||||
{
|
||||
ElemType val = lhs.m_pArray[p];
|
||||
|
||||
size_t r = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
|
||||
size_t c = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
|
||||
rhs(r, c) += alpha * val;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("CPUSparseMatrix:: ScaleAndAdd() Not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
bool CPUSparseMatrix<ElemType>::AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold)
|
||||
{
|
||||
if (a.IsEmpty() || b.IsEmpty())
|
||||
throw std::logic_error("AreEqual: one of the input matrices is empty.");
|
||||
|
||||
if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
|
||||
return false;
|
||||
|
||||
bool result = true;
|
||||
|
||||
#pragma omp parallel for
|
||||
foreach_coord(i, j, a)
|
||||
{
|
||||
if (abs(a(i, j) - b(i, j)) > threshold)
|
||||
{
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// a: H x No: H is hidden layer size and No is mini-batch size
|
||||
// weight: V x H, V is vocab size
|
||||
// label: V x No
|
||||
// cls: 2 x Nc, Nc is number of classes, each col is start and end word ids of a class
|
||||
// idx2cls: V x 1, mapping from word to class id
|
||||
// etp: V x No, stores predicted values
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ClassEntropy(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& weight,
|
||||
const CPUSparseMatrix<ElemType> & label, const CPUMatrix<ElemType>& cls,
|
||||
const CPUMatrix<ElemType>& idx2cls, CPUSparseMatrix<ElemType>& etp, CPUMatrix<ElemType>& entropyScore)
|
||||
{
|
||||
if (a.IsEmpty() || cls.IsEmpty() || label.IsEmpty() || idx2cls.IsEmpty())
|
||||
throw std::logic_error("AssignSoftmaxOf: Matrix a, class, idx2cls or label is empty.");
|
||||
|
||||
if(etp.GetFormat() != MatrixFormat::matrixFormatSparseCSC)
|
||||
throw std::runtime_error("CPUSparseMatrix:: ClassEntropy() only support CSC");
|
||||
|
||||
size_t nC = cls.GetNumCols();
|
||||
size_t nV = label.GetNumRows() - nC;
|
||||
|
||||
if (nV != idx2cls.GetNumRows() || idx2cls.GetNumCols() != 1 || cls.GetNumCols() + idx2cls.GetNumRows() != label.GetNumRows())
|
||||
throw std::logic_error("ClassEntropy: check matrix dimension");
|
||||
|
||||
//allocate enough memory
|
||||
if(etp.m_elemSizeAllocated < etp.GetNumElements())
|
||||
{
|
||||
etp.Resize(etp.GetNumRows(), etp.GetNumCols(), etp.GetNumElements(), true, false);
|
||||
}
|
||||
etp.Reset();
|
||||
|
||||
entropyScore(0, 0) = 0;
|
||||
for(size_t j = 0; j < label.GetNumCols(); j++)
|
||||
{
|
||||
size_t start = label.m_compIndex[j];
|
||||
size_t end = label.m_compIndex[j + 1];
|
||||
for (size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = label.m_unCompIndex[p];
|
||||
size_t iStt, iEnd;
|
||||
if (i < nV)
|
||||
{
|
||||
size_t clsid = (size_t)idx2cls(i, 0);
|
||||
iStt = (size_t) cls(0, clsid); //class start word id
|
||||
iEnd = (size_t) cls(1, clsid); //class end word id
|
||||
}
|
||||
else
|
||||
{
|
||||
iStt = nV;
|
||||
iEnd = nV + nC;
|
||||
}
|
||||
|
||||
size_t b = etp.m_nz;
|
||||
for(size_t ii = iStt; ii < iEnd; ii++) //ii ranges over sub-vocab or class ids
|
||||
{
|
||||
ElemType val = 0.0;
|
||||
foreach_row(rw, a) //rw ranges over hidden units
|
||||
{
|
||||
val += weight(ii,rw) * a(rw,j);
|
||||
}
|
||||
etp.SetValue(ii, j, val);
|
||||
}
|
||||
ElemType maxV = LZERO;
|
||||
for(size_t ii = b; ii < etp.m_nz; ii++)
|
||||
{
|
||||
maxV = (ElemType) logadd(maxV, etp.m_pArray[ii]);
|
||||
}
|
||||
|
||||
for(size_t ii = b; ii < etp.m_nz; ii++)
|
||||
{
|
||||
etp.m_pArray[ii] = etp.m_pArray[ii] - maxV;
|
||||
}
|
||||
|
||||
entropyScore(0, 0) -= etp.m_pArray[b+i-iStt];
|
||||
//negate positive data points
|
||||
etp.m_pArray[b+i-iStt] *=-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ClassEntropyError(CPUSparseMatrix<ElemType>& a)
|
||||
{
|
||||
for(int i = 0; i < a.m_nz; i++)
|
||||
{
|
||||
if(a.m_pArray[i] < 0)
|
||||
{
|
||||
a.m_pArray[i] = exp(a.m_pArray[i]); //negative;
|
||||
}
|
||||
else
|
||||
{
|
||||
a.m_pArray[i] = exp(-a.m_pArray[i])-1; //positive
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ClassEntropyGradientOfInput(
|
||||
const CPUSparseMatrix<ElemType>& error,
|
||||
const CPUMatrix<ElemType>& weight,
|
||||
CPUMatrix<ElemType>& grd)
|
||||
{
|
||||
grd.SetValue(0);
|
||||
|
||||
for(size_t j = 0; j < error.GetNumCols(); j++)
|
||||
{
|
||||
size_t start = error.m_compIndex[j];
|
||||
size_t end = error.m_compIndex[j+1];
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = error.m_unCompIndex[p];
|
||||
for(size_t h = 0; h < grd.GetNumRows(); h++)
|
||||
{ // h ranges over hidden units
|
||||
grd(h,j) += weight(i, h) * error.m_pArray[p];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::ClassEntropyGradientOfWeight(
|
||||
const CPUSparseMatrix<ElemType>& error,
|
||||
const CPUMatrix<ElemType>& input,
|
||||
const CPUSparseMatrix<ElemType> & /*label*/,
|
||||
const CPUMatrix<ElemType>& /*cls*/,
|
||||
const CPUMatrix<ElemType>& /*idx2cls*/,
|
||||
CPUSparseMatrix<ElemType>& grd)
|
||||
{
|
||||
grd.SetFormat(matrixFormatSparseBlockRow);
|
||||
//allocate enough memory
|
||||
grd.Resize(grd.GetNumRows(), grd.GetNumCols(), error.m_nz*input.GetNumRows(), true, false);
|
||||
|
||||
grd.Reset();
|
||||
map<size_t, size_t> w2Id;
|
||||
for(size_t j = 0; j < error.GetNumCols(); j++)
|
||||
{
|
||||
size_t start = error.m_compIndex[j];
|
||||
size_t end = error.m_compIndex[j+1];
|
||||
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = error.m_unCompIndex[p]; // i ranges over words
|
||||
bool first = true;
|
||||
if(w2Id.find(i) == w2Id.end())
|
||||
{
|
||||
w2Id[i] = w2Id.size();
|
||||
grd.m_blockIds[grd.m_blockSize]=i;
|
||||
grd.m_blockSize++;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = false;
|
||||
}
|
||||
size_t pos = w2Id[i]*input.GetNumRows();
|
||||
for(size_t h = 0; h < input.GetNumRows(); h++)
|
||||
{ // h range over hidden layer
|
||||
if(first == true)
|
||||
{
|
||||
grd.m_pArray[pos] = input(h, j)*error.m_pArray[p];
|
||||
}
|
||||
else
|
||||
{
|
||||
grd.m_pArray[pos] += input(h, j)*error.m_pArray[p];
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
grd.m_nz = grd.m_blockSize * input.GetNumRows();
|
||||
if(grd.m_nz > grd.GetSizeAllocated())
|
||||
{
|
||||
throw std::logic_error("sparse matrix out of range.");
|
||||
}
|
||||
//grd.SetFormat(matrixFormatSparseBlockRow);
|
||||
}
|
||||
|
||||
// normal update for smoothed gradients c and current gradients (this)
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::NormalGrad(CPUMatrix<ElemType>& c, const ElemType momentum)
|
||||
{
|
||||
if (c.IsEmpty())
|
||||
{
|
||||
c.Resize(GetNumRows(), GetNumCols());
|
||||
c.SetValue(0.0);
|
||||
}
|
||||
|
||||
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
for(size_t j = 0; j < m_blockSize; j++)
|
||||
{
|
||||
size_t i = m_blockIds[j];
|
||||
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
|
||||
size_t start = j* len;
|
||||
for(size_t p = start; p < start+len; p++)
|
||||
{
|
||||
ElemType val = m_pArray[p];
|
||||
size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
|
||||
size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
|
||||
c(row, col) = (1-momentum)*val + momentum*c(row, col);
|
||||
m_pArray[p] = c(row, col);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("CPUSparseMatrix:: NormalGrad() only support block sparse format");
|
||||
}
|
||||
}
|
||||
|
||||
// update smoothed gradients c and current gradients (this)
|
||||
template<class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::Adagrad(CPUMatrix<ElemType>& c)
|
||||
{
|
||||
if (c.IsEmpty())
|
||||
{
|
||||
c.Resize(GetNumRows(), GetNumCols());
|
||||
c.SetValue(0.0);
|
||||
}
|
||||
|
||||
const ElemType floor = 1e-16f;
|
||||
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
size_t col_num = (m_format == MatrixFormat::matrixFormatSparseCSC) ? GetNumCols() : GetNumRows();
|
||||
for(size_t j = 0; j < col_num; j++)
|
||||
{
|
||||
size_t start = m_compIndex[j];
|
||||
size_t end = m_compIndex[j+1];
|
||||
for(size_t p = start; p < end; p++)
|
||||
{
|
||||
size_t i = m_unCompIndex[p];
|
||||
ElemType val = m_pArray[p];
|
||||
|
||||
size_t row = (m_format == MatrixFormat::matrixFormatSparseCSC) ? i : j;
|
||||
size_t col = (m_format == MatrixFormat::matrixFormatSparseCSC) ? j : i;
|
||||
ElemType adenorm = c(row, col);
|
||||
adenorm += val * val;
|
||||
val = val / (floor + sqrt(adenorm));
|
||||
m_pArray[p] = val;
|
||||
c(row, col) = adenorm;
|
||||
}
|
||||
}
|
||||
} else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
for(size_t j = 0; j < m_blockSize; j++)
|
||||
{
|
||||
size_t i = m_blockIds[j];
|
||||
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
|
||||
size_t start = j* len;
|
||||
for(size_t p = start; p < start+len; p++)
|
||||
{
|
||||
ElemType val = m_pArray[p];
|
||||
|
||||
size_t row = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? (p - start) : i;
|
||||
size_t col = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? i : (p - start);
|
||||
ElemType adenorm = c(row, col);
|
||||
adenorm += val * val;
|
||||
val = val / (floor + sqrt(adenorm));
|
||||
m_pArray[p] = val;
|
||||
c(row, col) = adenorm;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::InplaceTruncate (const ElemType threshold)
|
||||
{
|
||||
if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
|
||||
{
|
||||
ElemType locThresholdPos = abs(threshold);
|
||||
ElemType locTHresholdNeg = -locThresholdPos;
|
||||
|
||||
for(size_t j = 0; j < m_blockSize; j++)
|
||||
{
|
||||
size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
|
||||
size_t start = j* len;
|
||||
for (size_t p = start; p < start+len; p++)
|
||||
{
|
||||
if (m_pArray[p] > locThresholdPos)
|
||||
{
|
||||
m_pArray[p] = locThresholdPos;
|
||||
}
|
||||
else if (m_pArray[p] < locTHresholdNeg)
|
||||
{
|
||||
m_pArray[p] = locTHresholdNeg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("CPUSparseMatrix:: InplaceTruncate() only support block based sparse matrix");
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
MATH_API File& operator>>(File& stream, CPUSparseMatrix<ElemType>& us)
|
||||
{
|
||||
stream.GetMarker(fileMarkerBeginSection, std::wstring(L"BMAT"));
|
||||
size_t elsize;
|
||||
stream >> elsize;
|
||||
if (sizeof(ElemType) != elsize)
|
||||
throw std::runtime_error("Template argument size doesn't match those in file");
|
||||
std::wstring matrixName;
|
||||
|
||||
// now prepare this header to receive the data being read
|
||||
size_t nz, colnum, rownum;
|
||||
int format;
|
||||
|
||||
// read in the header information
|
||||
stream >> matrixName >> format >> nz >> colnum >> rownum;
|
||||
|
||||
us.SetFormat((MatrixFormat)format);
|
||||
if (us.GetFormat() != matrixFormatSparseCSC && us.GetFormat() != matrixFormatSparseCSR)
|
||||
NOT_IMPLEMENTED;
|
||||
|
||||
us.Resize(rownum, colnum, nz);
|
||||
|
||||
if (nz > 0)
|
||||
{
|
||||
size_t compressedSize = (us.GetFormat() == matrixFormatSparseCSC) ? colnum + 1 : rownum + 1;
|
||||
ElemType* dataBuffer = us.NzValues();
|
||||
CPUSPARSE_INDEX_TYPE* unCompressedIndex = us.MajorIndexLocation();
|
||||
CPUSPARSE_INDEX_TYPE* compressedIndex = us.SecondaryIndexLocation();
|
||||
|
||||
// read in the sparse matrix info
|
||||
for (size_t i = 0; i < nz; ++i)
|
||||
{
|
||||
stream >> dataBuffer[i];
|
||||
}
|
||||
for (size_t i = 0; i < nz; ++i)
|
||||
{
|
||||
stream >> unCompressedIndex[i];
|
||||
}
|
||||
for (size_t i = 0; i < compressedSize; ++i)
|
||||
{
|
||||
stream >> compressedIndex[i];
|
||||
}
|
||||
}
|
||||
stream.GetMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
|
||||
|
||||
us.SetMatrixName(matrixName.c_str());
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
template MATH_API File& operator>>(File& stream, CPUSparseMatrix<float>& us);
|
||||
template MATH_API File& operator>>(File& stream, CPUSparseMatrix<double>& us);
|
||||
|
||||
template <class ElemType>
|
||||
MATH_API File& operator<<(File& stream, const CPUSparseMatrix<ElemType>& us)
|
||||
{
|
||||
if (us.GetFormat() != matrixFormatSparseCSC && us.GetFormat() != matrixFormatSparseCSR)
|
||||
NOT_IMPLEMENTED;
|
||||
|
||||
stream.PutMarker(fileMarkerBeginSection, std::wstring(L"BMAT"));
|
||||
stream << sizeof(ElemType);
|
||||
if (us.GetMatrixName() == nullptr)
|
||||
{
|
||||
std::wstring s(L"nnmatrix");
|
||||
stream << s;
|
||||
}
|
||||
else
|
||||
{
|
||||
stream << us.GetMatrixName();
|
||||
}
|
||||
|
||||
size_t nz, numRows, numCols;
|
||||
size_t compressedSize = us.SecondaryIndexCount();
|
||||
int format = us.GetFormat();
|
||||
|
||||
stream << format << nz << numCols << numRows;
|
||||
|
||||
if (nz > 0)
|
||||
{
|
||||
ElemType* dataBuffer = us.NzValues();
|
||||
CPUSPARSE_INDEX_TYPE* unCompressedIndex = us.MajorIndexLocation();
|
||||
CPUSPARSE_INDEX_TYPE* compressedIndex = us.SecondaryIndexLocation();
|
||||
|
||||
for (size_t i = 0; i < nz; ++i)
|
||||
{
|
||||
stream << dataBuffer[i];
|
||||
}
|
||||
for (size_t i = 0; i < nz; ++i)
|
||||
{
|
||||
stream << unCompressedIndex[i];
|
||||
}
|
||||
for (size_t i = 0; i < compressedSize; ++i)
|
||||
{
|
||||
stream << compressedIndex[i];
|
||||
}
|
||||
}
|
||||
stream.PutMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
template class CPUSparseMatrix<float>;
|
||||
template class CPUSparseMatrix<double>;
|
||||
|
||||
}}}
|
||||
>>>>>>> origin/master
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,5 +1,4 @@
|
|||
<<<<<<< HEAD
|
||||
//
|
||||
//
|
||||
// <copyright file="GPUSparseMatrix.h" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
|
@ -24,8 +23,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
class MATH_API GPUSparseMatrix : public BaseMatrix<ElemType>
|
||||
{
|
||||
typedef BaseMatrix<ElemType> B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require to use thi-> in GCC
|
||||
|
||||
public:
|
||||
typedef BaseMatrix<ElemType> B;
|
||||
using B::m_numRows;
|
||||
using B::m_numCols;
|
||||
using B::m_pArray;
|
||||
using B::m_elemSizeAllocated;
|
||||
using B::m_nz;
|
||||
using B::m_format;
|
||||
using B::m_computeDevice;
|
||||
using B::m_externalBuffer;
|
||||
using B::m_matrixName;
|
||||
using B::OwnBuffer;
|
||||
using B::GetFormat;
|
||||
using B::SetFormat;
|
||||
using B::GetNumRows;
|
||||
using B::GetNumCols;
|
||||
using B::IsEmpty;
|
||||
using B::SetComputeDeviceId;
|
||||
using B::SetMatrixName;
|
||||
using B::SetNzCount;
|
||||
// without this, base members would require to use thi-> in GCC
|
||||
public:
|
||||
GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR, const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
|
||||
|
||||
|
@ -264,270 +282,3 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
};
|
||||
}}}
|
||||
|
||||
=======
|
||||
//
|
||||
// <copyright file="GPUSparseMatrix.h" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GPUMatrix.h"
|
||||
#include "CPUSparseMatrix.h"
|
||||
#include <functional>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
//GPU Sparse Matrix, using cuSPARSE library.
|
||||
//By default we are assuming CSR representation
|
||||
// NOTE m_elemSizeAllocated (in base matrix) means the number of non-zero elements we have allocated space
|
||||
// We are packing the CSR format (pointed to by m_pArray) as follows:
|
||||
// ElemType elements[m_elemSizeAllocated]
|
||||
// int colIdx[m_elemSizeAllocated]
|
||||
// int rowIdxStart[m_numRows+1]
|
||||
|
||||
template<class ElemType>
|
||||
class MATH_API GPUSparseMatrix : public BaseMatrix<ElemType>
|
||||
{
|
||||
typedef BaseMatrix<ElemType> B; using B::m_numRows; using B::m_numCols; using B::m_pArray; using B::m_elemSizeAllocated; using B::m_nz; using B::m_format; // without this, base members would require to use thi-> in GCC
|
||||
|
||||
public:
|
||||
GPUSparseMatrix(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR, const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
|
||||
|
||||
GPUSparseMatrix(const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR,
|
||||
const DEVICEID_TYPE computeDevice = AUTOPLACEMATRIX);
|
||||
|
||||
GPUSparseMatrix(const GPUSparseMatrix<ElemType>&);
|
||||
|
||||
GPUSparseMatrix(const GPUMatrix<ElemType>&, const MatrixFormat matrixFormat = MatrixFormat::matrixFormatSparseCSR);
|
||||
|
||||
#ifndef LINUX
|
||||
GPUSparseMatrix(GPUSparseMatrix<ElemType>&&);
|
||||
#endif /* LINUX */
|
||||
|
||||
~GPUSparseMatrix();
|
||||
|
||||
public:
|
||||
void Reset();
|
||||
|
||||
public:
|
||||
// return col pointer, which is immediately following the non-zero element
|
||||
// in memory format is always in the following order:
|
||||
// Non-zero data elements, Full index locations, compressed index locations
|
||||
// In CSR row data is compressed, in CSC col data is compressed
|
||||
inline const ElemType* NzValues() const {return m_pArray;}
|
||||
inline ElemType* NzValues() {return m_pArray;}
|
||||
inline size_t NzSize() const {return sizeof(ElemType)*m_nz;} // actual number of element bytes in use
|
||||
|
||||
GPUSPARSE_INDEX_TYPE* MajorIndexLocation() const { return (GPUSPARSE_INDEX_TYPE*)(m_pArray + m_elemSizeAllocated); } //this is the major index, row/col ids in CSC/CSR format
|
||||
size_t MajorIndexCount() const { return m_nz; }
|
||||
size_t MajorIndexSize() const { return sizeof(GPUSPARSE_INDEX_TYPE)*MajorIndexCount(); } // actual number of major index bytes in use
|
||||
|
||||
GPUSPARSE_INDEX_TYPE* SecondaryIndexLocation() const { return MajorIndexLocation() + m_elemSizeAllocated; } //this is the compressed index, col/row in CSC/CSR format
|
||||
size_t SecondaryIndexCount(const size_t numNZ) const
|
||||
{
|
||||
if (m_format&matrixFormatCompressed)
|
||||
{
|
||||
size_t cnt = (m_format&matrixFormatRowMajor)?m_numRows:m_numCols;
|
||||
if (cnt > 0) cnt++; // add an extra element on the end for the "max" value
|
||||
return cnt;
|
||||
}
|
||||
else
|
||||
return numNZ; // COO format
|
||||
}
|
||||
|
||||
size_t SecondaryIndexCount() const
|
||||
{
|
||||
return SecondaryIndexCount(m_nz);
|
||||
}
|
||||
|
||||
// get size for compressed index
|
||||
size_t SecondaryIndexSize() const { return (SecondaryIndexCount())*sizeof(GPUSPARSE_INDEX_TYPE); }
|
||||
|
||||
size_t BufferSizeNeeded() const { return NzSize() + MajorIndexSize() + SecondaryIndexSize(); }
|
||||
size_t BufferSizeNeeded(const size_t numNZ) const
|
||||
{ return sizeof(ElemType)*numNZ + sizeof(GPUSPARSE_INDEX_TYPE)*(numNZ + SecondaryIndexCount(numNZ)); }
|
||||
|
||||
inline size_t BufferSizeAllocated() const { return m_totalBufferSizeAllocated; }
|
||||
inline ElemType* BufferPointer() const { return m_pArray; }
|
||||
|
||||
// the column and row locations will swap based on what format we are in. Full index always follows the data array
|
||||
GPUSPARSE_INDEX_TYPE* RowLocation() const { return (m_format&matrixFormatRowMajor) ? SecondaryIndexLocation() : MajorIndexLocation(); }
|
||||
size_t RowSize() const {return (m_format&matrixFormatRowMajor)?SecondaryIndexSize():MajorIndexSize();}
|
||||
GPUSPARSE_INDEX_TYPE* ColLocation() const { return (m_format&matrixFormatRowMajor) ? MajorIndexLocation() : SecondaryIndexLocation(); }
|
||||
size_t ColSize() const {return (m_format&matrixFormatRowMajor)?MajorIndexSize():SecondaryIndexSize();} // actual number of bytes in use
|
||||
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
|
||||
|
||||
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);
|
||||
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true); //matrix format will affect the size to allocate
|
||||
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly = true);
|
||||
|
||||
GPUSparseMatrix<ElemType> Transpose() const;
|
||||
void InplaceTranspose();
|
||||
GPUSparseMatrix<ElemType>& AssignTransposeOf(const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUMatrix<ElemType> CopyToDenseMatrix() const;
|
||||
void CopyToDenseMatrix(GPUMatrix<ElemType> &denseMatrix) const;
|
||||
void CopyToCPUSparseMatrix(CPUSparseMatrix<ElemType> &cpuSparseMatrix) const;
|
||||
void ChangeDeviceTo(DEVICEID_TYPE toId);
|
||||
|
||||
GPUSparseMatrix<ElemType>& operator=(const GPUSparseMatrix<ElemType>& deepCopy);
|
||||
#ifndef LINUX
|
||||
GPUSparseMatrix<ElemType>& operator=(GPUSparseMatrix<ElemType>&& moveFrom);
|
||||
#endif /* LINUX */
|
||||
GPUSparseMatrix<ElemType> operator+ (const GPUSparseMatrix<ElemType>& a) const;
|
||||
GPUSparseMatrix<ElemType> operator- (const GPUSparseMatrix<ElemType>& a) const;
|
||||
GPUSparseMatrix<ElemType>& operator^= (const ElemType alpha); //element-wise power
|
||||
GPUSparseMatrix<ElemType> operator^ (const ElemType alpha) const; //element-wise power
|
||||
GPUSparseMatrix<ElemType>& operator*= (const ElemType alpha);
|
||||
GPUSparseMatrix<ElemType> operator*(const ElemType alpha) const;
|
||||
GPUSparseMatrix<ElemType>& AssignElementPowerOf(const GPUSparseMatrix<ElemType>& a, const ElemType power);
|
||||
|
||||
bool IsEqualTo(const GPUSparseMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
|
||||
bool IsEqualTo(const GPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
|
||||
public:
|
||||
virtual DEVICEID_TYPE GetComputeDeviceId(void) const;
|
||||
inline size_t GetNumNZElements() const {return m_nz;}
|
||||
|
||||
//Sets sparse matrix in CSR format. this acts as deep copy
|
||||
void SetMatrixFromCSRFormat(const GPUSPARSE_INDEX_TYPE *h_CSRRow, const GPUSPARSE_INDEX_TYPE *h_Col, const ElemType *h_Val,
|
||||
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
|
||||
void SetMatrixFromCSCFormat(const GPUSPARSE_INDEX_TYPE *h_CSCCol, const GPUSPARSE_INDEX_TYPE *h_Row, const ElemType *h_Val,
|
||||
const size_t nz, const size_t numRows, const size_t numCols, const bool IsOnDevice = false, const DEVICEID_TYPE devId = -1);
|
||||
void SetMatrixFromLabelAndClass(CPUSPARSE_INDEX_TYPE *h_row, size_t *h_block2Id, size_t *h_block2UniqId, size_t labelSize, size_t expandedSize, size_t blockSize);
|
||||
//Gets sparse matrix in CSR format. this acts as deep copy. All passed pointers must be NULL. the function will allocate memory itself.
|
||||
void GetMatrixFromCSRFormat(GPUSPARSE_INDEX_TYPE*& h_CSRRow, GPUSPARSE_INDEX_TYPE*& h_Col, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
|
||||
|
||||
void GetMatrixFromCSCFormat(GPUSPARSE_INDEX_TYPE*& h_CSCCol, GPUSPARSE_INDEX_TYPE*& h_Row, ElemType*& h_Val, size_t &nz, size_t &numRows, size_t &numCols) const;
|
||||
|
||||
void ConvertToSparseFormat(MatrixFormat newFormat);
|
||||
void ConvertToSparseFormat(MatrixFormat newFormat, GPUSparseMatrix<ElemType>& outMatrix) const;
|
||||
|
||||
public:
|
||||
GPUSparseMatrix<ElemType>& ElementInverse ();
|
||||
GPUSparseMatrix<ElemType>& AssignElementInverseOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceLinearRectifierDerivative();
|
||||
GPUSparseMatrix<ElemType>& AssignLinearRectifierDerivativeOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceSigmoid ();
|
||||
GPUSparseMatrix<ElemType>& AssignSigmoidOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceTanh ();
|
||||
GPUSparseMatrix<ElemType>& AssignTanhOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceSqrt ();
|
||||
GPUSparseMatrix<ElemType>& AssignSqrtOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceExp ();
|
||||
GPUSparseMatrix<ElemType>& AssignExpOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceLog ();
|
||||
GPUSparseMatrix<ElemType>& AssignLogOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceAbs ();
|
||||
GPUSparseMatrix<ElemType>& AssignAbsOf (const GPUSparseMatrix<ElemType>& a);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceTruncate (const ElemType threshold);
|
||||
|
||||
GPUSparseMatrix<ElemType>& InplaceTruncateBottom (const ElemType threshold);
|
||||
GPUSparseMatrix<ElemType>& AssignTruncateBottomOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
|
||||
GPUSparseMatrix<ElemType>& InplaceTruncateTop (const ElemType threshold);
|
||||
GPUSparseMatrix<ElemType>& AssignTruncateTopOf (const GPUSparseMatrix<ElemType>& a, const ElemType threshold);
|
||||
|
||||
GPUSparseMatrix<ElemType>& SetToZeroIfAbsLessThan (const ElemType threshold);
|
||||
|
||||
ElemType SumOfElements () const; //sum of all elements
|
||||
ElemType SumOfAbsElements () const; //sum of all abs(elements)
|
||||
ElemType FrobeniusNorm() const;
|
||||
ElemType MatrixNormInf() const;
|
||||
ElemType MatrixNorm1() const;
|
||||
ElemType MatrixNorm0() const { return (ElemType)GetNumNZElements(); };
|
||||
public:
|
||||
//Performs C = alpha ∗ op ( S ) ∗ D + beta ∗ C; Where S is sparse and D and C are dense
|
||||
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b,
|
||||
const bool transposeB, ElemType beta, GPUMatrix<ElemType>& c);
|
||||
static void MultiplyAndWeightedAdd(ElemType alpha, const GPUSparseMatrix<ElemType>& S, const bool transposeS, const GPUMatrix<ElemType>& D,
|
||||
const bool transposeD, ElemType beta, GPUMatrix<ElemType>& C);
|
||||
static void MultiplyAndAdd(ElemType alpha, const GPUMatrix<ElemType>& lhs, const bool transposeA, const GPUSparseMatrix<ElemType>& rhs,
|
||||
const bool transposeB, GPUSparseMatrix<ElemType>& c);
|
||||
static void ScaleAndAdd(const ElemType alpha, const GPUSparseMatrix<ElemType>& lhs, GPUMatrix<ElemType>& c);
|
||||
|
||||
static void ClassEntropy(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& weight,
|
||||
const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
|
||||
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& etp, GPUMatrix<ElemType>& entropyScore);
|
||||
static void ClassEntropyError(GPUSparseMatrix<ElemType>& a);
|
||||
static void ClassEntropyGradientOfInput(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& weight, GPUMatrix<ElemType>& grd);
|
||||
static void ClassEntropyGradientOfWeight(const GPUSparseMatrix<ElemType>& error, const GPUMatrix<ElemType>& input, const GPUSparseMatrix<ElemType> & label, const GPUMatrix<ElemType>& cls,
|
||||
const GPUMatrix<ElemType>& idx2cls, GPUSparseMatrix<ElemType>& grd);
|
||||
|
||||
void NormalGrad(GPUMatrix<ElemType>& c, const ElemType momentum);
|
||||
|
||||
static void Multiply(const GPUSparseMatrix<ElemType>& S, const GPUMatrix<ElemType>& D, GPUMatrix<ElemType>& C);
|
||||
static void Multiply(const GPUMatrix<ElemType>& D, const GPUSparseMatrix<ElemType>& S, GPUMatrix<ElemType>& C);
|
||||
static void Multiply(const GPUSparseMatrix<ElemType>& S1, bool transposeS1, const GPUSparseMatrix<ElemType>& S2, bool transposeS2, GPUSparseMatrix<ElemType> &C);
|
||||
GPUSparseMatrix<ElemType>& AssignProductOf(const GPUSparseMatrix<ElemType>& a, const bool transposeA, const GPUSparseMatrix<ElemType>& b, const bool transposeB);
|
||||
|
||||
static ElemType InnerProductOfMatrices(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
|
||||
static ElemType InnerProductOfMatrices(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
|
||||
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUSparseMatrix<ElemType>& c);
|
||||
static void ScaleAndAdd(ElemType alpha,const GPUSparseMatrix<ElemType>& a, ElemType beta, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
|
||||
static void ScaleAndAdd(ElemType alpha,const GPUMatrix<ElemType>& a, ElemType beta, const GPUSparseMatrix<ElemType>& b, GPUMatrix<ElemType>& c);
|
||||
static void Scale(ElemType alpha, GPUSparseMatrix<ElemType>& a);
|
||||
static void ElementWisePower (ElemType alpha, const GPUSparseMatrix<ElemType>& a, GPUSparseMatrix<ElemType>& c);
|
||||
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
|
||||
static bool AreEqual(const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, const ElemType threshold = 1e-8);
|
||||
static bool AreEqual(const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
|
||||
|
||||
//For these two, I should also add a version which would return GPUSparseMatrix, since Dense.*Sparse =Sparse.*Dense=Sparse
|
||||
static GPUMatrix<ElemType> ElementProductOf (const GPUSparseMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
|
||||
static GPUMatrix<ElemType> ElementProductOf (const GPUMatrix<ElemType>& a, const GPUSparseMatrix<ElemType>& b);
|
||||
|
||||
public:
|
||||
// See: http://stackoverflow.com/questions/4660123/overloading-friend-operator-for-template-class/4661372#4661372
|
||||
template <class ElemTypeDummy>
|
||||
friend MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemTypeDummy>& us);
|
||||
template <class ElemTypeDummy>
|
||||
friend MATH_API File& operator<<(File& stream, const GPUSparseMatrix<ElemTypeDummy>& us);
|
||||
|
||||
private:
|
||||
void* ReserveTempHostBuffer(const size_t sizeInByte) const;
|
||||
template <class OutType, class InType>
|
||||
static void CopyBuffer(OutType * outBuffer, const InType * inBuffer, const size_t size);
|
||||
private:
|
||||
void ZeroInit(const MatrixFormat matrixFormat, const DEVICEID_TYPE deviceId);
|
||||
|
||||
private:
|
||||
void performInplaceFunction(const int kind);
|
||||
void DeepCopy(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void Clear();
|
||||
void PrepareBuffer(const size_t numRows, const size_t numCols, const bool canReuseBuffer, std::function<size_t(GPUSPARSE_INDEX_TYPE* csrRowPtrC)> func);
|
||||
size_t ElemCountFromBufferSize(const size_t totalBufferSize) const;
|
||||
size_t ElemCountFromBufferSize() const;
|
||||
DEVICEID_TYPE PrepareDevice(const DEVICEID_TYPE deviceId = -1) const;
|
||||
|
||||
private:
|
||||
|
||||
size_t m_totalBufferSizeAllocated;
|
||||
|
||||
size_t m_blockSize; //block size
|
||||
size_t *m_blockIds; //block ids
|
||||
size_t *m_rowToId; //the id showing the order row number is observed in the nnz values.
|
||||
|
||||
size_t m_expandedSize; // expanded label size
|
||||
size_t* m_block2Id; // label block id to first word location
|
||||
size_t* m_block2UniqId; // label block id to unique first word location
|
||||
|
||||
mutable void* m_tempHostBuffer; //used to copy values.
|
||||
mutable size_t m_tempHostBufferSize;
|
||||
|
||||
static bool do_sync;
|
||||
};
|
||||
}}}
|
||||
|
||||
>>>>>>> origin/master
|
||||
|
|
Загрузка…
Ссылка в новой задаче