(added logging to calgammaformb() to track down an error)

This commit is contained in:
Frank Seide 2015-12-14 15:40:50 -08:00
Parent f4a91555cf
Commit 7a57f8b21b
2 changed files with 61 additions and 20 deletions

View file

@ -25,6 +25,45 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// which represents the column-major interpretation of a transposed row-by-row-scanned image where each pixel stores (R,G,B) as a float3.
// -----------------------------------------------------------------------
// Plans for improved tensor support:
//
// TensorShape support for:
// - column-major arbitrary-dimension arrays --this is already implemented
// - strides for storage, allowing
// - slicing
// - strides for computation, allowing
// - broadcasting (stride = 0)
// - stride magic such as inverting index order or convolution
// - insertion and dropping of 1-dimension (cf. 'new_axis' in numpy)
//
// Relation to Matrix and MBLayout:
// - tensors are stored in Matrix objects
// - both matrix row and column dimensions are interpreted as tensor dimensions
// - row dimension is explained by a TensorShape ComputationNode::SampleLayout
// - column dimensions are explained by MBLayout, which has one parallel-sequence index and one (or more) time-step dimensions, e.g. (s,t)
// - the total tensor shape of what is stored in the matrix is
// - no MBLayout: the SampleLayout
// - in presence of an MBLayout, it is determined as
// - when applying element-wise operations, first expand all operands to the same SampleLayout length by padding with 1-dimensions
// - concatenate that shape, say, (I,J,K) with the shape derived from the MBLayout, say (S,T) -> (I,J,K,S,T)
// - these extra dimensions are only used internally, but not accessible to the user (user/network definition operates on samples only)
// - examples:
// - A[(I,J,K), (S,T)] + B[(I,J,K), (S,T)] -> C[I,J,K,S,T] // all dimensions match
// - A[(I,J), (S,T)] + B[(I,J,K), (S,T)] -> C[I,J,K,S,T] // A gets an additional broadcasting dimension that matches K
// - A(I,T) + B(I) -> C(I,T) // T is broadcasting for B, e.g. adding a bias
// - A(I,T1,T2) + B(1,T1) -> C(I,T1,T2) // 2D iteration; implies a third dim for B where both first and third dim broadcast
//
// Operations:
// - all elementwise operations:
// - dimensions are expanded as explained above for all operands
// - of note: result may also have broadcasting dimensions
// - elementwise 'copy' is also considered here, which allows for strided copies
// - inner product (Kronecker product+contraction) -> TimesNode
// - implementable as SGEMM (may extend in the future)
// - tensor transpose -> TransposeNode
// - swaps any two dimensions. This does not change the column-major definition, i.e. requires a memory copy.
// - special case: swapping between sample and MBLayout, e.g. turn a sample dimension to a time dimension
// TODO: must match ComputationNode::m_numRows; or, rather, the TensorShape is how m_numRows is stored??
struct TensorShape
{

View file

@ -55,14 +55,14 @@ namespace msra { namespace lattices {
//check total frame number to be added ?
//int deviceid = loglikelihood.GetDeviceId();
size_t boundaryframenum;
std::vector<size_t> validframes;
std::vector<size_t> validframes; // [s] cursor pointing to next utterance begin within a single parallel sequence [s]
validframes.assign(samplesInRecurrentStep, 0);
ElemType objectValue = 0.0;
//convert from Microsoft::MSR::CNTK::Matrix to msra::math::ssematrixbase
size_t numrows = loglikelihood.GetNumRows();
size_t numcols = loglikelihood.GetNumCols();
Microsoft::MSR::CNTK::Matrix<ElemType> tempmatrix(m_deviceid);
//copy loglikelihood to pred
if (numcols > pred.cols())
{
@ -72,19 +72,17 @@ namespace msra { namespace lattices {
if (doreferencealign)
labels.SetValue((ElemType)(0.0f));
size_t mbsize = numcols / samplesInRecurrentStep;
size_t T = numcols / samplesInRecurrentStep; // number of time steps in minibatch
if (samplesInRecurrentStep > 1)
{
assert(extrauttmap.size() == lattices.size());
assert(mbsize == pMBLayout->GetNumTimeSteps());
assert(T == pMBLayout->GetNumTimeSteps());
}
size_t mapi = 0;
size_t mapframenum = 0;
//cal gamma for each utterance
size_t mapi = 0; // parallel-sequence index for utterance [i]
// cal gamma for each utterance
size_t ts = 0;
//size_t ts_uid = 0;
for (size_t i = 0; i < lattices.size(); i++)
{
const size_t numframes = lattices[i]->getnumframes();
@ -92,8 +90,7 @@ namespace msra { namespace lattices {
msra::dbn::matrixstripe predstripe(pred, ts, numframes); // logLLs for this utterance
msra::dbn::matrixstripe dengammasstripe(dengammas, ts, numframes); // denominator gammas
if (samplesInRecurrentStep == 1) //one channel
if (samplesInRecurrentStep == 1) // no sequence parallelism
{
tempmatrix = loglikelihood.ColumnSlice(ts, numframes);
//if (m_deviceid == CPUDEVICE)
@ -104,21 +101,26 @@ namespace msra { namespace lattices {
if (m_deviceid != CPUDEVICE)
parallellattice.setloglls(tempmatrix);
}
else //multi channel
else // multiple parallel sequences
{
//get frame number for each utterance
mapi = extrauttmap[i];
for (size_t j = validframes[mapi]; j < mbsize; j++)
// get number of frames for the utterance
mapi = extrauttmap[i]; // parallel-sequence index; in case of >1 utterance within this parallel sequence, this is in order of concatenation
// scan MBLayout for end of utterance
size_t mapframenum = SIZE_MAX; // duration of utterance [i] as determined from MBLayout
for (size_t t = validframes[mapi]; t < T; t++)
{
// TODO: Adapt this to new MBLayout, m_sequences would be easier to work off.
if (pMBLayout->IsEnd(mapi,j))
if (pMBLayout->IsEnd(mapi,t))
{
mapframenum = j - validframes[mapi] + 1;
mapframenum = t - validframes[mapi] + 1;
break;
}
}
// must match the explicit information we get from the reader
if (numframes != mapframenum)
LogicError("gammacalculation: IsEnd() not working, numframes (%d) vs. mapframenum (%d)", (int)numframes, (int)mapframenum);
assert(numframes == mapframenum);
if (numframes > tempmatrix.GetNumCols())
@ -195,7 +197,7 @@ namespace msra { namespace lattices {
}
}
if (samplesInRecurrentStep > 1)
validframes[mapi] += numframes;
validframes[mapi] += numframes; // advance the cursor within the parallel sequence
fprintf(stderr, "dengamma value %f\n", denavlogp);
ts += numframes;
}