// CNTK/Source/Readers/Kaldi2Reader/UtteranceDerivativeBuffer.cpp

#include "basetypes.h"
#include "htkfeatio_utils.h"
#include "UtteranceDerivativeBuffer.h"

namespace Microsoft { namespace MSR { namespace CNTK {

// Constructor. Remembers the number of parallel streams per minibatch and the
// interface used to compute derivatives, and puts the buffer into its initial
// "waiting for likelihoods" state. <derivativeInterface> is stored as given
// and must not be null.
template <class ElemType>
UtteranceDerivativeBuffer<ElemType>::UtteranceDerivativeBuffer(
    size_t numberOfuttsPerMinibatch,
    UtteranceDerivativeComputationInterface<ElemType>* derivativeInterface)
{
    assert(derivativeInterface != nullptr);

    // Configuration handed in by the caller.
    m_numUttsPerMinibatch = numberOfuttsPerMinibatch;
    m_derivativeInterface = derivativeInterface;

    // Nothing has been buffered yet: the feature dimension is unknown, no
    // objective has been accumulated and no stream has a finished utterance.
    m_dimension = 0;
    m_currentObj = 0;
    m_epochEnd = false;
    m_needLikelihood = true;
    m_uttReady.assign(m_numUttsPerMinibatch, false);
}

// Works out, for every stream, which part of the current minibatch belongs to
// which utterance listed in <uttInfo>.
//
//   uttInfo            - per stream, the (utterance ID, utterance length)
//                        pairs contained in the minibatch, in order.
//   pMBLayout          - layout of the minibatch; queried for gap frames and
//                        for sequence boundaries.
//   uttInfoInMinibatch - output, must not be NULL. It is cleared and then
//                        filled, per stream, with
//                        (utterance ID, (start time step, number of frames)).
//                        The start time step is 0 for the first segment of a
//                        stream and the time step right after the previous
//                        segment's last frame otherwise; the number of frames
//                        counts only non-gap frames.
//
// Raises a RuntimeError if the layout yields more segments for a stream than
// <uttInfo> lists for it (previously this read past the end of uttInfo[i]).
template <class ElemType>
void UtteranceDerivativeBuffer<ElemType>::ProcessUttInfo(
    const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
    const MBLayoutPtr pMBLayout,
    std::vector<std::vector<std::pair<
        wstring, std::pair<size_t, size_t>>>>* uttInfoInMinibatch) const
{
    assert(uttInfoInMinibatch != NULL);
    assert(uttInfo.size() == m_numUttsPerMinibatch);
    assert(pMBLayout->GetNumParallelSequences() == m_numUttsPerMinibatch);
    uttInfoInMinibatch->clear();
    uttInfoInMinibatch->resize(uttInfo.size());

    // The layout is not modified below, so the number of time steps is fixed.
    const size_t numTimeSteps = pMBLayout->GetNumTimeSteps();

    for (size_t i = 0; i < uttInfo.size(); ++i)
    {
        auto& segments = (*uttInfoInMinibatch)[i];
        size_t startFrameIndexInMinibatch = 0;
        size_t numFrames = 0;

        for (size_t j = 0; j < numTimeSteps; ++j)
        {
            FrameRange fr(pMBLayout, j);

            // Frames the layout marks as gaps are skipped and not counted.
            if (pMBLayout->IsGap(fr.Sequence(i)))
            {
                continue;
            }
            numFrames += 1;

            // Close the current segment when the layout reports the next time
            // step of this stream as beyond a sequence boundary, or when this
            // is the last time step of the minibatch.
            if (pMBLayout->IsBeyondStartOrEnd(fr.WithTimeOffset(static_cast<ptrdiff_t>(1)).Sequence(i)) || j == numTimeSteps - 1)
            {
                const size_t uttIndex = segments.size();
                if (uttIndex >= uttInfo[i].size())
                {
                    RuntimeError("Minibatch layout contains more utterance"
                                 " segments in stream %d than the utterance"
                                 " information lists (%d).\n",
                                 static_cast<int>(i),
                                 static_cast<int>(uttInfo[i].size()));
                }
                const wstring& uttID = uttInfo[i][uttIndex].first;
                segments.push_back(
                    make_pair(uttID, make_pair(startFrameIndexInMinibatch,
                                               numFrames)));
                startFrameIndexInMinibatch = j + 1;
                numFrames = 0;
            }
        }
        assert(uttInfo[i].size() == segments.size());
    }
}

// Buffers the log-likelihoods of the current minibatch per utterance and, as
// soon as all frames of an utterance have arrived, asks the derivative
// interface to compute that utterance's derivative and objective.
//
// Suppose we have 3 streams a, b and c; <logLikelihoodIn> is expected in the
// following format (columns of the parallel streams are interleaved):
// 1: a11 b11 c11 a12 b12 c12...
// 2: a21 b21 c21 a22 b22 c22...
// 3: a31 b31 c31 a32 b32 c32...
//
//   uttInfo         - per stream, the (utterance ID, utterance length) pairs
//                     contained in the minibatch.
//   logLikelihoodIn - log-likelihoods of the minibatch; it is not modified,
//                     a deep clone is made and moved to the CPU if necessary.
//   pMBLayout       - layout of the minibatch.
//
// After the call m_needLikelihood is false only if every stream has at least
// one utterance whose derivative has been computed. Always returns true, like
// GetDerivative() and GetObjective().
template <class ElemType>
bool UtteranceDerivativeBuffer<ElemType>::SetLikelihood(
    const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
    const Matrix<ElemType>& logLikelihoodIn,
    const MBLayoutPtr pMBLayout)
{
    assert(m_needLikelihood == true);
    assert(m_epochEnd == false);

    // The first minibatch fixes the feature dimension; later ones must agree.
    if (m_dimension == 0)
    {
        m_dimension = logLikelihoodIn.GetNumRows();
    }
    assert(m_dimension == logLikelihoodIn.GetNumRows());

    std::vector<std::vector<
        std::pair<wstring, std::pair<size_t, size_t>>>> uttInfoInMinibatch;
    ProcessUttInfo(uttInfo, pMBLayout, &uttInfoInMinibatch);

    // Checks if we need to move data to CPU.
    Matrix<ElemType> logLikelihood = logLikelihoodIn.DeepClone();
    if (logLikelihood.GetDeviceId() >= 0)
    {
        logLikelihood.TransferFromDeviceToDevice(
            logLikelihood.GetDeviceId(), CPUDEVICE, true, false, false);
    }

    for (size_t i = 0; i < uttInfo.size(); ++i)
    {
        assert(uttInfo[i].size() == uttInfoInMinibatch[i].size());
        for (size_t j = 0; j < uttInfo[i].size(); ++j)
        {
            const wstring& uttID = uttInfo[i][j].first;

            // First time we see this utterance: create its buffer, sized for
            // the whole utterance.
            if (m_uttPool.find(uttID) == m_uttPool.end())
            {
                UtteranceDerivativeUnit tmpUttUnit;
                tmpUttUnit.hasDerivative = false;
                tmpUttUnit.uttLength = uttInfo[i][j].second;
                tmpUttUnit.progress = 0;
                tmpUttUnit.streamID = i;
                tmpUttUnit.logLikelihood.Resize(logLikelihood.GetNumRows(),
                                                tmpUttUnit.uttLength);
                m_uttPool[uttID] = std::move(tmpUttUnit);
            }

            // Nothing is inserted into or removed from the pool while this
            // reference is in use, so it stays valid.
            auto& uttUnit = m_uttPool[uttID];
            if (uttUnit.hasDerivative)
            {
                // The derivative is already available; no likelihoods needed.
                continue;
            }

            // Sets the likelihood and computes derivatives.
            assert(uttID == uttInfoInMinibatch[i][j].first);
            const size_t startFrame = uttInfoInMinibatch[i][j].second.first;
            const size_t numFrames = uttInfoInMinibatch[i][j].second.second;
            assert(uttUnit.progress + numFrames <= uttUnit.uttLength);

            // Column of time step t of stream i in the interleaved minibatch
            // matrix is t * m_numUttsPerMinibatch + i.
            for (size_t k = 0; k < numFrames; ++k)
            {
                uttUnit.logLikelihood.SetColumn(
                    logLikelihood.ColumnSlice(
                        (startFrame + k) * m_numUttsPerMinibatch + i, 1),
                    uttUnit.progress + k);
            }

            uttUnit.progress += numFrames;
            if (uttUnit.progress == uttUnit.uttLength)
            {
                m_derivativeInterface->ComputeDerivative(
                    uttID,
                    uttUnit.logLikelihood,
                    &uttUnit.derivative,
                    &uttUnit.objective);
                uttUnit.hasDerivative = true;
                // <progress> is reused by GetDerivative() to track how many
                // derivative frames have been handed out.
                uttUnit.progress = 0;
                m_uttReady[uttUnit.streamID] = true;
            }
        }
    }

    // Checks if we are ready to provide derivatives: every stream needs at
    // least one utterance with a computed derivative.
    m_needLikelihood = false;
    for (size_t i = 0; i < m_uttReady.size(); ++i)
    {
        if (m_uttReady[i] == false)
        {
            m_needLikelihood = true;
            break;
        }
    }

    // The function is declared to return bool; flowing off the end without a
    // return statement would be undefined behaviour.
    return true;
}

// Fills <derivativesOut> with the buffered derivatives that correspond to the
// frames of the current minibatch and accumulates the matching share of each
// utterance's objective into m_currentObj.
//
// Suppose we have 3 streams a, b and c; <derivativesOut> is produced in the
// following format (columns of the parallel streams are interleaved):
// 1: a11 b11 c11 a12 b12 c12...
// 2: a21 b21 c21 a22 b22 c22...
// 3: a31 b31 c31 a32 b32 c32...
//
// Raises a RuntimeError if an utterance in <uttInfo> has no computed
// derivative. Utterances whose derivatives have been handed out completely
// are removed from the pool. Always returns true.
template <class ElemType>
bool UtteranceDerivativeBuffer<ElemType>::GetDerivative(
    const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
    const MBLayoutPtr pMBLayout,
    Matrix<ElemType>* derivativesOut)
{
    assert(derivativesOut != NULL);
    assert(m_needLikelihood == false);

    std::vector<std::vector<
        std::pair<wstring, std::pair<size_t, size_t>>>> segmentsPerStream;
    ProcessUttInfo(uttInfo, pMBLayout, &segmentsPerStream);

    // The minibatch is assembled on the CPU first.
    m_currentObj = 0;
    Matrix<ElemType> derivatives(CPUDEVICE);
    derivatives.Resize(m_dimension, pMBLayout->GetNumCols());

    for (size_t stream = 0; stream < uttInfo.size(); ++stream)
    {
        assert(uttInfo[stream].size() == segmentsPerStream[stream].size());
        for (size_t u = 0; u < uttInfo[stream].size(); ++u)
        {
            const wstring& uttID = uttInfo[stream][u].first;

            // The utterance must be known and its derivative computed.
            auto found = m_uttPool.find(uttID);
            if (found == m_uttPool.end() || found->second.hasDerivative == false)
            {
                RuntimeError("Derivatives are not ready for utterance:"
                             " %S\n",
                             uttID.c_str());
            }

            // Copy this minibatch's share of the utterance's derivative.
            assert(uttID == segmentsPerStream[stream][u].first);
            auto& unit = m_uttPool[uttID];
            const size_t firstStepInMinibatch = segmentsPerStream[stream][u].second.first;
            const size_t frameCount = segmentsPerStream[stream][u].second.second;
            const size_t firstFrameInUtt = unit.progress;
            for (size_t f = 0; f < frameCount; ++f)
            {
                // Time step t of stream s lives in column
                // t * m_numUttsPerMinibatch + s.
                derivatives.SetColumn(
                    unit.derivative.ColumnSlice(firstFrameInUtt + f, 1),
                    (firstStepInMinibatch + f) * m_numUttsPerMinibatch + stream);
            }

            // The objective is credited in proportion to the frames served.
            m_currentObj += unit.objective * frameCount / unit.uttLength;
            unit.progress += frameCount;
            assert(unit.progress <= unit.uttLength);
            if (unit.progress == unit.uttLength)
            {
                // Fully consumed; <unit> must not be used past this point.
                m_uttPool.erase(uttID);
            }
        }
    }

    // Move the assembled minibatch to the device of the output matrix when
    // that matrix has a non-negative device ID.
    if (derivativesOut->GetDeviceId() >= 0)
    {
        derivatives.TransferFromDeviceToDevice(
            CPUDEVICE, derivativesOut->GetDeviceId(), true, false, false);
    }
    derivativesOut->SetValue(derivatives);

    // Remember the utterance information so that GetObjective() can verify it
    // is asked about the same minibatch.
    m_currentUttInfo = uttInfo;

    // New log-likelihoods are needed only when the pool has been drained and
    // the epoch has not ended.
    m_needLikelihood = !(m_epochEnd || m_uttPool.size() > 0);
    if (m_needLikelihood)
    {
        m_uttReady.assign(m_numUttsPerMinibatch, false);
    }
    return true;
}

// Writes the objective of the minibatch most recently served by
// GetDerivative() into <objectivesIn>: the matrix is resized to one row with
// one column per layout column, and every entry is set to m_currentObj divided
// by the number of columns.
//
// Raises a RuntimeError if <uttInfo> differs from the utterance information
// recorded by the last GetDerivative() call. Always returns true.
template <class ElemType>
bool UtteranceDerivativeBuffer<ElemType>::GetObjective(
    const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo,
    MBLayoutPtr pMBLayout,
    Matrix<ElemType>* objectivesIn)
{
    assert(objectivesIn != NULL);

    // The objective is only meaningful for the minibatch it was computed for.
    if (!CompareUttInfo(uttInfo, m_currentUttInfo))
    {
        RuntimeError("Current objective does not correspond to the"
                     " minibatch utterance information, perhaps you did not"
                     " run GetObjective() right after GetDerivative()?");
    }

    // Spread the accumulated objective evenly over all columns.
    const auto numCols = pMBLayout->GetNumCols();
    objectivesIn->Resize(1, numCols);
    objectivesIn->SetValue(m_currentObj / static_cast<ElemType>(numCols));

    return true;
}

// Forwards the query for <uttID> unchanged to the derivative interface and
// returns its answer.
template <class ElemType>
bool UtteranceDerivativeBuffer<ElemType>::HasResourceForDerivative(
    const wstring& uttID) const
{
    const bool resourceAvailable =
        m_derivativeInterface->HasResourceForDerivative(uttID);
    return resourceAvailable;
}

// Returns true if and only if both utterance descriptions are identical: the
// same number of streams, the same number of utterances in every stream, and
// equal (utterance ID, length) pairs at every position.
//
// The element-wise comparison is exactly what operator== of std::vector and
// std::pair provides, and it stops at the first difference.
template <class ElemType>
bool UtteranceDerivativeBuffer<ElemType>::CompareUttInfo(
    const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo1,
    const std::vector<std::vector<std::pair<wstring, size_t>>>& uttInfo2)
{
    return uttInfo1 == uttInfo2;
}

// Returns the buffer to its start-of-epoch state: all buffered utterances and
// the remembered utterance information are dropped, the accumulated objective
// is zeroed, and the buffer waits for new log-likelihoods again. The feature
// dimension and the derivative interface are left untouched.
template <class ElemType>
void UtteranceDerivativeBuffer<ElemType>::ResetEpoch()
{
    // Drop everything that was buffered.
    m_uttPool.clear();
    m_currentUttInfo.clear();
    m_currentObj = 0;

    // Re-arm the state flags.
    m_epochEnd = false;
    m_needLikelihood = true;
    m_uttReady.assign(m_numUttsPerMinibatch, false);
}

// Explicitly instantiate the class template for the float and double element
// types; the member function definitions above live in this source file.
template class UtteranceDerivativeBuffer<float>;
template class UtteranceDerivativeBuffer<double>;
} } }