Normalize files with mixed line endings (to DOS LE)

Mark Hillebrand 2015-11-09 22:33:39 +01:00
Parent a2f393270f
Commit 403cc42b09
13 changed files with 157 additions and 157 deletions

View file

@@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
// - C++ primitives like 'double' -> wrap in a Wrapper first then in a BoxOf, e.g. Number = BoxOf<Wrapped<double>>
struct Object { virtual ~Object() { } };
// indicates that the object has a name that should be set from the expression path
struct HasName { virtual void SetName(const wstring & name) = 0; };
@@ -95,7 +95,7 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
// TODO: unify with ComputationNodeBase
// -----------------------------------------------------------------------
class ComputationNodeObject : public Object { }; // a base class for all nodes (that has no template parameter)
// -----------------------------------------------------------------------
// HasToString -- trait to indicate an object can print its content
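For orientation, a minimal sketch of how a type opts into the HasName trait declared above; MyNamedObject is a hypothetical name for illustration, not part of this commit:

#include <string>
using std::wstring;

struct Object { virtual ~Object() { } };
struct HasName { virtual void SetName(const wstring & name) = 0; };

// hypothetical example type: derives from Object and implements HasName,
// so the scripting layer can assign it a name from the expression path
struct MyNamedObject : public Object, public HasName
{
    wstring m_name;
    virtual void SetName(const wstring & name) override { m_name = name; }
};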

View file

@@ -567,97 +567,97 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
template<class ElemType>
static inline Matrix<ElemType> DataSliceWithMBLayout(Matrix<ElemType> & data,
                                                     const FrameRange & frameRange/*select frame or entire batch*/,
                                                     const MBLayoutPtr & pMBLayout/*the MB layout of 'data'*/)
{
    // MBLayout of data and of FrameRange must be identical pointers,
    // or in case of broadcasting, respective parent pointers.
    // MBLayouts that are identical in content but not object identity (pointer) are not admissible.
    // For those cases, use a ReconcileMBLayout node.
    if (frameRange.m_pMBLayout != pMBLayout)
    {
        // if broadcast is allowed then it is allowed to broadcast from an outer-loop value
        // Currently, the only 'outer' loop we have is to have no layout.
        if (frameRange.m_broadcastAllowed && !pMBLayout && data.GetNumCols() == 1)
            return data.AsReference();
        if (frameRange.m_pMBLayout && pMBLayout && *frameRange.m_pMBLayout == *pMBLayout)
            LogicError("DataSlice: frameRange's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation?");
        else
            LogicError("DataSlice: frameRange's MBLayout inconsistent with matrix");
    }
    // if FrameRange refers to whole minibatch (map mode)
    // or if we don't even have a layout,
    // then return the whole matrix,
    // but as a reference (e.g. it cannot be resized)
    if (!pMBLayout || frameRange.IsAllFrames())
    {
        if (frameRange.seqIndex == SIZE_MAX)
            return data.AsReference();
        else
        {
            if (!pMBLayout)
                LogicError("DataSlice: Attempting to retrieve a parallel sequence from data without layout.");
#if 1
            else
                LogicError("DataSlice: To retrieve a parallel sequence, implement Matrix::RowSlice() first!");
#else
            // get a reshaped view that stacks all sequences into T long vectors
            auto mat = data.ColumnSlice(0, data.GetNumCols());
            mat.Resize(data.GetNumRows() * pMBLayout->GetNumParallelSequences(), data.GetNumRows() / pMBLayout->GetNumParallelSequences());
            return mat; // .RowSlice(frameRange.seqIndex * data.GetNumRows());
            // TODO: Why does RowSlice() not exist? Seems simple. Is there a hidden assumption of contiguous memory?
#endif
        }
    }
    // FrameRange refers to a time slice -> return that
    else
    {
        size_t numParallelSequences = pMBLayout->GetNumParallelSequences();
        size_t startColumn = frameRange.t() * numParallelSequences;
        if (frameRange.seqIndex == SIZE_MAX)
            return data.ColumnSlice(startColumn, numParallelSequences);
        else
            return data.ColumnSlice(startColumn + frameRange.seqIndex, 1);
    }
}
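The time-slice branch above addresses columns as t * S + s for S parallel sequences. A standalone sketch of that packing arithmetic (hypothetical helper, for illustration only):

#include <cassert>
#include <cstddef>

// column index of frame t of parallel sequence s when S sequences are packed
// frame-interleaved, matching data.ColumnSlice(startColumn + seqIndex, 1) above
size_t PackedColumn(size_t t, size_t s, size_t S)
{
    assert(s < S);
    return t * S + s;
}

int main()
{
    // with S = 3 parallel sequences, frame t = 2 of sequence s = 1 is column 7
    assert(PackedColumn(2, 1, 3) == 7);
    return 0;
}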
// -----------------------------------------------------------------------
// MaskMissingColumnsTo() -- function to set gaps to zero or NaN
// -----------------------------------------------------------------------
// This sets MB columns to 0 (or any 'val') that have the NoLabel or NoFeature flag set.
// Such a situation happens when packing multiple sequences for parallel processing--there will be some gaps, which are flagged by these flags.
// Nodes that operate in 'map' style (input(j) -> output(j) independently) can ignore this; it will be garbage-in-garbage-out.
// However, nodes that 'reduce' minibatches (e.g. computing the sum of all frames across all sequences) must deal with the garbage.
// This function sets those to 0, assuming that they can then be reduced without affecting the result.
// This function can operate on the whole range or on a selected single frame and/or a single sequence.
// It is indirectly guarded by the m_maskMissingColumnsToZero flag, which, if false, will install a layout for which IsAllNone() is true. TODO: we had better always install the same layout, and instead test m_maskMissingColumnsToZero here.
// Note that existing 'reduce'-style operations--the criterion nodes and gradient computation--already call this. --BUGBUG: They can't, wrong layout!
// Warning: The layout used here must match the matrix. E.g. don't pass a child's matrix from a criterion node (use Inputs(x)->MaskMissing{Values,Gradient}ColumnsToZero() instead).
template<class ElemType>
static inline bool MaskMissingColumnsTo(Matrix<ElemType>& matrixToBeMasked, const MBLayoutPtr & pMBLayout, const FrameRange & frameRange, ElemType val)
{
    bool foundLabelOrFeatureMissing = false; // return value: set to true if either a NoLabel or a NoFeature column is processed
    if (pMBLayout && !pMBLayout->IsAllNone()) // TODO: This should check whether there are any gaps.
    {
        size_t nT = pMBLayout->GetNumTimeSteps();
        size_t nS = pMBLayout->GetNumParallelSequences();
        if (matrixToBeMasked.GetNumCols() != nT * nS)
            LogicError("MaskMissingColumnsToZero: pMBLayout->m_minibatchPackingFlags should have one element for each timestep of all streams. Check feature reader.");
        shared_ptr<Matrix<char>> columnsValidityMask = pMBLayout->GetColumnsValidityMask(frameRange, matrixToBeMasked.GetDeviceId());
        if (columnsValidityMask != nullptr)
        {
            auto matrixSliceToMask = DataSliceWithMBLayout(matrixToBeMasked, frameRange, pMBLayout);
            foundLabelOrFeatureMissing = true;
            matrixSliceToMask.MaskColumnsValue(*columnsValidityMask, val);
        }
    }
    return foundLabelOrFeatureMissing;
}
}}}
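A standalone sketch of the masking idea behind MaskMissingColumnsTo(), on a plain column-major buffer instead of Matrix<ElemType> (hypothetical helper; the real code obtains a per-column validity mask from the layout and calls MaskColumnsValue()):

#include <cstddef>
#include <vector>

// set every column flagged invalid (a gap) to 'val', so that reduce-style
// operations can afterwards sum over all columns without picking up garbage
template<class ElemType>
void MaskGapColumns(std::vector<ElemType>& data, size_t numRows,
                    const std::vector<char>& columnValidity, ElemType val)
{
    for (size_t j = 0; j < columnValidity.size(); j++)
        if (!columnValidity[j])                    // 0 marks a gap column
            for (size_t i = 0; i < numRows; i++)
                data[j * numRows + i] = val;       // column-major storage
}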

View file

@@ -93,7 +93,7 @@ private:
bool m_partialMinibatch; // a partial minibatch is allowed
LabelKind m_labelType; // labels are categories, create mapping table
msra::dbn::randomordering m_randomordering; // randomizing class
MBLayoutPtr m_pMBLayout;
std::wstring m_labelsName;
std::wstring m_featuresName;

View file

@@ -96,7 +96,7 @@ private:
bool m_partialMinibatch; // a partial minibatch is allowed
LabelKind m_labelType; // labels are categories, create mapping table
msra::dbn::randomordering m_randomordering; // randomizing class
MBLayoutPtr m_pMBLayout;
std::wstring m_labelsName;
std::wstring m_labelsCategoryName;

View file

@@ -70,7 +70,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> rows >> cols;
SetDims(rows, cols);
LoadFunctionValues(fstream);
m_imageLayout = ImageLayoutWHC(1, rows, 1);
}
@@ -350,8 +350,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Inputs(1)->FunctionValues().TransferFromDeviceToDevice(input0DeviceId, input1DeviceId);
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (inputIndex > 1)
InvalidArgument("LookupTable operation only takes two inputs.");
@@ -573,8 +573,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::ScaleAndAdd(1.0, GradientValues(), Inputs(inputIndex)->GradientValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
assert(m_functionValues->GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues->GetNumRows() for loop dimension
assert(m_pMBLayout);

View file

@@ -1,22 +1,22 @@
// DataReaderHelper.h -- helper functions that understand both DataReader and ComputationNetwork
#pragma once
#include "Basics.h"
#include "DataReader.h"
#include "ComputationNetwork.h"
#include "MPIWrapper.h"
#include <string>
#include <map>
#include "Basics.h"
#include "DataReader.h"
#include "ComputationNetwork.h"
#include "MPIWrapper.h"
#include <string>
#include <map>
#include "TrainingCriterionNodes.h"
namespace Microsoft { namespace MSR { namespace CNTK {
/*static*/ struct DataReaderHelpers
{
// -------------------------------------------------------------------
// DecimateMinibatch - decimate minibatch for parallelization
// -------------------------------------------------------------------
// We sub-sample the parallel utterances.
template<class ElemType>
@@ -96,15 +96,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
pNewMBLayout->Set(id, t, pMBLayout->Get(id + sent_start, t));
pMBLayout->MoveFrom(pNewMBLayout); // update layout in-place
}
// -------------------------------------------------------------------
// GetMinibatchIntoNetwork() -- get one minibatch from Reader (this->trainSetDataReader) into Network (this->net)
// Returns false if end of epoch has been reached.
// If not, then actualMBSize is set. Note that 0 is a valid value to be returned for actualMBSize, caller must handle that correctly.
// -------------------------------------------------------------------
// Note: Later, a function like this will become part of the reader interface.
// TODO: callers of this often do ComputationNetwork::UpdateEvalTimeStamps(featureNodes) and also for labels; we should eliminate the need for this.
template<class ElemType>
static bool GetMinibatchIntoNetwork(IDataReader<ElemType>& trainSetDataReader,
ComputationNetwork& net,
@@ -188,6 +188,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return true;
}
};
}}}
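The decimation step described above boils down to each MPI worker keeping a contiguous subrange of the S parallel sequences (cf. sent_start in the hunk). A sketch under that assumption (hypothetical helper; the actual DecimateMinibatch also rebuilds the MBLayout for the kept range):

#include <cstddef>
#include <utility>

// which parallel sequences MPI worker 'rank' of 'numWorkers' keeps;
// returns the half-open range [begin, end)
std::pair<size_t, size_t> KeptSequences(size_t S, size_t numWorkers, size_t rank)
{
    return { S * rank / numWorkers, S * (rank + 1) / numWorkers };
}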

View file

@@ -2,8 +2,8 @@
//
// F. Seide, V-hansu
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include "BestGpu.h" // for CPUONLY
#include "latticearchive.h" // we implement parts of class lattice
#include "simple_checked_arrays.h"
@@ -865,4 +865,4 @@ namespace msra { namespace lattices {
emulatemmierrorsignal (thisedgealignments.getalignmentsbuffer(), thisedgealignments.getalignoffsets(), edges, nodes, logpps, errorsignal);
}
}
};};

View file

@@ -332,8 +332,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static void ElementWisePower (ElemType alpha, const CPUMatrix<ElemType>& a, CPUMatrix<ElemType>& c);
static bool AreEqual(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, const ElemType threshold = 1e-8);
static void TensorShuffleScaleAndAdd(ElemType keepWeight, const CPUMatrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c);
static CPUMatrix<ElemType> Ones(const size_t rows, const size_t cols);
static CPUMatrix<ElemType> Zeros(const size_t rows, const size_t cols);

View file

@@ -465,7 +465,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static bool AreEqual(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const ElemType threshold = 1e-8);
static bool HasElement(const Matrix<ElemType>& a, const ElemType value = 0.0);
static void TensorShuffleScaleAndAdd(ElemType keepWeight, const Matrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const Matrix<ElemType>& b, Matrix<ElemType>& c);
public:
void Read(File& stream);
void Write(File& stream) const;

View file

@@ -251,7 +251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
GPUMatrix<ElemType> a(0);
return a;
}
template<class ElemType> ElemType GPUSparseMatrix<ElemType>::SumOfAbsElements() const
{
@@ -541,9 +541,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType> void GPUMatrix<ElemType>::SetColumn(const ElemType* colPointer, size_t colInd) { }
template<class ElemType> void GPUMatrix<ElemType>::SetColumn(const GPUMatrix<ElemType>& valMat, size_t colInd) { }
template<class ElemType> void GPUMatrix<ElemType>::MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val) { }
template<class ElemType> void GPUMatrix<ElemType>::CopyColumnsStrided(const GPUMatrix<ElemType>& fromMatrix, size_t numCols, size_t srcNumColsStride, size_t destNumColsStride) { }
template<class ElemType> void GPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& deepCopyFrom) { }
template<class ElemType>
@@ -1036,7 +1036,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return false;
}
template<class ElemType> void GPUMatrix<ElemType>::TensorShuffleScaleAndAdd(ElemType keepWeight, const GPUMatrix<ElemType>& a, size_t D, size_t S, size_t M, size_t K, size_t T, ElemType scaleFactor, const GPUMatrix<ElemType>& b, GPUMatrix<ElemType>& c) { }
template<class ElemType> GPUMatrix<ElemType> GPUMatrix<ElemType>::Ones(const size_t rows, const size_t cols, int deviceId)
{

View file

@@ -6,22 +6,22 @@
#ifdef __CUDA_ARCH__ // we are compiling under CUDA
#define ON_CUDA 1
#ifdef __device__
#define cudacode __device__
#define cudasharedcode __device__ __host__
#else
#define cudacode
#define cudasharedcode
#endif
#else
#define ON_CUDA 0 // TODO: this does not work for some combination--fix this
#ifdef __device__
#define cudacode __device__
#define cudasharedcode __device__ __host__
#else
#define cudacode
#define cudasharedcode
#endif
#endif
#include <assert.h>
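For illustration, how the macros above are intended to be used: the same function compiles as a __device__ __host__ function under nvcc and as a plain host function otherwise (clip01 is a hypothetical example, not from this commit):

// callable from both device and host code when compiling under CUDA,
// and an ordinary function in the CPU-only build
cudasharedcode float clip01(float x)
{
    return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
}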

View file

@@ -2,7 +2,7 @@
//
// F. Seide, V-hansu
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#define DLLEXPORT
#define __kernel_emulation__ // allow the compilation of CUDA kernels on the CPU

View file

@@ -34,20 +34,20 @@ speechTrain=[
BFF(in, rows, cols) = [ B = Parameter(rows, 1, init = 'fixedValue', value = 0) ; W = Parameter(rows, cols, init = if uniformInit then 'uniform' else 'gaussian'/*, initValueScale from outer scope*/) ; z = W*in+B ]
GBFF(f, in, rows, cols) = [ Eh = rows,f(BFF(in, rows, cols).z) ]
L = Length(layerSizes)-1 // number of model layers
features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label')
featNorm = if applyMeanVarNorm
           then MeanVarNorm(features)
           else features
layers[layer:1..L-1] = if layer > 1
                       then GBFF(layerTypes[layer], layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1])
                       else GBFF(layerTypes[layer], featNorm, layerSizes[layer], layerSizes[layer-1])
outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1])
outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z)
CE = trainingCriterion(labels, outZ, tag='criterion')
Err = evalCriterion(labels, outZ, tag='eval')
logPrior = LogPrior(labels)
// TODO: how to add a tag to an infix operation?
ScaledLogLikelihood = Minus (outZ, logPrior, tag='output')
]