Implemented eye_like Op and the dependent SetDiagonalValue methods for CPU and GPU sparse matrices.

This commit is contained in:
Yuqing Tang 2018-05-05 21:24:59 -07:00
Parent b24814ec8d
Commit 5a587b376d
24 changed files: 647 additions and 42 deletions

View file

@ -4145,6 +4145,11 @@ namespace CNTK
///
CNTK_API FunctionPtr OnesLike(const Variable& operand, const std::wstring& name = L"");
///
/// Create an instance of an eye-like operation. This produces a matrix with ones on the main diagonal and zeros elsewhere, with the shape and dynamic axes of the operand.
///
CNTK_API FunctionPtr EyeLike(const Variable& operand, bool isOutputSparse, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in elementwise tensor addition operation with the specified input operands.
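
A minimal sketch of the intended semantics, using NumPy as a stand-in (np.eye and the 3x4 shape are illustrative assumptions, not part of this commit):

import numpy as np

# EyeLike yields ones on the main diagonal and zeros elsewhere,
# matching the operand's shape; for a 3x4 operand:
operand = np.zeros((3, 4), dtype=np.float32)
eye = np.eye(*operand.shape, dtype=operand.dtype)
# [[1. 0. 0. 0.]
#  [0. 1. 0. 0.]
#  [0. 0. 1. 0.]]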

View file

@ -956,6 +956,12 @@ namespace CNTK
computationNodePtr = New<ConstantNode<ElementType>>(network->GetDeviceId(), internalNodeName, fillValue);
break;
}
case PrimitiveOpType::EyeLikeOp:
{
bool outputSparse = functionConfig[PrimitiveFunction::AttributeNameOutputSparse].Value<bool>();
ASSIGN_NEW_NODE(EyeLikeNode, network->GetDeviceId(), internalNodeName, outputSparse);
break;
}
case PrimitiveOpType::ROIPooling:
{
PoolingType poolingType = (PoolingType)(functionConfig[PrimitiveFunction::AttributeNamePoolingType].Value<size_t>());

View file

@ -1688,6 +1688,13 @@ namespace CNTK
return UnaryOp(PrimitiveOpType::ConstantOp, operand, std::move(additionalProperties), name);
}
FunctionPtr EyeLike(const Variable& operand, bool isOutputSparse, const std::wstring& name)
{
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameOutputSparse] = isOutputSparse;
return UnaryOp(PrimitiveOpType::EyeLikeOp, operand, std::move(additionalProperties), name);
}
std::vector<Variable> AutoBroadcastSequence(PrimitiveOpType op, const Variable& left, const Variable& right, bool autoBroadcast)
{
auto left_axis = left.DynamicAxes();

View file

@ -360,6 +360,20 @@ namespace CNTK
assert(m_inputs.size() == 1);
outputShape = UnaryElementwiseOpOutputShape(m_inputs[0].Shape());
break;
case PrimitiveOpType::EyeLikeOp:
{
assert(m_inputs.size() == 1);
const auto& dynAxes = m_inputs[0].DynamicAxes();
if (dynAxes.size() + m_inputs[0].Shape().Rank() != 2)
InvalidArgument("EyeLike: Operand '%S' must have exactly 2 axes including dynamic and static axes.",
m_inputs[0].AsString().c_str());
if (any_of(dynAxes.begin(), dynAxes.end(), [](const Axis& axis) {return axis.IsSequenceAxis(); }))
InvalidArgument("EyeLike: Operand '%S' can not have sequence axis.",
m_inputs[0].AsString().c_str());
outputShape = UnaryElementwiseOpOutputShape(m_inputs[0].Shape());
break;
}
case PrimitiveOpType::Where:
assert(m_inputs.size() == 1);
outputShape = NDShape{}; // scalar
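
To make the 2-axis rule concrete, a Python sketch of which operands pass this validation (variable names are illustrative; the same cases appear in the unit tests at the end of this commit):

import cntk as C

ok = C.input_variable(4)                 # 1 batch axis + 1 static axis = 2 axes: accepted
too_many = C.input_variable((3, 3))      # 1 batch axis + 2 static axes = 3 axes: rejected
has_seq = C.sequence.input_variable(4)   # sequence axis present: rejected
# C.eye_like(ok) validates; C.eye_like(too_many) and C.eye_like(has_seq) raise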

View file

@ -117,6 +117,7 @@ namespace CNTK
{PrimitiveOpType::ConstantOp, L"ConstantOp"},
{PrimitiveOpType::Squeeze, L"Squeeze"},
{PrimitiveOpType::Cast, L"Cast" },
{ PrimitiveOpType::EyeLikeOp, L"EyeLikeOp" },
};
inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
@ -289,6 +290,7 @@ namespace CNTK
static const std::wstring AttributeNameSeqGammarWordPen;
static const std::wstring AttributeNameNumClass;
static const std::wstring AttributeNameOneHotOutputSparse;
static const std::wstring AttributeNameOutputSparse;
static const std::wstring AttributeNameOneHotAxis;
static const std::wstring AttributeNameSequenceAxisNamePrefix;
static const std::wstring AttributeNameSequenceUnpackPaddingValue;
@ -833,7 +835,8 @@ namespace CNTK
// Version 18: Add Crop node.
// Version 19: Add TopK
// Version 20: Add squeeze, expand dims, zeros like, ones like
static const size_t s_serializationVersion = 20;
// Version 21: Add EyeLikeOp
static const size_t s_serializationVersion = 21;
};
std::vector<DictionaryValue> GetInputUids(const Function& f);

View file

@ -92,6 +92,7 @@ namespace CNTK
/*static*/ const std::wstring PrimitiveFunction::AttributeNameSeqGammarWordPen = L"SeqGammarWordPen";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameNumClass = L"numClass";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameOneHotOutputSparse = L"oneHotOutputSparse";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameOutputSparse = L"OutputSparse";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameOneHotAxis = L"onehotAxis";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameSequenceAxisNamePrefix = L"sequenceAxis";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameSequenceUnpackPaddingValue = L"sequenceUnpackPaddingValue";

View file

@ -101,6 +101,7 @@ namespace CNTK
ConstantOp = 89,
LatticeSequenceWithSoftmax = 90,
Cast = 91,
EyeLikeOp = 92,
// New op types should only be appended to the end of this list
UnknownOP
// and UnknownOP should always be last.

View file

@ -761,7 +761,7 @@ public:
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
auto result = ValueFor(fr);
auto& result = Value();
result.SetValue(m_fillValue);
}
@ -787,4 +787,73 @@ private:
template class ConstantNode<float>;
template class ConstantNode<double>;
// -----------------------------------------------------------------------
// EyeLikeNode
// -----------------------------------------------------------------------
template <class ElemType>
class EyeLikeNode : public ComputationNode<ElemType>, public NumInputs<1>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"EyeLikeOp"; }
public:
DeclareConstructorFromConfigWithNumInputs(EyeLikeNode);
EyeLikeNode(DEVICEID_TYPE deviceId, const wstring& name)
: EyeLikeNode(deviceId, name, false)
{
}
EyeLikeNode(DEVICEID_TYPE deviceId, const wstring& name, bool isOutputSparse)
: Base(deviceId, name), m_isOutputSparse(isOutputSparse)
{
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t /* inputIndex */, const FrameRange& /* t */) override
{
// EyeLikeNode is constant; nothing to backpropagate
}
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
auto& result = this->Value();
if (m_isOutputSparse && result.GetMatrixType() != SPARSE)
{
result.SwitchToMatrixType(SPARSE, matrixFormatSparseCSC, false);
}
result.SetValue(static_cast<ElemType>(0.0));
result.SetDiagonalValue(static_cast<ElemType>(1.0));
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
ValidateUnaryMap(isFinalValidationPass);
}
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
return false;
}
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
{
return false;
}
virtual void Save(File& fstream) const override
{
Base::Save(fstream);
fstream << m_isOutputSparse;
}
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::Load(fstream, modelVersion);
fstream >> m_isOutputSparse;
}
private:
bool m_isOutputSparse;
};
template class EyeLikeNode<float>;
template class EyeLikeNode<double>;
}}}
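
Because BackpropTo is a no-op and neither the output nor the input feeds any gradient computation, the node behaves as a constant. A quick sketch of the observable effect (mirroring the Python tests later in this commit):

import numpy as np
import cntk as C

x = C.input_variable(4, needs_gradient=True)
f = C.eye_like(x, sparse_output=False)
data = np.arange(12).reshape((3, 4)).astype(np.float32)
grad = f.grad({x: data})               # all zeros: no gradient flows through EyeLike
assert np.allclose(grad, np.zeros_like(data))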

View file

@ -55,6 +55,7 @@ public:
using Base::Buffer;
using Base::GetNumRows;
using Base::GetNumCols;
using Base::GetDiagSize;
using Base::GetNumElements;
using Base::OwnBuffer;
using Base::GetFormat;

View file

@ -928,11 +928,8 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
template <class ElemType>
void CPUMatrix<ElemType>::SetDiagonalValue(const ElemType v)
{
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
auto& us = *this;
long m = (long) GetNumRows();
long m = static_cast<long>(GetDiagSize());
#pragma omp parallel for
// four-way unrolling
for (long i = 0; i < (m & ~3); i += 4)
@ -955,21 +952,18 @@ void CPUMatrix<ElemType>::SetDiagonalValue(const CPUMatrix<ElemType>& vector)
if (IsEmpty() || vector.IsEmpty())
LogicError("SetDiagonalValue: Matrix is empty.");
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
LogicError("SetDiagonalValue: input vector must be a vector.");
if (vector.GetNumElements() == 1) // reduce to simple form
SetDiagonalValue(vector(0, 0));
else if (vector.GetNumRows() != GetNumRows() && vector.GetNumCols() != GetNumRows())
else if (vector.GetNumRows() != GetDiagSize() && vector.GetNumCols() != GetDiagSize())
LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
else
{
auto& us = *this;
long m = (long) GetNumRows();
long m = (long) GetDiagSize();
if (vector.GetNumRows() == 1) // row vector
{
#pragma omp parallel for
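
The effect of replacing GetNumRows() with GetDiagSize(): only min(rows, cols) diagonal entries are written, so non-square matrices are now handled. A NumPy model of the loop above (illustrative only):

import numpy as np

def set_diagonal_value(a, v):
    m = min(a.shape)          # GetDiagSize()
    for i in range(m):        # the four-way-unrolled OpenMP loop, simplified
        a[i, i] = v
    return a

tall = set_diagonal_value(np.zeros((7, 4)), 1.0)   # 4 diagonal entries
wide = set_diagonal_value(np.zeros((4, 6)), 1.0)   # 4 diagonal entries
assert np.array_equal(tall, np.eye(7, 4)) and np.array_equal(wide, np.eye(4, 6))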

View file

@ -177,6 +177,60 @@ CPUSparseMatrix<ElemType>::~CPUSparseMatrix()
ZeroValues();
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetDiagonalValue(const ElemType v)
{
if (NzCount() > 0)
// SetDiagonalValue is currently only supported on all-zero sparse matrices
LogicError("Not implemented: SetDiagonalValue is not implemented for non-zero sparse CPU matrices.");
RequireSizeAndAllocate(GetNumRows(), GetNumCols(), GetDiagSize(), true, false);
CPUSPARSE_INDEX_TYPE* secondaryIndices = SecondaryIndexLocation();
CPUSPARSE_INDEX_TYPE* majorIndices = MajorIndexLocation();
ElemType* data = Data();
for (CPUSPARSE_INDEX_TYPE j = 0; j < GetDiagSize(); j++)
{
// The same logic applies to both the CSC and CSR formats:
data[j] = v;
secondaryIndices[j] = j;
majorIndices[j] = j;
}
for (size_t j = GetDiagSize(); j < SecondaryIndexCount(); ++j)
secondaryIndices[j] = (CPUSPARSE_INDEX_TYPE)GetDiagSize();
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetDiagonalValue(const CPUMatrix<ElemType>& vector)
{
if (NzCount() > 0)
// SetDiagonalValue is currently only supported on all-zero sparse matrices
NOT_IMPLEMENTED;
if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
LogicError("SetDiagonalValue: input vector must be a vector.");
if (vector.GetNumElements() == 1) // reduce to simple form
SetDiagonalValue(vector(0, 0));
else if (vector.GetNumRows() != GetDiagSize() && vector.GetNumCols() != GetDiagSize())
LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
else
{
RequireSizeAndAllocate(GetNumRows(), GetNumCols(), GetDiagSize(), true, false);
CPUSPARSE_INDEX_TYPE* secondaryIndices = SecondaryIndexLocation();
CPUSPARSE_INDEX_TYPE* majorIndices = MajorIndexLocation();
ElemType* data = Data();
// The same logic applies to both the CSC and CSR formats:
for (CPUSPARSE_INDEX_TYPE j = 0; j < GetDiagSize(); j++)
{
data[j] = vector.Data()[j];
secondaryIndices[j] = j;
majorIndices[j] = j;
}
for (size_t j = GetDiagSize(); j < SecondaryIndexCount(); ++j)
secondaryIndices[j] = (CPUSPARSE_INDEX_TYPE)GetDiagSize();
}
}
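
What the loops above construct, expressed with SciPy for reference (a sketch, not CNTK code): for an all-zero CSC (or, symmetrically, CSR) matrix, the diagonal fills the first diagSize slots of the value and major-index arrays, and every secondary index past the diagonal is clamped to diagSize:

import numpy as np
import scipy.sparse as sp

rows, cols, v = 4, 6, 1.0
diag = min(rows, cols)                              # GetDiagSize()
data = np.full(diag, v)                             # data[j] = v
major = np.arange(diag)                             # majorIndices[j] = j
secondary = np.minimum(np.arange(cols + 1), diag)   # secondaryIndices[j] = min(j, diag)
m = sp.csc_matrix((data, major, secondary), shape=(rows, cols))
assert np.array_equal(m.toarray(), np.eye(rows, cols))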
template <class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::AssignOneHot(const CPUMatrix<ElemType>& a, vector<size_t>& shape, size_t axis)
{

View file

@ -62,6 +62,7 @@ public:
using Base::Buffer;
using Base::GetNumRows;
using Base::GetNumCols;
using Base::GetDiagSize;
using Base::GetNumElements;
using Base::OwnBuffer;
using Base::GetFormat;
@ -92,6 +93,8 @@ public:
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val, size_t numColsPerMaskEntry);
CPUSparseMatrix<ElemType>& AssignOneHot(const CPUMatrix<ElemType>& a, vector<size_t>& shape, size_t axis);
void SetDiagonalValue(const ElemType v);
void SetDiagonalValue(const CPUMatrix<ElemType>& vector);
CPUSparseMatrix<ElemType>& DoGatherColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
CPUSparseMatrix<ElemType>& DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);

View file

@ -521,6 +521,8 @@ public:
size_t GetNumRows() const { return m_numRows; }
size_t GetNumCols() const { return m_numCols; }
// For a non-square matrix, the main diagonal size is defined by the smaller of the row and column dimensions
size_t GetDiagSize() const { return GetNumRows() < GetNumCols() ? GetNumRows() : GetNumCols(); }
protected:
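
A one-line Python equivalent of the new accessor (illustrative):

def get_diag_size(rows, cols):     # mirrors BaseMatrix::GetDiagSize
    return min(rows, cols)         # 4x6 -> 4, 7x4 -> 4, n x n -> n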

View file

@ -1291,7 +1291,7 @@ void GPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, i
template <class ElemType>
void GPUMatrix<ElemType>::SetDiagonalValue(const ElemType v)
{
CUDA_LONG N = (CUDA_LONG) GetNumRows();
CUDA_LONG N = (CUDA_LONG) GetDiagSize();
int blocksPerGrid = (int) ceil(1.0 * N / GridDim::maxThreadsPerBlock);
PrepareDevice();
SyncGuard syncGuard;
@ -1304,24 +1304,21 @@ void GPUMatrix<ElemType>::SetDiagonalValue(const GPUMatrix<ElemType>& vector)
if (IsEmpty() || vector.IsEmpty())
LogicError("SetDiagonalValue: Matrix is empty.");
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
LogicError("SetDiagonalValue: input vector must be a vector.");
if (vector.GetNumElements() == 1) // reduce to simple form
SetDiagonalValue(vector.Data()[0]);
else if (vector.GetNumRows() != GetNumRows() && vector.GetNumCols() != GetNumRows())
else if (vector.GetNumRows() != GetDiagSize() && vector.GetNumCols() != GetDiagSize())
LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
else
{
CUDA_LONG N = (CUDA_LONG) GetNumRows();
CUDA_LONG N = (CUDA_LONG) GetDiagSize();
int blocksPerGrid = (int) ceil(1.0 * N / GridDim::maxThreadsPerBlock);
PrepareDevice();
SyncGuard syncGuard;
_setDiagonalValueFromVector<ElemType><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, t_stream>>>(Data(), vector.Data(), N);
_setDiagonalValueFromVector<ElemType><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, t_stream>>>(Data(), vector.Data(), N, (CUDA_LONG) GetNumRows());
}
}
@ -4401,6 +4398,7 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignOneHot(const GPUMatrix<ElemType>
return *this;
}
template <class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::GatherFromTarget(const GPUMatrix<ElemType>& indices, const GPUMatrix<ElemType>& target, size_t row_elements)
{

View file

@ -151,6 +151,7 @@ public:
using Base::Buffer;
using Base::GetNumRows;
using Base::GetNumCols;
using Base::GetDiagSize;
using Base::GetNumElements;
using Base::OwnBuffer;
using Base::GetFormat;

View file

@ -1425,12 +1425,13 @@ template <class ElemType>
__global__ void _setDiagonalValueFromVector(
ElemType* a,
const ElemType* b,
const CUDA_LONG N)
const CUDA_LONG N,
const CUDA_LONG ld)
{
int id = blockDim.x * blockIdx.x + threadIdx.x;
if (id >= N)
return;
a[IDX2C(id, id, N)] = b[id];
a[IDX2C(id, id, ld)] = b[id];
}
template <class ElemType>
@ -5854,6 +5855,48 @@ __global__ void _assignOneHotAsSparse(ElemType *indices,
}
}
template<class ElemType>
__global__ void _setSparseDiagonalValue(ElemType v,
GPUSPARSE_INDEX_TYPE *secondaryIndices,
GPUSPARSE_INDEX_TYPE *majorIndices,
ElemType *targetBuffer,
size_t diagSize,
size_t num_elements)
{
const CUDA_LONG index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < diagSize)
{
majorIndices[index] = index;
targetBuffer[index] = v;
secondaryIndices[index] = index;
}
else if (index < num_elements)
{
secondaryIndices[index] = diagSize;
}
}
template<class ElemType>
__global__ void _setSparseDiagonalValue(ElemType *vector,
GPUSPARSE_INDEX_TYPE *secondaryIndices,
GPUSPARSE_INDEX_TYPE *majorIndices,
ElemType *targetBuffer,
size_t diagSize,
size_t num_elements)
{
const CUDA_LONG index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < diagSize)
{
majorIndices[index] = index;
targetBuffer[index] = vector[index];
secondaryIndices[index] = index;
}
else if (index < num_elements)
{
secondaryIndices[index] = diagSize;
}
}
}}}
#endif // !CPUONLY
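
Why the kernel now takes a separate leading dimension ld: with column-major storage, element (r, c) sits at flat index r + c*ld, so for non-square matrices the diagonal must be strided by the row count, not by the diagonal length N. A NumPy model of the indexing (illustrative):

import numpy as np

def idx2c(r, c, ld):               # column-major flat index, as in IDX2C
    return r + c * ld

rows, cols = 4, 6
diag = min(rows, cols)
buf = np.zeros(rows * cols, dtype=np.float32)     # column-major buffer
vec = np.arange(1, diag + 1, dtype=np.float32)
for i in range(diag):                             # one CUDA thread per diagonal entry
    buf[idx2c(i, i, rows)] = vec[i]               # ld = rows, N = diag
mat = buf.reshape((rows, cols), order='F')
assert all(mat[i, i] == vec[i] for i in range(diag))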

View file

@ -2887,6 +2887,72 @@ GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::AssignOneHot(const GPUMatr
return *this;
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::SetDiagonalValue(const ElemType v)
{
if (NzCount() > 0)
// SetDiagonalValue is currently only supported on all-zero sparse matrices
LogicError("Not implemented: SetDiagonalValue is not implemented for non-zero sparse GPU matrices.");
// TODO: setting the diagonal of a non-zero sparse matrix involves shifting existing
// values, which needs a more involved implementation. The current implementation
// should be read as AssignAsDiagonalMatrix(...).
if (GetFormat() != matrixFormatSparseCSC && GetFormat() != matrixFormatSparseCSR)
LogicError("SetDiagonalValue: Matrix format is not supported.");
this->RequireSizeAndAllocate(GetNumRows(), GetNumCols(), GetDiagSize());
this->PrepareDevice();
GPUSPARSE_INDEX_TYPE* secondaryIndices = SecondaryIndexLocation();
GPUSPARSE_INDEX_TYPE* majorIndices = MajorIndexLocation();
ElemType* targetData = NzValues();
CUDA_LONG N = (CUDA_LONG)SecondaryIndexCount();
int blocksPerGrid = (int)ceil(N * 1.0 / GridDim::maxThreadsPerBlock);
SyncGuard syncGuard;
_setSparseDiagonalValue<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(v,
secondaryIndices,
majorIndices,
targetData,
GetDiagSize(),
N);
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::SetDiagonalValue(const GPUMatrix<ElemType>& vector)
{
if (NzCount() > 0)
// SetDiagonalValue is currently only supported on all-zero sparse matrices
NOT_IMPLEMENTED;
if (vector.IsEmpty())
LogicError("SetDiagonalValue: Input vector is empty.");
if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
LogicError("SetDiagonalValue: input tensor must be a vector.");
if (vector.GetNumRows() != GetDiagSize() && vector.GetNumCols() != GetDiagSize())
LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
if (GetFormat() != matrixFormatSparseCSC && GetFormat() != matrixFormatSparseCSR)
LogicError("SetDiagonalValue: Matrix format is not supported.");
this->RequireSizeAndAllocate(GetNumRows(), GetNumCols(), GetDiagSize());
this->PrepareDevice();
GPUSPARSE_INDEX_TYPE* secondaryIndices = SecondaryIndexLocation();
GPUSPARSE_INDEX_TYPE* majorIndices = MajorIndexLocation();
ElemType* v = vector.Data();
ElemType* targetData = NzValues();
CUDA_LONG N = (CUDA_LONG)SecondaryIndexCount();
int blocksPerGrid = (int)ceil(N * 1.0 / GridDim::maxThreadsPerBlock);
SyncGuard syncGuard;
_setSparseDiagonalValue<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> >(v,
secondaryIndices,
majorIndices,
targetData,
GetDiagSize(),
N);
}
template <class ElemType>
GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::AssignTruncateTopOf(const GPUSparseMatrix<ElemType>& a, const ElemType threshold)
{

View file

@ -56,6 +56,7 @@ public:
using Base::Buffer;
using Base::GetNumRows;
using Base::GetNumCols;
using Base::GetDiagSize;
using Base::SetNumRows;
using Base::SetNumCols;
using Base::GetNumElements;
@ -396,6 +397,8 @@ public:
GPUSparseMatrix<ElemType>& SetToZeroIfAbsLessThan(const ElemType threshold);
GPUSparseMatrix<ElemType>& AssignOneHot(const GPUMatrix<ElemType>& a, vector<size_t>& shape, size_t axis);
void SetDiagonalValue(const ElemType v);
void SetDiagonalValue(const GPUMatrix<ElemType>& vector);
ElemType SumOfElements() const; // sum of all elements
ElemType SumOfAbsElements() const; // sum of all abs(elements)

View file

@ -1473,27 +1473,21 @@ void Matrix<ElemType>::SetDiagonalValue(const ElemType v)
if (IsEmpty())
LogicError("SetDiagonalValue: Matrix is empty.");
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->SetDiagonalValue(v),
m_GPUMatrix->SetDiagonalValue(v),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
m_CPUSparseMatrix->SetDiagonalValue(v),
m_GPUSparseMatrix->SetDiagonalValue(v));
}
template <class ElemType>
void Matrix<ElemType>::SetDiagonalValue(const Matrix<ElemType>& vector)
{
if (GetNumRows() != GetNumCols())
LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
if (vector.GetNumRows() != 1 && vector.GetNumCols() != 1)
LogicError("SetDiagonalValue: Input vector must be a vector.");
if (vector.GetNumRows() * vector.GetNumCols() != GetNumRows())
if (vector.GetNumRows() * vector.GetNumCols() != GetDiagSize())
LogicError("SetDiagonalValue: Input vector must match matrix dimension.");
if (IsEmpty())
@ -1511,7 +1505,7 @@ void Matrix<ElemType>::SetDiagonalValue(const Matrix<ElemType>& vector)
SetDiagonalValue(vector.m_GPUMatrix->Get00Element()) // BUGBUG: efficiency
);
}
else if (vector.GetNumRows() != GetNumRows() && vector.GetNumCols() != GetNumRows())
else if (vector.GetNumRows() != GetDiagSize() && vector.GetNumCols() != GetDiagSize())
LogicError("SetDiagonalValue: input vector's dimension does not agree with [this].");
else
{
@ -1522,8 +1516,10 @@ void Matrix<ElemType>::SetDiagonalValue(const Matrix<ElemType>& vector)
m_CPUMatrix->SetDiagonalValue(*vector.m_CPUMatrix),
assert(vector.m_GPUMatrix);
m_GPUMatrix->SetDiagonalValue(*vector.m_GPUMatrix),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
assert(vector.m_CPUMatrix);
m_CPUSparseMatrix->SetDiagonalValue(*vector.m_CPUMatrix),
assert(vector.m_GPUSparseMatrix);
m_GPUSparseMatrix->SetDiagonalValue(*vector.m_GPUMatrix));
}
}
@ -1989,6 +1985,12 @@ size_t Matrix<ElemType>::GetNumCols() const
return m_baseMatrix->GetNumCols();
}
template <class ElemType>
size_t Matrix<ElemType>::GetDiagSize() const
{
return m_baseMatrix->GetDiagSize();
}
template <class ElemType>
size_t Matrix<ElemType>::GetNumElements() const
{

View file

@ -177,6 +177,7 @@ public:
void SwitchToMatrixType(MatrixType newMatrixType, MatrixFormat newMatrixFormat, bool keepValues); // sets matrix type between dense and sparse
size_t GetNumRows() const;
size_t GetNumCols() const;
size_t GetDiagSize() const;
size_t GetNumElements() const;
bool HasNoElements() const { return GetNumElements() == 0; }
bool IsEmpty() const;

View file

@ -110,6 +110,16 @@ void GPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& denseMatrix,
{
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::SetDiagonalValue(const ElemType v)
{
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::SetDiagonalValue(const GPUMatrix<ElemType>& vector)
{
}
template <class ElemType>
GPUSPARSE_INDEX_TYPE* GPUSparseMatrix<ElemType>::GetCondensedVector() const
{

View file

@ -207,23 +207,149 @@ BOOST_FIXTURE_TEST_CASE(MatrixInitRandomUniform, RandomSeedFixture)
BOOST_CHECK(has_big);
}
BOOST_FIXTURE_TEST_CASE(MatrixInitRandomUniformSeed, RandomSeedFixture)
BOOST_FIXTURE_TEST_CASE(MatrixSetValueMethodsWithDoubleInstantiation, RandomSeedFixture)
{
const float low = -0.01f;
const float high = 0.01f;
SingleMatrix a = SingleMatrix::RandomUniform(429, 1024, c_deviceIdZero, low, high, IncrementCounter());
foreach_coord (i, j, a)
//Test on ElemType = double
// void SetValue(const ElemType v);
DoubleMatrix a(32, 12, c_deviceIdZero);
BOOST_CHECK_EQUAL(32, a.GetNumRows());
BOOST_CHECK_EQUAL(12, a.GetNumCols());
BOOST_CHECK_EQUAL(12 * 32, a.GetNumElements());
const double v = -32.3451f;
a.SetValue(v);
foreach_coord(i, j, a)
{
BOOST_CHECK_GE(a(i, j), low);
BOOST_CHECK_LE(a(i, j), high);
BOOST_CHECK_EQUAL(v, a(i, j));
}
// SingleMatrix b = SingleMatrix::RandomUniform(429, 1024, (float)-0.01, (float) 0.01, IncrementCounter());
// BOOST_CHECK(a.IsEqualTo(b));
// void SetValue(const Matrix<ElemType>& deepCopyFrom);
DoubleMatrix b(c_deviceIdZero);
b.SetValue(a);
foreach_coord(i, j, b)
{
BOOST_CHECK_EQUAL(v, b(i, j));
}
// void SetValue(const size_t numRows, const size_t numCols, ElemType *pArray, const bool srcIsColMajor);
std::array<double, 7> arrVector = { 123.0f, 0.23f, -22.0f, 63.0f, 43.42f, 324.3f, 99912.0f };
double *arr = arrVector.data();
b.SetValue(2, 3, b.GetDeviceId(), arr, matrixFlagNormal);
DoubleMatrix b1(c_deviceIdZero);
b1.SetValue(2, 3, b.GetDeviceId(), arr);
foreach_coord(i, j, b1)
{
BOOST_CHECK_EQUAL(arr[IDX2C(i, j, 2)], b(i, j));
BOOST_CHECK_EQUAL(arr[IDX2C(i, j, 2)], b1(i, j));
}
DoubleMatrix bbbb = DoubleMatrix::Zeros(6, 8, c_deviceIdZero);
bbbb.SetColumn(arr, 3);
for (int i = 0; i < 6; ++i)
{
BOOST_CHECK_EQUAL(arr[i], bbbb(i, 3));
}
// void SetDiagonalValue(const ElemType v);
DoubleMatrix c(4, 4, c_deviceIdZero);
const double val = -0.00332f;
c.SetDiagonalValue(val);
foreach_coord(i, j, c)
{
if (i == j)
BOOST_CHECK_EQUAL(val, c(i, j));
else
BOOST_CHECK_EQUAL(0, c(i, j));
}
// void SetDiagonalValue(const Matrix<ElemType>& vector);
DoubleMatrix d(4, 1, c_deviceIdZero);
const double val1 = 43.324f;
d.SetValue(val1);
c.SetDiagonalValue(d);
foreach_coord(i, j, c)
{
if (i == j)
BOOST_CHECK_EQUAL(val1, c(i, j));
else
BOOST_CHECK_EQUAL(0, c(i, j));
}
// void SetDiagonalValue(const ElemType v);//on non-squared matrix row < col
DoubleMatrix c_ns1(4, 6, c_deviceIdZero);
const double val_ns1 = -0.00332f;
c_ns1.SetValue(0.0f);
c_ns1.SetDiagonalValue(val_ns1);
foreach_coord(i, j, c_ns1)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val_ns1, c_ns1(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns1(i, j));
}
// void SetDiagonalValue(const Matrix<ElemType>& vector);
const double val1_ns1 = 43.324f;
c_ns1.SetValue(0.0f);
d.SetValue(val1_ns1);
c_ns1.SetDiagonalValue(d);
foreach_coord(i, j, c_ns1)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val1_ns1, c_ns1(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns1(i, j));
}
// void SetDiagonalValue(const ElemType v);//on non-squared matrix row > col
DoubleMatrix c_ns2(7, 4, c_deviceIdZero);
const double val_ns2 = -0.00332f;
c_ns2.SetValue(0.0f);
c_ns2.SetDiagonalValue(val_ns2);
foreach_coord(i, j, c_ns2)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val_ns2, c_ns2(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns2(i, j));
}
// void SetDiagonalValue(const Matrix<ElemType>& vector);
DoubleMatrix c_ns2_1(7, 4, c_deviceIdZero);
DoubleMatrix dd(4, 1, c_deviceIdZero);
const double val1_ns2 = 43.324f;
dd.SetValue(val1_ns2);
c_ns2_1.SetValue(0.0f);
c_ns2_1.SetDiagonalValue(dd);
foreach_coord(i, j, c_ns2)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val1_ns2, c_ns2_1(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns2_1(i, j));
}
DoubleMatrix c1(5, 5, c_deviceIdZero);
DoubleMatrix d1(1, 5, c_deviceIdZero);
double val2 = 0.53f;
c1.SetValue(0.0f);
d1 = d1.Transpose();
d1.SetValue(val2);
c1.SetDiagonalValue(d1);
foreach_coord(i, j, c1)
{
if (i == j)
BOOST_CHECK_EQUAL(val2, c1(i, j));
else
BOOST_CHECK_EQUAL(0, c1(i, j));
}
}
BOOST_FIXTURE_TEST_CASE(MatrixSetValueMethods, RandomSeedFixture)
{
//Test on ElemType = float
// void SetValue(const ElemType v);
SingleMatrix a(32, 12, c_deviceIdZero);
BOOST_CHECK_EQUAL(32, a.GetNumRows());
@ -290,9 +416,65 @@ BOOST_FIXTURE_TEST_CASE(MatrixSetValueMethods, RandomSeedFixture)
BOOST_CHECK_EQUAL(0, c(i, j));
}
// void SetDiagonalValue(const ElemType v);//on non-squared matrix row < col
SingleMatrix c_ns1(4, 6, c_deviceIdZero);
const float val_ns1 = -0.00332f;
c_ns1.SetValue(0.0f);
c_ns1.SetDiagonalValue(val_ns1);
foreach_coord(i, j, c_ns1)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val_ns1, c_ns1(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns1(i, j));
}
// void SetDiagonalValue(const Matrix<ElemType>& vector);
const float val1_ns1 = 43.324f;
c_ns1.SetValue(0.0f);
d.SetValue(val1_ns1);
c_ns1.SetDiagonalValue(d);
foreach_coord(i, j, c_ns1)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val1_ns1, c_ns1(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns1(i, j));
}
// void SetDiagonalValue(const ElemType v);//on non-squared matrix row > col
SingleMatrix c_ns2(7, 4, c_deviceIdZero);
const float val_ns2 = -0.00332f;
c_ns2.SetValue(0.0f);
c_ns2.SetDiagonalValue(val_ns2);
foreach_coord(i, j, c_ns2)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val_ns2, c_ns2(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns2(i, j));
}
// void SetDiagonalValue(const Matrix<ElemType>& vector);
SingleMatrix c_ns2_1(7, 4, c_deviceIdZero);
SingleMatrix dd(4, 1, c_deviceIdZero);
const float val1_ns2 = 43.324f;
dd.SetValue(val1_ns2);
c_ns2_1.SetValue(0.0f);
c_ns2_1.SetDiagonalValue(dd);
foreach_coord(i, j, c_ns2)
{
if (i == j && i <= 4)
BOOST_CHECK_EQUAL(val1_ns2, c_ns2_1(i, j));
else
BOOST_CHECK_EQUAL(0, c_ns2_1(i, j));
}
SingleMatrix c1(5, 5, c_deviceIdZero);
SingleMatrix d1(1, 5, c_deviceIdZero);
float val2 = 0.53f;
c1.SetValue(0.0f);
d1 = d1.Transpose();
d1.SetValue(val2);
c1.SetDiagonalValue(d1);

View file

@ -2183,6 +2183,36 @@ def zeros_like(x, name=''):
return zeros_like(x, name)
@typemap
def eye_like(x, sparse_output = True, name=''):
'''
Creates a matrix with its diagonal set to 1s, with the same shape and the same dynamic axes as ``x``. To be a matrix,
``x`` must have exactly two axes (counting both dynamic and static axes).
Example:
>>> x0 = np.arange(12).reshape((3, 4)).astype('f')
>>> x = C.input_variable(4)
>>> C.eye_like(x).eval({x: x0}).toarray()
array([[ 1., 0., 0., 0.],
[ 0., 1., 0., 0.],
[ 0., 0., 1., 0.]], dtype=float32)
Args:
x: numpy array or any :class:`~cntk.ops.functions.Function` that outputs a tensor of rank 2
sparse_output (bool, optional): if True (default), the output is stored as a sparse matrix; otherwise it is dense
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import eye_like
x = sanitize_input(x)
if len(x.dynamic_axes) + len(x.shape) != 2:
raise(ValueError('eye_like operand must have exactly two axes (counting both dynamic and static axes), but "%s" was provided as the operand'%x))
if any([ax.is_sequence_axis for ax in x.dynamic_axes]):
raise (ValueError(
'eye_like operand must not have a sequence axis, but "%s" was provided as the operand' % x))
return eye_like(x, sparse_output, name)
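
A usage sketch for the sparse_output flag (assuming, as the docstring example above does, that a sparse eval result exposes toarray()):

import numpy as np
import cntk as C

x0 = np.arange(12).reshape((3, 4)).astype('f')
x = C.input_variable(4)
dense = C.eye_like(x, sparse_output=False).eval({x: x0})
sparse = C.eye_like(x, sparse_output=True).eval({x: x0})   # SciPy-style sparse result
assert np.allclose(dense, sparse.toarray())
assert np.allclose(dense, np.eye(3, 4, dtype=np.float32))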
@typemap
def element_select(flag, value_if_true, value_if_false, name=''):
'''

View file

@ -148,4 +148,113 @@ def test_ones_like(operand, device_id, precision):
from .. import ones_like
_test_unary_op(precision, device_id, ones_like, operand,
expected_forward, expected_backward)
expected_forward, expected_backward)
Matrices = [
([[2.1, 4.7], [2.1, 2.1]], True),
([[2.1, 2., 2.], [4.7, 3, 5], [5.1, 2, 5]], True),
([[2.1], [4.7], [5.1], [5.8]], True),
([[2.1, 4.7], [2.1, 2.1]], False),
([[2.1, 2., 2.], [4.7, 3, 5], [5.1, 2, 5]], False),
([[2.1], [4.7], [5.1], [5.8]], False),
]
@pytest.mark.parametrize("operand, sparse_output", Matrices)
def test_eye_like(operand, sparse_output, device_id, precision):
np_eye_like = lambda matrix: np.eye(matrix.shape[0], matrix.shape[1], dtype=np.float32)
operand = AA(operand).astype(np.float32)
expected = np_eye_like(operand)
expected_grad = np.zeros_like(operand).reshape(expected.shape)
my_eval = (lambda f, arg: f.eval(arg).todense()) if sparse_output else (lambda f, arg: f.eval(arg))
from .. import eye_like
import cntk as C
#testing with direct numpy input
y = C.eye_like(operand, sparse_output=sparse_output)
actual = y.eval().todense() if sparse_output else y.eval()
np.testing.assert_almost_equal(actual, expected)
#testing through input_variable
#test load and save:
import tempfile
import os
x = C.input_variable(operand.shape[1:], dtype=np.float32, needs_gradient=True)
cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)
actual = my_eval(cntk_eye_like, {x: operand})
grad = cntk_eye_like.grad({x: operand})
np.testing.assert_almost_equal(actual, expected)
np.testing.assert_almost_equal(grad, expected_grad)
tempdir = os.path.join(tempfile.gettempdir(), 'eye_like_test')
cntk_eye_like.save(tempdir)
cntk_eye_like2 = C.load_model(tempdir)
np.testing.assert_almost_equal(my_eval(cntk_eye_like2, {cntk_eye_like2.arguments[0]: operand}), expected)
os.remove(tempdir)
cntk_eye_like = C.eye_like(C.unpack_batch(x), sparse_output=sparse_output)
actual = my_eval(cntk_eye_like, {x: operand})
grad = cntk_eye_like.grad({x: operand})
np.testing.assert_almost_equal(actual, expected)
np.testing.assert_almost_equal(grad, expected_grad)
tempdir = os.path.join(tempfile.gettempdir(), 'eye_like_test2')
cntk_eye_like.save(tempdir)
cntk_eye_like2 = C.load_model(tempdir)
np.testing.assert_almost_equal(my_eval(cntk_eye_like2, {cntk_eye_like2.arguments[0]: operand}), expected)
os.remove(tempdir)
cntk_eye_like = C.eye_like(C.transpose(C.unpack_batch(x), (1,0)), sparse_output=sparse_output)
actual = my_eval(cntk_eye_like, {x: operand})
grad = cntk_eye_like.grad({x: operand})
np.testing.assert_almost_equal(actual, expected.transpose())
np.testing.assert_almost_equal(grad, expected_grad)
tempdir = os.path.join(tempfile.gettempdir(), 'eye_like_test3')
cntk_eye_like.save(tempdir)
cntk_eye_like2 = C.load_model(tempdir)
np.testing.assert_almost_equal(my_eval(cntk_eye_like2, {cntk_eye_like2.arguments[0]: operand}), expected.transpose())
os.remove(tempdir)
#test pass through gradients
#test direct input: no gradients pass through to inputs
data = operand
op = lambda x: eye_like(x, sparse_output=False) # sparse output is not supported by some of the following basic operations
w = C.parameter(x.shape, init=np.ones(x.shape).astype(np.float32) * 3.0)
expected_x_backward = np.zeros_like(data)
expected_w_backward = np.zeros_like(w)
op_func = op(x)
grad = op_func.grad({x: data}, [x])
np.testing.assert_almost_equal(grad, expected_x_backward)
# test inputs through sub-expressions: no gradients pass through to the inputs (e.g. x, w) of the sub-expression (e.g. x * w here)
op_func = op(x * w)
grad = op_func.grad({x: data}, [w, x])
np.testing.assert_almost_equal(grad[x], expected_x_backward)
np.testing.assert_almost_equal(grad[w], expected_w_backward)
# testing inputs through shared sub-expressions: no gradients pass through eye_like to the inputs (e.g. x, w) of the sub-expression
# (e.g. x * w here), therefore the gradients depend only on how the shared expressions participate in other expressions:
shared_exp = x * w
op_func = op(shared_exp) + x + w + shared_exp
ref_op_func = x + w + shared_exp
grad = op_func.grad({x: data}, [w, x])
ref_grad = ref_op_func.grad({x: data}, [w, x])
np.testing.assert_almost_equal(grad[x], ref_grad[x])
np.testing.assert_almost_equal(grad[w], ref_grad[w])
#test expecting exception with sequence axis
with pytest.raises(Exception) as info:
#no sequence axis is allowed
x = C.sequence.input_variable(operand.shape[1:], dtype=np.float32, needs_gradient=True)
cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)
with pytest.raises(Exception) as info:
#no more than 2 axes are allowed (counting any dynamic axes)
x = C.input_variable((3, 3), dtype=np.float32, needs_gradient=True)
cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)
with pytest.raises(Exception) as info:
#no fewer than 2 axes are allowed (counting any dynamic axes)
x = C.input_variable((), dtype=np.float32, needs_gradient=True)
cntk_eye_like = C.eye_like(x, sparse_output=sparse_output)