Improved formatting and comments

Thilo Will 2016-07-22 15:51:15 +02:00
Parent 6dce931c19
Commit fec05bffe8
2 changed files with 27 additions and 22 deletions

View file

@@ -91,6 +91,19 @@ template <class ElemType>
auto input = Input(inputIndex)->ValueTensorFor(rank, fr);
auto output = ValueTensorFor(rank, fr.AllowBroadcast());
// POTENTIAL PROBLEM:
// For ReduceMin/Max there are combinations of input values where the gradient is not defined, because the function has an edge at these points.
// E.g. for ReduceMin this is the case when the minimum input value is attained by several inputs at the same time.
// In these cases there is no correct gradient. The question is whether this could lead to any problems.
// Let's look at two scenarios where this might happen:
//
// * Scenario 1: The input comes from a layer of nodes, e.g. ReLU, some of which might operate in the regime where they clip to a constant value.
// In this case it is not a problem if the input gradient is somewhat off, as the derivative of the affected input nodes will be zero anyway.
//
// * Scenario 2: The input data comes directly from the training data. Here bad gradients don't matter, as we wouldn't want to propagate gradients to the training data anyway.
//
// So, as we don't have a better solution yet and it probably has no impact, let's stay with the current solution.
// Also note that for Clip, Min, Max and ReLU we have the same kind of problem.
sliceInputGrad.AddCopyIfEqualOf(input, output, sliceOutputGrad);
break;
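The edge-case rule described in the comment above is easiest to see element-wise. Below is a minimal sketch (plain C++ on scalars, not CNTK's TensorView API; the function name ReduceMinBackward is hypothetical) of what sliceInputGrad.AddCopyIfEqualOf(input, output, sliceOutputGrad) amounts to for ReduceMin: the output gradient is added at every input position that attains the minimum, so at a tie all tied inputs receive the full gradient.

#include <cstddef>
#include <vector>

// Element-wise sketch of the ReduceMin gradient: "copy (add) if equal".
void ReduceMinBackward(const std::vector<float>& input,  // values that were reduced
                       float output,                     // the minimum over `input`
                       float outputGrad,                 // incoming gradient dL/dOutput
                       std::vector<float>& inputGrad)    // dL/dInput, accumulated into
{
    for (std::size_t i = 0; i < input.size(); ++i)
        if (input[i] == output)         // only inputs attaining the minimum get gradient
            inputGrad[i] += outputGrad; // at a tie, each tied input gets the full gradient
}

With a unique minimum this is the exact derivative; the tie case is precisely the undefined-gradient situation discussed in the comment.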
@@ -141,7 +154,7 @@ void ReduceElementsNode<ElemType>::ValidateOp()
else if (m_operation == L"Min") m_reductionOp = ElementWiseOperator::opMin;
// more here
else InvalidArgument("%ls was given an invalid operation code '%ls'. Allowed are: 'Sum'. And a few more soon.", NodeDescription().c_str(), m_operation.c_str());
else InvalidArgument("%ls was given an invalid operation code '%ls'. Allowed are: 'Sum', 'Max', 'Min'.", NodeDescription().c_str(), m_operation.c_str());
}
template <class ElemType>

View file

@@ -267,8 +267,9 @@ struct TensorOps
template <class ElemType> class BinaryOpConstants
{
public:
__device__ static ElemType NeutralValue(ElementWiseOperator op) {
return 0; //error, only the explicit instantiations below should be used.
__device__ static ElemType NeutralValue(ElementWiseOperator op)
{
return 0; // error, only the explicit instantiations below should be used.
}
};
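As an aside, the return 0; // error in the primary template only signals misuse at run time, and only if someone notices the wrong result. A hypothetical compile-time alternative (not what this commit does) is to make the primary template non-instantiable:

template <class ElemType> class BinaryOpConstants
{
public:
    __device__ static ElemType NeutralValue(ElementWiseOperator /*op*/)
    {
        // Dependent-false trick: fires only when this primary template is instantiated,
        // i.e. for any ElemType other than the float/double specializations below.
        static_assert(sizeof(ElemType) == 0, "NeutralValue: only float and double are supported");
        return 0;
    }
};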
@@ -278,14 +279,10 @@ public:
__device__ static float NeutralValue(ElementWiseOperator op) {
switch (op)
{
case ElementWiseOperator::opMax:
return FLT_MIN;
case ElementWiseOperator::opMin:
return FLT_MAX;
case ElementWiseOperator::opSum:
return 0;
default:
return 0; // error
case ElementWiseOperator::opMax: return FLT_MIN;
case ElementWiseOperator::opMin: return FLT_MAX;
case ElementWiseOperator::opSum: return 0;
default: return 0; // error
}
}
};
@@ -296,14 +293,10 @@ public:
__device__ static double NeutralValue(ElementWiseOperator op) {
switch (op)
{
case ElementWiseOperator::opMax:
return DBL_MIN;
case ElementWiseOperator::opMin:
return DBL_MAX;
case ElementWiseOperator::opSum:
return 0;
default:
return 0; // error
case ElementWiseOperator::opMax: return DBL_MIN;
case ElementWiseOperator::opMin: return DBL_MAX;
case ElementWiseOperator::opSum: return 0;
default: return 0; // error
}
}
};
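The neutral value is the identity element of the reduction: seeding an accumulator with it guarantees that lanes which contribute no data leave the result unchanged. A minimal usage sketch (the function name ReduceWithNeutral is hypothetical; it assumes the BinaryOpConstants specializations above are in scope):

__device__ float ReduceWithNeutral(const float* data, int n, ElementWiseOperator op)
{
    float acc = BinaryOpConstants<float>::NeutralValue(op); // identity of the reduction
    for (int i = 0; i < n; ++i)
    {
        if      (op == ElementWiseOperator::opMax) acc = data[i] > acc ? data[i] : acc;
        else if (op == ElementWiseOperator::opMin) acc = data[i] < acc ? data[i] : acc;
        else if (op == ElementWiseOperator::opSum) acc += data[i];
    }
    return acc;
}

One caveat on the constants above: FLT_MIN and DBL_MIN are the smallest positive normalized values, not the most negative ones, so as an identity for max they are only correct when all inputs are non-negative; for general inputs the identity would be -FLT_MAX/-DBL_MAX (or -infinity).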
@@ -534,9 +527,8 @@ struct TensorOpElement<ElemType, N, M, K, /*parallelReduce=*/true, /*k=*/-1>
for (CUDA_LONG i = 256; i; i >>= 1)
{
if (tid < i && tid + i < tids)
{
AggregationOp<volatile ReduceElemType, volatile ReduceElemType>::Update(accumulators[tid], accumulators[tid + i], reductionOp);
}
if (0 + i < tids)
__syncthreads(); // sync if condition true for at least one thread
// TODO: use volatile* and then we can skip the __syncthreads() for the last 32 values. See Amit's allreduce() function implementation in MatrixQuantizer_kernel.cu.
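Note why the guard in front of __syncthreads() is written this way: the condition 0 + i < tids does not depend on tid, so all threads in the block take the same branch, and the barrier is either reached by every thread or by none (calling __syncthreads() under a divergent condition would be undefined behaviour). A self-contained sketch of the same tree-reduction pattern, using a plain sum in place of the templated AggregationOp (the kernel name BlockSum is hypothetical):

__global__ void BlockSum(const float* in, float* out, int tids) // tids = active threads
{
    extern __shared__ float accumulators[]; // one partial value per thread
    int tid = threadIdx.x;
    accumulators[tid] = tid < tids ? in[tid] : 0.0f; // 0 is the neutral value for sum
    __syncthreads();
    for (int i = 256; i; i >>= 1) // assumes blockDim.x <= 512, as in the kernel above
    {
        if (tid < i && tid + i < tids)    // fold the upper half onto the lower half
            accumulators[tid] += accumulators[tid + i];
        if (0 + i < tids)                 // uniform across the block: safe barrier
            __syncthreads();
    }
    if (tid == 0)
        *out = accumulators[0];
}

Launched e.g. as BlockSum<<<1, 512, 512 * sizeof(float)>>>(d_in, d_out, n); for n <= 512.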
@@ -765,7 +757,7 @@ static void LaunchTensorOpWithReduction(ElemType beta, array<ElemType*, N> point
_launchTensorOpWithReduction<ElemType, N, M, K><<<dim3(numBlocksX, numBlocksY, numBlocksZ), numThreadsX, numThreadsX * sizeof(ReduceElemType), t_stream>>>(
beta, pointers, alpha, op, reductionOp,
regularOpStrides, regularStrides, NN,
reducingOpDims, reducingStrides,/*reductionBegin*/ 0, reductionChunkSize);
reducingOpDims, reducingStrides, /*reductionBegin*/ 0, reductionChunkSize);
}
// --- case (b)
// Reduction across blocks. This is the difficult one.
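For orientation, case (b) arises when the reduction no longer fits into one thread block. A common scheme, sketched below for a plain sum (a generic illustration, not CNTK's actual kernel; PartialSums is a hypothetical name): every block reduces one chunk of the input to a single partial result, and a second launch reduces the array of partials the same way.

__global__ void PartialSums(const float* in, float* partials, int n)
{
    extern __shared__ float smem[];
    int tid = threadIdx.x;
    float acc = 0.0f; // neutral value for sum
    // Grid-stride loop: each thread folds several input elements into a register.
    for (int i = blockIdx.x * blockDim.x + tid; i < n; i += gridDim.x * blockDim.x)
        acc += in[i];
    smem[tid] = acc;
    __syncthreads();
    for (int i = blockDim.x / 2; i; i >>= 1) // in-block tree reduction (power-of-two blockDim)
    {
        if (tid < i)
            smem[tid] += smem[tid + i];
        __syncthreads(); // unconditional here: uniform by construction
    }
    if (tid == 0)
        partials[blockIdx.x] = smem[0]; // one partial per block
}

A second launch over partials (with a single block) then produces the final value; the reductionBegin/reductionChunkSize parameters visible above play the role of the chunking.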