bug fix: vsnprintf() used in ThrowFormatted() does not always write a 0-terminator
Parent: 29f93ea768
Commit: 265998566e
@@ -61,13 +61,16 @@ template <class E>
 __declspec_noreturn static inline void ThrowFormatted(const char* format, ...)
 {
     va_list args;
-    const size_t bufferSize = 1024;
-    char buffer[bufferSize];
-
     va_start(args, format);
-    int written = vsnprintf(buffer, bufferSize, format, args);
-    if (written >= bufferSize - 1)
-        sprintf(buffer + bufferSize - 6, "[...]");
+
+    char buffer[1024] = { 0 }; // initialize in case vsnprintf() does a half-assed job, such as a failing character conversion
+    int written = vsnprintf(buffer, _countof(buffer) - 1, format, args); // -1 because vsnprintf() does not always write a 0-terminator, although the MSDN documentation claims it does
+    // TODO: In case of an EILSEQ error, choose between outputting the raw format string itself vs. continuing with the half-completed buffer
+    //if (written < 0) // an invalid wide-string conversion may lead to EILSEQ
+    //    strncpy(buffer, format, _countof(buffer));
+    UNUSED(written); // vsnprintf() returns -1 in case of overflow, rather than the number of characters written that the MSDN documentation claims
+    if (strlen(buffer)/*written*/ >= (int)_countof(buffer) - 2)
+        sprintf(buffer + _countof(buffer) - 4, "...");
 #ifdef _DEBUG // print this to the log, so we can see what the error is before throwing
     fprintf(stderr, "\nAbout to throw exception '%s'\n", buffer);
 #endif
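A note on the pattern the new code adopts: the MSVC CRT's pre-C99 vsnprintf() returns -1 on truncation and need not write a 0-terminator, so the fix zero-fills the buffer up front, formats into size - 1 bytes, and detects truncation via strlen() rather than the return value. A minimal standalone sketch of the same pattern (FormatSafely is a hypothetical name, not part of this commit):

#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <cstring>

// Sketch only: survive a vsnprintf() that returns -1 on truncation and may
// not write a 0-terminator, as the old MSVC CRT does. Assumes size >= 5.
static void FormatSafely(char* buffer, size_t size, const char* format, ...)
{
    va_list args;
    va_start(args, format);
    memset(buffer, 0, size);                                  // guarantee 0-termination ourselves
    int written = vsnprintf(buffer, size - 1, format, args);  // last byte always stays 0
    va_end(args);
    (void) written;                                           // unreliable: -1 on overflow with the MSVC CRT
    if (strlen(buffer) >= size - 2)                           // buffer was (or may have been) truncated
        sprintf(buffer + size - 4, "...");                    // mark truncation; writes 4 bytes, in bounds
}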
@@ -357,7 +357,7 @@ public:
     {
         auto iter = m_nameToNodeMap.find(name);
         if (iter == m_nameToNodeMap.end())
-            RuntimeError("GetNodeFromName: Node name %ls does not exist.", name.c_str());
+            RuntimeError("GetNodeFromName: Network has no node named '%ls'.", name.c_str());
         return iter->second;
     }

@@ -720,7 +720,7 @@ public:
         }
         else // node name is not found, dump all nodes
         {
-            fprintf(stderr, "Warning: node name %ls does not exist in the network. dumping all nodes.\n",
+            fprintf(stderr, "Warning: node name '%ls' does not exist in the network. Dumping all nodes instead.\n",
                     nodeName.c_str());
             DumpAllNodesToFile(printValues, printMetadata, outputFile);
         }
@@ -270,13 +270,13 @@ TensorShape ComputationNodeBase::GetTensorSliceFor(size_t rank, const FrameRange
     return tensorShape;
 }
 
-// same but 'fr' refers to a single column, and result will not have seq/time axes
+// same as GetTensorSliceFor() except that 'fr' refers to a single column, and result will not have seq/time axes
+// This is needed by TimesNode when the left argument has to be broken up into individual matrices/GEMM calls.
+// To enable its first argument to have an MBLayout, it needs to un-pad if we have an MBLayout but only refer to a single sequence and time step.
 TensorShape ComputationNodeBase::GetOneSampleTensorSliceFor(size_t rank, const FrameRange& fr) const
 {
     TensorShape result = GetTensorSliceFor(rank, fr);
-    // To enable A to have an MBLayout, we need to un-pad if we have an MBLayout but only refer to a single sequence and time step.
-    // Undo the adding of (seq, time) axes that was done by GetTensorShape()
+    // undo the adding of (seq, time) axes that was done by GetTensorShape()
     if (!fr.IsOneColumnWrt(GetMBLayout()))
         LogicError("GetOneSampleTensorSliceFor: Requires 'fr' to refer to a single sample.");
     if (HasMBLayout())
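The "un-pad" step the new comments describe is just dropping the trailing (seq, time) axes that GetTensorSliceFor() appended, which are both 1 once 'fr' refers to a single sequence and time step. A standalone sketch of the idea (hypothetical helper on a plain dimension vector, not the TensorShape API):

#include <cstddef>
#include <vector>

// Sketch only: after slicing to one sequence and one time step, the padded
// (seq, time) axes are singletons; removing them leaves the per-sample shape
// that a single matrix/GEMM call expects.
std::vector<size_t> DropSeqTimeAxes(std::vector<size_t> dims, size_t sampleRank)
{
    while (dims.size() > sampleRank && dims.back() == 1)
        dims.pop_back();
    return dims;
}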
@@ -224,7 +224,7 @@ template class ElementTimesNode<double>;
 // shared code of TimesNode and TransposeTimesNode (which transposes A)
 // The common case, W * v with weights W and minibatch data v is efficiently
 // implemented as a per-minibatch BLAS GEMM call.
-// If the A is minibatch data, then this operation is currently not efficient.
+// If A is minibatch data, then this operation is currently not efficient.
 // TODO: Implement this with TensorView::DoElementwiseProductOf() and stride magic
 // TODO: Transpose flags for all matrices, inputs and outputs?
 // -----------------------------------------------------------------------
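To make the efficiency note concrete: with fixed weights, one GEMM covers the whole minibatch, while a minibatch-valued left argument gives every sample its own left matrix, so the product decays into one small GEMM per sample. A self-contained sketch with a naive column-major GEMM (all names hypothetical, not CNTK code):

#include <cstddef>

// Naive column-major GEMM: C[m x n] += A[m x k] * B[k x n]. Caller zero-initializes C.
void Gemm(const float* A, const float* B, float* C, size_t m, size_t k, size_t n)
{
    for (size_t j = 0; j < n; j++)
        for (size_t i = 0; i < m; i++)
            for (size_t t = 0; t < k; t++)
                C[j * m + i] += A[t * m + i] * B[j * k + t];
}

// Common case: fixed weights W [m x k] times minibatch V [k x T]; one call covers all T samples.
void WeightTimesMinibatch(const float* W, const float* V, float* C, size_t m, size_t k, size_t T)
{
    Gemm(W, V, C, m, k, T);
}

// Slow path: the left argument is itself minibatch data, i.e. T per-sample matrices
// A_t [m x k] stored consecutively; the product becomes T tiny GEMMs, one per column of V and C.
void MinibatchTimesMinibatch(const float* A, const float* V, float* C, size_t m, size_t k, size_t T)
{
    for (size_t t = 0; t < T; t++)
        Gemm(A + t * m * k, V + t * k, C + t * m, m, k, 1);
}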
@@ -256,8 +256,8 @@ public:
     }
 
 private:
-    // the left argument can only be applied sample by sample, and must be returned as a matrix object
-    // (as a consequence, it then also applies to the node itself)
+    // if the left argument of the matrix product (A) has a time axis, it can only be applied sample by sample
+    // where each sample is treated as a separate matrix object (as a consequence, it then also applies to B and the result as well)
     TensorView<ElemType> OneSampleTensorFor(int inputIndex, bool gradient/*instead of value*/, const FrameRange& fr)
     {
         auto input = inputIndex < 0 ? this : Input(inputIndex).get();
@@ -272,7 +272,7 @@ private:
 public:
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
-        // if A is minibatch data, then this must be performed frame-by-frame, sequence-by-sequence, one GEMM call each.
+        // If argument A is minibatch data, then this must be performed frame-by-frame, sequence-by-sequence, one GEMM call each.
         // This will be inefficient. We hope this will be the baseline of a future, more efficient TensorView-based implementation.
         if (!fr.IsOneColumnWrt(Input(0)->GetMBLayout()))
         {
@@ -290,7 +290,6 @@ public:
         // Transposition is applied after flattening into 2D, but only allowed if the input sample is 2D anyway.
         auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
         auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
-        //auto input1 = Input(1)->ValueTensorFor(Input(1)->GetSampleLayout().GetRank(), fr.AllowBroadcast());
         auto output = OneSampleTensorFor(-1, /*gradient=*/false, fr);
         output.AssignMatrixProductOf(false/*transC*/, input0, m_transpose/*transA*/, input1, false/*transB*/);
     }
@@ -321,18 +320,14 @@ public:
             if (Input(1)->Value().GetMatrixType() == SPARSE && Input(0)->Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
                 Input(0)->Gradient().SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol, false);
             auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
-            //auto input1 = Input(1)->ValueTensorFor(Input(1)->GetSampleLayout().GetRank(), fr.AllowBroadcast());
             auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
-            //auto outputGradient = GradientTensorFor( GetSampleLayout().GetRank(), fr);
             auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
             input0Gradient.AddMatrixProductOf(m_transpose/*transC*/, outputGradient, false/*transA*/, input1, true/*transB*/);
         }
         else if (inputIndex == 1) // right derivative
         {
             auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
-            //auto input1Gradient = Input(1)->GradientTensorFor(Input(1)->GetSampleLayout().GetRank(), fr.AllowBroadcast());
             auto input1Gradient = OneSampleTensorFor(1, /*gradient=*/true, fr.AllowBroadcast());
-            //auto outputGradient = GradientTensorFor( GetSampleLayout().GetRank(), fr);
             auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
             input1Gradient.AddMatrixProductOf(false/*transC*/, input0, !m_transpose/*transA*/, outputGradient, false/*transB*/);
         }
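For reference, the transA/transB/transC flags in the two AddMatrixProductOf calls encode the standard matrix-product gradients. With loss \ell, TimesNode computes C = A B and TransposeTimesNode (m_transpose set) computes C = A^T B:

% TimesNode, C = A B:
\frac{\partial \ell}{\partial A} = \frac{\partial \ell}{\partial C} \, B^{\top},
\qquad
\frac{\partial \ell}{\partial B} = A^{\top} \frac{\partial \ell}{\partial C}

% TransposeTimesNode, C = A^{\top} B:
\frac{\partial \ell}{\partial A} = \left( \frac{\partial \ell}{\partial C} \, B^{\top} \right)^{\top} = B \left( \frac{\partial \ell}{\partial C} \right)^{\top},
\qquad
\frac{\partial \ell}{\partial B} = A \, \frac{\partial \ell}{\partial C}

This matches the code: the left derivative forms outputGradient * input1^T and transposes the result exactly when m_transpose is set (transC), and the right derivative forms op(input0) * outputGradient with op = transpose exactly when m_transpose is not set (transA = !m_transpose).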