Made everything build again after deleting the unused MultiNetworks functions, including the related entries in SimpleNetworkBuilder;
deleted PairNetworkNode; renamed EsotericActions.cpp to SpecialPurposeActions.cpp.
This commit is contained in:
Parent
18b7b36cca
Commit
a4a20183aa
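
For orientation: the five commands removed by this commit ("testunroll", "trainEncoderDecoder", "testEncoderDecoder", "trainBidirectionEncoderDecoder", "beamSearch") were reached through the string-dispatch chain in DoCommands(), which the hunks below trim. A minimal sketch of that dispatch shape (simplified; DispatchOneAction is a hypothetical helper, not the actual structure of CNTK.cpp):

// Sketch only -- the real DoCommands() iterates over a configured command list
// and routes each action name through an if/else-if chain like this one.
template <class ElemType>
static void DispatchOneAction(const std::string& action, const ConfigParameters& commandParams)
{
    if (action == "train")
        DoTrain<ElemType>(commandParams);
    else if (action == "test")
        DoEval<ElemType>(commandParams);
    // Removed by this commit: "testunroll" -> DoEvalUnroll,
    // "trainEncoderDecoder" -> DoEncoderDecoder,
    // "testEncoderDecoder" -> DoEvalEncodingBeamSearchDecoding,
    // "trainBidirectionEncoderDecoder" -> DoBidirectionEncoderDecoder,
    // "beamSearch" -> DoBeamSearchDecoding.
    else
        RuntimeError("unknown action: %s", action.c_str());
}
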
@@ -43,16 +43,6 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config);
 template <typename ElemType>
 void DoTopologyPlot(const ConfigParameters& config);
 
-// deprecated (EsotericActions.cp)
+// special purpose (EsotericActions.cp)
 template <typename ElemType>
 void DoConvertFromDbn(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config);
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config);
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config);

@@ -170,7 +170,7 @@
       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
     </ClCompile>
     <ClCompile Include="..\Common\TimerUtility.cpp" />
-    <ClCompile Include="EsotericActions.cpp" />
+    <ClCompile Include="SpecialPurposeActions.cpp" />
     <ClCompile Include="EvalActions.cpp" />
     <ClCompile Include="OtherActions.cpp" />
     <ClCompile Include="TrainActions.cpp" />

@@ -19,7 +19,7 @@
     <ClCompile Include="OtherActions.cpp">
       <Filter>Actions</Filter>
     </ClCompile>
-    <ClCompile Include="EsotericActions.cpp">
+    <ClCompile Include="SpecialPurposeActions.cpp">
       <Filter>Actions</Filter>
     </ClCompile>
   </ItemGroup>

@@ -1,424 +0,0 @@
-//
-// Copyright (c) Microsoft. All rights reserved.
-// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
-//
-// EsotericActions.cpp -- CNTK actions that are deprecated
-//
-
-#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
-
-#include "stdafx.h"
-#include "Basics.h"
-#include "Actions.h"
-#include "ComputationNetwork.h"
-#include "ComputationNode.h"
-#include "DataReader.h"
-#include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
-#include "Config.h"
-#include "MultiNetworksSGD.h"
-#include "SimpleEvaluator.h"
-#include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
-#include "BestGpu.h"
-#include "ScriptableObjects.h"
-#include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
-
-#include <string>
-#include <chrono>
-#include <algorithm>
-#include <vector>
-#include <iostream>
-#include <queue>
-#include <set>
-#include <memory>
-
-#ifndef let
-#define let const auto
-#endif
-
-using namespace std;
-using namespace Microsoft::MSR;
-using namespace Microsoft::MSR::CNTK;
-
-// ===========================================================================
-// DoConvertFromDbn() - implements CNTK "convertdbn" command
-// ===========================================================================
-
-template <typename ElemType>
-void DoConvertFromDbn(const ConfigParameters& config)
-{
-    wstring modelPath = config(L"modelPath");
-    wstring dbnModelPath = config(L"dbnModelPath");
-
-    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
-    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
-    net->Save(modelPath);
-}
-
-template void DoConvertFromDbn<float>(const ConfigParameters& config);
-template void DoConvertFromDbn<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEvalUnroll() - implements CNTK "testunroll" command
-// ===========================================================================
-
-// Special early implementation of RNNs by emulating them as a DNN.
-// The code is very restricted to simple RNNs.
-// The idea can be used for more complicated network but need to know which nodes are stateful or time-dependent so that unroll is done in a correct way to represent recurrent networks.
-// TODO: can probably be removed.
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config)
-{
-    //test
-    ConfigParameters readerConfig(config(L"reader"));
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> testDataReader(readerConfig);
-
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    intargvector mbSize = minibatchSize;
-    wstring path2EvalResults = config(L"path2EvalResults", L"");
-
-    auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
-
-    MultiNetworksEvaluator<ElemType> eval(net);
-    double evalEntropy;
-    eval.EvaluateUnroll(&testDataReader, mbSize[0], evalEntropy, path2EvalResults == L"" ? nullptr : path2EvalResults.c_str(), epochSize);
-}
-
-template void DoEvalUnroll<float>(const ConfigParameters& config);
-template void DoEvalUnroll<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEncoderDecoder() - implements CNTK "trainEncoderDecoder" command
-// ===========================================================================
-
-/**
-This implements sequence to sequence translation paper in
-http://arxiv.org/pdf/1409.3215.pdf
-
-*/
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config)
-{
-    vector<IComputationNetBuilder<ElemType>*> netBuilders;
-    vector<IDataReader<ElemType>*> trainDataReader;
-    vector<IDataReader<ElemType>*> validationDataReader;
-
-    ConfigParameters configSGD = config(L"SGD");
-    bool makeMode = config(L"makeMode", "true");
-    IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* decoderNetBuilder = NULL;
-
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
-
-    ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
-    DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
-
-    ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
-    DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
-
-    if (config.Exists("EncoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
-        encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need encoder network");
-    }
-
-    if (config.Exists("DecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
-        decoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    MultiNetworksSGD<ElemType> sgd(configSGD);
-
-    sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
-
-    netBuilders.push_back(encoderNetBuilder);
-    netBuilders.push_back(decoderNetBuilder);
-    trainDataReader.push_back(encoderDataReader);
-    trainDataReader.push_back(decoderDataReader);
-    validationDataReader.push_back(cvEncoderDataReader);
-    validationDataReader.push_back(cvDecoderDataReader);
-
-    sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
-
-    delete encoderDataReader;
-    delete decoderDataReader;
-    delete cvEncoderDataReader;
-    delete cvDecoderDataReader;
-}
-
-template void DoEncoderDecoder<float>(const ConfigParameters& config);
-template void DoEncoderDecoder<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoBidirectionEncoderDecoder() - implements CNTK "trainBidirectionEncoderDecoder" command
-// ===========================================================================
-
-/**
-DoBidirecionEncoderDecoder
-*/
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config)
-{
-
-    ConfigParameters configSGD = config(L"SGD");
-    bool makeMode = config(L"makeMode", "true");
-    IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* forwardDecoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* backwardDecoderNetBuilder = NULL;
-    vector<IComputationNetBuilder<ElemType>*> netBuilders;
-    vector<IDataReader<ElemType>*> trainDataReader;
-    vector<IDataReader<ElemType>*> validationDataReader;
-
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
-
-    ConfigParameters backwardDecoderReaderConfig = config(L"backwardDecoderReader");
-    DataReader<ElemType>* backwardDecoderDataReader = new DataReader<ElemType>(backwardDecoderReaderConfig);
-
-    ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
-    DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
-
-    ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
-    DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
-
-    ConfigParameters cvBackwardDecoderReaderConfig = config(L"BackwardDecoderCVReader");
-    DataReader<ElemType>* cvBackwardDecoderDataReader = new DataReader<ElemType>(cvBackwardDecoderReaderConfig);
-
-    if (config.Exists("EncoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
-        encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-        LogicError("Need encoder network");
-
-    if (config.Exists("DecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
-        forwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    if (config.Exists("BackwardDecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"BackwardDecoderNetworkBuilder");
-        backwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    MultiNetworksSGD<ElemType> sgd(configSGD);
-
-    sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
-
-    netBuilders.push_back(encoderNetBuilder);
-    netBuilders.push_back(forwardDecoderNetBuilder);
-    netBuilders.push_back(backwardDecoderNetBuilder);
-    trainDataReader.push_back(encoderDataReader);
-    trainDataReader.push_back(decoderDataReader);
-    trainDataReader.push_back(backwardDecoderDataReader);
-    validationDataReader.push_back(cvEncoderDataReader);
-    validationDataReader.push_back(cvDecoderDataReader);
-    validationDataReader.push_back(cvBackwardDecoderDataReader);
-
-    sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
-
-    delete encoderDataReader;
-    delete decoderDataReader;
-    delete cvEncoderDataReader;
-    delete cvDecoderDataReader;
-    delete backwardDecoderDataReader;
-    delete cvBackwardDecoderDataReader;
-}
-
-template void DoBidirectionEncoderDecoder<float>(const ConfigParameters& config);
-template void DoBidirectionEncoderDecoder<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEvalEncodingBeamSearchDecoding() - implements CNTK "testEncoderDecoder" command
-// ===========================================================================
-
-/**
-Originally, this is for testing models trained using the sequence to sequence translation method below
-http://arxiv.org/pdf/1409.3215.pdf
-Later on, it is extended to be more general to include a sequence of network operations.
-*/
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
-{
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-
-    vector<IDataReader<ElemType>*> readers;
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> encoderReader(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    decoderReaderConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> decoderReader(decoderReaderConfig);
-
-    readers.push_back(&encoderReader);
-    readers.push_back(&decoderReader);
-
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-
-    wstring encoderModelPath = config(L"encoderModelPath");
-    wstring decoderModelPath = config(L"decoderModelPath");
-
-    intargvector mbSize = minibatchSize;
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-
-    vector<ComputationNetworkPtr> nets;
-    auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
-
-    auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
-
-    nets.push_back(encoderNet);
-    nets.push_back(decoderNet);
-    ConfigArray evalNodeNames = config(L"evalNodeNames");
-    vector<wstring> evalNodeNamesVector;
-    for (int i = 0; i < evalNodeNames.size(); ++i)
-    {
-        evalNodeNamesVector.push_back(evalNodeNames[i]);
-    }
-
-    ConfigArray outputNodeNames = config(L"outputNodeNames");
-    vector<wstring> outputNodeNamesVector;
-    for (int i = 0; i < outputNodeNames.size(); ++i)
-    {
-        outputNodeNamesVector.push_back(outputNodeNames[i]);
-    }
-
-    ElemType beamWidth = config(L"beamWidth", "1");
-
-    ConfigParameters writerConfig = config(L"writer");
-    DataWriter<ElemType> testDataWriter(writerConfig);
-
-    MultiNetworksEvaluator<ElemType> eval(decoderNet, numMBsToShowResult, traceLevel);
-    eval.InitTrainEncoderDecoderWithHiddenStates(config);
-
-    eval.EncodingEvaluateDecodingBeamSearch(nets, readers,
-                                            testDataWriter, evalNodeNamesVector,
-                                            outputNodeNamesVector,
-                                            mbSize[0], beamWidth, epochSize);
-}
-
-template void DoEvalEncodingBeamSearchDecoding<float>(const ConfigParameters& config);
-template void DoEvalEncodingBeamSearchDecoding<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoBeamSearchDecoding() - implements CNTK "beamSearch" command
-// ===========================================================================
-
-template <typename ElemType>
-static void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& reader)
-{
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    intargvector mbSize = minibatchSize;
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-
-    auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
-
-    ConfigArray evalNodeNames = config(L"evalNodeNames");
-    vector<wstring> evalNodeNamesVector;
-    for (int i = 0; i < evalNodeNames.size(); ++i)
-    {
-        evalNodeNamesVector.push_back(evalNodeNames[i]);
-    }
-
-    ConfigArray outputNodeNames = config(L"outputNodeNames");
-    vector<wstring> outputNodeNamesVector;
-    for (int i = 0; i < outputNodeNames.size(); ++i)
-    {
-        outputNodeNamesVector.push_back(outputNodeNames[i]);
-    }
-
-    ElemType beamWidth = config(L"beamWidth", "1");
-
-    ConfigParameters writerConfig = config(L"writer");
-    DataWriter<ElemType> testDataWriter(writerConfig);
-
-    MultiNetworksEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
-    eval.BeamSearch(&reader, testDataWriter, evalNodeNamesVector, outputNodeNamesVector, mbSize[0], beamWidth, epochSize);
-}
-
-/**
-This is beam search decoder.
-
-Developed by Kaisheng Yao.
-
-It is used in the following work:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion" in Interspeech 2015
-*/
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config)
-{
-    //test
-    ConfigParameters readerConfig = config(L"reader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> testDataReader(readerConfig);
-
-    DoEvalBeamSearch(config, testDataReader);
-}
-
-template void DoBeamSearchDecoding<float>(const ConfigParameters& config);
-template void DoBeamSearchDecoding<double>(const ConfigParameters& config);

@@ -14,20 +14,12 @@
 #include "ComputationNode.h"
 #include "DataReader.h"
 #include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
 
 #include <string>
 #include <chrono>

@@ -12,22 +12,9 @@
 #include "Actions.h"
 #include "ComputationNetwork.h"
 #include "ComputationNode.h"
-#include "DataReader.h"
-#include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
-#include "SimpleEvaluator.h"
-#include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
-#include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
 
 #include <string>
 #include <chrono>

@@ -0,0 +1,54 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// EsotericActions.cpp -- CNTK actions that are deprecated
+//
+
+#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
+
+#include "stdafx.h"
+#include "Basics.h"
+#include "Actions.h"
+#include "ComputationNetwork.h"
+#include "ComputationNode.h"
+#include "DataReader.h"
+#include "DataWriter.h"
+#include "SimpleNetworkBuilder.h"
+#include "Config.h"
+#include "ScriptableObjects.h"
+
+#include <string>
+#include <chrono>
+#include <algorithm>
+#include <vector>
+#include <iostream>
+#include <queue>
+#include <set>
+#include <memory>
+
+#ifndef let
+#define let const auto
+#endif
+
+using namespace std;
+using namespace Microsoft::MSR;
+using namespace Microsoft::MSR::CNTK;
+
+// ===========================================================================
+// DoConvertFromDbn() - implements CNTK "convertdbn" command
+// ===========================================================================
+
+template <typename ElemType>
+void DoConvertFromDbn(const ConfigParameters& config)
+{
+    wstring modelPath = config(L"modelPath");
+    wstring dbnModelPath = config(L"dbnModelPath");
+
+    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
+    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
+    net->Save(modelPath);
+}
+
+template void DoConvertFromDbn<float>(const ConfigParameters& config);
+template void DoConvertFromDbn<double>(const ConfigParameters& config);

@@ -20,10 +20,8 @@
 #include "ModelEditLanguage.h"
 #include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"

@@ -50,76 +48,15 @@ using namespace Microsoft::MSR::CNTK;
 // DoTrain() - implements CNTK "train" command
 // ===========================================================================
 
-template <class ElemType>
-class BrainScriptNetworkBuilder : public IComputationNetBuilder<ElemType>
-{
-    typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
-    ComputationNetworkPtr m_net;
-    ScriptableObjects::ConfigLambdaPtr m_createNetworkFn;
-    DEVICEID_TYPE m_deviceId;
-
-public:
-    // the constructor remembers the config lambda
-    // TODO: Really this should just take the lambda itself, or rather, this class should just be replaced by a lambda. But we need the IConfigRecord for templates to be compile-compatible with old CNTK config.
-    BrainScriptNetworkBuilder(const ScriptableObjects::IConfigRecord& config)
-    {
-        m_deviceId = config[L"deviceId"]; // TODO: only needed for LoadNetworkFromFile() which should go away anyway
-        m_createNetworkFn = config[L"createNetwork"].AsPtr<ScriptableObjects::ConfigLambda>();
-    }
-    // not supported for old CNTK
-    BrainScriptNetworkBuilder(const ConfigParameters& config)
-    {
-        NOT_IMPLEMENTED;
-    }
-
-    // build a ComputationNetwork from description language
-    virtual /*IComputationNetBuilder::*/ ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
-    {
-        vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
-        ScriptableObjects::ConfigLambda::NamedParams namedArgs;
-        let netValue = m_createNetworkFn->Apply(move(args), move(namedArgs), L"BuildNetworkFromDescription");
-        m_net = netValue.AsPtr<ComputationNetwork>();
-        if (m_net->GetDeviceId() < 0)
-            fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
-        else
-            fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int) m_net->GetDeviceId());
-        return m_net;
-    }
-
-    // load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
-    // TODO: This does not belong into NetworkBuilder, since the code is the same for all. Just create the network and load the darn thing.
-    virtual /*IComputationNetBuilder::*/ ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
-                                                                                 bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
-    {
-        if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load --TODO: why all these options?
-        {
-            auto net = make_shared<ComputationNetwork>(m_deviceId);
-            net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
-            m_net = net;
-        }
-        m_net->ResetEvalTimeStamps();
-        return m_net.get();
-    }
-};
-
 // TODO: decide where these should go. Also, do we need three variables?
 extern wstring standardFunctions;
 extern wstring commonMacros;
 extern wstring computationNodes;
 
 // helper that returns 'float' or 'double' depending on ElemType
-template <class ElemType>
-static const wchar_t* ElemTypeName();
-template <>
-/*static*/ const wchar_t* ElemTypeName<float>()
-{
-    return L"float";
-}
-template <>
-/*static*/ const wchar_t* ElemTypeName<double>()
-{
-    return L"double";
-}
+template <class ElemType> static const wchar_t* ElemTypeName();
+template <> /*static*/ const wchar_t* ElemTypeName<float>()  { return L"float"; }
+template <> /*static*/ const wchar_t* ElemTypeName<double>() { return L"double"; }
 
 function<ComputationNetworkPtr(DEVICEID_TYPE)> GetCreateNetworkFn(const ScriptableObjects::IConfigRecord& config)
 {

@@ -75,51 +75,50 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
     QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
     // BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
     BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
     BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
     QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
     //BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
     UnaryStandardNode(Cosine, x)
     BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
     BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
     BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
     UnaryStandardNode(Dropout, activationVectorSequence)
     //BinaryStandardNode(DummyCriterionNode)
     BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
     BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
     UnaryStandardNode(Exp, x)
     QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
     UnaryStandardNode(InvStdDev, dataVectorSequence)
     BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
     //BinaryStandardNode(LSTMNode)
     UnaryStandardNode(Log, x)
     UnaryStandardNode(LogSoftmax, z)
     //BinaryStandardNode(LookupTableNode)
     UnaryStandardNode(MatrixL1Reg, matrix)
     UnaryStandardNode(MatrixL2Reg, matrix)
     // BUGBUG: CNTKBook also mentions L1Norm and L2Norm
     UnaryStandardNode(Mean, dataVectorSequence)
     BinaryStandardNode(Minus, leftMatrix, rightMatrix)
     UnaryStandardNode(Negate, input)
     //BinaryStandardNode(NoiseContrastiveEstimationNode)
-    //BinaryStandardNode(PairNetworkNode)
     //BinaryStandardNode(ParallelNode)
     TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
     TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
     BinaryStandardNode(Plus, leftMatrix, rightMatrix)
     UnaryStandardNode(RectifiedLinear, z)
     //BinaryStandardNode(RowElementTimesNode)
     BinaryStandardNode(Scale, scalarScalingFactor, matrix)
     //BinaryStandardNode(SequenceDecoderNode)
     UnaryStandardNode(Sigmoid, z)
     UnaryStandardNode(Softmax, z)
     UnaryStandardNode(Hardmax, z)
     BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
     //BinaryStandardNode(StrideTimesNode)
     //BinaryStandardNode(SumColumnElementsNode)
     UnaryStandardNode(SumElements, matrix)
     UnaryStandardNode(Tanh, z)
     UnaryStandardNode(TimeReverse, vectorSequence)
     BinaryStandardNode(Times, leftMatrix, rightMatrix)
     UnaryStandardNode(Transpose, matrix)
     //BinaryStandardNode(TransposeTimesNode)
     ;

@@ -23,10 +23,8 @@
 #include "SGD.h"
 #include "MPIWrapper.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ProgressTracing.h"
 #include "fileutil.h"

@@ -240,10 +238,6 @@ void DoCommands(const ConfigParameters& config)
             {
                 DoEval<ElemType>(commandParams);
             }
-            else if (action[j] == "testunroll")
-            {
-                DoEvalUnroll<ElemType>(commandParams);
-            }
             else if (action[j] == "edit")
             {
                 DoEdit<ElemType>(commandParams);

@@ -284,22 +278,6 @@ void DoCommands(const ConfigParameters& config)
             {
                 DoParameterSVD<ElemType>(commandParams);
             }
-            else if (action[j] == "trainEncoderDecoder")
-            {
-                DoEncoderDecoder<ElemType>(commandParams);
-            }
-            else if (action[j] == "testEncoderDecoder")
-            {
-                DoEvalEncodingBeamSearchDecoding<ElemType>(commandParams);
-            }
-            else if (action[j] == "trainBidirectionEncoderDecoder")
-            {
-                DoBidirectionEncoderDecoder<ElemType>(commandParams);
-            }
-            else if (action[j] == "beamSearch")
-            {
-                DoBeamSearchDecoding<ElemType>(commandParams);
-            }
             else
             {
                 RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());

@@ -280,8 +280,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
         ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(LSTMNode), L"LSTM"))
         ret = true;
-    else if (EqualInsensitive(nodeType, OperationNameOf(PairNetworkNode), L"PairNetwork"))
-        ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(StrideTimesNode), L"StrideTimes"))
         ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode)))

@@ -59,9 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription
     case RCRF:
         net = BuildSeqTrnLSTMNetworkFromDescription();
         break;
-    case LSTMENCODER:
-        net = BuildLSTMEncoderNetworkFromDescription();
-        break;
     case UNIDIRECTIONALLSTM:
         net = BuildUnidirectionalLSTMNetworksFromDescription();
         break;

@@ -72,35 +69,12 @@
         LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
     }
 
     // post-process the network
-#if 1
     net->CompileNetwork();
-#else
-    net->ValidateNetwork(false /*allowFragment*/, true /*bAllowNoCriterion*/); // no criterion possible because ...TODO: what's the reason?
-#endif
 
     return net;
 }
 
-// special version for a deprecated implementation of sequence-to-sequence models
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
-{
-    ComputationNetworkPtr net;
-    switch (m_rnnType)
-    {
-    case ALIGNMENTSIMILARITYGENERATOR:
-        net = BuildAlignmentDecoderNetworkFromDescription(encoderNet);
-        net->CompileNetwork();
-        return net;
-    case ALIGNMENTSIMILARITYGFORWARDDECODER:
-        net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet);
-        net->CompileNetwork();
-        return net;
-    }
-    return BuildNetworkFromDescription();
-}
-
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
 {

@ -530,265 +504,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
||||||
return m_net;
|
return m_net;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
this builds an alignment based LM generator
|
|
||||||
the aligment node takes a variable length input and relates each element to a variable length output
|
|
||||||
*/
|
|
||||||
template <class ElemType>
|
|
||||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
|
|
||||||
{
|
|
||||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
|
||||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
|
||||||
{
|
|
||||||
unsigned long randomSeed = 1;
|
|
||||||
|
|
||||||
size_t numHiddenLayers = m_layerSizes.size() - 2;
|
|
||||||
|
|
||||||
size_t numRecurrentLayers = m_recurrentLayers.size();
|
|
||||||
|
|
||||||
ComputationNodePtr input, encoderOutput, e,
|
|
||||||
b, w, u, pastValue, output, label, alignoutput;
|
|
||||||
ComputationNodePtr clslogpostprob;
|
|
||||||
ComputationNodePtr clsweight;
|
|
||||||
ComputationNodePtr columnStride, rowStride;
|
|
||||||
|
|
||||||
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
|
|
||||||
m_net->FeatureNodes().push_back(input);
|
|
||||||
|
|
||||||
if (m_lookupTableOrder > 0)
|
|
||||||
{
|
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
|
|
||||||
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
output = builder.LookupTable(e, input, L"LookupTable");
|
|
||||||
|
|
||||||
if (m_addDropoutNodes)
|
|
||||||
input = builder.Dropout(output);
|
|
||||||
else
|
|
||||||
input = output;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
|
||||||
}
|
|
||||||
|
|
||||||
int recur_idx = 0;
|
|
||||||
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
|
||||||
|
|
||||||
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
|
|
||||||
std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
|
|
||||||
if (encoderPairNodes.size() != 1)
|
|
||||||
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
|
|
||||||
|
|
||||||
encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
|
|
||||||
|
|
||||||
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
|
|
||||||
std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
|
|
||||||
if (encoderEvaluationNodes.size() != 1)
|
|
||||||
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
|
|
||||||
|
|
||||||
if (numHiddenLayers > 0)
|
|
||||||
{
|
|
||||||
int i = 1 + offset;
|
|
||||||
u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
|
|
||||||
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
|
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
|
|
||||||
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
|
|
||||||
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
|
||||||
// output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
|
||||||
|
|
||||||
/// alignment node to get weights from source to target
|
|
||||||
/// this aligment node computes weights of the current hidden state after special encoder ending symbol to all
|
|
||||||
/// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
|
|
||||||
/// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
|
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
|
|
||||||
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
|
|
||||||
columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
|
|
||||||
columnStride->Value().SetValue(1);
|
|
||||||
columnStride->SetParameterUpdateRequired(false);
|
|
||||||
rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
|
|
||||||
rowStride->Value().SetValue(0);
|
|
||||||
rowStride->SetParameterUpdateRequired(false);
|
|
||||||
alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
|
|
||||||
|
|
||||||
// alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
|
|
||||||
|
|
||||||
output = ApplyNonlinearFunction(
|
|
||||||
builder.Plus(
|
|
||||||
builder.Times(u, input), builder.Times(w, alignoutput)),
|
|
||||||
0);
|
|
||||||
pastValue->AttachInputs(output);
|
|
||||||
input = output;
|
|
||||||
|
|
||||||
for (; i < numHiddenLayers; i++)
|
|
||||||
{
|
|
||||||
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
|
|
||||||
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
|
|
||||||
|
|
||||||
if (m_addDropoutNodes)
|
|
||||||
input = builder.Dropout(output);
|
|
||||||
else
|
|
||||||
input = output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// need to have [input_dim x output_dim] matrix
|
|
||||||
/// e.g., [200 x 10000], where 10000 is the vocabulary size
|
|
||||||
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
|
|
||||||
/// the label is a dense matrix. each element is the word index
|
|
||||||
label = builder.CreateInputNode(L"labels", 4);
|
|
||||||
|
|
||||||
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
|
||||||
m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
|
|
||||||
|
|
||||||
output = builder.Times(builder.Transpose(w), input, L"outputs");
|
|
||||||
|
|
||||||
m_net->PairNodes().push_back(input);
|
|
||||||
|
|
||||||
m_net->OutputNodes().push_back(output);
|
|
||||||
|
|
||||||
//add softmax layer (if prob is needed or KL reg adaptation is needed)
|
|
||||||
output = builder.Softmax(output, L"PosteriorProb");
|
|
||||||
}
|
|
||||||
|
|
||||||
return m_net;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class ElemType>
|
|
||||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
|
|
||||||
{
|
|
||||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
|
||||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
|
||||||
{
|
|
||||||
unsigned long randomSeed = 1;
|
|
||||||
|
|
||||||
size_t numHiddenLayers = m_layerSizes.size() - 2;
|
|
||||||
|
|
||||||
size_t numRecurrentLayers = m_recurrentLayers.size();
|
|
||||||
|
|
||||||
ComputationNodePtr input, encoderOutput, e,
|
|
||||||
b, w, u, pastValue, output, label, alignoutput;
|
|
||||||
ComputationNodePtr clslogpostprob;
|
|
||||||
ComputationNodePtr clsweight;
|
|
||||||
ComputationNodePtr columnStride, rowStride;
|
|
||||||
|
|
||||||
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
|
|
||||||
m_net->FeatureNodes().push_back(input);
|
|
||||||
|
|
||||||
if (m_lookupTableOrder > 0)
|
|
||||||
{
|
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
|
|
||||||
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
output = builder.LookupTable(e, input, L"LookupTable");
|
|
||||||
|
|
||||||
if (m_addDropoutNodes)
|
|
||||||
input = builder.Dropout(output);
|
|
||||||
else
|
|
||||||
input = output;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
|
||||||
}
|
|
||||||
|
|
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
-        if (encoderPairNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
-
-        encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
-        if (encoderEvaluationNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
-
-        if (numHiddenLayers > 0)
-        {
-            int i = 1 + offset;
-            u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
-            m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
-            w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-            pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
-            // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-
-            /// alignment node to get weights from source to target
-            /// this aligment node computes weights of the current hidden state after special encoder ending symbol to all
-            /// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
-            /// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-
-            columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
-            columnStride->Value().SetValue(1);
-            columnStride->SetParameterUpdateRequired(false);
-            rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
-            rowStride->Value().SetValue(0);
-            rowStride->SetParameterUpdateRequired(false);
-            alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
-
-            // alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
-
-            output = ApplyNonlinearFunction(
-                builder.Plus(
-                    builder.Times(u, input), builder.Times(w, alignoutput)),
-                0);
-            pastValue->AttachInputs(output);
-            input = output;
-
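The deleted alignment block above is a soft-attention step. As a reading aid, the StrideTimes/Softmax composition corresponds to the following (a sketch inferred from the code, not text from the commit: H is `encoderOutput` with one column per source position, W_s is the `MatForSimilarity` parameter, s_{t-1} is the previous decoder state delivered by `pastValue`, and x_t is the current `input`):

    \alpha_t = \mathrm{softmax}\!\left( H^{\top} W_s \, s_{t-1} \right), \qquad
    c_t = H \alpha_t, \qquad
    s_t = \sigma\!\left( U x_t + W c_t \right)

so c_t (`alignoutput`) is the attention-weighted summary of encoder states that gets mixed into the recurrence through ApplyNonlinearFunction.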
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        /// need to have [input_dim x output_dim] matrix
-        /// e.g., [200 x 10000], where 10000 is the vocabulary size
-        /// this is for speed-up issue as per word matrix can be simply obtained using column slice
-        w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
-        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-        /// the label is a dense matrix. each element is the word index
-        label = builder.CreateInputNode(L"labels", 4);
-
-        clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
-        m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
-        clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
-
-        output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
-                                               clslogpostprob);
-
-        output = builder.Times(builder.Transpose(w), input, L"outputs");
-
-        m_net->PairNodes().push_back(input);
-
-        m_net->OutputNodes().push_back(output);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(output, L"PosteriorProb");
-    }
-
-    return m_net;
-}
 
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription()
 {
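The `[input_dim x output_dim]` comment in the deleted output layer is the usual class-based-softmax layout argument: storing the output embedding matrix hidden-dimension-major makes the vector for one word a contiguous column, so a per-word lookup is a column slice rather than a gather. A minimal illustration of that layout (plain C++, not CNTK API):

    #include <cstddef>
    #include <vector>

    // W stored column-major as [hiddenDim x vocabSize]: column w holds word w's output vector.
    struct OutputEmbedding
    {
        std::size_t hiddenDim, vocabSize;
        std::vector<float> data; // hiddenDim * vocabSize entries, column-major

        // O(1) view of one word's vector; no copy, no transpose.
        const float* wordColumn(std::size_t wordId) const
        {
            return data.data() + wordId * hiddenDim;
        }
    };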
@@ -1608,95 +1323,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
     return m_net;
 }
 
-/**
-This is encoder LSTM described in the following papers:
-H. Sutskever, O. Vinyals and Q. V. Le, "Sequence to sequence learning with neural networks", http://arxiv.org/abs/1409.3215
-
-The following code constructs the encoder and, to construct decoder, use BuildLSTMNetworkFromDescription
-
-Developed by Kaisheng Yao
-This is used in the following works:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t i = 0;
-        size_t numHiddenLayers = m_layerSizes.size() - 1;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior;
-
-        if (m_sparse_input)
-            input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
-        else
-            input = builder.CreateInputNode(L"features", m_layerSizes[0]);
-
-        m_net->FeatureNodes().push_back(input);
-
-        if (m_applyMeanVarNorm)
-        {
-            w = builder.Mean(input);
-            b = builder.InvStdDev(input);
-            output = builder.PerDimMeanVarNormalization(input, w, b);
-
-            input = output;
-        }
-
-        if (m_lookupTableOrder > 0)
-        {
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"EncoderE%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, L"EncoderLookupTable");
-#ifdef DEBUG_DECODER
-            e->Value().SetValue((ElemType) 0.01);
-#endif
-
-            if (m_addDropoutNodes)
-                input = builder.Dropout(output);
-            else
-                input = output;
-            i++;
-        }
-
-        /// direct connect from input node to output node
-
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-        if (numHiddenLayers > 0)
-        {
-            //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            input = output;
-            i++;
-
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        m_net->OutputNodes().push_back(output);
-        m_net->PairNodes().push_back(output); /// need to provide pairnodes so that the next layer of network can connect to this network
-        m_net->EvaluationNodes().push_back(output);
-    }
-
-    return m_net;
-}
-
 /**
 Build unidirectional LSTM p(y_t | y_t-1, x_1^t)
 
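Read together, the two deleted builders formed the legacy two-network pairing: the encoder advertised its top layer through PairNodes(), and the decoder mirrored that node into its own graph with PairNetwork before attending over it. Schematically (a sketch of the deleted flow reusing the deleted APIs; `encoderTopLayer` is an illustrative name):

    // Encoder side (deleted BuildLSTMEncoderNetworkFromDescription):
    encoderNet->PairNodes().push_back(encoderTopLayer); // advertise the interface node

    // Decoder side (deleted BuildAlignmentDecoderNetworkFromDescription):
    auto encoderOutput = builder.PairNetwork(
        dynamic_pointer_cast<ComputationNode<ElemType>>(encoderNet->PairNodes()[0]),
        L"pairNetwork"); // mirror the encoder node into the decoder graph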
@@ -38,16 +38,13 @@ enum RNNTYPE
     DEEPRNN = 4,
     CLASSLM = 8,
     LBLM = 16,
-    LSTMENCODER = 18,
     NPLM = 32,
     CLASSLSTM = 64,
     NCELSTM = 128,
     CLSTM = 256,
     RCRF = 512,
     UNIDIRECTIONALLSTM = 19,
-    BIDIRECTIONALLSTM = 20,
-    ALIGNMENTSIMILARITYGENERATOR = 21,
-    ALIGNMENTSIMILARITYGFORWARDDECODER = 22
+    BIDIRECTIONALLSTM = 20
 };
 
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -191,18 +188,12 @@ public:
             m_rnnType = CLSTM;
         else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
             m_rnnType = RCRF;
-        else if (std::find(strType.begin(), strType.end(), L"LSTMENCODER") != strType.end())
-            m_rnnType = LSTMENCODER;
         else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
                  std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
             m_rnnType = UNIDIRECTIONALLSTM;
         else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
                  std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
             m_rnnType = BIDIRECTIONALLSTM;
-        else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGENERATOR") != strType.end())
-            m_rnnType = ALIGNMENTSIMILARITYGENERATOR;
-        else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGFORWARDDECODER") != strType.end())
-            m_rnnType = ALIGNMENTSIMILARITYGFORWARDDECODER;
         else
             InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
     }
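For orientation, `strType` holds the `rnnType` value from the builder configuration, so the removed branches mean those names are now rejected up front. A hypothetical config fragment (illustrative syntax, not from the commit):

    SimpleNetworkBuilder = [
        # "LSTMENCODER" and the ALIGNMENTSIMILARITY* names now fall through to InvalidArgument
        rnnType = "CLASSLSTM"
    ]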
@@ -255,7 +246,6 @@ public:
     }
 
     ComputationNetworkPtr BuildNetworkFromDescription();
-    ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet); // legacy support of deprecated sequence-to-sequence implementation
 
     ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // legacy support for fseide's Microsoft-internal tool "DBN.exe"
 

@@ -287,8 +277,6 @@ protected:
 
     ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
 
-    ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();
-
     ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
 
     ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
 

@@ -299,10 +287,6 @@ protected:
 
     ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
 
-    ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
-
-    ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
-
     //layer is 0 based
     ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
     ComputationNodePtr AddTrainAndEvalCriterionNodes(ComputationNodePtr input, ComputationNodePtr label, ComputationNodePtr matrix = nullptr, const std::wstring trainNodeName = L"", const std::wstring evalNodeName = L"", ComputationNodePtr clspostprob = nullptr, ComputationNodePtr trans = nullptr);
@@ -183,7 +183,6 @@ static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr>& nes
 // This sets index, lowLink, m_visited, and m_inStack.
 void ComputationNetwork::DetermineSCCs(const ComputationNodeBasePtr& rootNode)
 {
-    // notice that this graph including graphs from a parent networks if two or more networks are connected via PairNetworkNode
     list<ComputationNodeBasePtr> sccStack;
     size_t index = 0;
     size_t loopId = 0; // BUGBUG: I think this is currently buggy in an edge case, and not needed (use m_allSEQNodes.size() instead).
@@ -100,8 +100,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
         return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode))
         return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
-    else if (nodeType == OperationNameOf(PairNetworkNode))
-        return New<PairNetworkNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ParallelNode))
         return New<ParallelNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(PastValueNode))
@@ -293,12 +291,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
     return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
 }
 
-template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols)
-{
-    return net.AddNodeToNetWithElemType(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), inputName, rows, cols));
-}
-
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName,
                                                                                                  const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
|
||||||
// The following functions create nodes and link them to the network and their inputs.
|
// The following functions create nodes and link them to the network and their inputs.
|
||||||
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
|
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
|
||||||
|
|
||||||
template <class ElemType>
|
|
||||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName)
|
|
||||||
{
|
|
||||||
if (net.GetNodeFromName(a->NodeName(), nullptr, false) != nullptr)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "PairNetwork: asked to pair a node with name %ls in another network. However, this network has already a node with the same name. Should avoid this case.\n", a->NodeName().c_str());
|
|
||||||
RuntimeError("PairNetwork: asked to pair a node with name in another network. However, this network has already a node with the same name. Should avoid this case.\n");
|
|
||||||
}
|
|
||||||
return net.AddNodeToNetAndAttachInputs(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), nodeName), a);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
|
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
|
||||||
const ComputationNodePtr inputValues,
|
const ComputationNodePtr inputValues,
|
||||||
|
|
|
@@ -51,7 +51,6 @@ public:
     ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
     ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
     ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
-    ComputationNodePtr CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols);
     ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
     ComputationNodePtr CreateMaxPoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
     ComputationNodePtr CreateAveragePoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
 

@@ -60,7 +59,6 @@ public:
     ComputationNodePtr CreateComputationNode(const std::wstring& nodeType, const std::wstring& nodeName);
     // The following functions create nodes and link them to the network and their inputs.
     // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
-    ComputationNodePtr PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName = L"");
     ComputationNodePtr Convolution(const ComputationNodePtr weight,
                                    const ComputationNodePtr inputValues,
                                    const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
@@ -1662,98 +1662,6 @@ public:
 template class StrideTimesNode<float>;
 template class StrideTimesNode<double>;
 
-// -----------------------------------------------------------------------
-// PairNetworkNode (input)
-// -----------------------------------------------------------------------
-
-/**
-pair this node to a node in another network
-this node provide an interface from this network. The next layer network then can use this interface to know which node to connect to.
-*/
-template <class ElemType>
-class PairNetworkNode : public ComputationNode<ElemType>, public NumInputs<1>
-{
-    typedef ComputationNode<ElemType> Base;
-    UsingComputationNodeMembersBoilerplate;
-    static const std::wstring TypeName()
-    {
-        return L"PairNetwork";
-    }
-
-    void Init(size_t row_size, size_t /*col_size*/)
-    {
-        CreateMatrixIfNull(m_value);
-        SetDims(TensorShape(row_size), HasMBLayout());
-        UpdateFunctionValuesSize();
-    }
-
-public:
-    DeclareConstructorFromConfigWithNumInputs(PairNetworkNode);
-    PairNetworkNode(DEVICEID_TYPE deviceId, const wstring& name, size_t row_size = 1, size_t col_size = 1)
-        : Base(deviceId, name)
-    {
-        Init(row_size, col_size);
-        CreateMatrixIfNull(m_gradient);
-        m_gradient->Resize(row_size, col_size);
-        m_gradient->SetValue(0.0f);
-    }
-
-    virtual void Load(File& fstream, size_t modelVersion) override
-    {
-        Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
-        Base::Load(fstream, modelVersion);
-    }
-
-    /// to-do: need to change to the new way of resetting state
-    void BackpropToMap(const size_t inputIndex)
-    {
-        if (inputIndex > 0)
-            InvalidArgument("PairNetwork operation only takes one input.");
-
-        Matrix<ElemType>::ScaleAndAdd(1.0, Gradient(), Input(inputIndex)->Gradient());
-    }
-
-    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
-    {
-        if (fr.IsAllFrames())
-        {
-            BackpropToMap(inputIndex);
-            return;
-        } // TODO: remove these one by one
-        assert(GetSampleMatrixNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
-        assert(m_pMBLayout);
-
-        Matrix<ElemType> mTmp = Input(inputIndex)->GradientFor(fr);
-        Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), mTmp);
-    }
-
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        return false;
-    }
-    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
-    {
-        return false;
-    }
-
-    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
-    {
-        Matrix<ElemType> mTmp = ValueFor(fr);
-        mTmp.SetValue(Input(0)->ValueFor(fr));
-    }
-
-    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
-    {
-        Base::Validate(isFinalValidationPass);
-        InferMBLayoutFromInputsForStandardCase();
-
-        SetDims(Input(0));
-    }
-};
-
-template class PairNetworkNode<float>;
-template class PairNetworkNode<double>;
-
 // -----------------------------------------------------------------------
 // ParallelNode (input0, input1)
 // TODO: How is this different from RowStack?
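As the deleted ForwardProp/BackpropTo show, PairNetworkNode was a pure pass-through: the forward pass copied the paired input's value and the backward pass accumulated the gradient back into it. In symbols (a summary, not from the commit):

    v = x, \qquad \frac{\partial L}{\partial x} \mathrel{+}= \frac{\partial L}{\partial v}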
@@ -2,12 +2,12 @@
 // Copyright (c) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
 //
-//helpful macros
+// helpful macros
 // TODO: the file's name is too general to be included from outside; MathHelpers.h?
 
-//iterators
-//
-#pragma once
+//#pragma once
+
+// iterators
 #undef foreach_row
 #undef foreach_column
 #undef foreach_coord

@@ -19,5 +19,5 @@
 for (long _i = 0; _i < (_m).GetNumRows(); _i++)
 #define foreach_row_in_submat(_i, _istart, _iend, _m) for (long _i = _istart; _i < min(_iend, (_m).GetNumRows()); _i++)
 
-//this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
+// this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
 #define column_s_ind_colwisem(_colNum, _numRows) ((_numRows) * (_colNum))
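As a quick check of `column_s_ind_colwisem` above: with 5 rows, column 3 of a column-major array begins at flat index 15, since the macro expands to `((5) * (3))`. For instance (an illustrative compile-time check, not from the commit):

    static_assert(column_s_ind_colwisem(3, 5) == 15, "column 3 of a 5-row matrix starts at element 15");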
@@ -10,7 +10,6 @@
 #include "CompositeComputationNodes.h" // for PrecomputeNode
 #include "SimpleEvaluator.h"
 #include "DataReader.h"
-#include "IComputationNetBuilder.h"
 #include "ScriptableObjects.h"
 #include <vector>
 #include <string>
@@ -8,6 +8,7 @@
 #include "DataReader.h"
 #include "ComputationNetwork.h"
 #include "DataReaderHelpers.h"
+#include "Helpers.h"
 #include "fileutil.h"
 #include <vector>
 #include <string>