Replace all tabs with four spaces in all files in the solution.

Dong Yu 2014-11-06 19:24:05 -08:00
Parent c50e750b62
Commit b46302c10f
60 changed files with 3664 additions and 3664 deletions
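The change below is purely mechanical, so for context here is a minimal sketch (not part of the commit) of how such a bulk tab-to-space pass could be scripted, assuming a C++17 compiler with <filesystem>; the root path, the extension filter, and the helper name DetabFile are illustrative assumptions, and each tab is replaced literally with four spaces rather than expanded to tab stops.

#include <filesystem>
#include <fstream>
#include <sstream>
#include <string>

namespace fs = std::filesystem;

// Rewrite one file, replacing every tab character with four spaces.
static void DetabFile(const fs::path& path)
{
    std::ifstream in(path, std::ios::binary);
    std::stringstream buffer;
    buffer << in.rdbuf();
    const std::string text = buffer.str();
    in.close();

    std::string converted;
    converted.reserve(text.size());
    for (char c : text)
    {
        if (c == '\t')
            converted.append("    ");   // literal four-space replacement
        else
            converted.push_back(c);
    }

    if (converted != text)              // only rewrite files that actually change
    {
        std::ofstream out(path, std::ios::binary | std::ios::trunc);
        out << converted;
    }
}

int main(int argc, char* argv[])
{
    const fs::path root = (argc > 1) ? argv[1] : ".";   // solution root (assumed)
    for (const auto& entry : fs::recursive_directory_iterator(root))
    {
        if (!entry.is_regular_file())
            continue;
        const auto ext = entry.path().extension();
        if (ext == ".cpp" || ext == ".h" || ext == ".cu" || ext == ".cuh")
            DetabFile(entry.path());
    }
    return 0;
}

Built with any C++17 compiler and run from the solution root, a pass like this produces exactly the kind of whitespace-only diff recorded below.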

View file

@ -541,9 +541,9 @@ void BestGpu::QueryNvmlData()
// dliNotify == { dliStartProcessing | dliNotePreLoadLibrary | dliNotePreGetProc | dliNoteEndProcessing } on this call.
extern "C" INT_PTR WINAPI DelayLoadNofify(
unsigned dliNotify,
PDelayLoadInfo pdli
)
unsigned dliNotify,
PDelayLoadInfo pdli
)
{
// load the library from an alternate path
if (dliNotify == dliNotePreLoadLibrary && !strcmp(pdli->szDll, "nvml.dll"))

View file

@ -115,7 +115,7 @@ void Eval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& in
template<class ElemType>
void Eval<ElemType>::ResetState()
{
m_eval->ResetState();
m_eval->ResetState();
}
//The explicit instantiation

View file

@ -156,7 +156,7 @@ public:
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool DataEnd(EndDataType endDataType);
void SetSentenceEndInBatch(std::vector<size_t> &sentenceEnd);
void SetSentenceEndInBatch(std::vector<size_t> &sentenceEnd);
};
}}}

View file

@ -43,7 +43,7 @@ public:
virtual void LoadModel(const std::wstring& modelFileName)=0;
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup)=0;
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs)=0;
virtual void ResetState() = 0;
virtual void ResetState() = 0;
};
// GetEval - get a evaluator type from the DLL
@ -94,7 +94,7 @@ public:
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void ResetState();
virtual void ResetState();
};
}}}

View file

@ -123,7 +123,7 @@ public:
template <typename T>
File& operator<<(T val)
{
#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this
#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this
attempt([=]()
#endif
{
@ -132,7 +132,7 @@ public:
else
fput(m_file, val);
}
#ifndef __CUDACC__
#ifndef __CUDACC__
);
#endif
return *this;
@ -161,7 +161,7 @@ public:
template <typename T>
File& operator>>(T& val)
{
#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this
#ifndef __CUDACC__ // TODO: CUDA compiler blows up, fix this
attempt([&]()
#endif
{
@ -170,7 +170,7 @@ public:
else
fget(m_file, val);
}
#ifndef __CUDACC__
#ifndef __CUDACC__
);
#endif
return *this;

View file

@ -330,7 +330,7 @@ public:
};
// class CCritSec and CAutoLock -- simple critical section handling
#ifndef _WIN32 // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed
#ifndef _WIN32 // TODO: Currently only working under Windows; BROKEN otherwise, to be fixed
typedef int CRITICAL_SECTION;
static inline void InitializeCriticalSection(CRITICAL_SECTION *) {}
static inline void DeleteCriticalSection(CRITICAL_SECTION *) {}
@ -471,11 +471,11 @@ public:
#include <xlocale> // uses strlen()
#endif
#define strlen strlen_
#ifndef LINUX
#ifndef LINUX
template<typename _T> inline __declspec(deprecated("Dummy general template, cannot be used directly"))
#else
template<typename _T> inline
#endif // LINUX
#endif // LINUX
size_t strlen_(_T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); } // never be called but needed to keep compiler happy
template<typename _T> inline size_t strlen_(const _T &s) { return strnlen_s(static_cast<const char *>(s), SIZE_MAX); }
template<> inline size_t strlen_(char * &s) { return strnlen_s(s, SIZE_MAX); }

View file

@ -634,7 +634,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return false;
}
// ExistsCurrent - check to see if a key exists in THIS config, don't check parent
// ExistsCurrent - check to see if a key exists in THIS config, don't check parent
bool ExistsCurrent(const std::string & name) const
{
return (find(name) != end());

View file

@ -593,4 +593,4 @@ static inline std::string &trim(std::string &s) {
vector<string> sep_string(const string & str, const string & sep);
#endif // _FILEUTIL_
#endif // _FILEUTIL_

View file

@ -71,7 +71,7 @@ DataWriter<ElemType>::DataWriter(const ConfigParameters& config)
template<class ElemType>
DataWriter<ElemType>::~DataWriter()
{
delete m_dataWriter;
delete m_dataWriter;
m_dataWriter = NULL;
}

The diff for this file is not shown because of its large size.

View file

@ -16,22 +16,22 @@ private:
msra::dbn::minibatchiterator* m_mbiter;
msra::dbn::minibatchsource* m_frameSource;
msra::dbn::minibatchreadaheadsource* m_readAheadSource;
msra::dbn::FileEvalSource* m_fileEvalSource;
msra::dbn::FileEvalSource* m_fileEvalSource;
msra::dbn::latticesource* m_lattices;
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
vector<bool> m_sentenceEnd;
vector<bool> m_sentenceEnd;
bool m_readAhead;
bool m_truncated;
vector<size_t> m_processedFrame;
size_t m_numberOfuttsPerMinibatch;
size_t m_actualnumberOfuttsPerMinibatch;
size_t m_mbSize;
vector<size_t> m_toProcess;
vector<size_t> m_switchFrame;
bool m_noData;
bool m_truncated;
vector<size_t> m_processedFrame;
size_t m_numberOfuttsPerMinibatch;
size_t m_actualnumberOfuttsPerMinibatch;
size_t m_mbSize;
vector<size_t> m_toProcess;
vector<size_t> m_switchFrame;
bool m_noData;
bool m_trainOrTest; // if false, in file writing mode
bool m_trainOrTest; // if false, in file writing mode
std::map<LabelIdType, LabelType> m_idToLabelMap;
@ -41,8 +41,8 @@ private:
std::vector<size_t> m_featuresBufferAllocatedMultiUtt;
std::vector<ElemType*> m_labelsBufferMultiUtt;
std::vector<size_t> m_labelsBufferAllocatedMultiUtt;
std::vector<size_t> m_featuresStartIndexMultiUtt;
std::vector<size_t> m_labelsStartIndexMultiUtt;
std::vector<size_t> m_featuresStartIndexMultiUtt;
std::vector<size_t> m_labelsStartIndexMultiUtt;
std::vector<ElemType*> m_featuresBufferMultiIO;
std::vector<size_t> m_featuresBufferAllocatedMultiIO;
@ -57,38 +57,38 @@ private:
// for writing outputs to files (standard single input/output network) - deprecate eventually
bool m_checkDictionaryKeys;
bool m_convertLabelsToTargets;
std::vector <bool> m_convertLabelsToTargetsMultiIO;
std::vector <bool> m_convertLabelsToTargetsMultiIO;
std::vector<std::vector<std::wstring>> m_inputFilesMultiIO;
size_t m_inputFileIndex;
std::vector<size_t> m_featDims;
std::vector<size_t> m_labelDims;
std::vector<size_t> m_featDims;
std::vector<size_t> m_labelDims;
std::vector<std::vector<std::vector<ElemType>>>m_labelToTargetMapMultiIO;
std::vector<std::vector<std::vector<ElemType>>>m_labelToTargetMapMultiIO;
void PrepareForTrainingOrTesting(const ConfigParameters& config);
void PrepareForWriting(const ConfigParameters& config);
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
void PrepareForWriting(const ConfigParameters& config);
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
void StartMinibatchLoopToWrite(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
bool ReNewBufferForMultiIO(size_t i);
bool ReNewBufferForMultiIO(size_t i);
size_t NumberSlicesInEachRecurrentIter() { return m_numberOfuttsPerMinibatch ;}
void SetNbrSlicesEachRecurrentIter(const size_t) { };
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels);
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels);
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
enum InputOutputTypes
{
real,
category,
};
real,
category,
};

View file

@ -33,159 +33,159 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// Create a Data Writer
//DATAWRITER_API IDataWriter* DataWriterFactory(void)
// Create a Data Writer
//DATAWRITER_API IDataWriter* DataWriterFactory(void)
template<class ElemType>
void HTKMLFWriter<ElemType>::Init(const ConfigParameters& writerConfig)
{
void HTKMLFWriter<ElemType>::Init(const ConfigParameters& writerConfig)
{
m_tempArray = nullptr;
m_tempArraySize = 0;
vector<wstring> scriptpaths;
vector<wstring> filelist;
size_t numFiles;
size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing
vector<wstring> scriptpaths;
vector<wstring> filelist;
size_t numFiles;
size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing
ConfigArray outputNames = writerConfig("outputNodeNames","");
if (outputNames.size()<1)
RuntimeError("writer needs at least one outputNodeName specified in config");
ConfigArray outputNames = writerConfig("outputNodeNames","");
if (outputNames.size()<1)
RuntimeError("writer needs at least one outputNodeName specified in config");
foreach_index(i, outputNames) // inputNames should map to node names
{
ConfigParameters thisOutput = writerConfig(outputNames[i]);
if (thisOutput.Exists("dim"))
udims.push_back(thisOutput("dim"));
else
RuntimeError("HTKMLFWriter::Init: writer need to specify dim of output");
foreach_index(i, outputNames) // inputNames should map to node names
{
ConfigParameters thisOutput = writerConfig(outputNames[i]);
if (thisOutput.Exists("dim"))
udims.push_back(thisOutput("dim"));
else
RuntimeError("HTKMLFWriter::Init: writer need to specify dim of output");
if (thisOutput.Exists("file"))
scriptpaths.push_back(thisOutput("file"));
else if (thisOutput.Exists("scpFile"))
scriptpaths.push_back(thisOutput("scpFile"));
else
RuntimeError("HTKMLFWriter::Init: writer needs to specify scpFile for output");
if (thisOutput.Exists("file"))
scriptpaths.push_back(thisOutput("file"));
else if (thisOutput.Exists("scpFile"))
scriptpaths.push_back(thisOutput("scpFile"));
else
RuntimeError("HTKMLFWriter::Init: writer needs to specify scpFile for output");
outputNameToIdMap[outputNames[i]]= i;
outputNameToDimMap[outputNames[i]]=udims[i];
wstring type = thisOutput("type","Real");
if (type == L"Real")
{
outputNameToTypeMap[outputNames[i]] = OutputTypes::outputReal;
}
else
{
throw std::runtime_error ("HTKMLFWriter::Init: output type for writer output expected to be Real");
}
}
outputNameToIdMap[outputNames[i]]= i;
outputNameToDimMap[outputNames[i]]=udims[i];
wstring type = thisOutput("type","Real");
if (type == L"Real")
{
outputNameToTypeMap[outputNames[i]] = OutputTypes::outputReal;
}
else
{
throw std::runtime_error ("HTKMLFWriter::Init: output type for writer output expected to be Real");
}
}
numFiles=0;
foreach_index(i,scriptpaths)
{
filelist.clear();
std::wstring scriptPath = scriptpaths[i];
fprintf(stderr, "HTKMLFWriter::Init: reading output script file %S ...", scriptPath.c_str());
size_t n = 0;
for (msra::files::textreader reader(scriptPath); reader && filelist.size() <= firstfilesonly/*optimization*/; )
{
filelist.push_back (reader.wgetline());
n++;
}
numFiles=0;
foreach_index(i,scriptpaths)
{
filelist.clear();
std::wstring scriptPath = scriptpaths[i];
fprintf(stderr, "HTKMLFWriter::Init: reading output script file %S ...", scriptPath.c_str());
size_t n = 0;
for (msra::files::textreader reader(scriptPath); reader && filelist.size() <= firstfilesonly/*optimization*/; )
{
filelist.push_back (reader.wgetline());
n++;
}
fprintf (stderr, " %d entries\n", n);
fprintf (stderr, " %d entries\n", n);
if (i==0)
numFiles=n;
else
if (n!=numFiles)
throw std::runtime_error (msra::strfun::strprintf ("HTKMLFWriter:Init: number of files in each scriptfile inconsistent (%d vs. %d)", numFiles,n));
if (i==0)
numFiles=n;
else
if (n!=numFiles)
throw std::runtime_error (msra::strfun::strprintf ("HTKMLFWriter:Init: number of files in each scriptfile inconsistent (%d vs. %d)", numFiles,n));
outputFiles.push_back(filelist);
}
outputFileIndex=0;
sampPeriod=100000;
outputFiles.push_back(filelist);
}
outputFileIndex=0;
sampPeriod=100000;
}
}
template<class ElemType>
void HTKMLFWriter<ElemType>::Destroy()
{
template<class ElemType>
void HTKMLFWriter<ElemType>::Destroy()
{
delete [] m_tempArray;
m_tempArray = nullptr;
m_tempArraySize = 0;
}
}
template<class ElemType>
void HTKMLFWriter<ElemType>::GetSections(std::map<std::wstring, SectionType, nocase_compare>& /*sections*/)
{
}
template<class ElemType>
void HTKMLFWriter<ElemType>::GetSections(std::map<std::wstring, SectionType, nocase_compare>& /*sections*/)
{
}
template<class ElemType>
bool HTKMLFWriter<ElemType>::SaveData(size_t /*recordStart*/, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t /*numRecords*/, size_t /*datasetSize*/, size_t /*byteVariableSized*/)
{
template<class ElemType>
bool HTKMLFWriter<ElemType>::SaveData(size_t /*recordStart*/, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t /*numRecords*/, size_t /*datasetSize*/, size_t /*byteVariableSized*/)
{
//std::map<std::wstring, void*, nocase_compare>::iterator iter;
if (outputFileIndex>=outputFiles[0].size())
RuntimeError("index for output scp file out of range...");
//std::map<std::wstring, void*, nocase_compare>::iterator iter;
if (outputFileIndex>=outputFiles[0].size())
RuntimeError("index for output scp file out of range...");
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
size_t id = outputNameToIdMap[outputName];
size_t dim = outputNameToDimMap[outputName];
wstring outFile = outputFiles[id][outputFileIndex];
assert(outputData.GetNumRows()==dim); dim;
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
size_t id = outputNameToIdMap[outputName];
size_t dim = outputNameToDimMap[outputName];
wstring outFile = outputFiles[id][outputFileIndex];
assert(outputData.GetNumRows()==dim); dim;
SaveToFile(outFile,outputData);
}
SaveToFile(outFile,outputData);
}
outputFileIndex++;
outputFileIndex++;
return true;
}
return true;
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());
outputData.CopyToArray(m_tempArray, m_tempArraySize);
ElemType * pValue = m_tempArray;
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
output(i,j) = (float)*pValue++;
}
}
const size_t nansinf = output.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outputFile.c_str(), (int) output.cols());
// save it
msra::files::make_intermediate_dirs (outputFile);
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outputFile, "USER", sampPeriod, output);
});
fprintf (stderr, "evaluate: writing %d frames of %S\n", output.cols(), outputFile.c_str());
for (int j=0; j< outputData.GetNumCols(); j++)
{
for (int i=0; i<outputData.GetNumRows(); i++)
{
output(i,j) = (float)*pValue++;
}
}
const size_t nansinf = output.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outputFile.c_str(), (int) output.cols());
// save it
msra::files::make_intermediate_dirs (outputFile);
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outputFile, "USER", sampPeriod, output);
});
fprintf (stderr, "evaluate: writing %d frames of %S\n", output.cols(), outputFile.c_str());
}
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveMapping(std::wstring saveId, const std::map<typename LabelIdType, typename LabelType>& /*labelMapping*/)
{
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveMapping(std::wstring saveId, const std::map<typename LabelIdType, typename LabelType>& /*labelMapping*/)
{
}
template class HTKMLFWriter<float>;
template class HTKMLFWriter<double>;
template class HTKMLFWriter<float>;
template class HTKMLFWriter<double>;
}}}

View file

@ -13,27 +13,27 @@ template<class ElemType>
class HTKMLFWriter : public IDataWriter<ElemType>
{
private:
std::vector<size_t> outputDims;
std::vector<std::vector<std::wstring>> outputFiles;
std::vector<size_t> outputDims;
std::vector<std::vector<std::wstring>> outputFiles;
std::vector<size_t> udims;
std::map<std::wstring,size_t> outputNameToIdMap;
std::map<std::wstring,size_t> outputNameToIdMap;
std::map<std::wstring,size_t> outputNameToDimMap;
std::map<std::wstring,size_t> outputNameToTypeMap;
unsigned int sampPeriod;
size_t outputFileIndex;
void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
unsigned int sampPeriod;
size_t outputFileIndex;
void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
ElemType * m_tempArray;
size_t m_tempArraySize;
enum OutputTypes
enum OutputTypes
{
outputReal,
outputCategory,
};
public:
virtual void Init(const ConfigParameters& writerConfig);
virtual void Init(const ConfigParameters& writerConfig);
virtual void Destroy();
virtual void GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections);
virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized);

View file

@ -18,338 +18,338 @@
namespace msra { namespace dbn {
class chunkevalsource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
class chunkevalsource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
msra::dbn::matrix feat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::wstring> outpaths; // [k] and their pathnames
std::vector<unsigned int> sampperiods; // [k] and sample periods (they should really all be the same...)
size_t vdim; // input dimension
size_t udim; // output dimension
bool minibatchready;
void operator=(const chunkevalsource &);
private:
void clear() // empty the FIFO
{
frames.clear();
boundaryflags.clear();
numframes.clear();
outpaths.clear();
sampperiods.clear();
minibatchready=false;
}
// data FIFO
msra::dbn::matrix feat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::wstring> outpaths; // [k] and their pathnames
std::vector<unsigned int> sampperiods; // [k] and sample periods (they should really all be the same...)
size_t vdim; // input dimension
size_t udim; // output dimension
bool minibatchready;
void operator=(const chunkevalsource &);
private:
void clear() // empty the FIFO
{
frames.clear();
boundaryflags.clear();
numframes.clear();
outpaths.clear();
sampperiods.clear();
minibatchready=false;
}
void saveandflush(msra::dbn::matrix &pred)
{
const size_t framesinblock = frames.size();
void saveandflush(msra::dbn::matrix &pred)
{
const size_t framesinblock = frames.size();
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[k];
unsigned int sampperiod = sampperiods[k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %d frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[k];
unsigned int sampperiod = sampperiods[k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %d frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;
// and we are done --forget the FIFO content & get ready for next chunk
clear();
// and we are done --forget the FIFO content & get ready for next chunk
clear();
}
}
public:
chunkevalsource (size_t numinput, size_t numoutput, size_t chunksize)
:vdim(numinput),udim(numoutput),chunksize(chunksize)
{
frames.reserve (chunksize * 2);
feat.resize(vdim,chunksize); // initialize to size chunksize
}
chunkevalsource (size_t numinput, size_t numoutput, size_t chunksize)
:vdim(numinput),udim(numoutput),chunksize(chunksize)
{
frames.reserve (chunksize * 2);
feat.resize(vdim,chunksize); // initialize to size chunksize
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
frames.push_back (v);
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
frames.push_back (v);
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
numframes.push_back (feat.cols());
outpaths.push_back (outpath);
sampperiods.push_back (sampperiod);
}
numframes.push_back (feat.cols());
outpaths.push_back (outpath);
sampperiods.push_back (sampperiod);
}
void createevalminibatch()
{
const size_t framesinblock = frames.size();
feat.resize(vdim, framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (frames, boundaryflags, 0, framesinblock, feat);
minibatchready=true;
}
void createevalminibatch()
{
const size_t framesinblock = frames.size();
feat.resize(vdim, framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (frames, boundaryflags, 0, framesinblock, feat);
minibatchready=true;
}
void writetofiles(msra::dbn::matrix &pred){ saveandflush(pred); }
void writetofiles(msra::dbn::matrix &pred){ saveandflush(pred); }
msra::dbn::matrix chunkofframes() { assert(minibatchready); return feat; }
msra::dbn::matrix chunkofframes() { assert(minibatchready); return feat; }
bool isminibatchready() { return minibatchready; }
bool isminibatchready() { return minibatchready; }
size_t currentchunksize() { return frames.size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
size_t currentchunksize() { return frames.size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
};
};
class chunkevalsourcemulti // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
class chunkevalsourcemulti // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesmulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::vector<std::wstring>> outpaths; // [k] and their pathnames
std::vector<std::vector<unsigned int>> sampperiods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
std::vector<size_t> udims; // output dimension
bool minibatchready;
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesmulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryflags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numframes; // [k] number of frames for all appended files
std::vector<std::vector<std::wstring>> outpaths; // [k] and their pathnames
std::vector<std::vector<unsigned int>> sampperiods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
std::vector<size_t> udims; // output dimension
bool minibatchready;
void operator=(const chunkevalsourcemulti &);
private:
void clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesmulti[i].clear();
outpaths[i].clear();
sampperiods[i].clear();
}
boundaryflags.clear();
numframes.clear();
minibatchready=false;
}
private:
void clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesmulti[i].clear();
outpaths[i].clear();
sampperiods[i].clear();
}
boundaryflags.clear();
numframes.clear();
minibatchready=false;
}
void saveandflush(msra::dbn::matrix &pred, size_t index)
{
const size_t framesinblock = framesmulti[index].size();
void saveandflush(msra::dbn::matrix &pred, size_t index)
{
const size_t framesinblock = framesmulti[index].size();
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[index][k];
unsigned int sampperiod = sampperiods[index][k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %d frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;
// write out all files
size_t firstframe = 0;
foreach_index (k, numframes)
{
const wstring & outfile = outpaths[index][k];
unsigned int sampperiod = sampperiods[index][k];
size_t n = numframes[k];
msra::files::make_intermediate_dirs (outfile);
fprintf (stderr, "saveandflush: writing %d frames to %S\n", n, outfile.c_str());
msra::dbn::matrixstripe thispred (pred, firstframe, n);
// some sanity check for the data we've written
const size_t nansinf = thispred.countnaninf();
if (nansinf > 0)
fprintf (stderr, "chunkeval: %d NaNs or INF detected in '%S' (%d frames)\n", (int) nansinf, outfile.c_str(), (int) thispred.cols());
// save it
msra::util::attempt (5, [&]()
{
msra::asr::htkfeatwriter::write (outfile, "USER", sampperiod, thispred);
});
firstframe += n;
}
assert (firstframe == framesinblock); framesinblock;
// and we are done --forget the FIFO content & get ready for next chunk
}
// and we are done --forget the FIFO content & get ready for next chunk
}
public:
chunkevalsourcemulti (std::vector<size_t> vdims, std::vector<size_t> udims, size_t chunksize)
:vdims(vdims),udims(udims),chunksize(chunksize)
{
chunkevalsourcemulti (std::vector<size_t> vdims, std::vector<size_t> udims, size_t chunksize)
:vdims(vdims),udims(udims),chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesmulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
outpaths.push_back(std::vector<std::wstring>());
sampperiods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesmulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
outpaths.push_back(std::vector<std::wstring>());
sampperiods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesmulti[index].push_back (v);
if (index==0)
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numframes.push_back (feat.cols());
// append data to chunk
template<class MATRIX> void addfile (const MATRIX & feat, const string & featkind, unsigned int sampperiod, const std::wstring & outpath, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesmulti[index].push_back (v);
if (index==0)
boundaryflags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numframes.push_back (feat.cols());
outpaths[index].push_back (outpath);
sampperiods[index].push_back (sampperiod);
}
outpaths[index].push_back (outpath);
sampperiods[index].push_back (sampperiod);
}
void createevalminibatch()
{
foreach_index(i, framesmulti)
{
void createevalminibatch()
{
foreach_index(i, framesmulti)
{
const size_t framesinblock = framesmulti[i].size();
feat[i].resize(vdims[i], framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (framesmulti[i], boundaryflags, 0, framesinblock, feat[i]);
}
minibatchready=true;
}
feat[i].resize(vdims[i], framesinblock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (framesmulti[i], boundaryflags, 0, framesinblock, feat[i]);
}
minibatchready=true;
}
void writetofiles(msra::dbn::matrix &pred, size_t index){ saveandflush(pred, index); }
void writetofiles(msra::dbn::matrix &pred, size_t index){ saveandflush(pred, index); }
msra::dbn::matrix chunkofframes(size_t index) { assert(minibatchready); assert(index<=feat.size()); return feat[index]; }
msra::dbn::matrix chunkofframes(size_t index) { assert(minibatchready); assert(index<=feat.size()); return feat[index]; }
bool isminibatchready() { return minibatchready; }
bool isminibatchready() { return minibatchready; }
size_t currentchunksize() { return framesmulti[0].size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
size_t currentchunksize() { return framesmulti[0].size(); }
void flushinput(){createevalminibatch();}
void reset() { clear(); }
};
};
class FileEvalSource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
class FileEvalSource // : public numamodelmanager
{
const size_t chunksize; // actual block size to perform computation on
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesMulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryFlags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numFrames; // [k] number of frames for all appended files
std::vector<std::vector<unsigned int>> sampPeriods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
bool minibatchReady;
size_t minibatchSize;
size_t frameIndex;
// data FIFO
std::vector<msra::dbn::matrix> feat;
std::vector<std::vector<std::vector<float>>> framesMulti; // [t] all feature frames concatenated into a big block
std::vector<char> boundaryFlags; // [t] -1 for first and +1 last frame, 0 else (for augmentneighbors())
std::vector<size_t> numFrames; // [k] number of frames for all appended files
std::vector<std::vector<unsigned int>> sampPeriods; // [k] and sample periods (they should really all be the same...)
std::vector<size_t> vdims; // input dimension
bool minibatchReady;
size_t minibatchSize;
size_t frameIndex;
void operator=(const FileEvalSource &);
void operator=(const FileEvalSource &);
private:
void Clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesMulti[i].clear();
sampPeriods[i].clear();
}
boundaryFlags.clear();
numFrames.clear();
minibatchReady=false;
frameIndex=0;
}
private:
void Clear() // empty the FIFO
{
foreach_index(i, vdims)
{
framesMulti[i].clear();
sampPeriods[i].clear();
}
boundaryFlags.clear();
numFrames.clear();
minibatchReady=false;
frameIndex=0;
}
public:
FileEvalSource (std::vector<size_t> vdims, size_t chunksize):vdims(vdims),chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesMulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
sampPeriods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
public:
FileEvalSource (std::vector<size_t> vdims, size_t chunksize):vdims(vdims),chunksize(chunksize)
{
foreach_index(i, vdims)
{
msra::dbn::matrix thisfeat;
std::vector<std::vector<float>> frames; // [t] all feature frames concatenated into a big block
frames.reserve(chunksize * 2);
framesMulti.push_back(frames);
//framesmulti[i].reserve (chunksize * 2);
thisfeat.resize(vdims[i], chunksize);
feat.push_back(thisfeat);
sampPeriods.push_back(std::vector<unsigned int>());
//feat[i].resize(vdims[i],chunksize); // initialize to size chunksize
}
}
// append data to chunk
template<class MATRIX> void AddFile (const MATRIX & feat, const string & /*featkind*/, unsigned int sampPeriod, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesMulti[index].push_back (v);
if (index==0)
boundaryFlags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numFrames.push_back (feat.cols());
// append data to chunk
template<class MATRIX> void AddFile (const MATRIX & feat, const string & /*featkind*/, unsigned int sampPeriod, size_t index)
{
// append to frames; also expand neighbor frames
if (feat.cols() < 2)
throw std::runtime_error ("evaltofile: utterances < 2 frames not supported");
foreach_column (t, feat)
{
std::vector<float> v (&feat(0,t), &feat(0,t) + feat.rows());
framesMulti[index].push_back (v);
if (index==0)
boundaryFlags.push_back ((t == 0) ? -1 : (t == feat.cols() -1) ? +1 : 0);
}
if (index==0)
numFrames.push_back (feat.cols());
sampPeriods[index].push_back (sampPeriod);
}
sampPeriods[index].push_back (sampPeriod);
}
void CreateEvalMinibatch()
{
foreach_index(i, framesMulti)
{
void CreateEvalMinibatch()
{
foreach_index(i, framesMulti)
{
const size_t framesInBlock = framesMulti[i].size();
feat[i].resize(vdims[i], framesInBlock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (framesMulti[i], boundaryFlags, 0, framesInBlock, feat[i]);
}
minibatchReady=true;
}
feat[i].resize(vdims[i], framesInBlock); // input features for whole utt (col vectors)
// augment the features
msra::dbn::augmentneighbors (framesMulti[i], boundaryFlags, 0, framesInBlock, feat[i]);
}
minibatchReady=true;
}
void SetMinibatchSize(size_t mbSize){ minibatchSize=mbSize;}
msra::dbn::matrix ChunkOfFrames(size_t index) { assert(minibatchReady); assert(index<=feat.size()); return feat[index]; }
void SetMinibatchSize(size_t mbSize){ minibatchSize=mbSize;}
msra::dbn::matrix ChunkOfFrames(size_t index) { assert(minibatchReady); assert(index<=feat.size()); return feat[index]; }
bool IsMinibatchReady() { return minibatchReady; }
bool IsMinibatchReady() { return minibatchReady; }
size_t CurrentFileSize() { return framesMulti[0].size(); }
void FlushInput(){CreateEvalMinibatch();}
void Reset() { Clear(); }
};
size_t CurrentFileSize() { return framesMulti[0].size(); }
void FlushInput(){CreateEvalMinibatch();}
void Reset() { Clear(); }
};
};};

View file

@ -47,7 +47,7 @@ template<class _T> FILE * fopenStdHandle (const _T * mode)
{
FILE * f = strchr (mode, 'r') ? stdin : stdout;
#ifndef __unix__ // don't need binary/text distinction on unix
if (strchr(mode, 'b') || strchr(mode, 't')) // change binary mode
if (strchr(mode, 'b') || strchr(mode, 't')) // change binary mode
{
// switch to binary mode if not yet (in case it is stdin)
int rc = _setmode (_fileno (f), strchr (mode, 'b') ? _O_BINARY : _O_TEXT);

View file

@ -233,8 +233,8 @@ public:
feat.resize(1);
uids.resize(1);
//transcripts.resize(1);
//lattices.resize(1);
//transcripts.resize(1);
//lattices.resize(1);
return getbatch(globalts, framesrequested, feat[0], uids[0], transcripts, lattices);
}

View file

@ -50,20 +50,20 @@ namespace msra { namespace dbn {
if (!paging()) return;
msra::files::make_intermediate_dirs (pagepath);
if (!wantread)
{
if (!wantread)
{
FILE *ftry = NULL;
wstring pathname (pagepath);
ftry = _wfopen (pathname.c_str(), L"wbS");
if (ftry) fclose (ftry);
}
}
/*
code below to cycle through a-z appended to file name is no longer necessary
since caller guarantees unique file names via HTKMLFReader
and we want the pagepath logged to the user to be the actual one used by the code
/*
code below to cycle through a-z appended to file name is no longer necessary
since caller guarantees unique file names via HTKMLFReader
and we want the pagepath logged to the user to be the actual one used by the code
// try to open the pagepath from a to z
// try to open the pagepath from a to z
if (!wantread)
{
FILE *ftry = NULL;
@ -77,7 +77,7 @@ namespace msra { namespace dbn {
if (ftry) fclose (ftry);
pagepath += --trynum;
}
*/
*/
f = fopenOrDie (pagepath, wantread ? L"rbS" : L"wbS");
reading = wantread;
}
@ -115,7 +115,7 @@ namespace msra { namespace dbn {
fsetpos (f, blockid * block.sizeinpagefile());
block.frompagefile (f);
}
public:
biggrowablevectorarray (const wstring & pagepath)
: growablevectorbase (65536), m (0),
@ -125,17 +125,17 @@ namespace msra { namespace dbn {
if (paging())
fprintf (stderr, "biggrowablevectorarray: creating disk backup store at '%S'\n", pagepath.c_str());
}
~biggrowablevectorarray() { // clean up the big temp file
if (paging()) {
fclose (f);
if (_wunlink (pagepath.c_str())==0)
fprintf (stderr, "biggrowablevectorarray: deleted disk backup store at '%S'\n", pagepath.c_str());
else
fprintf (stderr, "biggrowablevectorarray: unable to delete disk backup store at '%S'\n", pagepath.c_str());
}
}
size_t dim() const { return m; } // dimension of a frame
~biggrowablevectorarray() { // clean up the big temp file
if (paging()) {
fclose (f);
if (_wunlink (pagepath.c_str())==0)
fprintf (stderr, "biggrowablevectorarray: deleted disk backup store at '%S'\n", pagepath.c_str());
else
fprintf (stderr, "biggrowablevectorarray: unable to delete disk backup store at '%S'\n", pagepath.c_str());
}
}
size_t dim() const { return m; } // dimension of a frame
// reading phase
void push_back (const std::vector<float> & in)
@ -213,19 +213,19 @@ namespace msra { namespace dbn {
/*const*/ msra::dbn::matrix & block = getblock (t);
return msra::dbn::matrixstripe (block, blockt, 1);
}
wstring pagepathname(){ return pagepath;}
void cleanuppagefile()
{
if (paging()) {
fclose (f);
if (_wunlink (pagepath.c_str())==0){
fprintf (stderr, "biggrowablevectorarray: deleted disk backup store at '%S'\n", pagepath.c_str());
}
else{
fprintf (stderr, "biggrowablevectorarray: could NOT delete disk backup store at '%S'\n", pagepath.c_str());
}
}
}
wstring pagepathname(){ return pagepath;}
void cleanuppagefile()
{
if (paging()) {
fclose (f);
if (_wunlink (pagepath.c_str())==0){
fprintf (stderr, "biggrowablevectorarray: deleted disk backup store at '%S'\n", pagepath.c_str());
}
else{
fprintf (stderr, "biggrowablevectorarray: could NOT delete disk backup store at '%S'\n", pagepath.c_str());
}
}
}
};
// ---------------------------------------------------------------------------
@ -459,8 +459,8 @@ namespace msra { namespace dbn {
// for single input/output set size to be 1 and run old getbatch
feat.resize(1);
uids.resize(1);
//transcripts.resize(1);
//latticepairs.resize(1);
//transcripts.resize(1);
//latticepairs.resize(1);
return getbatch(globalts, framesrequested, feat[0], uids[0], transcripts, latticepairs);
}

View file

@ -361,10 +361,10 @@ public:
fprintf (stderr, " %d frames in %d out of %d utterances; %d classes\n", _totalframes, utteranceset.size(),infiles.size(), numclasses);
if (!labels.empty())
foreach_index (i, utteranceset)
{
{
if (classids[utteranceset[i].classidsbegin + utteranceset[i].numframes()] != (CLASSIDTYPE) -1)
throw std::logic_error ("minibatchutterancesource: classids[] out of sync");
}
}
if (nomlf + nolat > 0)
{
fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles.size(), nomlf, nolat);
@ -952,15 +952,15 @@ public:
return readfromdisk;
}
bool getbatch (const size_t globalts, const size_t framesrequested, std::vector<msra::dbn::matrix> & feat, std::vector<std::vector<size_t>> & uids,
bool getbatch (const size_t globalts, const size_t framesrequested, std::vector<msra::dbn::matrix> & feat, std::vector<std::vector<size_t>> & uids,
std::vector<std::vector<const_array_ref<msra::lattices::lattice::htkmlfwordsequence::word>>> & transcripts,
std::vector<std::vector<shared_ptr<const latticesource::latticepair>>> & latticepairs)
{
// for single input/output set size to be 1 and run old getbatch
feat.resize(1);
feat.resize(1);
uids.resize(1);
transcripts.resize(1);
latticepairs.resize(1);
transcripts.resize(1);
latticepairs.resize(1);
return getbatch(globalts, framesrequested, feat[0], uids[0], transcripts[0], latticepairs[0]);
}

The diff for this file is not shown because of its large size.

View file

@ -1253,7 +1253,7 @@ bool BatchLUSequenceReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix
{
// assert(features.GetMatrixType() == MatrixType::SPARSE);
//features.Resize(featInfo.dim, actualmbsize, true);
features.Resize(featInfo.dim * m_wordContext.size(), actualmbsize, true);
features.Resize(featInfo.dim * m_wordContext.size(), actualmbsize, true);
features.SetValue(0);
}

View file

@ -22,7 +22,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// Create a Data Writer
//DATAWRITER_API IDataWriter* DataWriterFactory(void)
//DATAWRITER_API IDataWriter* DataWriterFactory(void)
// comparison, not case sensitive.
@ -32,30 +32,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return ( first < second );
}
template<class ElemType>
void LUSequenceWriter<ElemType>::Init(const ConfigParameters& writerConfig)
{
template<class ElemType>
void LUSequenceWriter<ElemType>::Init(const ConfigParameters& writerConfig)
{
udims.clear();
ConfigArray outputNames = writerConfig("outputNodeNames","");
if (outputNames.size()<1)
RuntimeError("writer needs at least one outputNodeName specified in config");
ConfigArray outputNames = writerConfig("outputNodeNames","");
if (outputNames.size()<1)
RuntimeError("writer needs at least one outputNodeName specified in config");
foreach_index(i, outputNames) // inputNames should map to node names
{
ConfigParameters thisOutput = writerConfig(outputNames[i]);
outputFiles[outputNames[i]] = thisOutput("file");
foreach_index(i, outputNames) // inputNames should map to node names
{
ConfigParameters thisOutput = writerConfig(outputNames[i]);
outputFiles[outputNames[i]] = thisOutput("file");
int iN = thisOutput("nbest", "1");
nBests[outputNames[i]] = iN;
wstring fname = thisOutput("token");
wstring fname = thisOutput("token");
ReadLabelInfo(fname, word4idx[outputNames[i]], idx4word[outputNames[i]]);
size_t dim = idx4word[outputNames[i]].size();
size_t dim = idx4word[outputNames[i]].size();
udims.push_back(dim);
}
}
}
}
template<class ElemType>
void LUSequenceWriter<ElemType>::ReadLabelInfo(const wstring & vocfile,
@ -87,40 +87,40 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void LUSequenceWriter<ElemType>::Destroy()
{
template<class ElemType>
void LUSequenceWriter<ElemType>::Destroy()
{
for (auto ptr = outputFileIds.begin(); ptr != outputFileIds.end(); ptr++)
{
fclose(ptr->second);
}
}
template<class ElemType>
bool LUSequenceWriter<ElemType>::SaveData(size_t /*recordStart*/, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t /*numRecords*/, size_t /*datasetSize*/, size_t /*byteVariableSized*/)
{
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
wstring outFile = outputFiles[outputName];
SaveToFile(outFile,outputData, idx4word[outputName], nBests[outputName]);
}
template<class ElemType>
bool LUSequenceWriter<ElemType>::SaveData(size_t /*recordStart*/, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t /*numRecords*/, size_t /*datasetSize*/, size_t /*byteVariableSized*/)
{
for (auto iter = matrices.begin();iter!=matrices.end(); iter++)
{
wstring outputName = iter->first;
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
wstring outFile = outputFiles[outputName];
SaveToFile(outFile,outputData, idx4word[outputName], nBests[outputName]);
}
return true;
}
}
template<class ElemType>
void LUSequenceWriter<ElemType>::SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
{
template<class ElemType>
void LUSequenceWriter<ElemType>::SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
{
size_t nT = outputData.GetNumCols();
size_t nD = min(idx2wrd.size(), outputData.GetNumRows());
FILE *fp = nullptr;
vector<pair<size_t, ElemType>> lv;
auto NbestComparator = [](const pair<size_t, ElemType>& lv,const pair<size_t, ElemType>& rv){return lv.second > rv.second;};
auto NbestComparator = [](const pair<size_t, ElemType>& lv,const pair<size_t, ElemType>& rv){return lv.second > rv.second;};
if (outputFileIds.find(outputFile) == outputFileIds.end())
{
@ -136,13 +136,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fp = outputFileIds[outputFile];
for (int j=0; j< nT; j++)
{
{
int imax = 0;
ElemType fmax = outputData(imax,j);
lv.clear();
if (nbest > 1) lv.push_back(pair<size_t, ElemType>(0, fmax));
for (int i=1; i<nD; i++)
{
for (int i=1; i<nD; i++)
{
if (nbest > 1) lv.push_back(pair<size_t, ElemType>(i, outputData(i,j)));
if (outputData(i,j) > fmax)
{
@ -150,7 +150,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
imax = i;
}
}
if (nbest > 1) sort(lv.begin(),lv.end(),NbestComparator);
if (nbest > 1) sort(lv.begin(),lv.end(),NbestComparator);
for (int i = 0 ;i < nbest; i++)
{
if (nbest > 1)
@ -170,10 +170,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
fprintf(fp, "\n");
}
}
}
template class LUSequenceWriter<float>;
template class LUSequenceWriter<double>;
template class LUSequenceWriter<float>;
template class LUSequenceWriter<double>;
}}}

View file

@ -16,20 +16,20 @@ template<class ElemType>
class LUSequenceWriter : public IDataWriter<ElemType>
{
private:
std::vector<size_t> outputDims;
map<wstring, wstring> outputFiles;
std::vector<size_t> outputDims;
map<wstring, wstring> outputFiles;
map<wstring, FILE*> outputFileIds;
std::vector<size_t> udims;
map<wstring, map<string, int>> word4idx;
map<wstring, map<int, string>> idx4word;
map<wstring, int> nBests;
map<wstring, int> nBests;
bool compare_val(const ElemType& first, const ElemType& second);
void SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
void SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
void ReadLabelInfo(const wstring & vocfile,
void ReadLabelInfo(const wstring & vocfile,
map<string, int> & word4idx,
map<int, string>& idx4word);
@ -43,7 +43,7 @@ public:
void SaveMapping(std::wstring saveId, const std::map<typename LabelIdType, typename LabelType>& /*labelMapping*/){}
public:
virtual void Init(const ConfigParameters& writerConfig);
virtual void Init(const ConfigParameters& writerConfig);
virtual void Destroy();
virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized);
};

View file

@ -740,9 +740,9 @@ bool UCIFastReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemTyp
return m_cachingReader->GetMinibatch(matrices);
}
// get the features array
if (matrices.find(m_featuresName) == matrices.end())
RuntimeError("Features matrix not found in config file, there should be a section '%ls=[...]' in the configuration file.", m_featuresName.c_str());
if (matrices.find(m_featuresName) == matrices.end())
RuntimeError("Features matrix not found in config file, there should be a section '%ls=[...]' in the configuration file.", m_featuresName.c_str());
Matrix<ElemType>& features = *matrices[m_featuresName];
// get out if they didn't call StartMinibatchLoop() first

View file

@ -97,7 +97,7 @@ public:
size_t NumberSlicesInEachRecurrentIter() { return 1 ;}
void SetNbrSlicesEachRecurrentIter(const size_t) { };
void SetSentenceEndInBatch(std::vector<size_t> &/*sentenceEnd*/){};
void SetSentenceEndInBatch(std::vector<size_t> &/*sentenceEnd*/){};
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, typename LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);

View file

@ -37,7 +37,7 @@ extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval)
template<class ElemType>
void CNTKEval<ElemType>::Init(const std::string& config)
{
m_start = 0;
m_start = 0;
m_config.Parse(config);
if (m_config.Exists("modelPath"))
{
@ -144,7 +144,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
// now set the data in the reader
GetNodeDimensions(m_dimensions, nodeInput);
m_reader->SetData(&inputs, &m_dimensions);
m_reader->SetBoundary(m_start);
m_reader->SetBoundary(m_start);
// create the reader if necessary
if (m_writer == nullptr)
{
@ -164,7 +164,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
template<class ElemType>
void CNTKEval<ElemType>::ResetState()
{
m_start = 1 - m_start;
m_start = 1 - m_start;
}
// instantiate all the combinations we expect to be used

View file

@ -23,7 +23,7 @@ class CNTKEval : public IEvaluateModel<ElemType>
ConfigParameters m_config;
ComputationNetwork<ElemType>* m_net;
std::map<std::wstring, size_t> m_dimensions;
size_t m_start;
size_t m_start;
public:
// constructor
@ -45,6 +45,6 @@ public:
virtual void Init(const std::string& config);
virtual void Destroy();
virtual void ResetState();
virtual void ResetState();
};
}}}

View file

@ -23,8 +23,8 @@ private:
size_t m_recordCount; // count of records in this data
size_t m_currentRecord; // next record number to read
size_t m_mbSize;
vector<size_t> m_switchFrame;
size_t m_oldSig;
vector<size_t> m_switchFrame;
size_t m_oldSig;
public:
// Method to setup the data for the reader
void SetData(std::map<std::wstring, std::vector<ElemType>*>* inputs, std::map<std::wstring, size_t>* dimensions)
@ -55,26 +55,26 @@ public:
}
}
void SetBoundary (size_t newSig)
{
if (m_switchFrame.size()==0)
{
m_oldSig = newSig;
m_switchFrame.assign(1,0);
} else
{
if (m_oldSig==newSig)
{
m_switchFrame[0] = m_mbSize+8888;
}
else
{
m_switchFrame[0] = 0;
m_oldSig = newSig;
}
}
void SetBoundary (size_t newSig)
{
if (m_switchFrame.size()==0)
{
m_oldSig = newSig;
m_switchFrame.assign(1,0);
} else
{
if (m_oldSig==newSig)
{
m_switchFrame[0] = m_mbSize+8888;
}
else
{
m_switchFrame[0] = 0;
m_oldSig = newSig;
}
}
}
}
virtual void Init(const ConfigParameters& /*config*/)
{
@ -164,22 +164,22 @@ public:
size_t NumberSlicesInEachRecurrentIter() {return 1;}
void SetNbrSlicesEachRecurrentIter(const size_t ) {}
void SetSentenceEndInBatch(std::vector<size_t> &sentenceEnd)
{
sentenceEnd.resize(m_switchFrame.size());
for (size_t i = 0; i < m_switchFrame.size() ; i++)
{
sentenceEnd[i] = m_switchFrame[i];
}
}
void GetSentenceBoundary(std::vector<size_t> boundaryInfo)
{
m_switchFrame.resize(boundaryInfo.size());
for (size_t i = 0; i < m_switchFrame.size(); i ++)
{
m_switchFrame[i] = boundaryInfo[i];
}
}
void SetSentenceEndInBatch(std::vector<size_t> &sentenceEnd)
{
sentenceEnd.resize(m_switchFrame.size());
for (size_t i = 0; i < m_switchFrame.size() ; i++)
{
sentenceEnd[i] = m_switchFrame[i];
}
}
void GetSentenceBoundary(std::vector<size_t> boundaryInfo)
{
m_switchFrame.resize(boundaryInfo.size());
for (size_t i = 0; i < m_switchFrame.size(); i ++)
{
m_switchFrame[i] = boundaryInfo[i];
}
}
// GetLabelMapping - Gets the label mapping from integer index to label type
// returns - a map from numeric datatype to native label type
virtual const std::map<typename EvalReader<ElemType>::LabelIdType, typename EvalReader<ElemType>::LabelType>& GetLabelMapping(const std::wstring& /*sectionName*/)

View file

@ -326,14 +326,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (samples.GetNumCols() != ones.GetNumRows())
{
//ones._decideDevices(ones); // if it's going to move do it before we resize
//ones._decideDevices(ones); // if it's going to move do it before we resize
ones.Resize(samples.GetNumCols(), 1);
ones.SetValue(1);
}
if (samples.GetNumCols() != sampsqr.GetNumCols() || samples.GetNumRows() != sampsqr.GetNumRows())
{
//sampsqr._decideDevices(sampsqr); // if it's going to move do it before we resize
//sampsqr._decideDevices(sampsqr); // if it's going to move do it before we resize
sampsqr.Resize(samples.GetNumRows(), samples.GetNumCols());
sampsqr.SetValue(1); // value not needed, but need to get it to correct device (handled by SetValue())
}
@ -393,7 +393,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
avgsqr.TransferFromDeviceToDevice(avgsqr.GetDeviceId(), deviceId);
if (ones.GetDeviceId() != deviceId)
ones.TransferFromDeviceToDevice(ones.GetDeviceId(), deviceId);
if (sampsqr.GetDeviceId() != deviceId)
if (sampsqr.GetDeviceId() != deviceId)
sampsqr.TransferFromDeviceToDevice(sampsqr.GetDeviceId(), deviceId);
}
@ -608,7 +608,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class PerDimMeanVarNormalizationNode<float>;
template class PerDimMeanVarNormalizationNode<double>;
template<class ElemType>
template<class ElemType>
class PerDimMeanVarDeNormalizationNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;

View file

@ -37,7 +37,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
typedef ComputationNode<ElemType>* ComputationNodePtr;
typedef struct stRecurrentInfo{
std::vector<ComputationNodePtr> m_recurrentNodes;
std::vector<ComputationNodePtr> m_recurrentNodesForForward;
std::vector<ComputationNodePtr> m_recurrentNodesForForward;
ComputationNodePtr m_sourceNode;
int m_loopId;
bool m_completedGradient;
@ -427,7 +427,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ERootNodes");
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ECN");
ValidateNetwork(); //some internal values in the nodes are computed during validation
@ -1401,20 +1401,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto nodeIter=allNodes.begin(); nodeIter != allNodes.end(); nodeIter++)
{
(*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIteration);
if ((*nodeIter)->OperationName() == L"Delay")
{
for (size_t i = 0; i < m_nbrSlicesInEachRecurrentIteration; i++)
{
(*nodeIter)->ResetBound(i, m_sentenceEnd[i]);
}
if (m_sentenceEnd[0] <= m_actMiniBSize)
{
(*nodeIter)->Reset();
} else
{
(*nodeIter)->NotReset();
}
}
if ((*nodeIter)->OperationName() == L"Delay")
{
for (size_t i = 0; i < m_nbrSlicesInEachRecurrentIteration; i++)
{
(*nodeIter)->ResetBound(i, m_sentenceEnd[i]);
}
if (m_sentenceEnd[0] <= m_actMiniBSize)
{
(*nodeIter)->Reset();
} else
{
(*nodeIter)->NotReset();
}
}
}
for (auto nodeIter=allNodes.begin(); nodeIter != allNodes.end(); nodeIter++)
@ -1467,7 +1467,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void SetActualNbrSlicesInEachRecIter(const size_t aSize)
{
m_nbrSlicesInEachRecurrentIteration = aSize;
m_sentenceEnd.assign(aSize, m_actMiniBSize/aSize);
m_sentenceEnd.assign(aSize, m_actMiniBSize/aSize);
}
void ComputeGradientLoop(std::list<ComputationNodePtr>& /*allNodes*/, const ComputationNodePtr startNode)
@ -1745,10 +1745,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
for (ComputationNodePtr node : FinalCriterionNodes())
{
PrintComputationTree(node, false);
PrintComputationTree(node, false);
if(!allowFragment) FormRecurentLoops(node);
size_t actualMBSize = this->GetActualMBSize();
this->SetActualMiniBatchSize(actualMBSize);
size_t actualMBSize = this->GetActualMBSize();
this->SetActualMiniBatchSize(actualMBSize);
ValidateNetwork(node);
}
}
@ -1944,95 +1944,95 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// get the strong connected component from the graph
void getStrongSCC (const ComputationNodePtr rootNode)
{
std::unordered_set<ComputationNodePtr> visited;
std::list<ComputationNodePtr> sccStack;
size_t index = 0;
size_t loopId = 0;
if(rootNode->isVisisted()==false)
strongSCC(rootNode, sccStack, index, loopId);
}
// get the strong connected component from the graph
void getStrongSCC (const ComputationNodePtr rootNode)
{
std::unordered_set<ComputationNodePtr> visited;
std::list<ComputationNodePtr> sccStack;
size_t index = 0;
size_t loopId = 0;
if(rootNode->isVisisted()==false)
strongSCC(rootNode, sccStack, index, loopId);
}
void strongSCC (ComputationNodePtr cur, std::list<ComputationNodePtr>& sccStack, size_t& index, size_t& loopId)
{
cur->SetIndex(index);
cur->Setlowlink(index);
index++;
void strongSCC (ComputationNodePtr cur, std::list<ComputationNodePtr>& sccStack, size_t& index, size_t& loopId)
{
cur->SetIndex(index);
cur->Setlowlink(index);
index++;
cur->SetVisited(true);
sccStack.push_back(cur);
cur->SetInStack(true);
cur->SetVisited(true);
sccStack.push_back(cur);
cur->SetInStack(true);
for (int i = 0; i < cur->ChildrenSize(); i++)
{
if (cur->Inputs(i)->isVisisted() == false)
{
strongSCC(cur->Inputs(i),sccStack, index, loopId);
cur->Setlowlink(min(cur->Getlowlink(), cur->Inputs(i)->Getlowlink()));
} else if (cur->Inputs(i)->isInStack())
{
cur->Setlowlink(min(cur->Getlowlink(),cur->Inputs(i)->Getlowlink()));
}
}
for (int i = 0; i < cur->ChildrenSize(); i++)
{
if (cur->Inputs(i)->isVisisted() == false)
{
strongSCC(cur->Inputs(i),sccStack, index, loopId);
cur->Setlowlink(min(cur->Getlowlink(), cur->Inputs(i)->Getlowlink()));
} else if (cur->Inputs(i)->isInStack())
{
cur->Setlowlink(min(cur->Getlowlink(),cur->Inputs(i)->Getlowlink()));
}
}
if (cur->Getlowlink() == cur->GetIndex())
{
RecurrentInfo rInfo;
rInfo.m_loopId = loopId;
rInfo.m_sourceNode = cur;
size_t sccSize = 0;
if (cur->Getlowlink() == cur->GetIndex())
{
RecurrentInfo rInfo;
rInfo.m_loopId = loopId;
rInfo.m_sourceNode = cur;
size_t sccSize = 0;
for (;;)
{
ComputationNodePtr w = sccStack.back();
sccStack.pop_back();
w->SetInStack(false);
rInfo.m_recurrentNodes.push_back(w);
sccSize++;
if (w == cur)
{
break;
}
}
{
ComputationNodePtr w = sccStack.back();
sccStack.pop_back();
w->SetInStack(false);
rInfo.m_recurrentNodes.push_back(w);
sccSize++;
if (w == cur)
{
break;
}
}
rInfo.Reset();
if (sccSize>1)
{
loopId++;
m_recurrentInfo.push_back(rInfo);
}
}
}
if (sccSize>1)
{
loopId++;
m_recurrentInfo.push_back(rInfo);
}
}
}
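getStrongSCC/strongSCC above is Tarjan's strongly-connected-components algorithm applied to the computation graph: each node gets a DFS index and a low-link, nodes stay on a stack while their component is open, and when a node's low-link equals its own index the stack above it forms one component; components of size greater than one become recurrent loops. A generic sketch of the same algorithm over a plain adjacency list (standalone, not the ComputationNode API):

#include <algorithm>
#include <cstdio>
#include <vector>

// Sketch: Tarjan's SCC. Each component of size > 1 corresponds to one recurrent loop.
struct Tarjan
{
    const std::vector<std::vector<int>>& adj;
    std::vector<int> index, lowlink;
    std::vector<bool> onStack;
    std::vector<int> stack;
    std::vector<std::vector<int>> sccs;
    int counter = 0;

    explicit Tarjan(const std::vector<std::vector<int>>& g)
        : adj(g), index(g.size(), -1), lowlink(g.size(), 0), onStack(g.size(), false) {}

    void dfs(int v)
    {
        index[v] = lowlink[v] = counter++;
        stack.push_back(v); onStack[v] = true;
        for (int w : adj[v])
        {
            if (index[w] < 0) { dfs(w); lowlink[v] = std::min(lowlink[v], lowlink[w]); }
            else if (onStack[w]) lowlink[v] = std::min(lowlink[v], index[w]);
        }
        if (lowlink[v] == index[v])                  // v is the root of a component
        {
            std::vector<int> scc;
            int w;
            do { w = stack.back(); stack.pop_back(); onStack[w] = false; scc.push_back(w); } while (w != v);
            sccs.push_back(scc);
        }
    }
};

int main()
{
    // 0 -> 1 -> 2 -> 0 is a loop; node 3 hangs off it.
    std::vector<std::vector<int>> g = { {1}, {2}, {0, 3}, {} };
    Tarjan t(g);
    for (int v = 0; v < (int)g.size(); v++) if (t.index[v] < 0) t.dfs(v);
    for (auto& scc : t.sccs)
        if (scc.size() > 1) { std::printf("loop:"); for (int v : scc) std::printf(" %d", v); std::printf("\n"); }
}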
void getLoopForwordOrder(std::unordered_set<ComputationNodePtr>& visited,std::unordered_set<ComputationNodePtr>& recStack, std::list<ComputationNodePtr>& nodesStack, ComputationNodePtr cur)
{
if (visited.find(cur) == visited.end())
{
visited.insert(cur);
recStack.insert(cur);
void getLoopForwordOrder(std::unordered_set<ComputationNodePtr>& visited,std::unordered_set<ComputationNodePtr>& recStack, std::list<ComputationNodePtr>& nodesStack, ComputationNodePtr cur)
{
if (visited.find(cur) == visited.end())
{
visited.insert(cur);
recStack.insert(cur);
if (cur->OperationName() != L"Delay")
{
for (size_t i = 0; i < cur->ChildrenSize() ; i++)
{
if (cur->Inputs(i)->LoopId()==cur->LoopId())
{
getLoopForwordOrder(visited, recStack, nodesStack, cur->Inputs(i));
}
}
}
recStack.erase(cur);
nodesStack.push_back(cur);
} else
{
if (!(recStack.find(cur) == recStack.end()))
{
throw std::logic_error("There is infinite Loop which cannot be unrolled!!");
}
if (cur->OperationName() != L"Delay")
{
for (size_t i = 0; i < cur->ChildrenSize() ; i++)
{
if (cur->Inputs(i)->LoopId()==cur->LoopId())
{
getLoopForwordOrder(visited, recStack, nodesStack, cur->Inputs(i));
}
}
}
recStack.erase(cur);
nodesStack.push_back(cur);
} else
{
if (!(recStack.find(cur) == recStack.end()))
{
throw std::logic_error("There is infinite Loop which cannot be unrolled!!");
}
}
}
}
}
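getLoopForwordOrder above is a post-order depth-first search restricted to the nodes of one loop: it refuses to descend through Delay nodes (which carry the recurrence across time steps), and the recStack check throws if the cycle cannot be cut that way. A generic sketch of that traversal with an explicit "delay" predicate, hypothetical names only:

#include <cstdio>
#include <stdexcept>
#include <unordered_set>
#include <vector>

// Sketch: post-order DFS that yields an evaluation order for the nodes of one loop,
// refusing to descend through "delay" edges, which are what break the cycle.
void ForwardOrder(int cur,
                  const std::vector<std::vector<int>>& inputs,
                  const std::vector<bool>& isDelay,
                  std::unordered_set<int>& visited,
                  std::unordered_set<int>& onPath,
                  std::vector<int>& order)
{
    if (visited.count(cur))
    {
        if (onPath.count(cur))
            throw std::logic_error("infinite loop that cannot be unrolled");
        return;
    }
    visited.insert(cur);
    onPath.insert(cur);
    if (!isDelay[cur])                               // delay nodes cut the recurrence
        for (int in : inputs[cur])
            ForwardOrder(in, inputs, isDelay, visited, onPath, order);
    onPath.erase(cur);
    order.push_back(cur);                            // post-order: inputs first
}

int main()
{
    // node 2 is a Delay node feeding node 0; the loop is 0 -> 1 -> 2 -> 0.
    std::vector<std::vector<int>> inputs = { {2}, {0}, {1} };
    std::vector<bool> isDelay = { false, false, true };
    std::unordered_set<int> visited, onPath;
    std::vector<int> order;
    for (int v = 0; v < 3; v++)
        if (!visited.count(v)) ForwardOrder(v, inputs, isDelay, visited, onPath, order);
    for (int v : order) std::printf("%d ", v);       // prints: 2 0 1
    std::printf("\n");
}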
//must be called before ValidateNetwork
void FormRecurentLoops(const ComputationNodePtr rootNode)
{
@ -2076,43 +2076,43 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
}
for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++)
for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++)
{
// sort the recurrent nodes in their ascending name, which is the same as visiting nodes in G^R
if ((*iter).m_recurrentNodes.size() > 1 && (*iter).m_recurrentNodesForForward.size() == 0)
{
std::list<ComputationNodePtr> result;
std::unordered_set<ComputationNodePtr> visited;
std::unordered_set<ComputationNodePtr> recStack;
for (size_t j = 0 ; j < (*iter).m_recurrentNodes.size(); j++)
{
ComputationNodePtr nodeRecIter = (*iter).m_recurrentNodes[j];
for (size_t i = 0; i < nodeRecIter->ChildrenSize() ; i++)
{
if ((nodeRecIter->Inputs(i)->LoopId() == nodeRecIter->LoopId()) && (nodeRecIter->OperationName() != L"Delay"))
{
nodeRecIter->Inputs(i)->SetIndexInLoop(nodeRecIter->Inputs(i)->GetIndexInLoop()+1);
}
}
}
//for (auto nodeRecIter = startNodes.begin(); nodeRecIter != startNodes.end(); nodeRecIter++)
for (size_t i = 0 ; i < (*iter).m_recurrentNodes.size(); i++)
{
ComputationNodePtr nodeRecIter = (*iter).m_recurrentNodes[i];
if (visited.find(nodeRecIter) == visited.end() && nodeRecIter->GetIndexInLoop() == 0)
getLoopForwordOrder(visited,recStack, result,nodeRecIter);
}
for(size_t i = 0; i < (*iter).m_recurrentNodes.size(); i++)
{
(*iter).m_recurrentNodesForForward.push_back(result.front());
result.pop_front();
}
std::list<ComputationNodePtr> result;
std::unordered_set<ComputationNodePtr> visited;
std::unordered_set<ComputationNodePtr> recStack;
for (size_t j = 0 ; j < (*iter).m_recurrentNodes.size(); j++)
{
ComputationNodePtr nodeRecIter = (*iter).m_recurrentNodes[j];
for (size_t i = 0; i < nodeRecIter->ChildrenSize() ; i++)
{
if ((nodeRecIter->Inputs(i)->LoopId() == nodeRecIter->LoopId()) && (nodeRecIter->OperationName() != L"Delay"))
{
nodeRecIter->Inputs(i)->SetIndexInLoop(nodeRecIter->Inputs(i)->GetIndexInLoop()+1);
}
}
}
//for (auto nodeRecIter = startNodes.begin(); nodeRecIter != startNodes.end(); nodeRecIter++)
for (size_t i = 0 ; i < (*iter).m_recurrentNodes.size(); i++)
{
ComputationNodePtr nodeRecIter = (*iter).m_recurrentNodes[i];
if (visited.find(nodeRecIter) == visited.end() && nodeRecIter->GetIndexInLoop() == 0)
getLoopForwordOrder(visited,recStack, result,nodeRecIter);
}
for(size_t i = 0; i < (*iter).m_recurrentNodes.size(); i++)
{
(*iter).m_recurrentNodesForForward.push_back(result.front());
result.pop_front();
}
}
}
@ -2284,7 +2284,7 @@ public: // public so PTask can use eval/gradient order, and pre-compute matrix s
return GetCalcOrder(rootNode, m_cacheGradientCalcOrders, false);
}
vector<size_t> m_sentenceEnd;
vector<size_t> m_sentenceEnd;
protected:
std::list<ComputationNodePtr>& GetCalcOrder(const ComputationNodePtr rootNode, std::map<const ComputationNodePtr, std::list<ComputationNodePtr>>& orderMap, const bool forwardCompute)
{

View file

@ -51,8 +51,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto nodeIter=dropoutNodes.begin(); nodeIter != dropoutNodes.end(); nodeIter++)
{
DropoutNode<ElemType>* node = static_cast<DropoutNode<ElemType>*>(*nodeIter);
node->SetDropoutRate(dropoutRate);
node->SetRandomSeed(dropOutSeed++);
node->SetDropoutRate(dropoutRate);
node->SetRandomSeed(dropOutSeed++);
}
}
@ -73,7 +73,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto nodeIter=convolutionNodes.begin(); nodeIter != convolutionNodes.end(); nodeIter++)
{
ConvolutionNode<ElemType>* node = static_cast<ConvolutionNode<ElemType>*>(*nodeIter);
node->SetmMaxTempMemSizeInSamples(maxTempMemSizeInSamples);
node->SetmMaxTempMemSizeInSamples(maxTempMemSizeInSamples);
}
}
}

View file

@ -273,7 +273,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void SetNbrSlicesInEachRecurrentIteration(size_t bsz)
{
m_samplesInRecurrentStep = bsz;
m_sentenceEnd.resize(bsz);
m_sentenceEnd.resize(bsz);
}
int64_t UpdateEvalTimeStamp()
@ -1939,169 +1939,169 @@ protected: \
template class TanhNode<float>;
template class TanhNode<double>;
template<class ElemType>
class LogNode : public ComputationNode<ElemType>
{
template<class ElemType>
class LogNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;
public:
LogNode(const short deviceId = AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId), m_gradientOfLog(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}
LogNode(const short deviceId = AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId), m_gradientOfLog(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}
LogNode(File& fstream, const size_t modelVersion, const short deviceId = AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId), m_gradientOfLog(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}
LogNode(File& fstream, const size_t modelVersion, const short deviceId = AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId), m_gradientOfLog(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}
virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"Log"; }
virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"Log"; }
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex != 0)
throw std::invalid_argument("Log only has one input.");
ComputeInputPartialS(m_gradientOfLog, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
}
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex != 0)
throw std::invalid_argument("Log only has one input.");
ComputeInputPartialS(m_gradientOfLog, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
}
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex != 0)
throw std::invalid_argument("Log only has one input.");
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex != 0)
throw std::invalid_argument("Log only has one input.");
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceInputValue = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceInputValue = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
ComputeInputPartialS(m_gradientOfLog, sliceInputGrad, sliceInputValue, sliceOutputGrad);
}
ComputeInputPartialS(m_gradientOfLog, sliceInputGrad, sliceInputValue, sliceOutputGrad);
}
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& gradientOfLog, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
{
gradientOfLog.AssignElementInverseOf(inputFunctionValues); // 1/x (x is input to log(x))
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& gradientOfLog, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
{
gradientOfLog.AssignElementInverseOf(inputFunctionValues); // 1/x (x is input to log(x))
inputGradientValues.AddElementProductOf(gradientValues, gradientOfLog);
}
inputGradientValues.AddElementProductOf(gradientValues, gradientOfLog);
}
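ComputeInputPartialS above is the chain rule for y = log(x): dE/dx = dE/dy · 1/x, hence the element-wise inverse followed by an element-wise product accumulated into the input gradient. A per-element sketch of that backward update, assuming flat buffers rather than the Matrix class:

#include <cstdio>
#include <vector>

// Sketch: backward pass of an element-wise log node.
// inputGrad += outputGrad * d(log x)/dx = outputGrad * (1 / x)
void LogBackward(const std::vector<float>& input,
                 const std::vector<float>& outputGrad,
                 std::vector<float>& inputGrad)
{
    for (size_t i = 0; i < input.size(); i++)
        inputGrad[i] += outputGrad[i] * (1.0f / input[i]);
}

int main()
{
    std::vector<float> x  = { 1.0f, 2.0f, 4.0f };
    std::vector<float> dy = { 1.0f, 1.0f, 1.0f };   // upstream gradient
    std::vector<float> dx(3, 0.0f);
    LogBackward(x, dy, dx);
    for (float g : dx) std::printf("%g ", g);        // prints: 1 0.5 0.25
    std::printf("\n");
}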
// GetTaskDescriptor - Get a task descriptor for this node
// taskType - task type we are generating a task for
virtual TaskDescriptor<ElemType>* GetPTaskDescriptor(TaskType taskType, size_t inputIndex = 0) const
{
TaskDescriptor<ElemType>* descriptor = new TaskDescriptor<ElemType>(this, taskType, inputIndex);
switch (taskType)
{
case taskComputeInputPartial:
descriptor->MatrixParam(m_gradientOfLog, "GradientOfLog", paramOptionsInput | paramOptionsTemporary);
descriptor->GradientParam(0, paramOptionsInput | paramOptionsOutput | paramOptionsInitialize);
descriptor->FunctionParam(0, paramOptionsInput);
descriptor->GradientParam();
descriptor->SetFunction((FARPROC)ComputeInputPartialS);
break;
case taskEvaluate:
descriptor->FunctionParam();
descriptor->FunctionParam(0, paramOptionsInput);
descriptor->SetFunction((FARPROC)EvaluateThisNodeS);
break;
default:
assert(false);
throw std::logic_error("Unsupported task requested");
}
return descriptor;
}
// GetTaskDescriptor - Get a task descriptor for this node
// taskType - task type we are generating a task for
virtual TaskDescriptor<ElemType>* GetPTaskDescriptor(TaskType taskType, size_t inputIndex = 0) const
{
TaskDescriptor<ElemType>* descriptor = new TaskDescriptor<ElemType>(this, taskType, inputIndex);
switch (taskType)
{
case taskComputeInputPartial:
descriptor->MatrixParam(m_gradientOfLog, "GradientOfLog", paramOptionsInput | paramOptionsTemporary);
descriptor->GradientParam(0, paramOptionsInput | paramOptionsOutput | paramOptionsInitialize);
descriptor->FunctionParam(0, paramOptionsInput);
descriptor->GradientParam();
descriptor->SetFunction((FARPROC)ComputeInputPartialS);
break;
case taskEvaluate:
descriptor->FunctionParam();
descriptor->FunctionParam(0, paramOptionsInput);
descriptor->SetFunction((FARPROC)EvaluateThisNodeS);
break;
default:
assert(false);
throw std::logic_error("Unsupported task requested");
}
return descriptor;
}
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(m_functionValues, Inputs(0)->FunctionValues());
}
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(m_functionValues, Inputs(0)->FunctionValues());
}
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceInputValue = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceInputValue = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
}
EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
}
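The ColumnSlice calls above rely on the minibatch layout used for recurrent evaluation: columns are grouped per time step with m_samplesInRecurrentStep parallel sequences side by side, so time step t occupies columns [t*S, (t+1)*S). A small sketch of slicing one time step out of such a column-major buffer (hypothetical helper, not the Matrix API):

#include <cstdio>
#include <vector>

// Sketch: columns are ordered (t0 s0, t0 s1, ..., t1 s0, t1 s1, ...), so time step t
// of S parallel sequences is the column range [t*S, (t+1)*S).
// Returns a copy of that slice from a column-major (rows x cols) buffer.
std::vector<float> ColumnSlice(const std::vector<float>& data, size_t rows,
                               size_t t, size_t S)
{
    std::vector<float> slice(rows * S);
    for (size_t c = 0; c < S; c++)
        for (size_t r = 0; r < rows; r++)
            slice[c * rows + r] = data[(t * S + c) * rows + r];
    return slice;
}

int main()
{
    // 1 row, 2 parallel sequences, 3 time steps: values encode 10*t + sequence.
    std::vector<float> mb = { 0, 1, 10, 11, 20, 21 };
    std::vector<float> t1 = ColumnSlice(mb, /*rows=*/1, /*t=*/1, /*S=*/2);
    std::printf("%g %g\n", t1[0], t1[1]);            // prints: 10 11
}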
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignLogOf(inputFunctionValues);
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignLogOf(inputFunctionValues);
#if NANCHECK
functionValues.HasNan("Log");
functionValues.HasNan("Log");
#endif
}
}
virtual void Validate()
{
PrintSelfBeforeValidation();
virtual void Validate()
{
PrintSelfBeforeValidation();
if (m_children.size() != 1)
throw std::logic_error("Log operation should have one input.");
if (m_children.size() != 1)
throw std::logic_error("Log operation should have one input.");
if (Inputs(0)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("Log operation: the input node has 0 element.");
if (Inputs(0)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("Log operation: the input node has 0 element.");
FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
m_gradientOfLog.Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
CopyImageSizeFromInputs();
}
FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
m_gradientOfLog.Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
CopyImageSizeFromInputs();
}
virtual void AttachInputs(const ComputationNodePtr singleInput)
{
m_children.resize(1);
m_children[0] = singleInput;
}
virtual void AttachInputs(const ComputationNodePtr singleInput)
{
m_children.resize(1);
m_children[0] = singleInput;
}
virtual void MoveMatricesToDevice(const short deviceId)
{
ComputationNode<ElemType>::MoveMatricesToDevice(deviceId);
virtual void MoveMatricesToDevice(const short deviceId)
{
ComputationNode<ElemType>::MoveMatricesToDevice(deviceId);
if (deviceId != AUTOPLACEMATRIX)
{
if (m_gradientOfLog.GetDeviceId() != deviceId)
m_gradientOfLog.TransferFromDeviceToDevice(m_gradientOfLog.GetDeviceId(), deviceId);
}
}
if (deviceId != AUTOPLACEMATRIX)
{
if (m_gradientOfLog.GetDeviceId() != deviceId)
m_gradientOfLog.TransferFromDeviceToDevice(m_gradientOfLog.GetDeviceId(), deviceId);
}
}
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
ComputationNode<ElemType>::CopyTo(nodeP, newName, flags);
LogNode<ElemType>* node = (LogNode<ElemType>*) nodeP;
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
ComputationNode<ElemType>::CopyTo(nodeP, newName, flags);
LogNode<ElemType>* node = (LogNode<ElemType>*) nodeP;
if (flags & CopyNodeFlags::copyNodeValue)
{
node->m_gradientOfLog = m_gradientOfLog;
}
}
if (flags & CopyNodeFlags::copyNodeValue)
{
node->m_gradientOfLog = m_gradientOfLog;
}
}
// copy constructor
LogNode(const LogNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags)
: ComputationNode<ElemType>(node->m_deviceId), m_gradientOfLog(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}
// copy constructor
LogNode(const LogNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags)
: ComputationNode<ElemType>(node->m_deviceId), m_gradientOfLog(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;
ComputationNodePtr node = new LogNode<ElemType>(this, name, flags);
return node;
}
ComputationNodePtr node = new LogNode<ElemType>(this, name, flags);
return node;
}
private:
Matrix<ElemType> m_gradientOfLog;
};
private:
Matrix<ElemType> m_gradientOfLog;
};
template class LogNode<float>;
template class LogNode<double>;
template class LogNode<float>;
template class LogNode<double>;
template<class ElemType>

View file

@ -205,7 +205,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
NDLUtil<ElemType> ndlUtil(m_net);
ndlUtil.ProcessNDLConfig(config, true);
}
}
virtual ComputationNetwork<ElemType>& BuildNetworkFromDescription()
{

View file

@ -139,31 +139,31 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// CheckOutputNodes - check output nodes
// symbolName - name of the computation nodes we are collecting
// compNodes - array of computation nodes
void CheckOutputNodes(NDLScript<ElemType>* script, std::string symbolName, std::vector<ComputationNodePtr>& compNodes)
{
NDLNode<ElemType>* nodeArray = script->FindSymbol(symbolName);
bool valid = m_net->FeatureNodes().size() > 0; // see if it's already valid
if (!valid && nodeArray) //otherwise, see if we found a symbol
{
NDLType outputType = nodeArray->GetType();
// accept either an array of nodes, or a single node
valid = (outputType == ndlTypeArray || outputType == ndlTypeFunction || outputType == ndlTypeMacroCall);
}
if (!valid)
// CheckOutputNodes - check output nodes
// symbolName - name of the computation nodes we are collecting
// compNodes - array of computation nodes
void CheckOutputNodes(NDLScript<ElemType>* script, std::string symbolName, std::vector<ComputationNodePtr>& compNodes)
{
NDLNode<ElemType>* nodeArray = script->FindSymbol(symbolName);
bool valid = m_net->FeatureNodes().size() > 0; // see if it's already valid
if (!valid && nodeArray) //otherwise, see if we found a symbol
{
NDLType outputType = nodeArray->GetType();
// accept either an array of nodes, or a single node
valid = (outputType == ndlTypeArray || outputType == ndlTypeFunction || outputType == ndlTypeMacroCall);
}
if (!valid)
RuntimeError("Invalid network node definition for '%s', nonexistant or wrong type", symbolName.c_str());
if (nodeArray)
{
vector<NDLNode<ElemType>*> nodes;
if (nodeArray->GetType() == ndlTypeArray)
nodes = nodeArray->GetParameters();
else
nodes.push_back(nodeArray);
if (nodeArray)
{
vector<NDLNode<ElemType>*> nodes;
if (nodeArray->GetType() == ndlTypeArray)
nodes = nodeArray->GetParameters();
else
nodes.push_back(nodeArray);
for (size_t i=0; i<nodes.size(); i++)
{
for (size_t i=0; i<nodes.size(); i++)
{
// get the computation node
ComputationNodePtr cnNode = (ComputationNodePtr)nodes[i]->GetEvalValue();
@ -186,24 +186,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// add it if it's not already there
if (!found)
compNodes.push_back(cnNode);
}
}
}
compNodes.push_back(cnNode);
}
}
}
// SetOutputNodes - Set the output nodes for the Computational Network
// NOTE: seems to be specific to SynchronousExecutionEngine, should be in a derived class for that execution engine
void SetOutputNodes(NDLScript<ElemType>* script)
{
// NOTE: all optional parameter nodes (i.e. tag=feature) have already been processed in ProcessOptionalParameters()
// SetOutputNodes - Set the output nodes for the Computational Network
// NOTE: seems to be specific to SynchronousExecutionEngine, should be in a derived class for that execution engine
void SetOutputNodes(NDLScript<ElemType>* script)
{
// NOTE: all optional parameter nodes (i.e. tag=feature) have already been processed in ProcessOptionalParameters()
// handle the alternate way of specifying nodes, the array of nodes method
CheckOutputNodes(script, "FeatureNodes", m_net->FeatureNodes());
CheckOutputNodes(script, "LabelNodes", m_net->LabelNodes());
CheckOutputNodes(script, "CriteriaNodes", m_net->FinalCriterionNodes());
CheckOutputNodes(script, "EvalNodes", m_net->EvaluationNodes());
CheckOutputNodes(script, "OutputNodes", m_net->OutputNodes());
}
// handle the alternate way of specifying nodes, the array of nodes method
CheckOutputNodes(script, "FeatureNodes", m_net->FeatureNodes());
CheckOutputNodes(script, "LabelNodes", m_net->LabelNodes());
CheckOutputNodes(script, "CriteriaNodes", m_net->FinalCriterionNodes());
CheckOutputNodes(script, "EvalNodes", m_net->EvaluationNodes());
CheckOutputNodes(script, "OutputNodes", m_net->OutputNodes());
}
};
template class NDLUtil<float>;

View file

@ -19,53 +19,53 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <typename ElemType>
NDLNode<ElemType>* NDLScript<ElemType>::DuplicateNode(NDLNode<ElemType>* node)
{
NDLNode<ElemType>* newNode = node->Copy();
m_children.push_back(newNode);
newNode->SetParentScript(this);
return newNode;
NDLNode<ElemType>* newNode = node->Copy();
m_children.push_back(newNode);
newNode->SetParentScript(this);
return newNode;
}
template <typename ElemType>
NDLScript<ElemType>::NDLScript(const NDLScript& copyMe) : ConfigParser(copyMe)
{
m_baseName = copyMe.m_baseName;
m_scriptString = copyMe.m_scriptString;
m_macroNode = copyMe.m_macroNode;
m_noDefinitions = copyMe.m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = false; // not defining when expanding macros (only reason to call this method
m_cn = copyMe.m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
m_baseName = copyMe.m_baseName;
m_scriptString = copyMe.m_scriptString;
m_macroNode = copyMe.m_macroNode;
m_noDefinitions = copyMe.m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = false; // not defining when expanding macros (only reason to call this method
m_cn = copyMe.m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
// script lines in parsed node order
for (NDLNode<ElemType>* node : copyMe.m_script)
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(node);
AddSymbol(newNode->GetName(), newNode);
// script lines in parsed node order
for (NDLNode<ElemType>* node : copyMe.m_script)
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(node);
AddSymbol(newNode->GetName(), newNode);
// now get the parameters to the functions added
ConfigValue value = newNode->GetParamString();
ParseParameters(newNode, value, true /*createNew*/);
// now get the parameters to the functions added
ConfigValue value = newNode->GetParamString();
ParseParameters(newNode, value, true /*createNew*/);
// add it to the new script
m_script.push_back(newNode);
}
// add it to the new script
m_script.push_back(newNode);
}
// now search the symbol table for other symbols that haven't been copied yet
// this happens for constants defined in macros and such
for (std::pair<std::string, NDLNode<ElemType>*> pair : copyMe.m_symbols)
{
// if we can't find the symbol in the copied symbol table, copy it here
if (m_symbols.find(pair.first) == end(m_symbols))
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(pair.second);
AddSymbol(pair.first, newNode);
// anything that takes parameters should be evaluated in the script loop
assert(newNode->GetParamString().empty());
}
}
// NOTE: the child nodes get populated as the nodes are duplicated in the loop above
// we shouldn't try to duplicate them separately
// now search the symbol table for other symbols that haven't been copied yet
// this happens for constants defined in macros and such
for (std::pair<std::string, NDLNode<ElemType>*> pair : copyMe.m_symbols)
{
// if we can't find the symbol in the copied symbol table, copy it here
if (m_symbols.find(pair.first) == end(m_symbols))
{
// duplicate this node
NDLNode<ElemType>* newNode = DuplicateNode(pair.second);
AddSymbol(pair.first, newNode);
// anything that takes parameters should be evaluated in the script loop
assert(newNode->GetParamString().empty());
}
}
// NOTE: the child nodes get populated as the nodes are duplicated in the loop above
// we shouldn't try to duplicate them separately
}
// copy constructor, creates a new disconnected copy of this node
@ -74,32 +74,32 @@ NDLScript<ElemType>::NDLScript(const NDLScript& copyMe) : ConfigParser(copyMe)
template <typename ElemType>
NDLNode<ElemType>::NDLNode(const NDLNode<ElemType>& copyMe)
{
m_name = copyMe.m_name; // value on the left of the equals
m_value = copyMe.m_value; // value on the right of the equals (CN node name, or value)
m_parent = copyMe.m_parent; // parent script
m_type = copyMe.m_type; //type of node
m_paramString = copyMe.m_paramString; // parameter of a function/array
m_paramMacro = copyMe.m_paramMacro; // parameter of a macro (the variables used in the macro definition)
// don't copy over the parameters, they will be reparsed after the copy
//m_parameters = copyMe.m_parameters; // copy over the parameters straight
m_name = copyMe.m_name; // value on the left of the equals
m_value = copyMe.m_value; // value on the right of the equals (CN node name, or value)
m_parent = copyMe.m_parent; // parent script
m_type = copyMe.m_type; //type of node
m_paramString = copyMe.m_paramString; // parameter of a function/array
m_paramMacro = copyMe.m_paramMacro; // parameter of a macro (the variables used in the macro definition)
// don't copy over the parameters, they will be reparsed after the copy
//m_parameters = copyMe.m_parameters; // copy over the parameters straight
m_eval = nullptr; // pointer to an arbitrary eval structure
// script for macro calls, need to expand the macro for each call
// if it's not expanded the evalValue will be overwitten on multiple calls to a macro
m_script = (copyMe.m_script) ? new NDLScript<ElemType>(*copyMe.m_script) : nullptr;
m_eval = nullptr; // pointer to an arbitrary eval structure
// script for macro calls, need to expand the macro for each call
// if it's not expanded the evalValue will be overwitten on multiple calls to a macro
m_script = (copyMe.m_script) ? new NDLScript<ElemType>(*copyMe.m_script) : nullptr;
}
template <typename ElemType>
NDLScript<ElemType>::NDLScript(const NDLScript&& moveMe) : ConfigParser(move(moveMe))
{
m_baseName = move(moveMe.m_baseName);
m_scriptString = move(moveMe.m_scriptString);
m_script = move(moveMe.m_script); // script lines in parsed node order, macros will have definition followed by body
m_symbols = move(moveMe.m_symbols); // symbol table
m_macroNode = move(moveMe.m_macroNode); // set when interpretting a macro definition
m_noDefinitions = move(moveMe.m_noDefinitions); // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = move(moveMe.m_definingMacro);
m_children = move(moveMe.m_children); // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
m_cn = move(moveMe.m_cn); // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
m_baseName = move(moveMe.m_baseName);
m_scriptString = move(moveMe.m_scriptString);
m_script = move(moveMe.m_script); // script lines in parsed node order, macros will have definition followed by body
m_symbols = move(moveMe.m_symbols); // symbol table
m_macroNode = move(moveMe.m_macroNode); // set when interpretting a macro definition
m_noDefinitions = move(moveMe.m_noDefinitions); // no definitions can be made in this script, interpret all macro/function names as calls
m_definingMacro = move(moveMe.m_definingMacro);
m_children = move(moveMe.m_children); // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
m_cn = move(moveMe.m_cn); // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
}
// EqualInsensitive - check to see if two nodes are equal
@ -212,10 +212,10 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
ret = true;
else if (EqualInsensitive(nodeType, DelayNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, RowSliceNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, LookupTableNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode<ElemType>::TypeName(), L"GMMLL"))
ret = true;

View file

@ -182,18 +182,18 @@ public:
~NDLNode()
{}
// publicly accessible Copy method
// should only be used for macro expansion
NDLNode* Copy() const
{
NDLNode* ret = new NDLNode(*this);
return ret;
}
// publicly accessible Copy method
// should only be used for macro expansion
NDLNode* Copy() const
{
NDLNode* ret = new NDLNode(*this);
return ret;
}
private:
// copy constructor, creates a new disconnected copy of this node for macro expansion
NDLNode(const NDLNode& copyMe);
// copy constructor, creates a new disconnected copy of this node for macro expansion
NDLNode(const NDLNode& copyMe);
NDLNode& operator=(NDLNode& /*copyMe*/) //this is just a place holder implementation which is not functioning but prevent callers to use it.
{
@ -226,8 +226,8 @@ public:
void SetParamMacro(ConfigValue paramMacro) {m_paramMacro = paramMacro;}
ConfigArray GetParamMacro() const {return m_paramMacro;}
void SetParentScript(NDLScript<ElemType>* script) {m_parent = script;}
NDLScript<ElemType>* GetParentScript() { return m_parent; }
void SetParentScript(NDLScript<ElemType>* script) {m_parent = script;}
NDLScript<ElemType>* GetParentScript() { return m_parent; }
// get parameters, either just optional or just regular
vector<NDLNode*> GetParameters(bool optional=false) const
@ -277,27 +277,27 @@ public:
// GetScalar - Get a scalar value from a node, may loop through some variables before arriving
// returns: scalar value
ConfigValue GetScalar()
{
NDLNode<ElemType>* node = this;
while (node && (node->GetType() == ndlTypeVariable || node->GetType() == ndlTypeParameter))
{
NDLNode<ElemType>* nodeLast = node;
node = node->FindNode(node->GetValue(), true /*searchForDotNames*/);
ConfigValue GetScalar()
{
NDLNode<ElemType>* node = this;
while (node && (node->GetType() == ndlTypeVariable || node->GetType() == ndlTypeParameter))
{
NDLNode<ElemType>* nodeLast = node;
node = node->FindNode(node->GetValue(), true /*searchForDotNames*/);
// if we are still on the same node, that means it was never resolved to anything, an undefined variable
if (nodeLast == node)
{
RuntimeError("undefined Variable, '%s' found, must be declared before first use\n", node->GetName().c_str());
}
}
if (!node || node->GetType() != ndlTypeConstant)
{
std::string name = node ? node->GetName() : GetName();
RuntimeError("Scalar expected, '%s' must be a constant or variable that resolves to a constant\n", name.c_str());
}
return node->GetValue();
}
// if we are still on the same node, that means it was never resolved to anything, an undefined variable
if (nodeLast == node)
{
RuntimeError("undefined Variable, '%s' found, must be declared before first use\n", node->GetName().c_str());
}
}
if (!node || node->GetType() != ndlTypeConstant)
{
std::string name = node ? node->GetName() : GetName();
RuntimeError("Scalar expected, '%s' must be a constant or variable that resolves to a constant\n", name.c_str());
}
return node->GetValue();
}
void InsertParam(NDLNode* param) {m_parameters.push_back(param);}
@ -386,7 +386,7 @@ private:
static NDLScript<ElemType> s_global; //("global"); // global script for storing macros and global nodes
std::vector<NDLNode<ElemType>*> m_children; // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
ComputationNetwork<ElemType>* m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
bool m_definingMacro; // currently defining a macro, flag to determine if we are defining or interpretting a macro call
bool m_definingMacro; // currently defining a macro, flag to determine if we are defining or interpretting a macro call
public:
// constructors that take a config name
@ -410,7 +410,7 @@ public:
{
m_macroNode = NULL;
m_noDefinitions=false;
m_definingMacro = false;
m_definingMacro = false;
m_scriptString = configValue;
Parse(m_scriptString);
}
@ -422,7 +422,7 @@ public:
NDLScript(const ConfigValue& configValue, std::string macroName, bool oneLineDefinition) : ConfigParser(';',configValue.Name())
{
m_noDefinitions = oneLineDefinition;
m_definingMacro = true;
m_definingMacro = true;
m_macroNode = NULL;
m_scriptString = configValue;
NDLNode<ElemType>* ndlNode = s_global.CheckName(macroName, true);
@ -452,7 +452,7 @@ public:
AddSymbol(param, paramNode);
}
Parse(m_scriptString);
m_definingMacro = false;
m_definingMacro = false;
}
@ -481,27 +481,27 @@ public:
return m_baseName;
}
void ClearGlobal()
{
s_global.Clear();
}
void ClearGlobal()
{
s_global.Clear();
}
void Clear()
{
void Clear()
{
for (NDLNode<ElemType>* node : m_children)
for (NDLNode<ElemType>* node : m_children)
{
delete node;
}
m_children.clear();
for (NDLNode<ElemType>* node : m_script)
for (NDLNode<ElemType>* node : m_script)
{
delete node;
}
m_script.clear();
m_symbols.clear();
}
m_symbols.clear();
}
void ClearEvalValues()
{
for (NDLNode<ElemType>* node : m_children)
@ -602,7 +602,7 @@ public:
{
NDLNode<ElemType>* nodeFound = found->second;
// check for undetermined nodes, because these nodes are to be defined later
if (nodeFound->GetType() != ndlTypeUndetermined && nodeFound->GetType() != ndlTypeParameter)
if (nodeFound->GetType() != ndlTypeUndetermined && nodeFound->GetType() != ndlTypeParameter)
{
std::string value = found->second->GetValue();
RuntimeError("Symbol '%s' currently assigned to '%s' reassigning to a different value not allowed\n", symbol.c_str(), value.c_str());
@ -663,12 +663,12 @@ public:
}
}
// IsMacroDefinition - is this a macro definition?
// returns - true if a definition, otherwise false
bool IsMacroDefinition()
{
return m_definingMacro;
}
// IsMacroDefinition - is this a macro definition?
// returns - true if a definition, otherwise false
bool IsMacroDefinition()
{
return m_definingMacro;
}
// CheckName - check for a name in our symbols, see if it exists
// name - name we are looking for
@ -695,14 +695,14 @@ public:
// if we are calling a macro we need to keep track of formal parameters,
// keep them as strings in this macroCall node
NDLNode<ElemType>* newNode = new NDLNode<ElemType>("", name, this, ndlTypeMacroCall);
NDLScript<ElemType>* script = node->GetScript();
NDLScript<ElemType>* script = node->GetScript();
// if this is a macro call (and not a definition), we want to expand the macro (make a copy)
if (!IsMacroDefinition())
{
script = new NDLScript<ElemType>(*script);
}
newNode->SetScript(script);
// if this is a macro call (and not a definition), we want to expand the macro (make a copy)
if (!IsMacroDefinition())
{
script = new NDLScript<ElemType>(*script);
}
newNode->SetScript(script);
newNode->SetParamMacro(node->GetParamMacro());
node = newNode;
@ -745,7 +745,7 @@ public:
// ParseParameters - parse the parameters of a macro, or an array
// ndlNode - node we should add the parameters to
// value - parameters as config value
// createNew - create a new parameter node if one does not exist
// createNew - create a new parameter node if one does not exist
void ParseParameters(NDLNode<ElemType>* ndlNode, const ConfigValue& value, bool createNew=false)
{
ConfigArray parameters = value;
@ -800,7 +800,7 @@ public:
if (found == npos)
{
ndlNode = new NDLNode<ElemType>("", token, this, ndlTypeConstant);
}
}
// not a constant, so must be a variable
else
{
@ -815,18 +815,18 @@ public:
Trim(value);
ndlNode = new NDLNode<ElemType>(name, value, this, ndlTypeOptionalParameter);
}
}
else
{
ndlNode = CheckName(token);
if (createNew && ndlNode == NULL)
{
// NOTE: currently we only get here in Parameter scenarios,
// if other scenarios present themselves, need a good way to change the type
if (createNew && ndlNode == NULL)
{
// NOTE: currently we only get here in Parameter scenarios,
// if other scenarios present themselves, need a good way to change the type
ndlNode = new NDLNode<ElemType>(token, token, this, ndlTypeParameter);
AddSymbol(token, ndlNode);
}
}
AddSymbol(token, ndlNode);
}
}
}
return ndlNode;
}
@ -1001,30 +1001,30 @@ public:
return tokenEnd;
}
// ExpandMacro - Expand a macro into a new macro definition
// node - NDLNode that holds the macro call
// returns: new node with the expanded macro
NDLNode<ElemType>* ExpandMacro(const NDLNode<ElemType>* node)
{
assert(node->GetType() == ndlTypeMacroCall); // needs to be a macro call (not definition)
// ExpandMacro - Expand a macro into a new macro definition
// node - NDLNode that holds the macro call
// returns: new node with the expanded macro
NDLNode<ElemType>* ExpandMacro(const NDLNode<ElemType>* node)
{
assert(node->GetType() == ndlTypeMacroCall); // needs to be a macro call (not definition)
std::string name = node->GetName();
// if we are calling a macro make a new copy of it and execute that instead (macro expansion)
// we do this so the evalValues in the macros will be valid regardless of number of instantiations
NDLNode<ElemType>* newNode = new NDLNode<ElemType>(name, node->GetValue(), this, ndlTypeMacroCall);
NDLScript<ElemType>* newScript = new NDLScript<ElemType>(*node->GetScript());
newNode->SetScript(newScript);
newNode->SetParamMacro(node->GetParamMacro());
std::string name = node->GetName();
// if we are calling a macro make a new copy of it and execute that instead (macro expansion)
// we do this so the evalValues in the macros will be valid regardless of number of instantiations
NDLNode<ElemType>* newNode = new NDLNode<ElemType>(name, node->GetValue(), this, ndlTypeMacroCall);
NDLScript<ElemType>* newScript = new NDLScript<ElemType>(*node->GetScript());
newNode->SetScript(newScript);
newNode->SetParamMacro(node->GetParamMacro());
// now get the parameters to the macro added
ConfigValue paramString = node->GetParamString();
ParseParameters(newNode, paramString, true /*createNew*/);
newNode->SetParamString(paramString);
// now get the parameters to the macro added
ConfigValue paramString = node->GetParamString();
ParseParameters(newNode, paramString, true /*createNew*/);
newNode->SetParamString(paramString);
// fixup the symbol table to point to this one instead
AssignSymbol(name, newNode);
return newNode;
}
// fixup the symbol table to point to this one instead
AssignSymbol(name, newNode);
return newNode;
}
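ExpandMacro above makes a fresh copy of the macro's script for every call site, so each instantiation keeps its own eval values instead of overwriting a shared definition. The same idea in a generic sketch: a definition is deep-copied per call before its formal parameters are bound (hypothetical types, not the NDL classes):

#include <cstdio>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Sketch: a macro "definition" holds formals and a body; every call expands to its
// own copy, so per-call state (here, bound arguments) never collides between call sites.
struct MacroDef
{
    std::vector<std::string> formals;                // parameter names
    std::string body;                                // opaque body text
};

struct MacroCall
{
    std::shared_ptr<MacroDef> expanded;              // private copy of the definition
    std::map<std::string, std::string> bound;        // formal -> actual
};

MacroCall ExpandMacro(const MacroDef& def, const std::vector<std::string>& actuals)
{
    MacroCall call;
    call.expanded = std::make_shared<MacroDef>(def); // deep copy, one per call site
    for (size_t i = 0; i < def.formals.size() && i < actuals.size(); i++)
        call.bound[def.formals[i]] = actuals[i];
    return call;
}

int main()
{
    MacroDef slice{ { "in", "rows" }, "RowSlice(in, 0, rows)" };
    MacroCall a = ExpandMacro(slice, { "features", "100" });
    MacroCall b = ExpandMacro(slice, { "labels", "10" });
    std::printf("%s vs %s\n", a.bound["in"].c_str(), b.bound["in"].c_str()); // features vs labels
}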
// Evaluate - Evaluate the script
// nodeEval - the node evaluator to call

View file

@ -277,7 +277,7 @@ void TaskDescriptor<ElemType>::ConfigureInputsAndOutputs(UINT& uidCounter, std::
}
// TODO: Any benefit to caching/reusing templates across different tasks? DBN doesn't bother;
// PTask doesn't use unique identity, it seems - at least at present. Do it anyway, in case this changes in the future?
// PTask doesn't use unique identity, it seems - at least at present. Do it anyway, in case this changes in the future?
DatablockTemplate* dt = Runtime::GetDatablockTemplate((char *)name.c_str(), &dims, 1, FALSE, TRUE); // TODO which override? flag values?
// if this is a matrix type we want to add an application context
@ -354,18 +354,18 @@ void TaskDescriptor<ElemType>::CreateChannelsForInputs(
{
// find the associated parameter data
ParamData<ElemType>* param;
// for (param = m_paramData[iparam];
// for (param = m_paramData[iparam];
// !(param->options & (paramOptionsInput|paramOptionsConstant|paramOptionsInitialize))
// && iparam < m_paramData.size();
// param = m_paramData[++iparam])
// ;
for (param = m_paramData[iparam++];
for (param = m_paramData[iparam++];
!(param->options & (paramOptionsInput|paramOptionsConstant|paramOptionsInitialize))
&& iparam < m_paramData.size();
param = m_paramData[iparam++])
;
Port* destinationPort = m_inputPorts[i];
Port* destinationPort = m_inputPorts[i];
std::string valueName = m_inputNames.at(i);
assert(destinationPort == param->port);
@ -475,7 +475,7 @@ void TaskDescriptor<ElemType>::CreateInitializerChannel(
}
Channel* pInitChannel = graph->AddInitializerChannel(port, (char*)name.c_str());
pInitChannel->SetPredicationType(CE_DST, CGATEFN_OPEN_ON_BOF);
pInitChannel->SetInitialPropagatedControlSignal(DBCTLC_BOF);
pInitChannel->SetInitialPropagatedControlSignal(DBCTLC_BOF);
}
// FindEmptyOutPorts - find unconnected output ports and hook up "bit buckets"
@ -738,7 +738,7 @@ PTaskGraphBuilder<ElemType>::PTaskGraphBuilder()
// Level of console logging, useful for development/debugging.
// 0 = silent; 1 = summary; 2 = debug.
// m_verbosity = 1;
m_verbosity = 2;
m_verbosity = 2;
m_portUIDCounter = 0;
PTask::Runtime::SetUseOpenCL(FALSE);

View file

@ -20,7 +20,7 @@ typedef int CONTROLSIGNAL;
#ifndef _WIN32 // BUGBUG: fix this once we need it
typedef unsigned int UINT;
typedef long long (*FARPROC)();
typedef long long (*FARPROC)();
#endif
#include <string>
@ -275,7 +275,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_verbosity >= 1) fprintf(stderr, "\nConfiguring task inputs and outputs ...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->ConfigureInputsAndOutputs(m_portUIDCounter, m_valueNameToProducerPortMap);
}
@ -287,7 +287,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_verbosity >= 1) fprintf(stderr, "\nCreating tasks from descriptors ...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->CreateTask(m_PTaskGraph);
}
@ -299,7 +299,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_verbosity >= 1) fprintf(stderr, "\nCreating PTask channels ...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->CreateChannelsForInputs(m_PTaskGraph, m_valueNameToProducerPortMap, m_inputNameToChannelsMap, m_verbosity);
}
@ -310,7 +310,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_verbosity >= 1) fprintf(stderr, "\nCreating LearnableParameter extra channels...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->CreateBackAndInitChannel(m_PTaskGraph, m_outputNameToChannelsMap);
}

View file

@ -42,24 +42,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RmsProp
};
// configuration parameters associated with RMSProp learning algorithm
// configuration parameters associated with RMSProp learning algorithm
typedef struct stRMSPropInfo{
double gamma;
double inc;
double dec;
double max;
double min;
stRMSPropInfo()
{
gamma = 0.99;
inc = 1.2;
dec = 0.75;
max = 10.0;
min = 0.1;
}
}RMSPropInfo;
double gamma;
double inc;
double dec;
double max;
double min;
stRMSPropInfo()
{
gamma = 0.99;
inc = 1.2;
dec = 0.75;
max = 10.0;
min = 0.1;
}
}RMSPropInfo;
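The RMSPropInfo fields above (gamma, inc, dec, max, min) parameterize an RMSProp-style update: a running average of squared gradients scales the step, and a per-weight multiplier is grown by inc or shrunk by dec (clamped to [min, max]) depending on whether the gradient keeps or flips its sign. A per-element sketch of that scheme; this is a generic RMSProp variant under the stated assumptions, not a transcription of Matrix::RmsProp:

#include <cmath>
#include <cstdio>
#include <vector>

// Sketch: one RMSProp step for a flat weight vector.
// meanSqr  - running average of squared gradients
// stepMul  - per-weight multiplier adapted with inc/dec and clamped to [minMul, maxMul]
// prevSign - sign of the previous gradient, used to detect oscillation
void RmsPropStep(std::vector<float>& w, const std::vector<float>& grad,
                 std::vector<float>& meanSqr, std::vector<float>& stepMul,
                 std::vector<float>& prevSign, float lr,
                 float gamma = 0.99f, float inc = 1.2f, float dec = 0.75f,
                 float maxMul = 10.0f, float minMul = 0.1f)
{
    for (size_t i = 0; i < w.size(); i++)
    {
        meanSqr[i] = gamma * meanSqr[i] + (1.0f - gamma) * grad[i] * grad[i];
        float sign = (grad[i] > 0.0f) ? 1.0f : (grad[i] < 0.0f ? -1.0f : 0.0f);
        stepMul[i] *= (sign * prevSign[i] >= 0.0f) ? inc : dec;   // grow unless the gradient flipped sign
        stepMul[i] = std::fmin(maxMul, std::fmax(minMul, stepMul[i]));
        prevSign[i] = sign;
        w[i] -= lr * stepMul[i] * grad[i] / (std::sqrt(meanSqr[i]) + 1e-8f);
    }
}

int main()
{
    std::vector<float> w = { 1.0f }, g = { 0.5f };
    std::vector<float> ms(1, 0.0f), mul(1, 1.0f), sgn(1, 0.0f);
    for (int iter = 0; iter < 3; iter++)
    {
        RmsPropStep(w, g, ms, mul, sgn, 0.01f);
        std::printf("iter %d: w=%g stepMul=%g\n", iter, w[0], mul[0]);
    }
}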
typedef struct stGradientUpdateInfo{
typedef struct stGradientUpdateInfo{
GradientsUpdateType mType;
float mGaussianNoiseInjectStd;
stGradientUpdateInfo()
@ -115,7 +115,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t maxEpochs = configSGD("maxEpochs");
ConfigArray momentumPerMBStr = configSGD("momentumPerMB", "");
floatargvector momentumPerMB = momentumPerMBStr;
floatargvector momentumPerMB = momentumPerMBStr;
wstring modelPath = configSGD("modelPath");
wstring trainCriterionNodeName = configSGD("trainCriterionNodeName", "");
@ -172,9 +172,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
loadBestModel, numMiniBatch4LRSearch, numPrevLearnRates, numBestSearchEpoch, traceLevel, numMBsToShowResult,
maxTempMemSizeInSamplesForCNN, gUpdateInfo, usePtask, keepCheckPointFiles, adaptationRegType, adaptationRegWeight,
trainCriterionNodeName, evalCriterionNodeName, doGradientCheck, gradientCheckSigDigit, validateAfterModelReloading,
rpi);
rpi);
}
void setMomentum(float momentum)
{
m_momentumPerMB = (ElemType)momentum;
@ -222,7 +222,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_numBestSearchEpoch=numBestSearchEpoch;
m_maxTempMemSizeInSamplesForCNN=maxTempMemSizeInSamplesForCNN;
m_gradType = gradUpdateType;
m_rpi = rpi;
m_rpi = rpi;
m_usePtask = usePtask;
m_keepCheckPointFiles = keepCheckPointFiles;
@ -258,13 +258,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_learningRatesPerSample[i] = learningRatesPerMB[i]/m_mbSize[i];
}
}
m_momentumPerMB = 0.9f;
if (momentumPerMB.size() >0)
{
m_momentumInputPerMB=momentumPerMB;
if (m_momentumInputPerMB[0]>=1 || m_momentumInputPerMB[0]<0)
throw std::invalid_argument ("momentumPerMB must be in [0, 1).");
}
m_momentumPerMB = 0.9f;
if (momentumPerMB.size() >0)
{
m_momentumInputPerMB=momentumPerMB;
if (m_momentumInputPerMB[0]>=1 || m_momentumInputPerMB[0]<0)
throw std::invalid_argument ("momentumPerMB must be in [0, 1).");
}
if (m_learnRateDecreaseFactor > 1 || m_learnRateIncreaseFactor<1)
{
@ -454,11 +454,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
ElemType epochCriterion = std::numeric_limits<ElemType>::infinity(), prevCriterion = std::numeric_limits<ElemType>::infinity();
std::vector<ElemType> epochEvalErrors(evaluationNodes.size(),std::numeric_limits<ElemType>::infinity());
std::vector<wstring> evalNodeNames;
for (size_t i=0;i<evaluationNodes.size(); i++)
evalNodeNames.push_back(evaluationNodes[i]->NodeName());
std::vector<ElemType> epochEvalErrors(evaluationNodes.size(),std::numeric_limits<ElemType>::infinity());
std::vector<wstring> evalNodeNames;
for (size_t i=0;i<evaluationNodes.size(); i++)
evalNodeNames.push_back(evaluationNodes[i]->NodeName());
size_t totalSamplesSeen = 0;
ElemType learnRatePerSample = 0.5f / m_mbSize[startEpoch];
@ -477,10 +477,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
bool learnRateInitialized = false;
if (startEpoch > 0)
{
{
learnRateInitialized = LoadCheckPointInfo(startEpoch-1, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion);
setMomentum(m_momentumInputPerMB[m_momentumInputPerMB.size()-1]);
}
setMomentum(m_momentumInputPerMB[m_momentumInputPerMB.size()-1]);
}
if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch && !learnRateInitialized && m_learningRatesPerSample.size() <= startEpoch)
throw std::invalid_argument ("When using \"AdjustAfterEpoch\", there must either exist a checkpoint file, or an explicit learning rate must be specified in config for the starting epoch.");
@ -523,9 +523,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//learning rate adjustment
if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None || (m_learningRatesPerSample.size() > 0 && m_learningRatesPerSample.size() > i))
{
learnRatePerSample = m_learningRatesPerSample[i];
setMomentum(m_momentumInputPerMB[i]);
}
learnRatePerSample = m_learningRatesPerSample[i];
setMomentum(m_momentumInputPerMB[i]);
}
else if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch)
{
ElemType largestPrevLearnRatePerSample = prevLearnRates[0];
@ -710,7 +710,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
UpdateEvalTimeStamps(labelNodes);
size_t actualMBSize = net.GetActualMBSize();
net.SetActualMiniBatchSize(actualMBSize);
net.SetActualMiniBatchSize(actualMBSize);
for (auto nodeIter=nodes.begin(); nodeIter != nodes.end(); nodeIter++)
{
net.Evaluate( *nodeIter);
@ -740,7 +740,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::list<Matrix<ElemType>>& smoothedGradients, const bool /*learnRateInitialized*/, const ElemType largestPrevLearnRatePerSample)
{
ElemType epochCriterion = std::numeric_limits<ElemType>::infinity(), prevCriterion = std::numeric_limits<ElemType>::infinity();
vector<ElemType> epochEvalErrors(evaluationNodes.size(),std::numeric_limits<ElemType>::infinity());
vector<ElemType> epochEvalErrors(evaluationNodes.size(),std::numeric_limits<ElemType>::infinity());
//ElemType epochEvalError = std::numeric_limits<ElemType>::infinity();
size_t totalSamplesSeen = 0;
ElemType bestLearnRatePerSample = curLearnRate;
@ -845,16 +845,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize, trainSetDataReader, learnRatePerSample,FeatureNodes,labelNodes,
criterionNodes,evaluationNodes,inputMatrices, learnableNodes,smoothedGradients,
epochCriterion, epochEvalErrors, totalSamplesSeen);
fprintf(stderr, "Finished Mini-Epoch For LearnRate Selection: Train Loss Per Sample = %.8g ", epochCriterion);
if (epochEvalErrors.size()==1)
fprintf(stderr, "EvalErr Per Sample = %.8g Ave Learn Rate Per Sample = %.10g\n", epochEvalErrors[0], learnRatePerSample);
else
{
fprintf(stderr, "EvalErr Per Sample ");
for (size_t i=0; i<epochEvalErrors.size(); i++)
fprintf(stderr, "[%lu] = %.8g ", i, epochEvalErrors[i]);
fprintf(stderr, "Ave Learn Rate Per Sample = %.10g\n",learnRatePerSample);
}
fprintf(stderr, "Finished Mini-Epoch For LearnRate Selection: Train Loss Per Sample = %.8g ", epochCriterion);
if (epochEvalErrors.size()==1)
fprintf(stderr, "EvalErr Per Sample = %.8g Ave Learn Rate Per Sample = %.10g\n", epochEvalErrors[0], learnRatePerSample);
else
{
fprintf(stderr, "EvalErr Per Sample ");
for (size_t i=0; i<epochEvalErrors.size(); i++)
fprintf(stderr, "[%lu] = %.8g ", i, epochEvalErrors[i]);
fprintf(stderr, "Ave Learn Rate Per Sample = %.10g\n",learnRatePerSample);
}
int baseModelEpoch = epochNumber-1;
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
@ -969,7 +969,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// pull the values from the graph for the totals
epochCriterion += ptaskGraphBuilder->GetValue(criterionNodes[0]);
for (size_t i=0; i<numEvalNodes; i++)
for (size_t i=0; i<numEvalNodes; i++)
{
epochEvalErrors[i] += ptaskGraphBuilder->GetValue(evaluationNodes[i]);
}
@ -986,9 +986,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::AddElementToElement(criterionNodes[0]->FunctionValues(), 0, 0, localEpochCriterion, 0, 0);
std::vector<ElemType>mbEvalErrors(numEvalNodes,0);
for (size_t i=0; i<numEvalNodes; i++)
for (size_t i=0; i<numEvalNodes; i++)
{
net.Evaluate(evaluationNodes[i]);
Matrix<ElemType>::AddElementToElement(evaluationNodes[i]->FunctionValues(), 0, 0, localEpochEvalErrors, 0, i);
}
@ -1065,7 +1065,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ptaskGraphBuilder->GetValue(node, node->FunctionValues());
}
epochCriterion /= float(totalEpochSamples);
for (size_t i=0; i< numEvalNodes; i++)
{
epochEvalErrors[i] /= float(totalEpochSamples);
}
@ -1076,7 +1076,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
localEpochEvalErrors /= float(totalEpochSamples);
epochCriterion = localEpochCriterion.Get00Element();
for (size_t i=0; i< numEvalNodes; i++)
{
epochEvalErrors[i] = (const ElemType)localEpochEvalErrors(0,i);
}
@ -1124,9 +1124,9 @@ public:
}
if (adpType == GradientsUpdateType::RmsProp)
{
// include L2 regularizer
Matrix<ElemType>::ScaleAndAdd((ElemType)0.001, functionValues, gradientValues);
smoothedGradient.RmsProp(gradientValues, (ElemType)sgd->m_rpi.gamma, (ElemType)sgd->m_rpi.inc, (ElemType)sgd->m_rpi.max, (ElemType)sgd->m_rpi.dec, (ElemType)sgd->m_rpi.min);
Matrix<ElemType>::ScaleAndAdd(-learnRatePerSample, gradientValues, functionValues);
}
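For readers unfamiliar with the update above: gradientValues first gets 0.001 * functionValues added to it (a simple L2 / weight-decay term), the result is smoothed by RmsProp, and the parameter is then moved against the smoothed gradient. Below is a minimal, self-contained sketch of a conventional RMSProp step in the same spirit; the gamma/inc/dec/max/min arguments of Matrix<ElemType>::RmsProp implement a per-element step-size scheme whose details are not shown in this diff, so the denominator used here is an assumption rather than the exact CNTK rule.

    #include <cmath>
    #include <vector>

    // Sketch of one RMSProp update with an L2 term folded into the gradient.
    // 'meanSquare' plays the role of the state kept inside 'smoothedGradient'.
    void RmsPropStep(std::vector<float>& value, const std::vector<float>& gradient,
                     std::vector<float>& meanSquare,
                     float learnRatePerSample, float gamma, float l2Weight)
    {
        for (size_t k = 0; k < value.size(); ++k)
        {
            float g = gradient[k] + l2Weight * value[k];                    // include L2 regularizer
            meanSquare[k] = gamma * meanSquare[k] + (1.0f - gamma) * g * g; // running mean of g^2
            value[k] -= learnRatePerSample * g / (std::sqrt(meanSquare[k]) + 1e-8f);
        }
    }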
@ -1315,12 +1315,12 @@ protected:
#define EPSILON 1e-5
bool GradientCheck(
ComputationNetwork<ElemType>& net,
const std::vector<ComputationNodePtr>& criterionNodes,
const std::list<ComputationNodePtr>& learnableNodes,
int npos)
{
// gradient checking
for (auto nodeIter=learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
{
ComputationNodePtr node = (*nodeIter);
@ -1353,7 +1353,7 @@ protected:
net.Evaluate(criterionNodes[npos]);
ElemType mbEvalCriNeg = criterionNodes[npos]->FunctionValues().Get00Element(); //criterionNode should be a scalar
// back to its original parameter value
node->FunctionValues()(irow, icol) = eOrg;
// check if they are consistent
@ -1362,13 +1362,13 @@ protected:
ElemType diff = (ElemType)fabs(eGradErr - eGradNum);
bool wrong = (std::isnan(diff) || diff > threshold);
if (wrong)
{
fprintf (stderr, "\nd%ls Numeric gradient = %e, Error BP gradient = %e\n", node->NodeName().c_str(), eGradNum, eGradErr);
return false;
}
}
}
return true;
}
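The loop above is a standard finite-difference gradient check: a sampled parameter entry is perturbed by EPSILON (1e-5), the criterion is re-evaluated, and the numeric estimate is compared with the backpropagated gradient. In symbols, and presumably in the usual central-difference form (the positive-side evaluation and the exact threshold computation sit outside the visible hunks):

    g_{\text{num}} \approx \frac{J(\theta + \epsilon) - J(\theta - \epsilon)}{2\epsilon},
    \qquad \text{fail if } \lvert g_{\text{BP}} - g_{\text{num}} \rvert > \text{threshold} \ \text{ or the difference is NaN.}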
void SetOtherInfo(ComputationNetwork<ElemType>& net , IDataReader<ElemType>* /*trainSetDataReader*/, IDataReader<ElemType>* /*validSetDataReader*/, std::map<std::wstring, Matrix<ElemType>*>& inputMatrices)
@ -1386,7 +1386,7 @@ protected:
}
}
for (size_t i=0;i<evaluationNodes.size(); i++)
{
if (evaluationNodes[i]->OperationName() == L"ClassBasedCrossEntropyWithSoftmax")
{
@ -1402,8 +1402,8 @@ protected:
intargvector m_mbSize;
size_t m_epochSize;
size_t m_maxEpochs;
floatargvector m_momentumInputPerMB;
ElemType m_momentumPerMB;
bool m_gradientClippingWithTruncation;
ElemType m_clippingThresholdPerSample;
@ -1427,7 +1427,7 @@ protected:
ElemType m_learnRateIncreaseFactor;
ElemType m_learnRateDecreaseFactor;
floatargvector m_dropoutRates;
size_t m_maxTempMemSizeInSamplesForCNN;
int m_traceLevel;
@ -1437,7 +1437,7 @@ protected:
ElemType m_minLearnRate;
GradientUpdateInfo m_gradType;
RMSPropInfo m_rpi;
bool m_usePtask;

View file

@ -202,7 +202,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::vector<ComputationNodePtr> labelNodes = m_net.LabelNodes();
std::vector<ComputationNodePtr> criterionNodes = m_net.FinalCriterionNodes();
std::vector<ComputationNodePtr> evaluationNodes = m_net.EvaluationNodes();
if (criterionNodes.size()==0)
{
throw std::runtime_error("No CrossEntropyWithSoftmax node found\n");
@ -219,7 +219,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
for (size_t i=0; i<labelNodes.size(); i++)
{
inputMatrices[labelNodes[i]->NodeName()] = &labelNodes[i]->FunctionValues();
}
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1,1, m_net.GetDeviceID());
@ -235,7 +235,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numSamples = 0;
ElemType crossEntropy = 0;
ElemType evalError = 0;
ofstream outputStream;
if (output)
{
@ -247,7 +247,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
size_t numMBsRun = 0;
size_t actualMBSize = 0;
while (dataReader.GetMinibatch(inputMatrices))
{
size_t nbrSamples = (size_t)(*inputMatrices[L"numberobs"])(0, 0);

View file

@ -41,7 +41,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input = output;
}
int recur_idx = 0;
if (numHiddenLayers > 0)
{
//TODO: to figure out sparse matrix size
@ -49,27 +49,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == 1)
{
w = m_net->CreateLearnableParameter(L"W0", m_layerSizes[1], m_layerSizes[1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
delay = m_net->Delay(NULL, m_defaultHiddenActivity, m_layerSizes[1], mbSize);
/// unless there is a good algorithm to detect loops, use this explicit setup
output = ApplyNonlinearFunction(
m_net->Plus(
m_net->Times(u, input), m_net->Times(w, delay)), 0);
delay->AttachInputs(output);
((DelayNode<ElemType>*) delay)->SetDelay(1);
recur_idx ++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), 0);
//output = m_net->Times(u, input);
}
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
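The Delay/AttachInputs pattern above builds the recurrence h(t) = f(U x(t) + W h(t-1)) explicitly: the Delay node is created with a NULL input and the default hidden activity, the new output is defined in terms of it, and only afterwards is that output attached back as the delay's input to close the loop. A small stand-alone simulation of what the closed loop computes at run time (scalar weights instead of matrices and tanh as the nonlinearity, both simplifications for illustration, not the builder's actual code):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const float U = 0.5f, W = 0.3f;                 // stand-ins for the U/W parameter matrices
        const float defaultHiddenActivity = 0.1f;       // what the Delay node emits before any output exists
        std::vector<float> x = { 1.0f, 0.0f, -1.0f, 2.0f };

        float hPrev = defaultHiddenActivity;            // Delay output at t = 0
        for (size_t t = 0; t < x.size(); ++t)
        {
            float h = std::tanh(U * x[t] + W * hPrev);  // Plus(Times(U, x), Times(W, delay)) + nonlinearity
            std::printf("t=%zu h=%f\n", t, h);
            hPrev = h;                                  // delay->AttachInputs(output): h feeds the next step
        }
        return 0;
    }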
@ -80,31 +80,31 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
{
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
delay = m_net->Delay(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i+1], mbSize);
/// unless there is a good algorithm to detect loops, use this explicit setup
output = ApplyNonlinearFunction(
m_net->Plus(
m_net->Times(u, input), m_net->Times(w, delay)), 0);
delay->AttachInputs(output);
recur_idx++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
}
}
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
/*m_net->MatrixL2Reg(w , L"L1w");*/
@ -136,9 +136,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numHiddenLayers = m_layerSizes.size()-2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr;
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
m_net->FeatureNodes().push_back(input);
@ -151,7 +151,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input = output;
}
int recur_idx = 0;
if (numHiddenLayers > 0)
{
u = m_net->CreateSparseLearnableParameter(L"U0", m_layerSizes[1], m_layerSizes[0]);
@ -167,9 +167,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
b->FunctionValues()(0,0) = 0.1;
b->FunctionValues()(1,0) = -0.1;
#endif
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == 1)
{
w = m_net->CreateLearnableParameter(L"W0", m_layerSizes[1], m_layerSizes[1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
#ifdef RNN_DEBUG
w->FunctionValues()(0,0) = 0.2;
@ -178,21 +178,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
w->FunctionValues()(1,1) = -0.1;
#endif
delay = m_net->Delay(NULL, m_defaultHiddenActivity, m_layerSizes[1], mbSize);
/// unless there is a good algorithm to detect loops, use this explicit setup
output = ApplyNonlinearFunction(
m_net->Plus(
m_net->Times(u, input), m_net->Times(w, delay)), 0);
delay->AttachInputs(output);
recur_idx ++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), 0);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
@ -211,26 +211,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
b->FunctionValues()(0,0) = 0.1;
b->FunctionValues()(1,0) = -0.1;
#endif
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
{
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
delay = m_net->Delay(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i+1], mbSize);
/// unless there is a good algorithm to detect loops, use this explicit setup
output = ApplyNonlinearFunction(
m_net->Plus(
m_net->Times(u, input), m_net->Times(w, delay)), 0);
delay->AttachInputs(output);
recur_idx++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
}
@ -267,9 +267,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numHiddenLayers = m_layerSizes.size()-2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr, pastEmbedding=nullptr;
ComputationNodePtr Wxo = nullptr, Who=nullptr, Wco=nullptr, bo = nullptr, Wxi=nullptr, Whi=nullptr, Wci=nullptr, bi=nullptr;
ComputationNodePtr Wxf=nullptr, Whf=nullptr, Wcf=nullptr, bf=nullptr, Wxc=nullptr, Whc=nullptr, bc=nullptr;
ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, ct=nullptr, ht=nullptr;
@ -327,20 +327,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Wxc = m_net->CreateLearnableParameter(L"WXC0", m_layerSizes[offset+1], m_layerSizes[offset]*(offset?m_lookupTableOrder:1));
m_net->InitLearnableParameters(Wxc, m_uniformInit, randomSeed++, m_initValueScale);
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == offset+1)
{
Whi = m_net->CreateLearnableParameter(L"WHI0", m_layerSizes[offset+1], m_layerSizes[offset+1]);
m_net->InitLearnableParameters(Whi, m_uniformInit, randomSeed++, m_initValueScale);
Wci = m_net->CreateLearnableParameter(L"WCI0", m_layerSizes[offset+1], 1);
m_net->InitLearnableParameters(Wci, m_uniformInit, randomSeed++, m_initValueScale);
Whf = m_net->CreateLearnableParameter(L"WHF0", m_layerSizes[offset+1], m_layerSizes[offset+1]);
m_net->InitLearnableParameters(Whf, m_uniformInit, randomSeed++, m_initValueScale);
Wcf = m_net->CreateLearnableParameter(L"WCF0", m_layerSizes[offset+1], 1);
m_net->InitLearnableParameters(Wcf, m_uniformInit, randomSeed++, m_initValueScale);
Who = m_net->CreateLearnableParameter(L"WHO0", m_layerSizes[offset+1], m_layerSizes[offset+1]);
m_net->InitLearnableParameters(Who, m_uniformInit, randomSeed++, m_initValueScale);
Wco = m_net->CreateLearnableParameter(L"WCO0", m_layerSizes[offset+1], 1);
m_net->InitLearnableParameters(Wco, m_uniformInit, randomSeed++, m_initValueScale);
Whc = m_net->CreateLearnableParameter(L"WHC0", m_layerSizes[offset+1], m_layerSizes[offset+1]);
@ -359,9 +359,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delayHF = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayHO = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayHC = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayCI = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayCF = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayCC = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayYI = m_net->Delay(NULL, 0, layerOutput, mbSize);
@ -380,7 +380,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->Times(Wti, m_net->Times(Wtensoroi,m_net->KhatriRaoProduct(reducedInput, reducedOutput))));
it = ApplyNonlinearFunction(
m_net->Plus(
m_net->Plus(
m_net->Plus(
m_net->Times(Wxi, input),
@ -388,7 +388,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->Times(Whi, delayHI)),
m_net->DiagTimes(Wci, delayCI)), 0);
ft = ApplyNonlinearFunction(
m_net->Plus(
m_net->Plus(
m_net->Plus(
m_net->Times(Wxf, input),
@ -411,7 +411,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
)
);
ot = ApplyNonlinearFunction(
m_net->Plus(
m_net->Plus(
m_net->Plus(
m_net->Times(Wxo, input),
@ -419,24 +419,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->Times(Who, delayHO)),
m_net->DiagTimes(Wco, ct)), 0);
output = m_net->ElementTimes(ot, m_net->Tanh(ct));
delayHO->AttachInputs(output);
delayHI->AttachInputs(output);
delayHF->AttachInputs(output);
delayHC->AttachInputs(output);
delayCI->AttachInputs(ct);
delayCF->AttachInputs(ct);
delayCC->AttachInputs(ct);
recur_idx ++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), 0);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
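Collecting the gate constructions above (Times for the dense weights, DiagTimes for the per-element "peephole" weights, ElementTimes for the Hadamard products), the cell being wired up follows the standard peephole LSTM. Ignoring the Khatri-Rao tensor term visible just above this hunk and leaving the bias terms implicit because they fall outside the visible lines, the equations are:

    \begin{aligned}
    i_t &= \sigma\!\left(W_{xi} x_t + W_{hi} h_{t-1} + w_{ci} \odot c_{t-1}\right)\\
    f_t &= \sigma\!\left(W_{xf} x_t + W_{hf} h_{t-1} + w_{cf} \odot c_{t-1}\right)\\
    c_t &= f_t \odot c_{t-1} + i_t \odot \tanh\!\left(W_{xc} x_t + W_{hc} h_{t-1}\right)\\
    o_t &= \sigma\!\left(W_{xo} x_t + W_{ho} h_{t-1} + w_{co} \odot c_t\right)\\
    h_t &= o_t \odot \tanh(c_t)
    \end{aligned}

The seven Delay nodes then carry h and c one step back in time: the delayH* nodes are attached to the output h_t, and the delayC* nodes to the cell state c_t.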
@ -444,29 +444,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i]);
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
{
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
std::list<ComputationNodePtr> recurrent_loop;
delay = m_net->Delay(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], mbSize);
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), m_net->Times(w, delay)), i);
delay->AttachInputs(output);
recur_idx++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
}
}
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
label = m_net->CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize);
@ -536,7 +536,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
int recur_idx = 0;
/// unless there is a good algorithm to detect loops, use this explicit setup
int ik = 1;
output = input;
while (ik <= m_maOrder)
@ -558,7 +558,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
@ -568,28 +568,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
output= m_net->Times(u, input);
input = output;
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
{
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"R%d", i+1), m_layerSizes[i+1], m_layerSizes[i+1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
delay = m_net->Delay(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], mbSize);
output = m_net->Plus(m_net->Times(w, delay), input);
delay->AttachInputs(output);
input = output;
recur_idx++;
}
bi = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"bi%d", i), m_layerSizes[i+1], 1);
output = m_net->Plus(input, bi);
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
}
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
label = m_net->CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize);
@ -619,9 +619,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numHiddenLayers = m_layerSizes.size()-2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr;
ComputationNodePtr bi=nullptr;
ComputationNodePtr Wxi1=nullptr, Wxi=nullptr;
ComputationNodePtr Wxi2=nullptr, Wxi3=nullptr, Wxi4=nullptr;
@ -655,26 +655,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delayXIII->AttachInputs(input);
delayXIV->AttachInputs(input);
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == 1)
{
//TODO: to figure out sparse matrix size
Wxi2 = m_net->CreateSparseLearnableParameter(L"WXI2", m_layerSizes[1], m_layerSizes[0], 0);
m_net->InitLearnableParameters(Wxi2, m_uniformInit, randomSeed++, m_initValueScale);
//TODO: to figure out sparse matrix size
Wxi3 = m_net->CreateSparseLearnableParameter(L"WXI3", m_layerSizes[1], m_layerSizes[0], 0);
m_net->InitLearnableParameters(Wxi3, m_uniformInit, randomSeed++, m_initValueScale);
//TODO: to figure out sparse matrix size
Wxi4 = m_net->CreateSparseLearnableParameter(L"WXI4", m_layerSizes[1], m_layerSizes[0], 0);
m_net->InitLearnableParameters(Wxi4, m_uniformInit, randomSeed++, m_initValueScale);
//TODO: to figure out sparse matrix size
Wxi1 = m_net->CreateSparseLearnableParameter(L"WXI1", m_layerSizes[1], m_layerSizes[0], 0);
m_net->InitLearnableParameters(Wxi1, m_uniformInit, randomSeed++, m_initValueScale);
//TODO: to figure out sparse matrix size
Wxi = m_net->CreateSparseLearnableParameter(L"WXI", m_layerSizes[1], m_layerSizes[0], 0);
m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale);
/// unless there is a good algorithm to detect loops, use this explicit setup
it = m_net->Plus(
m_net->Tanh(
m_net->Plus(
m_net->Times(Wxi4, delayXIV),
@ -697,15 +697,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delayXII->NeedGradient() = false;
delayXIII->NeedGradient() = false;
delayXIV->NeedGradient() = false;
recur_idx ++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), 0);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
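Unlike the Delay nodes used for true recurrence, delayXI..delayXIV here are attached to the input and have NeedGradient() = false, so they only supply a short history of past observations. Reading the visible fragment (the rest of both Plus chains lies outside the hunk, so this is an approximation, not the complete expression):

    \text{it} \;\approx\; \tanh\!\big(W_{x4}\,x_{t-4} + W_{x3}\,x_{t-3} + W_{x2}\,x_{t-2} + W_{x1}\,x_{t-1} + \dots\big) \;+\; \dots

i.e. a weighted sum over recent inputs passed through a tanh, rather than a state that feeds back on itself.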
@ -713,30 +713,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i]);
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1)
{
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
std::list<ComputationNodePtr> recurrent_loop;
delay = m_net->Delay(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], mbSize);
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), m_net->Times(w, delay)), i);
delay->AttachInputs(output);
recur_idx++;
}
else
{
output = SimpleNetworkBuilder<ElemType>::ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
}
}
//TODO: to figure out sparse matrix size
w = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers], 0);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
// b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"B%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], 1);
//TODO: to figure out sparse matrix size
@ -833,17 +833,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Wci = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WCI%d", iLayer), outputDim, 1);
m_net->InitLearnableParameters(Wci, m_uniformInit, randomSeed++, m_initValueScale);
Whf = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WHF%d", iLayer), outputDim, outputDim);
m_net->InitLearnableParameters(Whf, m_uniformInit, randomSeed++, m_initValueScale);
Wcf = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WCF%d", iLayer), outputDim, 1);
m_net->InitLearnableParameters(Wcf, m_uniformInit, randomSeed++, m_initValueScale);
Who = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WHO%d", iLayer), outputDim, outputDim);
m_net->InitLearnableParameters(Who, m_uniformInit, randomSeed++, m_initValueScale);
Wco = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WCO%d", iLayer), outputDim, 1);
m_net->InitLearnableParameters(Wco, m_uniformInit, randomSeed++, m_initValueScale);
Whc = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"WHC%d", iLayer), outputDim, outputDim);
m_net->InitLearnableParameters(Whc, m_uniformInit, randomSeed++, m_initValueScale);
size_t layer1 = outputDim;
@ -852,10 +852,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delayHF = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayHO = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayHC = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayCI = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayCF = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
delayCC = m_net->Delay(NULL, m_defaultHiddenActivity, layer1, mbSize);
if(m_constInputGateValue)
{
//it = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"CONSTIT%d", iLayer), outputDim, mbSize);
@ -865,7 +865,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else
it = ApplyNonlinearFunction(
m_net->Plus(
m_net->Plus(
m_net->Plus(
m_net->Times(Wxi, input),
@ -906,7 +906,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else
ft = ApplyNonlinearFunction(
m_net->Plus(
m_net->Plus(
m_net->Plus(
m_net->Times(Wxf, input),
@ -933,7 +933,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else
ot = ApplyNonlinearFunction(
m_net->Plus(
m_net->Plus(
m_net->Plus(
m_net->Times(Wxo, input),
@ -949,17 +949,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
output = m_net->ElementTimes(ot, m_net->Tanh(ct));
}
delayHO->AttachInputs(output);
delayHI->AttachInputs(output);
delayHF->AttachInputs(output);
delayHC->AttachInputs(output);
delayCI->AttachInputs(ct);
delayCF->AttachInputs(ct);
delayCC->AttachInputs(ct);
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
output = input;
@ -976,9 +976,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numHiddenLayers = m_layerSizes.size()-2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, e=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr;
ComputationNodePtr Wxo = nullptr, Who=nullptr, Wco=nullptr, bo = nullptr, Wxi=nullptr, Whi=nullptr, Wci=nullptr, bi=nullptr;
ComputationNodePtr Wxf=nullptr, Whf=nullptr, Wcf=nullptr, bf=nullptr, Wxc=nullptr, Whc=nullptr, bc=nullptr;
ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, ct=nullptr, ht=nullptr;
@ -1014,7 +1014,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/// direct connect from input node to output node
int recur_idx = 0;
int offset = m_lookupTableOrder > 0? 1 : 0;
if (numHiddenLayers > 0)
{
@ -1024,22 +1024,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (int i=1 + offset; i<numHiddenLayers; i++)
{
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i)
{
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i+1], input);
recur_idx++;
}
else
{
u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i]);
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"B%d", i), m_layerSizes[i+1], 1);
output = ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;
@ -1055,7 +1055,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input = output;
}
w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
label = m_net->CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize);
AddTrainAndEvalCriterionNodes(input, label, w);
@ -1092,9 +1092,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numHiddenLayers = m_layerSizes.size()-2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input=nullptr, w=nullptr, b=nullptr, u=nullptr, e=nullptr, delay = nullptr, output=nullptr, label=nullptr, prior=nullptr;
ComputationNodePtr Wxo = nullptr, Who=nullptr, Wco=nullptr, bo = nullptr, Wxi=nullptr, Whi=nullptr, Wci=nullptr, bi=nullptr;
ComputationNodePtr Wxf=nullptr, Whf=nullptr, Wcf=nullptr, bf=nullptr, Wxc=nullptr, Whc=nullptr, bc=nullptr;
ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, ct=nullptr, ht=nullptr;
@ -1130,7 +1130,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/// direct connect from input node to output node
int recur_idx = 0;
int offset = m_lookupTableOrder > 0? 1 : 0;
if (numHiddenLayers > 0)
{
@ -1140,22 +1140,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (int i=1 + offset; i<numHiddenLayers; i++)
{
if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i)
{
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i+1], input);
recur_idx++;
}
else
{
u = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"U%d", i), m_layerSizes[i+1], m_layerSizes[i]);
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"B%d", i), m_layerSizes[i+1], 1);
output = ApplyNonlinearFunction(m_net->Plus(m_net->Times(u, input), b), i);
}
if (m_addDropoutNodes)
input = m_net->Dropout(output);
else
input = output;

View file

@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void InitRecurrentConfig(const ConfigParameters& config)
{
ConfigArray rLayerSizes = config("recurrentLayer", "");
intargvector recurrentLayers = rLayerSizes;
m_recurrentLayers=recurrentLayers;
m_defaultHiddenActivity = config("defaultHiddenActivity", "0.1");
ConfigArray str_rnnType = config("rnnType", L"SIMPLENET");
@ -133,7 +133,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_rnnType= CLASSLSTM;
if (std::find(strType.begin(), strType.end(), L"TENSORIOLSTM") != strType.end())
m_rnnType= TENSORIOLSTM;
}
// Init - Builder Initialize for multiple data sets
// config - [in] configuration parameters for the network builder

View file

@ -35,7 +35,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
void WriteOutput(IDataReader<ElemType>& dataReader, size_t mbSize, IDataWriter<ElemType>& dataWriter, const std::vector<std::wstring>& outputNodeNames, size_t numOutputSamples=requestDataSize, bool doUnitTest = false)
{
//specify output nodes and files
@ -67,8 +67,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
inputMatrices[labelNodes[i]->NodeName()] = &labelNodes[i]->FunctionValues();
}
Matrix<ElemType> endOfFile = Matrix<ElemType>(1,1);
endOfFile(0,0)=0;
//evaluate with minibatches
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
@ -85,12 +85,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t actualMBSize = m_net.GetActualMBSize();
m_net.SetActualMiniBatchSize(actualMBSize);
m_net.SetActualNbrSlicesInEachRecIter(dataReader.NumberSlicesInEachRecurrentIter());
dataReader.SetSentenceEndInBatch(m_net.m_sentenceEnd);
for (int i=0; i<outputNodes.size(); i++)
{
m_net.Evaluate(outputNodes[i]);
outputMatrices[outputNodes[i]->NodeName()] = (void *)(&outputNodes[i]->FunctionValues());
}
if (doUnitTest)
@ -121,7 +121,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//clean up
}
void WriteOutput(IDataReader<ElemType>& dataReader, size_t mbSize, std::wstring outputPath, const std::vector<std::wstring>& outputNodeNames, size_t numOutputSamples=requestDataSize)
{
@ -173,12 +173,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t actualMBSize = m_net.GetActualMBSize();
m_net.SetActualMiniBatchSize(actualMBSize);
dataReader.SetSentenceEndInBatch(m_net.m_sentenceEnd);
for (int i=0; i<outputNodes.size(); i++)
{
m_net.Evaluate(outputNodes[i]);
Matrix<ElemType> & outputValues = outputNodes[i]->FunctionValues();
ofstream & outputStream = *outputStreams[i];
outputValues.CopyToArray(tempArray, tempArraySize);
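After Evaluate, each output node's FunctionValues() holds one column per frame of the minibatch, and CopyToArray flattens it into tempArray before the values are written to the per-node stream. A hypothetical helper in the same spirit is sketched below; the column-major layout is an assumption about Matrix<ElemType>, and the formatting is illustrative rather than the writer's actual output format.

    #include <cstddef>
    #include <fstream>

    // Write a rows-by-cols buffer (assumed column-major, one column per sample) as text,
    // one sample per line.
    void WriteColumnsAsText(std::ofstream& out, const float* data, size_t rows, size_t cols)
    {
        for (size_t j = 0; j < cols; ++j)
        {
            for (size_t i = 0; i < rows; ++i)
                out << data[j * rows + i] << (i + 1 < rows ? " " : "");
            out << "\n";
        }
    }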

View file

@ -31,8 +31,8 @@ public:
{
// constants don't need to be evaluated, they just translate into numbers...
if (node->GetType() == ndlTypeConstant
|| node->GetType() == ndlTypeArray)
return;
// setup the node parameters, where they start in the parameter list, and how many there are
// this is needed for the ndlPassResolve step to hookup all the inputs
@ -53,15 +53,15 @@ public:
ComputationNodePtr nodePtr = nullptr;
// get the node pointer for the node, should be stored in the EvalValue;
if (pass > ndlPassInitial)
{
nodePtr = (ComputationNodePtr)node->GetEvalValue();
if (nodePtr == nullptr)
{
nodePtr = (ComputationNodePtr)m_net.GetNodeFromName(name);
node->SetEvalValue(nodePtr);
}
}
if (InputValue<ElemType>::TypeName() == cnNodeType)
@ -90,10 +90,10 @@ public:
if (pass == ndlPassInitial)
{
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
// first look for this node already existing in the network
if (m_net.NodeNameExist(name))
@ -109,12 +109,12 @@ public:
if (pass == ndlPassInitial)
{
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1;
nodePtr = m_net.CreateInputNode(name, imageWidth, imageHeight, imageChannels, numImages);
}
@ -126,10 +126,10 @@ public:
if (pass == ndlPassInitial)
{
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
bool needGradient = node->GetOptionalParameter("needGradient", "true");
@ -234,50 +234,50 @@ public:
nodePtr->FunctionValues().SetValue(val);
}
}
else if (cnNodeType == RowSliceNode<ElemType>::TypeName())
{
// setup the parameter position of children so we can hook them up later
nodeParamCount = 1;
// parameters are (rows, [cols], inputNode)
nodeParamStart = parameter.size() > 2?2:1;
if (pass == ndlPassInitial)
{
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
size_t start_index = ((NDLNode<ElemType>*)params[0])->GetScalar();
size_t num_rows = ((NDLNode<ElemType>*)params[1])->GetScalar();
bool needGradient = node->GetOptionalParameter("needGradient", "false");
nodePtr = m_net.RowSlice(NULL, start_index, num_rows, name);
nodePtr->NeedGradient() = needGradient;
}
}
else if (cnNodeType == DelayNode<ElemType>::TypeName())
{
// setup the parameter position of children so we can hook them up later
nodeParamCount = 1;
// parameters are (rows, [cols], delayNode)
nodeParamStart = parameter.size() > 2?2:1;
if (pass == ndlPassInitial)
{
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
// if we have three parameters the second is columns
size_t cols = parameter.size() > 2 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
bool needGradient = node->GetOptionalParameter("needGradient", "false");
float defaultHiddenActivity = node->GetOptionalParameter("defaultHiddenActivity", "0.1");
nodePtr = m_net.Delay(NULL, defaultHiddenActivity, rows, cols, name);
size_t delayTime = node->GetOptionalParameter("delayTime","1");
((DelayNode<ElemType>*)nodePtr)->SetDelay(delayTime);
nodePtr->NeedGradient() = needGradient;
}
}
else if (cnNodeType == ConvolutionNode<ElemType>::TypeName())
{
if (parameter.size() != 7)
@ -291,14 +291,14 @@ public:
{
int id = 2; // skip weightNode and inputValueNode
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, id, parameter.size()-id, pass);
id = 0; // reset counter because the params array starts at zero
size_t kernelWidth = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t kernelHeight = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t outputChannels = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t horizontalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t verticalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
assert (id == 5);
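The five scalars read here fix the geometry of the convolution. For the common case without zero padding (an assumption; padding handling is outside the visible hunk), the output dimensions follow the usual rule:

    W_{\text{out}} = \left\lfloor \frac{W_{\text{in}} - \text{kernelWidth}}{\text{horizontalSubsample}} \right\rfloor + 1,
    \qquad
    H_{\text{out}} = \left\lfloor \frac{H_{\text{in}} - \text{kernelHeight}}{\text{verticalSubsample}} \right\rfloor + 1,

with outputChannels giving the number of feature maps produced per location.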
@ -324,13 +324,13 @@ public:
{
int id = 1; // skip inputValueNode
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, id, parameter.size() - id, pass);
id = 0; // reset counter because the params array starts at zero
size_t windowWidth = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t windowHeight = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t horizontalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t verticalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
assert (id == 4);
@ -351,13 +351,13 @@ public:
{
int id = 1; // skip inputValueNode
// evaluate only scalar parameters
vector<void*> params = EvaluateParameters(node, baseName, id, parameter.size() - id, pass);
id = 0; // reset counter because the params array starts at zero
size_t windowWidth = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t windowHeight = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t horizontalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
size_t verticalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
assert (id == 4);
@ -409,8 +409,8 @@ public:
break;
}
// process common optional parameters (like "tag");
ProcessOptionalParameters(node);
break;
}
case ndlPassFinal:
@ -419,96 +419,96 @@ public:
}
#ifdef LATER
// EvaluateDotName - Evaluate a dot name and resolve to target node
// node - NDLNode of the script
// nodeParam - NDLNode parameter we are evaluating
// baseName - name of the base node
// pass - which pass through the NDL nodes
// returns: the node that is the evaluated parameter
virtual NDLNode<ElemType>* EvaluateDotName(NDLNode<ElemType>* node, NDLNode<ElemType>* nodeParam, const std::wstring& baseNameP, const NDLPass pass)
{
if (pass > ndlPassInitial && evaluateNode)
{
std::string name = nodeParam->GetName();
std::wstring wname = msra::strfun::utf16(name);
if (nodeParam->GetType() == ndlTypeDotParameter)
{
// When we see a variable of the form "A.B" in a macro, we need to resolve it to an actual node, by first constructing its
// fully-qualified name. There are 2 possibilities:
// 1) "A" was defined locally within the macro. In this case, we must find the fully-qualified name of the node that this macro
// call is being assigned to (eg, "C" in the example "C=Macro(X)"), and concatenate its name with "A.B" (eg, "C.A.B").
// 2) "A" was passed in as a parameter to a macro. In this case, we must find the fully-qualified name of the node that
// was passed in as "A", and replace the "A" and "A.B" with this name.
// Consider the following example:
// NdlBLob=[
// P=MacroCall1(...)
// C=MacroCall2(P)
// ]
// # MacroDefinition
// MacroCall2(X)
// {
// A=MacroCall3(...)
// D=Times(A.B,X.B)}
// }
//
// In this example, in the call D=Times(A.B,X.B), we need to resolve A.B and X.B appropriately.
// Specifically, "A.B" must be resolved to the fully qualified name "C.A.B", whereas "X.B" must be resolved to the fully qualified name "P.B".
// We then use this fully-qualified name to look up this node in the model (using "m_net.GetNodeFromName").
std::size_t firstDotPos = name.find_first_of(".");
if (firstDotPos == std::string::npos)
{
LogicError("nodeParam of type \"ndlTypeDotParameter\" doesn't have a dot in its name: %s", name.c_str());
}
std::string nameBeforeDot = name.substr(0, firstDotPos);
std::string nameAfterDot = name.substr(firstDotPos + 1, name.size() - (firstDotPos + 1));
// look up if "nameBeforeDot" was a parameter to the macro.
NDLNode<ElemType>* resolvedParam = nodeParam->GetParentScript()->FindSymbol(nameBeforeDot);
if (resolvedParam != nullptr && resolvedParam->GetType() == ndlTypeMacroCall)
{
// if "nameBeforeDot" was a parameter to the macro, builds its fully qualified name by
// replacing "nameBeforeDot" with the fully qualified name of the node passed in as the parameter.
NDLScript<ElemType>* parentScript = resolvedParam->GetParentScript();
baseName = parentScript->GetBaseName();
std::wstring resolvedParamName = msra::strfun::utf16(resolvedParam->GetName());
wname = baseName.empty() ?
resolvedParamName + L"." + msra::strfun::utf16(nameAfterDot) :
baseName + L"." + resolvedParamName + L"." + msra::strfun::utf16(nameAfterDot);
}
else if (!baseName.empty())
{
// else, "nameBeforeDot" wasn't a parameter to the macro, so treat it as a local variable.
wname = baseName + L"." + wname;
}
}
else if (!baseName.empty())
{
wname = baseName + L"." + wname;
}
// fully qualified names can be looked up in the model
if (m_net.NodeNameExist(wname))
{
void* np = (void*)m_net.GetNodeFromName(wname);
nodeParam->SetEvalValue(np);
}
// NOTE: there is a bug here, we allow an abbreviated node reference (i.e. L1.BFF) based on return values in NDL
// when the actual full node reference that the computational network uses would be L1.BFF.FF.P, so that is what CN sees
// can we do the normal find symbol here to allow abbreviated node references?
// if we still didn't get a value, throw an error
if (nodeParam->GetEvalValue() == nullptr)
{
LogicError("Dot name could not be resolved '%s': should have a node named '%ls' in computational network\n", nodeParam->GetName().c_str(), name.c_str());
}
}
return nodeParam;
}
#endif
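To make the dotted-name resolution above easier to follow, here is a minimal stand-alone sketch (not part of CNTK; the node name is hypothetical) of the only string manipulation the code relies on, namely splitting a reference such as "L1.BFF" at its first dot:

#include <iostream>
#include <string>

int main()
{
    std::string name = "L1.BFF";                              // hypothetical dotted node reference
    size_t firstDotPos = name.find_first_of(".");
    std::string nameBeforeDot = name.substr(0, firstDotPos);  // "L1"  -> looked up as a macro parameter
    std::string nameAfterDot = name.substr(firstDotPos + 1);  // "BFF" -> appended to the resolved base name
    std::cout << nameBeforeDot << " . " << nameAfterDot << std::endl;
    return 0;
}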
// EvaluateParameter - Evaluate a parameter of a call
@ -534,46 +534,46 @@ public:
{
// if the node is a parameter then look it up in the symbol table
case ndlTypeUndetermined: // an undetermined parameter needs to be looked up again in the symbol table
case ndlTypeParameter:
{
// lookup the parameter
NDLNode<ElemType>* nodeResolve = script->FindSymbol(nodeParam->GetName());
// if we have resolved the name, no need to continue evaluation
if (!(pass == ndlPassResolve && nodeResolve && nodeParam->GetEvalValue() == nullptr))
{
break;
}
if (pass > ndlPassInitial && evaluateNode && nodeResolve)
{
std::string name = nodeResolve->GetName();
// we need to start from the parent script, because that is the namespace of the parameter being passed in
NDLScript<ElemType>* parentScript = nodeResolve->GetParentScript();
nodeResolve = parentScript->FindSymbol(name, true);
// if we still didn't get a value
if (nodeResolve == nullptr || nodeResolve->GetEvalValue() == nullptr)
{
// check for the fully qualified name in the computation network
// this is needed for MEL processing, since CN node names can be used as parameters in MEL
std::wstring wname = msra::strfun::utf16(name);
if (m_net.NodeNameExist(wname))
{
void* np = (void*)m_net.GetNodeFromName(wname);
// if we don't have a resolve node, it's because the name didn't exist in NDL
if (!nodeResolve)
nodeResolve = nodeParam;
nodeResolve->SetEvalValue(np);
}
else
{
RuntimeError("Parameter name could not be resolved '%s'\n", name.c_str());
}
}
}
nodeParam = nodeResolve;
break;
}
case ndlTypeFunction:
if (evaluateNode)
Evaluate(nodeParam, baseName, pass);
@ -635,10 +635,10 @@ public:
assert(np != nullptr);
inputs.push_back((void*)np);
}
else if (pass == ndlPassInitial) // for initial pass we are only interested in resolved nodes (to get constant values)
{
inputs.push_back((void*)nodeResult);
}
// NOTE: in final pass inputs are always NULL
}
@ -649,11 +649,11 @@ public:
// ProcessOptionalParameters - Process the optional parameters of a node
virtual void ProcessOptionalParameters(NDLNode<ElemType>* node)
{
vector<NDLNode<ElemType>*> params = node->GetParameters(true); // get all the optional parameters only
ComputationNode<ElemType>* compNode = (ComputationNode<ElemType>*)node->GetEvalValue();
std::string empty;
// loop through all the optional parameters processing them as necessary
for (NDLNode<ElemType>* param : params)
{
// make sure it's a "tag" optional parameter, that's all we process currently

Просмотреть файл

@ -344,9 +344,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues0, const Matrix<ElemType>& inputFunctionValues1,
Matrix<ElemType>& softmaxOfRight, Matrix<ElemType>& logSoftmaxOfRight)
{
logSoftmaxOfRight.AssignLogSoftmaxOf(inputFunctionValues1, true);
softmaxOfRight.SetValue(logSoftmaxOfRight);
softmaxOfRight.InplaceExp();
functionValues.AssignInnerProductOfMatrices(inputFunctionValues0, logSoftmaxOfRight);
functionValues*=(-1);
#if NANCHECK


@ -6,7 +6,7 @@
// cn.cpp : Defines the entry point for the console application.
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "ComputationNetwork.h"
@ -279,19 +279,19 @@ void DoWriteOutput(const ConfigParameters& config)
SimpleOutputWriter<ElemType> writer(net, 1);
if (config.Exists("writer"))
{
ConfigParameters writerConfig (config("writer"));
if (config.Exists("writer"))
{
ConfigParameters writerConfig (config("writer"));
bool bWriterUnittest = writerConfig("unittest","false");
DataWriter<ElemType> testDataWriter(writerConfig);
writer.WriteOutput(testDataReader,mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, bWriterUnittest);
}
else if (config.Exists("outputPath"))
{
wstring outputPath = config("outputPath"); // crashes if no default given?
writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, epochSize);
}
//writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize);
}
namespace Microsoft { namespace MSR { namespace CNTK {
@ -538,7 +538,7 @@ void DoCommand(const ConfigParameters& config)
DoCreateLabelMap<ElemType>(commandParams);
else
RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());
NDLScript<ElemType> ndlScript;
ndlScript.ClearGlobal(); // clear global macros between commands
}
@ -554,7 +554,7 @@ std::string TimeDateStamp()
struct tm now;
_localtime64_s (&now, &localtime); // convert
#else
time_t t = time(NULL);
struct tm now = *localtime(&t);
#endif
char buf[30];


@ -5,10 +5,10 @@ m1=[
HDim=256
LDim=10
macro(test)
{
local=test
}
features=Input(SDim, tag=feature)
labels=Input(LDim, tag=label)


@ -275,14 +275,14 @@ namespace CNTKMathTest
M3.SetValue(M0);
M3.InplaceLogSoftmax(true);
M3.InplaceExp();
M2(0,0) = 0.0474; M2(0,1) = 0.0474; M2(0,2) = 0.0474;
M2(1,0) = 0.9526; M2(1,1) = 0.9526; M2(1,2) = 0.9526;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceLogSoftmax(false);
M3.InplaceExp();
M2(0,0) = 0.0900; M2(0,1) = 0.2447; M2(0,2) = 0.6652;
M2(1,0) = 0.0900; M2(1,1) = 0.2447; M2(1,2) = 0.6652;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));


@ -278,8 +278,8 @@ namespace CNTKMathTest
Assert::IsTrue(M2.IsEqualTo(M3, 0.0001f));
}
TEST_METHOD(GPUMatrixRowSlice)
{
float *fArray = new float[15];
fArray[0] = 1; fArray[5] = 6; fArray[10] = 11;
fArray[1] = 2; fArray[6] = 7; fArray[11] = 12;
@ -308,7 +308,7 @@ namespace CNTKMathTest
M3 += M0;
M0.AddToRowSliceValuesOf(M1, 2,2);
Assert::IsTrue(M3.IsEqualTo(M0, 0.0001));
}
TEST_METHOD(GPUKhatriRaoProduct)
{


@ -468,14 +468,14 @@ namespace CNTKMathTest
M3.SetValue(M0);
M3.InplaceLogSoftmax(true);
M3.InplaceExp();
M2(0,0) = 0.0474; M2(0,1) = 0.0474; M2(0,2) = 0.0474;
M2(1,0) = 0.9526; M2(1,1) = 0.9526; M2(1,2) = 0.9526;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));
M3.SetValue(M0);
M3.InplaceLogSoftmax(false);
M3.InplaceExp();
M2(0,0) = 0.0900; M2(0,1) = 0.2447; M2(0,2) = 0.6652;
M2(1,0) = 0.0900; M2(1,1) = 0.2447; M2(1,2) = 0.6652;
Assert::IsTrue(M3.IsEqualTo(M2, 0.0001));


@ -19,7 +19,7 @@
#include <chrono>
#include <exception>
#ifdef _WIN32
#include <Windows.h>
#else
#ifndef max
@ -646,7 +646,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
dcopy((int)numRows, reinterpret_cast <double*>(pArray+j), (int)numCols, reinterpret_cast <double*>(m_pArray + LocateColumn(j)), 1);
#else
cblas_dcopy ((int)numRows, reinterpret_cast <double*>(pArray+j), (int)numCols, reinterpret_cast <double*>(m_pArray + LocateColumn(j)), 1);
#endif
}
}
@ -660,7 +660,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
scopy((int)numRows, reinterpret_cast <float*>(pArray+j), (int)numCols, reinterpret_cast <float*>(m_pArray + LocateColumn(j)), 1);
#else
cblas_scopy ((int)numRows, reinterpret_cast <float*>(pArray+j), (int)numCols, reinterpret_cast <float*>(m_pArray + LocateColumn(j)), 1);
#endif
}
}
@ -761,7 +761,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (IsEmpty())
throw std::logic_error("SetUniformRandomValue: Matrix is empty.");
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
std::ranlux64_base_01 generator;
generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed);
#else
@ -796,7 +796,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
throw std::logic_error("SetUniformRandomValue: Matrix is empty.");
auto& us = *this;
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
std::ranlux64_base_01 generator;
generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed);
#else
@ -820,7 +820,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
throw std::logic_error("SetUniformRandomValue: Matrix is empty.");
auto& us = *this;
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
std::ranlux64_base_01 generator;
generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed);
#else
@ -857,7 +857,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
throw std::logic_error("SetUniformRandomValue: Matrix is empty.");
auto& us = *this;
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
std::ranlux64_base_01 generator;
generator.seed(seed==USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed);
#else
@ -934,87 +934,87 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void CPUMatrix<ElemType>::RmsProp(CPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,
ElemType RMS_WGT_MAX,
ElemType RMS_WGT_DEC,
ElemType RMS_WGT_MIN
)
{
const ElemType floor = 1e-6f;
size_t n = gradients.GetNumElements();
ElemType *curr_grad=gradients.m_pArray;
if (IsEmpty() || GetNumCols() < gradients.GetNumCols() * 3)
{
Resize(gradients.GetNumRows(), gradients.GetNumCols() * 3);
SetValue(0.0);
ElemType *avars=m_pArray; // accumulated variances for RMS scaling
ElemType *steps=m_pArray+2*n; // current step size
// initialize moving average of gradient-squared
for( long i = 0; i < n; i++ )
avars[i] = curr_grad[i]*curr_grad[i];
// initialize starting step size
for( long i = 0; i < n; i++ )
steps[i] = ElemType(0.02);
}
ElemType *avars=m_pArray; // accumulated variances for RMS scaling
ElemType *signs=m_pArray+n; // sign of previous gradient
ElemType *steps=m_pArray+2*n; // current step size
assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == gradients.GetNumCols() * 3);
ElemType ONE_MINUS_GAMMA = ElemType(1.0) - RMS_GAMMA;
//int upd[] = {
// 2,2,0,
// 2,2,0,
// 1,1,1,
// 2,2,0,
// 1,2,1,
// 0,2,2,
// 1,1,1,
// 0,2,2,
// 0,2,2,
//};
// for (long i=0; i<n; i++)
// {
// avars[i] = RMS_GAMMA * avars[i] + ONE_MINUS_GAMMA * (curr_grad[i] * curr_grad[i]);
// // grad sign base 3: 0->neg, 1->zero, 2->pos
// const int grad_sign = 1 + (ElemType(0) < curr_grad[i]) - (curr_grad[i] < ElemType(0));
// // signs[i] contains three consecutive grad_sign
// signs[i] = 3*(int(signs[i]) % 9) + grad_sign;
// switch(upd[int(signs[i])])
// {
// case 0:
// steps[i] = max(steps[i] * RMS_WGT_DEC, RMS_WGT_MIN);
// break;
// case 2:
// steps[i] = min(steps[i] * RMS_WGT_INC, RMS_WGT_MAX);
// break;
// }
// curr_grad[i] *= steps[i] / sqrt(avars[i] + floor);
// }
for (long i=0; i<n; i++)
{
avars[i] = RMS_GAMMA * avars[i] + ONE_MINUS_GAMMA * (curr_grad[i] * curr_grad[i]);
const int grad_sign = (ElemType(0) < curr_grad[i]) - (curr_grad[i] < ElemType(0));
if( signs[i] * grad_sign > 0 )
steps[i] = min(steps[i] * RMS_WGT_INC, RMS_WGT_MAX);
else
steps[i] = max(steps[i] * RMS_WGT_DEC, RMS_WGT_MIN);
curr_grad[i] *= steps[i] / sqrt(avars[i] + floor);
signs[i] = (ElemType)grad_sign;
}
}
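For readers skimming the RmsProp member above: the per-element rule grows the step size while the gradient sign stays stable, shrinks it when the sign flips, and normalizes the raw gradient by its running RMS. A minimal sketch with plain doubles (hyperparameter values are illustrative, not taken from CNTK defaults):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    const double gamma = 0.99, inc = 1.2, dec = 0.75;      // mirror RMS_GAMMA / RMS_WGT_INC / RMS_WGT_DEC
    const double stepMax = 10.0, stepMin = 0.1, floor = 1e-6;
    double avar = 0.0, sign = 0.0, step = 0.02;            // per-element state: RMS accumulator, last sign, step size
    const double grads[] = { 0.5, 0.4, -0.3 };             // a tiny made-up gradient sequence for one parameter

    for (double g : grads)
    {
        avar = gamma * avar + (1.0 - gamma) * g * g;                   // running mean of squared gradients
        const int gradSign = (0.0 < g) - (g < 0.0);                    // -1, 0 or +1
        step = (sign * gradSign > 0) ? std::min(step * inc, stepMax)   // same sign as last time: speed up
                                     : std::max(step * dec, stepMin);  // sign flipped (or zero): slow down
        const double update = g * step / std::sqrt(avar + floor);      // scaled, RMS-normalized step
        sign = gradSign;
        std::printf("g=% .2f  update=% f\n", g, update);
    }
    return 0;
}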
@ -1924,7 +1924,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ElemType sum = 0;
foreach_row(i, a)
sum += exp(us(i,j) = a(i,j) - maxV);
sum = log(sum);
foreach_row(i, us)
us(i,j) -= sum;
}
@ -1943,7 +1943,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ElemType sum = 0;
foreach_column(j,a)
sum += exp(us(i,j) = a(i,j) - maxV);
sum = log(sum);
foreach_column(j,us)
us(i,j) -= sum;
}
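Both loops above implement the usual max-shifted log-softmax, log p_i = x_i - maxV - log(sum_j exp(x_j - maxV)), which avoids overflow in exp. A compact stand-alone sketch of the same computation for a single column held in a std::vector (not the Matrix class used above):

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable log-softmax of one column.
std::vector<double> LogSoftmax(const std::vector<double>& x)
{
    const double maxV = *std::max_element(x.begin(), x.end());
    double sum = 0.0;
    std::vector<double> y(x.size());
    for (size_t i = 0; i < x.size(); ++i)
        sum += std::exp(y[i] = x[i] - maxV);   // shift by the max before exponentiating
    const double logSum = std::log(sum);
    for (double& v : y)
        v -= logSum;                           // subtract the log of the shifted normalizer
    return y;
}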
@ -2383,16 +2383,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
return (ElemType)dasum((int)GetNumElements(), reinterpret_cast <double*>(m_pArray), 1);
#else
return (ElemType)cblas_dasum((int)GetNumElements(), reinterpret_cast <double*>(m_pArray), 1);
#endif
}
else
{
#pragma warning (suppress: 4244)
#ifndef USE_MKL
return sasum((int)GetNumElements(), reinterpret_cast <float*>(m_pArray), 1);
#else
return cblas_sasum ((int)GetNumElements(), reinterpret_cast <float*>(m_pArray), 1);
#endif
}
}
@ -2525,7 +2525,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(0,j) = (ElemType) dnrm2(m, reinterpret_cast <double*>(us.m_pArray+us.LocateColumn(j)), 1);
#else
c(0,j) = (ElemType) cblas_dnrm2 (m, reinterpret_cast <double*>(us.m_pArray+us.LocateColumn(j)), 1);
#endif
}
}
@ -2538,7 +2538,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(0,j) = snrm2(m, reinterpret_cast <float*>(us.m_pArray+us.LocateColumn(j)), 1);
#else
c(0,j) = cblas_snrm2 (m, reinterpret_cast <float*>(us.m_pArray+us.LocateColumn(j)), 1);
#endif
}
}
@ -2555,7 +2555,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(i,0) = dnrm2(n, reinterpret_cast <double*>(us.m_pArray+i), m);
#else
c(i,0) = cblas_dnrm2 (n, reinterpret_cast <double*>(us.m_pArray+i), m);
#endif
}
}
@ -2568,7 +2568,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(i,0) = snrm2(n, reinterpret_cast <float*>(us.m_pArray+i), m);
#else
c(i,0) = cblas_snrm2 (n, reinterpret_cast <float*>(us.m_pArray+i), m);
#endif
}
@ -3461,8 +3461,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
char transA, transB;
#else
CBLAS_TRANSPOSE mklTransA;
CBLAS_TRANSPOSE mklTransB;
#endif
if (transposeA)
@ -3473,7 +3473,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
transA = (char)MatrixTranspose::Trans;
#else
mklTransA = CBLAS_TRANSPOSE::CblasTrans;
#endif
}
else
@ -3484,7 +3484,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
transA = (char)MatrixTranspose::NoTrans;
#else
mklTransA = CBLAS_TRANSPOSE::CblasNoTrans;
#endif
}
@ -3496,7 +3496,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
transB = (char)MatrixTranspose::Trans;
#else
mklTransB = CBLAS_TRANSPOSE::CblasTrans;
#endif
}
else
@ -3507,7 +3507,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
transB = (char)MatrixTranspose::NoTrans;
#else
mklTransB = CBLAS_TRANSPOSE::CblasNoTrans;
#endif
}
@ -3520,20 +3520,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ldc = (int)c.GetNumRows();
if (sizeof(ElemType) == sizeof(double))
{
#ifndef USE_MKL
dgemm(transA, transB, m, n, k, alpha, reinterpret_cast <double*>(a.m_pArray), lda, reinterpret_cast <double*>(b.m_pArray), ldb, beta, reinterpret_cast <double*>(c.m_pArray), ldc);
#else
cblas_dgemm ((CBLAS_ORDER) BLAS_COLMAJOR, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast <double*>(a.m_pArray), lda, reinterpret_cast <double*>(b.m_pArray), ldb, beta, reinterpret_cast <double*>(c.m_pArray), ldc);
#endif
}
else
{
#pragma warning (suppress: 4244)
#ifndef USE_MKL
sgemm(BLAS_COLMAJOR transA, transB, m, n, k, alpha, reinterpret_cast <float*>(a.m_pArray), lda, reinterpret_cast <float*>(b.m_pArray), ldb, beta, reinterpret_cast <float*>(c.m_pArray), ldc);
#else
cblas_sgemm ((CBLAS_ORDER) BLAS_COLMAJOR, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast <float*>(a.m_pArray), lda, reinterpret_cast <float*>(b.m_pArray), ldb, beta, reinterpret_cast <float*>(c.m_pArray), ldc);
#endif
}
}
@ -3636,16 +3636,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
daxpy(len, alpha, reinterpret_cast <double*>(a.m_pArray), incx, reinterpret_cast <double*>(c.m_pArray), incy);
#else
cblas_daxpy(len, alpha, reinterpret_cast <double*>(a.m_pArray), incx, reinterpret_cast <double*>(c.m_pArray), incy);
#endif
}
else
{
#pragma warning (suppress: 4244)
#ifndef USE_MKL
saxpy(len, alpha, reinterpret_cast <float*>(a.m_pArray), incx, reinterpret_cast <float*>(c.m_pArray), incy);
#else
cblas_saxpy(len, alpha, reinterpret_cast <float*>(a.m_pArray), incx, reinterpret_cast <float*>(c.m_pArray), incy);
#endif
}
}
@ -3937,7 +3937,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
dscal(len, alpha, reinterpret_cast <double*>(a.m_pArray), incx);
#else
cblas_dscal(len, alpha, reinterpret_cast <double*>(a.m_pArray), incx);
#endif
}
else
@ -3946,7 +3946,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
sscal(len, alpha, reinterpret_cast <float*>(a.m_pArray), incx);
#else
cblas_sscal (len, alpha, reinterpret_cast <float*>(a.m_pArray), incx);
#endif
}
}
@ -3996,7 +3996,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(0,j) = (ElemType)ddot(m, reinterpret_cast <double*>(a.m_pArray+a.LocateColumn(j)), 1, reinterpret_cast <double*>(b.m_pArray+b.LocateColumn(j)), 1);
#else
c(0,j) = (ElemType)cblas_ddot(m, reinterpret_cast <double*>(a.m_pArray+a.LocateColumn(j)), 1, reinterpret_cast <double*>(b.m_pArray+b.LocateColumn(j)), 1);
#endif
}
}
@ -4009,7 +4009,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(0,j) = (ElemType)sdot(m, reinterpret_cast <float*>(a.m_pArray+a.LocateColumn(j)), 1, reinterpret_cast <float*>(b.m_pArray+b.LocateColumn(j)), 1);
#else
c(0,j) = (ElemType)cblas_sdot(m, reinterpret_cast <float*>(a.m_pArray+a.LocateColumn(j)), 1, reinterpret_cast <float*>(b.m_pArray+b.LocateColumn(j)), 1);
#endif
}
}
@ -4026,7 +4026,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(i,0) = ddot(n, reinterpret_cast <double*>(a.m_pArray+i), m, reinterpret_cast <double*>(b.m_pArray+i), m);
#else
c(i,0) = cblas_ddot (n, reinterpret_cast <double*>(a.m_pArray+i), m, reinterpret_cast <double*>(b.m_pArray+i), m);
#endif
}
}
@ -4039,7 +4039,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
c(i,0) = sdot(n, reinterpret_cast <float*>(a.m_pArray+i), m, reinterpret_cast <float*>(b.m_pArray+i), m);
#else
c(i,0) = cblas_sdot (n, reinterpret_cast <float*>(a.m_pArray+i), m, reinterpret_cast <float*>(b.m_pArray+i), m);
#endif
}
}
@ -4068,7 +4068,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
return (ElemType)ddot((int)a.GetNumElements(), reinterpret_cast <double*>(a.m_pArray), 1, reinterpret_cast <double*>(b.m_pArray), 1);
#else
return (ElemType)cblas_ddot ((int)a.GetNumElements(), reinterpret_cast <double*>(a.m_pArray), 1, reinterpret_cast <double*>(b.m_pArray), 1);
#endif
}
else
@ -4077,7 +4077,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifndef USE_MKL
return (ElemType)sdot((int)a.GetNumElements(), reinterpret_cast <float*>(a.m_pArray), 1, reinterpret_cast <float*>(b.m_pArray), 1);
#else
return (ElemType)cblas_sdot ((int)a.GetNumElements(), reinterpret_cast <float*>(a.m_pArray), 1, reinterpret_cast <float*>(b.m_pArray), 1);
#endif
}
}


@ -13,14 +13,14 @@
#include "CommonMatrix.h"
#include "basetypes.h" // for RuntimeError()
#ifdef _WIN32
#ifdef MATH_EXPORTS
#define MATH_API __declspec(dllexport)
#else
#define MATH_API __declspec(dllimport)
#endif
#else // no DLLs on Linux
#define MATH_API
#endif
#ifndef USE_TIME_BASED_SEED
@ -69,12 +69,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void Adagrad(CPUMatrix<ElemType>& gradients);
void RmsProp(CPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,
ElemType RMS_WGT_MAX,
ElemType RMS_WGT_DEC,
ElemType RMS_WGT_MIN
);
void Reshape(const size_t numRows, const size_t numCols);
void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); //by default we only reallocate if need to grow


@ -15,7 +15,7 @@
#include "CPUSparseMatrix.h"
#include <random>
#include <chrono>
#ifdef _WIN32
#include <Windows.h>
#endif
#ifdef LEAKDETECT


@ -16,7 +16,7 @@
#else
#define MATH_API __declspec(dllimport)
#endif
#endif /* Linux - already defined in CPUMatrix.h */
namespace Microsoft { namespace MSR { namespace CNTK {


@ -21,10 +21,10 @@
#include "GPUSparseMatrix.h"
#include <iostream> // for cout
#pragma comment (lib, "cudart.lib") // instruct linker to reference these libs
#pragma comment (lib, "cublas.lib")
#pragma comment (lib, "cusparse.lib")
#pragma comment (lib, "curand.lib")
#pragma comment (lib, "cudart.lib") // instruct linker to reference these libs
#pragma comment (lib, "cublas.lib")
#pragma comment (lib, "cusparse.lib")
#pragma comment (lib, "curand.lib")
#pragma warning (disable: 4267) // conversion from 'size_t' to 'unsigned int'; happens in CUDA <<<a,b>>> syntax if a and b are size_t
#pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
@ -276,30 +276,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_elemSizeAllocated = m_numRows*m_numCols;
// check to make sure we have something to copy (on init we often have zero sized allocations)
if (m_elemSizeAllocated > 0)
{
// first try peer access
int canAccessPeer = false;
CUDA_CALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, m_computeDevice));
if (canAccessPeer)
{
CUDA_CALL(cudaDeviceEnablePeerAccess(m_computeDevice, 0));
CUDA_CALL(cudaMemcpyPeer(d_dst,to_id,m_pArray,m_computeDevice,sizeof(ElemType)*m_numRows*m_numCols));
}
else
{
// peer access didn't work, so fall back to copying through host memory
// make this more efficient by keeping some buffers available for each copy
ElemType* h_dst=NULL;
PrepareDevice();
CUDA_CALL(cudaMallocHost((void**)&h_dst,sizeof(ElemType)*m_numRows*m_numCols));
CUDA_CALL(cudaMemcpy(h_dst,m_pArray,sizeof(ElemType)*m_numRows*m_numCols, cudaMemcpyDeviceToHost));
PrepareDevice((short)to_id);
CUDA_CALL(cudaMemcpy(d_dst,h_dst,sizeof(ElemType)*m_numRows*m_numCols, cudaMemcpyHostToDevice));
CUDA_CALL(cudaFreeHost(h_dst));
}
}
PrepareDevice();
CUDA_CALL(cudaFree(m_pArray));
m_pArray=d_dst;
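The branch above follows the usual CUDA device-to-device copy pattern: use a direct peer copy when the destination device can access the source, otherwise stage the data through pinned host memory. A stripped-down sketch of that pattern (the function name is illustrative and error checks are omitted; the CUDA runtime calls themselves are real):

#include <cuda_runtime.h>

// Copy 'bytes' from srcPtr on device srcDev to dstPtr on device dstDev.
void CopyAcrossDevices(void* dstPtr, int dstDev, const void* srcPtr, int srcDev, size_t bytes)
{
    int canAccessPeer = 0;
    cudaDeviceCanAccessPeer(&canAccessPeer, dstDev, srcDev);
    if (canAccessPeer)
    {
        cudaMemcpyPeer(dstPtr, dstDev, srcPtr, srcDev, bytes);      // direct GPU-to-GPU transfer
    }
    else
    {
        void* hostBuf = nullptr;                                    // pinned staging buffer on the host
        cudaMallocHost(&hostBuf, bytes);
        cudaSetDevice(srcDev);
        cudaMemcpy(hostBuf, srcPtr, bytes, cudaMemcpyDeviceToHost);
        cudaSetDevice(dstDev);
        cudaMemcpy(dstPtr, hostBuf, bytes, cudaMemcpyHostToDevice);
        cudaFreeHost(hostBuf);
    }
}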
@ -840,7 +840,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (!(matrixFlags&matrixFormatRowMajor))
{
CUDA_CALL(cudaMemcpy(m_pArray, pArray, sizeof(ElemType)*GetNumElements(),
(matrixFlags&matrixFlagSetValueOnDevice)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice));
}
else
@ -1014,62 +1014,62 @@ namespace Microsoft { namespace MSR { namespace CNTK {
_adagrad<ElemType><<<blocksPerGrid, threadsPerBlock>>>(m_pArray, gradients.m_pArray, GetNumElements());
}
template<class ElemType>
void GPUMatrix<ElemType>::RmsProp(GPUMatrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,
ElemType RMS_WGT_MAX,
ElemType RMS_WGT_DEC,
ElemType RMS_WGT_MIN
)
{
const ElemType floor = 1e-6f;
static ElemType *upd_gpu = (ElemType*)0;
size_t n = gradients.GetNumElements();
int blocksPerGrid = (GetNumElements() + threadsPerBlock -1 )/threadsPerBlock;
if (IsEmpty() || GetNumCols() < gradients.GetNumCols() * 3)
{
Resize(gradients.GetNumRows(), gradients.GetNumCols() * 3);
SetValue(0.0);
ElemType *avars=m_pArray; // accumulated variances for RMS scaling
ElemType *signs=m_pArray+n; // sign of previous gradient
ElemType *steps=m_pArray+2*n; // current step size
_rmsprop_init<ElemType><<<blocksPerGrid, threadsPerBlock>>>(avars,signs,steps,gradients.m_pArray,n);
}
ElemType *avars=m_pArray; // accumulated variances for RMS scaling
ElemType *signs=m_pArray+n; // sign of previous gradient
ElemType *steps=m_pArray+2*n; // current step size
assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == gradients.GetNumCols() * 3);
if( !upd_gpu )
{
ElemType upd[] = {
2,2,0,
2,2,0,
1,1,1,
2,2,0,
1,2,1,
0,2,2,
1,1,1,
0,2,2,
0,2,2,
};
CUDA_CALL(cudaMalloc((void**)&upd_gpu,sizeof(ElemType)*27));
CUDA_CALL(cudaMemcpy(upd_gpu,upd,sizeof(ElemType)*27,cudaMemcpyHostToDevice));
}
_rmsprop<ElemType><<<blocksPerGrid, threadsPerBlock>>>(avars,signs,steps,gradients.m_pArray,n,
RMS_GAMMA,RMS_WGT_INC,RMS_WGT_MAX,RMS_WGT_DEC,RMS_WGT_MIN,
floor,upd_gpu);
}
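Note how both the CPU and GPU RmsProp variants pack all optimizer state into one matrix three gradient-widths wide: running variances first, previous signs next, per-element step sizes last. A small pointer-arithmetic sketch of that layout (sizes are illustrative):

#include <vector>

int main()
{
    const size_t n = 1000;                  // number of gradient elements
    std::vector<float> state(3 * n, 0.0f);  // single buffer backing all three state arrays

    float* avars = state.data();            // [0,   n): accumulated variances for RMS scaling
    float* signs = state.data() + n;        // [n,  2n): sign of the previous gradient
    float* steps = state.data() + 2 * n;    // [2n, 3n): current per-element step size

    for (size_t i = 0; i < n; ++i)
        steps[i] = 0.02f;                   // same initial step size as the code above
    (void)avars; (void)signs;
    return 0;
}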
template<class ElemType>
void GPUMatrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
@ -2682,7 +2682,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void GPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const GPUMatrix<ElemType>& a, const bool transposeA, const GPUMatrix<ElemType>& b, const bool transposeB,
ElemType beta, GPUMatrix<ElemType>& c)
{
a.PrepareDevice();
if ((a.GetComputeDeviceId()!=b.GetComputeDeviceId()) || (b.GetComputeDeviceId()!=c.GetComputeDeviceId())) //different GPUs
{
throw std::invalid_argument("All matrices must be on the same GPU");


@ -22,14 +22,14 @@ struct CUstream_st;
typedef struct CUstream_st *cudaStream_t;
#ifdef _WIN32
#ifndef MATH_API
#ifdef MATH_EXPORTS
#define MATH_API __declspec(dllexport)
#else
#define MATH_API __declspec(dllimport)
#endif
#endif /* MATH_API */
#else // no DLLs in Linux
#define MATH_API
#endif


@ -604,10 +604,10 @@ __global__ void _logSoftMaxColWise(
for (long i=0;i<m_numRows;++i)
{
ElemType tmp = a[IDX2C(i,col_id,m_numRows)]-maxV[threadIdx.x];
Sum[threadIdx.x] += (sizeof(ElemType)==sizeof(float) ? expf(tmp) : exp(tmp));
}
Sum[threadIdx.x] = maxV[threadIdx.x] + (sizeof(ElemType)==sizeof(float)?logf(Sum[threadIdx.x]):log(Sum[threadIdx.x]));
for (long i=0;i<m_numRows;++i)
{
a[IDX2C(i,col_id,m_numRows)] -= Sum[threadIdx.x] ;
@ -741,8 +741,8 @@ __global__ void _assignColumnwiseLogSoftmaxOf(
for (int i= threadIdx.x*loadPerThread; i< (threadIdx.x == blockDim.x - 1 ? m_numRows : (threadIdx.x+1)*loadPerThread);++i)
{
ElemType tmp=a[IDX2C(i,blockIdx.x,m_numRows)]-colMax[0];
us[IDX2C(i,blockIdx.x,m_numRows)]=tmp;
partials[threadIdx.x]+=(sizeof(ElemType)==sizeof(float)?expf(tmp):exp(tmp));
}
__syncthreads();
@ -798,7 +798,7 @@ __global__ void _assignColumnwiseLogSoftmaxOf(
if (threadIdx.x==0)
{
colSum[0] = partials[0]+partials[1]+partials[2]+partials[3];
colSum[0] = (sizeof(ElemType)==sizeof(float)?logf(colSum[0]):log(colSum[0]));
}
__syncthreads();
//end of finding sums
@ -833,10 +833,10 @@ __global__ void _logSoftMaxRowWise(
for (long j=0;j<m_numCols;++j)
{
ElemType tmp = a[IDX2C(row_id,j,m_numRows)]-maxV[threadIdx.x];
Sum[threadIdx.x] += sizeof(ElemType)==sizeof(float) ? expf(tmp) : exp(tmp);
}
Sum[threadIdx.x] = maxV[threadIdx.x]+(sizeof(ElemType)==sizeof(float)?logf(Sum[threadIdx.x]):log(Sum[threadIdx.x]));
for (long j=0;j<m_numCols;++j)
{
a[IDX2C(row_id,j,m_numRows)] -= Sum[threadIdx.x] ;
@ -995,68 +995,68 @@ __global__ void _adagrad(
template<class ElemType>
__global__ void _rmsprop_init(
ElemType* avars, ElemType* signs, ElemType* steps,
ElemType* curr_grad,
const LONG64 N
)
{
LONG64 i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= N)
return;
ElemType tmp = curr_grad[i];
avars[i] = tmp * tmp;
signs[i] = ElemType(0.0);
steps[i] = ElemType(0.02);
}
template<class ElemType>
__global__ void _rmsprop(
ElemType* avars, ElemType* signs, ElemType* steps,
ElemType* curr_grad,
const LONG64 N,
ElemType RMS_GAMMA,ElemType RMS_WGT_INC,ElemType RMS_WGT_MAX,ElemType RMS_WGT_DEC,ElemType RMS_WGT_MIN,
ElemType floor,
ElemType *upd_gpu
)
{
LONG64 i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= N)
return;
avars[i] = RMS_GAMMA * avars[i] + (ElemType(1.0)-RMS_GAMMA)* (curr_grad[i] * curr_grad[i]);
//// grad sign base 3: 0->neg, 1->zero, 2->pos
//const int grad_sign = 1 + (ElemType(0) < curr_grad[i]) - (curr_grad[i] < ElemType(0));
//// signs[i] contains three consecutive grad_sign
//signs[i] = 3*(int(signs[i]) % 9) + grad_sign;
//// update according to the following table:
//// (!pos,!pos,!pos) or (!neg,!neg,!neg): RMS_WGT_INC
//// (!neg,!neg,neg) or (!pos,!pos,pos): RMS_WGT_DEC
//// otherwise: no action
//switch(int(upd_gpu[int(signs[i])]))
//{
//case 0:
// steps[i] = max(steps[i] * RMS_WGT_DEC, RMS_WGT_MIN);
// break;
//case 2:
// steps[i] = min(steps[i] * RMS_WGT_INC, RMS_WGT_MAX);
// break;
//}
//curr_grad[i] *= steps[i] / sqrt(avars[i] + floor);
const int grad_sign = (ElemType(0) < curr_grad[i]) - (curr_grad[i] < ElemType(0));
if( signs[i] * grad_sign > 0 )
steps[i] = min(steps[i] * RMS_WGT_INC, RMS_WGT_MAX);
else
steps[i] = max(steps[i] * RMS_WGT_DEC, RMS_WGT_MIN);
curr_grad[i] *= steps[i] / sqrt(avars[i] + floor);
signs[i] = grad_sign;
}
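The grad_sign expression used in both kernels (and in the CPU path) is the standard branch-free sign trick: each comparison evaluates to 0 or 1, so their difference is -1, 0 or +1. A tiny stand-alone illustration:

#include <cstdio>

// Branch-free sign: returns -1, 0 or +1.
int Sign(double x)
{
    return (0.0 < x) - (x < 0.0);
}

int main()
{
    std::printf("%d %d %d\n", Sign(-3.5), Sign(0.0), Sign(2.0));   // prints: -1 0 1
    return 0;
}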


@ -21,7 +21,7 @@
#pragma warning (disable: 4267) // conversion from 'size_t' to 'unsigned int'; happens in CUDA <<<a,b>>> syntax if a and b are size_t
#pragma warning (disable: 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
#ifdef _WIN32
// thread local storage to access the current stream, initialize to default stream
extern __declspec (thread)
#endif


@ -42,9 +42,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUSparseMatrix(const GPUSparseMatrix<ElemType>&);
GPUSparseMatrix(const GPUMatrix<ElemType>&);
#ifndef LINUX
GPUSparseMatrix(GPUSparseMatrix<ElemType>&&);
#endif /* LINUX */
~GPUSparseMatrix();
public:
void Resize(const size_t numRows, const size_t numCols, size_t size = 0);
@ -94,9 +94,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType> CopyToDenseMatrix();
GPUSparseMatrix<ElemType>& operator=(const GPUSparseMatrix<ElemType>& deepCopy);
#ifndef LINUX
GPUSparseMatrix<ElemType>& operator=(GPUSparseMatrix<ElemType>&& moveFrom);
#endif /* LINUX */
GPUSparseMatrix<ElemType> operator+ (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType> operator- (const GPUSparseMatrix<ElemType>& a) const;
GPUSparseMatrix<ElemType>& operator^= (ElemType alpha); //element-wise power


@ -1116,12 +1116,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void Matrix<ElemType>::RmsProp(Matrix<ElemType>& gradients,
ElemType RMS_GAMMA,
ElemType RMS_WGT_INC,
ElemType RMS_WGT_MAX,
ElemType RMS_WGT_DEC,
ElemType RMS_WGT_MIN
)
{
DecideAndMoveToRightDevice(*this, gradients);
@ -1470,7 +1470,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::operator-= (const Matrix<ElemType>& a)
{
if (a.IsEmpty())
throw std::logic_error("Minus Operation: Matrix a is empty.");
DecideAndMoveToRightDevice(*this, a);
@ -1481,7 +1481,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
return *this;
}
@ -2391,7 +2391,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (sizeof(ElemType)==sizeof(float))
{
if (!isfinite((float)threshold))
{
(*this) = a;
return *this;
@ -3427,7 +3427,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else
{
GPUMatrix<ElemType> firstDummy = transposeA ? a.m_GPUMatrix->Transpose()*alpha : (*a.m_GPUMatrix)*alpha;
GPUMatrix<ElemType> & first= firstDummy; // GCC does not support mixing refs and non-refs
GPUSparseMatrix<ElemType> secondDummy = transposeB ? b.m_GPUSparseMatrix->Transpose() : *b.m_GPUSparseMatrix;
GPUSparseMatrix<ElemType> & second = secondDummy;
if (beta==0)
@ -3452,7 +3452,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else if (a.m_matrixType==b.m_matrixType && b.m_matrixType==c.m_matrixType && a.m_matrixType==MatrixType::SPARSE)
{
GPUSparseMatrix<ElemType> firstDummy = alpha==1 ? *a.m_GPUSparseMatrix : (*a.m_GPUSparseMatrix)*alpha;
GPUSparseMatrix<ElemType> & first = firstDummy; // By Malcolm.. gcc doesn't support auto
if (beta==0)
{
GPUSparseMatrix<ElemType>::Multiply(first,transposeA,*b.m_GPUSparseMatrix,transposeB,*c.m_GPUSparseMatrix);
@ -3970,27 +3970,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return r;
}
template<class ElemType>
ElemType Matrix<ElemType>::LogAdd(ElemType x, ElemType y)
{
ElemType temp, diff, z;
if (x < y) {
temp = x; x = y; y = temp;
}
diff = y - x;
if (diff < MINLOGEXP)
{
return (ElemType) ((x < LSMALL) ? LZERO : x);
}
else
{
z = exp(diff);
return (ElemType) (x + log(1.0 + z));
}
}
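LogAdd evaluates log(exp(x) + exp(y)) without overflow by factoring out the larger argument: for x >= y it returns x + log(1 + exp(y - x)), with the LZERO/LSMALL constants flooring very small results. A minimal usage sketch with plain doubles (the flooring is omitted here):

#include <cmath>
#include <cstdio>
#include <utility>

// Stable log(exp(x) + exp(y)), mirroring the structure of Matrix::LogAdd above.
double LogAddDemo(double x, double y)
{
    if (x < y)
        std::swap(x, y);                          // make x the larger value
    return x + std::log1p(std::exp(y - x));       // log(1 + exp(y-x)) evaluated stably
}

int main()
{
    // Adding two probabilities stored in log space: log(0.25) and log(0.75) combine to log(1.0) = 0.
    std::printf("%f\n", LogAddDemo(std::log(0.25), std::log(0.75)));
    return 0;
}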
template<class ElemType>
void Matrix<ElemType>::ClassEntropy(const Matrix<ElemType>& a, const Matrix<ElemType>& wgt,
const Matrix<ElemType> & label, const Matrix<ElemType>* cls,
const Matrix<ElemType>* idx2cls, Matrix<ElemType>& etp, Matrix<ElemType>& entropyScore)


@ -313,7 +313,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
ElemType Exp10(ElemType num);
ElemType Mod(ElemType x , ElemType y);
ElemType LogAdd(ElemType x, ElemType y);
public:
static short GetBestGPUDeviceId(); //{ return GPUMatrix<ElemType>::GetBestGPUDeviceId();}