2015-04-05 23:28:16 +03:00
|
|
|
//
|
2016-01-18 11:36:17 +03:00
|
|
|
// Copyright (c) Microsoft. All rights reserved.
|
|
|
|
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
2015-04-05 23:28:16 +03:00
|
|
|
//
|
2016-01-18 11:36:14 +03:00
|
|
|
// HTKMLFReader.h - Include file for the HTK and MLF format of features and samples
|
2016-01-18 11:36:17 +03:00
|
|
|
//
|
2015-04-05 23:28:16 +03:00
|
|
|
#pragma once
|
|
|
|
#include "DataWriter.h"
|
|
|
|
#include <map>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace Microsoft { namespace MSR { namespace CNTK {
|
|
|
|
|
2016-01-18 11:36:14 +03:00
|
|
|
template <class ElemType>
|
2015-04-05 23:28:16 +03:00
|
|
|
class HTKMLFWriter : public IDataWriter<ElemType>
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
std::vector<size_t> outputDims;
|
|
|
|
std::vector<std::vector<std::wstring>> outputFiles;
|
|
|
|
std::vector<std::wstring> kaldicmd;
|
|
|
|
std::vector<kaldi::BaseFloatMatrixWriter> feature_writer;
|
|
|
|
std::vector<size_t> udims;
|
2016-01-18 11:36:14 +03:00
|
|
|
std::map<std::wstring, size_t> outputNameToIdMap;
|
|
|
|
std::map<std::wstring, size_t> outputNameToDimMap;
|
|
|
|
std::map<std::wstring, size_t> outputNameToTypeMap;
|
2015-04-05 23:28:16 +03:00
|
|
|
unsigned int sampPeriod;
|
|
|
|
size_t outputFileIndex;
|
2015-12-05 03:08:30 +03:00
|
|
|
void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
|
2015-04-05 23:28:16 +03:00
|
|
|
void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
|
2016-01-18 11:36:14 +03:00
|
|
|
ElemType* m_tempArray;
|
2015-04-05 23:28:16 +03:00
|
|
|
size_t m_tempArraySize;
|
2015-07-25 12:53:30 +03:00
|
|
|
int m_verbosity;
|
2015-07-30 20:16:32 +03:00
|
|
|
size_t m_overflowWarningCount;
|
|
|
|
size_t m_maxNumOverflowWarning;
|
|
|
|
float m_overflowValue;
|
2015-04-05 23:28:16 +03:00
|
|
|
|
|
|
|
enum OutputTypes
|
|
|
|
{
|
|
|
|
outputReal,
|
|
|
|
outputCategory,
|
|
|
|
};
|
|
|
|
|
|
|
|
public:
|
|
|
|
using LabelType = typename IDataWriter<ElemType>::LabelType;
|
|
|
|
using LabelIdType = typename IDataWriter<ElemType>::LabelIdType;
|
2016-01-18 11:36:14 +03:00
|
|
|
template <class ConfigRecordType>
|
|
|
|
void InitFromConfig(const ConfigRecordType& writerConfig);
|
|
|
|
virtual void Init(const ConfigParameters& config)
|
|
|
|
{
|
|
|
|
InitFromConfig(config);
|
|
|
|
}
|
|
|
|
virtual void Init(const ScriptableObjects::IConfigRecord& config)
|
|
|
|
{
|
|
|
|
InitFromConfig(config);
|
|
|
|
}
|
2015-04-05 23:28:16 +03:00
|
|
|
virtual void Destroy();
|
|
|
|
virtual void GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections);
|
|
|
|
virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized);
|
|
|
|
virtual void SaveMapping(std::wstring saveId, const std::map<LabelIdType, LabelType>& labelMapping);
|
|
|
|
};
|
2016-01-18 11:36:14 +03:00
|
|
|
} } }
|