Adding test cases
This commit is contained in:
Родитель
d3c4fc2054
Коммит
43759a432a
6
CNTK.sln
6
CNTK.sln
|
@ -1122,6 +1122,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CommandEval", "Tests\UnitTe
|
|||
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Scripts", "Scripts", "{68263A2F-1D5F-4C46-B5AF-2304B80FC3D4}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
Scripts\pytest.ini = Scripts\pytest.ini
|
||||
Scripts\txt2ctf.py = Scripts\txt2ctf.py
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
|
||||
|
|
|
@ -22,22 +22,21 @@ class Txt2CftConverter:
|
|||
Each token of a stream must appear in the corresponding dictionary file, one token per line, so that the zero-based line number of the token becomes
|
||||
the numeric index written into the CNTK text format output file"""
|
||||
|
||||
def __init__(self, dictionaries, inputs, output, streamSeparator, comment):
|
||||
def __init__(self, dictionaries, inputs, output, comment):
|
||||
self.dictionaries = dictionaries
|
||||
self.inputs = inputs
|
||||
self.streamSeparator = streamSeparator
|
||||
self.output = output
|
||||
self.comment = comment
|
||||
|
||||
def convert(self):
|
||||
dictionaries = self._createDictionaries()
|
||||
self._convertInputs(dictionaries)
|
||||
|
||||
def _createDictionaries(self):
|
||||
dictionaries = []
|
||||
for dic in self.dictionaries:
|
||||
dictionaries.append(self._createDictionary(dic))
|
||||
return dictionaries
|
||||
|
||||
if len(self.inputs) == 0:
|
||||
return self._convertInput(dictionaries, sys.stdin)
|
||||
for input in self.inputs:
|
||||
self._convertInput(dictionaries, input)
|
||||
|
||||
def _createDictionary(self, dictionary):
|
||||
result = {}
|
||||
|
@ -48,12 +47,6 @@ class Txt2CftConverter:
|
|||
counter += 1
|
||||
return result
|
||||
|
||||
def _convertInputs(self, dictionaries):
|
||||
if len(self.inputs) == 0:
|
||||
return self._convertInput(dictionaries, sys.stdin)
|
||||
for input in self.inputs:
|
||||
self._convertInput(dictionaries, input)
|
||||
|
||||
def _convertInput(self, dictionaries, input):
|
||||
sequenceId = 0
|
||||
for line in input:
|
||||
|
@ -76,67 +69,66 @@ class Txt2CftConverter:
|
|||
|
||||
# writing to the output file
|
||||
for sampleIndex in range(maxLen):
|
||||
self.output.write(str(sequenceId) + "\t")
|
||||
self.output.write(str(sequenceId))
|
||||
for streamIndex in range(len(tokenizedStreams)):
|
||||
if len(tokenizedStreams[streamIndex]) <= sampleIndex:
|
||||
self.output.write(self.streamSeparator)
|
||||
self.output.write("\t")
|
||||
continue
|
||||
token = tokenizedStreams[streamIndex][sampleIndex]
|
||||
value = dictionaries[streamIndex][token]
|
||||
self.output.write(self.streamSeparator)
|
||||
self.output.write("|S" + str(streamIndex) + " "+ str(value) + ":1")
|
||||
self.output.write("\t|S" + str(streamIndex) + " "+ str(value) + ":1")
|
||||
if self.comment:
|
||||
self.output.write("|# " + token)
|
||||
self.output.write(" |# " + token)
|
||||
self.output.write("\n")
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Transforms text file given dictionaries into cntk text format.")
|
||||
parser.add_argument('--map', help='List of dictionaries,given in the same order as streams in the input files', required=True)
|
||||
parser.add_argument('--sep', help='Stream separator, default TAB', default="\t", required=False)
|
||||
parser.add_argument('--comment', help='Whether to annotate indexes with tokens. Default is false', choices=["True", "False"], default="False", required=False)
|
||||
parser.add_argument('--output', help='Name of the output file, stdout if not given', default="", required=False)
|
||||
parser.add_argument('--input', help='Name of the inputs files, stdin if not given', default="", required=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
# creating dictionaries
|
||||
dictionaryFiles = "".join(str(x) for x in args.map).split(",")
|
||||
dictionaries = open(d) for d in dictionaryFiles
|
||||
dictionaryFiles = "".join(str(x) for x in args.map).split(",")
|
||||
dictionaries = [open(d) for d in dictionaryFiles]
|
||||
|
||||
# creating inputs
|
||||
inputs = [sys.stdin]
|
||||
if args.input != "":
|
||||
inputFiles = "".join(str(x) for x in args.input).split(",")
|
||||
inputs = open(i) for i in inputFiles
|
||||
inputs = [open(i) for i in inputFiles]
|
||||
|
||||
# creating outputs
|
||||
output = sys.stdout
|
||||
if args.output != "":
|
||||
output = open(args.output, "w")
|
||||
|
||||
converter = Txt2CftConverter(dictionaries, inputs, output, args.sep, args.comment == "True")
|
||||
converter = Txt2CftConverter(dictionaries, inputs, output, args.comment == "True")
|
||||
converter.convert()
|
||||
|
||||
# Test
|
||||
|
||||
#####################################################################################################
|
||||
# Tests
|
||||
#####################################################################################################
|
||||
|
||||
import StringIO
|
||||
|
||||
def test_sanityCheck():
|
||||
dictionary1 = StringIO.StringIO()
|
||||
dictionary1.write("hello\nmy\nworld\nof\nnothing\n")
|
||||
|
||||
dictionary2 = StringIO.StringIO()
|
||||
dictionary2.write("let\nme\nbe\nclear\nabout\nit\n")
|
||||
|
||||
input = StringIO.StringIO()
|
||||
input.write("hello my\tclear about\nworld of\tit let clear\n")
|
||||
def test_simpleSanityCheck():
|
||||
dictionary1 = StringIO.StringIO("hello\nmy\nworld\nof\nnothing\n")
|
||||
dictionary2 = StringIO.StringIO("let\nme\nbe\nclear\nabout\nit\n")
|
||||
|
||||
input = StringIO.StringIO("hello my\tclear about\nworld of\tit let clear\n")
|
||||
|
||||
output = StringIO.StringIO()
|
||||
converter = Txt2CftConverter([dictionary1, dictionary2], [input], output, "\t", False)
|
||||
|
||||
converter = Txt2CftConverter([dictionary1, dictionary2], [input], output, False)
|
||||
converter.convert()
|
||||
|
||||
expectedOutput = StringIO.StringIO()
|
||||
expectedOutput.write("0\t|S0 0:1\t|S1 3:1\n")
|
||||
expectedOutput.write("0\t|S0 1:1\t|S1 4:1\n")
|
||||
expectedOutput.write("1\t|S0 2:1\t|S1 5:1\n")
|
||||
expectedOutput.write("1\t|S0 3:1\t|S1 0:1\n")
|
||||
expectedOutput.write("1\t\t|S1 3:1")
|
||||
|
||||
assert expectedOutput.content() == output.content()
|
||||
expectedOutput.write("1\t\t|S1 3:1\n")
|
||||
|
||||
assert expectedOutput.getvalue() == output.getvalue()
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
#!/bin/bash
|
||||
|
||||
. $TEST_ROOT_DIR/run-test-common
|
||||
|
||||
SCRIPTSDIR=$TEST_ROOT_DIR/../../Scripts
|
||||
|
||||
pushd $SCRIPTSDIR
|
||||
|
|
|
@ -9,5 +9,4 @@ tags:
|
|||
testCases:
|
||||
PyTest run must finish with error code 0 (outputs __COMPLETED__ in that case):
|
||||
patterns:
|
||||
- __COMPLETED__
|
||||
|
||||
- __COMPLETED__
|
Загрузка…
Ссылка в новой задаче