Adding test cases

2016-06-10 15:26:13 +02:00 · 2016-06-10 15:26:13 +02:00 · 43759a432a
--- a/CNTK.sln
+++ b/CNTK.sln
@@ -1122,6 +1122,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CommandEval", "Tests\UnitTe
 		{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
 	EndProjectSection
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Scripts", "Scripts", "{68263A2F-1D5F-4C46-B5AF-2304B80FC3D4}"
+	ProjectSection(SolutionItems) = preProject
+		Scripts\pytest.ini = Scripts\pytest.ini
+		Scripts\txt2ctf.py = Scripts\txt2ctf.py
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug_CpuOnly|x64 = Debug_CpuOnly|x64
--- a/Scripts/txt2ctf.py
+++ b/Scripts/txt2ctf.py
@@ -22,22 +22,21 @@ class Txt2CftConverter:
       Each token for a stream should be inside the corresponding dictionary file, a token per line, so the line number of the token becomes
       the numeric index written into the cntk text format output file"""

-    def __init__(self, dictionaries, inputs, output, streamSeparator, comment):
+    def __init__(self, dictionaries, inputs, output, comment):
        self.dictionaries = dictionaries
        self.inputs = inputs
-        self.streamSeparator = streamSeparator
        self.output = output
        self.comment = comment

    def convert(self):
-        dictionaries = self._createDictionaries()
-        self._convertInputs(dictionaries)
-
-    def _createDictionaries(self):
        dictionaries = []
        for dic in self.dictionaries:
            dictionaries.append(self._createDictionary(dic))
-        return dictionaries
+
+        if len(self.inputs) == 0:
+            return self._convertInput(dictionaries, sys.stdin)
+        for input in self.inputs:
+            self._convertInput(dictionaries, input)

    def _createDictionary(self, dictionary):
        result = {}
@@ -48,12 +47,6 @@ class Txt2CftConverter:
            counter += 1
        return result

-    def _convertInputs(self, dictionaries):
-        if len(self.inputs) == 0:
-            return self._convertInput(dictionaries, sys.stdin)
-        for input in self.inputs:
-            self._convertInput(dictionaries, input)
-
    def _convertInput(self, dictionaries, input):
        sequenceId = 0
        for line in input:
@@ -76,67 +69,66 @@ class Txt2CftConverter:

        # writing to the output file
        for sampleIndex in range(maxLen):
-            self.output.write(str(sequenceId) + "\t")
+            self.output.write(str(sequenceId))
            for streamIndex in range(len(tokenizedStreams)):
                if len(tokenizedStreams[streamIndex]) <= sampleIndex:
-                    self.output.write(self.streamSeparator)
+                    self.output.write("\t")
                    continue
                token = tokenizedStreams[streamIndex][sampleIndex]
                value = dictionaries[streamIndex][token]
-                self.output.write(self.streamSeparator)
-                self.output.write("|S" + str(streamIndex) + " "+ str(value) + ":1")
+                self.output.write("\t|S" + str(streamIndex) + " "+ str(value) + ":1")
                if self.comment:
-                    self.output.write("|# " + token)
+                    self.output.write(" |# " + token)
            self.output.write("\n")

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Transforms text file given  dictionaries into cntk text format.")
    parser.add_argument('--map', help='List of dictionaries,given in the same order as streams in the input files', required=True)
-    parser.add_argument('--sep', help='Stream separator, default TAB', default="\t", required=False)
    parser.add_argument('--comment', help='Whether to annotate indexes with tokens. Default is false', choices=["True", "False"], default="False", required=False)
    parser.add_argument('--output', help='Name of the output file, stdout if not given', default="", required=False)
    parser.add_argument('--input', help='Name of the inputs files, stdin if not given', default="", required=False)
    args = parser.parse_args()

    # creating dictionaries
-    dictionaryFiles = "".join(str(x) for x in args.map).split(",")    
-    dictionaries = open(d) for d in dictionaryFiles
+    dictionaryFiles = "".join(str(x) for x in args.map).split(",")
+    dictionaries = [open(d) for d in dictionaryFiles]
    
    # creating inputs
    inputs = [sys.stdin]
    if args.input != "":
        inputFiles = "".join(str(x) for x in args.input).split(",")
-        inputs = open(i) for i in inputFiles
+        inputs = [open(i) for i in inputFiles]

    # creating outputs
    output = sys.stdout
    if args.output != "":
        output = open(args.output, "w")

-    converter = Txt2CftConverter(dictionaries, inputs, output, args.sep, args.comment == "True")
+    converter = Txt2CftConverter(dictionaries, inputs, output, args.comment == "True")
    converter.convert()

-# Test
+
+#####################################################################################################
+# Tests
+#####################################################################################################
+
 import StringIO

-def test_sanityCheck():
-    dictionary1 = StringIO.StringIO()
-    dictionary1.write("hello\nmy\nworld\nof\nnothing\n")
-    
-    dictionary2 = StringIO.StringIO()
-    dictionary2.write("let\nme\nbe\nclear\nabout\nit\n")
-    
-    input = StringIO.StringIO()
-    input.write("hello my\tclear about\nworld of\tit let clear\n")
+def test_simpleSanityCheck():
+    dictionary1 = StringIO.StringIO("hello\nmy\nworld\nof\nnothing\n")
+    dictionary2 = StringIO.StringIO("let\nme\nbe\nclear\nabout\nit\n")
+
+    input = StringIO.StringIO("hello my\tclear about\nworld of\tit let clear\n")

    output = StringIO.StringIO()
-    converter = Txt2CftConverter([dictionary1, dictionary2], [input], output, "\t", False)
-    
+    converter = Txt2CftConverter([dictionary1, dictionary2], [input], output, False)
+    converter.convert()
+
    expectedOutput = StringIO.StringIO()
    expectedOutput.write("0\t|S0 0:1\t|S1 3:1\n")
    expectedOutput.write("0\t|S0 1:1\t|S1 4:1\n")
    expectedOutput.write("1\t|S0 2:1\t|S1 5:1\n")
    expectedOutput.write("1\t|S0 3:1\t|S1 0:1\n")
-    expectedOutput.write("1\t\t|S1 3:1")
-    
-    assert expectedOutput.content() == output.content()
+    expectedOutput.write("1\t\t|S1 3:1\n")
+
+    assert expectedOutput.getvalue() == output.getvalue()
--- a/Tests/EndToEndTests/ScriptTest/run-test
+++ b/Tests/EndToEndTests/ScriptTest/run-test
@@ -1,7 +1,5 @@
 #!/bin/bash

-. $TEST_ROOT_DIR/run-test-common
-
 SCRIPTSDIR=$TEST_ROOT_DIR/../../Scripts

 pushd $SCRIPTSDIR
--- a/Tests/EndToEndTests/ScriptTest/testcases.yml
+++ b/Tests/EndToEndTests/ScriptTest/testcases.yml
@@ -9,5 +9,4 @@ tags:
 testCases:
  PyTest run must finish with error code 0 (outputs __COMPLETED__ in that case):
    patterns:
-      - __COMPLETED__
-
+      - __COMPLETED__