ComputeMovieGenre allows any number of genres (#124)

2019-03-09 09:48:27 +00:00 · 2019-03-09 09:48:27 +00:00 · 702874f9aa
--- a/src/Csoft/EmbeddedResourceSourceProvider.cs
+++ b/src/Csoft/EmbeddedResourceSourceProvider.cs
@@ -20,9 +20,9 @@ namespace Microsoft.ML.Probabilistic.Compiler
            var asm = t.Assembly;
            return asm.GetManifestResourceNames().Where(s => s.EndsWith(".cs")).Select(s =>
            {
-                using (var stream = asm.GetManifestResourceStream(s))
+                var stream = asm.GetManifestResourceStream(s);
+                using (var reader = new StreamReader(stream))
                {
-                    var reader = new StreamReader(stream);
                    return new SourceCode(s, reader.ReadToEnd());
                }
            }).FirstOrDefault(code =>
--- a/src/Examples/ImageClassifier/Form1.cs
+++ b/src/Examples/ImageClassifier/Form1.cs
@@ -46,8 +46,8 @@ namespace ImageClassifier

        public void ReadImages()
        {
-            List<string> filenames = ReadLines(folder + "Images.txt");
-            int n = filenames.Count;
+            string[] filenames = File.ReadAllLines(folder + "Images.txt");
+            int n = filenames.Length;

            probLabels = new Label[n];
            int xpos = 5;
@@ -131,26 +131,10 @@ namespace ImageClassifier
            }
        }

-        public static List<string> ReadLines(string path)
-        {
-            List<string> result = new List<string>();
-            StreamReader reader = new StreamReader(path);
-            while (true)
-            {
-                string s = reader.ReadLine();
-                if (s == null) break;
-                result.Add(s);
-            }
-
-            reader.Close();
-            return result;
-        }
-
        public static List<Vector> ReadVectors(string path)
        {
            List<Vector> result = new List<Vector>();
-            List<string> lines = ReadLines(path);
-            foreach (string line in lines)
+            foreach (string line in File.ReadLines(path))
            {
                string[] entries = line.Split(',');
                Vector v = Vector.Zero(entries.Length - 1);
--- a/src/Examples/ImageClassifier/ImageFeatures.cs
+++ b/src/Examples/ImageClassifier/ImageFeatures.cs
@@ -7,6 +7,7 @@ using System.IO;
 using System.Drawing;
 using Microsoft.ML.Probabilistic.Math;
 using Microsoft.ML.Probabilistic.Utilities;
+using System.Linq;

 namespace ImageClassifier
 {
@@ -15,7 +16,7 @@ namespace ImageClassifier
        public void ComputeImageFeatures()
        {
            string folder = @"..\..\Images\";
-            List<string> filenames = ReadLines(folder + "Images.txt");
+            string[] filenames = File.ReadAllLines(folder + "Images.txt");
            Dictionary<string, Vector> labels = ReadLabels(folder + "Labels.txt");
            Dictionary<string, Vector> features = new Dictionary<string, Vector>();
            foreach (string filename in filenames)
@@ -40,24 +41,20 @@ namespace ImageClassifier
        private Dictionary<string, Vector> ReadLabels(string path)
        {
            Dictionary<string, int> keywords = new Dictionary<string, int>();
-            List<string[]> lines = new List<string[]>();
-            StreamReader reader = new StreamReader(path);
-            while (true)
+            List<string[]> lines = File.ReadLines(path).Select(s =>
            {
-                string s = reader.ReadLine();
-                if (s == null) break;
                string[] items = s.Split(',');
-                if (items.Length == 0) continue;
-                for (int i = 1; i < items.Length; i++)
+                if (items.Length > 0)
                {
-                    items[i] = items[i].Trim();
-                    if (!keywords.ContainsKey(items[i])) keywords[items[i]] = keywords.Count;
+                    for (int i = 1; i < items.Length; i++)
+                    {
+                        items[i] = items[i].Trim();
+                        if (!keywords.ContainsKey(items[i])) keywords[items[i]] = keywords.Count;
+                    }
                }
+                return items;
+            }).ToList();

-                lines.Add(items);
-            }
-
-            reader.Close();
            Dictionary<string, Vector> labels = new Dictionary<string, Vector>();
            foreach (string[] items in lines)
            {
@@ -229,20 +226,5 @@ namespace ImageClassifier

            return max;
        }
-
-        private List<string> ReadLines(string path)
-        {
-            List<string> result = new List<string>();
-            StreamReader reader = new StreamReader(path);
-            while (true)
-            {
-                string s = reader.ReadLine();
-                if (s == null) break;
-                result.Add(s);
-            }
-
-            reader.Close();
-            return result;
-        }
    }
 }
--- a/src/Examples/ImageClassifier/ItemsModel.cs
+++ b/src/Examples/ImageClassifier/ItemsModel.cs
@@ -85,12 +85,11 @@ namespace ImageClassifier
            foreach (Item item in Items) item.Reset();
        }

-        public void PopulateFromStringsAndVectors(List<string> filenames, List<Vector> data)
+        public void PopulateFromStringsAndVectors(IReadOnlyList<string> filenames, IReadOnlyList<Vector> data)
        {
-            int ct = 0;
-            foreach (string s in filenames)
+            for (int i = 0; i < filenames.Count; i++)
            {
-                items.Add(new Item(form1.folder + s, data[ct++]));
+                items.Add(new Item(form1.folder + filenames[i], data[i]));
            }
        }
    }
--- a/src/Examples/ImageClassifier/Program.cs
+++ b/src/Examples/ImageClassifier/Program.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 using System;
+using System.IO;
 using System.Windows.Forms;

 namespace ImageClassifier
@@ -22,7 +23,7 @@ namespace ImageClassifier
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            ItemsModel model = new ItemsModel();
-            model.PopulateFromStringsAndVectors(Form1.ReadLines(model.form1.folder + "Images.txt"), model.form1.data);
+            model.PopulateFromStringsAndVectors(File.ReadAllLines(model.form1.folder + "Images.txt"), model.form1.data);
            ClassifierView cv = new ClassifierView();
            cv.DataContext = model;
            cv.ShowInForm("Image Classifer using Infer.NET");
--- a/src/Examples/LDA/Utilities.cs
+++ b/src/Examples/LDA/Utilities.cs
@@ -7,6 +7,7 @@ using System.Text;
 using Microsoft.ML.Probabilistic.Distributions;
 using Microsoft.ML.Probabilistic.Math;
 using System.IO;
+using System.Linq;

 namespace LDAExample
 {
@@ -167,25 +168,17 @@ namespace LDAExample
        /// <returns></returns>
        public static Dictionary<int, int>[] LoadWordCounts(string fileName)
        {
-            List<Dictionary<int, int>> ld = new List<Dictionary<int, int>>();
-            using (StreamReader sr = new StreamReader(fileName))
+            return File.ReadLines(fileName).Select(str =>
            {
-                string str = null;
-                while ((str = sr.ReadLine()) != null)
+                string[] split = str.Split(' ', ':');
+                int numUniqueTerms = int.Parse(split[0]);
+                var dict = new Dictionary<int, int>();
+                for (int i = 0; i < (split.Length - 1) / 2; i++)
                {
-                    string[] split = str.Split(' ', ':');
-                    int numUniqueTerms = int.Parse(split[0]);
-                    var dict = new Dictionary<int, int>();
-                    for (int i = 0; i < (split.Length - 1) / 2; i++)
-                    {
-                        dict.Add(int.Parse(split[2 * i + 1]), int.Parse(split[2 * i + 2]));
-                    }
-
-                    ld.Add(dict);
+                    dict.Add(int.Parse(split[2 * i + 1]), int.Parse(split[2 * i + 2]));
                }
-            }
-
-            return ld.ToArray();
+                return dict;
+            }).ToArray();
        }

        /// <summary>
@@ -195,18 +188,7 @@ namespace LDAExample
        /// <returns></returns>
        public static Dictionary<int, string> LoadVocabulary(string fileName)
        {
-            Dictionary<int, string> vocab = new Dictionary<int, string>();
-            using (StreamReader sr = new StreamReader(fileName))
-            {
-                string str = null;
-                int idx = 0;
-                while ((str = sr.ReadLine()) != null)
-                {
-                    vocab.Add(idx++, str);
-                }
-            }
-
-            return vocab;
+            return File.ReadLines(fileName).Select((str, idx) => Tuple.Create(str, idx)).ToDictionary(tup => tup.Item2, tup => tup.Item1);
        }

        /// <summary>
--- a/src/Learners/Runners/Evaluator/DatasetGenerators/MovieLens/Features.cs
+++ b/src/Learners/Runners/Evaluator/DatasetGenerators/MovieLens/Features.cs
@@ -239,11 +239,7 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.MovieLens
        public static Tuple<int, string> ComputeMovieGenre(int offset, string feature)
        {
            string[] genres = feature.Split('|');
-            if (genres.Length < 1 || genres.Length > 3)
-            {
-                throw new ArgumentOutOfRangeException(nameof(feature), feature, "Movies should have between 1 and 3 genres.");
-            }
-
+            // string.Split never returns an empty array, so genres.Length >= 1 and the division below cannot divide by zero.
            double value = 1.0 / genres.Length;

            var result = new StringBuilder(string.Format("{0}:{1}", offset + MovieGenreBuckets[genres[0]], value));
--- a/src/Learners/Runners/Evaluator/DatasetGenerators/MovieLens/MovieLensConverter.cs
+++ b/src/Learners/Runners/Evaluator/DatasetGenerators/MovieLens/MovieLensConverter.cs
@@ -59,19 +59,14 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.MovieLens
            try
            {
                using (TextWriter writer = new StreamWriter(outputFileName))
+                using (TextReader moviesReader = new StreamReader(moviesInfoFileName))
+                using (TextReader usersReader = new StreamReader(usersInfoFileName))
+                using (TextReader ratingReader = new StreamReader(ratingFileName))
                {
-                    TextReader moviesReader = new StreamReader(moviesInfoFileName);
-                    TextReader usersReader = new StreamReader(usersInfoFileName);
-                    TextReader ratingReader = new StreamReader(ratingFileName);
-
                    writer.WriteLine("R,1,5");
                    ConvertRating(ratingReader, writer);
                    GenerateEntityFeatures(usersReader, writer, FeatureProcessor.ProcessUserFeatures);
                    GenerateEntityFeatures(moviesReader, writer, FeatureProcessor.ProcessItemFeatures);
-
-                    moviesReader.Close();
-                    usersReader.Close();
-                    ratingReader.Close();
                }
            }
            catch
--- a/src/Tutorials/ClickModel.cs
+++ b/src/Tutorials/ClickModel.cs
@@ -400,7 +400,6 @@ namespace Microsoft.ML.Probabilistic.Tutorials
            exams = null;
            int totalDocs = 0;
            string myStr;
-            StreamReader mySR;
            char[] sep = { '\t', ',' };

            for (int pass = 0; pass < 2; pass++)
@@ -413,30 +412,30 @@ namespace Microsoft.ML.Probabilistic.Tutorials
                    totalDocs = 0;
                }

-                mySR = new StreamReader(ifn);
-                mySR.ReadLine(); // Skip over header line
-                while ((myStr = mySR.ReadLine()) != null)
+                using (var mySR = new StreamReader(ifn))
                {
-                    string[] mySplitStr = myStr.Split(sep);
-                    int exm = int.Parse(mySplitStr[2]);
-
-                    // Only include data with non-zero examinations
-                    if (0 != exm || allowNoExams)
+                    mySR.ReadLine(); // Skip over header line
+                    while ((myStr = mySR.ReadLine()) != null)
                    {
-                        if (1 == pass)
-                        {
-                            int lab = int.Parse(mySplitStr[0]);
-                            int clk = int.Parse(mySplitStr[1]);
-                            labels[totalDocs] = lab;
-                            clicks[totalDocs] = clk;
-                            exams[totalDocs] = exm;
-                        }
+                        string[] mySplitStr = myStr.Split(sep);
+                        int exm = int.Parse(mySplitStr[2]);

-                        totalDocs++;
+                        // Only include data with non-zero examinations
+                        if (0 != exm || allowNoExams)
+                        {
+                            if (1 == pass)
+                            {
+                                int lab = int.Parse(mySplitStr[0]);
+                                int clk = int.Parse(mySplitStr[1]);
+                                labels[totalDocs] = lab;
+                                clicks[totalDocs] = clk;
+                                exams[totalDocs] = exm;
+                            }
+
+                            totalDocs++;
+                        }
                    }
                }
-
-                mySR.Close();
            }
        }

--- a/test/Tests/BioTests.cs
+++ b/test/Tests/BioTests.cs
@@ -352,7 +352,6 @@ namespace Microsoft.ML.Probabilistic.Tests
            Nrows = 1;
            Ncols = 0;
            string myStr;
-            StreamReader mySR;
            char[] sep = {'\t', ','};
            for (int pass = 0; pass < 2; pass++)
            {
@@ -361,28 +360,29 @@ namespace Microsoft.ML.Probabilistic.Tests
                    M = new double[Nrows,Ncols];
                    Nrows = 0;
                }
-                mySR = new StreamReader(ifn);
-                mySR.ReadLine(); // Skip over header line
-                //overwrite number seperator to use US-style(. for decimal separation)
-                NumberFormatInfo nfi = new CultureInfo("en-US", false).NumberFormat;
-                while ((myStr = mySR.ReadLine()) != null)
+                using (var mySR = new StreamReader(ifn))
                {
-                    string[] mySplitStr = myStr.Split(sep);
-                    Ncols = mySplitStr.Length;
-                    if (1 == pass)
+                    mySR.ReadLine(); // Skip over header line
+                    // Override the number format to use US-style separators ('.' as the decimal separator).
+                    NumberFormatInfo nfi = new CultureInfo("en-US", false).NumberFormat;
+                    while ((myStr = mySR.ReadLine()) != null)
                    {
-                        //parse the doubles into the array.
-                        for (int i = 0; i < Ncols; i++)
+                        string[] mySplitStr = myStr.Split(sep);
+                        Ncols = mySplitStr.Length;
+                        if (1 == pass)
                        {
-                            string str = mySplitStr[i];
-                            M[Nrows, i] = Double.Parse(str, nfi);
+                            //parse the doubles into the array.
+                            for (int i = 0; i < Ncols; i++)
+                            {
+                                string str = mySplitStr[i];
+                                M[Nrows, i] = Double.Parse(str, nfi);
+                            }
                        }
+
+
+                        Nrows++;
                    }
-
-
-                    Nrows++;
                }
-                mySR.Close();
            }
            return M;
        }
--- a/test/Tests/ClickTest.cs
+++ b/test/Tests/ClickTest.cs
@@ -227,9 +227,7 @@ namespace Microsoft.ML.Probabilistic.Tests
            List<int> examList = new List<int>();
            char[] sep = {'\t'};

-            StreamReader reader = new StreamReader(filename);
-            string line;
-            while ((line = reader.ReadLine()) != null)
+            foreach (string line in File.ReadLines(filename))
            {
                if (maxDocs >= 0 && labelList.Count == maxDocs) break;
                string[] split = line.Split(sep);
@@ -237,7 +235,6 @@ namespace Microsoft.ML.Probabilistic.Tests
                clickList.Add(int.Parse(split[1]));
                examList.Add(int.Parse(split[2]));
            }
-            reader.Close();
            labels = labelList.ToArray();
            clicks = clickList.ToArray();
            exams = examList.ToArray();