ComputeMovieGenre allows any number of genres (#124)

This commit is contained in:
Tom Minka 2019-03-09 09:48:27 +00:00 коммит произвёл GitHub
Родитель 7ec8af439b
Коммит 702874f9aa
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 72 добавлений и 137 удалений

Просмотреть файл

@ -20,9 +20,9 @@ namespace Microsoft.ML.Probabilistic.Compiler
var asm = t.Assembly;
return asm.GetManifestResourceNames().Where(s => s.EndsWith(".cs")).Select(s =>
{
using (var stream = asm.GetManifestResourceStream(s))
var stream = asm.GetManifestResourceStream(s);
using (var reader = new StreamReader(stream))
{
var reader = new StreamReader(stream);
return new SourceCode(s, reader.ReadToEnd());
}
}).FirstOrDefault(code =>

Просмотреть файл

@ -46,8 +46,8 @@ namespace ImageClassifier
public void ReadImages()
{
List<string> filenames = ReadLines(folder + "Images.txt");
int n = filenames.Count;
string[] filenames = File.ReadAllLines(folder + "Images.txt");
int n = filenames.Length;
probLabels = new Label[n];
int xpos = 5;
@ -131,26 +131,10 @@ namespace ImageClassifier
}
}
public static List<string> ReadLines(string path)
{
List<string> result = new List<string>();
StreamReader reader = new StreamReader(path);
while (true)
{
string s = reader.ReadLine();
if (s == null) break;
result.Add(s);
}
reader.Close();
return result;
}
public static List<Vector> ReadVectors(string path)
{
List<Vector> result = new List<Vector>();
List<string> lines = ReadLines(path);
foreach (string line in lines)
foreach (string line in File.ReadLines(path))
{
string[] entries = line.Split(',');
Vector v = Vector.Zero(entries.Length - 1);

Просмотреть файл

@ -7,6 +7,7 @@ using System.IO;
using System.Drawing;
using Microsoft.ML.Probabilistic.Math;
using Microsoft.ML.Probabilistic.Utilities;
using System.Linq;
namespace ImageClassifier
{
@ -15,7 +16,7 @@ namespace ImageClassifier
public void ComputeImageFeatures()
{
string folder = @"..\..\Images\";
List<string> filenames = ReadLines(folder + "Images.txt");
string[] filenames = File.ReadAllLines(folder + "Images.txt");
Dictionary<string, Vector> labels = ReadLabels(folder + "Labels.txt");
Dictionary<string, Vector> features = new Dictionary<string, Vector>();
foreach (string filename in filenames)
@ -40,24 +41,20 @@ namespace ImageClassifier
private Dictionary<string, Vector> ReadLabels(string path)
{
Dictionary<string, int> keywords = new Dictionary<string, int>();
List<string[]> lines = new List<string[]>();
StreamReader reader = new StreamReader(path);
while (true)
List<string[]> lines = File.ReadLines(path).Select(s =>
{
string s = reader.ReadLine();
if (s == null) break;
string[] items = s.Split(',');
if (items.Length == 0) continue;
for (int i = 1; i < items.Length; i++)
if (items.Length > 0)
{
items[i] = items[i].Trim();
if (!keywords.ContainsKey(items[i])) keywords[items[i]] = keywords.Count;
for (int i = 1; i < items.Length; i++)
{
items[i] = items[i].Trim();
if (!keywords.ContainsKey(items[i])) keywords[items[i]] = keywords.Count;
}
}
return items;
}).ToList();
lines.Add(items);
}
reader.Close();
Dictionary<string, Vector> labels = new Dictionary<string, Vector>();
foreach (string[] items in lines)
{
@ -229,20 +226,5 @@ namespace ImageClassifier
return max;
}
private List<string> ReadLines(string path)
{
List<string> result = new List<string>();
StreamReader reader = new StreamReader(path);
while (true)
{
string s = reader.ReadLine();
if (s == null) break;
result.Add(s);
}
reader.Close();
return result;
}
}
}

Просмотреть файл

@ -85,12 +85,11 @@ namespace ImageClassifier
foreach (Item item in Items) item.Reset();
}
public void PopulateFromStringsAndVectors(List<string> filenames, List<Vector> data)
public void PopulateFromStringsAndVectors(IReadOnlyList<string> filenames, IReadOnlyList<Vector> data)
{
int ct = 0;
foreach (string s in filenames)
for (int i = 0; i < filenames.Count; i++)
{
items.Add(new Item(form1.folder + s, data[ct++]));
items.Add(new Item(form1.folder + filenames[i], data[i]));
}
}
}

Просмотреть файл

@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.IO;
using System.Windows.Forms;
namespace ImageClassifier
@ -22,7 +23,7 @@ namespace ImageClassifier
Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false);
ItemsModel model = new ItemsModel();
model.PopulateFromStringsAndVectors(Form1.ReadLines(model.form1.folder + "Images.txt"), model.form1.data);
model.PopulateFromStringsAndVectors(File.ReadAllLines(model.form1.folder + "Images.txt"), model.form1.data);
ClassifierView cv = new ClassifierView();
cv.DataContext = model;
cv.ShowInForm("Image Classifer using Infer.NET");

Просмотреть файл

@ -7,6 +7,7 @@ using System.Text;
using Microsoft.ML.Probabilistic.Distributions;
using Microsoft.ML.Probabilistic.Math;
using System.IO;
using System.Linq;
namespace LDAExample
{
@ -167,25 +168,17 @@ namespace LDAExample
/// <returns></returns>
public static Dictionary<int, int>[] LoadWordCounts(string fileName)
{
List<Dictionary<int, int>> ld = new List<Dictionary<int, int>>();
using (StreamReader sr = new StreamReader(fileName))
return File.ReadLines(fileName).Select(str =>
{
string str = null;
while ((str = sr.ReadLine()) != null)
string[] split = str.Split(' ', ':');
int numUniqueTerms = int.Parse(split[0]);
var dict = new Dictionary<int, int>();
for (int i = 0; i < (split.Length - 1) / 2; i++)
{
string[] split = str.Split(' ', ':');
int numUniqueTerms = int.Parse(split[0]);
var dict = new Dictionary<int, int>();
for (int i = 0; i < (split.Length - 1) / 2; i++)
{
dict.Add(int.Parse(split[2 * i + 1]), int.Parse(split[2 * i + 2]));
}
ld.Add(dict);
dict.Add(int.Parse(split[2 * i + 1]), int.Parse(split[2 * i + 2]));
}
}
return ld.ToArray();
return dict;
}).ToArray();
}
/// <summary>
@ -195,18 +188,7 @@ namespace LDAExample
/// <returns></returns>
public static Dictionary<int, string> LoadVocabulary(string fileName)
{
Dictionary<int, string> vocab = new Dictionary<int, string>();
using (StreamReader sr = new StreamReader(fileName))
{
string str = null;
int idx = 0;
while ((str = sr.ReadLine()) != null)
{
vocab.Add(idx++, str);
}
}
return vocab;
return File.ReadLines(fileName).Select((str, idx) => Tuple.Create(str, idx)).ToDictionary(tup => tup.Item2, tup => tup.Item1);
}
/// <summary>

Просмотреть файл

@ -239,11 +239,7 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.MovieLens
public static Tuple<int, string> ComputeMovieGenre(int offset, string feature)
{
string[] genres = feature.Split('|');
if (genres.Length < 1 || genres.Length > 3)
{
throw new ArgumentOutOfRangeException(nameof(feature), feature, "Movies should have between 1 and 3 genres.");
}
// string.Split never returns an empty array, so genres.Length is always at least 1
double value = 1.0 / genres.Length;
var result = new StringBuilder(string.Format("{0}:{1}", offset + MovieGenreBuckets[genres[0]], value));

Просмотреть файл

@ -59,19 +59,14 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.MovieLens
try
{
using (TextWriter writer = new StreamWriter(outputFileName))
using (TextReader moviesReader = new StreamReader(moviesInfoFileName))
using (TextReader usersReader = new StreamReader(usersInfoFileName))
using (TextReader ratingReader = new StreamReader(ratingFileName))
{
TextReader moviesReader = new StreamReader(moviesInfoFileName);
TextReader usersReader = new StreamReader(usersInfoFileName);
TextReader ratingReader = new StreamReader(ratingFileName);
writer.WriteLine("R,1,5");
ConvertRating(ratingReader, writer);
GenerateEntityFeatures(usersReader, writer, FeatureProcessor.ProcessUserFeatures);
GenerateEntityFeatures(moviesReader, writer, FeatureProcessor.ProcessItemFeatures);
moviesReader.Close();
usersReader.Close();
ratingReader.Close();
}
}
catch

Просмотреть файл

@ -400,7 +400,6 @@ namespace Microsoft.ML.Probabilistic.Tutorials
exams = null;
int totalDocs = 0;
string myStr;
StreamReader mySR;
char[] sep = { '\t', ',' };
for (int pass = 0; pass < 2; pass++)
@ -413,30 +412,30 @@ namespace Microsoft.ML.Probabilistic.Tutorials
totalDocs = 0;
}
mySR = new StreamReader(ifn);
mySR.ReadLine(); // Skip over header line
while ((myStr = mySR.ReadLine()) != null)
using (var mySR = new StreamReader(ifn))
{
string[] mySplitStr = myStr.Split(sep);
int exm = int.Parse(mySplitStr[2]);
// Only include data with non-zero examinations
if (0 != exm || allowNoExams)
mySR.ReadLine(); // Skip over header line
while ((myStr = mySR.ReadLine()) != null)
{
if (1 == pass)
{
int lab = int.Parse(mySplitStr[0]);
int clk = int.Parse(mySplitStr[1]);
labels[totalDocs] = lab;
clicks[totalDocs] = clk;
exams[totalDocs] = exm;
}
string[] mySplitStr = myStr.Split(sep);
int exm = int.Parse(mySplitStr[2]);
totalDocs++;
// Only include data with non-zero examinations
if (0 != exm || allowNoExams)
{
if (1 == pass)
{
int lab = int.Parse(mySplitStr[0]);
int clk = int.Parse(mySplitStr[1]);
labels[totalDocs] = lab;
clicks[totalDocs] = clk;
exams[totalDocs] = exm;
}
totalDocs++;
}
}
}
mySR.Close();
}
}

Просмотреть файл

@ -352,7 +352,6 @@ namespace Microsoft.ML.Probabilistic.Tests
Nrows = 1;
Ncols = 0;
string myStr;
StreamReader mySR;
char[] sep = {'\t', ','};
for (int pass = 0; pass < 2; pass++)
{
@ -361,28 +360,29 @@ namespace Microsoft.ML.Probabilistic.Tests
M = new double[Nrows,Ncols];
Nrows = 0;
}
mySR = new StreamReader(ifn);
mySR.ReadLine(); // Skip over header line
//overwrite number separator to use US-style (. for decimal separation)
NumberFormatInfo nfi = new CultureInfo("en-US", false).NumberFormat;
while ((myStr = mySR.ReadLine()) != null)
using (var mySR = new StreamReader(ifn))
{
string[] mySplitStr = myStr.Split(sep);
Ncols = mySplitStr.Length;
if (1 == pass)
mySR.ReadLine(); // Skip over header line
//overwrite number separator to use US-style (. for decimal separation)
NumberFormatInfo nfi = new CultureInfo("en-US", false).NumberFormat;
while ((myStr = mySR.ReadLine()) != null)
{
//parse the doubles into the array.
for (int i = 0; i < Ncols; i++)
string[] mySplitStr = myStr.Split(sep);
Ncols = mySplitStr.Length;
if (1 == pass)
{
string str = mySplitStr[i];
M[Nrows, i] = Double.Parse(str, nfi);
//parse the doubles into the array.
for (int i = 0; i < Ncols; i++)
{
string str = mySplitStr[i];
M[Nrows, i] = Double.Parse(str, nfi);
}
}
Nrows++;
}
Nrows++;
}
mySR.Close();
}
return M;
}

Просмотреть файл

@ -227,9 +227,7 @@ namespace Microsoft.ML.Probabilistic.Tests
List<int> examList = new List<int>();
char[] sep = {'\t'};
StreamReader reader = new StreamReader(filename);
string line;
while ((line = reader.ReadLine()) != null)
foreach (string line in File.ReadLines(filename))
{
if (maxDocs >= 0 && labelList.Count == maxDocs) break;
string[] split = line.Split(sep);
@ -237,7 +235,6 @@ namespace Microsoft.ML.Probabilistic.Tests
clickList.Add(int.Parse(split[1]));
examList.Add(int.Parse(split[2]));
}
reader.Close();
labels = labelList.ToArray();
clicks = clickList.ToArray();
exams = examList.ToArray();