зеркало из https://github.com/dotnet/infer.git
ComputeMovieGenre allows any number of genres (#124)
This commit is contained in:
Родитель
7ec8af439b
Коммит
702874f9aa
|
@ -20,9 +20,9 @@ namespace Microsoft.ML.Probabilistic.Compiler
|
|||
var asm = t.Assembly;
|
||||
return asm.GetManifestResourceNames().Where(s => s.EndsWith(".cs")).Select(s =>
|
||||
{
|
||||
using (var stream = asm.GetManifestResourceStream(s))
|
||||
var stream = asm.GetManifestResourceStream(s);
|
||||
using (var reader = new StreamReader(stream))
|
||||
{
|
||||
var reader = new StreamReader(stream);
|
||||
return new SourceCode(s, reader.ReadToEnd());
|
||||
}
|
||||
}).FirstOrDefault(code =>
|
||||
|
|
|
@ -46,8 +46,8 @@ namespace ImageClassifier
|
|||
|
||||
public void ReadImages()
|
||||
{
|
||||
List<string> filenames = ReadLines(folder + "Images.txt");
|
||||
int n = filenames.Count;
|
||||
string[] filenames = File.ReadAllLines(folder + "Images.txt");
|
||||
int n = filenames.Length;
|
||||
|
||||
probLabels = new Label[n];
|
||||
int xpos = 5;
|
||||
|
@ -131,26 +131,10 @@ namespace ImageClassifier
|
|||
}
|
||||
}
|
||||
|
||||
public static List<string> ReadLines(string path)
|
||||
{
|
||||
List<string> result = new List<string>();
|
||||
StreamReader reader = new StreamReader(path);
|
||||
while (true)
|
||||
{
|
||||
string s = reader.ReadLine();
|
||||
if (s == null) break;
|
||||
result.Add(s);
|
||||
}
|
||||
|
||||
reader.Close();
|
||||
return result;
|
||||
}
|
||||
|
||||
public static List<Vector> ReadVectors(string path)
|
||||
{
|
||||
List<Vector> result = new List<Vector>();
|
||||
List<string> lines = ReadLines(path);
|
||||
foreach (string line in lines)
|
||||
foreach (string line in File.ReadLines(path))
|
||||
{
|
||||
string[] entries = line.Split(',');
|
||||
Vector v = Vector.Zero(entries.Length - 1);
|
||||
|
|
|
@ -7,6 +7,7 @@ using System.IO;
|
|||
using System.Drawing;
|
||||
using Microsoft.ML.Probabilistic.Math;
|
||||
using Microsoft.ML.Probabilistic.Utilities;
|
||||
using System.Linq;
|
||||
|
||||
namespace ImageClassifier
|
||||
{
|
||||
|
@ -15,7 +16,7 @@ namespace ImageClassifier
|
|||
public void ComputeImageFeatures()
|
||||
{
|
||||
string folder = @"..\..\Images\";
|
||||
List<string> filenames = ReadLines(folder + "Images.txt");
|
||||
string[] filenames = File.ReadAllLines(folder + "Images.txt");
|
||||
Dictionary<string, Vector> labels = ReadLabels(folder + "Labels.txt");
|
||||
Dictionary<string, Vector> features = new Dictionary<string, Vector>();
|
||||
foreach (string filename in filenames)
|
||||
|
@ -40,24 +41,20 @@ namespace ImageClassifier
|
|||
private Dictionary<string, Vector> ReadLabels(string path)
|
||||
{
|
||||
Dictionary<string, int> keywords = new Dictionary<string, int>();
|
||||
List<string[]> lines = new List<string[]>();
|
||||
StreamReader reader = new StreamReader(path);
|
||||
while (true)
|
||||
List<string[]> lines = File.ReadLines(path).Select(s =>
|
||||
{
|
||||
string s = reader.ReadLine();
|
||||
if (s == null) break;
|
||||
string[] items = s.Split(',');
|
||||
if (items.Length == 0) continue;
|
||||
for (int i = 1; i < items.Length; i++)
|
||||
if (items.Length > 0)
|
||||
{
|
||||
items[i] = items[i].Trim();
|
||||
if (!keywords.ContainsKey(items[i])) keywords[items[i]] = keywords.Count;
|
||||
for (int i = 1; i < items.Length; i++)
|
||||
{
|
||||
items[i] = items[i].Trim();
|
||||
if (!keywords.ContainsKey(items[i])) keywords[items[i]] = keywords.Count;
|
||||
}
|
||||
}
|
||||
return items;
|
||||
}).ToList();
|
||||
|
||||
lines.Add(items);
|
||||
}
|
||||
|
||||
reader.Close();
|
||||
Dictionary<string, Vector> labels = new Dictionary<string, Vector>();
|
||||
foreach (string[] items in lines)
|
||||
{
|
||||
|
@ -229,20 +226,5 @@ namespace ImageClassifier
|
|||
|
||||
return max;
|
||||
}
|
||||
|
||||
private List<string> ReadLines(string path)
|
||||
{
|
||||
List<string> result = new List<string>();
|
||||
StreamReader reader = new StreamReader(path);
|
||||
while (true)
|
||||
{
|
||||
string s = reader.ReadLine();
|
||||
if (s == null) break;
|
||||
result.Add(s);
|
||||
}
|
||||
|
||||
reader.Close();
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,12 +85,11 @@ namespace ImageClassifier
|
|||
foreach (Item item in Items) item.Reset();
|
||||
}
|
||||
|
||||
public void PopulateFromStringsAndVectors(List<string> filenames, List<Vector> data)
|
||||
public void PopulateFromStringsAndVectors(IReadOnlyList<string> filenames, IReadOnlyList<Vector> data)
|
||||
{
|
||||
int ct = 0;
|
||||
foreach (string s in filenames)
|
||||
for (int i = 0; i < filenames.Count; i++)
|
||||
{
|
||||
items.Add(new Item(form1.folder + s, data[ct++]));
|
||||
items.Add(new Item(form1.folder + filenames[i], data[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
// See the LICENSE file in the project root for more information.
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ImageClassifier
|
||||
|
@ -22,7 +23,7 @@ namespace ImageClassifier
|
|||
Application.EnableVisualStyles();
|
||||
Application.SetCompatibleTextRenderingDefault(false);
|
||||
ItemsModel model = new ItemsModel();
|
||||
model.PopulateFromStringsAndVectors(Form1.ReadLines(model.form1.folder + "Images.txt"), model.form1.data);
|
||||
model.PopulateFromStringsAndVectors(File.ReadAllLines(model.form1.folder + "Images.txt"), model.form1.data);
|
||||
ClassifierView cv = new ClassifierView();
|
||||
cv.DataContext = model;
|
||||
cv.ShowInForm("Image Classifer using Infer.NET");
|
||||
|
|
|
@ -7,6 +7,7 @@ using System.Text;
|
|||
using Microsoft.ML.Probabilistic.Distributions;
|
||||
using Microsoft.ML.Probabilistic.Math;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
namespace LDAExample
|
||||
{
|
||||
|
@ -167,25 +168,17 @@ namespace LDAExample
|
|||
/// <returns></returns>
|
||||
public static Dictionary<int, int>[] LoadWordCounts(string fileName)
|
||||
{
|
||||
List<Dictionary<int, int>> ld = new List<Dictionary<int, int>>();
|
||||
using (StreamReader sr = new StreamReader(fileName))
|
||||
return File.ReadLines(fileName).Select(str =>
|
||||
{
|
||||
string str = null;
|
||||
while ((str = sr.ReadLine()) != null)
|
||||
string[] split = str.Split(' ', ':');
|
||||
int numUniqueTerms = int.Parse(split[0]);
|
||||
var dict = new Dictionary<int, int>();
|
||||
for (int i = 0; i < (split.Length - 1) / 2; i++)
|
||||
{
|
||||
string[] split = str.Split(' ', ':');
|
||||
int numUniqueTerms = int.Parse(split[0]);
|
||||
var dict = new Dictionary<int, int>();
|
||||
for (int i = 0; i < (split.Length - 1) / 2; i++)
|
||||
{
|
||||
dict.Add(int.Parse(split[2 * i + 1]), int.Parse(split[2 * i + 2]));
|
||||
}
|
||||
|
||||
ld.Add(dict);
|
||||
dict.Add(int.Parse(split[2 * i + 1]), int.Parse(split[2 * i + 2]));
|
||||
}
|
||||
}
|
||||
|
||||
return ld.ToArray();
|
||||
return dict;
|
||||
}).ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -195,18 +188,7 @@ namespace LDAExample
|
|||
/// <returns></returns>
|
||||
public static Dictionary<int, string> LoadVocabulary(string fileName)
|
||||
{
|
||||
Dictionary<int, string> vocab = new Dictionary<int, string>();
|
||||
using (StreamReader sr = new StreamReader(fileName))
|
||||
{
|
||||
string str = null;
|
||||
int idx = 0;
|
||||
while ((str = sr.ReadLine()) != null)
|
||||
{
|
||||
vocab.Add(idx++, str);
|
||||
}
|
||||
}
|
||||
|
||||
return vocab;
|
||||
return File.ReadLines(fileName).Select((str, idx) => Tuple.Create(str, idx)).ToDictionary(tup => tup.Item2, tup => tup.Item1);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
@ -239,11 +239,7 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.MovieLens
|
|||
public static Tuple<int, string> ComputeMovieGenre(int offset, string feature)
|
||||
{
|
||||
string[] genres = feature.Split('|');
|
||||
if (genres.Length < 1 || genres.Length > 3)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(feature), feature, "Movies should have between 1 and 3 genres.");
|
||||
}
|
||||
|
||||
// genres.Length will always be at least 1
|
||||
double value = 1.0 / genres.Length;
|
||||
|
||||
var result = new StringBuilder(string.Format("{0}:{1}", offset + MovieGenreBuckets[genres[0]], value));
|
||||
|
|
|
@ -59,19 +59,14 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.MovieLens
|
|||
try
|
||||
{
|
||||
using (TextWriter writer = new StreamWriter(outputFileName))
|
||||
using (TextReader moviesReader = new StreamReader(moviesInfoFileName))
|
||||
using (TextReader usersReader = new StreamReader(usersInfoFileName))
|
||||
using (TextReader ratingReader = new StreamReader(ratingFileName))
|
||||
{
|
||||
TextReader moviesReader = new StreamReader(moviesInfoFileName);
|
||||
TextReader usersReader = new StreamReader(usersInfoFileName);
|
||||
TextReader ratingReader = new StreamReader(ratingFileName);
|
||||
|
||||
writer.WriteLine("R,1,5");
|
||||
ConvertRating(ratingReader, writer);
|
||||
GenerateEntityFeatures(usersReader, writer, FeatureProcessor.ProcessUserFeatures);
|
||||
GenerateEntityFeatures(moviesReader, writer, FeatureProcessor.ProcessItemFeatures);
|
||||
|
||||
moviesReader.Close();
|
||||
usersReader.Close();
|
||||
ratingReader.Close();
|
||||
}
|
||||
}
|
||||
catch
|
||||
|
|
|
@ -400,7 +400,6 @@ namespace Microsoft.ML.Probabilistic.Tutorials
|
|||
exams = null;
|
||||
int totalDocs = 0;
|
||||
string myStr;
|
||||
StreamReader mySR;
|
||||
char[] sep = { '\t', ',' };
|
||||
|
||||
for (int pass = 0; pass < 2; pass++)
|
||||
|
@ -413,30 +412,30 @@ namespace Microsoft.ML.Probabilistic.Tutorials
|
|||
totalDocs = 0;
|
||||
}
|
||||
|
||||
mySR = new StreamReader(ifn);
|
||||
mySR.ReadLine(); // Skip over header line
|
||||
while ((myStr = mySR.ReadLine()) != null)
|
||||
using (var mySR = new StreamReader(ifn))
|
||||
{
|
||||
string[] mySplitStr = myStr.Split(sep);
|
||||
int exm = int.Parse(mySplitStr[2]);
|
||||
|
||||
// Only include data with non-zero examinations
|
||||
if (0 != exm || allowNoExams)
|
||||
mySR.ReadLine(); // Skip over header line
|
||||
while ((myStr = mySR.ReadLine()) != null)
|
||||
{
|
||||
if (1 == pass)
|
||||
{
|
||||
int lab = int.Parse(mySplitStr[0]);
|
||||
int clk = int.Parse(mySplitStr[1]);
|
||||
labels[totalDocs] = lab;
|
||||
clicks[totalDocs] = clk;
|
||||
exams[totalDocs] = exm;
|
||||
}
|
||||
string[] mySplitStr = myStr.Split(sep);
|
||||
int exm = int.Parse(mySplitStr[2]);
|
||||
|
||||
totalDocs++;
|
||||
// Only include data with non-zero examinations
|
||||
if (0 != exm || allowNoExams)
|
||||
{
|
||||
if (1 == pass)
|
||||
{
|
||||
int lab = int.Parse(mySplitStr[0]);
|
||||
int clk = int.Parse(mySplitStr[1]);
|
||||
labels[totalDocs] = lab;
|
||||
clicks[totalDocs] = clk;
|
||||
exams[totalDocs] = exm;
|
||||
}
|
||||
|
||||
totalDocs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mySR.Close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -352,7 +352,6 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
Nrows = 1;
|
||||
Ncols = 0;
|
||||
string myStr;
|
||||
StreamReader mySR;
|
||||
char[] sep = {'\t', ','};
|
||||
for (int pass = 0; pass < 2; pass++)
|
||||
{
|
||||
|
@ -361,28 +360,29 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
M = new double[Nrows,Ncols];
|
||||
Nrows = 0;
|
||||
}
|
||||
mySR = new StreamReader(ifn);
|
||||
mySR.ReadLine(); // Skip over header line
|
||||
//overwrite number separator to use US-style (. for decimal separation)
|
||||
NumberFormatInfo nfi = new CultureInfo("en-US", false).NumberFormat;
|
||||
while ((myStr = mySR.ReadLine()) != null)
|
||||
using (var mySR = new StreamReader(ifn))
|
||||
{
|
||||
string[] mySplitStr = myStr.Split(sep);
|
||||
Ncols = mySplitStr.Length;
|
||||
if (1 == pass)
|
||||
mySR.ReadLine(); // Skip over header line
|
||||
//overwrite number separator to use US-style (. for decimal separation)
|
||||
NumberFormatInfo nfi = new CultureInfo("en-US", false).NumberFormat;
|
||||
while ((myStr = mySR.ReadLine()) != null)
|
||||
{
|
||||
//parse the doubles into the array.
|
||||
for (int i = 0; i < Ncols; i++)
|
||||
string[] mySplitStr = myStr.Split(sep);
|
||||
Ncols = mySplitStr.Length;
|
||||
if (1 == pass)
|
||||
{
|
||||
string str = mySplitStr[i];
|
||||
M[Nrows, i] = Double.Parse(str, nfi);
|
||||
//parse the doubles into the array.
|
||||
for (int i = 0; i < Ncols; i++)
|
||||
{
|
||||
string str = mySplitStr[i];
|
||||
M[Nrows, i] = Double.Parse(str, nfi);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Nrows++;
|
||||
}
|
||||
|
||||
|
||||
Nrows++;
|
||||
}
|
||||
mySR.Close();
|
||||
}
|
||||
return M;
|
||||
}
|
||||
|
|
|
@ -227,9 +227,7 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
List<int> examList = new List<int>();
|
||||
char[] sep = {'\t'};
|
||||
|
||||
StreamReader reader = new StreamReader(filename);
|
||||
string line;
|
||||
while ((line = reader.ReadLine()) != null)
|
||||
foreach (string line in File.ReadLines(filename))
|
||||
{
|
||||
if (maxDocs >= 0 && labelList.Count == maxDocs) break;
|
||||
string[] split = line.Split(sep);
|
||||
|
@ -237,7 +235,6 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
clickList.Add(int.Parse(split[1]));
|
||||
examList.Add(int.Parse(split[2]));
|
||||
}
|
||||
reader.Close();
|
||||
labels = labelList.ToArray();
|
||||
clicks = clickList.ToArray();
|
||||
exams = examList.ToArray();
|
||||
|
|
Загрузка…
Ссылка в новой задаче