Import three sample projects that use our public nuget package
This commit is contained in:
Родитель
1b61087c21
Коммит
b6351b4b7e
|
@ -0,0 +1,178 @@
|
|||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
|
||||
# User-specific files
|
||||
*.suo
|
||||
*.user
|
||||
*.sln.docstates
|
||||
|
||||
# Build results
|
||||
|
||||
[Dd]ebug/
|
||||
[Rr]elease/
|
||||
x64/
|
||||
build/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
|
||||
# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
|
||||
!packages/*/build/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
*_i.c
|
||||
*_p.c
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*.log
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.scc
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opensdf
|
||||
*.sdf
|
||||
*.cachefile
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# NCrunch
|
||||
*.ncrunch*
|
||||
.*crunch*.local.xml
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.Publish.xml
|
||||
|
||||
# Javascript from Typescript
|
||||
*.map
|
||||
|
||||
# NuGet Packages Directory
|
||||
**/packages/*
|
||||
**/.nuget/*
|
||||
*.nupkg
|
||||
*.rtf
|
||||
*/repositories.config
|
||||
|
||||
# Windows Azure Build Output
|
||||
csx
|
||||
*.build.csdef
|
||||
|
||||
# Windows Store app package directory
|
||||
AppPackages/
|
||||
|
||||
# Others
|
||||
sql/
|
||||
*.Cache
|
||||
ClientBin/
|
||||
[Ss]tyle[Cc]op.*
|
||||
~$*
|
||||
*~
|
||||
*.dbmdl
|
||||
*.[Pp]ublish.xml
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file to a newer
|
||||
# Visual Studio version. Backup files are not needed, because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
|
||||
# SQL Server files
|
||||
App_Data/*.mdf
|
||||
App_Data/*.ldf
|
||||
|
||||
|
||||
#LightSwitch generated files
|
||||
GeneratedArtifacts/
|
||||
_Pvt_Extensions/
|
||||
ModelManifest.xml
|
||||
|
||||
# =========================
|
||||
# Windows detritus
|
||||
# =========================
|
||||
|
||||
# Windows image file caches
|
||||
Thumbs.db
|
||||
ehthumbs.db
|
||||
|
||||
# Folder config file
|
||||
Desktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Mac desktop service store files
|
||||
.DS_Store
|
||||
*.dgml
|
||||
|
||||
*.orig
|
||||
*.ide
|
||||
*.jrs
|
||||
*.dll
|
||||
*.zip
|
||||
*.dtp
|
||||
*.DotSettings
|
||||
|
||||
# Xamarin Studio userprefs
|
||||
*.userprefs
|
||||
|
||||
# Auto-generated
|
||||
VersionInfo.cs
|
||||
|
||||
.vs/
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
|
||||
</startup>
|
||||
</configuration>
|
|
@ -0,0 +1,477 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.ProgramSynthesis.Extraction;
|
||||
using Microsoft.ProgramSynthesis.Extraction.Text.Semantics;
|
||||
using Microsoft.ProgramSynthesis;
|
||||
using Microsoft.ProgramSynthesis.AST;
|
||||
using Microsoft.ProgramSynthesis.VersionSpace;
|
||||
|
||||
namespace Microsoft.ProgramSynthesis.Extraction.Text.Sample
|
||||
{
|
||||
/// <summary>
|
||||
/// Extraction.Text learns programs to extract a single string region or a sequence of string regions from text files.
|
||||
/// This class demonstrates some common usage of Extraction.Text APIs.
|
||||
/// </summary>
|
||||
internal static class LearningSamples
|
||||
{
|
||||
private static void Main(string[] args)
{
    // Every entry is one self-contained sample; they run in the order listed.
    Action[] samples =
    {
        LearnRegion,
        LearnRegionUsingMultipleFiles,
        LearnRegionWithNegativeExamples,
        LearnRegionWithAdditionalReferences,
        LearnRegionReferencingParent,
        LearnRegionReferencingPrecedingSibling,
        LearnRegionReferencingSucceedingSibling,
        LearnTop3RegionPrograms,
        LearnAllRegionPrograms,
        LearnRegionWithRegexes,
        SerializeProgram,

        // Sequence learning mirrors region learning, so only a couple of its APIs are shown;
        // the remaining sequence APIs resemble their region counterparts.
        // Note: positive examples for a sequence must be given without gaps.
        // When learning the list {A, B, C, D, E}, {A, B} is a valid example set but {A, C} is not:
        // given {A, C}, Extraction.Text assumes B is a negative example.
        // This assumption helps learning converge more quickly.
        LearnSequence,
        LearnSequenceReferencingSibling
    };

    foreach (Action sample in samples)
    {
        sample();
    }
}
|
||||
|
||||
/// <summary>
///     Learns a region program from a single example and applies it to a previously unseen input.
/// </summary>
private static void LearnRegion()
{
    var trainingText = StringRegion.Create("Carrie Dodson 100");

    // A position is the location between two characters, counted from 0 at the start of the text;
    // an example region is given by its start and end positions.
    // One region is extracted from one file, so a single example is supplied.
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(trainingText, trainingText.Slice(7, 13)) // "Carrie Dodson 100" => "Dodson"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positives, negatives);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    var unseenText = StringRegion.Create("Leonard Robledo 75"); // expect "Robledo"
    IEnumerable<StringRegion> extractedRegions = learned.Run(unseenText);

    // Only the first element is needed because this is a region program.
    var extracted = extractedRegions.FirstOrDefault();
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", unseenText, extracted);
}
|
||||
|
||||
/// <summary>
///     Learns a region program from two examples that come from two different files.
///     Supplying examples from several files works the same way as supplying several examples from one file.
/// </summary>
private static void LearnRegionUsingMultipleFiles()
{
    var firstFile = StringRegion.Create("Carrie Dodson 100");
    var secondFile = StringRegion.Create("Leonard Robledo 75");

    // One example per file; each example pairs the file with the region to extract from it.
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(firstFile, firstFile.Slice(7, 13)), // "Carrie Dodson 100" => "Dodson"
        new ExtractionExample<StringRegion>(secondFile, secondFile.Slice(8, 15)) // "Leonard Robledo 75" => "Robledo"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positives, negatives);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    var unseenText = StringRegion.Create("Margaret Cook 320"); // expect "Cook"
    IEnumerable<StringRegion> extractedRegions = learned.Run(unseenText);

    // Only the first element is needed because this is a region program.
    var extracted = extractedRegions.FirstOrDefault();
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", unseenText, extracted);
}
|
||||
|
||||
|
||||
/// <summary>
///     Learns a region program from a positive example combined with a negative example.
///     Shows how negative examples are passed to the learner.
/// </summary>
private static void LearnRegionWithNegativeExamples()
{
    var text = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

    // One region per line of the text.
    var records = new StringRegion[] { text.Slice(0, 17), text.Slice(18, 36), text.Slice(37, 54) };

    // Goal: extract "100" and "320", and nothing from the "NA" record.
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negatives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(records[1], records[1]) // no extraction in "Leonard Robledo NA"
    };

    // Extraction.Text looks for a program whose output does not OVERLAP with any negative example.
    Program learned = Learner.Instance.LearnRegion(positives, negatives);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var result in learned.Run(records))
    {
        // A record without an extraction is reported as the literal text "null".
        var shown = result.Output == null ? "null" : result.Output.Value;
        Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, shown);
    }
}
|
||||
|
||||
/// <summary>
///     Learns a region program from one example while also handing the learner further,
///     unlabeled references that help it pick the intended program.
/// </summary>
private static void LearnRegionWithAdditionalReferences()
{
    var text = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

    // One region per line of the text.
    var records = new StringRegion[] { text.Slice(0, 17), text.Slice(18, 36), text.Slice(37, 54) };

    // Goal: extract "100", "75", and "***".
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    // Additional references let Extraction.Text observe how candidate programs behave on unseen data.
    // Without them it may learn a program that extracts the first number; with them it can tell that
    // such a program is not applicable to the third record "Margaret Cook ***" and promote a more
    // applicable program instead. Every record except the example's own is passed as a reference.
    var additionalReferences = records.Skip(1);

    Program learned = Learner.Instance.LearnRegion(positives, negatives, additionalReferences);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var result in learned.Run(records))
    {
        // A record without an extraction is reported as the literal text "null".
        var shown = result.Output == null ? "null" : result.Output.Value;
        Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, shown);
    }
}
|
||||
|
||||
/// <summary>
///     Learns a region program whose reference is the containing (parent) region of the output.
///     Shows how parent referencing works.
/// </summary>
private static void LearnRegionReferencingParent()
{
    var text = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

    // One region per line of the text; each record is the parent of the number it contains.
    var records = new StringRegion[] { text.Slice(0, 17), text.Slice(18, 36), text.Slice(37, 54) };

    // Goal: extract the number from each record.
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)), // "Carrie Dodson 100" => "100"
        new ExtractionExample<StringRegion>(records[1], records[1].Slice(34, 36)) // "Leonard Robledo 75" => "75"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positives, negatives);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var result in learned.Run(records))
    {
        // A record without an extraction is reported as the literal text "null".
        var shown = result.Output == null ? "null" : result.Output.Value;
        Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, shown);
    }
}
|
||||
|
||||
/// <summary>
///     Learns a region program whose reference is another region located before the output
///     (a preceding sibling region).
///     Shows how sibling referencing works.
/// </summary>
private static void LearnRegionReferencingPrecedingSibling()
{
    var text = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

    // One region per line, plus the first name found at the start of each line.
    var records = new StringRegion[] { text.Slice(0, 17), text.Slice(18, 36), text.Slice(37, 54) };
    var firstNames = new StringRegion[] { text.Slice(0, 6), text.Slice(18, 25), text.Slice(37, 45) };

    // Goal: extract the number, using the first name as the reference region.
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(firstNames[0], records[0].Slice(14, 17)), // "Carrie" => "100"
        new ExtractionExample<StringRegion>(firstNames[1], records[1].Slice(34, 36)) // "Leonard" => "75"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positives, negatives);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var result in learned.Run(firstNames))
    {
        // A reference without an extraction is reported as the literal text "null".
        var shown = result.Output == null ? "null" : result.Output.Value;
        Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, shown);
    }
}
|
||||
|
||||
/// <summary>
///     Learns a program to extract a single region using another region that appears after it as reference
///     (i.e., succeeding sibling region).
///     Demonstrates how sibling referencing works.
/// </summary>
private static void LearnRegionReferencingSucceedingSibling()
{
    var input = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

    // One region per line, plus the number found at the end of each line.
    StringRegion[] records = { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };
    StringRegion[] numbers = { input.Slice(14, 17), input.Slice(34, 36), input.Slice(51, 54) };

    // Suppose we want to extract the first name w.r.t. the number.
    // In each example the reference is the number and the output is the first name that precedes it.
    var positiveExamples = new[] {
        new ExtractionExample<StringRegion>(numbers[0], records[0].Slice(0, 6)), // "100" => "Carrie"
        new ExtractionExample<StringRegion>(numbers[1], records[1].Slice(18, 25)) // "75" => "Leonard"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program topRankedProg = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
    if (topRankedProg == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in topRankedProg.Run(numbers))
    {
        // A reference without an extraction is reported as the literal text "null".
        var output = r.Output != null ? r.Output.Value : "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
|
||||
|
||||
/// <summary>
///     Learns the 3 top-ranked region programs.
///     Demonstrates access to lower-ranked programs.
/// </summary>
private static void LearnTop3RegionPrograms()
{
    var input = StringRegion.Create("Carrie Dodson 100");

    // Positions 14..17 select the trailing number, not the last name.
    var positiveExamples = new[] {
        new ExtractionExample<StringRegion>(input, input.Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    // The last argument is the number of programs requested.
    IEnumerable<Program> topKPrograms = Learner.Instance.LearnTopKRegion(positiveExamples, negativeExamples, 3);

    var i = 0;
    // Run every learnt program on the training input and on two other inputs.
    StringRegion[] otherInputs = { input, StringRegion.Create("Leonard Robledo NA"), StringRegion.Create("Margaret Cook 320") };
    foreach (var prog in topKPrograms)
    {
        Console.WriteLine("Program {0}:", ++i);
        foreach (var str in otherInputs)
        {
            // Only the first element is needed because this is a region program; a missing result prints "null".
            var r = prog.Run(str).FirstOrDefault();
            Console.WriteLine(r != null ? r.Value : "null");
        }
    }
}
|
||||
|
||||
|
||||
/// <summary>
///     Learns all region programs that satisfy the examples (advanced feature).
///     Demonstrates access to the entire program set.
/// </summary>
private static void LearnAllRegionPrograms()
{
    var input = StringRegion.Create("Carrie Dodson 100");

    // Positions 14..17 select the trailing number, not the last name.
    var positiveExamples = new[] {
        new ExtractionExample<StringRegion>(input, input.Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    ProgramSet allPrograms = Learner.Instance.LearnAllRegion(positiveExamples, negativeExamples);
    // NOTE(review): it is not visible here whether LearnAllRegion can return null; the guard mirrors
    // the failure handling of the other samples instead of risking a NullReferenceException below.
    if (allPrograms == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    IEnumerable<ProgramNode> topKPrograms = allPrograms.TopK("Score", 3); // "Score" is the ranking feature

    var i = 0;
    // Run every selected program on the training input and on two other inputs.
    StringRegion[] otherInputs = { input, StringRegion.Create("Leonard Robledo NA"), StringRegion.Create("Margaret Cook 320") };
    foreach (var prog in topKPrograms)
    {
        Console.WriteLine("Program {0}:", ++i);
        foreach (var str in otherInputs)
        {
            State inputState = State.Create(Language.Grammar.InputSymbol, str); // Create Microsoft.ProgramSynthesis input state
            object r = prog.Invoke(inputState); // Invoke Microsoft.ProgramSynthesis program node on the input state

            // Invoke returns object: convert once and test the converted value, so that a null result
            // or a result that is not a StringRegion prints "null" instead of throwing.
            var region = r as StringRegion;
            Console.WriteLine(region != null ? region.Value : "null");
        }
    }
}
|
||||
|
||||
|
||||
/// <summary>
///     Learns a region program from a positive example plus regular expressions supplied as
///     additional constraints to Extraction.Text.
///     This is an advanced feature.
/// </summary>
private static void LearnRegionWithRegexes()
{
    var text = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

    // One region per line of the text.
    var records = new StringRegion[] { text.Slice(0, 17), text.Slice(18, 36), text.Slice(37, 54) };

    // Goal: extract the number from each record.
    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    // NOTE(review): presumably the look-behind/look-ahead regexes constrain the text around the
    // region and the matching regex constrains the region itself - confirm against the SDK docs.
    // No look-ahead constraint is supplied.
    Regex lookBehindRegex = new Regex("\\s");
    Regex matchingRegex = new Regex("\\d+");
    Regex lookAheadRegex = null;

    // The third argument (null) and the fourth (1) precede the three regexes in the call;
    // only the first program of the returned sequence is used below.
    IEnumerable<Program> learnedPrograms = Learner.Instance.LearnTopKRegion(
        positives, negatives, null, 1, lookBehindRegex, matchingRegex, lookAheadRegex);

    Program learned = learnedPrograms.FirstOrDefault();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var result in learned.Run(records))
    {
        // A record without an extraction is reported as the literal text "null".
        var shown = result.Output == null ? "null" : result.Output.Value;
        Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, shown);
    }
}
|
||||
|
||||
|
||||
/// <summary>
///     Learns a region program, serializes it to a string, loads it back,
///     and runs the deserialized program on a new input.
/// </summary>
private static void SerializeProgram()
{
    var trainingText = StringRegion.Create("Carrie Dodson 100");

    var positives = new ExtractionExample<StringRegion>[]
    {
        new ExtractionExample<StringRegion>(trainingText, trainingText.Slice(7, 13)) // "Carrie Dodson 100" => "Dodson"
    };
    var negatives = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positives, negatives);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Round trip: program -> string -> program. Only the reloaded program is executed below.
    string serialized = learned.Serialize();
    Program reloaded = Program.Load(serialized);

    var unseenText = StringRegion.Create("Leonard Robledo 75"); // expect "Robledo"
    IEnumerable<StringRegion> extractedRegions = reloaded.Run(unseenText);

    // Only the first element is needed because this is a region program.
    var extracted = extractedRegions.FirstOrDefault();
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", unseenText, extracted);
}
|
||||
|
||||
/// <summary>
///     Learns a program to extract a sequence of regions from a file.
///     The whole input is used as the reference of every example.
/// </summary>
private static void LearnSequence()
{
    // It is advised to learn a sequence with at least 2 examples because generalizing a sequence from a single element is hard.
    // Also, the positive examples must be given without gaps (i.e., we cannot skip any example).
    var input = StringRegion.Create("United States\nCarrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320\n" +
                                    "Canada\nConcetta Beck 350\nNicholas Sayers 90\nFrancis Terrill 2430\n" +
                                    "Great Britain\nNettie Pope 50\nMack Beeson 1070");

    // Suppose we want to extract all first names from the input string.
    var positiveExamples = new[] {
        new ExtractionExample<StringRegion>(input, input.Slice(14, 20)), // input => "Carrie"
        new ExtractionExample<StringRegion>(input, input.Slice(32, 39)) // input => "Leonard"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program topRankedProg = Learner.Instance.LearnSequence(positiveExamples, negativeExamples);
    if (topRankedProg == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Running a sequence program on one input yields the extracted regions; each is printed on its own line.
    foreach (var r in topRankedProg.Run(input))
    {
        var output = r != null ? r.Value : "null";
        Console.WriteLine(output);
    }
}
|
||||
|
||||
/// <summary>
///     Learns a program to extract a sequence of regions using its preceding sibling as reference.
///     Here the reference is the country line that appears before the names.
/// </summary>
private static void LearnSequenceReferencingSibling()
{
    var input = StringRegion.Create("United States\nCarrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320\n" +
                                    "Canada\nConcetta Beck 350\nNicholas Sayers 90\nFrancis Terrill 2430\n" +
                                    "Great Britain\nNettie Pope 50\nMack Beeson 1070");

    // "United States", "Canada", and "Great Britain".
    StringRegion[] countries = { input.Slice(0, 13), input.Slice(69, 75), input.Slice(134, 147) };

    // Suppose we want to extract the first names that follow each country.
    // Both examples use the first country as their reference.
    var positiveExamples = new[] {
        new ExtractionExample<StringRegion>(countries[0], input.Slice(14, 20)), // "United States" => "Carrie"
        new ExtractionExample<StringRegion>(countries[0], input.Slice(32, 39)), // "United States" => "Leonard"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program topRankedProg = Learner.Instance.LearnSequence(positiveExamples, negativeExamples);
    if (topRankedProg == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in topRankedProg.Run(countries))
    {
        // A result without an extraction is reported as the literal text "null".
        var output = r.Output != null ? r.Output.Value : "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{96D149D8-0A64-451A-A909-499D2162683D}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>Microsoft.ProgramSynthesis.Extraction.Text.Sample</RootNamespace>
|
||||
<AssemblyName>Microsoft.ProgramSynthesis.Extraction.Text.Sample</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="Antlr4.Runtime.net45, Version=4.3.0.0, Culture=neutral, PublicKeyToken=eb42632606e9261f, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Antlr4.Runtime.4.3.0\lib\net45\Antlr4.Runtime.net45.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="CsQuery, Version=1.3.5.124, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\CsQuery.1.3.5-beta5\lib\net40\CsQuery.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Experimental.Collections, Version=1.0.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Experimental.Collections.1.0.3-alpha\lib\portable-net45+win8+wp8\Microsoft.Experimental.Collections.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.FlashFill, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.FlashFill.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.FlashFill.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.FlashFill.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Paraphrasing, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Paraphrasing.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Utils, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Utils.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Collections.Immutable, Version=1.1.36.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Collections.Immutable.1.1.36\lib\portable-net45+win8+wp8+wpa81\System.Collections.Immutable.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Interactive, Version=1.2.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Ix-Main.1.2.5\lib\net45\System.Interactive.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Reflection.Metadata, Version=1.0.21.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Reflection.Metadata.1.0.21\lib\portable-net45+win8\System.Reflection.Metadata.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="LearningSamples.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
|
@ -0,0 +1,37 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("Microsoft.ProgramSynthesis.Extraction.Text.Sample")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("Microsoft")]
|
||||
[assembly: AssemblyProduct("Microsoft.ProgramSynthesis.Extraction.Text.Sample")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2015")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("80fa46f0-36e2-4a1a-b4ec-414676d2a38d")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyInformationalVersion("")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="Antlr4.Runtime" version="4.3.0" targetFramework="net45" />
|
||||
<package id="CsQuery" version="1.3.5-beta5" targetFramework="net45" />
|
||||
<package id="Ix-Main" version="1.2.5" targetFramework="net45" />
|
||||
<package id="Microsoft.Experimental.Collections" version="1.0.3-alpha" targetFramework="net45" />
|
||||
<package id="Microsoft.ProgramSynthesis" version="0.1.1-preview" targetFramework="net45" />
|
||||
<package id="System.Collections.Immutable" version="1.1.36" targetFramework="net45" />
|
||||
<package id="System.Reflection.Metadata" version="1.0.21" targetFramework="net45" />
|
||||
</packages>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
|
||||
</startup>
|
||||
</configuration>
|
|
@ -0,0 +1,135 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{D24046B6-4E7C-4112-B88C-D360695D1E66}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>Microsoft.ProgramSynthesis.Extraction.Web.Sample</RootNamespace>
|
||||
<AssemblyName>Microsoft.ProgramSynthesis.Extraction.Web.Sample</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="Antlr4.Runtime.net45, Version=4.3.0.0, Culture=neutral, PublicKeyToken=eb42632606e9261f, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Antlr4.Runtime.4.3.0\lib\net45\Antlr4.Runtime.net45.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="CsQuery, Version=1.3.5.124, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\CsQuery.1.3.5-beta5\lib\net40\CsQuery.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Experimental.Collections, Version=1.0.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Experimental.Collections.1.0.3-alpha\lib\portable-net45+win8+wp8\Microsoft.Experimental.Collections.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.FlashFill, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.FlashFill.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.FlashFill.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.FlashFill.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Paraphrasing, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Paraphrasing.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Utils, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Utils.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Collections.Immutable, Version=1.1.36.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Collections.Immutable.1.1.36\lib\portable-net45+win8+wp8+wpa81\System.Collections.Immutable.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Interactive, Version=1.2.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Ix-Main.1.2.5\lib\net45\System.Interactive.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Reflection.Metadata, Version=1.0.21.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Reflection.Metadata.1.0.21\lib\portable-net45+win8\System.Reflection.Metadata.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="SampleProgram.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
|
@ -0,0 +1,37 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("Microsoft.ProgramSynthesis.Extraction.Web.Sample")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("Microsoft")]
|
||||
[assembly: AssemblyProduct("Microsoft.ProgramSynthesis.Extraction.Web.Sample")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2015")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("12ee5291-77cb-4a5c-86e3-cbb50995cb8b")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyInformationalVersion("")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
|
@ -0,0 +1,31 @@
|
|||
#,First Name,Last Name,Language,Gender
|
||||
1,Harriet,Briggs,English (UK),Female
|
||||
2,Bill,Parsons,English (UK),Male
|
||||
3,Toby,Cameron,English (UK),Male
|
||||
4,Lucy,Archer,English (UK),Female
|
||||
5,Shannon,Owens,English (UK),Female
|
||||
6,Zoe,Parsons,English (UK),Female
|
||||
7,Laura,Atkins,English (UK),Female
|
||||
8,Paige,Wall,English (UK),Female
|
||||
9,Maya,Garner,English (UK),Female
|
||||
10,Phoebe,Briggs,English (UK),Female
|
||||
11,Brandan,May,English (UK),Male
|
||||
12,Billy,May,English (UK),Male
|
||||
13,Rosie,Moss,English (UK),Female
|
||||
14,Abbi,Haynes,English (UK),Female
|
||||
15,Amelie,Garner,English (UK),Female
|
||||
16,Eleanor,Booth,English (UK),Female
|
||||
17,William,Perkins,English (UK),Male
|
||||
18,Morgan,Dobson,English (UK),Male
|
||||
19,Lily,Harrison,English (UK),Female
|
||||
20,Victoria,Bryan,English (UK),Female
|
||||
21,Skye,Stokes,English (UK),Female
|
||||
22,Ella,Bull,English (UK),Female
|
||||
23,Maya,Atkins,English (UK),Female
|
||||
24,Elliot,Dobson,English (UK),Male
|
||||
25,Mary,Mellor,English (UK),Female
|
||||
26,Hayden,Lucas,English (UK),Male
|
||||
27,Elliot,Small,English (UK),Male
|
||||
28,Kiera,Rees,English (UK),Female
|
||||
29,Oliver,Giles,English (UK),Male
|
||||
30,Aaliyah,Perry,English (UK),Female
|
|
|
@ -0,0 +1,113 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
table, td, th {
|
||||
border: 1px solid black;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
th {
|
||||
height: 50px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<p>
|
||||
The table below shows the first name, last name and savings for each person.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Harriet</td>
|
||||
<td>Briggs</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>100</li>
|
||||
<li>120</li>
|
||||
<li>45</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Bill</td>
|
||||
<td>Parsons</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>150</li>
|
||||
<li>620</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Toby</td>
|
||||
<td>Cameron</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>30</li>
|
||||
<li>420</li>
|
||||
<li>345</li>
|
||||
<li>555</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Shannon</td>
|
||||
<td>Owens</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>60</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Maya</td>
|
||||
<td>Garner</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>440</li>
|
||||
<li>920</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Eleanor</td>
|
||||
<td>Booth</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>765</li>
|
||||
<li>355</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Elliot</td>
|
||||
<td>Dobson</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>240</li>
|
||||
<li>532</li>
|
||||
<li>100</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Aaliyah</td>
|
||||
<td>Perry</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>830</li>
|
||||
<li>120</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,105 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
table, td, th {
|
||||
border: 1px solid black;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
th {
|
||||
height: 50px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<p>
|
||||
The table below shows the first name, last name and savings for each person.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Brandan</td>
|
||||
<td>May</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>2343</li>
|
||||
<li>1245</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Eleanor</td>
|
||||
<td>Booth</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>2350</li>
|
||||
<li>220</li>
|
||||
<li>150</li>
|
||||
<li>120</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>William</td>
|
||||
<td>Perkins</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>220</li>
|
||||
<li>345</li>
|
||||
<li>85</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Morgan</td>
|
||||
<td>Dobson</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>80</li>
|
||||
<li>2980</li>
|
||||
<li>550</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lily</td>
|
||||
<td>Harrison</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>250</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Victoria</td>
|
||||
<td>Bryan</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>12</li>
|
||||
<li>532</li>
|
||||
<li>100</li>
|
||||
<li>8899</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Skye</td>
|
||||
<td>Stokes</td>
|
||||
<td>
|
||||
<ul>
|
||||
<li>123120</li>
|
||||
<li>123120</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,188 @@
|
|||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.ProgramSynthesis.Extraction.Web.Semantics;
|
||||
|
||||
namespace Microsoft.ProgramSynthesis.Extraction.Web.Sample
|
||||
{
|
||||
internal class SampleProgram
{
    // Relative paths of the two sample HTML pages read by the samples below.
    private const string FirstSampleDocumentPath = @"..\..\SampleDocuments\sample-document-1.html";
    private const string SecondSampleDocumentPath = @"..\..\SampleDocuments\sample-document-2.html";

    // CSS selector used by most samples as the example region: 2nd cell in 1st table row.
    private const string FirstRowSurnameSelector = "tr:nth-child(1) td:nth-child(2)";

    /// <summary>
    /// Entry point: runs every sample in turn, then waits for a line of console
    /// input before returning.
    /// </summary>
    private static void Main(string[] args)
    {
        LearnFirstSurnameInDocumentUsingOneExample();
        LearnFirstSurnameInDocumentUsingMultipleExamples();
        LearnSurnameWithRespectToTableRow();
        LearnSurnameWithRespectToTableRowUsingNegativeExample();
        LearnAllSurnamesInDocument();
        SerializeProgram();
        Console.ReadLine();
    }

    /// <summary>
    /// Learns a region program from a single positive example (the surname cell of the
    /// first table row, relative to the whole document) and runs it on that document.
    /// </summary>
    public static void LearnFirstSurnameInDocumentUsingOneExample()
    {
        HtmlDoc document = LoadDocument(FirstSampleDocumentPath);
        var wholeDocument = new WebRegion(document);
        WebRegion surnameCell = document.GetRegion(FirstRowSurnameSelector);
        var positive = new ExtractionExample<WebRegion>(wholeDocument, surnameCell);

        Web.Program learned = Web.Learner.Instance.LearnRegion(
            new[] { positive },
            Enumerable.Empty<ExtractionExample<WebRegion>>());
        if (learned == null)
        {
            return;
        }

        // Execute the learned program against the document it was learned from.
        IEnumerable<WebRegion> extracted = learned.Run(wholeDocument);
        WriteEachRegionWithCaption("Learn first surname in document from one example: ", extracted);
    }

    /// <summary>
    /// Learns a region program from two positive examples, one taken from each of the
    /// two sample documents, and runs it on the second document.
    /// </summary>
    public static void LearnFirstSurnameInDocumentUsingMultipleExamples()
    {
        HtmlDoc firstDocument = LoadDocument(FirstSampleDocumentPath);
        HtmlDoc secondDocument = LoadDocument(SecondSampleDocumentPath);
        var firstWhole = new WebRegion(firstDocument);
        var secondWhole = new WebRegion(secondDocument);

        // The same selector (2nd cell in 1st table row) is resolved in each document.
        WebRegion firstSurname = firstDocument.GetRegion(FirstRowSurnameSelector);
        WebRegion secondSurname = secondDocument.GetRegion(FirstRowSurnameSelector);
        var positives = new[]
        {
            new ExtractionExample<WebRegion>(firstWhole, firstSurname),
            new ExtractionExample<WebRegion>(secondWhole, secondSurname)
        };

        Web.Program learned = Web.Learner.Instance.LearnRegion(
            positives,
            Enumerable.Empty<ExtractionExample<WebRegion>>());
        if (learned == null)
        {
            return;
        }

        // Execute the learned program against the second document.
        IEnumerable<WebRegion> extracted = learned.Run(secondWhole);
        WriteEachRegionWithCaption("Learn first surname in document from multiple examples: ", extracted);
    }

    /// <summary>
    /// Learns a region program whose reference region is a table row instead of the whole
    /// document, using rows 1 and 2 as examples, and runs it on row 5.
    /// </summary>
    public static void LearnSurnameWithRespectToTableRow()
    {
        HtmlDoc document = LoadDocument(FirstSampleDocumentPath);
        WebRegion firstRow = document.GetRegion("tr:nth-child(1)");
        WebRegion firstRowSurname = document.GetRegion(FirstRowSurnameSelector);
        WebRegion secondRow = document.GetRegion("tr:nth-child(2)");
        WebRegion secondRowSurname = document.GetRegion("tr:nth-child(2) td:nth-child(2)");
        var positives = new[]
        {
            new ExtractionExample<WebRegion>(firstRow, firstRowSurname),
            new ExtractionExample<WebRegion>(secondRow, secondRowSurname)
        };

        Web.Program learned = Web.Learner.Instance.LearnRegion(
            positives,
            Enumerable.Empty<ExtractionExample<WebRegion>>());
        if (learned == null)
        {
            return;
        }

        // Apply the row-relative program to a row that was not among the examples.
        WebRegion fifthRow = document.GetRegion("tr:nth-child(5)");
        IEnumerable<WebRegion> extracted = learned.Run(fifthRow);
        WriteEachRegionWithCaption("Learn surname with respect to table row: ", extracted);
    }

    /// <summary>
    /// Learns a row-relative region program from one positive example (row 1, 2nd cell)
    /// plus one negative example (row 2, 1st cell), and runs it on row 1.
    /// </summary>
    public static void LearnSurnameWithRespectToTableRowUsingNegativeExample()
    {
        HtmlDoc document = LoadDocument(FirstSampleDocumentPath);
        WebRegion firstRow = document.GetRegion("tr:nth-child(1)");
        WebRegion secondRow = document.GetRegion("tr:nth-child(2)");
        var positive = new ExtractionExample<WebRegion>(
            firstRow,
            document.GetRegion(FirstRowSurnameSelector));
        var negative = new ExtractionExample<WebRegion>(
            secondRow,
            document.GetRegion("tr:nth-child(2) td:nth-child(1)"));

        Web.Program learned = Web.Learner.Instance.LearnRegion(
            new[] { positive },
            new[] { negative });
        if (learned == null)
        {
            return;
        }

        IEnumerable<WebRegion> extracted = learned.Run(firstRow);
        WriteEachRegionWithCaption("Learn surname with respect to table row using negative example: ", extracted);
    }

    /// <summary>
    /// Learns a sequence program from the surname cells of rows 1 and 2 (both relative to
    /// the whole document) and prints every region it yields on that document.
    /// </summary>
    public static void LearnAllSurnamesInDocument()
    {
        HtmlDoc document = LoadDocument(FirstSampleDocumentPath);
        var wholeDocument = new WebRegion(document);
        WebRegion firstSurname = document.GetRegion(FirstRowSurnameSelector);
        WebRegion secondSurname = document.GetRegion("tr:nth-child(2) td:nth-child(2)");
        var positives = new[]
        {
            new ExtractionExample<WebRegion>(wholeDocument, firstSurname),
            new ExtractionExample<WebRegion>(wholeDocument, secondSurname)
        };

        Web.Program learned = Web.Learner.Instance.LearnSequence(
            positives,
            Enumerable.Empty<ExtractionExample<WebRegion>>());
        if (learned == null)
        {
            return;
        }

        IEnumerable<WebRegion> extracted = learned.Run(wholeDocument);
        WriteRegionsUnderCaption("Learn all surnames in document: ", extracted);
    }

    /// <summary>
    /// Learns the single-example region program, serializes it, loads it back from the
    /// serialized text, and runs the loaded copy on the document.
    /// </summary>
    public static void SerializeProgram()
    {
        HtmlDoc document = LoadDocument(FirstSampleDocumentPath);
        var wholeDocument = new WebRegion(document);
        WebRegion surnameCell = document.GetRegion(FirstRowSurnameSelector);
        var positive = new ExtractionExample<WebRegion>(wholeDocument, surnameCell);

        Web.Program learned = Web.Learner.Instance.LearnRegion(
            new[] { positive },
            Enumerable.Empty<ExtractionExample<WebRegion>>());
        if (learned == null)
        {
            return;
        }

        // Round-trip through the textual form; only the reloaded program is executed.
        string serialized = learned.Serialize();
        Web.Program reloaded = Web.Program.Load(serialized);
        IEnumerable<WebRegion> extracted = reloaded.Run(wholeDocument);
        WriteRegionsUnderCaption("Run first surname extraction program after serialization and deserialization: ", extracted);
    }

    /// <summary>
    /// Reads the file at <paramref name="path"/> as text and builds an HtmlDoc from it.
    /// </summary>
    private static HtmlDoc LoadDocument(string path)
    {
        string markup = File.ReadAllText(path);
        return HtmlDoc.Create(markup);
    }

    /// <summary>
    /// Writes a region's specific selector and then its text, one per console line.
    /// </summary>
    private static void WriteRegion(WebRegion region)
    {
        Console.WriteLine(region.GetSpecificSelector());
        Console.WriteLine(region.Text());
    }

    /// <summary>
    /// For every region: writes the caption, the region, and a trailing blank line.
    /// Writes nothing at all when <paramref name="regions"/> yields no items.
    /// </summary>
    private static void WriteEachRegionWithCaption(string caption, IEnumerable<WebRegion> regions)
    {
        foreach (WebRegion region in regions)
        {
            Console.WriteLine(caption);
            WriteRegion(region);
            Console.WriteLine();
        }
    }

    /// <summary>
    /// Writes the caption once, then every region, then a single trailing blank line.
    /// The caption and blank line are written even when no regions are yielded.
    /// </summary>
    private static void WriteRegionsUnderCaption(string caption, IEnumerable<WebRegion> regions)
    {
        Console.WriteLine(caption);
        foreach (WebRegion region in regions)
        {
            WriteRegion(region);
        }
        Console.WriteLine();
    }
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="Antlr4.Runtime" version="4.3.0" targetFramework="net45" />
|
||||
<package id="CsQuery" version="1.3.5-beta5" targetFramework="net45" />
|
||||
<package id="Ix-Main" version="1.2.5" targetFramework="net45" />
|
||||
<package id="Microsoft.Experimental.Collections" version="1.0.3-alpha" targetFramework="net45" />
|
||||
<package id="Microsoft.ProgramSynthesis" version="0.1.1-preview" targetFramework="net45" />
|
||||
<package id="System.Collections.Immutable" version="1.1.36" targetFramework="net45" />
|
||||
<package id="System.Reflection.Metadata" version="1.0.21" targetFramework="net45" />
|
||||
</packages>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5"/>
|
||||
</startup>
|
||||
</configuration>
|
|
@ -0,0 +1,141 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>Microsoft.ProgramSynthesis.FlashFill.Sample</RootNamespace>
|
||||
<AssemblyName>Microsoft.ProgramSynthesis.FlashFill.Sample</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
<TargetFrameworkProfile />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="Antlr4.Runtime.net45, Version=4.3.0.0, Culture=neutral, PublicKeyToken=eb42632606e9261f, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Antlr4.Runtime.4.3.0\lib\net45\Antlr4.Runtime.net45.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="CsQuery, Version=1.3.5.124, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\CsQuery.1.3.5-beta5\lib\net40\CsQuery.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.Experimental.Collections, Version=1.0.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.Experimental.Collections.1.0.3-alpha\lib\portable-net45+win8+wp8\Microsoft.Experimental.Collections.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Text.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Text.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Extraction.Web.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Extraction.Web.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.FlashFill, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.FlashFill.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.FlashFill.Semantics, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.FlashFill.Semantics.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Learning, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Learning.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Paraphrasing, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Paraphrasing.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="Microsoft.ProgramSynthesis.Utils, Version=0.1.1.2395, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Microsoft.ProgramSynthesis.0.1.1-preview\lib\net45\Microsoft.ProgramSynthesis.Utils.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Collections.Immutable, Version=1.1.36.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Collections.Immutable.1.1.36\lib\portable-net45+win8+wp8+wpa81\System.Collections.Immutable.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Interactive, Version=1.2.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\Ix-Main.1.2.5\lib\net45\System.Interactive.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Reflection.Metadata, Version=1.0.21.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<HintPath>..\packages\System.Reflection.Metadata.1.0.21\lib\portable-net45+win8\System.Reflection.Metadata.dll</HintPath>
|
||||
<Private>True</Private>
|
||||
</Reference>
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Net.Http" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="SampleProgram.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<Compile Include="SimpleAPI.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config">
|
||||
<SubType>Designer</SubType>
|
||||
</None>
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
|
@ -0,0 +1,36 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("Microsoft.ProgramSynthesis.FlashFill.Sample")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("Microsoft")]
|
||||
[assembly: AssemblyProduct("Microsoft.ProgramSynthesis.FlashFill.Sample")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2015")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("2698a7f0-26b7-42e7-a9c6-11bb4aeaf23d")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyInformationalVersion("")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
|
@ -0,0 +1,252 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace Microsoft.ProgramSynthesis.FlashFill.Sample
|
||||
{
|
||||
/// <summary>
|
||||
/// Sample of how to use the FlashFill API. FlashFill generates string programs from input/output examples.
|
||||
/// </summary>
|
||||
internal static class SampleProgram
|
||||
{
|
||||
/// <summary>
/// Entry point of the sample: runs each FlashFill demonstration once, in order.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
private static void Main(string[] args)
{
    // The demonstrations are listed in the order in which they are executed.
    Action[] demonstrations =
    {
        // Simplest usage: single example of a single string input.
        LearnFormatName,
        // Learning a program using multiple examples.
        LearnNormalizePhoneNumber,
        // Learning a program that takes multiple strings (columns) as input.
        LearnMergeNames,
        // Learning top-k ranked programs instead of just the single best one.
        LearnTop10NormalizePhoneNumber,
        LearnTop10FormatName,
        // Learning with additional inputs.
        LearnNormalizeDate,
        // Convert program to string and back.
        SerializeProgram
    };

    foreach (Action demonstration in demonstrations)
    {
        demonstration();
    }
}
|
||||
|
||||
/// <summary>
/// Learns a program that rewrites a name given as "First Last" into "Last, F.", where 'F' is the
/// first initial, and prints its output for two names it was not trained on.
/// Demonstrates the most basic use of the FlashFill API: a single example, a single learned program.
/// </summary>
private static void LearnFormatName()
{
    // Each FlashFillExample pairs an input with the output expected for it.
    IEnumerable<FlashFillExample> trainingExamples = new[]
    {
        new FlashFillExample("Kettil Hansson", "Hansson, K.")
    };

    // With nothing but the examples, Learn() hands back the best-ranked program.
    FlashFillProgram learned = FlashFillProgram.Learn(trainingExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: failed to learn format name program.");
        return;
    }

    // Try the learned program on inputs that were not part of the examples.
    string[] unseenNames = {"Etelka Bala", "Myron Lampros"};
    foreach (string unseen in unseenNames)
    {
        string formatted = learned.Run(unseen);
        Console.WriteLine("\"{0}\" => \"{1}\"", unseen, formatted);
    }
}
|
||||
|
||||
/// <summary>
/// Learns a program that brings phone numbers written in a few different formats into one
/// common output format, then prints its output for two further phone numbers.
/// Demonstrates passing more than one example to FlashFill.
/// </summary>
private static void LearnNormalizePhoneNumber()
{
    // Some programs may require multiple examples.
    // More examples ensure the proper program is learned and may speed up learning.
    IEnumerable<FlashFillExample> trainingExamples = new[]
    {
        new FlashFillExample("425-829-5512", "425-829-5512"),
        new FlashFillExample("(425) 829 5512", "425-829-5512")
    };

    FlashFillProgram learned = FlashFillProgram.Learn(trainingExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: failed to learn normalize phone number program.");
        return;
    }

    string[] unseenNumbers = {"425 233 1234", "(425) 777 3333"};
    foreach (string unseen in unseenNumbers)
    {
        string normalized = learned.Run(unseen);
        Console.WriteLine("\"{0}\" => \"{1}\"", unseen, normalized);
    }
}
|
||||
|
||||
/// <summary>
/// Learns a program that takes a first name and a last name as two separate strings and
/// combines them into "Last, First".
/// Demonstrates inputs made of multiple strings (columns), supplying inputs whose output is not
/// known, and the different argument forms accepted by Run().
/// </summary>
private static void LearnMergeNames()
{
    // An input may consist of several strings. If so, all inputs must contain the same number of strings.
    IEnumerable<FlashFillExample> trainingExamples = new[]
    {
        new FlashFillExample(new FlashFillInput("Kettil", "Hansson"), "Hansson, Kettil")
    };

    // Inputs for which the corresponding output is not known. May be used for improving ranking.
    FlashFillInput[] unlabeledInputs = {new FlashFillInput("Greta", "Hermansson")};

    FlashFillProgram learned = FlashFillProgram.Learn(trainingExamples, unlabeledInputs);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: failed to learn merge names program.");
        return;
    }

    FlashFillInput[] unseenNames =
    {
        new FlashFillInput("Etelka", "Bala"),
        new FlashFillInput("Myron", "Lampros")
    };
    foreach (FlashFillInput unseen in unseenNames)
    {
        string merged = learned.Run(unseen);
        Console.WriteLine("{0} => \"{1}\"", unseen, merged);
    }

    // Besides a FlashFillInput, Run() also accepts the input strings as an IEnumerable<string> ...
    Console.WriteLine("\"Nelly\", \"Akesson\" => \"{0}\"",
                      learned.Run(new List<string> {"Nelly", "Akesson"}));
    // ... or as a params string[].
    Console.WriteLine("\"Nelly\", \"Akesson\" => \"{0}\"",
                      learned.Run("Nelly", "Akesson"));
}
|
||||
|
||||
/// <summary>
/// Learns the top 10 rankings of programs from a single phone-number example (compare
/// <see cref="LearnNormalizePhoneNumber" />) and prints what each learned program produces for
/// a couple of other phone numbers.
/// Demonstrates learning more than just the single top program, and shows the variation in outputs
/// among the top-ranked programs on unseen input formats.
/// </summary>
/// <seealso cref="LearnTop10FormatName" />
private static void LearnTop10NormalizePhoneNumber()
{
    IEnumerable<FlashFillExample> trainingExamples = new[]
    {
        new FlashFillExample("(425) 829 5512", "425-829-5512")
    };

    // The request is for a number of distinct rankings, not a number of programs,
    // so more programs will be generated if there are ties.
    int rankingsRequested = 10;
    IList<FlashFillProgram> learned =
        FlashFillProgram.LearnTopK(trainingExamples, k: rankingsRequested).ToList();

    if (learned.Count == 0)
    {
        Console.Error.WriteLine("Error: failed to learn normalize phone number program.");
        return;
    }

    // The count may exceed rankingsRequested when there are ties in the ranking.
    Console.WriteLine("Learned {0} programs.", learned.Count);

    // Run every program on the same inputs to see how their outputs differ.
    string[] unseenNumbers = {"425 233 1234", "(425) 777 3333"};
    int position = 0;
    foreach (FlashFillProgram candidate in learned)
    {
        position++; // programs are reported 1-based
        foreach (string unseen in unseenNumbers)
        {
            string normalized = candidate.Run(unseen);
            Console.WriteLine("Program {2}: \"{0}\" => \"{1}\"", unseen, normalized, position);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Look at the top 10 programs learned from a single example for formatting a name like in
/// <see cref="LearnFormatName" /> and show the behavior of them on a slightly differently formatted name.
/// Demonstrates learning more than just the single top program, and shows the variation in outputs
/// among the top-ranked programs on unseen input formats.
/// Writes an error to standard error when no program is learned, like the other samples do.
/// </summary>
/// <seealso cref="LearnTop10NormalizePhoneNumber" />
private static void LearnTop10FormatName()
{
    var examples = new[] {new FlashFillExample("Greta Hermansson", "Hermansson, G.")};

    // Materialize the result so it is enumerated only once: first for the emptiness check,
    // then for running the programs.
    IList<FlashFillProgram> programs = FlashFillProgram.LearnTopK(examples, k: 10).ToList();

    if (!programs.Any())
    {
        // Report the failure instead of silently printing nothing.
        Console.Error.WriteLine("Error: failed to learn format name program.");
        return;
    }

    // This attempts running the top 10 programs on an input not directly similar to the example
    // to see if any of them work anyway.
    string input = "Kettil hansson"; // Notice it's "hansson", not "Hansson".
    for (int i = 0; i < programs.Count; i++)
    {
        Console.WriteLine("Program {0}: \"{1}\" => \"{2}\"", i + 1, input, programs[i].Run(input));
    }
}
|
||||
|
||||
/// <summary>
/// Learns a program to convert dates from "DD/MM/YYYY" to "YYYY-MM-DD" and prints its output
/// for two further dates.
/// Demonstrates providing examples using <see cref="string" /> instead of <see cref="FlashFillExample" />
/// and providing additional inputs.
/// </summary>
private static void LearnNormalizeDate()
{
    // FlashFillProgram's .Learn() function also accepts an IDictionary<string, string>
    // (input mapped to expected output) instead of an IEnumerable of FlashFillExample.
    IDictionary<string, string> trainingExamples = new Dictionary<string, string>();
    trainingExamples.Add("02/04/1953", "1953-04-02");

    // Inputs for which the corresponding output is not known. May be used for improving ranking.
    // Given as plain strings instead of FlashFillInputs because the examples are given as an
    // IDictionary<string, string>.
    IEnumerable<string> unlabeledInputs = new[]
    {
        "04/02/1962",
        "27/08/1998"
    };

    FlashFillProgram learned = FlashFillProgram.Learn(trainingExamples, unlabeledInputs);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: failed to learn normalize date program.");
        return;
    }

    string[] unseenDates = {"12/02/1972", "31/01/1983"};
    foreach (string unseen in unseenDates)
    {
        string normalized = learned.Run(unseen);
        Console.WriteLine("\"{0}\" => \"{1}\"", unseen, normalized);
    }
}
|
||||
|
||||
/// <summary>
/// Learns a program for formatting a name, then serializes and deserializes it before running it.
/// Demonstrates serializing a FlashFillProgram to a string and loading it back.
/// </summary>
private static void SerializeProgram()
{
    IEnumerable<FlashFillExample> trainingExamples = new[]
    {
        new FlashFillExample("Kettil Hansson", "Hansson, K.")
    };

    FlashFillProgram learned = FlashFillProgram.Learn(trainingExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: failed to learn format name program.");
        return;
    }

    // A FlashFillProgram is serialized with .ToString() ...
    string serialized = learned.ToString();
    // ... and a serialized program can be loaded again through the FlashFill API with .Load().
    var restored = FlashFillProgram.Load(serialized);

    // Only the restored program is run here, not the originally learned one.
    string[] unseenNames = {"Etelka Bala", "Myron Lampros"};
    foreach (string unseen in unseenNames)
    {
        string formatted = restored.Run(unseen);
        Console.WriteLine("\"{0}\" => \"{1}\"", unseen, formatted);
    }
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Microsoft.ProgramSynthesis.AST;
|
||||
using Microsoft.ProgramSynthesis.Extraction.Text.Semantics;
|
||||
using Microsoft.ProgramSynthesis.Learning;
|
||||
using Microsoft.ProgramSynthesis.Learning.Strategies;
|
||||
using Microsoft.ProgramSynthesis.Specifications;
|
||||
|
||||
namespace Microsoft.ProgramSynthesis.FlashFill.Sample
|
||||
{
|
||||
/// <summary>
|
||||
/// Simplified version of <see cref="FlashFillProgram" /> to demonstrate lower level API usage.
|
||||
/// </summary>
|
||||
public class FlashFillProg
|
||||
{
|
||||
/// <summary>
/// Wraps a learnt program. Private: instances are only created by <see cref="LearnTopK" />.
/// </summary>
/// <param name="program">The learnt program to wrap.</param>
private FlashFillProg(ProgramNode program)
{
    this.ProgramNode = program;
}

/// <summary>
/// The learnt program wrapped by this object; <see cref="Run" /> invokes it.
/// </summary>
public ProgramNode ProgramNode { get; set; }
|
||||
|
||||
/// <summary>
/// Learn <paramref name="k" /> top-ranked FlashFill programs for a given set of input-output examples.
/// </summary>
/// <param name="trainingExamples">
/// The set of input-output examples, as a dictionary mapping each input to its expected output.
/// </param>
/// <param name="additionalInputs">
/// The set of additional inputs that do not have output examples, which helps rank learnt programs.
/// May be null, in which case no additional inputs are given to the learning task.
/// </param>
/// <param name="k">the number of top programs</param>
/// <returns>The top-k ranked programs as <see cref="FlashFillProg" />s</returns>
/// <exception cref="ArgumentNullException"><paramref name="trainingExamples" /> is null.</exception>
public static IEnumerable<FlashFillProg> LearnTopK(IDictionary<string, string> trainingExamples,
                                                   IEnumerable<string> additionalInputs = null, int k = 1)
{
    if (trainingExamples == null)
    {
        throw new ArgumentNullException("trainingExamples");
    }

    // Load the FlashFill grammar.
    Grammar flashFill = FlashFillGrammar.Grammar;

    // Set up the configuration of the synthesis process.
    var configuration = new SynthesisEngine.Config
    {
        // Strategies perform the actual logic of the synthesis process.
        Strategies = new[] {typeof (DeductiveSynthesis)},
        UseThreads = false,
        CacheSize = int.MaxValue
    };
    var synthesizer = new SynthesisEngine(flashFill, configuration);

    // Builds the input state for a single input string.
    // Internally, FlashFill represents strings as StringRegions to save on
    // allocating new strings for each substring.
    // FlashFillInput.AsState() could be used to construct the input state as well.
    Func<string, State> toInputState =
        text => State.Create(flashFill.InputSymbol, new[] {StringRegion.Create(text)});

    // Convert the examples into the format expected by Microsoft.ProgramSynthesis:
    // input state mapped to the expected output.
    var outputByInputState = new Dictionary<State, object>();
    foreach (KeyValuePair<string, string> example in trainingExamples)
    {
        outputByInputState.Add(toInputState(example.Key), StringRegion.Create(example.Value));
    }
    var spec = new ExampleSpec(outputByInputState);

    // Learn an entire FlashFill program (i.e. start at the grammar's start symbol)
    // for the specification consisting of the examples.
    // Learn the top-k programs according to the score feature used by FlashFill by default.
    // You could define your own feature on the FlashFill grammar to rank programs differently.
    var task = new LearningTask(flashFill.StartSymbol, spec, k, FlashFillGrammar.ScoreFeature);
    if (additionalInputs != null)
    {
        task.AdditionalInputs = additionalInputs.Select(toInputState).ToList();
    }

    IEnumerable<ProgramNode> ranked = synthesizer.LearnSymbol(task).RealizedPrograms;

    // Return the generated programs, each wrapped in a FlashFillProg object.
    return ranked.Select(node => new FlashFillProg(node));
}
|
||||
|
||||
/// <summary>
/// Run the program on a given input.
/// </summary>
/// <param name="input">The input string.</param>
/// <returns>
/// The <c>Value</c> of the <see cref="StringRegion" /> produced by the program,
/// or null when invoking the program yields null.
/// </returns>
public string Run(string input)
{
    Grammar grammar = FlashFillGrammar.Grammar;

    // The input state is built in two ways for demonstration; only the second one is used below.
    // First, through the FlashFillInput class:
    State stateFromFlashFillInput = new FlashFillInput(input).AsState();
    // Second, the same without using the FlashFillInput class:
    State stateFromGrammar = State.Create(grammar.InputSymbol, new[] {StringRegion.Create(input)});

    StringRegion output = (StringRegion) ProgramNode.Invoke(stateFromGrammar);
    if (output == null)
    {
        return null;
    }
    return output.Value;
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="Antlr4.Runtime" version="4.3.0" targetFramework="net45" />
|
||||
<package id="CsQuery" version="1.3.5-beta5" targetFramework="net45" />
|
||||
<package id="Ix-Main" version="1.2.5" targetFramework="net45" />
|
||||
<package id="Microsoft.Experimental.Collections" version="1.0.3-alpha" targetFramework="net45" />
|
||||
<package id="Microsoft.ProgramSynthesis" version="0.1.1-preview" targetFramework="net45" />
|
||||
<package id="System.Collections.Immutable" version="1.1.36" targetFramework="net45" />
|
||||
<package id="System.Reflection.Metadata" version="1.0.21" targetFramework="net45" />
|
||||
</packages>
|
|
@ -0,0 +1,26 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Microsoft
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
Third Party Programs: The software may include third party programs that
|
||||
Microsoft, not the third party, licenses to you under this agreement.
|
||||
Notices, if any, for the third party programs are included for your
|
||||
information only.
|
|
@ -1,2 +1,5 @@
|
|||
# prose
|
||||
PROgram Synthesis by Example
|
||||
# Microsoft Program Synthesis using Examples SDK
|
||||
|
||||
The Program Synthesis using Examples (PROSE) SDK includes a set of technologies for the automatic generation of programs from input-output examples. This repo includes samples and sample data for the Microsoft PROSE SDK.
|
||||
|
||||
Find guides for these sample projects here: [https://microsoft.github.io/prose/](https://microsoft.github.io/prose/)
|
|
@ -0,0 +1,40 @@
|
|||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.23107.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ProgramSynthesis.FlashFill.Sample", "FlashFill\Microsoft.ProgramSynthesis.FlashFill.Sample.csproj", "{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ProgramSynthesis.Extraction.Web.Sample", "Extraction.Web\Microsoft.ProgramSynthesis.Extraction.Web.Sample.csproj", "{D24046B6-4E7C-4112-B88C-D360695D1E66}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ProgramSynthesis.Extraction.Text.Sample", "Extraction.Text\Microsoft.ProgramSynthesis.Extraction.Text.Sample.csproj", "{96D149D8-0A64-451A-A909-499D2162683D}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
RecompileAndRelease|Any CPU = RecompileAndRelease|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}.RecompileAndRelease|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}.RecompileAndRelease|Any CPU.Build.0 = Release|Any CPU
|
||||
{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{2698A7F0-26B7-42E7-A9C6-11BB4AEAF23D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{D24046B6-4E7C-4112-B88C-D360695D1E66}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D24046B6-4E7C-4112-B88C-D360695D1E66}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D24046B6-4E7C-4112-B88C-D360695D1E66}.RecompileAndRelease|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D24046B6-4E7C-4112-B88C-D360695D1E66}.RecompileAndRelease|Any CPU.Build.0 = Release|Any CPU
|
||||
{D24046B6-4E7C-4112-B88C-D360695D1E66}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D24046B6-4E7C-4112-B88C-D360695D1E66}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{96D149D8-0A64-451A-A909-499D2162683D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{96D149D8-0A64-451A-A909-499D2162683D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{96D149D8-0A64-451A-A909-499D2162683D}.RecompileAndRelease|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{96D149D8-0A64-451A-A909-499D2162683D}.RecompileAndRelease|Any CPU.Build.0 = Release|Any CPU
|
||||
{96D149D8-0A64-451A-A909-499D2162683D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{96D149D8-0A64-451A-A909-499D2162683D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
Загрузка…
Ссылка в новой задаче