Cache TryDeterminize() call results & add proper support for enum constant to Compiler (#127)

TryDeterminize() used to redo the determinization work even when the automaton was already known
to be determinized or non-determinizable.

Because the automaton state is immutable, the determinization state can be stored alongside it.
There are 3 states:
* Unknown - TryDeterminize() was never called for this automaton
* IsDeterminized - TryDeterminize() successfully determinized the automaton
* IsNonDeterminizable - TryDeterminize() was called but didn't succeed.

Because the determinization state depends on the maximum number of states,
the `TryDeterminize(int maxStatesBeforeStop)` overload was removed in favour of the default limit;
this overload was never used in practice.

Also, as an implementation detail, an enum type is now exposed as part of the automaton quoting interface.
The Compiler generated incorrect C# code when quoting enum constants; this is fixed.
This commit is contained in:
Ivan Korostelev 2019-03-13 01:00:16 +00:00 коммит произвёл GitHub
Родитель dc0b30487e
Коммит a2a3498f6f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 67 добавлений и 38 удалений

2
.gitignore поставляемый
Просмотреть файл

@ -262,4 +262,4 @@ _site/
src/IronPythonWrapper/InferNetExamples/InferNetExamples/GeneratedSource/
#Compiler for IronPython
src/IronPythonWrapper/Compiler
src/IronPythonWrapper/Compiler

Просмотреть файл

@ -711,7 +711,9 @@ namespace Microsoft.ML.Probabilistic.Compiler
}
else if (typeof (Enum).IsAssignableFrom(t))
{
sb.Append(t.FullName + "." + Enum.GetName(t, ile.Value));
AppendType(sb, t);
sb.Append(".");
sb.Append(Enum.GetName(t, ile.Value));
}
else
{

Просмотреть файл

@ -421,7 +421,13 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
nextResultTransitionIndex == resultTransitions.Length,
"number of copied transitions must match result array size");
return new DataContainer(this.StartStateIndex, !hasEpsilonTransitions, usesGroups, resultStates, resultTransitions);
return new DataContainer(
this.StartStateIndex,
!hasEpsilonTransitions,
usesGroups,
DeterminizationState.Unknown,
resultStates,
resultTransitions);
}
#endregion

Просмотреть файл

@ -2,14 +2,14 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.ML.Probabilistic.Serialization;
namespace Microsoft.ML.Probabilistic.Distributions.Automata
{
using System;
using System.Diagnostics;
using System.Runtime.Serialization;
using Microsoft.ML.Probabilistic.Collections;
using Microsoft.ML.Probabilistic.Serialization;
public abstract partial class Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>
{
@ -50,25 +50,50 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// </summary>
public bool UsesGroups => (this.flags & Flags.UsesGroups) != 0;
/// <summary>
/// Gets the cached determinization state of this automaton, decoded from the flags field.
/// </summary>
/// <remarks>
/// Returns <see cref="DeterminizationState.Unknown"/> when the
/// <c>DeterminizationStateKnown</c> flag is not set. Otherwise returns
/// <see cref="DeterminizationState.IsDeterminized"/> if the <c>IsDeterminized</c> flag is set
/// and <see cref="DeterminizationState.IsNonDeterminizable"/> if it is not.
/// </remarks>
public DeterminizationState DeterminizationState =>
((this.flags & Flags.DeterminizationStateKnown) == 0)
? DeterminizationState.Unknown
: ((this.flags & Flags.IsDeterminized) != 0
? DeterminizationState.IsDeterminized
: DeterminizationState.IsNonDeterminizable);
/// <summary>
/// Initializes instance of <see cref="DataContainer"/>.
/// </summary>
[Construction("StartStateIndex", "IsEpsilonFree", "UsesGroups", "States", "Transitions")]
[Construction("StartStateIndex", "IsEpsilonFree", "UsesGroups", "DeterminizationState", "States", "Transitions")]
public DataContainer(
int startStateIndex,
bool isEpsilonFree,
bool usesGroups,
DeterminizationState determinizationState,
ReadOnlyArray<StateData> states,
ReadOnlyArray<Transition> transitions)
{
this.flags =
(isEpsilonFree ? Flags.IsEpsilonFree : 0) |
(usesGroups ? Flags.UsesGroups : 0);
(usesGroups ? Flags.UsesGroups : 0) |
(determinizationState != DeterminizationState.Unknown ? Flags.DeterminizationStateKnown : 0) |
(determinizationState == DeterminizationState.IsDeterminized ? Flags.IsDeterminized : 0);
this.StartStateIndex = startStateIndex;
this.States = states;
this.Transitions = transitions;
}
/// <summary>
/// Creates a copy of this container that differs only in its determinization state.
/// </summary>
/// <param name="determinizationState">The determinization state to store in the copy.</param>
/// <returns>
/// A new <see cref="DataContainer"/> with the same start state index, epsilon-free and
/// group-usage values, states and transitions, and the given determinization state.
/// </returns>
public DataContainer WithDeterminizationState(DeterminizationState determinizationState)
{
    // Debug-only check: the state is expected to be assigned while it is still unknown.
    Debug.Assert(this.DeterminizationState == DeterminizationState.Unknown);

    return new DataContainer(
        startStateIndex: this.StartStateIndex,
        isEpsilonFree: this.IsEpsilonFree,
        usesGroups: this.UsesGroups,
        determinizationState: determinizationState,
        states: this.States,
        transitions: this.Transitions);
}
/// <summary>
/// Returns true if indices assigned to given states and their transitions are consistent with each other.
/// </summary>
@ -142,7 +167,16 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
{
IsEpsilonFree = 0x1,
UsesGroups = 0x2,
DeterminizationStateKnown = 0x4,
IsDeterminized = 0x8,
}
}
/// <summary>
/// Outcome of determinization attempts, stored alongside the automaton data so that
/// <c>TryDeterminize()</c> does not have to repeat the work.
/// </summary>
public enum DeterminizationState
{
/// <summary><c>TryDeterminize()</c> was never called for this automaton.</summary>
Unknown,
/// <summary><c>TryDeterminize()</c> successfully determinized the automaton.</summary>
IsDeterminized,
/// <summary><c>TryDeterminize()</c> was called but did not succeed.</summary>
IsNonDeterminizable,
}
}
}

Просмотреть файл

@ -29,38 +29,19 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// <remarks>See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.</remarks>
public bool TryDeterminize()
{
// We'd like to break if the determinized automaton is much larger than the original one,
// or the original automaton is not determinizable at all.
int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);
return this.TryDeterminize(maxStatesBeforeStop);
}
if (this.Data.DeterminizationState != DeterminizationState.Unknown)
{
return this.Data.DeterminizationState == DeterminizationState.IsDeterminized;
}
/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <param name="maxStatesBeforeStop">
/// The maximum number of states the resulting automaton can have. If the number of states exceeds the value
/// of this parameter during determinization, the process is aborted.
/// </param>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.</remarks>
public bool TryDeterminize(int maxStatesBeforeStop)
{
Argument.CheckIfInRange(
maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
"maxStatesBeforeStop",
"The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");
int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);
this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions
if (this.UsesGroups)
{
// Determinization would result in loss of group information, which we cannot allow
this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
return false;
}
@ -125,10 +106,9 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);
simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment
var result = builder.GetAutomaton();
result.PruneStatesWithLogEndWeightLessThan = this.PruneStatesWithLogEndWeightLessThan;
result.LogValueOverride = this.LogValueOverride;
this.SwapWith(result);
this.Data = builder.GetData().WithDeterminizationState(DeterminizationState.IsDeterminized);
this.PruneStatesWithLogEndWeightLessThan = this.PruneStatesWithLogEndWeightLessThan;
this.LogValueOverride = this.LogValueOverride;
return true;
}

Просмотреть файл

@ -1596,7 +1596,8 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// </summary>
public void SetToZero()
{
this.Data = new DataContainer(0, true, false, ZeroStates, ZeroTransitions);
this.Data = new DataContainer(
0, true, false, DeterminizationState.IsDeterminized, ZeroStates, ZeroTransitions);
}
/// <summary>

Просмотреть файл

@ -860,6 +860,7 @@ namespace Microsoft.ML.Probabilistic.Tests
0,
true,
false,
StringAutomaton.DeterminizationState.Unknown,
new[]
{
new StringAutomaton.StateData(0, 1, Weight.One),
@ -876,6 +877,7 @@ namespace Microsoft.ML.Probabilistic.Tests
0,
true,
false,
StringAutomaton.DeterminizationState.IsDeterminized,
new[] { new StringAutomaton.StateData(0, 0, Weight.Zero) },
Array.Empty<StringAutomaton.Transition>()));
Assert.True(automaton2.IsZero());
@ -887,6 +889,7 @@ namespace Microsoft.ML.Probabilistic.Tests
0,
true,
false,
StringAutomaton.DeterminizationState.IsNonDeterminizable,
Array.Empty<StringAutomaton.StateData>(),
Array.Empty<StringAutomaton.Transition>())));
@ -897,6 +900,7 @@ namespace Microsoft.ML.Probabilistic.Tests
0,
false,
false,
StringAutomaton.DeterminizationState.Unknown,
new[] { new StringAutomaton.StateData(0, 0, Weight.Zero) },
Array.Empty<StringAutomaton.Transition>())));
@ -907,6 +911,7 @@ namespace Microsoft.ML.Probabilistic.Tests
0,
false,
false,
StringAutomaton.DeterminizationState.Unknown,
new[] { new StringAutomaton.StateData(0, 1, Weight.Zero) },
new[] { new StringAutomaton.Transition(Option.None, Weight.One, 1) })));
@ -917,6 +922,7 @@ namespace Microsoft.ML.Probabilistic.Tests
0,
true,
false,
StringAutomaton.DeterminizationState.Unknown,
new[] { new StringAutomaton.StateData(0, 1, Weight.One) },
new[] { new StringAutomaton.Transition(Option.None, Weight.One, 2) })));
}