Make MaxStateCount overrides thread local (#194)

UnlimitedStatesComputation() is used to temporarily alter the maximum size of an automaton,
which is defined by MaxStateCount. Using it from different threads could corrupt the limit.
Now each thread gets its own limit.

Also, the default MaxStateCount limit is increased to 300k, because that is what the biggest String inference customer uses.
This commit is contained in:
Ivan Korostelev 2019-11-07 16:44:54 +00:00 коммит произвёл GitHub
Родитель 470ae84c36
Коммит d6e7d5b975
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 80 добавлений и 49 удалений

Просмотреть файл

@ -39,6 +39,13 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// </summary>
private int numRemovedTransitions = 0;
/// <summary>
/// Cached value of <see cref="MaxStateCount"/>. Getting MaxStateCount involves checking
/// a thread-static variable and a comparison. Caching this property's value is a little
/// faster.
/// </summary>
private readonly int maxStateCount;
/// <summary>
/// Creates a new empty <see cref="Builder"/>.
/// </summary>
@ -46,6 +53,7 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
{
this.states = new List<LinkedStateData>();
this.transitions = new List<LinkedTransitionNode>();
this.maxStateCount = MaxStateCount;
this.AddStates(startStateCount);
}
@ -112,9 +120,9 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// </summary>
public StateBuilder AddState()
{
if (this.states.Count >= maxStateCount)
if (this.states.Count >= this.maxStateCount)
{
throw new AutomatonTooLargeException(MaxStateCount);
throw new AutomatonTooLargeException(this.maxStateCount);
}
var index = this.states.Count;
@ -842,4 +850,4 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
}
}
}
}
}

Просмотреть файл

@ -75,7 +75,18 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// <summary>
/// The maximum number of states an automaton can have.
/// </summary>
private static int maxStateCount = 50000;
private static int maxStateCount = 300_000;
/// <summary>
/// The maximum number of states an automaton can have in current thread.
/// </summary>
/// <remarks>
/// If non-zero, this value overrides the default <see cref="GlobalMaxStateCount"/>.
/// This value is used by <see cref="UnlimitedStatesComputation"/> to temporarily increase
/// the state count limit in a thread-safe manner.
/// </remarks>
[ThreadStatic]
private static int threadMaxStateCountOverride;
/// <summary>
/// Whether to use the Regex builder for the ToString method.
@ -145,12 +156,15 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
public double? PruneStatesWithLogEndWeightLessThan { get; set; }
/// <summary>
/// Gets or sets the maximum number of states an automaton can have.
/// Gets or sets the maximum number of states an automaton can have. This setting is shared
/// by all threads of the program.
/// </summary>
public static int MaxStateCount
/// <remarks>
/// This value can only be set because it is intended to be a program-level setting.
/// For inspecting the value use the <see cref="MaxStateCount"/>.
/// </remarks>
public static int GlobalMaxStateCount
{
get => maxStateCount;
set
{
Argument.CheckIfInRange(value > 0, nameof(value), "The maximum number of states must be positive.");
@ -158,6 +172,12 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
}
}
/// <summary>
/// Gets the maximum number of states an automaton can have in the current thread.
/// </summary>
/// <remarks>
/// The getter returns the thread-static <c>threadMaxStateCountOverride</c> when it is non-zero
/// and the shared <c>maxStateCount</c> otherwise. The internal setter writes only the
/// thread-static override; it does not modify the shared value.
/// </remarks>
public static int MaxStateCount
{
get => threadMaxStateCountOverride != 0 ? threadMaxStateCountOverride : maxStateCount;
internal set => threadMaxStateCountOverride = value;
}
/// <summary>
/// Gets or sets the maximum number of states an automaton can have
/// before an attempt to simplify it will be made.
@ -2617,12 +2637,12 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
private readonly int originalMaxStateCount;
/// <summary>
/// Initilizes a new instance of the <see cref="UnlimitedStatesComputation"/> class.
/// Initializes a new instance of the <see cref="UnlimitedStatesComputation"/> class.
/// </summary>
public UnlimitedStatesComputation()
{
originalMaxStateCount = StringAutomaton.MaxStateCount;
StringAutomaton.MaxStateCount = int.MaxValue;
originalMaxStateCount = threadMaxStateCountOverride;
threadMaxStateCountOverride = int.MaxValue;
}
/// <summary>
@ -2630,12 +2650,15 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// </summary>
public void CheckStateCount(TThis automaton)
{
    // The constructor saves the thread-level override that was active before this scope began.
    // That saved value is zero when the thread had no override, so zero must fall back to the
    // shared limit; comparing against it directly would reject every non-empty automaton.
    var effectiveLimit = originalMaxStateCount != 0 ? originalMaxStateCount : maxStateCount;

    if (automaton.States.Count > effectiveLimit)
    {
        throw new AutomatonTooLargeException(effectiveLimit);
    }
}
public void Dispose()
{
StringAutomaton.MaxStateCount = originalMaxStateCount;
threadMaxStateCountOverride = originalMaxStateCount;
}
}
#endregion
@ -2673,7 +2696,7 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
// This state is serialized only for its index.
this.Start.Write(writeDouble, writeInt32, writeElementDistribution);
writeInt32(this.States.Count);
foreach (var state in this.States)
{
@ -2722,7 +2745,7 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
}
var numStates = readInt32();
for (var i = 0; i < numStates; i++)
{
State.ReadTo(ref builder, readInt32, readDouble, readElementDistribution);
@ -2732,4 +2755,5 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
}
#endregion
}
}
}

Просмотреть файл

@ -44,19 +44,6 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
/// </summary>
protected PairListAutomaton sequencePairToWeight = new PairListAutomaton();
#region Properties
/// <summary>
/// Gets or sets the maximum number of states a transducer can have.
/// </summary>
public static int MaxStateCount
{
get { return PairListAutomaton.MaxStateCount; }
set { PairListAutomaton.MaxStateCount = value; }
}
#endregion
#region Factory methods
/// <summary>
@ -556,6 +543,17 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
#region Nested classes
/// <summary>
/// Disposable scope object for transducers: it creates a
/// <see cref="PairListAutomaton.UnlimitedStatesComputation"/> when constructed and disposes it
/// when this instance is disposed.
/// </summary>
public class UnlimitedStatesComputation : IDisposable
{
// The wrapped scope object of the underlying pair-list automaton type.
private readonly PairListAutomaton.UnlimitedStatesComputation unlimitedAutomatonStatesComputation;
/// <summary>
/// Initializes a new instance by creating the wrapped automaton-level scope object.
/// </summary>
public UnlimitedStatesComputation() =>
this.unlimitedAutomatonStatesComputation = new PairListAutomaton.UnlimitedStatesComputation();
/// <summary>
/// Disposes the wrapped automaton-level scope object.
/// </summary>
public void Dispose() =>
this.unlimitedAutomatonStatesComputation.Dispose();
}
/// <summary>
/// Represents an automaton that maps lists of element pairs to real values. Such automata are used to represent transducers internally.
/// </summary>
@ -581,4 +579,4 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
#endregion
}
}
}

Просмотреть файл

@ -26,26 +26,27 @@ namespace Microsoft.ML.Probabilistic.Tests
[Trait("Category", "StringInference")]
public void LargeTransducer()
{
StringAutomaton.MaxStateCount = 1200000; // Something big
var bigAutomatonBuilder = new StringAutomaton.Builder();
bigAutomatonBuilder.AddStates(StringAutomaton.MaxStateCount - bigAutomatonBuilder.StatesCount);
Func<Option<DiscreteChar>, Weight, ValueTuple<Option<PairDistribution<char, DiscreteChar>>, Weight>> transitionConverter =
(dist, weight) => ValueTuple.Create(Option.Some(PairDistribution<char, DiscreteChar>.FromFirstSecond(dist, dist)), weight);
var bigAutomaton = bigAutomatonBuilder.GetAutomaton();
Assert.Throws<AutomatonTooLargeException>(() => StringTransducer.FromAutomaton(bigAutomaton, transitionConverter));
// Shouldn't throw if the maximum number of states is increased
int prevMaxStateCount = StringTransducer.MaxStateCount;
try
var largeStatesCount = 1200000; // bigger than default MaxStatesCount in automata
using (var unlimitedAutomatonStates = new StringAutomaton.UnlimitedStatesComputation())
{
StringTransducer.MaxStateCount = StringAutomaton.MaxStateCount;
StringTransducer.FromAutomaton(bigAutomaton, transitionConverter);
}
finally
{
StringTransducer.MaxStateCount = prevMaxStateCount;
var bigAutomatonBuilder = new StringAutomaton.Builder();
bigAutomatonBuilder.AddStates(largeStatesCount - bigAutomatonBuilder.StatesCount);
Func<Option<DiscreteChar>, Weight, ValueTuple<Option<PairDistribution<char, DiscreteChar>>, Weight>>
transitionConverter =
(dist, weight) =>
ValueTuple.Create(
Option.Some(PairDistribution<char, DiscreteChar>.FromFirstSecond(dist, dist)), weight);
var bigAutomaton = bigAutomatonBuilder.GetAutomaton();
Assert.Throws<AutomatonTooLargeException>(() =>
StringTransducer.FromAutomaton(bigAutomaton, transitionConverter));
// Shouldn't throw if the maximum number of states is increased
using (var unlimitedTransducerStates = new StringTransducer.UnlimitedStatesComputation())
{
StringTransducer.FromAutomaton(bigAutomaton, transitionConverter);
}
}
}