зеркало из https://github.com/dotnet/infer.git
Make MaxStateCount overrides thread local (#194)
UnlimitedStatesComputation() is used to temporarily alter the maximum size of an automaton, which is defined by MaxStateCount. Using it from different threads could mess up the limit. Now each thread gets its own limit. Also, the default MaxStateCount limit is increased to 300k, because that is what the biggest String inference customer uses.
This commit is contained in:
Родитель
470ae84c36
Коммит
d6e7d5b975
|
@ -39,6 +39,13 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
/// </summary>
|
||||
private int numRemovedTransitions = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Cached value of <see cref="MaxStateCount"/>. Getting MaxStateCount involves checking
|
||||
/// a thread-static variable and a comparison. Caching this property value is a little
|
||||
/// faster.
|
||||
/// </summary>
|
||||
private readonly int maxStateCount;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new empty <see cref="Builder"/>.
|
||||
/// </summary>
|
||||
|
@ -46,6 +53,7 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
{
|
||||
this.states = new List<LinkedStateData>();
|
||||
this.transitions = new List<LinkedTransitionNode>();
|
||||
this.maxStateCount = MaxStateCount;
|
||||
this.AddStates(startStateCount);
|
||||
}
|
||||
|
||||
|
@ -112,9 +120,9 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
/// </summary>
|
||||
public StateBuilder AddState()
|
||||
{
|
||||
if (this.states.Count >= maxStateCount)
|
||||
if (this.states.Count >= this.maxStateCount)
|
||||
{
|
||||
throw new AutomatonTooLargeException(MaxStateCount);
|
||||
throw new AutomatonTooLargeException(this.maxStateCount);
|
||||
}
|
||||
|
||||
var index = this.states.Count;
|
||||
|
@ -842,4 +850,4 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,7 +75,18 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
/// <summary>
|
||||
/// The maximum number of states an automaton can have.
|
||||
/// </summary>
|
||||
private static int maxStateCount = 50000;
|
||||
private static int maxStateCount = 300_000;
|
||||
|
||||
/// <summary>
|
||||
/// The maximum number of states an automaton can have in current thread.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// If non-zero, this value overrides the default <see cref="GlobalMaxStateCount"/>.
|
||||
/// This value is used by <see cref="UnlimitedStatesComputation"/> to temporarily increase
|
||||
/// the state count limit in a thread-safe manner.
|
||||
/// </remarks>
|
||||
[ThreadStatic]
|
||||
private static int threadMaxStateCountOverride;
|
||||
|
||||
/// <summary>
|
||||
/// Whether to use the Regex builder for the ToString method.
|
||||
|
@ -145,12 +156,15 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
public double? PruneStatesWithLogEndWeightLessThan { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the maximum number of states an automaton can have.
|
||||
/// Gets or sets the maximum number of states an automaton can have. This setting is shared
|
||||
/// by all threads of the program.
|
||||
/// </summary>
|
||||
public static int MaxStateCount
|
||||
/// <remarks>
|
||||
/// This value can only be set because it is intended to be a program-level setting.
|
||||
/// To inspect the effective value, use <see cref="MaxStateCount"/>.
|
||||
/// </remarks>
|
||||
public static int GlobalMaxStateCount
|
||||
{
|
||||
get => maxStateCount;
|
||||
|
||||
set
|
||||
{
|
||||
Argument.CheckIfInRange(value > 0, nameof(value), "The maximum number of states must be positive.");
|
||||
|
@ -158,6 +172,12 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
}
|
||||
}
|
||||
|
||||
public static int MaxStateCount
|
||||
{
|
||||
get => threadMaxStateCountOverride != 0 ? threadMaxStateCountOverride : maxStateCount;
|
||||
internal set => threadMaxStateCountOverride = value;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the maximum number of states an automaton can have
|
||||
/// before an attempt to simplify it will be made.
|
||||
|
@ -2617,12 +2637,12 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
private readonly int originalMaxStateCount;
|
||||
|
||||
/// <summary>
|
||||
/// Initilizes a new instance of the <see cref="UnlimitedStatesComputation"/> class.
|
||||
/// Initializes a new instance of the <see cref="UnlimitedStatesComputation"/> class.
|
||||
/// </summary>
|
||||
public UnlimitedStatesComputation()
|
||||
{
|
||||
originalMaxStateCount = StringAutomaton.MaxStateCount;
|
||||
StringAutomaton.MaxStateCount = int.MaxValue;
|
||||
originalMaxStateCount = threadMaxStateCountOverride;
|
||||
threadMaxStateCountOverride = int.MaxValue;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -2630,12 +2650,15 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
/// </summary>
|
||||
public void CheckStateCount(TThis automaton)
|
||||
{
|
||||
if(automaton.States.Count > originalMaxStateCount) throw new AutomatonTooLargeException(originalMaxStateCount);
|
||||
if (automaton.States.Count > originalMaxStateCount)
|
||||
{
|
||||
throw new AutomatonTooLargeException(originalMaxStateCount);
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
StringAutomaton.MaxStateCount = originalMaxStateCount;
|
||||
threadMaxStateCountOverride = originalMaxStateCount;
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
@ -2673,7 +2696,7 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
|
||||
// This state is serialized only for its index.
|
||||
this.Start.Write(writeDouble, writeInt32, writeElementDistribution);
|
||||
|
||||
|
||||
writeInt32(this.States.Count);
|
||||
foreach (var state in this.States)
|
||||
{
|
||||
|
@ -2722,7 +2745,7 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
}
|
||||
|
||||
var numStates = readInt32();
|
||||
|
||||
|
||||
for (var i = 0; i < numStates; i++)
|
||||
{
|
||||
State.ReadTo(ref builder, readInt32, readDouble, readElementDistribution);
|
||||
|
@ -2732,4 +2755,5 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -44,19 +44,6 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
/// </summary>
|
||||
protected PairListAutomaton sequencePairToWeight = new PairListAutomaton();
|
||||
|
||||
#region Properties
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the maximum number of states a transducer can have.
|
||||
/// </summary>
|
||||
public static int MaxStateCount
|
||||
{
|
||||
get { return PairListAutomaton.MaxStateCount; }
|
||||
set { PairListAutomaton.MaxStateCount = value; }
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Factory methods
|
||||
|
||||
/// <summary>
|
||||
|
@ -556,6 +543,17 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
|
||||
#region Nested classes
|
||||
|
||||
public class UnlimitedStatesComputation : IDisposable
|
||||
{
|
||||
private readonly PairListAutomaton.UnlimitedStatesComputation unlimitedAutomatonStatesComputation;
|
||||
|
||||
public UnlimitedStatesComputation() =>
|
||||
this.unlimitedAutomatonStatesComputation = new PairListAutomaton.UnlimitedStatesComputation();
|
||||
|
||||
public void Dispose() =>
|
||||
this.unlimitedAutomatonStatesComputation.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents an automaton that maps lists of element pairs to real values. Such automata are used to represent transducers internally.
|
||||
/// </summary>
|
||||
|
@ -581,4 +579,4 @@ namespace Microsoft.ML.Probabilistic.Distributions.Automata
|
|||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,26 +26,27 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
[Trait("Category", "StringInference")]
|
||||
public void LargeTransducer()
|
||||
{
|
||||
StringAutomaton.MaxStateCount = 1200000; // Something big
|
||||
var bigAutomatonBuilder = new StringAutomaton.Builder();
|
||||
bigAutomatonBuilder.AddStates(StringAutomaton.MaxStateCount - bigAutomatonBuilder.StatesCount);
|
||||
Func<Option<DiscreteChar>, Weight, ValueTuple<Option<PairDistribution<char, DiscreteChar>>, Weight>> transitionConverter =
|
||||
(dist, weight) => ValueTuple.Create(Option.Some(PairDistribution<char, DiscreteChar>.FromFirstSecond(dist, dist)), weight);
|
||||
|
||||
var bigAutomaton = bigAutomatonBuilder.GetAutomaton();
|
||||
|
||||
Assert.Throws<AutomatonTooLargeException>(() => StringTransducer.FromAutomaton(bigAutomaton, transitionConverter));
|
||||
|
||||
// Shouldn't throw if the maximum number of states is increased
|
||||
int prevMaxStateCount = StringTransducer.MaxStateCount;
|
||||
try
|
||||
var largeStatesCount = 1200000; // bigger than the default MaxStateCount for automata
|
||||
using (var unlimitedAutomatonStates = new StringAutomaton.UnlimitedStatesComputation())
|
||||
{
|
||||
StringTransducer.MaxStateCount = StringAutomaton.MaxStateCount;
|
||||
StringTransducer.FromAutomaton(bigAutomaton, transitionConverter);
|
||||
}
|
||||
finally
|
||||
{
|
||||
StringTransducer.MaxStateCount = prevMaxStateCount;
|
||||
var bigAutomatonBuilder = new StringAutomaton.Builder();
|
||||
bigAutomatonBuilder.AddStates(largeStatesCount - bigAutomatonBuilder.StatesCount);
|
||||
Func<Option<DiscreteChar>, Weight, ValueTuple<Option<PairDistribution<char, DiscreteChar>>, Weight>>
|
||||
transitionConverter =
|
||||
(dist, weight) =>
|
||||
ValueTuple.Create(
|
||||
Option.Some(PairDistribution<char, DiscreteChar>.FromFirstSecond(dist, dist)), weight);
|
||||
|
||||
var bigAutomaton = bigAutomatonBuilder.GetAutomaton();
|
||||
|
||||
Assert.Throws<AutomatonTooLargeException>(() =>
|
||||
StringTransducer.FromAutomaton(bigAutomaton, transitionConverter));
|
||||
|
||||
// Shouldn't throw if the maximum number of states is increased
|
||||
using (var unlimitedTransducerStates = new StringTransducer.UnlimitedStatesComputation())
|
||||
{
|
||||
StringTransducer.FromAutomaton(bigAutomaton, transitionConverter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче