Fixed ExpOp with point mass arguments (#303)

* Added Gamma.GetLogMeanMinusMeanLog and FromLogMeanMinusMeanLog
2020-11-23 22:02:32 +00:00 · 2020-11-23 22:02:32 +00:00 · 3baf283b6f
--- a/src/Runtime/Distributions/ConjugateDirichlet.cs
+++ b/src/Runtime/Distributions/ConjugateDirichlet.cs
@ -504,24 +504,6 @@ namespace Microsoft.ML.Probabilistic.Distributions
            return ConjugateDirichlet.FromShapeAndRate(gamma.Shape, gamma.Rate);
        }

-        /// <summary>
-        /// Constructs a Conjugate Dirichlet distribution with the given log mean and mean logarithm.
-        /// </summary>
-        /// <param name="mean">Desired expected value.</param>
-        /// <param name="meanLog">Desired expected logarithm.</param>
-        /// <param name="logMean">Log of desired expected value.</param>
-        /// <returns>A new Gamma distribution.</returns>
-        /// <remarks>
-        /// This function is significantly slower than the other constructors since it
-        /// involves nonlinear optimization. The algorithm is a generalized Newton iteration, 
-        /// described in "Estimating a Gamma distribution" by T. Minka, 2002.
-        /// </remarks>
-        public static ConjugateDirichlet FromMeanAndMeanLog(double mean, double meanLog, double logMean)
-        {
-            var gamma = Gamma.FromMeanAndMeanLog(mean, meanLog, logMean);
-            return ConjugateDirichlet.FromShapeAndRate(gamma.Shape, gamma.Rate);
-        }
-
        /// <summary>
        /// Sets the natural parameters of the distribution.
        /// </summary>
--- a/src/Runtime/Distributions/Gamma.cs
+++ b/src/Runtime/Distributions/Gamma.cs
@ -5,6 +5,7 @@
 namespace Microsoft.ML.Probabilistic.Distributions
 {
    using System;
+    using System.Diagnostics;
    using System.Runtime.Serialization;
    using Math;
    using Utilities;
@ -244,6 +245,17 @@ namespace Microsoft.ML.Probabilistic.Distributions
            return result;
        }

+        /// <summary>
+        /// Gets the logarithm of the expected value minus the expected logarithm, more accurately than directly computing <c>Math.Log(GetMean()) - GetMeanLog()</c>.
+        /// </summary>
+        /// <returns></returns>
+        public double GetLogMeanMinusMeanLog()
+        {
+            if (IsPointMass) return 0;
+            else if (!IsProper()) throw new ImproperDistributionException(this);
+            else return LogMinusDigamma(Shape);
+        }
+
        static readonly double largeShape = Math.Sqrt(Math.Sqrt(1.0 / 120 / MMath.Ulp1));

        private static double LogMinusDigamma(double shape)
@ -269,15 +281,14 @@ namespace Microsoft.ML.Probabilistic.Distributions
        /// </remarks>
        public static Gamma FromMeanAndMeanLog(double mean, double meanLog)
        {
-            return FromMeanAndMeanLog(mean, meanLog, Math.Log(mean));
+            return FromLogMeanMinusMeanLog(mean, Math.Log(mean) - meanLog);
        }

        /// <summary>
        /// Constructs a Gamma distribution with the given mean and mean logarithm.
        /// </summary>
        /// <param name="mean">Desired expected value.</param>
-        /// <param name="meanLog">Desired expected logarithm.</param>
-        /// <param name="logMean">Logarithm of desired expected value.</param>
+        /// <param name="logMeanMinusMeanLog">Logarithm of desired expected value minus desired expected logarithm.</param>
        /// <returns>A new Gamma distribution.</returns>
        /// <remarks>This function is equivalent to maximum-likelihood estimation of a Gamma distribution
        /// from data given by sufficient statistics.
@ -285,14 +296,14 @@ namespace Microsoft.ML.Probabilistic.Distributions
        /// involves nonlinear optimization. The algorithm is a generalized Newton iteration, 
        /// described in "Estimating a Gamma distribution" by T. Minka, 2002.
        /// </remarks>
-        public static Gamma FromMeanAndMeanLog(double mean, double meanLog, double logMean)
+        public static Gamma FromLogMeanMinusMeanLog(double mean, double logMeanMinusMeanLog)
        {
-            double delta = logMean - meanLog;
-            if (delta <= 0) return Gamma.PointMass(mean);
-            double shape = 0.5 / delta;
+            if (logMeanMinusMeanLog <= 0) return Gamma.PointMass(mean);
+            double shape = 0.5 / logMeanMinusMeanLog;
            for (int iter = 0; iter < 100; iter++)
            {
-                double g = LogMinusDigamma(shape) - delta;
+                double g = LogMinusDigamma(shape) - logMeanMinusMeanLog;
+                //Trace.WriteLine($"shape = {shape} g = {g}");
                if (MMath.AreEqual(g, 0)) break;
                shape /= 1 + g / (1 - shape * MMath.Trigamma(shape));
            }
--- a/src/Runtime/Distributions/GammaPower.cs
+++ b/src/Runtime/Distributions/GammaPower.cs
@ -267,15 +267,14 @@ namespace Microsoft.ML.Probabilistic.Distributions
        /// </remarks>
        public static GammaPower FromMeanAndMeanLog(double mean, double meanLog, double power)
        {
-            return FromMeanAndMeanLog(mean, meanLog, Math.Log(mean), power);
+            return FromLogMeanMinusMeanLog(mean, Math.Log(mean) - meanLog, power);
        }

        /// <summary>
        /// Constructs a GammaPower distribution with the given mean and mean logarithm.
        /// </summary>
        /// <param name="mean">Desired expected value.</param>
-        /// <param name="meanLog">Desired expected logarithm.</param>
-        /// <param name="logMean">Logarithm of desired expected value.</param>
+        /// <param name="logMeanMinusMeanLog">Logarithm of desired expected value minus desired expected logarithm.</param>
        /// <param name="power">Desired power.</param>
        /// <returns>A new GammaPower distribution.</returns>
        /// <remarks>This function is equivalent to maximum-likelihood estimation of a Gamma distribution
@ -284,11 +283,10 @@ namespace Microsoft.ML.Probabilistic.Distributions
        /// involves nonlinear optimization. The algorithm is a generalized Newton iteration, 
        /// described in "Estimating a Gamma distribution" by T. Minka, 2002.
        /// </remarks>
-        public static GammaPower FromMeanAndMeanLog(double mean, double meanLog, double logMean, double power)
+        public static GammaPower FromLogMeanMinusMeanLog(double mean, double logMeanMinusMeanLog, double power)
        {
-            if (power == 1) return FromGamma(Gamma.FromMeanAndMeanLog(mean, meanLog, logMean), power);
-            double delta = logMean - meanLog;
-            if (delta <= 0)
+            if (power == 1) return FromGamma(Gamma.FromLogMeanMinusMeanLog(mean, logMeanMinusMeanLog), power);
+            if (logMeanMinusMeanLog <= 0)
            {
                // By Jensen's inequality, log(E[x]) >= E[log(x)]
                // with equality only for a point mass.
@ -298,16 +296,13 @@ namespace Microsoft.ML.Probabilistic.Distributions
            // mean = Gamma(Shape + power)/Gamma(Shape)/Rate^power
            // meanLog = power*(digamma(Shape) - log(Rate))
            // log(mean) = log(Gamma(Shape + power)) - log(Gamma(Shape)) - power*log(Rate)
-            double logMeanOverPower = logMean / power;
-            double meanLogOverPower = meanLog / power;
-            double shape = (power == -1) ? (1 + 0.5 / MMath.ExpMinus1(delta)) : Math.Max(1 - power, 1);
-            double logRate = 0;
+            double logMeanMinusMeanLogOverPower = logMeanMinusMeanLog / power;
+            double shape = (power == -1) ? (1 + 0.5 / MMath.ExpMinus1(logMeanMinusMeanLog)) : Math.Max(1 - power, 1);
            int maxiter = 10000;
            int backtrackCount = 0;
            bool previouslyIncreased = false;
            for (int iter = 0; iter < maxiter; iter++)
            {
-                double oldLogRate = logRate;
                double oldShape = shape;
                if (power == -1)
                {
@ -315,8 +310,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
                    // meanLog = log(rate) - digamma(shape)
                    // derivative wrt shape is trigamma(shape) =approx 1/(shape - 0.5)
                    double digammaShape = MMath.Digamma(shape);
-                    logRate = meanLog + digammaShape;
-                    if (delta > 1 || shape <= 1 || backtrackCount > 5)
+                    if (logMeanMinusMeanLog > 1 || shape <= 1 || backtrackCount > 5)
                    {
                        // derivative wrt logRate is exp(logRate - logMean) 
                        // = exp(meanLog - logMean + digamma(shape)) 
@ -324,7 +318,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
                        // So the convergence rate is approx exp(meanLog - logMean)
                        // If meanLog is close to logMean, this will converge very slowly.
                        //shape = 1 + Math.Exp(logRate - logMean);
-                        shape = 1 + Math.Exp(digammaShape - delta);
+                        shape = 1 + Math.Exp(digammaShape - logMeanMinusMeanLog);
                    }
                    else if (backtrackCount > 1)
                    {
@ -332,7 +326,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
                        // meanLog = log(rate) - digamma(shape) + log(shape) -log(shape)
                        // exp(log(mean) - meanLog - digamma(shape) + log(shape))*(shape-1) = shape
                        // (exp(...) - 1)*(shape-1) = 1
-                        shape = 1 + 1 / MMath.ExpMinus1(Math.Log(shape) - digammaShape + delta);
+                        shape = 1 + 1 / MMath.ExpMinus1(Math.Log(shape) - digammaShape + logMeanMinusMeanLog);
                    }
                    else
                    {
@ -344,7 +338,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
                        // This comes from:
                        // meanLog = log(rate) - digamma(shape) + log(shape-0.5)-log(shape-0.5)
                        // = log(mean) + log(shape-1) - digamma(shape) + log(shape-0.5)-log(shape-0.5)
-                        shape = 1 + 0.5 / MMath.ExpMinus1(Math.Log(shape - 0.5) - digammaShape + delta);
+                        shape = 1 + 0.5 / MMath.ExpMinus1(Math.Log(shape - 0.5) - digammaShape + logMeanMinusMeanLog);
                    }
                }
                else
@ -353,7 +347,6 @@ namespace Microsoft.ML.Probabilistic.Distributions
                    // power*log(Rate) = log(Gamma(Shape + power)) - log(Gamma(Shape)) - log(mean)
                    // derivative wrt shape is (digamma(Shape + power) - digamma(Shape))/power
                    double risingFactorial = MMath.RisingFactorialLnOverN(shape, power);
-                    logRate = risingFactorial - logMeanOverPower;
                    if (power > 1 && backtrackCount < 2)
                    {
                        // logRate = risingFactorial - logMeanOverPower
@ -366,7 +359,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
                        // risingFactorial <= log(x) + (1-n)/2/x  if n <= -1
                        double logShape = Math.Log(shape);
                        double halfOverShape = 0.5 / shape;
-                        shape = power * 0.5 / ((MMath.Digamma(shape) - (logShape - halfOverShape)) + ((power - 1) * halfOverShape + logShape - risingFactorial) + delta / power);
+                        shape = power * 0.5 / ((MMath.Digamma(shape) - (logShape - halfOverShape)) + ((power - 1) * halfOverShape + logShape - risingFactorial) + logMeanMinusMeanLogOverPower);
                    }
                    else
                    {
@ -379,17 +372,17 @@ namespace Microsoft.ML.Probabilistic.Distributions
                        {
                            // This comes from:
                            // meanLog = power*(digamma(Shape)-log(Shape)+log(Shape) - log(Rate))
-                            shape = Math.Exp(Math.Log(shape) - MMath.Digamma(shape) + meanLogOverPower + logRate);
+                            shape = Math.Exp(Math.Log(shape) - MMath.Digamma(shape) + risingFactorial - logMeanMinusMeanLogOverPower);
                        }
                        else
                        {
                            // This also works but is slower.
-                            shape = MMath.DigammaInv(meanLogOverPower + logRate);
+                            shape = MMath.DigammaInv(risingFactorial - logMeanMinusMeanLogOverPower);
                        }
                    }
                }
                //Console.WriteLine($"shape = {shape:g17}, logRate = {logRate:g17}, mean = {Math.Exp(power * (MMath.RisingFactorialLnOverN(shape, power) - logRate))} should be {mean}, meanLog = {power * (MMath.Digamma(shape) - logRate)} should be {meanLog}");
-                if (MMath.AreEqual(oldLogRate, logRate) && MMath.AreEqual(oldShape, shape))
+                if (MMath.AreEqual(oldShape, shape))
                {
                    //Console.WriteLine($"FromMeanAndMeanLog: {iter + 1} iters");
                    //Console.WriteLine($"shape = {shape:g17}, logRate = {logRate:g17}, mean = {Math.Exp(logRate) / (shape - 1)} should be {mean}, meanLog = {power * (MMath.Digamma(shape) - logRate)} should be {meanLog}");
@ -401,7 +394,8 @@ namespace Microsoft.ML.Probabilistic.Distributions
                    backtrackCount++;
                previouslyIncreased = increased;
            }
-            return FromShapeAndRate(shape, Math.Exp(logRate), power);
+            double rate = Math.Exp(MMath.RisingFactorialLnOverN(shape, power)) / Math.Pow(mean, 1 / power);
+            return FromShapeAndRate(shape, rate, power);
        }

        /// <summary>
--- a/src/Runtime/Distributions/SparseGammaList.cs
+++ b/src/Runtime/Distributions/SparseGammaList.cs
@ -72,22 +72,6 @@ namespace Microsoft.ML.Probabilistic.Distributions
            return SparseGammaList.Constant(size, Gamma.FromMeanAndMeanLog(mean, meanLog), tolerance);
        }

-        /// <summary>
-        /// Returns a new instance of the <see cref="SparseGammaList"/> class
-        /// of a given size, with each element having a given log mean and mean log.
-        /// </summary>
-        /// <param name="size">The size of the list.</param>
-        /// <param name="mean">Desired expected value.</param>
-        /// <param name="meanLog">Desired expected logarithm.</param>
-        /// <param name="logMean">Log of desired expected value.</param>
-        /// <param name="tolerance">The tolerance for the approximation.</param>
-        /// <returns>The new <see cref="SparseGammaList"/> instance.</returns>
-        public static SparseGammaList FromMeanAndMeanLog(
-            int size, double mean, double meanLog, double logMean, double tolerance)
-        {
-            return SparseGammaList.Constant(size, Gamma.FromMeanAndMeanLog(mean, meanLog, logMean), tolerance);
-        }
-
        /// <summary>
        /// Returns a new instance of the <see cref="SparseGammaList"/> class
        /// of a given size, with each element having a given shape and rate.
--- a/src/Runtime/Factors/Exp.cs
+++ b/src/Runtime/Factors/Exp.cs
@ -198,21 +198,25 @@ namespace Microsoft.ML.Probabilistic.Factors
        {
            if (exp.IsUniform()) return ExpAverageLogarithm(d, exp);
            Gamma gamma = Gamma.FromShapeAndRate(exp.Shape + (1 - exp.Power), exp.Rate);
+            if (exp.IsPointMass) gamma = Gamma.PointMass(Math.Pow(exp.Point, 1 / exp.Power));
            Gaussian dOverPower = GaussianProductOp.AAverageConditional(d, exp.Power);
-            Gaussian to_dOverPower = GaussianProductOp.ProductAverageConditional(to_d, exp.Power);
+            Gaussian to_dOverPower = GaussianProductOp.AAverageConditional(to_d, exp.Power);
            Gamma to_gamma = ExpAverageConditional(gamma, dOverPower, to_dOverPower);
-            return GammaPower.FromShapeAndRate(to_gamma.Shape, to_gamma.Rate, exp.Power);
+            return PowerOp.PowAverageConditional(to_gamma, exp.Power);
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="ExpOp"]/message_doc[@name="DAverageConditional(GammaPower, Gaussian)"]/*'/>
        public static Gaussian DAverageConditional([SkipIfUniform] GammaPower exp, [Proper] Gaussian d, [NoInit] Gaussian to_d)
        {
+            if (exp.IsPointMass) return DAverageConditional(exp.Point);
            // as a function of d, the factor is Ga(exp(d); shape, rate, power) = exp(d*(shape/power -1) -rate*exp(d/power))
            // = Ga(exp(d/power); shape - power + 1, rate)
-            Gaussian forward = GaussianProductOp.AAverageConditional(d, exp.Power);
-            Gaussian to_dOverPower = GaussianProductOp.ProductAverageConditional(to_d, exp.Power);
            Gamma gamma = Gamma.FromShapeAndRate(exp.Shape + (1 - exp.Power), exp.Rate);
+            // "forward" goes to d/power
+            Gaussian forward = GaussianProductOp.AAverageConditional(d, exp.Power);
+            Gaussian to_dOverPower = GaussianProductOp.AAverageConditional(to_d, exp.Power);
            Gaussian message = DAverageConditional(gamma, forward, to_dOverPower);
+            // "message" goes to d/power
            Gaussian backward = GaussianProductOp.ProductAverageConditional(message, exp.Power);
            return backward;
        }
@ -331,10 +335,12 @@ namespace Microsoft.ML.Probabilistic.Factors
            }
            if (Z == 0)
                throw new InferRuntimeException("Z==0");
-            double meanLog = sumY / Z + mD;
-            double logMean = logsumExpY - Math.Log(Z) + mD;
-            double mean = Math.Exp(logMean);
-            Gamma result = Gamma.FromMeanAndMeanLog(mean, meanLog, logMean);
+            double meanLogMinusMd = sumY / Z;
+            double logMeanMinusMd = logsumExpY - Math.Log(Z);
+            double logMeanMinusMeanLog = logMeanMinusMd - meanLogMinusMd;
+            double mean = Math.Exp(logMeanMinusMd + mD);
+            Gamma result = Gamma.FromLogMeanMinusMeanLog(mean, logMeanMinusMeanLog);
+            //Trace.WriteLine($"result = {result} mean = {mean} logMeanMinusMeanLog = {logMeanMinusMeanLog}");
            result.SetToRatio(result, exp, ForceProper);
            if (Double.IsNaN(result.Shape) || Double.IsNaN(result.Rate))
                throw new InferRuntimeException($"result is NaN.  exp={exp}, d={d}, to_d={to_d}");
@ -472,11 +478,17 @@ namespace Microsoft.ML.Probabilistic.Factors
            if (d.IsPointMass) return Gamma.PointMass(Math.Exp(d.Point));
            double mD, vD;
            d.GetMeanAndVariance(out mD, out vD);
+            return ExpDistribution(mD, vD);
+        }
+
+        private static Gamma ExpDistribution(double mD, double vD)
+        { 
            // E[exp(d)] = exp(mD + vD/2)
-            double logMean = mD + vD / 2;
+            double logMeanMinusMeanLog = vD / 2;
+            double logMean = mD + logMeanMinusMeanLog;
            double mean = Math.Exp(logMean);
            //return Gamma.FromMeanAndVariance(Math.Exp(lm), Math.Exp(2*lm)*(Math.Exp(vD)-1));
-            return Gamma.FromMeanAndMeanLog(mean, mD, logMean);
+            return Gamma.FromLogMeanMinusMeanLog(mean, logMeanMinusMeanLog);
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="ExpOp"]/message_doc[@name="ExpAverageLogarithm(NonconjugateGaussian)"]/*'/>
@ -485,11 +497,7 @@ namespace Microsoft.ML.Probabilistic.Factors
        {
            double mD, vD;
            d.GetMeanAndVariance(out mD, out vD);
-            // E[exp(d)] = exp(mD + vD/2)
-            double logMean = mD + vD / 2;
-            double mean = Math.Exp(logMean);
-            //return Gamma.FromMeanAndVariance(Math.Exp(lm), Math.Exp(2*lm)*(Math.Exp(vD)-1));
-            return Gamma.FromMeanAndMeanLog(mean, mD, logMean);
+            return ExpDistribution(mD, vD);
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="ExpOp"]/message_doc[@name="ExpAverageLogarithm(Gaussian, GammaPower)"]/*'/>
@ -499,10 +507,11 @@ namespace Microsoft.ML.Probabilistic.Factors
            double mD, vD;
            d.GetMeanAndVariance(out mD, out vD);
            // E[exp(d)] = exp(mD + vD/2)
-            double logMean = mD + vD / 2;
+            double logMeanMinusMeanLog = vD / 2;
+            double logMean = mD + logMeanMinusMeanLog;
            double mean = Math.Exp(logMean);
            //return Gamma.FromMeanAndVariance(Math.Exp(lm), Math.Exp(2*lm)*(Math.Exp(vD)-1));
-            return GammaPower.FromMeanAndMeanLog(mean, mD, logMean, result.Power);
+            return GammaPower.FromLogMeanMinusMeanLog(mean, logMeanMinusMeanLog, result.Power);
        }

        // Finds the maximum of -C exp(v/2) + .5 log(v)
@ -948,10 +957,13 @@ namespace Microsoft.ML.Probabilistic.Factors
            //double bpost = b + d.Precision/expx;
            //double mpost = expx - d.Precision*(MMath.Digamma(apost) - Math.Log(apost))/bpost;
            double v = 1 / (d.Precision + b * expx);
-            double meanLog = x - 0.5 * v * v * b * expx;
-            double logMean = x + Math.Log(1 + 0.5 * v * v * d.Precision);
+            double meanLogMinusX = -0.5 * v * v * b * expx;
+            double meanLog = x + meanLogMinusX;
+            double logMeanMinusX = Math.Log(1 + 0.5 * v * v * d.Precision);
+            double logMeanMinusMeanLog = logMeanMinusX - meanLogMinusX;
+            double logMean = x + logMeanMinusX;
            double mean = Math.Exp(logMean);
-            Gamma result = Gamma.FromMeanAndMeanLog(mean, meanLog, logMean);
+            Gamma result = Gamma.FromLogMeanMinusMeanLog(mean, logMeanMinusMeanLog);
            result.SetToRatio(result, exp, true);
            return result;
        }
--- a/src/Runtime/Factors/ProductGamma.cs
+++ b/src/Runtime/Factors/ProductGamma.cs
@ -67,7 +67,7 @@ namespace Microsoft.ML.Probabilistic.Factors
                return result;
            }
            // (ab)^(shape/power-1) exp(-rate*(ab)^(1/power))
-            return GammaPower.FromShapeAndRate(Product.Shape, Product.Rate * Math.Pow(B, 1/result.Power), result.Power);
+            return GammaPower.FromShapeAndRate(Product.Shape, Product.Rate * Math.Pow(B, 1 / result.Power), result.Power);
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="GammaProductOp"]/message_doc[@name="AAverageConditional(double, double, GammaPower)"]/*'/>
@ -671,7 +671,7 @@ namespace Microsoft.ML.Probabilistic.Factors
        {
            // E[x] = E[a]*E[b]
            // E[log(x)] = E[log(a)]+E[log(b)]
-            return Gamma.FromMeanAndMeanLog(A.GetMean() * B.GetMean(), A.GetMeanLog() + B.GetMeanLog());
+            return Gamma.FromLogMeanMinusMeanLog(A.GetMean() * B.GetMean(), A.GetLogMeanMinusMeanLog() + B.GetLogMeanMinusMeanLog());
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="GammaProductVmpOp"]/message_doc[@name="ProductAverageLogarithm(double, Gamma)"]/*'/>
@ -697,7 +697,7 @@ namespace Microsoft.ML.Probabilistic.Factors
                return GammaPower.PointMass(A * B.Point, B.Power);
            if (A == 0)
                return GammaPower.PointMass(0, B.Power);
-            return GammaPower.FromShapeAndRate(B.Shape, B.Rate * Math.Pow(A, -1/B.Power), B.Power);
+            return GammaPower.FromShapeAndRate(B.Shape, B.Rate * Math.Pow(A, -1 / B.Power), B.Power);
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="GammaProductVmpOp"]/message_doc[@name="ProductAverageLogarithm(Gamma, double)"]/*'/>
@ -814,7 +814,16 @@ namespace Microsoft.ML.Probabilistic.Factors
                return A;
            double mean = A.GetMean() * B.GetMeanInverse();
            double meanLog = A.GetMeanLog() - B.GetMeanLog();
-            return Gamma.FromMeanAndMeanLog(mean, meanLog);
+            return Gamma.FromLogMeanMinusMeanLog(mean, A.GetLogMeanMinusMeanLog() + GetLogMeanInversePlusMeanLog(B));
+        }
+
+        private static double GetLogMeanInversePlusMeanLog(Gamma B)
+        {
+            // log(rate) - log(shape - 1) + digamma(shape) - log(rate) 
+            // = digamma(shape) - log(shape - 1)
+            // = 1/(shape - 1) + digamma(shape - 1) - log(shape - 1)
+            double shapeMinus1 = B.Shape - 1;
+            return 1 / shapeMinus1 - Gamma.FromShapeAndRate(shapeMinus1, B.Rate).GetLogMeanMinusMeanLog();
        }

        /// <include file='FactorDocs.xml' path='factor_docs/message_op_class[@name="GammaRatioVmpOp"]/message_doc[@name="RatioAverageLogarithm(Gamma, double)"]/*'/>
--- a/test/Tests/Distributions/DistributionTests.cs
+++ b/test/Tests/Distributions/DistributionTests.cs
@ -78,8 +78,8 @@ namespace Microsoft.ML.Probabilistic.Tests
                Assert.Equal(mean, gammaPower.GetMean(), 1e-10);
                Assert.Equal(meanLog, gammaPower.GetMeanLog(), 1e-10);
            }
-            GammaPower.FromMeanAndMeanLog(0.82657135035599683, -0.19053040661796108, -0.19046903712771776, -1.0);
-            GammaPower.FromMeanAndMeanLog(0.78123008687766349, -0.24698718364292091, -0.2468855670529615, -1.0);
+            GammaPower.FromMeanAndMeanLog(0.82657135035599683, -0.19053040661796108, -1.0);
+            GammaPower.FromMeanAndMeanLog(0.78123008687766349, -0.24698718364292091, -1.0);
        }

        [Fact]
--- a/test/Tests/Operators/OperatorTests.cs
+++ b/test/Tests/Operators/OperatorTests.cs
@ -657,6 +657,7 @@ namespace Microsoft.ML.Probabilistic.Tests
        [Fact]
        public void ExpOpGammaPowerTest()
        {
+            Assert.True(ExpOp.DAverageConditional(GammaPower.PointMass(0, -1), new Gaussian(0, 1), Gaussian.Uniform()).Point < double.MinValue);
            ExpOp.ExpAverageConditional(GammaPower.FromShapeAndRate(-1, 283.673, -1), Gaussian.FromNatural(0.004859823703146038, 6.6322755562737905E-06), Gaussian.FromNatural(0.00075506803981220758, 8.24487022054953E-07));
            GammaPower exp = GammaPower.FromShapeAndRate(0, 0, -1);
            Gaussian[] ds = new[]
@ -669,12 +670,12 @@ namespace Microsoft.ML.Probabilistic.Tests
            {
                Gaussian to_d = ExpOp.DAverageConditional(exp, d, Gaussian.Uniform());
                Gaussian to_d_slow = ExpOp_Slow.DAverageConditional(exp, d);
-                Trace.WriteLine($"{to_d}");
-                Trace.WriteLine($"{to_d_slow}");
+                //Trace.WriteLine($"{to_d}");
+                //Trace.WriteLine($"{to_d_slow}");
                Assert.True(to_d_slow.MaxDiff(to_d) < 1e-10);
                to_d = Gaussian.FromNatural(1, 0);
                GammaPower to_exp = ExpOp.ExpAverageConditional(exp, d, to_d);
-                Trace.WriteLine($"{to_exp}");
+                //Trace.WriteLine($"{to_exp}");
            }
            ExpOp.ExpAverageConditional(GammaPower.FromShapeAndRate(-1, 883.22399999999993, -1), Gaussian.FromNatural(0.0072160312702854888, 8.1788482512051846E-06), Gaussian.FromNatural(0.00057861649495666474, 5.6316164560235272E-07));
        }
@ -701,6 +702,7 @@ namespace Microsoft.ML.Probabilistic.Tests
            }
        }

+        // This test fails because of cancellation in logMeanMinusMd and Gamma.SetToRatio in ExpOp.ExpAverageConditional
        [Fact]
        [Trait("Category", "ModifiesGlobals")]
        [Trait("Category", "OpenBug")]
@ -714,14 +716,15 @@ namespace Microsoft.ML.Probabilistic.Tests
                double ve = 2e-3;
                //ve = 1;
                Gaussian uniform = Gaussian.Uniform();
-                Gamma to_exp_point = ExpOp.ExpAverageConditional(Gamma.PointMass(1), d, uniform);
-                Gaussian to_d_point = ExpOp.DAverageConditional(Gamma.PointMass(1), d, uniform);
+                Gamma expPoint = Gamma.PointMass(2);
+                Gamma to_exp_point = ExpOp.ExpAverageConditional(expPoint, d, uniform);
+                Gaussian to_d_point = ExpOp.DAverageConditional(expPoint, d, uniform);
                double to_exp_oldError = double.PositiveInfinity;
                double to_d_oldError = double.PositiveInfinity;
-                for (int i = 0; i < 100; i++)
+                for (int i = 5; i < 100; i++)
                {
                    ve = System.Math.Pow(10, -i);
-                    Gamma exp = Gamma.FromMeanAndVariance(1, ve);
+                    Gamma exp = Gamma.FromMeanAndVariance(2, ve);
                    Gamma to_exp = ExpOp.ExpAverageConditional(exp, d, uniform);
                    Gaussian to_d = ExpOp.DAverageConditional(exp, d, uniform);
                    double to_exp_error = to_exp.MaxDiff(to_exp_point);
@ -740,6 +743,36 @@ namespace Microsoft.ML.Probabilistic.Tests
            }
        }

+        [Fact]
+        [Trait("Category", "OpenBug")]
+        public void ExpOpGammaPower_PointExp()
+        {
+            double power = -1;
+            double vd = 1e-4;
+            vd = 1e-3;
+            Gaussian d = new Gaussian(0, vd);
+            Gaussian uniform = Gaussian.Uniform();
+            GammaPower expPoint = GammaPower.PointMass(2, power);
+            GammaPower to_exp_point = ExpOp.ExpAverageConditional(expPoint, d, uniform);
+            Gaussian to_d_point = ExpOp.DAverageConditional(expPoint, d, uniform);
+            double to_exp_oldError = double.PositiveInfinity;
+            double to_d_oldError = double.PositiveInfinity;
+            for (int i = 0; i < 100; i++)
+            {
+                double ve = System.Math.Pow(10, -i);
+                GammaPower exp = GammaPower.FromMeanAndVariance(2, ve, power);
+                GammaPower to_exp = ExpOp.ExpAverageConditional(exp, d, uniform);
+                Gaussian to_d = ExpOp.DAverageConditional(exp, d, uniform);
+                double to_exp_error = to_exp.MaxDiff(to_exp_point);
+                double to_d_error = System.Math.Abs(to_d.GetMean() - to_d_point.GetMean());
+                Trace.WriteLine($"ve={ve}: to_exp={to_exp} error={to_exp_error} to_d={to_d} error={to_d_error}");
+                Assert.True(to_exp_error <= to_exp_oldError);
+                to_exp_oldError = to_exp_error;
+                Assert.True(to_d_error <= to_d_oldError);
+                to_d_oldError = to_d_error;
+            }
+        }
+
        [Fact]
        public void LogisticOpTest()
        {