Support gathering "top" percentile statistics

Adds support for collecting statistics on top percentiles, as an alternative to the default bottom percentiles. You specify a top percentile by giving a negative number in percentThreshold, so -10 collects the top 10% of the data and emits mean_top10, lower_top10, and sum_top10. Using a negative number may seem hacky, but it is convenient and there is a precedent: in some languages, an array can be referenced from the end by using negative indexes.
2012-12-04 20:53:21 -07:00 · 2012-12-04 20:53:21 -07:00 · f369dfade2
--- a/exampleConfig.js
+++ b/exampleConfig.js
@ -29,6 +29,7 @@ Optional Variables:
  flushInterval:    interval (in ms) to flush to Graphite
  percentThreshold: for time information, calculate the Nth percentile(s)
                    (can be a single value or list of floating-point values)
+                    (negative values select the "top" Nth percentile(s) instead)
                    [%, default: 90]
  keyFlush:         log the most frequently sent keys [object, default: undefined]
    interval:       how often to log frequent keys [ms, default: 0]
--- a/lib/process_metrics.js
+++ b/lib/process_metrics.js
@ -34,24 +34,32 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) {

        var sum = min;
        var mean = min;
-        var maxAtThreshold = max;
+        var thresholdBoundary = max;

        var key2;

        for (key2 in pctThreshold) {
          var pct = pctThreshold[key2];
          if (count > 1) {
-            var numInThreshold = Math.round(pct / 100 * count);
+            var numInThreshold = Math.round(Math.abs(pct) / 100 * count);
+            if (numInThreshold === 0) {
+              continue;
+            }

-            maxAtThreshold = values[numInThreshold - 1];
-            sum = cumulativeValues[numInThreshold - 1];
+            if (pct > 0) {
+              thresholdBoundary = values[numInThreshold - 1];
+              sum = cumulativeValues[numInThreshold - 1];
+            } else {
+              thresholdBoundary = values[count - numInThreshold];
+              sum = cumulativeValues[count - 1] - cumulativeValues[count - numInThreshold - 1];
+            }
            mean = sum / numInThreshold;
          }

          var clean_pct = '' + pct;
-          clean_pct = clean_pct.replace('.', '_');
+          clean_pct = clean_pct.replace('.', '_').replace('-', 'top');
          current_timer_data["mean_" + clean_pct] = mean;
-          current_timer_data["upper_" + clean_pct] = maxAtThreshold;
+          current_timer_data[(pct > 0 ? "upper_" : "lower_") + clean_pct] = thresholdBoundary;
          current_timer_data["sum_" + clean_pct] = sum;

        }
--- a/test/process_metrics_tests.js
+++ b/test/process_metrics_tests.js
@ -182,6 +182,28 @@ module.exports = {
    test.equal(undefined, timer_data['bar.bazfoobar.abc']['histogram']);
    test.equal(undefined, timer_data['xyz']['histogram']);

+    test.done();
+  },
+    timers_single_time_single_top_percentile: function(test) {
+    test.expect(3);
+    this.metrics.timers['a'] = [100];
+    this.metrics.pctThreshold = [-10];
+    pm.process_metrics(this.metrics, 100, this.time_stamp, function(){});
+    timer_data = this.metrics.timer_data['a'];
+    test.equal(100, timer_data.mean_top10);
+    test.equal(100, timer_data.lower_top10);
+    test.equal(100, timer_data.sum_top10);
+    test.done();
+  },
+    timers_multiple_times_single_top_percentile: function(test) {
+    test.expect(3);
+    this.metrics.timers['a'] = [10, 10, 10, 10, 10, 10, 10, 10, 100, 200];
+    this.metrics.pctThreshold = [-20];
+    pm.process_metrics(this.metrics, 100, this.time_stamp, function(){});
+    timer_data = this.metrics.timer_data['a'];
+    test.equal(150, timer_data.mean_top20);
+    test.equal(100, timer_data.lower_top20);
+    test.equal(300, timer_data.sum_top20);
    test.done();
  },
    statsd_metrics_exist: function(test) {