From ea39ca624204a12ef5aa2e71d186c1dfb831cf5a Mon Sep 17 00:00:00 2001 From: Vivien Barousse Date: Wed, 8 Aug 2012 23:05:06 +0100 Subject: [PATCH 1/3] Add support for sets, counting unique events Sets are backed using a set data-structure, discarding duplicate values being inserted. This allows backend to retrieve the number of unique events that happened since the last flush. Sets are all emptied after each flush. --- README.md | 7 +++++++ lib/set.js | 30 ++++++++++++++++++++++++++++++ stats.js | 14 ++++++++++++++ test/set_tests.js | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 lib/set.js create mode 100644 test/set_tests.js diff --git a/README.md b/README.md index 7c9be9e..78e13f5 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,13 @@ StatsD now also supports gauges, arbitrary values, which can be recorded. gaugor:333|g +Sets +---- +StatsD supports counting unique occurrences of events between flushes, +using a Set to store all occurring events. + + uniques:765|s + All metrics can also be batch send in a single UDP packet, separated by a newline character. 
diff --git a/lib/set.js b/lib/set.js new file mode 100644 index 0000000..8458b06 --- /dev/null +++ b/lib/set.js @@ -0,0 +1,30 @@ +var Set = function() { + this.store = {}; +} + +Set.prototype = { + has: function(value) { + if (value) { + return this.store.hasOwnProperty(value); + } else { + return false; + } + }, + insert: function(value) { + if (value) { + this.store[value] = value; + } + }, + clear: function() { + this.store = {}; + }, + values: function() { + var values = []; + for (value in this.store) { + values.push(value); + } + return values; + } +} + +exports.Set = Set; diff --git a/stats.js b/stats.js index 903d820..7e1d40e 100644 --- a/stats.js +++ b/stats.js @@ -5,6 +5,7 @@ var dgram = require('dgram') , fs = require('fs') , events = require('events') , logger = require('./lib/logger') + , set = require('./lib/set') // initialize data structures with defaults for statsd stats var keyCounter = {}; @@ -16,6 +17,8 @@ var timers = { "statsd.packet_process_time": [] }; var gauges = {}; +var sets = { +}; var pctThreshold = null; var debugInt, flushInterval, keyFlushInt, server, mgmtServer; var startup_time = Math.round(new Date().getTime() / 1000); @@ -44,6 +47,7 @@ function flushMetrics() { counters: counters, gauges: gauges, timers: timers, + sets: sets, pctThreshold: pctThreshold } @@ -58,6 +62,11 @@ function flushMetrics() { for (key in metrics.timers) { metrics.timers[key] = []; } + + // Clear the sets + for (key in metrics.sets) { + metrics.sets[key] = new set.Set(); + } }); // Flush metrics to each backend. @@ -139,6 +148,11 @@ config.configFile(process.argv[2], function (config, oldConfig) { timers[key].push(Number(fields[0] || 0)); } else if (fields[1].trim() == "g") { gauges[key] = Number(fields[0] || 0); + } else if (fields[1].trim() == "s") { + if (! 
sets[key]) { + sets[key] = new set.Set(); + } + sets[key].insert(fields[0] || '0'); } else { if (fields[2] && fields[2].match(/^@([\d\.]+)/)) { sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); diff --git a/test/set_tests.js b/test/set_tests.js new file mode 100644 index 0000000..47b645b --- /dev/null +++ b/test/set_tests.js @@ -0,0 +1,41 @@ +var set = require('../lib/set') + +module.exports = { + has_returns_expected_values: function(test) { + test.expect(2); + var s = new set.Set(); + s.insert('a'); + test.ok(s.has('a')); + test.ok(!s.has('b')); + test.done(); + }, + clear_empties_the_set: function(test) { + test.expect(3); + var s = new set.Set(); + s.insert('a'); + test.equal(1, s.values().length); + s.clear(); + test.equal(0, s.values().length); + test.equal([], s.values().length); + test.done(); + }, + values_returns_values: function(test) { + test.expect(3); + var s = new set.Set(); + s.insert('a'); + s.insert('b'); + test.equal(2, s.values().length); + test.ok(s.values().indexOf('a') != -1); + test.ok(s.values().indexOf('b') != -1); + test.done(); + }, + values_are_unique: function(test) { + test.expect(1); + var s = new set.Set(); + s.insert('a'); + s.insert('a'); + s.insert('b'); + test.equal(2, s.values().length); + test.done(); + } +} From 159b4c264120313d922dfe9177f4653ea5b3ca4a Mon Sep 17 00:00:00 2001 From: Vivien Barousse Date: Wed, 8 Aug 2012 23:09:56 +0100 Subject: [PATCH 2/3] Add sets support in the console backend. The backend dumps all values stored in all the buckets being sets. This allows easy debugging by showing all values that are stored in each bucket. 
--- backends/console.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backends/console.js b/backends/console.js index b48d8e3..3fc7236 100644 --- a/backends/console.js +++ b/backends/console.js @@ -34,6 +34,13 @@ ConsoleBackend.prototype.flush = function(timestamp, metrics) { counter: this.statsCache.counters, timers: this.statsCache.timers, gauges: metrics.gauges, + sets: function (vals) { + var ret = {}; + for (val in vals) { + ret[val] = vals[val].values(); + } + return ret; + }(metrics.sets), pctThreshold: metrics.pctThreshold }; From a5f161cea567c055d055fd74222e10115d62b702 Mon Sep 17 00:00:00 2001 From: Vivien Barousse Date: Thu, 9 Aug 2012 14:16:47 +0100 Subject: [PATCH 3/3] Add sets support in the graphite backend The backend doesn't support sets of data, so the count of unique elements is sent instead --- backends/graphite.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backends/graphite.js b/backends/graphite.js index d5b8e2a..1b704c0 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -59,6 +59,7 @@ var flush_stats = function graphite_flush(ts, metrics) { var counters = metrics.counters; var gauges = metrics.gauges; var timers = metrics.timers; + var sets = metrics.sets; var pctThreshold = metrics.pctThreshold; for (key in counters) { @@ -135,6 +136,11 @@ var flush_stats = function graphite_flush(ts, metrics) { numStats += 1; } + for (key in sets) { + statString += 'stats.sets.' + key + '.count ' + sets[key].values().length + ' ' + ts + "\n"; + numStats += 1; + } + statString += 'statsd.numStats ' + numStats + ' ' + ts + "\n"; statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; post_stats(statString);