Stat key name sanitization is now configurable at the top-level.

Setting keyNameSanitize to false pushes the requirement of sanitizing
key names to the backends. This permits backends that have less strict
character set requirements to take advantage of an expanded stat name
character set. The default behavior remains the same as collisions in
key name space are not handled if two different stat names map to the
same sanitized key name.
This commit is contained in:
Mike Heffner 2012-09-24 14:12:13 -04:00 коммит произвёл shaylang
Родитель 4e7a60049e
Коммит c7e6421d1e
6 изменённых файлов: 175 добавлений и 10 удалений

Просмотреть файл

@ -33,6 +33,7 @@ var prefixGauge;
var prefixSet;
var globalSuffix;
var prefixStats;
// Whether stat names are sanitized globally before they reach this backend.
// Replaced by config.keyNameSanitize in the init function when that option is
// defined. When false, sk() in flush_stats sanitizes each key for Graphite.
var globalKeySanitize = true;
// set up namespaces
var legacyNamespace = true;
@ -97,15 +98,27 @@ var flush_stats = function graphite_flush(ts, metrics) {
var timer_data = metrics.timer_data;
var statsd_metrics = metrics.statsd_metrics;
/**
 * Sanitize a stat key for Graphite, unless sanitization is done globally.
 *
 * @param {string} key - stat name taken from the metrics being flushed.
 * @returns {string} the key untouched when globalKeySanitize is truthy;
 *   otherwise the key with whitespace runs turned into '_', every '/'
 *   turned into '-', and any character outside [a-zA-Z0-9_.-] removed.
 */
function sk(key) {
  // Global sanitization is enabled, so there is nothing left to do here.
  if (globalKeySanitize) {
    return key;
  }

  var underscored = key.replace(/\s+/g, '_');
  var dashed = underscored.replace(/\//g, '-');
  return dashed.replace(/[^a-zA-Z_\-0-9\.]/g, '');
}
for (key in counters) {
var namespace = counterNamespace.concat(key);
var value = counters[key];
var valuePerSecond = counter_rates[key]; // pre-calculated "per second" rate
var keyName = sk(key);
var namespace = counterNamespace.concat(keyName);
if (legacyNamespace === true) {
statString += namespace.join(".") + globalSuffix + valuePerSecond + ts_suffix;
if (flush_counts) {
statString += 'stats_counts.' + key + globalSuffix + value + ts_suffix;
statString += 'stats_counts.' + keyName + globalSuffix + value + ts_suffix;
}
} else {
statString += namespace.concat('rate').join(".") + globalSuffix + valuePerSecond + ts_suffix;
@ -118,8 +131,9 @@ var flush_stats = function graphite_flush(ts, metrics) {
}
for (key in timer_data) {
var namespace = timerNamespace.concat(key);
var namespace = timerNamespace.concat(sk(key));
var the_key = namespace.join(".");
for (timer_data_key in timer_data[key]) {
if (typeof(timer_data[key][timer_data_key]) === 'number') {
statString += the_key + '.' + timer_data_key + globalSuffix + timer_data[key][timer_data_key] + ts_suffix;
@ -137,13 +151,13 @@ var flush_stats = function graphite_flush(ts, metrics) {
}
for (key in gauges) {
var namespace = gaugesNamespace.concat(key);
var namespace = gaugesNamespace.concat(sk(key));
statString += namespace.join(".") + globalSuffix + gauges[key] + ts_suffix;
numStats += 1;
}
for (key in sets) {
var namespace = setsNamespace.concat(key);
var namespace = setsNamespace.concat(sk(key));
statString += namespace.join(".") + '.count' + globalSuffix + sets[key].values().length + ts_suffix;
numStats += 1;
}
@ -239,6 +253,10 @@ exports.init = function graphite_init(startup_time, config, events, logger) {
graphiteStats.flush_time = 0;
graphiteStats.flush_length = 0;
if (config.keyNameSanitize !== undefined) {
globalKeySanitize = config.keyNameSanitize;
}
flushInterval = config.flushInterval;
flush_counts = typeof(config.flush_counts) === "undefined" ? true : config.flush_counts;

113
config.js Normal file
Просмотреть файл

@ -0,0 +1,113 @@
/*
Graphite Required Variables:
(Leave these unset to avoid sending stats to Graphite.
Set debug flag and leave these unset to run in 'dry' debug mode -
useful for testing statsd clients without a Graphite server.)
graphiteHost: hostname or IP of Graphite server
graphitePort: port of Graphite server
Optional Variables:
backends: an array of backends to load. Each backend must exist
by name in the directory backends/. If not specified,
the default graphite backend will be loaded.
* example for console and graphite:
[ "./backends/console", "./backends/graphite" ]
server: the server to load. The server must exist by name in the directory
servers/. If not specified, the default udp server will be loaded.
* example for tcp server:
"./servers/tcp"
debug: debug flag [default: false]
address: address to listen on [default: 0.0.0.0]
address_ipv6: defines if the address is an IPv4 or IPv6 address [true or false, default: false]
port: port to listen for messages on [default: 8125]
mgmt_address: address to run the management TCP interface on
[default: 0.0.0.0]
mgmt_port: port to run the management TCP interface on [default: 8126]
title: Allows for overriding the process title. [default: statsd]
if set to false, will not override the process title and let the OS set it.
The length of the title has to be less than or equal to the binary name + cli arguments
NOTE: This does not work on Macs with node versions prior to v0.10
healthStatus: default health status to be returned when the statsd process starts ['up' or 'down', default: 'up']
dumpMessages: log all incoming messages
flushInterval: interval (in ms) to flush metrics to each backend
percentThreshold: for time information, calculate the Nth percentile(s)
(can be a single value or list of floating-point values)
negative values mean to use "top" Nth percentile(s) values
[%, default: 90]
flush_counts: send stats_counts metrics [default: true]
keyFlush: log the most frequently sent keys [object, default: undefined]
interval: how often to log frequent keys [ms, default: 0]
percent: percentage of frequent keys to log [%, default: 100]
log: location of log file for frequent keys [default: STDOUT]
deleteIdleStats: don't send values to graphite for inactive counters, sets, gauges, or timers
as opposed to sending 0. For gauges, this unsets the gauge (instead of sending
the previous value). Can be individually overridden. [default: false]
deleteGauges: don't send values to graphite for inactive gauges, as opposed to sending the previous value [default: false]
deleteTimers: don't send values to graphite for inactive timers, as opposed to sending 0 [default: false]
deleteSets: don't send values to graphite for inactive sets, as opposed to sending 0 [default: false]
deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false]
prefixStats: prefix to use for the statsd statistics data for this running instance of statsd [default: statsd]
applies to both legacy and new namespacing
keyNameSanitize: sanitize all stat names on ingress [default: true]
If disabled, it is up to the backends to sanitize keynames
as appropriate per their storage requirements.
console:
prettyprint: whether to prettyprint the console backend
output [true or false, default: true]
log: log settings [object, default: undefined]
backend: where to log: stdout or syslog [string, default: stdout]
application: name of the application for syslog [string, default: statsd]
level: log level for [node-]syslog [string, default: LOG_INFO]
graphite:
legacyNamespace: use the legacy namespace [default: true]
globalPrefix: global prefix to use for sending stats to graphite [default: "stats"]
prefixCounter: graphite prefix for counter metrics [default: "counters"]
prefixTimer: graphite prefix for timer metrics [default: "timers"]
prefixGauge: graphite prefix for gauge metrics [default: "gauges"]
prefixSet: graphite prefix for set metrics [default: "sets"]
globalSuffix: global suffix to use for sending stats to graphite [default: ""]
This is particularly useful for sending per host stats by
setting this value to: require('os').hostname().split('.')[0]
repeater: an array of hashes of the form host: and port:
that details other statsd servers to which the received
packets should be "repeated" (duplicated to).
e.g. [ { host: '10.10.10.10', port: 8125 },
{ host: 'observer', port: 8125 } ]
repeaterProtocol: whether to use udp4 or udp6 for repeaters.
["udp4" or "udp6", default: "udp4"]
histogram: for timers, an array of mappings of strings (to match metrics) and
corresponding ordered non-inclusive upper limits of bins.
For all matching metrics, histograms are maintained over
time by writing the frequencies for all bins.
'inf' means infinity. A lower limit of 0 is assumed.
default: [], meaning no histograms for any timer.
First match wins. examples:
* histogram to only track render durations, with unequal
class intervals and catchall for outliers:
[ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ]
* histogram for all timers except 'foo' related,
equal class interval and catchall for outliers:
[ { metric: 'foo', bins: [] },
{ metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ]
automaticConfigReload: whether to watch the config file and reload it when it
changes. The default is true. Set this to false to disable.
*/
/*
Settings used by this configuration:
- Graphite server at 127.0.0.1:2003
- listen for messages on port 8125
- keyNameSanitize disabled, so it is up to the backends to sanitize key names
- graphite and console backends loaded
*/
{
graphitePort: 2003
, graphiteHost: "127.0.0.1"
, port: 8125
, keyNameSanitize: false
, backends: [ "./backends/graphite", "./backends/console" ]
}

Просмотреть файл

@ -53,6 +53,9 @@ Optional Variables:
deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false]
prefixStats: prefix to use for the statsd statistics data for this running instance of statsd [default: statsd]
applies to both legacy and new namespacing
keyNameSanitize: sanitize all stat names on ingress [default: true]
If disabled, it is up to the backends to sanitize keynames
as appropriate per their storage requirements.
console:
prettyprint: whether to prettyprint the console backend

Просмотреть файл

@ -29,6 +29,7 @@ var backendEvents = new events.EventEmitter();
// Falls back to 'up' when config.healthStatus is not set.
var healthStatus = config.healthStatus || 'up';
var old_timestamp = 0;
var timestamp_lag_namespace;
// Whether sanitizeKeyName() rewrites incoming stat names. Replaced by
// config.keyNameSanitize when the config file defines that option.
var keyNameSanitize = true;
// Load and init the backend from the backends/ directory.
function loadBackend(config, name) {
@ -158,6 +159,16 @@ var stats = {
}
};
/**
 * Normalize an incoming stat name when key sanitization is enabled.
 *
 * @param {string} key - raw stat name.
 * @returns {string} the key as-is when keyNameSanitize is falsy; otherwise
 *   the key with whitespace runs replaced by '_', each '/' replaced by '-',
 *   and every character outside [a-zA-Z0-9_.-] dropped.
 */
function sanitizeKeyName(key) {
  // Sanitization disabled: hand the raw name through untouched.
  if (!keyNameSanitize) {
    return key;
  }

  var withoutWhitespace = key.replace(/\s+/g, '_');
  var withoutSlashes = withoutWhitespace.replace(/\//g, '-');
  return withoutSlashes.replace(/[^a-zA-Z_\-0-9\.]/g, '');
}
// Global for the logger
var l;
@ -180,8 +191,11 @@ config.configFile(process.argv[2], function (config) {
counters[bad_lines_seen] = 0;
counters[packets_received] = 0;
if (!serverLoaded) {
if (config.keyNameSanitize !== undefined) {
keyNameSanitize = config.keyNameSanitize;
}
if (!serverLoaded) {
// key counting
var keyFlushInterval = Number((config.keyFlush && config.keyFlush.interval) || 0);
@ -205,10 +219,7 @@ config.configFile(process.argv[2], function (config) {
l.log(metrics[midx].toString());
}
var bits = metrics[midx].toString().split(':');
var key = bits.shift()
.replace(/\s+/g, '_')
.replace(/\//g, '-')
.replace(/[^a-zA-Z_\-0-9\.]/g, '');
var key = sanitizeKeyName(bits.shift());
if (keyFlushInterval > 0) {
if (! keyCounter[key]) {

Просмотреть файл

@ -358,5 +358,24 @@ module.exports = {
});
});
});
},
metric_names_are_sanitized: function(test) {
var me = this;
this.acceptor.once('connection', function(c) {
statsd_send('fo/o:250|c',me.sock,'127.0.0.1',8125,function(){
statsd_send('b ar:250|c',me.sock,'127.0.0.1',8125,function(){
statsd_send('foo+bar:250|c',me.sock,'127.0.0.1',8125,function(){
collect_for(me.acceptor, me.myflush * 2, function(strings){
var str = strings.join();
test.ok(str.indexOf('fo-o') !== -1, "Did not map 'fo/o' => 'fo-o'");
test.ok(str.indexOf('b_ar') !== -1, "Did not map 'b ar' => 'b_ar'");
test.ok(str.indexOf('foobar') !== -1, "Did not map 'foo+bar' => 'foobar'");
test.done();
});
});
});
});
});
}
}

1
test/stam Normal file
Просмотреть файл

@ -0,0 +1 @@
stats.counters.statsd.bad_lines_seen.rate 0 1411387496,stats.counters.statsd.bad_lines_seen.count 0 1411387496,stats.counters.statsd.packets_received.rate 15 1411387496,stats.counters.statsd.packets_received.count 3 1411387496,stats.counters.fo-o.rate 1250 1411387496,stats.counters.fo-o.count 250 1411387496,stats.counters.b_ar.rate 1250 1411387496,stats.counters.b_ar.count 250 1411387496,stats.counters.foobar.rate 1250 1411387496,stats.counters.foobar.count 250 1411387496,stats.gauges.statsd.timestamp_lag -0.2 1411387496,stats.statsd.numStats 6 1411387496,stats.statsd.graphiteStats.calculationtime 0 1411387496,stats.statsd.processing_time 0 1411387496,stats.statsd.graphiteStats.last_exception 1411387495 1411387496,stats.statsd.graphiteStats.last_flush 1411387496 1411387496,stats.statsd.graphiteStats.flush_time 1 1411387496,stats.statsd.graphiteStats.flush_length 587 1411387496,