// Mirror of https://github.com/microsoft/statsd.git (484 lines, 14 KiB, JavaScript)
/*jshint node:true, laxcomma:true */
// Module dependencies (comma-first style, as permitted by the `laxcomma` jshint option).
var util         = require('util')
  , net          = require('net')
  , config       = require('./lib/config')
  , helpers      = require('./lib/helpers')
  , fs           = require('fs')
  , events       = require('events')
  , logger       = require('./lib/logger')
  , set          = require('./lib/set')
  , pm           = require('./lib/process_metrics')
  , process_mgmt = require('./lib/process_mgmt')
  , mgmt         = require('./lib/mgmt_console');
// Initialize data structures with defaults for statsd stats.
// Everything below is module-level state shared by the packet handler, the
// management console and flushMetrics().

// Per-key hit counts; only maintained when a key-flush interval is configured,
// and reset after each key-flush log is written.
var keyCounter = {};

// Metric stores, keyed by (sanitized) metric name. They are handed to the
// backends on every flush and then reset or deleted by flushMetrics().
var counters = {};
var timers = {};
var timer_counters = {};
var gauges = {};
var sets = {};

// Passed along in the flush payload; not written anywhere in this file
// (presumably filled in by pm.process_metrics -- TODO confirm).
var counter_rates = {};
var timer_data = {};

// Percentile thresholds; replaced by an array once the configuration is loaded.
var pctThreshold = null;

var flushInterval, keyFlushInt, serversLoaded, mgmtServer;

// Process start time in epoch seconds.
var startup_time = Math.round(new Date().getTime() / 1000);

// Event bus towards the backends ('packet', 'flush' and 'status' events).
var backendEvents = new events.EventEmitter();

// Health status reported (and changeable) through the management console.
// NOTE(review): `config` on this line is the ./lib/config module required at
// the top of the file, not the parsed configuration file -- confirm that the
// module really exposes `healthStatus`, otherwise this is always 'up'.
var healthStatus = config.healthStatus || 'up';

// Epoch seconds of the previous flush; 0 until the first flush has happened.
var old_timestamp = 0;

// Gauge name under which the flush timing lag is reported.
var timestamp_lag_namespace;

// When true, sanitizeKeyName() rewrites incoming metric keys.
var keyNameSanitize = true;
/**
 * Load a backend module and initialise it.
 *
 * The module is obtained with require(name) and its init() is called with the
 * daemon start time, the configuration, the backend event emitter and the
 * logger. A falsy return value from init() is treated as fatal: the failure
 * is logged and the process exits with status 1.
 *
 * @param {Object} config - configuration handed to the backend; `config.debug`
 *                          enables a DEBUG log line before initialisation.
 * @param {string} name   - module id/path of the backend, passed to require().
 */
function loadBackend(config, name) {
  var backendmod = require(name);

  if (config.debug) {
    l.log("Loading backend: " + name, 'DEBUG');
  }

  var ret = backendmod.init(startup_time, config, backendEvents, l);
  if (!ret) {
    l.log("Failed to load backend: " + name);
    process.exit(1);
  }
}
/**
 * Load a server module and start it.
 *
 * The module is obtained with require(name) and its start() is called with
 * the server configuration and the packet callback. A falsy return value
 * from start() is treated as fatal: the failure is logged and the process
 * exits with status 1.
 *
 * The callback mimics the dgram 'message' event parameters (msg, rinfo):
 *   msg:   the message received by the server; may contain more than one metric
 *   rinfo: remote address information and message length
 *          (attributes are .address, .port, .family, .size)
 *
 * @param {Object} config     - configuration for this server; `config.debug`
 *                              enables a DEBUG log line before start-up.
 * @param {string} name       - module id/path of the server, passed to require().
 * @param {Function} callback - invoked by the server as callback(msg, rinfo).
 */
function startServer(config, name, callback) {
  var servermod = require(name);

  if (config.debug) {
    l.log("Loading server: " + name, 'DEBUG');
  }

  var ret = servermod.start(config, callback);
  if (!ret) {
    l.log("Failed to load server: " + name);
    process.exit(1);
  }
}
// Global for the loaded configuration: assigned by the config.configFile
// callback and read by flushMetrics().
var conf;
/**
 * Flush metrics to each backend.
 *
 * Records the flush timing lag as a gauge, hands the current metric stores to
 * pm.process_metrics() and emits a 'flush' event (timestamp, metrics) on
 * backendEvents with the result. A one-shot 'flush' listener registered here
 * resets or deletes the stores afterwards, depending on the delete* options
 * of the configuration.
 *
 * Does nothing when no configuration has been loaded yet.
 */
function flushMetrics() {
  // The 'exit' handler can call this before the configuration callback has
  // run; without a configuration there is nothing to flush, and reading
  // conf.histogram below would throw.
  if (!conf) {
    return;
  }

  var time_stamp = Math.round(new Date().getTime() / 1000);
  if (old_timestamp > 0) {
    // Seconds by which this flush deviates from the configured interval.
    gauges[timestamp_lag_namespace] = (time_stamp - old_timestamp - (Number(conf.flushInterval)/1000));
  }
  old_timestamp = time_stamp;

  var metrics_hash = {
    counters: counters,
    gauges: gauges,
    timers: timers,
    timer_counters: timer_counters,
    sets: sets,
    counter_rates: counter_rates,
    timer_data: timer_data,
    pctThreshold: pctThreshold,
    histogram: conf.histogram
  };

  // After all listeners, reset the stats. The listener is added last, so the
  // listeners that were registered before it see the metrics first.
  backendEvents.once('flush', function clear_metrics(ts, metrics) {
    // TODO: a lot of this should be moved up into an init/constructor so we don't have to do it every
    // single flushInterval....
    // allows us to flag all of these on with a single config but still override them individually
    conf.deleteIdleStats = conf.deleteIdleStats !== undefined ? conf.deleteIdleStats : false;
    if (conf.deleteIdleStats) {
      conf.deleteCounters = conf.deleteCounters !== undefined ? conf.deleteCounters : true;
      conf.deleteTimers = conf.deleteTimers !== undefined ? conf.deleteTimers : true;
      conf.deleteSets = conf.deleteSets !== undefined ? conf.deleteSets : true;
      conf.deleteGauges = conf.deleteGauges !== undefined ? conf.deleteGauges : true;
    }

    // Clear the counters
    conf.deleteCounters = conf.deleteCounters || false;
    for (var counter_key in metrics.counters) {
      if (conf.deleteCounters) {
        // statsd's own bookkeeping counters are zeroed, never deleted
        if ((counter_key.indexOf("packets_received") !== -1) ||
            (counter_key.indexOf("metrics_received") !== -1) ||
            (counter_key.indexOf("bad_lines_seen") !== -1)) {
          metrics.counters[counter_key] = 0;
        } else {
          delete(metrics.counters[counter_key]);
        }
      } else {
        metrics.counters[counter_key] = 0;
      }
    }

    // Clear the timers
    conf.deleteTimers = conf.deleteTimers || false;
    for (var timer_key in metrics.timers) {
      if (conf.deleteTimers) {
        delete(metrics.timers[timer_key]);
        delete(metrics.timer_counters[timer_key]);
      } else {
        metrics.timers[timer_key] = [];
        metrics.timer_counters[timer_key] = 0;
      }
    }

    // Clear the sets
    conf.deleteSets = conf.deleteSets || false;
    for (var set_key in metrics.sets) {
      if (conf.deleteSets) {
        delete(metrics.sets[set_key]);
      } else {
        metrics.sets[set_key] = new set.Set();
      }
    }

    // normally gauges are not reset. so if we don't delete them, continue to persist previous value
    conf.deleteGauges = conf.deleteGauges || false;
    if (conf.deleteGauges) {
      for (var gauge_key in metrics.gauges) {
        delete(metrics.gauges[gauge_key]);
      }
    }
  });

  pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush(metrics) {
    backendEvents.emit('flush', time_stamp, metrics);
  });
}
// Base statistics reported by the management console's "stats" command.
var stats = {
  messages: {
    // epoch seconds of the most recently handled packet (start time until then)
    last_msg_seen: startup_time,
    // number of metric lines rejected as malformed
    bad_lines_seen: 0
  }
};
/**
 * Normalise a metric key.
 *
 * When key sanitizing is enabled (`keyNameSanitize`), runs of whitespace
 * become a single '_', every '/' becomes '-', and any character other than
 * ASCII letters, digits, '_', '-' and '.' is removed. Otherwise the key is
 * returned untouched.
 *
 * @param {string} key - raw metric key.
 * @returns {string} the key to store the metric under.
 */
function sanitizeKeyName(key) {
  if (keyNameSanitize) {
    return key.replace(/\s+/g, '_')
              .replace(/\//g, '-')
              .replace(/[^a-zA-Z_\-0-9\.]/g, '');
  } else {
    return key;
  }
}
// Global for the logger: assigned a logger.Logger instance by the
// config.configFile callback.
var l;
// Names under which statsd's own bookkeeping counters are stored in
// `counters`. Declared here instead of being created as implicit globals;
// kept at module level because the packet handler is built only while
// `serversLoaded` is unset and must keep seeing the names assigned by any
// later run of the callback below.
var bad_lines_seen, packets_received, metrics_received;

// Entry point: load the configuration file named by the first command-line
// argument. Each time the callback runs it refreshes the logger, the internal
// counter names and the key sanitizing flag. The servers, the management
// console, the backends and the timers are set up only while `serversLoaded`
// is still unset.
config.configFile(process.argv[2], function (config) {
  conf = config;

  process_mgmt.init(config);

  l = new logger.Logger(config.log || {});

  // setup config for stats prefix
  var prefixStats = config.prefixStats;
  prefixStats = prefixStats !== undefined ? prefixStats : "statsd";
  // setup the names for the stats stored in counters{}
  bad_lines_seen          = prefixStats + ".bad_lines_seen";
  packets_received        = prefixStats + ".packets_received";
  metrics_received        = prefixStats + ".metrics_received";
  timestamp_lag_namespace = prefixStats + ".timestamp_lag";

  // now set to zero so we can increment them
  counters[bad_lines_seen]   = 0;
  counters[packets_received] = 0;
  counters[metrics_received] = 0;

  if (config.keyNameSanitize !== undefined) {
    keyNameSanitize = config.keyNameSanitize;
  }

  if (!serversLoaded) {

    // The module-level default for healthStatus is computed before any
    // configuration file has been read, so apply a configured initial value
    // here.
    if (config.healthStatus !== undefined) {
      healthStatus = config.healthStatus;
    }

    // key counting
    var keyFlushInterval = Number((config.keyFlush && config.keyFlush.interval) || 0);

    // Log a rejected metric line and bump both bad-line counters.
    var recordBadLine = function (fields, line) {
      l.log('Bad line: ' + fields + ' in msg "' + line +'"');
      counters[bad_lines_seen]++;
      stats.messages.bad_lines_seen++;
    };

    // Parse one packet. A packet holds one metric per line, each of the form
    // key:value|type[|@sampleRate][:value|type...].
    var handlePacket = function (msg, rinfo) {
      backendEvents.emit('packet', msg, rinfo);
      counters[packets_received]++;

      // split() yields a one-element array when there is no newline
      var metrics = msg.toString().split("\n");

      for (var midx = 0; midx < metrics.length; midx++) {
        var line = metrics[midx];
        if (line.length === 0) {
          continue;
        }

        counters[metrics_received]++;
        if (config.dumpMessages) {
          l.log(line);
        }
        var bits = line.split(':');
        var key = sanitizeKeyName(bits.shift());

        if (keyFlushInterval > 0) {
          if (! keyCounter[key]) {
            keyCounter[key] = 0;
          }
          keyCounter[key] += 1;
        }

        // a bare key counts as an increment by one
        if (bits.length === 0) {
          bits.push("1");
        }

        for (var i = 0; i < bits.length; i++) {
          var sampleRate = 1;
          var fields = bits[i].split("|");
          if (!helpers.is_valid_packet(fields)) {
            recordBadLine(fields, line);
            continue;
          }
          if (fields[2]) {
            var rateMatch = fields[2].match(/^@([\d\.]+)/);
            if (!rateMatch) {
              // NOTE(review): helpers.is_valid_packet may already reject
              // this; guard regardless so that a malformed sample rate can
              // never throw inside the packet handler.
              recordBadLine(fields, line);
              continue;
            }
            sampleRate = Number(rateMatch[1]);
          }

          var metric_type = fields[1].trim();
          if (metric_type === "ms") {
            if (! timers[key]) {
              timers[key] = [];
              timer_counters[key] = 0;
            }
            timers[key].push(Number(fields[0] || 0));
            timer_counters[key] += (1 / sampleRate);
          } else if (metric_type === "g") {
            // a leading sign on the value adjusts an existing gauge
            if (gauges[key] && fields[0].match(/^[-+]/)) {
              gauges[key] += Number(fields[0] || 0);
            } else {
              gauges[key] = Number(fields[0] || 0);
            }
          } else if (metric_type === "s") {
            if (! sets[key]) {
              sets[key] = new set.Set();
            }
            sets[key].insert(fields[0] || '0');
          } else {
            // everything else is treated as a counter
            if (! counters[key]) {
              counters[key] = 0;
            }
            counters[key] += Number(fields[0] || 1) * (1 / sampleRate);
          }
        }
      }

      stats.messages.last_msg_seen = Math.round(new Date().getTime() / 1000);
    };

    // If config.servers isn't specified, use the top-level config for backwards-compatibility
    var server_config = config.servers || [config];
    for (var i = 0; i < server_config.length; i++) {
      // The default server is UDP
      var server = server_config[i].server || './servers/udp';
      startServer(server_config[i], server, handlePacket);
    }
    serversLoaded = true;

    // Line-oriented management console.
    mgmtServer = net.createServer(function(stream) {
      stream.setEncoding('ascii');

      stream.on('error', function(err) {
        l.log('Caught ' + err +', Moving on');
      });

      stream.on('data', function(data) {
        var cmdline = data.trim().split(" ");
        var cmd = cmdline.shift();

        switch(cmd) {
          case "help":
            stream.write("Commands: stats, counters, timers, gauges, delcounters, deltimers, delgauges, health, config, quit\n\n");
            break;

          case "config":
            stream.write("\n");
            for (var prop in config) {
              if (!config.hasOwnProperty(prop)) {
                continue;
              }
              if (typeof config[prop] !== 'object') {
                stream.write(prop + ": " + config[prop] + "\n");
                continue;
              }
              // one level of nesting is printed as "prop > subprop"
              var subconfig = config[prop];
              for (var subprop in subconfig) {
                if (!subconfig.hasOwnProperty(subprop)) {
                  continue;
                }
                stream.write(prop + " > " + subprop + ": " + subconfig[subprop] + "\n");
              }
            }
            break;

          case "health":
            if (cmdline.length > 0) {
              var cmdaction = cmdline[0].toLowerCase();
              if (cmdaction === 'up') {
                healthStatus = 'up';
              } else if (cmdaction === 'down') {
                healthStatus = 'down';
              }
            }
            stream.write("health: " + healthStatus + "\n");
            break;

          case "stats":
            var now    = Math.round(new Date().getTime() / 1000);
            var uptime = now - startup_time;

            stream.write("uptime: " + uptime + "\n");

            // metrics named last_* hold a timestamp and are shown as an age
            var stat_writer = function(group, metric, val) {
              var delta;

              if (metric.match("^last_")) {
                delta = now - val;
              }
              else {
                delta = val;
              }

              stream.write(group + "." + metric + ": " + delta + "\n");
            };

            // Loop through the base stats
            for (var group in stats) {
              for (var metric in stats[group]) {
                stat_writer(group, metric, stats[group][metric]);
              }
            }

            // Registered last, so it runs after the listeners already
            // attached to 'status' and terminates the listing.
            backendEvents.once('status', function(writeCb) {
              stream.write("END\n\n");
            });

            // Let each backend contribute its status
            backendEvents.emit('status', function(err, name, stat, val) {
              if (err) {
                l.log("Failed to read stats for backend " +
                      name + ": " + err);
              } else {
                stat_writer(name, stat, val);
              }
            });

            break;

          case "counters":
            stream.write(util.inspect(counters) + "\n");
            stream.write("END\n\n");
            break;

          case "timers":
            stream.write(util.inspect(timers) + "\n");
            stream.write("END\n\n");
            break;

          case "gauges":
            stream.write(util.inspect(gauges) + "\n");
            stream.write("END\n\n");
            break;

          case "delcounters":
            mgmt.delete_stats(counters, cmdline, stream);
            break;

          case "deltimers":
            mgmt.delete_stats(timers, cmdline, stream);
            break;

          case "delgauges":
            mgmt.delete_stats(gauges, cmdline, stream);
            break;

          case "quit":
            stream.end();
            break;

          default:
            stream.write("ERROR\n");
            break;
        }

      });
    });

    mgmtServer.listen(config.mgmt_port || 8126, config.mgmt_address || undefined);

    util.log("server is up");

    pctThreshold = config.percentThreshold || 90;
    if (!Array.isArray(pctThreshold)) {
      pctThreshold = [ pctThreshold ]; // listify percentiles so single values work the same
    }

    flushInterval = Number(config.flushInterval || 10000);
    config.flushInterval = flushInterval;

    if (config.backends) {
      for (var i = 0; i < config.backends.length; i++) {
        loadBackend(config, config.backends[i]);
      }
    } else {
      // The default backend is graphite
      loadBackend(config, './backends/graphite');
    }

    // Setup the flush timer
    setInterval(flushMetrics, flushInterval);

    if (keyFlushInterval > 0) {
      var keyFlushPercent = Number((config.keyFlush && config.keyFlush.percent) || 100);
      var keyFlushLog = config.keyFlush && config.keyFlush.log;

      keyFlushInt = setInterval(function () {
        var sortedKeys = [];

        for (var key in keyCounter) {
          sortedKeys.push([key, keyCounter[key]]);
        }

        // most frequently seen keys first
        sortedKeys.sort(function(a, b) { return b[1] - a[1]; });

        var logMessage = "";
        var timeString = (new Date()) + "";

        // only show the top "keyFlushPercent" keys; clamp so that a
        // percentage above 100 cannot index past the end of the list
        var limit = Math.min(sortedKeys.length,
                             Math.ceil(sortedKeys.length * (keyFlushPercent / 100)));
        for (var i = 0; i < limit; i++) {
          logMessage += timeString + " count=" + sortedKeys[i][1] + " key=" + sortedKeys[i][0] + "\n";
        }

        if (keyFlushLog) {
          var logFile = fs.createWriteStream(keyFlushLog, {flags: 'a+'});
          logFile.write(logMessage);
          logFile.end();
        } else {
          process.stdout.write(logMessage);
        }

        // clear the counter
        keyCounter = {};
      }, keyFlushInterval);
    }
  }
});
// Flush whatever has been collected one last time when the process exits.
process.on('exit', function () {
  flushMetrics();
});