Commit 7482b0c2 authored by Alan Szlosek's avatar Alan Szlosek

update readme, remove old helper modules

parent 04defa27
Pipeline #13562152 failed with stage
in 55 seconds
......@@ -3,22 +3,22 @@ I AM HARBINGER
If you need a simple alerting system for your servers and/or apps, this node.js project might be of use to you. I wanted a simple alerting system for my linode server that didn't require exotic databases or other heavy software (Graphite, Grafana, ELK, etc). If I've overlooked a piece of lightweight software that might fit the bill, please let me know! Otherwise...
This application has some batteries included. Howevere, it acknowledges that everyone's alerting needs are different and feels the best way to be flexible is to avoid unnecessary assumptions or layers of complexity. Instead, it invites DevOps to write code to solve their problems. This project is written in JavaScript (node.js) and there is no DSL or config syntax to learn.
This application has some batteries included. However, it acknowledges that everyone's alerting needs are different and feels the best way to be flexible is to avoid unnecessary assumptions or layers of complexity. Instead, it invites DevOps to write code to solve their problems. This project is written in JavaScript (node.js) and there is no DSL or config syntax to learn.
In general, here's what you need to do:
Example scenario:
1. Create a StatsD listener (see src/example-statsd.js)
1. Create a StatsD listener (see examples/statsd.js)
1. Configure your main StatsD daemon to forward messages to harbinger
1. Write JavaScript to analyze the incoming messages using regex or other comparisons
1. Use the throttle module and specify your desired alerting/output method (console, email, Slack, PagerDuty)
1. Use the digest module to batch messages together and deliver them on a schedule to your desired channel (console, email, Slack, PagerDuty)
Comes with the following batteries:
* File watcher (so you can tail log files)
* Syslog message parser
* StatsD message parser
* Ability to throttle messages in-memory and using redis (for when you need to scale this horizontally)
* In dev: alert if an accumulated value reaches M over N number of seconds
* Ability to create a digest of messages, delivered when you want
* Timeseries module to track counters, timers in Redis
* Other helpers
If you have any other ideas, please let me know: alan.szlosek@gmail.com
......@@ -33,11 +33,7 @@ Contributions greatly appreciated! See CONTRIBUTING.md for more information.
USAGE
====
See src/example-statsd.js, which shows:
* Listening for statsd metrics
* Throttling repeat messages
* Alerting via email
See the examples folder.
OVERVIEW
......@@ -48,8 +44,8 @@ Recommended setup
1. Get messages into harbinger somehow. We have examples for StatsD, Syslog and file tailing.
1. Use lib/message.js to create message object with payload.
1. Write delivery logic to pick which messages you want to alert on, and how.
1. Use lib modules to group, throttle, etc. Or write your own custom threshold calculations.
1. Use output modules to send message to desired locations.
1. Use digest module to group/throttle messages.
1. Write your own callback to send messages to desired locations.
Message object example:
......@@ -65,11 +61,11 @@ Message object example:
source: 'statsd',
// StatsD server that forwarded the metric to our harbinger app
// Can usually glean this from the TCP/UDP connection info
remoteAddress: '192.168.1.1',
remoteAddress: '192.168.1.23',
// We may want to track localAddress too, if we're listening on more than 1 interface
localAddress: localAddress,
localAddress: '192.168.1.1',
// We may have multiple listeners for the same service (statsd) on different ports
localPort: localPort,
localPort: 8000,
// Unix timestamp in milliseconds when harbinger received the message
receivedMilliseconds: Date.now()
}
......
var cache = {};
module.exports = function(message, rollupId, timeoutMilliseconds, keepAlerting, callback) {
var cacheEntry;
if (rollupId in cache) {
cacheEntry = cache[rollupId];
cacheEntry.message = message;
} else {
cacheEntry = {
message: message,
handle: null
};
cache[rollupId] = cacheEntry;
}
if (keepAlerting) {
// Clear current interval
if (cacheEntry.handle) {
clearInterval(cacheEntry.handle);
}
cacheEntry.handle = setInterval(callback, timeoutMilliseconds);
} else {
// Clear current timeout
if (cacheEntry.handle) {
clearTimeout(cacheEntry.handle);
}
cacheEntry.handle = setTimeout(callback, timeoutMilliseconds);
}
};
/*
A module to alert if there are M of something for N duration, where M is an operation (sum, count, avg) and N is a duration (last 10 minutes, etc)
Like:
- 10 of a certain error message over the past 10 minutes
*/
var Since = function(redisClient) {
this.redis = redisClient;
};
module.exports = Since;
/**
 * Record one occurrence for `rollupId` and deliver `message` once at least
 * `number` occurrences have been recorded within the last `seconds` seconds.
 *
 * Occurrences are kept in a Redis sorted set scored by the millisecond
 * timestamp at which they were recorded. Redis errors are logged and the
 * callback is not invoked.
 *
 * @param {string} rollupId - Sorted-set key that groups occurrences.
 * @param {*} message - Value passed to `callback` when the threshold is met.
 * @param {number} number - Minimum occurrences required to deliver.
 * @param {number} seconds - Length of the look-back window, in seconds.
 * @param {function(*)} callback - Called with `message` when count >= number.
 */
Since.prototype.countForSeconds = function(rollupId, message, number, seconds, callback) {
    var self = this;
    var ts = Date.now();
    var windowStart = ts - (seconds * 1000);
    // Sorted-set members are unique, so using the bare timestamp as the member
    // would merge two occurrences recorded in the same millisecond into one.
    var member = ts + ':' + Math.random().toString(36).slice(2);

    this.redis.zadd(rollupId, ts, member, function(err) {
        if (err) {
            console.log('error', err);
            return;
        }
        // Let the whole key lapse once the window has passed with no activity,
        // and trim occurrences that have already fallen out of the window.
        self.redis.expire(rollupId, seconds);
        self.redis.zremrangebyscore(rollupId, 0, windowStart);

        self.redis.zcount(rollupId, windowStart, ts, function(err, count) {
            if (err) {
                console.log('error', err);
                return;
            }
            // Deliver once the number of occurrences inside the window has
            // reached the requested threshold.
            if (count >= number) {
                callback(message);
            }
        });
    });
};
var stats = require('statistics');
var log = require('./log')('timeseries', false);
/*
20170824 - Dreaming up new interface and semantics. StatsD count and timer terminology can be confusing, I feel mainly because it hides the details behind new terms that devs don't take the time to learn.
Timeseries is intended to store and run calculations on timeseries data, similar to what Graphite or InfluxDB can do.
Data types:
Count/Increment
Key/value pair where the value is incremented or decremented. Think "website page hit counter"
Duration/Time
Key->[ [timestamp, value], ...] mapping. Useful if you want to track how long an operation takes. Maybe fetchUser() takes 100ms today, but only took 10ms a month ago. Durations help you see that.
*/
/*
METRIC TYPES
For all metric types, the key prefix includes the unix timestamp for which values are being added
Count
- Implemented using hashes
- Incremented using hIncrBy
- Need to implement a cleanup method, since we can't rely on redis to expire hash keys at diff rates
Time
- Implemented using lists
Gauge
- not yet implemented
Set
- not yet implemented
FEATURES
* All functions should return number of values that were present in Redis
* All get*() functions should return the number of seconds that we have data for. You don't want to alert on a "metric above average" if there's only 1 data point.
*/
var Timeseries = function(redisClient, ttl, namespace) {
this.redisClient = redisClient;
this.namespace = namespace || 'ht:'; // default to Harbinger Timeseries as namespace
this.ttl = ttl;
};
module.exports = Timeseries;
// Use a zset to sum values into time buckets
/*
Still unsure how to handle multiple types yet: counters, timers, etc
But this method could be used by getAverage() to get the sum and number of items
type: count, timer
*/
/**
 * Build a Redis key from the instance namespace and an optional suffix.
 *
 * @param {string|number} [prefix] - Appended to the namespace; any falsy
 *     value (including 0) contributes nothing.
 * @returns {string} The namespaced key.
 */
Timeseries.prototype._getKeyPrefix = function(prefix) {
    var suffix = prefix ? prefix : '';
    return this.namespace + suffix;
};
// NEW METHODS
// TODO: need to set a scheduler to clean up count buckets somehow
// Casual scan over keys for type and use hrem
/**
 * Add `count` to the counter bucket for `unixTimestamp`.
 *
 * Buckets are fields of a single Redis hash stored under the namespace key.
 *
 * @param {number} unixTimestamp - Hash field identifying the bucket.
 * @param {number} count - Amount passed to HINCRBY.
 * @param {function} callback - Handed straight to the Redis client.
 */
Timeseries.prototype.increment = function(unixTimestamp, count, callback) {
    var hashKey = this._getKeyPrefix();
    log('hincrby', hashKey, unixTimestamp, count);
    this.redisClient.hincrby(hashKey, unixTimestamp, count, callback);
};

// `count` is an alias for `increment`.
Timeseries.prototype.count = Timeseries.prototype.increment;
/**
 * Fetch the counter for every one-second bucket from `startSeconds` to
 * `endSeconds`, inclusive.
 *
 * @param {number} startSeconds - First bucket (unix seconds).
 * @param {number} endSeconds - Last bucket (unix seconds).
 * @param {function(?Error, number[]=)} callback - Receives one integer per
 *     bucket in ascending order; buckets with no data are reported as 0.
 *     An empty range (start > end) yields an empty array.
 */
Timeseries.prototype.getValues = function(startSeconds, endSeconds, callback) {
    // HMGET takes the hash key followed by every field to read.
    var args = [
        this._getKeyPrefix()
    ];
    for (var i = startSeconds; i <= endSeconds; i++) {
        args.push(i);
    }

    if (args.length === 1) {
        // HMGET needs at least one field, so an empty range is answered here
        // instead of sending a command Redis would reject. The callback is
        // deferred so it is asynchronous on this path as well.
        process.nextTick(callback, null, []);
        return;
    }

    this.redisClient.hmget(args, function(err, value) {
        if (err) {
            callback(err);
            return;
        }
        // Missing fields come back as null; report those buckets as 0.
        callback(null, value.map(function(item) {
            var parsed = parseInt(item, 10);
            return isNaN(parsed) ? 0 : parsed;
        }));
    });
};
/*
Use this to get sum, mean/average, standard deviation, etc
TODO: Use Lua script to calculate and cache these stats within Redis
*/
/**
 * Reduce the per-second counters between `startSeconds` and `endSeconds`
 * (inclusive) with the `stats` reducer.
 *
 * @param {number} startSeconds - First bucket (unix seconds).
 * @param {number} endSeconds - Last bucket (unix seconds).
 * @param {function(?Error, *=)} callback - Receives the reduced statistics,
 *     or an Error when the lookup fails or the range holds no buckets.
 */
Timeseries.prototype.getCountStatistics = function(startSeconds, endSeconds, callback) {
    // getValues() is the counter reader defined on this prototype.
    this.getValues(startSeconds, endSeconds, function(err, counts) {
        if (err) {
            callback(err);
            return;
        }
        if (counts.length === 0) {
            // reduce() without an initial value throws on an empty array;
            // report the condition through the callback instead.
            callback(new Error('no data points between ' + startSeconds + ' and ' + endSeconds));
            return;
        }
        callback(null, counts.reduce(stats));
    });
};
/**
 * Record one timing sample in the bucket for `unixTimestamp`.
 *
 * The sample is LPUSHed onto the list keyed by namespace + timestamp, and the
 * list's expiry is then set to the instance ttl.
 *
 * @param {number} unixTimestamp - Bucket the sample belongs to.
 * @param {*} value - Sample to store.
 * @param {function(?Error, *=)} callback - Receives the stored value on
 *     success, or only the error when LPUSH fails.
 */
Timeseries.prototype.push = function(unixTimestamp, value, callback) {
    var timeseries = this;
    var listKey = timeseries._getKeyPrefix(unixTimestamp);

    var onPushed = function(err) {
        if (!err) {
            // Refresh the expiry; the result of EXPIRE is not awaited.
            timeseries.redisClient.expire(listKey, timeseries.ttl);
            callback(err, value);
            return;
        }
        callback(err);
    };

    log('lpush', listKey, value);
    timeseries.redisClient.lpush(listKey, value, onPushed);
};

// `time` is an alias for `push`.
Timeseries.prototype.time = Timeseries.prototype.push;
/**
 * Collect every timing sample stored in the one-second buckets from
 * `startSeconds` to `endSeconds`, inclusive.
 *
 * Buckets are read one after another, each with its own LRANGE call, and
 * their samples are concatenated in bucket order.
 *
 * @param {number} startSeconds - First bucket (unix seconds).
 * @param {number} endSeconds - Last bucket (unix seconds).
 * @param {function(?Error, number[]=)} callback - Receives the samples parsed
 *     as base-10 integers, or the first Redis error encountered.
 */
Timeseries.prototype.getTimes = function(startSeconds, endSeconds, callback) {
    var self = this;
    var buckets = [];
    for (var s = startSeconds; s <= endSeconds; s++) {
        buckets.push(s);
    }
    var out = [];
    var cursor = 0;

    var next = function() {
        if (cursor >= buckets.length) {
            // Every bucket has been read.
            callback(null, out);
            return;
        }
        // Same key scheme push() writes with: namespace + bucket timestamp.
        var key = self._getKeyPrefix(buckets[cursor]);
        cursor += 1;

        log('fetching: ', key);
        self.redisClient.lrange([key, 0, -1], function(err, values) {
            if (err) {
                callback(err);
                return;
            }
            log(values);
            for (var i = 0; i < values.length; i++) {
                out.push(parseInt(values[i], 10));
            }
            next();
        });
    };
    // Should perhaps cache the output of this so frequent pulls for this exact interval don't have to recalculate
    next();
};
/**
 * Reduce the timing samples recorded between `startSeconds` and `endSeconds`
 * (inclusive) with the `stats` reducer.
 *
 * @param {number} startSeconds - First bucket (unix seconds).
 * @param {number} endSeconds - Last bucket (unix seconds).
 * @param {function(?Error, *=)} callback - Receives the reduced statistics,
 *     or an Error when the lookup fails or no samples exist in the range.
 */
Timeseries.prototype.getTimeStatistics = function(startSeconds, endSeconds, callback) {
    this.getTimes(startSeconds, endSeconds, function(err, times) {
        if (err) {
            callback(err);
            return;
        }
        if (times.length === 0) {
            // reduce() without an initial value throws on an empty array;
            // report the condition through the callback instead.
            callback(new Error('no data points between ' + startSeconds + ' and ' + endSeconds));
            return;
        }
        callback(null, times.reduce(stats));
    });
};
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment