-
Notifications
You must be signed in to change notification settings - Fork 5
added batch logging #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 9 commits
334f7e7
f8e2e8b
9009666
49daf8b
d8daa5d
610a023
2e2ba0e
01c62e2
058e1cd
e4cc258
7c00406
3743ce5
be446fa
f9df13b
b195ad6
6186fc6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,5 @@ config/*.json | |
| config/facs/*.json | ||
| logs | ||
| sec | ||
| package-lock.json | ||
| *.swp | ||
| *.swo | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -3,6 +3,8 @@ | |||||
| const util = require('util') | ||||||
| const Base = require('bfx-facs-base') | ||||||
|
|
||||||
| const { formatTime } = require('./utils/date-time') | ||||||
|
|
||||||
| class GrcSlack extends Base { | ||||||
| constructor (caller, opts, ctx) { | ||||||
| super(caller, opts, ctx) | ||||||
|
|
@@ -12,6 +14,34 @@ class GrcSlack extends Base { | |||||
| this.init() | ||||||
|
|
||||||
| if (opts.conf) this.conf = opts.conf | ||||||
|
|
||||||
| if (this.conf.errorBatching && opts.lru) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we using lru and not map? with lru we would lose data right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think in order to keep the buffer fixed in length. If a worker starts to spam, we don't want to keep the spam rather than track let's say last 100 messages of a certain type. that was the idea behind this update in general. If a worker starts spamming, this logic would still allow us to know about the problem, however, would cut down api calls and payload significantly. anyways, if i missed the purpose, @AceTheNinja please clarify
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. couldn't have explained it better 🫡 @batrudinych |
||||||
| this._errorBatch = opts.lru | ||||||
| this._initErrorBatching() | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| _initErrorBatching () { | ||||||
| this._errorBatchTimer = setInterval(() => { | ||||||
| this._processBatchedErrors() | ||||||
| }, this.conf.errorBatching?.interval || 60000) | ||||||
|
|
||||||
| this._errorBatchTimer.unref() | ||||||
| } | ||||||
|
|
||||||
| async _stop (cb) { | ||||||
| if (this._errorBatchTimer) { | ||||||
| clearInterval(this._errorBatchTimer) | ||||||
| this._errorBatchTimer = null | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| await this._processBatchedErrors() | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this is called on stop? wouldn't it hang for a long time possibly?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think it's called to flush the last batch of errors, otherwise it'll be lost |
||||||
| } catch (err) { | ||||||
| console.error('Failed to process final batch of errors during shutdown', err) | ||||||
| } finally { | ||||||
| super._stop(cb) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| message (reqChannel, message) { | ||||||
|
|
@@ -21,7 +51,7 @@ class GrcSlack extends Base { | |||||
| const maxLength = slack.max_length || 1024 | ||||||
| const env = (slack.env) ? `Env: ${slack.env}, ` : '' | ||||||
| const rawText = env + message | ||||||
| const text = (maxLength) ? rawText.substr(0, maxLength) : rawText | ||||||
| const text = (maxLength) ? rawText.slice(0, maxLength) : rawText | ||||||
AceTheNinja marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| const channel = reqChannel || slack.channel | ||||||
| const send = [{ channel, text }] | ||||||
|
|
||||||
|
|
@@ -41,6 +71,143 @@ class GrcSlack extends Base { | |||||
|
|
||||||
| return this.message(reqChannel, `${extraP}${errTag}${error}`) | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Batch log error to slack | ||||||
| * @param {string} reqChannel - Slack channel to log the error to, if not provided, the channel from the config will be used | ||||||
| * @param {Error} err - Error to log | ||||||
| * @param {string} sourceName - Source of the error | ||||||
| * @param {Object} payload - Payload to log | ||||||
| * @param {...any} extra - Additional information to log | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is extra a required parameter?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no we're just putting all other params in 1 object. It can be undefined also
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| */ | ||||||
| async logErrorEnqueue (reqChannel, err, sourceName, payload, ...extra) { | ||||||
| if (!this._errorBatch) { | ||||||
| console.error('Error batching not initialized, falling back to direct log') | ||||||
| return this.logError(reqChannel, err, sourceName, payload, ...extra) | ||||||
| } | ||||||
|
|
||||||
| if (!reqChannel) { | ||||||
| reqChannel = this.conf.channel | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| const errorKey = this._createErrorKey(reqChannel, err, sourceName) | ||||||
|
|
||||||
| const now = new Date() | ||||||
| let errorEntry = this._errorBatch.get(errorKey) | ||||||
|
|
||||||
| if (!errorEntry) { | ||||||
| errorEntry = { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added it |
||||||
| errorMessage: err.message, | ||||||
| sourceName, | ||||||
| reqChannel, | ||||||
| payloads: [ | ||||||
| { payload, extras: extra } | ||||||
| ], | ||||||
| count: 0, | ||||||
|
||||||
| firstSeen: now, | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like you use these dates just to know when you got first and last errors. Use separate properties, do not store inside the object and avoid excessive array passes |
||||||
| lastSeen: now | ||||||
| } | ||||||
| this._errorBatch.set(errorKey, errorEntry) | ||||||
| } | ||||||
|
|
||||||
| errorEntry.count++ | ||||||
| errorEntry.lastSeen = now | ||||||
| errorEntry.payloads.push({ payload, extras: extra }) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you're adding same stuff twice if you just initialized cache entry. btw if we spam under same errorKey, aren't we bloating the payloads array? Size should be restricted, otherwise we can run out of memory |
||||||
| } catch (e) { | ||||||
| console.error('Error batching failed, falling back to direct log', e) | ||||||
| await this.logError(reqChannel, err, sourceName, payload, ...extra) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| _createErrorKey (reqChannel, err, sourceName) { | ||||||
|
||||||
| const errorMsg = err?.message || 'Unknown error' | ||||||
|
||||||
| return `${reqChannel}:${sourceName}:${errorMsg}` | ||||||
| } | ||||||
|
|
||||||
| async _processBatchedErrors () { | ||||||
| if (!this._errorBatch || this._errorBatch.cache.length === 0) { | ||||||
| return | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| const errorsByFunctionNameAndChannel = new Map() | ||||||
|
|
||||||
| const allEntries = Object.values(this._errorBatch.cache.cache || {}) | ||||||
|
|
||||||
| for (const { value: errorEntry } of allEntries) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the part below is overcomplicated. You already grouped by channel + message + source. Each grouped item has a list of payloads. Go over each item, concatenate payloads until you reach restricted length and send a message, continue until the lru is exhausted. Pay attention to not hit limits while you're doing so (i.e. overall messages stored should not be too high relatively to the length of a single message sent so we wouldn't send too many msgs too quickly) |
||||||
| const groupKey = `${errorEntry.reqChannel}:${errorEntry.sourceName}` | ||||||
| if (!errorsByFunctionNameAndChannel.has(groupKey)) { | ||||||
| errorsByFunctionNameAndChannel.set(groupKey, []) | ||||||
| } | ||||||
|
|
||||||
| const errorKey = this._createErrorKey(errorEntry.reqChannel, { message: errorEntry.errorMessage }, errorEntry.sourceName) | ||||||
| errorsByFunctionNameAndChannel.get(groupKey).push({ errorKey, ...errorEntry }) | ||||||
| } | ||||||
|
|
||||||
| for (const [groupKey, errors] of errorsByFunctionNameAndChannel) { | ||||||
| const [reqChannel, sourceName] = groupKey.split(':') | ||||||
|
||||||
| await this._sendBatchedErrorMessage(reqChannel, sourceName, errors) | ||||||
| } | ||||||
| } catch (e) { | ||||||
| console.error('Failed to process batched errors', e) | ||||||
| } finally { | ||||||
| this._errorBatch.clear() | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| async _sendBatchedErrorMessage (reqChannel, sourceName, errors) { | ||||||
| const totalErrors = errors.reduce((sum, error) => sum + error.count, 0) | ||||||
|
||||||
| const timeRange = this._getTimeRange(errors) | ||||||
|
|
||||||
| let message = `*Batched Error Report - ${sourceName}*\n` | ||||||
| message += `*Summary:* ${totalErrors} errors across ${errors.length} types (${timeRange})\n\n` | ||||||
|
|
||||||
| let truncated = false | ||||||
| for (const error of errors.slice(0, 10)) { | ||||||
|
||||||
| if (truncated) break | ||||||
|
|
||||||
| message += `• *${error.errorMessage}* (${error.count}x)\n` | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about the rest message info? like extra, etc.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that goes in payload |
||||||
| message += ' Payloads:\n' | ||||||
|
|
||||||
| for (const item of error.payloads.slice(0, 3)) { | ||||||
|
||||||
| let payloadStr = ` - ${JSON.stringify(item.payload)}\n` | ||||||
| if (Array.isArray(item.extras) && item.extras.length) { | ||||||
| payloadStr += ` Extras: ${JSON.stringify(item.extras)}\n` | ||||||
| } | ||||||
|
|
||||||
| if (message.length + payloadStr.length > (this.conf.errorBatching?.maxMessageLength || 4000)) { | ||||||
|
||||||
| message += `\n... message truncated (${errors.length - errors.indexOf(error)} more error types)` | ||||||
| truncated = true | ||||||
| break | ||||||
| } | ||||||
|
|
||||||
| message += payloadStr | ||||||
| } | ||||||
|
|
||||||
| if (error.payloads.length > 3) { | ||||||
|
||||||
| message += ` ... and ${error.payloads.length - 3} more payloads\n` | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if (errors.length > 10 && !truncated) { | ||||||
|
||||||
| message += `\n... and ${errors.length - 10} more error types` | ||||||
| } | ||||||
|
|
||||||
| await this.logError(reqChannel, message) | ||||||
| } | ||||||
|
|
||||||
| _getTimeRange (errors) { | ||||||
| const allTimes = errors.flatMap(error => [error.firstSeen, error.lastSeen]) | ||||||
| const earliest = new Date(Math.min(...allTimes)) | ||||||
| const latest = new Date(Math.max(...allTimes)) | ||||||
|
|
||||||
| if (earliest.getTime() === latest.getTime()) { | ||||||
| return formatTime(earliest) | ||||||
| } | ||||||
|
|
||||||
| return `${formatTime(earliest)} - ${formatTime(latest)}` | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| module.exports = GrcSlack | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| module.exports = { | ||
| testEnvironment: 'node', | ||
| testMatch: ['<rootDir>/test/**/*.test.js'], | ||
| verbose: true | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is it possible to compose and send 50x4000 symbols to slack at once?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can create and send a file in that case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no files