-
Notifications
You must be signed in to change notification settings - Fork 5
added batch logging #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
334f7e7
f8e2e8b
9009666
49daf8b
d8daa5d
610a023
2e2ba0e
01c62e2
058e1cd
e4cc258
7c00406
3743ce5
be446fa
f9df13b
b195ad6
6186fc6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,5 @@ config/*.json | |
| config/facs/*.json | ||
| logs | ||
| sec | ||
| package-lock.json | ||
| *.swp | ||
| *.swo | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -3,6 +3,9 @@ | |||||
| const util = require('util') | ||||||
| const Base = require('bfx-facs-base') | ||||||
|
|
||||||
| const { formatTime } = require('./utils/date-time') | ||||||
| const { createHash } = require('crypto') | ||||||
|
|
||||||
| class GrcSlack extends Base { | ||||||
| constructor (caller, opts, ctx) { | ||||||
| super(caller, opts, ctx) | ||||||
|
|
@@ -12,6 +15,44 @@ class GrcSlack extends Base { | |||||
| this.init() | ||||||
|
|
||||||
| if (opts.conf) this.conf = opts.conf | ||||||
|
|
||||||
| if (this.conf.errorBatching && opts.lru) { | ||||||
| this._errorBatch = opts.lru | ||||||
| this._validateBatchingConfig() | ||||||
| this._initErrorBatching() | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| _validateBatchingConfig () { | ||||||
| if (!this.conf.errorBatching.interval || this.conf.errorBatching.interval <= 0) { | ||||||
| throw new Error('errorBatching.interval is required and must be greater than 0') | ||||||
| } | ||||||
| if (!this.conf.errorBatching.maxMessageLength || this.conf.errorBatching.maxMessageLength <= 0) { | ||||||
| throw new Error('errorBatching.maxMessageLength is required and must be greater than 0') | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| _initErrorBatching () { | ||||||
| this._errorBatchTimer = setInterval(() => { | ||||||
| this._processBatchedErrors() | ||||||
| }, this.conf.errorBatching.interval) | ||||||
|
|
||||||
| this._errorBatchTimer.unref() | ||||||
| } | ||||||
|
|
||||||
| async _stop (cb) { | ||||||
| if (this._errorBatchTimer) { | ||||||
| clearInterval(this._errorBatchTimer) | ||||||
| this._errorBatchTimer = null | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| await this._processBatchedErrors() | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this is called on stop? wouldn't it hang for a long time possibly?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think it's called to flush the last batch of errors, otherwise it'll be lost |
||||||
| } catch (err) { | ||||||
| console.error('Failed to process final batch of errors during shutdown', err) | ||||||
| } finally { | ||||||
| super._stop(cb) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| message (reqChannel, message) { | ||||||
|
|
@@ -21,7 +62,7 @@ class GrcSlack extends Base { | |||||
| const maxLength = slack.max_length || 1024 | ||||||
| const env = (slack.env) ? `Env: ${slack.env}, ` : '' | ||||||
| const rawText = env + message | ||||||
| const text = (maxLength) ? rawText.substr(0, maxLength) : rawText | ||||||
| const text = (rawText.length > maxLength) ? rawText.slice(0, maxLength) : rawText | ||||||
| const channel = reqChannel || slack.channel | ||||||
| const send = [{ channel, text }] | ||||||
|
|
||||||
|
|
@@ -41,6 +82,161 @@ class GrcSlack extends Base { | |||||
|
|
||||||
| return this.message(reqChannel, `${extraP}${errTag}${error}`) | ||||||
| } | ||||||
|
|
||||||
| _createErrorGroupKey (reqChannel, sourceName) { | ||||||
| return `${reqChannel}:${sourceName}` | ||||||
| } | ||||||
|
|
||||||
| _createErrorKey (reqChannel, err, sourceName = 'unknown') { | ||||||
| const errorMsg = err?.message || err?.toString() || 'Unknown error' | ||||||
| const hash = createHash('sha1').update(errorMsg).digest('hex') | ||||||
| return this._createErrorGroupKey(reqChannel, sourceName) + `:${hash}` | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Batch log error to slack | ||||||
| * @param {string} reqChannel - Slack channel to log the error to, if not provided, the channel from the config will be used | ||||||
| * @param {Error} err - Error to log | ||||||
| * @param {string} sourceName - Source of the error | ||||||
| * @param {Object} payload - Payload to log | ||||||
| * @param {...any} extra - Additional information to log | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is extra a required parameter?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no we're just putting all other params in 1 object. It can be undefined also
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| */ | ||||||
| async logErrorEnqueue (reqChannel, err, sourceName, payload, ...extra) { | ||||||
| if (!this._errorBatch) { | ||||||
| console.error('Error batching not initialized, falling back to direct log') | ||||||
| return this.logError(reqChannel, err, sourceName, payload, ...extra) | ||||||
| } | ||||||
|
|
||||||
| if (!reqChannel) { | ||||||
| reqChannel = this.conf.channel | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| const errorKey = this._createErrorKey(reqChannel, err, sourceName) | ||||||
|
|
||||||
| const now = new Date() | ||||||
| let errorEntry = this._errorBatch.get(errorKey) | ||||||
|
|
||||||
| if (!errorEntry) { | ||||||
| errorEntry = { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added it |
||||||
| errorMessage: err.message, | ||||||
| sourceName, | ||||||
| reqChannel, | ||||||
| payloads: [ | ||||||
| { payload, extras: extra } | ||||||
| ], | ||||||
| count: 1, | ||||||
| firstSeen: now, | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like you use these dates just to know when you got first and last errors. Use separate properties, do not store inside the object and avoid excessive array passes |
||||||
| lastSeen: now | ||||||
| } | ||||||
| this._errorBatch.set(errorKey, errorEntry) | ||||||
| return | ||||||
| } | ||||||
|
|
||||||
| errorEntry.count++ | ||||||
| errorEntry.lastSeen = now | ||||||
| errorEntry.payloads.push({ payload, extras: extra }) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you're adding same stuff twice if you just initialized cache entry. btw if we spam under same errorKey, aren't we bloating the payloads array? Size should be restricted, otherwise we can run out of memory |
||||||
|
|
||||||
| // Keep only the last 3 payloads | ||||||
| if (errorEntry.payloads.length > 3) { | ||||||
| errorEntry.payloads.shift() | ||||||
| } | ||||||
| } catch (e) { | ||||||
| console.error('Error batching failed, falling back to direct log', e) | ||||||
| await this.logError(reqChannel, err, sourceName, payload, ...extra) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| async _processBatchedErrors () { | ||||||
| if (!this._errorBatch || this._errorBatch.cache.length === 0) { | ||||||
| return | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| const errorGroups = new Map() // group errors by channel and source | ||||||
|
|
||||||
| const allEntries = Object.values(this._errorBatch.cache.cache || {}) | ||||||
|
|
||||||
| for (const { value: errorEntry } of allEntries) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the part below is overcomplicated. You already grouped by channel + message + source. Each grouped item has a list of payloads. Go over each item, concatenate payloads until you reach restricted length and send a message, continue until the lru is exhausted. Pay attention to not hit limits while you're doing so (i.e. overall messages stored should not be too high relatively to the length of a single message sent so we wouldn't send too many msgs too quickly) |
||||||
| const groupKey = this._createErrorGroupKey(errorEntry.reqChannel, errorEntry.sourceName) | ||||||
| if (!errorGroups.has(groupKey)) { | ||||||
| errorGroups.set(groupKey, { | ||||||
| errors: [], | ||||||
| totalCount: 0, | ||||||
| earliestTime: Infinity, | ||||||
| latestTime: -Infinity | ||||||
| }) | ||||||
| } | ||||||
|
|
||||||
| const group = errorGroups.get(groupKey) | ||||||
| group.errors.push(errorEntry) | ||||||
| group.totalCount += errorEntry.count | ||||||
|
|
||||||
| // Track time range | ||||||
| const firstTime = errorEntry.firstSeen.getTime() | ||||||
| const lastTime = errorEntry.lastSeen.getTime() | ||||||
| if (firstTime < group.earliestTime) group.earliestTime = firstTime | ||||||
| if (lastTime > group.latestTime) group.latestTime = lastTime | ||||||
| } | ||||||
|
|
||||||
| for (const { errors, totalCount, earliestTime, latestTime } of errorGroups.values()) { | ||||||
| const { reqChannel, sourceName } = errors[0] | ||||||
| await this._sendBatchedErrorMessage(reqChannel, sourceName, errors, totalCount, earliestTime, latestTime) | ||||||
| } | ||||||
| } catch (e) { | ||||||
| console.error('Failed to process batched errors', e) | ||||||
| } finally { | ||||||
| this._errorBatch.clear() | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| async _sendBatchedErrorMessage (reqChannel, sourceName, errors, totalErrors, earliestTime, latestTime) { | ||||||
| const timeRange = this._formatTimeRange(earliestTime, latestTime) | ||||||
|
|
||||||
| let message = `*Batched Error Report - ${sourceName}*\n` | ||||||
| message += `*Summary:* ${totalErrors} errors across ${errors.length} types (${timeRange})\n\n` | ||||||
|
|
||||||
| let truncated = false | ||||||
| let i = 0 | ||||||
| for (; i < errors.length; i++) { | ||||||
| if (truncated) break | ||||||
|
|
||||||
| const error = errors[i] | ||||||
| message += `• *${error.errorMessage}* (${error.count}x)\n` | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about the rest message info? like extra, etc.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that goes in payload |
||||||
| message += ' Payloads:\n' | ||||||
|
|
||||||
| for (const item of error.payloads) { | ||||||
| let payloadStr = ` - ${JSON.stringify(item.payload)}\n` | ||||||
| if (Array.isArray(item.extras) && item.extras.length) { | ||||||
| payloadStr += ` Extras: ${JSON.stringify(item.extras)}\n` | ||||||
| } | ||||||
|
|
||||||
| if (message.length + payloadStr.length > this.conf.errorBatching.maxMessageLength) { | ||||||
| truncated = true | ||||||
| break | ||||||
| } | ||||||
|
|
||||||
| message += payloadStr | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if (truncated) { | ||||||
| message += `\n... message truncated (${errors.length - i} more error types)` | ||||||
| } | ||||||
|
|
||||||
| await this.logError(reqChannel, message) | ||||||
| } | ||||||
|
|
||||||
| _formatTimeRange (earliestTime, latestTime) { | ||||||
| const earliest = new Date(earliestTime) | ||||||
| const latest = new Date(latestTime) | ||||||
|
|
||||||
| if (earliestTime === latestTime) { | ||||||
| return formatTime(earliest) | ||||||
| } | ||||||
|
|
||||||
| return `${formatTime(earliest)} - ${formatTime(latest)}` | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| module.exports = GrcSlack | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| module.exports = { | ||
| testEnvironment: 'node', | ||
| testMatch: ['<rootDir>/test/**/*.test.js'], | ||
| verbose: true | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why are we using lru and not map? with lru we would lose data right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's in order to keep the buffer fixed in length. If a worker starts to spam, we don't want to keep all the spam; instead we track, say, the last 100 messages of a certain type.
for instance, a worker sends error messages for different occasions, i.e. API call failure and processing issue. Let's say it reports 1 api call failure and 1000 processing issues, We want both types to be reported, however we don't want 1000 processing issues (same type issue) being reported, we cut it down to, let's say, 10 only
that was the idea behind this update in general. If a worker starts spamming, this logic would still allow us to know about the problem, however, would cut down api calls and payload significantly.
anyway, if I missed the purpose, @AceTheNinja please clarify
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
couldn't have explained it better 🫡 @batrudinych