Skip to content

feat: Added New Relic Control health check #2841

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

'use strict'

// Record opening times before loading any other files. These two captures
// must remain the first statements that run: any `require` placed above them
// would execute before the timestamps are taken.
const preAgentTime = process.uptime()
const agentStart = Date.now()

// Loaded only after the start times above have been recorded.
const HealthReporter = require('./lib/health-reporter')
Expand Down Expand Up @@ -154,6 +156,7 @@ function createAgent(config) {
'New Relic requires that you name this application!\n' +
'Set app_name in your newrelic.js or newrelic.cjs file or set environment variable\n' +
'NEW_RELIC_APP_NAME. Not starting!'
agent.healthReporter.setStatus(HealthReporter.STATUS_MISSING_APP_NAME)
throw new Error(message)
}

Expand All @@ -167,6 +170,7 @@ function createAgent(config) {

agent.start(function afterStart(error) {
if (error) {
agent.healthReporter.setStatus(HealthReporter.STATUS_INTERNAL_UNEXPECTED_ERROR)
const errorMessage = 'New Relic for Node.js halted startup due to an error:'
logger.error(error, errorMessage)

Expand Down
45 changes: 28 additions & 17 deletions lib/agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const {
const synthetics = require('./synthetics')
const Harvester = require('./harvester')
const { createFeatureUsageMetrics } = require('./util/application-logging')
const HealthReporter = require('./health-reporter')

// Map of valid states to whether or not data collection is valid
const STATES = {
Expand Down Expand Up @@ -162,6 +163,8 @@ function Agent(config) {
throw new Error('Agent must be created with a configuration!')
}

this.healthReporter = new HealthReporter({ agentConfig: config })

// The agent base attributes which last throughout its lifetime.
this._state = 'stopped'
this.config = config
Expand Down Expand Up @@ -326,6 +329,7 @@ Agent.prototype.start = function start(callback) {
if (this.config.agent_enabled !== true) {
logger.warn('The New Relic Node.js agent is disabled by its configuration. ' + 'Not starting!')

this.healthReporter.setStatus(HealthReporter.STATUS_AGENT_DISABLED)
this.setState('stopped')
return process.nextTick(callback)
}
Expand All @@ -342,17 +346,21 @@ Agent.prototype.start = function start(callback) {
'Has a license key been specified in the agent configuration ' +
'file or via the NEW_RELIC_LICENSE_KEY environment variable?'
)
this.healthReporter.setStatus(HealthReporter.STATUS_LICENSE_KEY_MISSING)

this.setState('errored')
sampler.stop()
return process.nextTick(function onNextTick() {
callback(new Error('Not starting without license key!'))
agent.healthReporter.stop(() => {
callback(new Error('Not starting without license key!'))
})
})
}
logger.info('Starting New Relic for Node.js connection process.')

this.collector.connect(function onStartConnect(error, response) {
if (error || response.shouldShutdownRun()) {
agent.healthReporter.setStatus(HealthReporter.STATUS_CONNECT_ERROR)
agent.setState('errored')
sampler.stop()
callback(error || new Error('Failed to connect to collector'), response && response.payload)
Expand Down Expand Up @@ -476,23 +484,26 @@ Agent.prototype.stop = function stop(callback) {

sampler.stop()

if (this.collector.isConnected()) {
this.collector.shutdown(function onShutdown(error) {
if (error) {
agent.setState('errored')
logger.warn(error, 'Got error shutting down connection to New Relic:')
} else {
agent.setState('stopped')
logger.info('Stopped New Relic for Node.js.')
}

callback(error)
})
} else {
logger.trace('Collector was not connected, invoking callback.')
this.healthReporter.setStatus(HealthReporter.STATUS_AGENT_SHUTDOWN)
this.healthReporter.stop(() => {
if (agent.collector.isConnected()) {
agent.collector.shutdown(function onShutdown(error) {
if (error) {
agent.setState('errored')
logger.warn(error, 'Got error shutting down connection to New Relic:')
} else {
agent.setState('stopped')
logger.info('Stopped New Relic for Node.js.')
}

callback(error)
})
} else {
logger.trace('Collector was not connected, invoking callback.')

process.nextTick(callback)
}
process.nextTick(callback)
}
})
}

/**
Expand Down
15 changes: 15 additions & 0 deletions lib/collector/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const CollectorResponse = require('./response')
const facts = require('./facts')
const logger = require('../logger').child({ component: 'collector_api' })
const RemoteMethod = require('./remote-method')
const HealthReporter = require('../health-reporter')

const NAMES = require('../metrics/names')

Expand Down Expand Up @@ -221,6 +222,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
ctx.errors.push(error)
} else if (response && SUCCESS.has(response.status)) {
dumpErrors(ctx.errors, 'connect')
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HEALTHY)
ctx.callback(null, CollectorResponse.success(response.payload))
return
}
Expand All @@ -231,6 +233,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
// Retry everything except for an explicit Disconnect response code.
if (response.status === 410 || response.agentRun === AGENT_RUN_BEHAVIOR.SHUTDOWN) {
logger.error('The New Relic collector rejected this agent.')
this._agent.healthReporter.setStatus(HealthReporter.STATUS_FORCED_DISCONNECT)
return ctx.callback(null, CollectorResponse.fatal(response.payload))
} else if (response.status === 401) {
logger.warn(
Expand All @@ -240,6 +243,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
' (status code %s)',
response.status
)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_INVALID_LICENSE_KEY)
} else if (this._isProxyMisconfigured(error)) {
logger.warn(
error,
Expand All @@ -248,6 +252,17 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
'SSL(https). If your proxy is configured to accept connections over http, try ' +
'setting `proxy` to a fully qualified URL(e.g http://proxy-host:8080).'
)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HTTP_PROXY_MISCONFIGURED)
} else {
// Sometimes we get a `CollectorResponse` instance instead of an
// `http.ServerResponse`. In such cases, we do not have access to the
// status code.
let msg = 'Unexpected error communicating with New Relic backend.'
if (response.status) {
msg = `Received error status code from New Relic backend: ${response.status}.`
}
logger.warn(error, msg)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_BACKEND_ERROR)
}

const backoff = BACKOFFS[Math.min(ctx.attempts, ctx.max) - 1]
Expand Down
39 changes: 39 additions & 0 deletions lib/config/default.js
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,45 @@ defaultConfig.definition = () => ({
formatter: boolean,
default: true
},

/**
* Collects configuration related to New Relic Agent Control, i.e. centralized
* agent management in container-based environments.
*/
agent_control: {
/**
* Indicates that the agent is being managed by Agent Control. Must be set
* to true to enable health monitoring.
*/
enabled: {
formatter: boolean,
default: false
},

/**
* Settings specific to the health monitoring aspect of Agent Control.
*/
health: {
/**
* A string location (a `file://` URI in the default value) of the
* directory that the agent is expected to write health status files to.
* Must be set for health monitoring to be enabled.
*/
delivery_location: {
default: 'file:///newrelic/apm/health'
},

/**
* The time, in seconds, that the agent should wait between writing
* updates to its health status. The default interval is 5 seconds.
*/
frequency: {
formatter: int,
default: 5
}
}
},

/**
* The default Apdex tolerating / threshold value for applications, in
* seconds. The default for Node is apdexT to 100 milliseconds, which is
Expand Down
Loading
Loading