Skip to content

Commit fbdc9d3

Browse files
authored
fix: add metrics for agentkeepalive (#551)
1 parent ddd1558 commit fbdc9d3

19 files changed

+622
-200
lines changed

package-lock.json

+422-153
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@
5555
"conventional-changelog-conventionalcommits": "^5.0.0",
5656
"crypto-js": "^4.2.0",
5757
"dotenv": "^16.0.0",
58-
"fastify": "^4.8.1",
58+
"fastify": "^4.28.1",
5959
"fastify-metrics": "^10.2.0",
60-
"fastify-plugin": "^4.0.0",
60+
"fastify-plugin": "^4.5.1",
6161
"fastify-xml-body-parser": "^2.2.0",
6262
"fs-extra": "^10.0.1",
6363
"fs-xattr": "0.3.1",
@@ -113,7 +113,7 @@
113113
"ts-node-dev": "^1.1.8",
114114
"tsx": "^4.16.0",
115115
"tus-js-client": "^3.1.0",
116-
"typescript": "^4.5.5"
116+
"typescript": "^5.6.2"
117117
},
118118
"bin": "./dist/server.js"
119119
}

src/http/plugins/storage.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const storageBackend = createStorageBackend(storageBackendType)
1717

1818
export const storage = fastifyPlugin(
1919
async function storagePlugin(fastify) {
20-
fastify.decorateRequest('storage', undefined)
20+
fastify.decorateRequest('storage', null)
2121
fastify.addHook('preHandler', async (request) => {
2222
const database = new StorageKnexDB(request.db, {
2323
tenantId: request.tenantId,

src/http/plugins/tracing.ts

+10-5
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ export const tracing = fastifyPlugin(
4242
const span = trace.getSpan(context.active())
4343

4444
if (span) {
45-
// We collect logs only in full and logs mode
45+
// We collect logs only in full, logs, and debug modes
4646
if (
4747
tracingEnabled &&
4848
request.tracingMode &&
49-
!['full', 'logs'].includes(request.tracingMode)
49+
!['full', 'logs', 'debug'].includes(request.tracingMode)
5050
) {
5151
traceCollector.clearTrace(span.spanContext().traceId)
5252
}
@@ -68,7 +68,7 @@ export const traceServerTime = fastifyPlugin(
6868
const spans = traceCollector.getSpansForTrace(traceId)
6969
if (spans) {
7070
try {
71-
const serverTimingHeaders = spansToServerTimings(spans)
71+
const serverTimingHeaders = spansToServerTimings(spans, reply.statusCode >= 500)
7272

7373
request.serverTimings = serverTimingHeaders
7474

@@ -94,12 +94,15 @@ export const traceServerTime = fastifyPlugin(
9494
})
9595

9696
fastify.addHook('onRequestAbort', async (req) => {
97-
const traceId = trace.getSpan(context.active())?.spanContext().traceId
97+
const span = trace.getSpan(context.active())
98+
const traceId = span?.spanContext().traceId
99+
100+
span?.setAttribute('req_aborted', true)
98101

99102
if (traceId) {
100103
const spans = traceCollector.getSpansForTrace(traceId)
101104
if (spans) {
102-
req.serverTimings = spansToServerTimings(spans)
105+
req.serverTimings = spansToServerTimings(spans, true)
103106
}
104107
traceCollector.clearTrace(traceId)
105108
}
@@ -155,6 +158,8 @@ function spansToServerTimings(
155158
spanName,
156159
duration,
157160
action: span.item.attributes['db.statement'],
161+
error: span.item.attributes.error,
162+
status: span.item.status,
158163
host: hostName
159164
? isIP(hostName)
160165
? hostName

src/http/routes/object/deleteObject.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { FastifyInstance } from 'fastify'
2-
import { FromSchema, JSONSchema } from 'json-schema-to-ts'
2+
import { FromSchema } from 'json-schema-to-ts'
33
import { createDefaultSchema, createResponse } from '../../routes-helper'
44
import { AuthenticatedRequest } from '../../types'
55
import { ROUTE_OPERATIONS } from '../operations'

src/http/routes/object/updateObject.ts

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ export default async function routes(fastify: FastifyInstance) {
7777
.uploadOverridingObject(request, {
7878
owner,
7979
objectName: objectName,
80+
signal: request.signals.body.signal,
8081
})
8182

8283
return response.status(objectMetadata?.httpStatusCode ?? 200).send({

src/http/routes/object/uploadSignedObject.ts

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ export default async function routes(fastify: FastifyInstance) {
9494
owner,
9595
objectName,
9696
isUpsert: upsert,
97+
signal: request.signals.body.signal,
9798
})
9899

99100
return response.status(objectMetadata?.httpStatusCode ?? 200).send({

src/http/routes/s3/commands/upload-part.ts

+13-10
Original file line numberDiff line numberDiff line change
@@ -97,16 +97,19 @@ export default function UploadPart(s3Router: S3Router) {
9797

9898
const metadata = s3Protocol.parseMetadataHeaders(req.Headers)
9999

100-
return s3Protocol.putObject({
101-
Body: ctx.req as any,
102-
Bucket: req.Params.Bucket,
103-
Key: req.Params['*'],
104-
CacheControl: req.Headers?.['cache-control'],
105-
ContentType: req.Headers?.['content-type'],
106-
Expires: req.Headers?.['expires'] ? new Date(req.Headers?.['expires']) : undefined,
107-
ContentEncoding: req.Headers?.['content-encoding'],
108-
Metadata: metadata,
109-
})
100+
return s3Protocol.putObject(
101+
{
102+
Body: ctx.req as any,
103+
Bucket: req.Params.Bucket,
104+
Key: req.Params['*'],
105+
CacheControl: req.Headers?.['cache-control'],
106+
ContentType: req.Headers?.['content-type'],
107+
Expires: req.Headers?.['expires'] ? new Date(req.Headers?.['expires']) : undefined,
108+
ContentEncoding: req.Headers?.['content-encoding'],
109+
Metadata: metadata,
110+
},
111+
ctx.signals.body
112+
)
110113
}
111114
)
112115
}

src/http/routes/tus/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ type MultiPartRequest = http.IncomingMessage & {
5959

6060
function createTusStore() {
6161
if (storageBackendType === 's3') {
62-
const agent = createAgent(storageS3Endpoint?.includes('http://') ? 'http' : 'https')
62+
const agent = createAgent('s3_tus')
6363
return new S3Store({
6464
partSize: tusPartSize * 1024 * 1024, // Each uploaded part will have ${tusPartSize}MB,
6565
expirationPeriodInMilliseconds: tusUrlExpiryMs,

src/internal/monitoring/metrics.ts

+25
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,28 @@ export const DbActiveConnection = new client.Gauge({
6868
help: 'Number of database connections',
6969
labelNames: ['region', 'is_external'],
7070
})
71+
72+
// Create Prometheus metrics
73+
export const HttpPoolSocketsGauge = new client.Gauge({
74+
name: 'storage_api_http_pool_busy_sockets',
75+
help: 'Number of busy sockets currently in use',
76+
labelNames: ['name', 'region', 'protocol'],
77+
})
78+
79+
export const HttpPoolFreeSocketsGauge = new client.Gauge({
80+
name: 'storage_api_http_pool_free_sockets',
81+
help: 'Number of free sockets available for reuse',
82+
labelNames: ['name', 'region', 'protocol'],
83+
})
84+
85+
export const HttpPoolPendingRequestsGauge = new client.Gauge({
86+
name: 'storage_api_http_pool_requests',
87+
help: 'Number of pending requests waiting for a socket',
88+
labelNames: ['name', 'region', 'protocol'],
89+
})
90+
91+
export const HttpPoolErrorGauge = new client.Gauge({
92+
name: 'storage_api_http_pool_errors',
93+
help: 'Number of HTTP pool socket errors, by error type',
94+
labelNames: ['name', 'region', 'type', 'protocol'],
95+
})

src/internal/monitoring/otel-instrumentation.ts

+7
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ class ClassInstrumentation implements Instrumentation {
108108
span.setStatus({ code: SpanStatusCode.OK })
109109
return result
110110
} catch (error) {
111+
if (error instanceof Error) {
112+
span.setAttributes({
113+
error: JSON.stringify({ message: error.message, stack: error.stack }),
114+
stack: error.stack,
115+
})
116+
}
117+
111118
span.setStatus({
112119
code: SpanStatusCode.ERROR,
113120
message: error instanceof Error ? error.message : String(error),

src/internal/monitoring/otel.ts

+6
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import { S3Backend } from '@storage/backend'
3131
import { StorageKnexDB } from '@storage/database'
3232
import { TenantConnection } from '@internal/database'
3333
import { S3Store } from '@tus/s3-store'
34+
import { Upload } from '@aws-sdk/lib-storage'
3435

3536
const tracingEnabled = process.env.TRACING_ENABLED === 'true'
3637
const headersEnv = process.env.OTEL_EXPORTER_OTLP_TRACES_HEADERS || ''
@@ -239,6 +240,11 @@ const sdk = new NodeSDK({
239240
},
240241
setName: (name, attrs) => 'S3.' + attrs.operation,
241242
}),
243+
new ClassInstrumentation({
244+
targetClass: Upload,
245+
enabled: true,
246+
methodsToInstrument: ['done', '__notifyProgress'],
247+
}),
242248
getNodeAutoInstrumentations({
243249
'@opentelemetry/instrumentation-http': {
244250
enabled: false,

src/storage/backend/adapter.ts

+4-2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ export abstract class StorageBackendAdapter {
8282
version: string | undefined,
8383
body: NodeJS.ReadableStream,
8484
contentType: string,
85-
cacheControl: string
85+
cacheControl: string,
86+
signal?: AbortSignal
8687
): Promise<ObjectMetadata> {
8788
throw new Error('uploadObject not implemented')
8889
}
@@ -172,7 +173,8 @@ export abstract class StorageBackendAdapter {
172173
uploadId: string,
173174
partNumber: number,
174175
body?: string | Uint8Array | Buffer | Readable,
175-
length?: number
176+
length?: number,
177+
signal?: AbortSignal
176178
): Promise<{ ETag?: string }> {
177179
throw new Error('not implemented')
178180
}

src/storage/backend/s3.ts

+91-12
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,91 @@ import { ERRORS, StorageBackendError } from '@internal/errors'
3030
import { getConfig } from '../../config'
3131
import Agent, { HttpsAgent } from 'agentkeepalive'
3232
import { Readable } from 'stream'
33+
import {
34+
HttpPoolErrorGauge,
35+
HttpPoolFreeSocketsGauge,
36+
HttpPoolPendingRequestsGauge,
37+
HttpPoolSocketsGauge,
38+
} from '@internal/monitoring/metrics'
39+
40+
const { storageS3MaxSockets, region } = getConfig()
41+
42+
const watchers: NodeJS.Timeout[] = []
3343

34-
const { storageS3MaxSockets } = getConfig()
44+
process.once('SIGTERM', () => {
45+
watchers.forEach((watcher) => {
46+
clearInterval(watcher)
47+
})
48+
})
3549

3650
/**
3751
* Creates keep-alive HTTP and HTTPS agents and reports their pool metrics under the given name
38-
* @param protocol
52+
* @param name
3953
*/
40-
export function createAgent(protocol: 'http' | 'https') {
54+
export function createAgent(name: string) {
4155
const agentOptions = {
4256
maxSockets: storageS3MaxSockets,
4357
keepAlive: true,
4458
keepAliveMsecs: 1000,
59+
freeSocketTimeout: 1000 * 15,
60+
}
61+
62+
const httpAgent = new Agent(agentOptions)
63+
const httpsAgent = new HttpsAgent(agentOptions)
64+
65+
if (httpsAgent) {
66+
const watcher = setInterval(() => {
67+
const httpStatus = httpAgent.getCurrentStatus()
68+
const httpsStatus = httpsAgent.getCurrentStatus()
69+
updateHttpPoolMetrics(name, 'http', httpStatus)
70+
updateHttpPoolMetrics(name, 'https', httpsStatus)
71+
}, 5000)
72+
73+
watchers.push(watcher)
74+
}
75+
76+
return { httpAgent, httpsAgent }
77+
}
78+
79+
// Function to update Prometheus metrics based on the current status of the agent
80+
function updateHttpPoolMetrics(name: string, protocol: string, status: Agent.AgentStatus): void {
81+
// Calculate the number of busy sockets by iterating over the `sockets` object
82+
let busySocketCount = 0
83+
for (const host in status.sockets) {
84+
if (status.sockets.hasOwnProperty(host)) {
85+
busySocketCount += status.sockets[host]
86+
}
4587
}
4688

47-
return protocol === 'http'
48-
? { httpAgent: new Agent(agentOptions) }
49-
: { httpsAgent: new HttpsAgent(agentOptions) }
89+
// Calculate the number of free sockets by iterating over the `freeSockets` object
90+
let freeSocketCount = 0
91+
for (const host in status.freeSockets) {
92+
if (status.freeSockets.hasOwnProperty(host)) {
93+
freeSocketCount += status.freeSockets[host]
94+
}
95+
}
96+
97+
// Calculate the number of pending requests by iterating over the `requests` object
98+
let pendingRequestCount = 0
99+
for (const host in status.requests) {
100+
if (status.requests.hasOwnProperty(host)) {
101+
pendingRequestCount += status.requests[host]
102+
}
103+
}
104+
105+
// Update the metrics with calculated values
106+
HttpPoolSocketsGauge.set({ name, region, protocol }, busySocketCount)
107+
HttpPoolFreeSocketsGauge.set({ name, region, protocol }, freeSocketCount)
108+
HttpPoolPendingRequestsGauge.set({ name, region }, pendingRequestCount)
109+
HttpPoolErrorGauge.set({ name, region, type: 'socket_error', protocol }, status.errorSocketCount)
110+
HttpPoolErrorGauge.set(
111+
{ name, region, type: 'timeout_socket_error', protocol },
112+
status.timeoutSocketCount
113+
)
114+
HttpPoolErrorGauge.set(
115+
{ name, region, type: 'create_socket_error', protocol },
116+
status.createSocketErrorCount
117+
)
50118
}
51119

52120
export interface S3ClientOptions {
@@ -56,7 +124,7 @@ export interface S3ClientOptions {
56124
accessKey?: string
57125
secretKey?: string
58126
role?: string
59-
httpAgent?: { httpAgent: Agent } | { httpsAgent: HttpsAgent }
127+
httpAgent?: { httpAgent: Agent; httpsAgent: HttpsAgent }
60128
requestTimeout?: number
61129
downloadTimeout?: number
62130
uploadTimeout?: number
@@ -75,18 +143,21 @@ export class S3Backend implements StorageBackendAdapter {
75143
// Default client for API operations
76144
this.client = this.createS3Client({
77145
...options,
146+
name: 's3_default',
78147
requestTimeout: options.requestTimeout,
79148
})
80149

81150
// Upload client exclusively for upload operations
82151
this.uploadClient = this.createS3Client({
83152
...options,
153+
name: 's3_upload',
84154
requestTimeout: options.uploadTimeout,
85155
})
86156

87157
// Download client exclusively for download operations
88158
this.downloadClient = this.createS3Client({
89159
...options,
160+
name: 's3_download',
90161
requestTimeout: options.downloadTimeout,
91162
})
92163
}
@@ -144,14 +215,16 @@ export class S3Backend implements StorageBackendAdapter {
144215
* @param body
145216
* @param contentType
146217
* @param cacheControl
218+
* @param signal
147219
*/
148220
async uploadObject(
149221
bucketName: string,
150222
key: string,
151223
version: string | undefined,
152224
body: NodeJS.ReadableStream,
153225
contentType: string,
154-
cacheControl: string
226+
cacheControl: string,
227+
signal?: AbortSignal
155228
): Promise<ObjectMetadata> {
156229
try {
157230
const paralellUploadS3 = new Upload({
@@ -166,6 +239,14 @@ export class S3Backend implements StorageBackendAdapter {
166239
},
167240
})
168241

242+
signal?.addEventListener(
243+
'abort',
244+
() => {
245+
paralellUploadS3.abort()
246+
},
247+
{ once: true }
248+
)
249+
169250
const data = (await paralellUploadS3.done()) as CompleteMultipartUploadCommandOutput
170251

171252
const metadata = await this.headObject(bucketName, key, version)
@@ -451,10 +532,8 @@ export class S3Backend implements StorageBackendAdapter {
451532
}
452533
}
453534

454-
protected createS3Client(options: S3ClientOptions) {
455-
const storageS3Protocol = options.endpoint?.includes('http://') ? 'http' : 'https'
456-
457-
const agent = options.httpAgent ? options.httpAgent : createAgent(storageS3Protocol)
535+
protected createS3Client(options: S3ClientOptions & { name: string }) {
536+
const agent = options.httpAgent ?? createAgent(options.name)
458537

459538
const params: S3ClientConfig = {
460539
region: options.region,

0 commit comments

Comments
 (0)