-
Notifications
You must be signed in to change notification settings - Fork 318
feat: add health monitoring utility for API tracking #486
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,195 @@ | ||||||
| /** | ||||||
| * @license | ||||||
| * Copyright 2024 Google LLC | ||||||
| * | ||||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
| * you may not use this file except in compliance with the License. | ||||||
| * You may obtain a copy of the License at | ||||||
| * | ||||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||||
| * | ||||||
| * Unless required by applicable law or agreed to in writing, software | ||||||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
| * See the License for the specific language governing permissions and | ||||||
| * limitations under the License. | ||||||
| */ | ||||||
|
|
||||||
| import { HealthMonitor } from "./health-monitor"; | ||||||
|
|
||||||
| describe("HealthMonitor", () => { | ||||||
| let monitor: HealthMonitor; | ||||||
|
|
||||||
| beforeEach(() => { | ||||||
| monitor = new HealthMonitor(); | ||||||
| }); | ||||||
|
|
||||||
| describe("recordSuccess", () => { | ||||||
| it("should increment successful requests", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
|
|
||||||
| expect(metrics.totalRequests).toBe(1); | ||||||
| expect(metrics.successfulRequests).toBe(1); | ||||||
| expect(metrics.failedRequests).toBe(0); | ||||||
| }); | ||||||
|
|
||||||
| it("should calculate average response time", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordSuccess(200); | ||||||
| monitor.recordSuccess(300); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.averageResponseTime).toBe(200); | ||||||
| }); | ||||||
|
|
||||||
| it("should update last request timestamp", () => { | ||||||
| const before = Date.now(); | ||||||
| monitor.recordSuccess(50); | ||||||
| const after = Date.now(); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.lastRequestTimestamp).toBeGreaterThanOrEqual(before); | ||||||
| expect(metrics.lastRequestTimestamp).toBeLessThanOrEqual(after); | ||||||
| }); | ||||||
| }); | ||||||
|
|
||||||
| describe("recordFailure", () => { | ||||||
| it("should increment failed requests", () => { | ||||||
| monitor.recordFailure(new Error("Test error")); | ||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
|
|
||||||
| expect(metrics.totalRequests).toBe(1); | ||||||
| expect(metrics.successfulRequests).toBe(0); | ||||||
| expect(metrics.failedRequests).toBe(1); | ||||||
| }); | ||||||
|
|
||||||
| it("should track error types", () => { | ||||||
| monitor.recordFailure(new Error("First error")); | ||||||
| monitor.recordFailure(new Error("Second error")); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.errors).toHaveLength(1); | ||||||
| expect(metrics.errors[0].type).toBe("Error"); | ||||||
| expect(metrics.errors[0].count).toBe(2); | ||||||
| }); | ||||||
|
|
||||||
| it("should track different error types separately", () => { | ||||||
| monitor.recordFailure(new Error("Error 1")); | ||||||
| monitor.recordFailure(new TypeError("TypeError 1")); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.errors).toHaveLength(2); | ||||||
|
|
||||||
| const errorTypes = metrics.errors.map((e) => e.type); | ||||||
| expect(errorTypes).toContain("Error"); | ||||||
| expect(errorTypes).toContain("TypeError"); | ||||||
| }); | ||||||
| }); | ||||||
|
|
||||||
| describe("getHealthMetrics", () => { | ||||||
| it("should return correct success rate", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.successRate).toBeCloseTo(66.67, 1); | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the
Suggested change
|
||||||
| }); | ||||||
|
|
||||||
| it("should return 0 success rate when no requests", () => { | ||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.successRate).toBe(0); | ||||||
| }); | ||||||
|
|
||||||
| it("should return 100 success rate when all succeed", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordSuccess(100); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.successRate).toBe(100); | ||||||
| }); | ||||||
| }); | ||||||
|
|
||||||
| describe("isHealthy", () => { | ||||||
| it("should return true when success rate is above threshold", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordSuccess(100); | ||||||
|
|
||||||
| expect(monitor.isHealthy(95)).toBe(true); | ||||||
| }); | ||||||
|
|
||||||
| it("should return false when success rate is below threshold", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
|
|
||||||
| expect(monitor.isHealthy(95)).toBe(false); | ||||||
| }); | ||||||
|
|
||||||
| it("should return true when no requests have been made", () => { | ||||||
| expect(monitor.isHealthy()).toBe(true); | ||||||
| }); | ||||||
|
|
||||||
| it("should use custom threshold", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
|
|
||||||
| expect(monitor.isHealthy(50)).toBe(true); | ||||||
| expect(monitor.isHealthy(60)).toBe(false); | ||||||
| }); | ||||||
| }); | ||||||
|
|
||||||
| describe("reset", () => { | ||||||
| it("should clear all metrics", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
| monitor.reset(); | ||||||
|
|
||||||
| const metrics = monitor.getHealthMetrics(); | ||||||
| expect(metrics.totalRequests).toBe(0); | ||||||
| expect(metrics.successfulRequests).toBe(0); | ||||||
| expect(metrics.failedRequests).toBe(0); | ||||||
| expect(metrics.errors).toHaveLength(0); | ||||||
| expect(metrics.lastRequestTimestamp).toBeNull(); | ||||||
| }); | ||||||
| }); | ||||||
|
|
||||||
| describe("getHealthReport", () => { | ||||||
| it("should return formatted health report", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordSuccess(200); | ||||||
| monitor.recordFailure(new Error("Test error")); | ||||||
|
|
||||||
| const report = monitor.getHealthReport(); | ||||||
| expect(report).toContain("Health Status:"); | ||||||
| expect(report).toContain("Total Requests: 3"); | ||||||
| expect(report).toContain("Success Rate:"); | ||||||
| expect(report).toContain("Average Response Time:"); | ||||||
| }); | ||||||
|
|
||||||
| it("should show HEALTHY status when success rate is high", () => { | ||||||
| monitor.recordSuccess(100); | ||||||
| monitor.recordSuccess(100); | ||||||
|
|
||||||
| const report = monitor.getHealthReport(); | ||||||
| expect(report).toContain("HEALTHY"); | ||||||
| }); | ||||||
|
|
||||||
| it("should show DEGRADED status when success rate is low", () => { | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
| monitor.recordFailure(new Error("Test")); | ||||||
|
|
||||||
| const report = monitor.getHealthReport(); | ||||||
| expect(report).toContain("DEGRADED"); | ||||||
| }); | ||||||
|
|
||||||
| it("should include error details in report", () => { | ||||||
| monitor.recordFailure(new TypeError("Type error")); | ||||||
|
|
||||||
| const report = monitor.getHealthReport(); | ||||||
| expect(report).toContain("Errors:"); | ||||||
| expect(report).toContain("TypeError"); | ||||||
| }); | ||||||
| }); | ||||||
| }); | ||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,185 @@ | ||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||
| * @license | ||||||||||||||||||||||||||||||||||||||
| * Copyright 2024 Google LLC | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||||||||||||||||||||||||||||||||
| * you may not use this file except in compliance with the License. | ||||||||||||||||||||||||||||||||||||||
| * You may obtain a copy of the License at | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * Unless required by applicable law or agreed to in writing, software | ||||||||||||||||||||||||||||||||||||||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||||||||||||||||||||||||||||||||||||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||||||||||||||||||||||||||||||||
| * See the License for the specific language governing permissions and | ||||||||||||||||||||||||||||||||||||||
| * limitations under the License. | ||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||
| * Health monitoring utility for tracking API request health and performance. | ||||||||||||||||||||||||||||||||||||||
| * @public | ||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| export interface HealthMetrics { | ||||||||||||||||||||||||||||||||||||||
| totalRequests: number; | ||||||||||||||||||||||||||||||||||||||
| successfulRequests: number; | ||||||||||||||||||||||||||||||||||||||
| failedRequests: number; | ||||||||||||||||||||||||||||||||||||||
| successRate: number; | ||||||||||||||||||||||||||||||||||||||
| averageResponseTime: number; | ||||||||||||||||||||||||||||||||||||||
| lastRequestTimestamp: number | null; | ||||||||||||||||||||||||||||||||||||||
| errors: ErrorSummary[]; | ||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| export interface ErrorSummary { | ||||||||||||||||||||||||||||||||||||||
| type: string; | ||||||||||||||||||||||||||||||||||||||
| count: number; | ||||||||||||||||||||||||||||||||||||||
| lastOccurrence: number; | ||||||||||||||||||||||||||||||||||||||
| message?: string; | ||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+23
to
+38
|
||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||||||||||||
| * Monitor API health and track request metrics. | ||||||||||||||||||||||||||||||||||||||
| * Useful for debugging, monitoring, and performance analysis. | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * @example | ||||||||||||||||||||||||||||||||||||||
| * ```typescript | ||||||||||||||||||||||||||||||||||||||
| * const monitor = new HealthMonitor(); | ||||||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||||||
| * try { | ||||||||||||||||||||||||||||||||||||||
| * const startTime = Date.now(); | ||||||||||||||||||||||||||||||||||||||
| * const result = await model.generateContent("Hello"); | ||||||||||||||||||||||||||||||||||||||
| * monitor.recordSuccess(Date.now() - startTime); | ||||||||||||||||||||||||||||||||||||||
| * } catch (error) { | ||||||||||||||||||||||||||||||||||||||
| * monitor.recordFailure(error); | ||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||
| * monitor.recordFailure(error); | |
| * monitor.recordFailure(error as Error); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The maxResponseTimesSaved is hardcoded, but the pull request description mentions it's configurable. Making this a constructor option would align with the description and make the utility more flexible. This change would also require updating how HealthMonitor is instantiated in the tests.
| private readonly maxResponseTimesSaved = 100; | |
| private readonly maxResponseTimesSaved: number; | |
| constructor(options?: { maxResponseTimesSaved?: number }) { | |
| this.maxResponseTimesSaved = options?.maxResponseTimesSaved ?? 100; | |
| } |
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Magic number 100 is used without explanation. Consider making this a configurable parameter in the constructor or a named constant with documentation explaining why this limit exists (e.g., memory management, focus on recent performance).
| private readonly maxResponseTimesSaved = 100; | |
| /** | |
| /** | |
| * The maximum number of recent response times to keep in memory. | |
| * This helps limit memory usage and focuses metrics on recent performance. | |
| */ | |
| private readonly maxResponseTimesSaved: number; | |
| /** | |
| * Create a new HealthMonitor. | |
| * @param maxResponseTimesSaved - Maximum number of recent response times to keep (default: 100). | |
| */ | |
| constructor(maxResponseTimesSaved: number = 100) { | |
| this.maxResponseTimesSaved = maxResponseTimesSaved; | |
| } | |
| /** |
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing input validation for the responseTime parameter. Negative values or non-finite numbers (NaN, Infinity) could corrupt the average response time calculation. Consider adding validation to ensure responseTime is a positive finite number.
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error message is only stored when an error type is first encountered. When the same error type occurs again (lines 98-100), the message is not updated. This means if subsequent errors of the same type have different messages, only the first message will be retained, which could be misleading when debugging.
| existing.lastOccurrence = Date.now(); | |
| existing.lastOccurrence = Date.now(); | |
| existing.message = error.message; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method calls Date.now() in multiple places. To ensure a consistent timestamp for a single failure event and for a minor performance improvement, it's better to call Date.now() once at the beginning of the method and reuse the value.
const now = Date.now();
this.lastRequestTimestamp = now;
const errorType = error.constructor.name;
const existing = this.errorMap.get(errorType);
if (existing) {
existing.count++;
existing.lastOccurrence = now;
} else {
this.errorMap.set(errorType, {
type: errorType,
count: 1,
lastOccurrence: now,
message: error.message,
});
}
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The maxResponseTimesSaved limit (100) could lead to inaccurate average response time calculations when there are more than 100 successful requests. The average will only reflect the last 100 requests, which may not represent the overall performance. Consider either documenting this sliding window behavior in the getHealthMetrics() documentation, or renaming averageResponseTime to something like recentAverageResponseTime to make the behavior clear.
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The average response time is recalculated from scratch every time getHealthMetrics() is called by iterating through all stored response times. This is inefficient, especially if getHealthMetrics() is called frequently. Consider maintaining a running sum that's updated in recordSuccess() and reset() to compute the average in O(1) time instead of O(n).
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing input validation for the threshold parameter. Values outside the range 0-100 don't make sense for a percentage threshold and could lead to unexpected behavior. Consider validating that the threshold is within a reasonable range (e.g., 0-100).
| isHealthy(threshold: number = 95): boolean { | |
| isHealthy(threshold: number = 95): boolean { | |
| if (threshold < 0 || threshold > 100) { | |
| throw new RangeError("Threshold must be between 0 and 100 (inclusive)."); | |
| } |
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Magic number 95 is used as the default threshold without a clear explanation. Consider defining this as a named constant like DEFAULT_HEALTH_THRESHOLD to improve code readability and make it easier to maintain consistent thresholds across the codebase.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Calling getHealthMetrics() here is inefficient because it calculates all metrics, including the average response time which involves iterating over an array. Since this method only needs the success rate, you can improve performance by calculating it directly.
const successRate = (this.successfulRequests / this.totalRequests) * 100;
return successRate >= threshold;
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getHealthMetrics() is called twice - once on line 168 and indirectly again on line 169 through isHealthy(). This is inefficient as it recalculates all metrics twice. Consider storing the metrics result and using it to determine the status: const status = metrics.successRate >= 95 ? "HEALTHY" : "DEGRADED";
| const status = this.isHealthy() ? "HEALTHY" : "DEGRADED"; | |
| const status = metrics.successRate >= 95 ? "HEALTHY" : "DEGRADED"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using += for string building, especially with conditional parts, can be hard to read and maintain. Refactoring to build an array of strings and then joining them can improve both readability and performance.
const reportParts = [
`Health Status: ${status}`,
`Total Requests: ${metrics.totalRequests}`,
`Success Rate: ${metrics.successRate}%`,
`Average Response Time: ${metrics.averageResponseTime}ms`,
];
if (metrics.errors.length > 0) {
reportParts.push('');
reportParts.push('Errors:');
metrics.errors.forEach(error => {
reportParts.push(` - ${error.type}: ${error.count} occurrence(s)`);
});
}
return reportParts.join('\n') + '\n';
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Testing time-dependent logic by capturing
Date.now()before and after an operation can lead to flaky tests, as it depends on the execution speed of the code. A more robust approach is to mock date and time. With Jest, you can usejest.useFakeTimers()andjest.spyOn(Date, 'now')to control the value returned byDate.now()and make your test assertions deterministic.