Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

None add mem stats and improve logging #2425

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
"minimatch": "^5.0.1",
"moment": "^2.29.4",
"moo": "^0.5.0",
"node-oom-heapdump": "^3.0.3",
"node_extra_ca_certs_mozilla_bundle": "^1.0.5",
"optional-require": "^1.1.8",
"pg": "^8.7.3",
Expand Down
2 changes: 2 additions & 0 deletions src/config/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type Transforms<T, K extends keyof T = keyof T> = {
const transforms: Transforms<EnvVars> = {
MICROS_ENV: (value?: string) => EnvironmentEnum[value || EnvironmentEnum.development],
MICROS_GROUP: (value?: string) => value || "",
MICROS_INSTANCE_ID: (value?: string) => value || "",
NODE_ENV: () => nodeEnv,
PROXY: () => {
const proxyHost = process.env.EXTERNAL_ONLY_PROXY_HOST;
Expand Down Expand Up @@ -95,6 +96,7 @@ export interface EnvVars {
MICROS_ENVTYPE: MicrosEnvTypeEnum | undefined,
MICROS_SERVICE_VERSION?: string;
MICROS_GROUP: string;
MICROS_INSTANCE_ID: string;
SQS_BACKFILL_QUEUE_URL: string;
SQS_BACKFILL_QUEUE_REGION: string;
SQS_PUSH_QUEUE_URL: string;
Expand Down
1 change: 0 additions & 1 deletion src/config/feature-flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ export enum BooleanFlags {
LOG_CURLV_OUTPUT = "log-curlv-output",
SKIP_REQUESTED_REVIEWERS = "skip-requested-reviewers",
ENABLE_SUBSCRIPTION_DEFERRED_INSTALL = "enable-subscription-deferred-install",
EARLY_EXIT_ON_VALIDATION_FAILED = "early-exit-on-validation-failed",
ENABLE_CONNECTED_REPOS_VIEW="enable-connected-repos-view",
USE_REST_API_FOR_DISCOVERY = "use-rest-api-for-discovery-again",
ENABLE_GENERIC_CONTAINERS = "enable-generic-containers",
Expand Down
13 changes: 13 additions & 0 deletions src/routes/api/api-router.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,19 @@ describe("API Router", () => {

});

describe("kill-worker-with-oom", () => {
it("should return 200", () => {
return supertest(app)
.post("/api/kill-worker-with-oom?arraySize=4000000000&nIter=4000000000")
.set("host", "127.0.0.1")
.set("X-Slauth-Mechanism", "slauthtoken")
.expect(200)
.then((response) => {
expect(response.body).toEqual({ data: 14 });
});
});
});

describe("Ping", () => {

it("Should fail on missing url", () => {
Expand Down
22 changes: 22 additions & 0 deletions src/routes/api/api-router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,28 @@ ApiRouter.post("/recrypt", ApiRecryptPost);

ApiRouter.post("/ping", ApiPingPost);

/**
* Query params to kill it for sure:
* arraySize=4000000000&nIter=4000000000
*
*/
const KillWorkerWithOom = (req: Request, res: Response) => {
const nIter = parseInt(req.query?.nIter?.toString() || "0");
const arraySize = parseInt(req.query?.arraySize?.toString() || "10");
const allocate = (iter) => {
const arr = new Array(arraySize).fill(`${Math.random()} This is a test string. ${Math.random()}`);

if (iter + 1 < nIter) {
const anotherOne = allocate(iter + 1);
return arr.concat(anotherOne);
}
return arr;
};
res.json({ data: allocate(0).length });
};

ApiRouter.post("/kill-worker-with-oom", KillWorkerWithOom);

// TODO: remove once move to DELETE /:installationId/:jiraHost
ApiRouter.delete(
"/deleteInstallation/:installationId/:jiraHost/github-app-id/:gitHubAppId",
Expand Down
8 changes: 2 additions & 6 deletions src/routes/api/api-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import Logger from "bunyan";
import { NextFunction, Request, Response } from "express";
import { validationResult } from "express-validator";
import { getLogger } from "config/logger";
import { BooleanFlags, booleanFlag } from "config/feature-flags";

// TODO: add tests
type SerializedSubscription = Pick<Subscription, "gitHubInstallationId" | "jiraHost" | "createdAt" | "updatedAt" | "syncStatus">;
Expand Down Expand Up @@ -48,11 +47,8 @@ export const returnOnValidationError = async (
if (!errors.isEmpty()) {
res.status(422).json({ errors: errors.array() });

if (await booleanFlag(BooleanFlags.EARLY_EXIT_ON_VALIDATION_FAILED)) {
(req.log || getLogger("requestValidator")).warn({ errors: errors.array(), paramUuid: req.params?.uuid }, "Fail on validator request, skip with 422");
return;
}

(req.log || getLogger("requestValidator")).warn({ errors: errors.array(), paramUuid: req.params?.uuid }, "Fail on validator request, skip with 422");
return;
}
next();
};
6 changes: 4 additions & 2 deletions src/routes/error-router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { metricError } from "config/metric-names";
import { v4 as uuidv4 } from "uuid";
import { getCloudOrServerFromGitHubAppId } from "utils/get-cloud-or-server";
import { createUrlWithQueryString } from "utils/create-url-with-query-string";
import { getLogger } from "config/logger";

export const attachErrorHandler = (router: Router) => {

Expand Down Expand Up @@ -46,8 +47,9 @@ export const attachErrorHandler = (router: Router) => {
// Error and Catch all route - Handle anything that's missing or has throw an error
const catchAllMiddleware = (err: Error, req: Request, res: Response, next: NextFunction) => {
const errorReference = uuidv4();

req.log.error({ payload: req.body, errorReference, err, req, res }, "Error in frontend");
const log = req.log || getLogger("catchAllMiddleware");
// Inject atl-traceid here as well because the call might fail before we reach frontend-log-middleware
log.error({ payload: req.body, errorReference, err, req, res, atlTraceId: req.headers["atl-traceid"] }, "Error in frontend");

if (!isNodeProd() && res.locals.showError) {
return next(err);
Expand Down
1 change: 1 addition & 0 deletions src/routes/github/webhook/webhook-receiver-post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export const WebhookReceiverPost = async (request: Request, response: Response):
...extraLoggerInfo(payload, parentLogger)
});
logger.info("Webhook received");

let webhookContext;
try {
const { webhookSecrets, gitHubServerApp } = await getWebhookSecrets(uuid);
Expand Down
15 changes: 15 additions & 0 deletions src/util/logger-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import safeJsonStringify from "safe-json-stringify";
import bformat from "bunyan-format";
import { createHashWithSharedSecret } from "utils/encryption";
import { isNodeDev } from "utils/is-node-env";
import Logger from "bunyan";

const SENSITIVE_DATA_FIELDS = ["jiraHost", "orgName", "repoName", "userGroup", "userGroup", "aaid", "username", "prTitle", "prRef", "owner", "description", "repo"];
// For any Micros env we want the logs to be in JSON format.
Expand Down Expand Up @@ -69,3 +70,17 @@ export class SafeRawLogStream extends RawLogStream {
return recordClone;
}
}

const formatMemoryUsage = (data) => `${Math.round(data / 1024 / 1024 * 100) / 100} MB`;

export const logMemoryUsage = (logger: Logger, point: string) => {
const memoryData = process.memoryUsage();
const memStats = {
rss: formatMemoryUsage(memoryData.rss),
heapTotal: formatMemoryUsage(memoryData.heapTotal),
heapUsed: formatMemoryUsage(memoryData.heapUsed),
external: formatMemoryUsage(memoryData.external),
arrayBuffers: formatMemoryUsage(memoryData.arrayBuffers)
};
logger.info({ memStats, point }, `Logging memStats at ${point}`);
};
42 changes: 36 additions & 6 deletions src/util/workers-health-monitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,29 @@ import Logger from "bunyan";
import cluster from "cluster";
import { exec } from "child_process";
import { logInfoSampled } from "utils/log-sampled";
import * as nodeOomHeapdump from "node-oom-heapdump";
import * as fs from "fs";
import * as path from "path";
import { envVars } from "config/env";

const heapdumpsDir = path.join("/tmp", "heapdumps");

if (!fs.existsSync(heapdumpsDir)){
fs.mkdirSync(heapdumpsDir, { recursive: true });
}

const generateHeapDumpFilePath = (pid: number) => {
return path.resolve(heapdumpsDir, `heapdump_${envVars.MICROS_INSTANCE_ID}_${pid}`);
};

const CONF_SHUTDOWN_MSG = "shutdown";

nodeOomHeapdump({
heapdumpOnOOM: true,

path: generateHeapDumpFilePath(process.pid)
});

export const startMonitorOnWorker = (parentLogger: Logger, iAmAliveInervalMsec: number) => {
const logger = parentLogger.child({ isWorker: true });
logger.info({ iAmAliveInervalMsec }, "worker config");
Expand Down Expand Up @@ -56,7 +77,8 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
const liveWorkers: Record<string, number> = { }; // pid => timestamp

const registerNewWorkers = () => {
logger.info(`registering workers`);
logInfoSampled(logger, "monRegWorkers", `registering workers`, 100);

for (const worker of Object.values(cluster.workers)) {
if (worker) {
const workerPid = worker.process.pid;
Expand All @@ -69,7 +91,7 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
});
worker.on("exit", (code, signal) => {
if (signal) {
logger.warn(`worker was killed by signal: ${signal}`);
logger.warn(`worker was killed by signal: ${signal}, code=${code}`);
} else if (code !== 0) {
logger.warn(`worker exited with error code: ${code}`);
} else {
Expand All @@ -84,6 +106,13 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
let workersReadyAt: undefined | Date;
const areWorkersReady = () => workersReadyAt && workersReadyAt.getTime() < Date.now();
const maybeSetupWorkersReadyAt = () => {
if (areWorkersReady()) {
logInfoSampled(logger, "workersReadyNothingToDo", "all workers are considered ready, workersReadyAt", 100);
return ;
}

logRunningProcesses(logger);

if (!workersReadyAt) {
if (Object.keys(registeredWorkers).length > config.numberOfWorkersThreshold) {
workersReadyAt = new Date(Date.now() + config.workerStartupTimeMsecs);
Expand All @@ -94,7 +123,7 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
} else {
logger.info({
workersReadyAt
}, "workersReadyAt is defined");
}, `workersReadyAt is defined, idling during ${config.workerStartupTimeMsecs} msecs`);
}
};

Expand All @@ -111,6 +140,7 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
keysToKill.forEach((key) => {
logger.info(`remove worker with pid=${key} from live workers`);
delete liveWorkers[key];
logRunningProcesses(logger);
});
} else {
logger.warn("workers are not ready yet, skip removing logic");
Expand All @@ -126,11 +156,12 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
for (const worker of Object.values(cluster.workers)) {
worker?.send(CONF_SHUTDOWN_MSG);
}
logRunningProcesses(logger);
} else {
logger.info({
logInfoSampled(logger.child({
areWorkersReady: areWorkersReady(),
nLiveWorkers
}, "not sending shutdown signal");
}), "notSendingSignal", "not sending shutdown signal", 100);
}
};

Expand All @@ -139,6 +170,5 @@ export const startMonitorOnMaster = (parentLogger: Logger, config: {
maybeSetupWorkersReadyAt();
maybeRemoveDeadWorkers();
maybeSendShutdownToAllWorkers();
logRunningProcesses(logger);
}, config.pollIntervalMsecs);
};
57 changes: 56 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,21 @@
semver "^7.3.5"
tar "^6.1.11"

"@mapbox/node-pre-gyp@^1.0.11":
version "1.0.11"
resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz#417db42b7f5323d79e93b34a6d7a2a12c0df43fa"
integrity sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==
dependencies:
detect-libc "^2.0.0"
https-proxy-agent "^5.0.0"
make-dir "^3.1.0"
node-fetch "^2.6.7"
nopt "^5.0.0"
npmlog "^5.0.1"
rimraf "^3.0.2"
semver "^7.3.5"
tar "^6.1.11"

"@nodelib/[email protected]":
version "2.1.5"
resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
Expand Down Expand Up @@ -1834,7 +1849,7 @@ binary-extensions@^2.0.0:
resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d"
integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==

bindings@^1.3.0:
bindings@^1.3.0, bindings@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df"
integrity sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==
Expand Down Expand Up @@ -2128,6 +2143,14 @@ chownr@^2.0.0:
resolved "https://registry.npmjs.org/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==

chrome-remote-interface@^0.33.0:
version "0.33.0"
resolved "https://registry.yarnpkg.com/chrome-remote-interface/-/chrome-remote-interface-0.33.0.tgz#9140b5612ee5cdc39212cd0296d3b61ea881c47a"
integrity sha512-tv/SgeBfShXk43fwFpQ9wnS7mOCPzETnzDXTNxCb6TqKOiOeIfbrJz+2NAp8GmzwizpKa058wnU1Te7apONaYg==
dependencies:
commander "2.11.x"
ws "^7.2.0"

ci-info@^3.2.0:
version "3.3.0"
resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.3.0.tgz#b4ed1fb6818dea4803a55c623041f9165d2066b2"
Expand Down Expand Up @@ -2248,6 +2271,11 @@ combined-stream@^1.0.8:
dependencies:
delayed-stream "~1.0.0"

[email protected]:
version "2.11.0"
resolved "https://registry.yarnpkg.com/commander/-/commander-2.11.0.tgz#157152fd1e7a6c8d98a5b715cf376df928004563"
integrity sha512-b0553uYA5YAEGgyYIGYROzKQ7X5RAqedkfjiZxwi0kL1g3bOaBNNZfYkzt/CL0umgD5wc9Jec2FbB98CjkMRvQ==

commander@^2.19.0:
version "2.20.3"
resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
Expand Down Expand Up @@ -5496,6 +5524,11 @@ nan@^2.13.2, nan@^2.14.0:
resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.2.tgz#f5376400695168f4cc694ac9393d0c9585eeea19"
integrity sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==

nan@^2.17.0:
version "2.18.0"
resolved "https://registry.yarnpkg.com/nan/-/nan-2.18.0.tgz#26a6faae7ffbeb293a39660e88a76b82e30b7554"
integrity sha512-W7tfG7vMOGtD30sHoZSSc/JVYiyDPEyQVso/Zz+/uQd0B0L46gtC+pHha5FFMRpil6fm/AoEcRWyOVi4+E/f8w==

nanoid@^3.3.6:
version "3.3.6"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c"
Expand Down Expand Up @@ -5606,6 +5639,18 @@ node-int64@^0.4.0:
resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
integrity sha1-h6kGXNs1XTGC2PlM4RGIuCXGijs=

node-oom-heapdump@^3.0.3:
version "3.0.3"
resolved "https://registry.yarnpkg.com/node-oom-heapdump/-/node-oom-heapdump-3.0.3.tgz#73e336b20266a6aef331700c0e4e34a631517da8"
integrity sha512-0KZNXIAozEmkPyg1LcQdHybwl81NGYjJYQyWByFnBrfJXRI2Qjm72ih2RFIM9lhXdJn3EVb4rFGui2tDkbuPtA==
dependencies:
"@mapbox/node-pre-gyp" "^1.0.11"
bindings "^1.5.0"
chrome-remote-interface "^0.33.0"
nan "^2.17.0"
require-main-filename "^2.0.0"
ws "^8.13.0"

node-releases@^2.0.2:
version "2.0.2"
resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.2.tgz#7139fe71e2f4f11b47d4d2986aaf8c48699e0c01"
Expand Down Expand Up @@ -8109,11 +8154,21 @@ write-file-atomic@^3.0.0:
signal-exit "^3.0.2"
typedarray-to-buffer "^3.1.5"

ws@^7.2.0:
version "7.5.9"
resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.9.tgz#54fa7db29f4c7cec68b1ddd3a89de099942bb591"
integrity sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q==

ws@^7.4.6:
version "7.5.7"
resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.7.tgz#9e0ac77ee50af70d58326ecff7e85eb3fa375e67"
integrity sha512-KMvVuFzpKBuiIXW3E4u3mySRO2/mCHSyZDJQM5NQ9Q9KHWHWh0NHgfbRMLLrceUK5qAL4ytALJbpRMjixFZh8A==

ws@^8.13.0:
version "8.14.1"
resolved "https://registry.yarnpkg.com/ws/-/ws-8.14.1.tgz#4b9586b4f70f9e6534c7bb1d3dc0baa8b8cf01e0"
integrity sha512-4OOseMUq8AzRBI/7SLMUwO+FEDnguetSk7KMb1sHwvF2w2Wv5Hoj0nlifx8vtGsftE/jWHojPy8sMMzYLJ2G/A==

[email protected]:
version "1.3.0"
resolved "https://registry.yarnpkg.com/x-xss-protection/-/x-xss-protection-1.3.0.tgz#3e3a8dd638da80421b0e9fff11a2dbe168f6d52c"
Expand Down