|
/**
 * Shared hygiene checks for CI-injected values (GitHub secrets and builds.yml env values).
 *
 * The goal is to catch operator mistakes (trailing newline, Windows line endings,
 * invisible characters pasted from rich text editors, etc.) at build time, before
 * malformed values end up baked into a production binary.
 *
 * Intentionally format-agnostic: it does not try to understand whether a value
 * is a URL, base64 blob, JWT, etc. It only enforces generic hygiene.
 *
 * Usage:
 *   const { checkValue } = require('./lib/validate-value');
 *   const issues = checkValue('MM_SENTRY_DSN', value);
 *   // issues is an array; empty => value is clean.
 *
 * Output contract: issue messages MUST NOT include the value itself or any
 * substring of it. Only the length, offsets, and character code points are
 * safe to surface.
 */

/* global Buffer */
| 22 | + |
// Matches a single C0/C1 control character or DEL (U+007F). Deliberately left
// out of the class:
//   - NUL (U+0000): the range starts at U+0001; checkValue reports NUL itself.
//   - tab (U+0009) and line feed (U+000A): LF is legitimate inside multi-line
//     secrets such as PEM keys or wrapped base64.
//   - carriage return (U+000D): checkValue reports it separately so the message
//     can point at Windows line endings.
/* eslint-disable no-control-regex -- matching control characters is the whole point */
const CONTROL_CHARS =
  /[\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/;
/* eslint-enable no-control-regex */

// Matches a single invisible code point: zero-width space (U+200B), zero-width
// non-joiner (U+200C), zero-width joiner (U+200D) or byte-order mark (U+FEFF).
// eslint-disable-next-line no-misleading-character-class -- intentional set of invisible code points (ZWSP/ZWNJ/ZWJ/BOM)
const ZERO_WIDTH_CHARS = /[\u200B\u200C\u200D\uFEFF]/;
| 33 | + |
/**
 * Formats the first Unicode code point of `ch` in `U+XXXX` notation
 * (uppercase hex, zero-padded to at least four digits).
 *
 * Uses `codePointAt` rather than `charCodeAt` so that a character outside the
 * Basic Multilingual Plane is reported as its real code point (e.g. U+1F600)
 * instead of its leading surrogate.
 *
 * @param {string} ch - String whose first code point should be described.
 * @returns {string} The code point label, e.g. `U+0007` or `U+200B`.
 */
function formatCodePoint(ch) {
  // codePointAt() yields undefined for an empty string; Number() maps that to
  // NaN so the function stays non-throwing, exactly as it was with charCodeAt().
  const codePoint = Number(ch.codePointAt(0));
  return `U+${codePoint.toString(16).padStart(4, '0').toUpperCase()}`;
}
| 37 | + |
/**
 * Converts a UTF-16 code-unit index into `str` to the UTF-8 byte offset of the
 * same position, so offsets share a unit with the byte length in the messages.
 *
 * @param {string} str - The string being inspected.
 * @param {number} index - UTF-16 index as returned by `indexOf` / `RegExp#exec`.
 * @returns {number} Number of UTF-8 bytes that precede `index`.
 */
function utf8ByteOffset(str, index) {
  return Buffer.byteLength(str.slice(0, index), 'utf8');
}

/**
 * Runs generic hygiene checks on a CI-injected value and returns every
 * violation found. Messages never contain the value or any substring of it;
 * only the name, the UTF-8 byte length, UTF-8 byte offsets and code points.
 *
 * `missing`, `empty` and `whitespace_only` short-circuit (they are returned
 * alone). The remaining checks are independent, are all evaluated, and each
 * reports only the first occurrence of its problem.
 *
 * @param {string} name - Identifier to report in violations (e.g. secret name or env key).
 * @param {unknown} value - The value to check. Non-string values are coerced via String().
 * @param {object} [options]
 * @param {boolean} [options.allowEmpty=false] - If true, an empty string is not a violation.
 *   Whitespace-only strings still fail even when allowEmpty is true (they indicate a typo).
 * @returns {{ code: string, message: string }[]} - One entry per distinct violation; empty array means clean.
 */
function checkValue(name, value, options = {}) {
  const { allowEmpty = false } = options;
  const violations = [];

  /**
   * `missing`: the value is `undefined` or `null`. For secrets, this means the
   * referenced GitHub Environment secret was never set (or the Environment
   * itself is misconfigured). For YAML env entries, it usually means a key
   * like `FOO:` was written with no value, which js-yaml parses as `null`.
   * Short-circuits: nothing else can be checked without a value.
   */
  if (value === undefined || value === null) {
    violations.push({
      code: 'missing',
      message: `${name}: value is null or not defined`,
    });
    return violations;
  }

  const str = String(value);
  // Size in UTF-8 bytes (not UTF-16 code units); every offset reported below
  // is converted to the same unit via utf8ByteOffset().
  const len = Buffer.byteLength(str, 'utf8');

  /**
   * `empty`: the value is the empty string `""`. Skipped when the caller
   * passes `{ allowEmpty: true }`, used for intentionally-empty YAML entries
   * such as optional allowlists (e.g. `MM_PERPS_HIP3_ALLOWLIST_MARKETS: ''`).
   * Short-circuits: the remaining checks don't apply to an empty string.
   */
  if (str === '') {
    if (!allowEmpty) {
      violations.push({
        code: 'empty',
        message: `${name}: value is an empty string`,
      });
    }
    return violations;
  }

  /**
   * `whitespace_only`: the value is non-empty but contains nothing except
   * whitespace. Almost always a typo (e.g. someone pasted a single space
   * into the GitHub Secret UI). Fails even with `allowEmpty: true`, because
   * "intentionally empty" should be `""`, not `" "`.
   * Short-circuits: the value has no meaningful content to inspect further.
   */
  if (str.trim() === '') {
    violations.push({
      code: 'whitespace_only',
      message: `${name}: value is whitespace-only (${len} bytes)`,
    });
    return violations;
  }

  /**
   * `leading_whitespace`: the value begins with whitespace (space, tab, LF,
   * etc.). Accidental leading whitespace breaks URL parsing, base64 decoding,
   * and token comparisons — almost never intentional.
   */
  if (/^\s/.test(str)) {
    violations.push({
      code: 'leading_whitespace',
      message: `${name}: value has leading whitespace (${len} bytes total)`,
    });
  }

  /**
   * `trailing_whitespace`: the value ends with whitespace. This is the
   * single most common real-world paste mistake — e.g. copying a token from
   * a terminal or editor ends up including the trailing `\n`. It's the
   * specific failure mode this entire module was built to catch.
   */
  if (/\s$/.test(str)) {
    violations.push({
      code: 'trailing_whitespace',
      message: `${name}: value has trailing whitespace (${len} bytes total); a trailing newline pasted from a terminal or editor is the most common cause`,
    });
  }

  /**
   * `carriage_return`: the value contains `\r` anywhere. CR is not part of
   * the base64 alphabet, PEM uses LF, and no sanctioned secret format needs
   * it — its presence is essentially always an artifact of Windows CRLF line
   * endings surviving a copy-paste. Reported separately from `control_chars`
   * so the remediation message can specifically call out Windows endings.
   */
  const crIndex = str.indexOf('\r');
  if (crIndex !== -1) {
    violations.push({
      code: 'carriage_return',
      message: `${name}: value contains a carriage return (\\r) at offset ${utf8ByteOffset(str, crIndex)}/${len}; strip Windows line endings before saving the secret`,
    });
  }

  /**
   * `nul_byte`: the value contains `\u0000`. Never legitimate in any secret
   * format we use. A NUL byte can terminate strings prematurely in C-based
   * tooling and is a classic source of silent truncation bugs.
   */
  const nulIndex = str.indexOf('\u0000');
  if (nulIndex !== -1) {
    violations.push({
      code: 'nul_byte',
      message: `${name}: value contains a NUL byte at offset ${utf8ByteOffset(str, nulIndex)}/${len}`,
    });
  }

  /**
   * `control_chars`: the value contains any other C0/C1 control character
   * (range defined in CONTROL_CHARS — excludes tab, LF, and CR, which are
   * allowed or reported separately). Hits here usually indicate non-text
   * binary data was pasted as if it were a string, or a stray escape
   * sequence. The message includes the specific code point (e.g. U+0007)
   * so operators can identify what was pasted.
   */
  const ctrlMatch = CONTROL_CHARS.exec(str);
  if (ctrlMatch) {
    violations.push({
      code: 'control_chars',
      message: `${name}: value contains control character ${formatCodePoint(ctrlMatch[0])} at offset ${utf8ByteOffset(str, ctrlMatch.index)}/${len}`,
    });
  }

  /**
   * `zero_width`: the value contains an invisible Unicode character —
   * zero-width space (U+200B), zero-width non-joiner (U+200C), zero-width
   * joiner (U+200D), or byte-order mark (U+FEFF). These are introduced when
   * copying from rich-text sources (Google Docs, Slack, Notion, Confluence)
   * and are invisible to the human eye but break exact-match comparisons
   * and break formats like base64 that only accept a strict alphabet.
   */
  const zwMatch = ZERO_WIDTH_CHARS.exec(str);
  if (zwMatch) {
    violations.push({
      code: 'zero_width',
      message: `${name}: value contains invisible character ${formatCodePoint(zwMatch[0])} at offset ${utf8ByteOffset(str, zwMatch.index)}/${len}; likely pasted from a rich text source`,
    });
  }

  return violations;
}
| 186 | + |
| 187 | +module.exports = { checkValue }; |
0 commit comments