diff --git a/docs-site/src/content/docs/reference/cli-reference.md b/docs-site/src/content/docs/reference/cli-reference.md index 2283f10..5475673 100644 --- a/docs-site/src/content/docs/reference/cli-reference.md +++ b/docs-site/src/content/docs/reference/cli-reference.md @@ -19,8 +19,9 @@ awf [options] -- | Option | Type | Default | Description | |--------|------|---------|-------------| -| `--allow-domains ` | string | — | Comma-separated list of allowed domains (required unless `--allow-domains-file` used) | +| `--allow-domains ` | string | — | Comma-separated list of allowed domains (required unless `--allow-domains-file` or `--ruleset-file` used) | | `--allow-domains-file ` | string | — | Path to file containing allowed domains | +| `--ruleset-file ` | string | — | Path to YAML rule file (repeatable) | | `--log-level ` | string | `info` | Logging verbosity: `debug`, `info`, `warn`, `error` | | `--keep-containers` | flag | `false` | Keep containers running after command exits | | `--tty` | flag | `false` | Allocate pseudo-TTY for interactive tools | @@ -54,6 +55,14 @@ Path to file with allowed domains. Supports comments (`#`) and one domain per li --allow-domains-file ./allowed-domains.txt ``` +### `--ruleset-file ` + +Load allowed domains from a YAML rule file. Can be specified multiple times to merge rule sets. + +```bash +--ruleset-file ./awf-rules.yaml --ruleset-file ./project-rules.yaml +``` + ### `--log-level ` Set logging verbosity. diff --git a/src/cli.ts b/src/cli.ts index 0bf2e75..9d94665 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -24,6 +24,7 @@ import { redactSecrets } from './redact-secrets'; import { validateDomainOrPattern } from './domain-patterns'; import { OutputFormat } from './types'; import { version } from '../package.json'; +import { loadRuleSet, mergeRuleSets } from './rules'; /** * Parses a comma-separated list of domains into an array of trimmed, non-empty domain strings @@ -309,6 +310,12 @@ program '--allow-domains-file ', 'Path to file containing allowed domains (one per line or comma-separated, supports # comments)' ) + .option( + '--ruleset-file ', + 'Path to YAML rule configuration file (can be specified multiple times)', + (value, previous: string[] = []) => [...previous, value], + [] + ) .option( '--log-level ', 'Log level: debug, info, warn, error', @@ -419,7 +426,7 @@ program logger.setLevel(logLevel); - // Parse domains from both --allow-domains flag and --allow-domains-file + // Parse domains from --allow-domains, --allow-domains-file, and --ruleset-file let allowedDomains: string[] = []; // Parse domains from command-line flag if provided @@ -438,9 +445,25 @@ program } } + // Parse domains from YAML ruleset files (can be multiple) + if (options.rulesetFile?.length) { + try { + const ruleSets = options.rulesetFile.map((filePath: string) => loadRuleSet(filePath)); + const ruleDomains = mergeRuleSets(ruleSets); + allowedDomains.push(...ruleDomains); + } catch (error) { + logger.error( + `Failed to load ruleset: ${error instanceof Error ? error.message : String(error)}` + ); + process.exit(1); + } + } + // Ensure at least one domain is specified if (allowedDomains.length === 0) { - logger.error('At least one domain must be specified with --allow-domains or --allow-domains-file'); + logger.error( + 'At least one domain must be specified with --allow-domains, --allow-domains-file, or --ruleset-file' + ); process.exit(1); } diff --git a/src/rules.test.ts b/src/rules.test.ts new file mode 100644 index 0000000..a60e0c6 --- /dev/null +++ b/src/rules.test.ts @@ -0,0 +1,77 @@ +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { loadRuleSet, mergeRuleSets, RuleSet } from './rules'; + +describe('rules', () => { + let tempDir: string; + + beforeEach(() => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'awf-rules-')); + }); + + afterEach(() => { + if (fs.existsSync(tempDir)) { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); + + it('loads a valid ruleset file', () => { + const filePath = path.join(tempDir, 'rules.yaml'); + fs.writeFileSync( + filePath, + [ + 'version: 1', + 'rules:', + ' - domain: github.com', + ' subdomains: true', + ' - domain: api.github.com', + ].join('\n') + ); + + const ruleSet = loadRuleSet(filePath); + + expect(ruleSet).toEqual({ + version: 1, + rules: [ + { domain: 'github.com', subdomains: true }, + { domain: 'api.github.com' }, + ], + }); + }); + + it('throws on unsupported fields', () => { + const filePath = path.join(tempDir, 'rules.yaml'); + fs.writeFileSync( + filePath, + ['version: 1', 'rules:', ' - url: https://github.com/githubnext/*'].join('\n') + ); + + expect(() => loadRuleSet(filePath)).toThrow('Unsupported rule fields'); + }); + + it('throws on invalid version', () => { + const filePath = path.join(tempDir, 'rules.yaml'); + fs.writeFileSync(filePath, ['version: 2', 'rules:', ' - domain: github.com'].join('\n')); + + expect(() => loadRuleSet(filePath)).toThrow('Invalid ruleset version'); + }); + + it('throws when domain is missing', () => { + const filePath = path.join(tempDir, 'rules.yaml'); + fs.writeFileSync(filePath, ['version: 1', 'rules:', ' - subdomains: true'].join('\n')); + + expect(() => loadRuleSet(filePath)).toThrow('must include a non-empty "domain"'); + }); + + it('merges multiple rule sets', () => { + const sets: RuleSet[] = [ + { version: 1, rules: [{ domain: 'github.com' }, { domain: 'api.github.com' }] }, + { version: 1, rules: [{ domain: 'npmjs.org', subdomains: true }] }, + ]; + + const domains = mergeRuleSets(sets); + + expect(domains).toEqual(['github.com', 'api.github.com', 'npmjs.org']); + }); +}); diff --git a/src/rules.ts b/src/rules.ts new file mode 100644 index 0000000..cb8dd84 --- /dev/null +++ b/src/rules.ts @@ -0,0 +1,78 @@ +import * as fs from 'fs'; +import { load } from 'js-yaml'; + +export interface Rule { + domain?: string; + subdomains?: boolean; + url?: string; // Future: requires TLS inspection + methods?: string[]; // Future: requires TLS inspection +} + +export interface RuleSet { + version: number; + rules: Rule[]; +} + +function validateRuleSet(ruleSet: unknown, source: string): RuleSet { + if (!ruleSet || typeof ruleSet !== 'object') { + throw new Error(`Invalid ruleset in ${source}: expected an object`); + } + + const { version, rules } = ruleSet as Record; + + if (version !== 1) { + throw new Error(`Invalid ruleset version in ${source}: expected version 1`); + } + + if (!Array.isArray(rules)) { + throw new Error(`Invalid ruleset in ${source}: "rules" must be an array`); + } + + const sanitizedRules = rules.map((rule, index) => { + if (!rule || typeof rule !== 'object') { + throw new Error(`Invalid rule at index ${index} in ${source}: expected an object`); + } + + const { domain, subdomains, url, methods } = rule as Rule & Record; + + if (url !== undefined || methods !== undefined) { + throw new Error( + `Unsupported rule fields in ${source} (rule ${index + 1}): only "domain" and "subdomains" are supported` + ); + } + + if (typeof domain !== 'string' || domain.trim() === '') { + throw new Error(`Rule ${index + 1} in ${source} must include a non-empty "domain" string`); + } + + if (subdomains !== undefined && typeof subdomains !== 'boolean') { + throw new Error(`Rule ${index + 1} in ${source} has invalid "subdomains" value (must be boolean)`); + } + + return { + domain: domain.trim(), + subdomains, + }; + }); + + return { version: 1, rules: sanitizedRules }; +} + +export function loadRuleSet(filePath: string): RuleSet { + if (!fs.existsSync(filePath)) { + throw new Error(`Ruleset file not found: ${filePath}`); + } + + const content = fs.readFileSync(filePath, 'utf-8'); + const parsed = load(content); + + return validateRuleSet(parsed, filePath); +} + +export function mergeRuleSets(sets: RuleSet[]): string[] { + return sets.flatMap(set => + set.rules + .map(rule => rule.domain) + .filter((domain): domain is string => typeof domain === 'string') + ); +}