|
1 | | -import {describe, expect, it} from 'vitest'; |
| 1 | +import {describe, expect, it, vi} from 'vitest'; |
2 | 2 |
|
3 | | -import {extractExternalLinks, extractIncludePaths} from './index'; |
| 3 | +import type {Run} from '~/commands/build'; |
| 4 | + |
| 5 | +import { |
| 6 | + collectExternalLinksFromYaml, |
| 7 | + collectLinks, |
| 8 | + extractExternalLinks, |
| 9 | + extractIncludePaths, |
| 10 | +} from './index'; |
4 | 11 |
|
5 | 12 | describe('CrawlerManifest feature', () => { |
6 | 13 | describe('extractExternalLinks', () => { |
@@ -217,4 +224,145 @@ plain https://plain.example.com text |
217 | 224 | ); |
218 | 225 | }); |
219 | 226 | }); |
| 227 | + |
/**
 * Tests for `collectExternalLinksFromYaml`, which receives raw YAML text and
 * returns the external links found in it. The cases cover a top-level `href`,
 * an `href` nested under a parent key, and inputs expected to yield nothing
 * (relative targets, unparsable YAML, empty and scalar documents).
 */
describe('collectExternalLinksFromYaml', () => {
    it('extracts external link from href field', () => {
        expect(collectExternalLinksFromYaml('href: https://example.com')).toContain(
            'https://example.com',
        );
    });

    // The fixture nests `href` under `navigation`; the title names the key that
    // is actually exercised (it previously said "url", which no fixture uses).
    it('extracts external link from nested href field', () => {
        expect(
            collectExternalLinksFromYaml('navigation:\n  href: https://nav.example.com'),
        ).toContain('https://nav.example.com');
    });

    it('ignores relative href value', () => {
        expect(collectExternalLinksFromYaml('href: ./relative.md')).toEqual([]);
    });

    // Malformed input must produce an empty result rather than an exception.
    it('returns empty on invalid YAML', () => {
        expect(collectExternalLinksFromYaml(': invalid: {unclosed')).toEqual([]);
    });

    // NOTE(review): an empty document presumably parses to null/undefined —
    // confirm against the YAML loader used by the implementation.
    it('returns empty when YAML is null', () => {
        expect(collectExternalLinksFromYaml('')).toEqual([]);
    });

    // A bare scalar has no keys to inspect, so no links are expected.
    it('returns empty when YAML is a scalar', () => {
        expect(collectExternalLinksFromYaml('just a string')).toEqual([]);
    });
});
| 257 | + |
/**
 * Fenced-code handling, observed through `extractExternalLinks`: links that
 * sit inside a fenced block must not be reported, links after a properly
 * closed fence must be.
 */
describe('stripFencedBlocks (via extractExternalLinks)', () => {
    it('strips unclosed fenced block to end of content', () => {
        const markdown = '```\n[link](https://unclosed.example.com)';
        const found = extractExternalLinks(markdown);

        expect(found).toEqual([]);
    });

    it('tilde fence does not close backtick fence', () => {
        // The `~~~` line sits between the opening and closing backtick fences.
        expect(
            extractExternalLinks(
                '```\n[inside](https://inside.example.com)\n~~~\nstill inside\n```',
            ),
        ).toEqual([]);
    });

    it('shorter fence does not close longer opening fence', () => {
        // Opened with four backticks; the three-backtick line must not end it.
        expect(
            extractExternalLinks(
                '````\n[inside](https://inside.example.com)\n```\nnot closed\n````',
            ),
        ).toEqual([]);
    });

    it('handles CRLF line endings correctly', () => {
        const found = extractExternalLinks(
            '```\r\n[inside](https://inside.example.com)\r\n```\r\n[outside](https://outside.example.com)',
        );

        expect(found).not.toContain('https://inside.example.com');
        expect(found).toContain('https://outside.example.com');
    });

    it('content after closed fence is included', () => {
        const found = extractExternalLinks(
            '```\n[inside](https://inside.example.com)\n```\n[outside](https://outside.example.com)',
        );

        expect(found).not.toContain('https://inside.example.com');
        expect(found).toContain('https://outside.example.com');
    });
});
| 293 | + |
/**
 * Tests for `collectLinks(run, path, visited?)` against an in-memory file set:
 * markdown and YAML sources, recursive includes, unreadable files and the
 * `visited` guard.
 */
describe('collectLinks', () => {
    /**
     * Builds a minimal `Run` stub backed by `files` (relative path -> content).
     *
     * `read` strips the `/input/` prefix from the requested path and resolves
     * with the matching fixture, or rejects when there is none.
     * NOTE(review): assumes `collectLinks` reads through absolute paths under
     * `run.input` — confirm against the implementation.
     */
    const makeRun = (files: Record<string, string>) =>
        ({
            input: '/input',
            read: vi.fn(async (path: string) => {
                const relative = path.slice('/input/'.length);
                if (relative in files) return files[relative];
                throw new Error(`File not found: ${path}`);
            }),
        }) as unknown as Run;

    it('extracts links from a markdown file', async () => {
        const run = makeRun({'page.md': '[link](https://example.com)'});

        expect(await collectLinks(run, 'page.md')).toContain('https://example.com');
    });

    // No fixtures at all: the stub's `read` rejects for every path.
    it('returns empty array when file cannot be read', async () => {
        const run = makeRun({});

        expect(await collectLinks(run, 'missing.md')).toEqual([]);
    });

    it('returns empty array for already-visited file (circular protection)', async () => {
        const run = makeRun({'page.md': '[link](https://example.com)'});
        const visited = new Set(['page.md']);

        expect(await collectLinks(run, 'page.md', visited)).toEqual([]);
    });

    it('extracts links from YAML file including structured keys', async () => {
        const run = makeRun({
            'data.yaml': 'href: https://yaml.example.com',
        });

        expect(await collectLinks(run, 'data.yaml')).toContain('https://yaml.example.com');
    });

    it('extracts links from .yml file', async () => {
        const run = makeRun({
            'data.yml': 'href: https://yml.example.com',
        });

        expect(await collectLinks(run, 'data.yml')).toContain('https://yml.example.com');
    });

    it('recursively extracts links from included markdown files', async () => {
        const run = makeRun({
            'page.md': '{% include [snippet](./_includes/snippet.md) %}',
            '_includes/snippet.md': '[link](https://included.example.com)',
        });

        expect(await collectLinks(run, 'page.md')).toContain('https://included.example.com');
    });

    it('does not follow includes from YAML files', async () => {
        // The YAML fixture must actually reference `snippet.md`; without the
        // include directive the negative assertion below would hold no matter
        // how YAML files are processed.
        const run = makeRun({
            'data.yaml':
                'href: https://yaml.example.com\ntext: "{% include [snippet](./snippet.md) %}"',
            'snippet.md': '[link](https://shouldnotappear.example.com)',
        });
        const links = await collectLinks(run, 'data.yaml');

        expect(links).toContain('https://yaml.example.com');
        expect(links).not.toContain('https://shouldnotappear.example.com');
    });

    // `missing.md` has no fixture, so reading the include rejects; links from
    // the including page itself must still be returned.
    it('skips unreadable include files gracefully', async () => {
        const run = makeRun({
            'page.md': '{% include [missing](./missing.md) %}\n[link](https://example.com)',
        });

        expect(await collectLinks(run, 'page.md')).toContain('https://example.com');
    });
});
220 | 368 | }); |
0 commit comments