Skip to content

Commit 256f1e2

Browse files
committed
test core
1 parent 1513190 commit 256f1e2

File tree

1 file changed

+107
-0
lines changed

1 file changed

+107
-0
lines changed

packages/core/src/robots.test.ts

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import { isPathAllowed, parseRobots } from "@merln/core/src/robots.ts";
2+
import { describe, expect, test } from "bun:test";
3+
4+
// User-agent string passed to isPathAllowed / parseRobots by every check in this file.
const USER_AGENT = "merln/rss (+https://natepapes.com)";
5+
6+
// Exercises isPathAllowed / parseRobots with small inline robots.txt bodies:
// longest-match resolution, empty Disallow, fallback to the `*` group,
// the `*` / `$` pattern characters, and a group keyed by the full UA string.
describe("RFC 9309 robots matcher", () => {
  // Every path check in this suite is evaluated on behalf of USER_AGENT.
  const allowedFor = (robotsTxt: string, path: string) =>
    isPathAllowed(robotsTxt, path, USER_AGENT);

  test("longest match (RFC example) and allow precedence", () => {
    // The Disallow pattern is longer than the Allow pattern, so it is the
    // one expected to apply to the .gif path.
    // NOTE(review): the title mentions allow precedence, but no equal-length
    // Allow/Disallow tie is asserted here.
    const robotsTxt = `User-Agent: merln
Allow: /example/page/
Disallow: /example/page/disallowed.gif`;

    expect(allowedFor(robotsTxt, "/example/page/ok")).toBe(true);
    expect(allowedFor(robotsTxt, "/example/page/disallowed.gif")).toBe(false);
  });

  test("empty Disallow means allow all for that agent", () => {
    const robotsTxt = `User-agent: merln\nDisallow:`;

    expect(allowedFor(robotsTxt, "/anything")).toBe(true);
  });

  test("fallback to global rules when no specific group", () => {
    // Only a Googlebot group and a `*` group exist; the single rule expected
    // back from parseRobots is the one from the `*` group.
    const robotsTxt = `User-agent: Googlebot\nDisallow: /google-only/\n\nUser-agent: *\nDisallow: /admin/`;
    const parsed = parseRobots(robotsTxt, USER_AGENT);

    expect(parsed.rules.length).toBe(1);
    expect(allowedFor(robotsTxt, "/admin/panel")).toBe(false);
    expect(allowedFor(robotsTxt, "/public")).toBe(true);
  });

  test("wildcard * and end-anchor $", () => {
    const robotsTxt = `User-Agent: *\nDisallow: *.gif$\nAllow: /publications/`;

    expect(allowedFor(robotsTxt, "/img/pic.gif")).toBe(false);
    // ".gifv" does not end in ".gif", so the `$`-anchored rule must not apply.
    expect(allowedFor(robotsTxt, "/img/pic.gifv")).toBe(true);
    expect(allowedFor(robotsTxt, "/publications/x")).toBe(true);
  });

  test("match by full UA string if provided", () => {
    // The group's User-agent line carries the complete USER_AGENT value.
    const robotsTxt = `User-agent: ${USER_AGENT}\nDisallow: /special/`;

    expect(allowedFor(robotsTxt, "/special/x")).toBe(false);
  });
});
43+
44+
// Runs the matcher against larger robots.txt fixtures named after three
// providers (YouTube, GitHub, Atomic Object) and checks representative paths.
describe("Provider robots policies", () => {
  // Fixture: a "Mediapartners-Google*" group with an empty Disallow, followed
  // by a `*` group that lists many disallowed path prefixes.
  const youtubeRobots = `# robots.txt file for YouTube
User-agent: Mediapartners-Google*
Disallow:

User-agent: *
Disallow: /api/
Disallow: /comment
Disallow: /feeds/videos.xml
Disallow: /file_download
Disallow: /get_video
Disallow: /get_video_info
Disallow: /get_midroll_info
Disallow: /live_chat
Disallow: /login
Disallow: /qr
Disallow: /results
Disallow: /signup
Disallow: /t/terms
Disallow: /timedtext_video
Disallow: /verify_age
Disallow: /watch_ajax
Disallow: /watch_fragments_ajax
Disallow: /watch_popup
Disallow: /watch_queue_ajax
Disallow: /youtubei/
`;

  // Fixture: a single `*` group mixing `*` wildcards and `$` end anchors.
  const githubRobots = `User-agent: *
Disallow: /*/*/commits/
Disallow: /*/raw/
Disallow: /gist/
Disallow: /search$
Disallow: /*.atom$`;

  // Fixture: Crawl-delay appears before any User-agent line, and the `*`
  // group has an empty Disallow followed by a Sitemap line.
  const atomicRobots = `Crawl-delay: 10
User-agent: *
Disallow:
Sitemap: https://spin.atomicobject.com/sitemap_index.xml`;

  test("YouTube: typical video page allowed; certain APIs disallowed", () => {
    expect(isPathAllowed(youtubeRobots, "/watch?v=abc", USER_AGENT)).toBe(true);
    expect(isPathAllowed(youtubeRobots, "/feeds/videos.xml", USER_AGENT)).toBe(
      false
    );
    expect(
      isPathAllowed(youtubeRobots, "/youtubei/v1/browse", USER_AGENT)
    ).toBe(false);
  });

  test("GitHub: block raw, commits list, gist path, and anchored search", () => {
    expect(isPathAllowed(githubRobots, "/org/repo/commits/", USER_AGENT)).toBe(
      false
    );
    expect(isPathAllowed(githubRobots, "/foo/raw/", USER_AGENT)).toBe(false);
    expect(isPathAllowed(githubRobots, "/gist/", USER_AGENT)).toBe(false);
    expect(isPathAllowed(githubRobots, "/search", USER_AGENT)).toBe(false);
    // `$` pins "/search" to the end of the path, so a longer path that merely
    // starts with "/search" must remain allowed. Without this check a matcher
    // that treated "/search$" as a plain prefix would still pass the test.
    expect(isPathAllowed(githubRobots, "/search/advanced", USER_AGENT)).toBe(
      true
    );
    // Exercise the otherwise untested "/*.atom$" rule from the fixture.
    expect(isPathAllowed(githubRobots, "/papes1ns.atom", USER_AGENT)).toBe(
      false
    );
    expect(isPathAllowed(githubRobots, "/papes1ns", USER_AGENT)).toBe(true);
  });

  test("Atomic Object: allow all for *", () => {
    expect(isPathAllowed(atomicRobots, "/anything", USER_AGENT)).toBe(true);
  });
});

0 commit comments

Comments
 (0)