@@ -21,84 +21,61 @@ def mock_crawl4ai_env():
2121 if 'handlers.web' in sys .modules :
2222 del sys .modules ['handlers.web' ]
2323
@pytest.mark.asyncio
async def test_llms_txt_uses_default_generator(mock_crawl4ai_env):
    """Check that an ``llms.txt`` URL builds a plain markdown generator.

    The handler is called with a stub crawler whose ``arun`` always returns
    a successful result.  The test then asserts that the most recent
    ``DefaultMarkdownGenerator(...)`` call received no arguments, i.e. no
    ``content_filter`` was supplied.

    Args:
        mock_crawl4ai_env: Fixture yielding the ``handlers.web`` module
            object under test (exposes ``handle_web_task`` and
            ``DefaultMarkdownGenerator``).
    """
    handlers_web = mock_crawl4ai_env
    handle_web_task = handlers_web.handle_web_task

    url = "https://example.com/llms.txt"

    # Minimal successful crawl result handed back by the stub crawler.
    mock_result = MagicMock()
    mock_result.success = True
    mock_result.markdown = "content"
    mock_result.url = url
    mock_result.links = {'internal': []}

    async def fake_arun(url, config=None):
        return mock_result

    # The crawler is injected directly into the handler.
    mock_crawler = AsyncMock()
    mock_crawler.arun.side_effect = fake_arun

    # Patch DefaultMarkdownGenerator IN the reloaded module
    with patch.object(handlers_web, 'DefaultMarkdownGenerator') as MockGen:
        generator_instance = MagicMock(name="generator_instance")
        MockGen.return_value = generator_instance

        await handle_web_task(url, crawler=mock_crawler)

        # Verify DefaultMarkdownGenerator was used without content_filter
        MockGen.assert_called_with()
50+
@pytest.mark.asyncio
async def test_standard_page_uses_llm_filter(mock_crawl4ai_env):
    """Check that a regular page URL builds a generator with a content filter.

    The handler is called with a stub crawler whose ``arun`` always returns
    a successful result.  The test then asserts that the most recent
    ``DefaultMarkdownGenerator(...)`` call carried a non-``None``
    ``content_filter`` keyword argument.

    Args:
        mock_crawl4ai_env: Fixture yielding the ``handlers.web`` module
            object under test (exposes ``handle_web_task`` and
            ``DefaultMarkdownGenerator``).
    """
    handlers_web = mock_crawl4ai_env
    handle_web_task = handlers_web.handle_web_task

    url = "https://example.com/page"

    # Minimal successful crawl result handed back by the stub crawler.
    mock_result = MagicMock()
    mock_result.success = True
    mock_result.markdown = "content"
    mock_result.url = url
    mock_result.links = {'internal': []}

    async def fake_arun(url, config=None):
        # No manifest check logic needed in mock as we removed it from handler
        return mock_result

    mock_crawler = AsyncMock()
    mock_crawler.arun.side_effect = fake_arun

    with patch.object(handlers_web, 'DefaultMarkdownGenerator') as MockGen:
        generator_instance = MagicMock(name="generator_instance")
        MockGen.return_value = generator_instance

        await handle_web_task(url, crawler=mock_crawler)

        # Verify DefaultMarkdownGenerator was called with content_filter
        call_args = MockGen.call_args
        assert call_args is not None, "DefaultMarkdownGenerator should be called"
        kwargs = call_args.kwargs
        assert 'content_filter' in kwargs, "Should pass content_filter"
        assert kwargs['content_filter'] is not None
0 commit comments