@@ -20,133 +20,49 @@ def test_epub_init_parser():
2020 assert parser .parser_config_set
2121
2222
23- def test_epub_parser_ebooklib_import_error (epub_parser ):
24- """Test that ImportError is raised when ebooklib is not available."""
25- with patch .dict (sys .modules , {"ebooklib" : None }):
26- with pytest .raises (ValueError , match = "`EbookLib` is required to read Epub files" ):
23+ def test_epub_parser_fast_ebook_import_error (epub_parser ):
24+ """Test that ValueError is raised when fast-ebook cannot be imported."""
25+ with patch .dict (sys .modules , {"fast_ebook" : None }):
26+ with pytest .raises (ValueError , match = "`fast-ebook` is required to read Epub files" ):
2727 epub_parser .parse_file (Path ("test.epub" ))
2828
2929
30- def test_epub_parser_html2text_import_error (epub_parser ):
31- """Test that ImportError is raised when html2text is not available."""
32- fake_ebooklib = types .ModuleType ("ebooklib" )
33- fake_epub = types .ModuleType ("ebooklib.epub" )
34- fake_ebooklib .epub = fake_epub
35-
36- with patch .dict (sys .modules , {"ebooklib" : fake_ebooklib , "ebooklib.epub" : fake_epub }):
37- with patch .dict (sys .modules , {"html2text" : None }):
38- with pytest .raises (ValueError , match = "`html2text` is required to parse Epub files" ):
39- epub_parser .parse_file (Path ("test.epub" ))
40-
41-
4230def test_epub_parser_successful_parsing (epub_parser ):
4331 """Test successful parsing of an epub file."""
32+ fake_fast_ebook = types .ModuleType ("fast_ebook" )
33+ fake_epub = types .ModuleType ("fast_ebook.epub" )
34+ fake_fast_ebook .epub = fake_epub
4435
45- fake_ebooklib = types .ModuleType ("ebooklib" )
46- fake_epub = types .ModuleType ("ebooklib.epub" )
47- fake_html2text = types .ModuleType ("html2text" )
48-
49- # Mock ebooklib constants
50- fake_ebooklib .ITEM_DOCUMENT = "document"
51- fake_ebooklib .epub = fake_epub
52-
53- mock_item1 = MagicMock ()
54- mock_item1 .get_type .return_value = "document"
55- mock_item1 .get_content .return_value = b"<h1>Chapter 1</h1><p>Content 1</p>"
56-
57- mock_item2 = MagicMock ()
58- mock_item2 .get_type .return_value = "document"
59- mock_item2 .get_content .return_value = b"<h1>Chapter 2</h1><p>Content 2</p>"
60-
61- mock_item3 = MagicMock ()
62- mock_item3 .get_type .return_value = "other" # Should be ignored
63- mock_item3 .get_content .return_value = b"<p>Other content</p>"
64-
6536 mock_book = MagicMock ()
66- mock_book .get_items .return_value = [ mock_item1 , mock_item2 , mock_item3 ]
67-
37+ mock_book .to_markdown .return_value = "# Chapter 1\n\nContent 1\n\n# Chapter 2\n\nContent 2\n"
38+
6839 fake_epub .read_epub = MagicMock (return_value = mock_book )
69-
70- def mock_html2text_func (html_content ):
71- if "Chapter 1" in html_content :
72- return "# Chapter 1\n \n Content 1\n "
73- elif "Chapter 2" in html_content :
74- return "# Chapter 2\n \n Content 2\n "
75- return "Other content\n "
76-
77- fake_html2text .html2text = mock_html2text_func
78-
40+
7941 with patch .dict (sys .modules , {
80- "ebooklib" : fake_ebooklib ,
81- "ebooklib.epub" : fake_epub ,
82- "html2text" : fake_html2text
42+ "fast_ebook" : fake_fast_ebook ,
43+ "fast_ebook.epub" : fake_epub ,
8344 }):
8445 result = epub_parser .parse_file (Path ("test.epub" ))
85-
86- expected_result = "# Chapter 1\n \n Content 1\n \n # Chapter 2\n \n Content 2\n "
87- assert result == expected_result
88-
89- # Verify epub.read_epub was called with correct parameters
90- fake_epub .read_epub .assert_called_once_with (Path ("test.epub" ), options = {"ignore_ncx" : True })
46+
47+ assert result == "# Chapter 1\n\nContent 1\n\n# Chapter 2\n\nContent 2\n"
48+ fake_epub .read_epub .assert_called_once_with (Path ("test.epub" ))
9149
9250
9351def test_epub_parser_empty_book (epub_parser ):
94- """Test parsing an epub file with no document items."""
95- # Create mock modules
96- fake_ebooklib = types .ModuleType ("ebooklib" )
97- fake_epub = types .ModuleType ("ebooklib.epub" )
98- fake_html2text = types .ModuleType ("html2text" )
99-
100- fake_ebooklib .ITEM_DOCUMENT = "document"
101- fake_ebooklib .epub = fake_epub
102-
103- # Create mock book with no document items
52+ """Test parsing an epub file with no content."""
53+ fake_fast_ebook = types .ModuleType ("fast_ebook" )
54+ fake_epub = types .ModuleType ("fast_ebook.epub" )
55+ fake_fast_ebook .epub = fake_epub
56+
10457 mock_book = MagicMock ()
105- mock_book .get_items .return_value = []
106-
58+ mock_book .to_markdown .return_value = ""
59+
10760 fake_epub .read_epub = MagicMock (return_value = mock_book )
108- fake_html2text .html2text = MagicMock ()
109-
61+
11062 with patch .dict (sys .modules , {
111- "ebooklib" : fake_ebooklib ,
112- "ebooklib.epub" : fake_epub ,
113- "html2text" : fake_html2text
63+ "fast_ebook" : fake_fast_ebook ,
64+ "fast_ebook.epub" : fake_epub ,
11465 }):
11566 result = epub_parser .parse_file (Path ("empty.epub" ))
116- assert result == ""
11767
118- fake_html2text .html2text .assert_not_called ()
119-
120-
121- def test_epub_parser_non_document_items_ignored (epub_parser ):
122- """Test that non-document items are ignored during parsing."""
123- fake_ebooklib = types .ModuleType ("ebooklib" )
124- fake_epub = types .ModuleType ("ebooklib.epub" )
125- fake_html2text = types .ModuleType ("html2text" )
126-
127- fake_ebooklib .ITEM_DOCUMENT = "document"
128- fake_ebooklib .epub = fake_epub
129-
130- mock_doc_item = MagicMock ()
131- mock_doc_item .get_type .return_value = "document"
132- mock_doc_item .get_content .return_value = b"<p>Document content</p>"
133-
134- mock_other_item = MagicMock ()
135- mock_other_item .get_type .return_value = "image" # Not a document
136-
137- mock_book = MagicMock ()
138- mock_book .get_items .return_value = [mock_other_item , mock_doc_item ]
139-
140- fake_epub .read_epub = MagicMock (return_value = mock_book )
141- fake_html2text .html2text = MagicMock (return_value = "Document content\n " )
142-
143- with patch .dict (sys .modules , {
144- "ebooklib" : fake_ebooklib ,
145- "ebooklib.epub" : fake_epub ,
146- "html2text" : fake_html2text
147- }):
148- result = epub_parser .parse_file (Path ("test.epub" ))
149-
150- assert result == "Document content\n "
151-
152- fake_html2text .html2text .assert_called_once_with ("<p>Document content</p>" )
68+ assert result == ""
0 commit comments