@@ -57,7 +57,7 @@ async def test_download_single_article_xml(test_dois: Sequence[str]) -> None:
5757
5858@pytest .mark .asyncio ()
5959async def test_download_marks_truncated_full_text () -> None :
60- """When the payload lacks body content we should mark the view as STANDARD ."""
60+ """When the payload lacks body content we emit an error but keep going ."""
6161
6262 doi = "10.1016/j.neucli.2007.12.007"
6363 payload = b"""
@@ -80,16 +80,30 @@ async def handler(request: httpx.Request) -> httpx.Response:
8080
8181 cfg = _test_settings ()
8282 transport = httpx .MockTransport (handler )
83+ progress_calls : list [tuple [dict [str , str ], ArticleContent | None , BaseException | None ]] = []
84+
85+ def progress_cb (
86+ record : dict [str , str ],
87+ article : ArticleContent | None ,
88+ error : BaseException | None ,
89+ ) -> None :
90+ progress_calls .append ((record , article , error ))
91+
8392 async with ScienceDirectClient (cfg , transport = transport ) as client :
84- with pytest .raises (httpx .HTTPStatusError , match = "metadata-only payload" ):
85- await download_articles ([{"doi" : doi }], client = client )
93+ articles = await download_articles ([{"doi" : doi }], client = client , progress_callback = progress_cb )
8694
8795 assert len (captured_requests ) == 1
96+ assert articles == []
97+ assert len (progress_calls ) == 1
98+ record , article , error = progress_calls [0 ]
99+ assert record ["doi" ] == doi
100+ assert article is None
101+ assert isinstance (error , httpx .HTTPStatusError )
88102
89103
90104@pytest .mark .asyncio ()
91105async def test_download_errors_when_full_view_invalid (test_dois : Sequence [str ]) -> None :
92- """Client should raise if Elsevier rejects FULL view requests ."""
106+ """Client reports invalid FULL view errors without raising ."""
93107
94108 doi = test_dois [0 ]
95109
@@ -108,9 +122,24 @@ async def handler(request: httpx.Request) -> httpx.Response:
108122
109123 cfg = _test_settings ()
110124 transport = httpx .MockTransport (handler )
125+ progress_calls : list [tuple [dict [str , str ], ArticleContent | None , BaseException | None ]] = []
126+
127+ def progress_cb (
128+ record : dict [str , str ],
129+ article : ArticleContent | None ,
130+ error : BaseException | None ,
131+ ) -> None :
132+ progress_calls .append ((record , article , error ))
133+
111134 async with ScienceDirectClient (cfg , transport = transport ) as client :
112- with pytest .raises (httpx .HTTPStatusError , match = "rejected FULL view" ):
113- await download_articles ([{"doi" : doi }], client = client )
135+ articles = await download_articles ([{"doi" : doi }], client = client , progress_callback = progress_cb )
136+
137+ assert articles == []
138+ assert len (progress_calls ) == 1
139+ record , article , error = progress_calls [0 ]
140+ assert record ["doi" ] == doi
141+ assert article is None
142+ assert isinstance (error , httpx .HTTPStatusError )
114143
115144
116145@pytest .mark .asyncio ()
@@ -271,7 +300,7 @@ def progress_cb(
271300
272301@pytest .mark .asyncio ()
273302async def test_download_progress_callback_receives_errors (test_dois : Sequence [str ]) -> None :
274- """Progress callback should receive exceptions before they propagate ."""
303+ """Progress callback should receive exceptions even when downloads do not raise ."""
275304
276305 cfg = _test_settings ()
277306 doi = test_dois [0 ]
@@ -290,12 +319,68 @@ async def progress_cb(
290319 await asyncio .sleep (0 )
291320 progress_calls .append ((record , article , error ))
292321
293- with pytest .raises (httpx .TimeoutException ):
294- async with ScienceDirectClient (cfg , transport = transport ) as client :
295- await download_articles ([{"doi" : doi }], client = client , progress_callback = progress_cb )
322+ async with ScienceDirectClient (cfg , transport = transport ) as client :
323+ articles = await download_articles ([{"doi" : doi }], client = client , progress_callback = progress_cb )
296324
325+ assert articles == []
297326 assert len (progress_calls ) == 1
298327 record , article , error = progress_calls [0 ]
299328 assert record ["doi" ] == doi
300329 assert article is None
301330 assert isinstance (error , httpx .TimeoutException )
331+
332+
333+ @pytest .mark .asyncio ()
334+ async def test_download_continues_after_identifier_error (test_dois : Sequence [str ]) -> None :
335+ """Errors for one record should not prevent later records from being processed."""
336+
337+ cfg = _test_settings ()
338+ bad_doi , good_doi = test_dois [:2 ]
339+
340+ call_counter = 0
341+
342+ async def handler (request : httpx .Request ) -> httpx .Response :
343+ nonlocal call_counter
344+ call_counter += 1
345+ doi = request .url .path .rsplit ("/" , 1 )[- 1 ]
346+ if call_counter == 1 :
347+ raise httpx .TimeoutException ("simulated timeout" )
348+ payload = f"""
349+ <article xmlns="http://www.elsevier.com/xml/svapi/article/dtd" xmlns:ce="http://www.elsevier.com/xml/common/dtd">
350+ <item-info>
351+ <doi>{ doi } </doi>
352+ <pii>S105381192400679X</pii>
353+ </item-info>
354+ <ce:body><ce:para>{ doi } </ce:para></ce:body>
355+ </article>
356+ """ .encode ("utf-8" )
357+ return httpx .Response (
358+ 200 ,
359+ content = payload ,
360+ headers = {"content-type" : "application/xml" },
361+ request = request ,
362+ )
363+
364+ transport = httpx .MockTransport (handler )
365+ progress_calls : list [tuple [dict [str , str ], ArticleContent | None , BaseException | None ]] = []
366+
367+ def progress_cb (
368+ record : dict [str , str ],
369+ article : ArticleContent | None ,
370+ error : BaseException | None ,
371+ ) -> None :
372+ progress_calls .append ((record , article , error ))
373+
374+ records = [{"doi" : bad_doi }, {"doi" : good_doi }]
375+ async with ScienceDirectClient (cfg , transport = transport ) as client :
376+ articles = await download_articles (records , client = client , progress_callback = progress_cb )
377+
378+ assert len (articles ) == 1
379+ assert articles [0 ].doi == good_doi
380+ assert len (progress_calls ) == 2
381+ assert progress_calls [0 ][0 ]["doi" ] == bad_doi
382+ assert progress_calls [0 ][1 ] is None
383+ assert isinstance (progress_calls [0 ][2 ], httpx .TimeoutException )
384+ assert progress_calls [1 ][0 ]["doi" ] == good_doi
385+ assert progress_calls [1 ][1 ] is not None
386+ assert progress_calls [1 ][2 ] is None
0 commit comments