Skip to content

Commit 1b9b183

Browse files
committed
Make `fetch_doi_page` a private function (renamed to `_fetch_doi_page`)
1 parent 114f722 commit 1b9b183

File tree

3 files changed

+26
-48
lines changed

3 files changed

+26
-48
lines changed

src/datacite/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,10 @@
11
"""DataCite utilities package."""
22
from .datacite import (
33
fetch_doi,
4-
fetch_doi_page,
54
write_ndjson_gz,
65
)
76

87
__all__ = [
98
"fetch_doi",
10-
"fetch_doi_page",
119
"write_ndjson_gz",
1210
]

src/datacite/datacite.py

Lines changed: 2 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,6 @@
1313
Returns an iterator yielding individual DOI objects. Handles session
1414
management and pagination transparently.
1515
16-
fetch_doi_page: Fetch a single page of DOI objects. Useful for manual
17-
pagination control or when you only need a specific page of results.
18-
1916
write_ndjson_gz: Write an iterable of objects to a gzipped newline-delimited
2017
JSON file. Streams data to disk without loading everything into memory.
2118
@@ -35,23 +32,6 @@
3532
>>> count = write_ndjson_gz(dois, "synapse_dois.ndjson.gz")
3633
>>> print(f"Saved {count} DOI records")
3734
38-
Advanced usage with custom pagination:
39-
40-
>>> import requests
41-
>>> from datacite import fetch_doi_page
42-
>>>
43-
>>> with requests.Session() as session:
44-
... # Fetch only the first page
45-
... result = fetch_doi_page(
46-
... session=session,
47-
... prefixes=["10.7303"],
48-
... state="findable",
49-
... page_size=100,
50-
... page_number=0,
51-
... detail=True
52-
... )
53-
... print(f"Got {len(result['data'])} DOIs")
54-
5535
Notes:
5636
- DataCite provides higher rate limits (1000 requests per 5 minutes) when
5737
you include an email address in the User-Agent header via user_agent_mailto.
@@ -250,7 +230,7 @@ def _validate_fetch_params(page_size: int, state: str) -> None:
250230
)
251231

252232

253-
def fetch_doi_page(
233+
def _fetch_doi_page(
254234
session: requests.Session,
255235
prefixes: List[str],
256236
state: str,
@@ -341,7 +321,7 @@ def fetch_doi(
341321

342322
page_number = start_page
343323
while True:
344-
payload = fetch_doi_page(
324+
payload = _fetch_doi_page(
345325
s,
346326
prefixes=prefixes,
347327
state=state,

tests/datacite/test_datacite.py

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
_build_user_agent_headers,
2525
_should_continue_pagination,
2626
_serialize_to_ndjson,
27-
fetch_doi_page,
27+
_fetch_doi_page,
2828
fetch_doi,
2929
write_ndjson_gz,
3030
)
@@ -517,7 +517,7 @@ def test_nested_objects(self):
517517

518518

519519
class TestFetchDoiPage:
520-
"""Tests for fetch_doi_page function.
520+
"""Tests for _fetch_doi_page function.
521521
522522
Tests single page fetching with mocked HTTP calls.
523523
"""
@@ -531,7 +531,7 @@ def test_successful_fetch(self, prefixes, mock_api_response_full_page, create_mo
531531
)
532532
mock_session.get.return_value = mock_response
533533

534-
result = fetch_doi_page(
534+
result = _fetch_doi_page(
535535
session=mock_session,
536536
prefixes=prefixes,
537537
state="findable",
@@ -553,7 +553,7 @@ def test_passes_correct_parameters(self, prefixes, create_mock_response):
553553
)
554554
mock_session.get.return_value = mock_response
555555

556-
fetch_doi_page(
556+
_fetch_doi_page(
557557
session=mock_session,
558558
prefixes=prefixes,
559559
state="registered",
@@ -582,7 +582,7 @@ def test_handles_http_error(self, prefixes, create_mock_response):
582582
mock_session.get.return_value = mock_response
583583

584584
with pytest.raises(requests.HTTPError):
585-
fetch_doi_page(
585+
_fetch_doi_page(
586586
session=mock_session,
587587
prefixes=prefixes,
588588
state="findable",
@@ -599,7 +599,7 @@ def test_malformed_json_response(self, prefixes, create_mock_response):
599599
mock_session.get.return_value = mock_response
600600

601601
with pytest.raises(json.JSONDecodeError):
602-
fetch_doi_page(
602+
_fetch_doi_page(
603603
session=mock_session,
604604
prefixes=prefixes,
605605
state="findable",
@@ -617,7 +617,7 @@ def test_response_missing_data_key(self, prefixes, create_mock_response):
617617
)
618618
mock_session.get.return_value = mock_response
619619

620-
result = fetch_doi_page(
620+
result = _fetch_doi_page(
621621
session=mock_session,
622622
prefixes=prefixes,
623623
state="findable",
@@ -635,7 +635,7 @@ def test_invalid_page_size_zero(self, prefixes):
635635
mock_session = Mock(spec=requests.Session)
636636

637637
with pytest.raises(ValueError, match="page_size must be at least 1"):
638-
fetch_doi_page(
638+
_fetch_doi_page(
639639
session=mock_session,
640640
prefixes=prefixes,
641641
state="findable",
@@ -649,7 +649,7 @@ def test_invalid_page_size_negative(self, prefixes):
649649
mock_session = Mock(spec=requests.Session)
650650

651651
with pytest.raises(ValueError, match="page_size must be at least 1"):
652-
fetch_doi_page(
652+
_fetch_doi_page(
653653
session=mock_session,
654654
prefixes=prefixes,
655655
state="findable",
@@ -663,7 +663,7 @@ def test_invalid_page_size_exceeds_maximum(self, prefixes):
663663
mock_session = Mock(spec=requests.Session)
664664

665665
with pytest.raises(ValueError, match="page_size cannot exceed 1000"):
666-
fetch_doi_page(
666+
_fetch_doi_page(
667667
session=mock_session,
668668
prefixes=prefixes,
669669
state="findable",
@@ -677,7 +677,7 @@ def test_invalid_state(self, prefixes):
677677
mock_session = Mock(spec=requests.Session)
678678

679679
with pytest.raises(ValueError, match="state must be one of"):
680-
fetch_doi_page(
680+
_fetch_doi_page(
681681
session=mock_session,
682682
prefixes=prefixes,
683683
state="invalid_state",
@@ -697,7 +697,7 @@ def test_valid_states(self, prefixes, state, create_mock_response):
697697
mock_session.get.return_value = mock_response
698698

699699
# Should not raise ValueError
700-
result = fetch_doi_page(
700+
result = _fetch_doi_page(
701701
session=mock_session,
702702
prefixes=prefixes,
703703
state=state,
@@ -717,7 +717,7 @@ class TestFetchDoi:
717717

718718
def test_single_full_page(self, prefixes, sample_doi_objects, mocker):
719719
"""Test fetching a single full page."""
720-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
720+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
721721
mock_fetch_page.side_effect = [
722722
{"data": sample_doi_objects},
723723
{"data": []} # Empty next page
@@ -742,7 +742,7 @@ def test_multiple_pages(self, prefixes, mocker):
742742
page2_data = [{"id": f"10.7303/syn{i:05d}"} for i in range(11, 21)]
743743
page3_data = [{"id": f"10.7303/syn{i:05d}"} for i in range(21, 25)]
744744

745-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
745+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
746746
mock_fetch_page.side_effect = [
747747
{"data": page1_data},
748748
{"data": page2_data},
@@ -762,7 +762,7 @@ def test_multiple_pages(self, prefixes, mocker):
762762

763763
def test_empty_results(self, prefixes, mocker):
764764
"""Test handling of no results."""
765-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
765+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
766766
mock_fetch_page.return_value = {"data": []}
767767

768768
results = list(fetch_doi(
@@ -778,7 +778,7 @@ def test_empty_results(self, prefixes, mocker):
778778

779779
def test_user_agent_header_set(self, prefixes, mocker):
780780
"""Test that User-Agent header is set when email provided."""
781-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
781+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
782782
mock_fetch_page.return_value = {"data": []}
783783

784784
# Mock Session to capture headers
@@ -804,7 +804,7 @@ def test_user_agent_header_set(self, prefixes, mocker):
804804

805805
def test_start_page_parameter(self, prefixes, mocker):
806806
"""Test that start_page parameter is respected."""
807-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
807+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
808808
mock_fetch_page.return_value = {"data": [{"id": "test"}]}
809809

810810
list(fetch_doi(
@@ -824,7 +824,7 @@ def test_stops_on_partial_page(self, prefixes, mocker):
824824
page1_data = [{"id": f"id{i}"} for i in range(10)] # Full page
825825
page2_data = [{"id": "last"}] # Partial page
826826

827-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
827+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
828828
mock_fetch_page.side_effect = [
829829
{"data": page1_data},
830830
{"data": page2_data}
@@ -843,7 +843,7 @@ def test_handles_api_error_mid_pagination(self, prefixes, mocker):
843843
"""Test that API errors during pagination are propagated."""
844844
page1_data = [{"id": f"id{i}"} for i in range(10)]
845845

846-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
846+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
847847
mock_fetch_page.side_effect = [
848848
{"data": page1_data},
849849
requests.HTTPError("500 Server Error") # Error on second page
@@ -857,7 +857,7 @@ def test_handles_api_error_mid_pagination(self, prefixes, mocker):
857857

858858
def test_response_with_missing_data_key(self, prefixes, mocker):
859859
"""Test pagination when response is missing 'data' key."""
860-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
860+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
861861
# API returns response without 'data' key
862862
mock_fetch_page.return_value = {"meta": {"total": 0}, "links": {}}
863863

@@ -872,7 +872,7 @@ def test_response_with_missing_data_key(self, prefixes, mocker):
872872

873873
def test_no_user_agent_when_mailto_none(self, prefixes, mocker):
874874
"""Test that no User-Agent header is set when mailto is None."""
875-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
875+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
876876
mock_fetch_page.return_value = {"data": []}
877877

878878
with patch("src.datacite.datacite.requests.Session") as mock_session_class:
@@ -896,7 +896,7 @@ def test_large_page_size_boundary(self, prefixes, mocker):
896896
# Simulate API returning exactly 1000 items
897897
page_data = [{"id": f"id{i}"} for i in range(1000)]
898898

899-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
899+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
900900
mock_fetch_page.side_effect = [
901901
{"data": page_data},
902902
{"data": []} # No more data
@@ -937,7 +937,7 @@ def test_invalid_page_size_exceeds_maximum(self, prefixes):
937937

938938
def test_page_size_boundary_values(self, prefixes, mocker):
939939
"""Test that boundary values 1 and 1000 are accepted."""
940-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
940+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
941941
mock_fetch_page.return_value = {"data": []}
942942

943943
# page_size=1 should work
@@ -961,7 +961,7 @@ def test_invalid_state(self, prefixes):
961961
@pytest.mark.parametrize("state", ["findable", "registered", "draft"])
962962
def test_valid_states(self, prefixes, state, mocker):
963963
"""Test that all valid states are accepted."""
964-
mock_fetch_page = mocker.patch("src.datacite.datacite.fetch_doi_page")
964+
mock_fetch_page = mocker.patch("src.datacite.datacite._fetch_doi_page")
965965
mock_fetch_page.return_value = {"data": []}
966966

967967
# Should not raise ValueError

0 commit comments

Comments
 (0)