Skip to content

Commit 8b125d5

Browse files
committed
enh: Add Dandiset DOIs
- The Dandiset DOI redirects to the Dandiset landing page (DLP). Example: 10.80507/dandi.000004
- The Dandiset DOI is stored in the `doi` field of the draft version.
- Until the first publication, the Dandiset DOI metadata on DataCite matches the draft version.
- Once a Dandiset is published, the Dandiset DOI metadata matches the latest published version.

See the design document for more details: #2012
1 parent 6c2e7a6 commit 8b125d5

File tree

15 files changed

+1098
-113
lines changed

15 files changed

+1098
-113
lines changed

dandiapi/api/checks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from django.conf import settings
44
from django.core.checks import Error, register
55

6-
from dandiapi.api.doi import DANDI_DOI_SETTINGS
6+
from dandiapi.api.datacite import DANDI_DOI_SETTINGS
77

88

99
@register()

dandiapi/api/datacite.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
"""
2+
DataCite API client implementation.
3+
4+
This module provides the implementation details for interacting with the DataCite API.
5+
The public interface is exposed through doi.py.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import copy
11+
import logging
12+
from typing import TYPE_CHECKING
13+
14+
from django.conf import settings
15+
import requests
16+
17+
if TYPE_CHECKING:
18+
from dandiapi.api.models import Version
19+
20+
# All of the required DOI configuration settings, as (current value, setting name) pairs.
# The values are read from Django settings once, when this module is imported.
# Cannot be in doi.py to avoid circular imports.
DANDI_DOI_SETTINGS = [
    (settings.DANDI_DOI_API_URL, 'DANDI_DOI_API_URL'),
    (settings.DANDI_DOI_API_USER, 'DANDI_DOI_API_USER'),
    (settings.DANDI_DOI_API_PASSWORD, 'DANDI_DOI_API_PASSWORD'),
    (settings.DANDI_DOI_API_PREFIX, 'DANDI_DOI_API_PREFIX'),
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
30+
31+
32+
class DataCiteClient:
    """
    Client for interacting with the DataCite API.

    The API URL, credentials and DOI prefix are read from Django settings when the client is
    instantiated. Every request uses HTTP basic auth, a JSON:API ``Accept`` header and a
    30 second timeout.
    """

    # Status code DataCite is expected to return when the DOI being created already exists.
    _DOI_ALREADY_EXISTS_STATUS = 422

    def __init__(self):
        """Read the DataCite connection settings and prepare the request defaults."""
        self.api_url = settings.DANDI_DOI_API_URL
        self.api_user = settings.DANDI_DOI_API_USER
        self.api_password = settings.DANDI_DOI_API_PASSWORD
        self.api_prefix = settings.DANDI_DOI_API_PREFIX
        self.auth = requests.auth.HTTPBasicAuth(self.api_user, self.api_password)
        self.headers = {'Accept': 'application/vnd.api+json'}
        self.timeout = 30

    def is_configured(self) -> bool:
        """Check if the DOI client is properly configured (no required setting is None)."""
        return all(setting is not None for setting, _ in DANDI_DOI_SETTINGS)

    def format_doi(self, dandiset_id: str, version_id: str | None = None) -> str:
        """
        Format a DOI string for a dandiset or version.

        Args:
            dandiset_id: The dandiset identifier.
            version_id: Optional version identifier. If provided (and non-empty), creates a
                Version DOI. If omitted, creates a Dandiset DOI.

        Returns:
            Formatted DOI string: ``<prefix>/dandi.<dandiset_id>[/<version_id>]``.
        """
        if version_id:
            # TODO(asmacdo) replace "dandi" with non-hardcoded ID_PATTERN
            # https://github.com/dandi/dandi-schema/pull/294/files#diff-43c9cc813638d87fd33e527a7baccb2fd7dff85595a7e686bfaf61f0409bd403R47
            return f'{self.api_prefix}/dandi.{dandiset_id}/{version_id}'
        return f'{self.api_prefix}/dandi.{dandiset_id}'

    def generate_doi_data(
        self, version: Version, version_doi: bool = True, event: str | None = None
    ) -> tuple[str, dict]:
        """
        Generate DOI data for a version or dandiset.

        The version's metadata is deep-copied before being modified, so ``version.metadata``
        itself is left untouched.

        Args:
            version: Version object containing metadata.
            version_doi: If True, generate a Version DOI, otherwise generate a Dandiset DOI.
            event: The DOI event type.
                - None: Creates a Draft DOI.
                - "publish": Creates or promotes to a Findable DOI.
                - "hide": Converts to a Registered DOI.

        Returns:
            Tuple of (doi_string, datacite_payload)
        """
        # TODO(asmacdo) if not datacite configured make sure we dont save any dois to model
        from dandischema.datacite import to_datacite

        dandiset_id = version.dandiset.identifier
        version_id = version.version
        metadata = copy.deepcopy(version.metadata)

        # Generate the appropriate DOI string
        if version_doi:
            doi = self.format_doi(dandiset_id, version_id)
        else:
            doi = self.format_doi(dandiset_id)
            # Dandiset DOI is the same as version url without version:
            # drop the last path segment of the version URL.
            metadata['url'] = metadata['url'].rsplit('/', 1)[0]

        metadata['doi'] = doi

        # Generate the datacite payload with the appropriate event
        datacite_payload = to_datacite(metadata, event=event)

        return (doi, datacite_payload)

    @staticmethod
    def _failure_details(summary: str, error: Exception, payload: dict) -> str:
        """Build a log message from a summary, the failed response body (if any) and payload."""
        details = summary
        response = getattr(error, 'response', None)
        # Compare against None explicitly: a requests.Response is falsy for every 4xx/5xx
        # status, so a plain truthiness test would always skip the error body.
        if response is not None:
            details += f'\nResponse: {response.text}'
        details += f'\nPayload: {payload}'
        return details

    def create_or_update_doi(self, original_datacite_payload: dict) -> str | None:
        """
        Create or update a DOI with the DataCite API.

        The payload is first POSTed to create the DOI. If DataCite answers with HTTP 422
        (treated as "DOI already exists"), the same payload is PUT to the DOI's own URL to
        update it instead.

        Args:
            original_datacite_payload: The DOI payload to send to DataCite. It is deep-copied
                and never modified.

        Returns:
            The DOI string on success, None when the DOI API is not configured.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        datacite_payload = copy.deepcopy(original_datacite_payload)
        attributes = datacite_payload['data']['attributes']
        doi = attributes['doi']

        if not self.is_configured():
            logger.warning('DOI API not configured. Skipping operations for %s', doi)
            return None

        # Check if we're trying to create a non-draft DOI when it's not allowed
        if not settings.DANDI_DOI_PUBLISH and attributes.get('event') in ['publish', 'hide']:
            # Remove the event to make it a draft DOI
            attributes.pop('event', None)
            logger.warning(
                'DANDI_DOI_PUBLISH is not enabled. DOI %s will be created as draft.', doi
            )

        try:
            response = requests.post(
                self.api_url,
                json=datacite_payload,
                auth=self.auth,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            already_exists = (
                e.response is not None
                and e.response.status_code == self._DOI_ALREADY_EXISTS_STATUS
            )
            if not already_exists:
                logger.exception(
                    self._failure_details(f'Failed to create DOI {doi}', e, datacite_payload)
                )
                raise
        else:
            # Created successfully
            return doi

        # The DOI already exists: retry as an update with PUT.
        update_url = f'{self.api_url}/{doi}'
        try:
            update_response = requests.put(
                update_url,
                json=datacite_payload,
                auth=self.auth,
                headers=self.headers,
                timeout=self.timeout,
            )
            update_response.raise_for_status()
        except Exception as update_error:
            # Log the response of the failed update itself, not of the earlier POST.
            logger.exception(
                self._failure_details(
                    f'Failed to update existing DOI {doi}', update_error, datacite_payload
                )
            )
            raise
        return doi

    def delete_or_hide_doi(self, doi: str) -> None:
        """
        Delete a draft DOI or hide a findable DOI depending on its state.

        This method first checks the DOI's state and then either deletes it (if it's a draft)
        or hides it (any other state). Hiding a DOI requires DANDI_DOI_PUBLISH to be enabled;
        otherwise the DOI is left as is. A 404 from DataCite is logged and ignored.

        Args:
            doi: The DOI to delete or hide.

        Raises:
            requests.exceptions.HTTPError: If the API request fails with anything but a 404.
        """
        if not self.is_configured():
            logger.warning('DOI API not configured. Skipping operations for %s', doi)
            return

        doi_url = f'{self.api_url}/{doi}'

        try:
            # First, get DOI information to check its state
            response = requests.get(
                doi_url, auth=self.auth, headers=self.headers, timeout=self.timeout
            )
            response.raise_for_status()

            doi_data = response.json()
            # Get the state, defaulting to 'draft' if absent
            doi_state = doi_data.get('data', {}).get('attributes', {}).get('state', 'draft')

            if doi_state == 'draft':
                # Draft DOIs can be deleted
                delete_response = requests.delete(
                    doi_url, auth=self.auth, headers=self.headers, timeout=self.timeout
                )
                delete_response.raise_for_status()
                logger.info('Successfully deleted draft DOI: %s', doi)
            else:
                # Findable DOIs must be hidden
                # Check if DANDI_DOI_PUBLISH is enabled for hiding
                if not settings.DANDI_DOI_PUBLISH:
                    logger.warning(
                        'DANDI_DOI_PUBLISH is not enabled. DOI %s will remain findable.', doi
                    )
                    return

                # Create hide payload
                hide_payload = {
                    'data': {'id': doi, 'type': 'dois', 'attributes': {'event': 'hide'}}
                }

                hide_response = requests.put(
                    doi_url,
                    json=hide_payload,
                    auth=self.auth,
                    headers=self.headers,
                    timeout=self.timeout,
                )
                hide_response.raise_for_status()
                logger.info('Successfully hid findable DOI: %s', doi)

        except requests.exceptions.HTTPError as e:
            # `e.response is not None` rather than truthiness: a 404 Response is falsy, so a
            # bare `e.response and ...` would never reach the status-code comparison.
            if e.response is not None and e.response.status_code == requests.codes.not_found:
                logger.warning('Tried to get data for nonexistent DOI %s', doi)
                return
            logger.exception('Failed to delete or hide DOI %s', doi)
            raise
245+
246+

0 commit comments

Comments
 (0)