|
| 1 | +""" |
| 2 | +DataCite API client implementation. |
| 3 | +
|
| 4 | +This module provides the implementation details for interacting with the DataCite API. |
| 5 | +The public interface is exposed through doi.py. |
| 6 | +""" |
| 7 | + |
| 8 | +from __future__ import annotations |
| 9 | + |
| 10 | +import copy |
| 11 | +import logging |
| 12 | +from typing import TYPE_CHECKING |
| 13 | + |
| 14 | +from django.conf import settings |
| 15 | +import requests |
| 16 | + |
| 17 | +if TYPE_CHECKING: |
| 18 | + from dandiapi.api.models import Version |
| 19 | + |
# Every setting that must be populated before DOI operations are attempted,
# stored as (current value, setting name) pairs. The values are read once,
# when this module is imported.
# Defined here instead of doi.py to avoid circular imports.
DANDI_DOI_SETTINGS = [
    (getattr(settings, setting_name), setting_name)
    for setting_name in (
        'DANDI_DOI_API_URL',
        'DANDI_DOI_API_USER',
        'DANDI_DOI_API_PASSWORD',
        'DANDI_DOI_API_PREFIX',
    )
]

logger = logging.getLogger(__name__)
| 30 | + |
| 31 | + |
class DataCiteClient:
    """
    Client for interacting with the DataCite API.

    Connection details (URL, credentials, DOI prefix) are read from Django settings
    when the client is constructed.
    """

    # DataCite answers a create request with HTTP 422 when the DOI already exists.
    _ALREADY_EXISTS_STATUS = 422

    def __init__(self):
        self.api_url = settings.DANDI_DOI_API_URL
        self.api_user = settings.DANDI_DOI_API_USER
        self.api_password = settings.DANDI_DOI_API_PASSWORD
        self.api_prefix = settings.DANDI_DOI_API_PREFIX
        self.auth = requests.auth.HTTPBasicAuth(self.api_user, self.api_password)
        self.headers = {'Accept': 'application/vnd.api+json'}
        # Timeout handed to every requests call, in seconds.
        self.timeout = 30

    @staticmethod
    def _status_code_of(error) -> int | None:
        """
        Return the HTTP status code carried by ``error``, or None if it has no response.

        The response is compared against None explicitly: ``requests.Response`` is falsy
        for every 4xx/5xx status, so a plain truthiness check would discard exactly the
        error responses this client needs to inspect.
        """
        response = getattr(error, 'response', None)
        if response is None:
            return None
        return response.status_code

    @staticmethod
    def _failure_details(summary: str, error, payload: dict) -> str:
        """Build a log message from ``summary``, the error's response body and the payload."""
        details = summary
        response = getattr(error, 'response', None)
        # Explicit None check for the same reason as in _status_code_of.
        if response is not None and hasattr(response, 'text'):
            details += f'\nResponse: {response.text}'
        details += f'\nPayload: {payload}'
        return details

    def is_configured(self) -> bool:
        """
        Check if the DOI client is properly configured.

        Returns:
            True when none of the values recorded in DANDI_DOI_SETTINGS is None.
        """
        return all(setting is not None for setting, _ in DANDI_DOI_SETTINGS)

    def format_doi(self, dandiset_id: str, version_id: str | None = None) -> str:
        """
        Format a DOI string for a dandiset or version.

        Args:
            dandiset_id: The dandiset identifier.
            version_id: Optional version identifier. If provided, creates a Version DOI.
                If omitted (or empty), creates a Dandiset DOI.

        Returns:
            Formatted DOI string.
        """
        if version_id:
            # TODO(asmacdo) replace "dandi" with non-hardcoded ID_PATTERN
            # https://github.com/dandi/dandi-schema/pull/294/files#diff-43c9cc813638d87fd33e527a7baccb2fd7dff85595a7e686bfaf61f0409bd403R47
            return f'{self.api_prefix}/dandi.{dandiset_id}/{version_id}'
        return f'{self.api_prefix}/dandi.{dandiset_id}'

    def generate_doi_data(
        self, version: Version, version_doi: bool = True, event: str | None = None
    ) -> tuple[str, dict]:
        """
        Generate DOI data for a version or dandiset.

        The version's metadata is deep-copied before being modified, so the Version
        object itself is left untouched.

        Args:
            version: Version object containing metadata.
            version_doi: If True, generate a Version DOI, otherwise generate a Dandiset DOI.
            event: The DOI event type.
                - None: Creates a Draft DOI.
                - "publish": Creates or promotes to a Findable DOI.
                - "hide": Converts to a Registered DOI.

        Returns:
            Tuple of (doi_string, datacite_payload)
        """
        # TODO(asmacdo) if not datacite configured make sure we dont save any dois to model
        from dandischema.datacite import to_datacite

        dandiset_id = version.dandiset.identifier
        version_id = version.version
        metadata = copy.deepcopy(version.metadata)

        # Generate the appropriate DOI string
        if version_doi:
            doi = self.format_doi(dandiset_id, version_id)
        else:
            doi = self.format_doi(dandiset_id)
            # Dandiset DOI is the same as version url without version
            metadata['url'] = metadata['url'].rsplit('/', 1)[0]

        metadata['doi'] = doi

        # Generate the datacite payload with the appropriate event
        datacite_payload = to_datacite(metadata, event=event)

        return (doi, datacite_payload)

    def create_or_update_doi(self, original_datacite_payload: dict) -> str | None:
        """
        Create or update a DOI with the DataCite API.

        The DOI is first created with a POST. If DataCite reports that it already
        exists (HTTP 422), the same payload is sent again as a PUT to update it.
        The caller's payload is never modified; a deep copy is sent instead.

        Args:
            original_datacite_payload: The DOI payload to send to DataCite.

        Returns:
            The DOI string on success, None when the DOI API is not configured.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        datacite_payload = copy.deepcopy(original_datacite_payload)
        attributes = datacite_payload['data']['attributes']
        doi = attributes['doi']

        if not self.is_configured():
            logger.warning('DOI API not configured. Skipping operations for %s', doi)
            return None

        # Check if we're trying to create a non-draft DOI when it's not allowed
        if not settings.DANDI_DOI_PUBLISH and attributes.get('event') in ['publish', 'hide']:
            # Remove the event to make it a draft DOI
            del attributes['event']
            logger.warning(
                'DANDI_DOI_PUBLISH is not enabled. DOI %s will be created as draft.', doi
            )

        try:
            response = requests.post(
                self.api_url,
                json=datacite_payload,
                auth=self.auth,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as create_error:
            if self._status_code_of(create_error) != self._ALREADY_EXISTS_STATUS:
                logger.exception(
                    self._failure_details(
                        f'Failed to create DOI {doi}', create_error, datacite_payload
                    )
                )
                raise

            # Retry with PUT if DOI already exists
            update_url = f'{self.api_url}/{doi}'
            try:
                update_response = requests.put(
                    update_url,
                    json=datacite_payload,
                    auth=self.auth,
                    headers=self.headers,
                    timeout=self.timeout,
                )
                update_response.raise_for_status()
            except Exception as update_error:
                # Report the response of the failed update, not of the earlier create.
                logger.exception(
                    self._failure_details(
                        f'Failed to update existing DOI {doi}', update_error, datacite_payload
                    )
                )
                raise

        return doi

    def delete_or_hide_doi(self, doi: str) -> None:
        """
        Delete a draft DOI or hide a non-draft DOI depending on its state.

        This method first fetches the DOI's state and then either deletes it (if it's a
        draft, or if no state is reported) or hides it (any other state). Hiding a DOI
        requires DANDI_DOI_PUBLISH to be enabled; otherwise the DOI is left as it is.
        If DataCite answers any of the requests with HTTP 404, a warning is logged and
        the method returns without raising.

        Args:
            doi: The DOI to delete or hide.

        Raises:
            requests.exceptions.HTTPError: If the API request fails with a status
                other than 404.
        """
        if not self.is_configured():
            logger.warning('DOI API not configured. Skipping operations for %s', doi)
            return

        doi_url = f'{self.api_url}/{doi}'

        try:
            # First, get DOI information to check its state
            response = requests.get(
                doi_url, auth=self.auth, headers=self.headers, timeout=self.timeout
            )
            response.raise_for_status()

            doi_data = response.json()
            # Get the state, defaulting to 'draft' if absent
            doi_state = doi_data.get('data', {}).get('attributes', {}).get('state', 'draft')

            if doi_state == 'draft':
                # Draft DOIs can be deleted
                delete_response = requests.delete(
                    doi_url, auth=self.auth, headers=self.headers, timeout=self.timeout
                )
                delete_response.raise_for_status()
                logger.info('Successfully deleted draft DOI: %s', doi)
            else:
                # Findable DOIs must be hidden
                # Check if DANDI_DOI_PUBLISH is enabled for hiding
                if not settings.DANDI_DOI_PUBLISH:
                    logger.warning(
                        'DANDI_DOI_PUBLISH is not enabled. DOI %s will remain findable.', doi
                    )
                    return

                # Create hide payload
                hide_payload = {
                    'data': {'id': doi, 'type': 'dois', 'attributes': {'event': 'hide'}}
                }

                hide_response = requests.put(
                    doi_url,
                    json=hide_payload,
                    auth=self.auth,
                    headers=self.headers,
                    timeout=self.timeout,
                )
                hide_response.raise_for_status()
                logger.info('Successfully hid findable DOI: %s', doi)

        except requests.exceptions.HTTPError as e:
            # Compare the status code directly; a 404 response object is itself falsy.
            if self._status_code_of(e) == requests.codes.not_found:
                logger.warning('Tried to get data for nonexistent DOI %s', doi)
                return
            logger.exception('Failed to delete or hide DOI %s', doi)
            raise
| 245 | + |
| 246 | + |
0 commit comments