Skip to content

Commit e48f5b7

Browse files
committed
Switch back to using DOI as persistent_id
1 parent 60c0d70 commit e48f5b7

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

repo2docker/contentproviders/dataverse.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,6 @@ def detect(self, spec, ref=None, extra_args=None):
5858
if host is None:
5959
return
6060

61-
# Used only for content_id
62-
self.url = url
63-
6461
# At this point, we *know* this is a dataverse URL, because:
6562
# 1. The DOI resolved to a particular host (if using DOI)
6663
# 2. The host is in the list of known dataverse installations
@@ -171,6 +168,10 @@ def get_datafiles(self, url: str) -> List[dict]:
171168
# We already handled 404, raise error for everything else
172169
resp.raise_for_status()
173170

171+
# We know the exact persistent_id of the dataset we fetched now
172+
# Save it for use as content_id
173+
self.persistent_id = persistent_id
174+
174175
data = resp.json()["data"]
175176

176177
return data["latestVersion"]["files"]
@@ -212,5 +213,9 @@ def fetch(self, spec, output_dir, yield_output=False):
212213

213214
@property
214215
def content_id(self):
215-
"""The Dataverse persistent identifier."""
216-
return hashlib.sha256(self.url.encode()).hexdigest()
216+
"""
217+
The Dataverse persistent identifier.
218+
219+
Only valid if called after a successful fetch
220+
"""
221+
return self.persistent_id

0 commit comments

Comments
 (0)