Skip to content

Commit a46e71b

Browse files
authored
Features/automatic dataverse upload with title change (#340)
* Include title update from release tag retrieval
* Adapted upload script added
* Explicit pydataverse dependency added
* Action debug
* requests install added
* Arguments bug fix
* argparse bug fix
* NativeApi debug
* Debugs
* pydataverse version bump
* Title change using requests
* Server check
* Directory change
* Directory one level deeper
* Set publish automatic flag to true
1 parent c23b5e2 commit a46e71b

File tree

2 files changed

+180
-7
lines changed

2 files changed

+180
-7
lines changed

.github/workflows/dataverse_workflow.yml

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
# NOTE(review): reconstructed from the commit diff; the workflow header
# (name:/on: — original lines 1-7) is outside this view.
jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          path: "main"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          pip install wheel requests
          python3 -m pip install --upgrade setuptools
          # BUG FIX: the version specifier must be quoted; unquoted, bash
          # parses '>=0.3.1' as an output redirection to a file named
          # '=0.3.1' and pip installs an unpinned pyDataverse.
          pip install -U "pyDataverse>=0.3.1"

      # Resolve the latest release tag so it can be embedded in the title
      - id: getrelease
        uses: pozetroninc/github-action-get-latest-release@master
        with:
          repository: GEUS-Glaciology-and-Climate/pypromice

      - name: Send repo to Dataverse
        run: |
          cd $GITHUB_WORKSPACE/main
          python3 $GITHUB_WORKSPACE/main/.github/workflows/upload_to_dataverse.py \
            --token "${{secrets.DATAVERSE_TOKEN}}" \
            --server https://dataverse.geus.dk \
            --doi doi:10.22008/FK2/3TSBF0 \
            --title "pypromice ${{ steps.getrelease.outputs.release }}" \
            --repo $GITHUB_REPOSITORY \
            --remove True \
            --publish True
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import argparse
2+
from time import sleep
3+
from os.path import join, relpath
4+
from os import walk, getcwd
5+
import requests, json
6+
from pyDataverse.api import NativeApi
7+
from pyDataverse.models import Datafile
8+
9+
def parse_arguments():
    """Build and parse the command-line options for the Dataverse uploader.

    Returns:
        argparse.Namespace: parsed arguments with attributes ``token``,
        ``server``, ``doi``, ``repo``, ``title``, ``dir``, ``remove``
        and ``publish``.
    """
    parser = argparse.ArgumentParser()

    # Required connection / dataset identifiers
    parser.add_argument("-t", "--token", help="Dataverse token.")
    parser.add_argument("-s", "--server", help="Dataverse server.")
    parser.add_argument("-d", "--doi", help="Dataset DOI.")
    parser.add_argument("-r", "--repo", help="GitHub repository.")
    parser.add_argument("-e", "--title", help="Amended title of Dataset.")

    # Optional behaviour switches; booleans arrive as strings from the
    # workflow, so accept the common capitalisations explicitly.
    truthy_falsy = ('True', 'TRUE', 'true', 'False', 'FALSE', 'false')
    parser.add_argument("-i", "--dir", help="Uploads only a specific dir.")
    parser.add_argument("-v", "--remove",
                        help="Remove (delete) all files before upload.",
                        choices=truthy_falsy, default='true')
    parser.add_argument("-p", "--publish",
                        help="Publish a new dataset version after upload.",
                        choices=truthy_falsy, default='false')

    return parser.parse_args()
33+
34+
35+
def check_dataset_lock(num):
    """Poll the Dataverse lock endpoint until the dataset is unlocked,
    giving the server time to finish ingesting the previous upload.

    Relies on module-level globals set in ``__main__``:
    ``dataverse_server``, ``dataset_dbid`` and ``token``.

    Parameters
    ----------
    num : int
        Maximum number of lock checks before giving up.

    Notes
    -----
    BUG FIX: the original returned early (printing "Try again later!")
    when ``num <= 1`` *without querying the server*, so the last retry
    was never performed and a lock was reported that had not been
    checked. This version always checks before reporting.
    """
    query_str = dataverse_server + \
        '/api/datasets/' + str(dataset_dbid) + '/locks/'

    for attempt in range(max(num, 1)):
        resp_ = requests.get(query_str, auth=(token, ""))
        locks = resp_.json()['data']
        if not locks:
            return  # dataset is unlocked: safe to continue
        if attempt < num - 1:
            print('Lock found for dataset id ' + \
                str(dataset_dbid) + '\n... sleeping...')
            sleep(2)

    # Retries exhausted and the dataset is still locked.
    print('Lock found for dataset id ' + \
        str(dataset_dbid) + '\nTry again later!')
53+
54+
55+
if __name__ == '__main__':

    args = parse_arguments()
    token = args.token
    dataverse_server = args.server.strip("/")
    print(f"Using Dataverse server: {dataverse_server}")

    api = NativeApi(dataverse_server, token)
    resp = api.get_dataset(args.doi)
    resp.raise_for_status()
    dataset = resp

    files_list = dataset.json()['data']['latestVersion']['files']
    dataset_dbid = dataset.json()['data']['id']

    if args.remove.lower() == 'true':
        # Delete all existing files so the upload is a clean mirror of
        # the repository (SWORD API is the documented file-delete route).
        delete_api = dataverse_server + \
            '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/'
        for f in files_list:
            fileid = f["dataFile"]["id"]
            resp = requests.delete(delete_api + str(fileid),
                                   auth=(token, ""))

    # Determine which directories to upload (whole repo by default).
    repo_root = getcwd()
    paths = [repo_root]
    if args.dir:
        dirs = args.dir.strip().replace(",", " ").split()
        paths = [join(repo_root, d) for d in dirs]

    # Upload every file, skipping git metadata directories.
    for path in paths:
        for root, subdirs, files in walk(path):
            if '.git' in subdirs:
                subdirs.remove('.git')
            if '.github' in subdirs:
                subdirs.remove('.github')
            for f in files:
                df = Datafile()
                df.set({
                    "pid": args.doi,
                    "filename": f,
                    # BUG FIX: compute the label relative to the actual
                    # repository root. The original used start='repo',
                    # a directory that does not exist in this workflow
                    # (checkout path is 'main' and the script cd's into
                    # it), producing '../...' directory labels.
                    "directoryLabel": relpath(root, start=repo_root),
                    "description":
                        "Uploaded with GitHub Action from {}.".format(
                            args.repo),
                })
                resp = api.upload_datafile(args.doi, join(root, f),
                                           df.json())
                print(f"Uploaded: {join(root, f)} — Status: {resp.status_code}")
                # Wait for the server-side ingest lock before continuing.
                check_dataset_lock(5)

    # --- Amend the dataset title in the draft version metadata ---
    full_metadata = dataset.json()["data"]["latestVersion"]["metadataBlocks"]
    citation_block = full_metadata["citation"]

    for field in citation_block["fields"]:
        if field["typeName"] == "title":
            field["value"] = args.title

    updated_metadata = {
        "metadataBlocks": {
            "citation": citation_block
        }
    }

    # PUT the updated citation block back onto the draft version.
    headers = {
        "Content-Type": "application/json",
        "X-Dataverse-key": token
    }
    url = f"{dataverse_server}/api/datasets/:persistentId/versions/:draft"
    params = {
        "persistentId": args.doi,
        "replace": "true"
    }
    resp = requests.put(url, headers=headers, params=params,
                        data=json.dumps(updated_metadata))
    print("Metadata update response code:", resp.status_code)
    print("Metadata update response body:", resp.text)

    if resp.status_code != 200:
        raise Exception("Failed to update metadata.")

    if args.publish.lower() == 'true':
        # Publish the updated dataset as a new major version.
        resp = api.publish_dataset(args.doi, release_type="major")

0 commit comments

Comments (0)