1+ import argparse
2+ from time import sleep
3+ from os .path import join , relpath
4+ from os import walk , getcwd
5+ import requests , json
6+ from pyDataverse .api import NativeApi
7+ from pyDataverse .models import Datafile
8+
def parse_arguments():
    """Parse the command-line arguments for the Dataverse upload script.

    Returns:
        argparse.Namespace with attributes ``token``, ``server``, ``doi``,
        ``repo``, ``title`` (all required), plus optional ``dir`` and the
        string flags ``remove`` (default ``'true'``) and ``publish``
        (default ``'false'``).
    """
    parser = argparse.ArgumentParser()

    # Mandatory arguments — enforced with required=True so a missing value
    # fails fast with a usage message instead of surfacing later as None.
    parser.add_argument("-t", "--token", required=True, help="Dataverse token.")
    parser.add_argument("-s", "--server", required=True, help="Dataverse server.")
    parser.add_argument("-d", "--doi", required=True, help="Dataset DOI.")
    parser.add_argument("-r", "--repo", required=True, help="GitHub repository.")
    parser.add_argument("-e", "--title", required=True, help="Amended title of Dataset.")

    # Optional arguments. The boolean-ish flags accept the spellings the
    # calling GitHub Action may pass, so the choices tuple is kept as-is.
    parser.add_argument("-i", "--dir", help="Uploads only a specific dir.")
    parser.add_argument(
        "-v", "--remove", help="Remove (delete) all files before upload.",
        choices=('True', 'TRUE', 'true', 'False', 'FALSE', 'false'),
        default='true')
    parser.add_argument(
        "-p", "--publish", help="Publish a new dataset version after upload.",
        choices=('True', 'TRUE', 'true', 'False', 'FALSE', 'false'),
        default='false')

    return parser.parse_args()
33+
34+
def check_dataset_lock(num):
    """Poll the Dataverse lock endpoint until the dataset is unlocked.

    Performs up to ``num`` lock checks, sleeping 2 seconds between polls,
    and prints a give-up message if the dataset is still locked afterwards.
    Relies on the module-level globals ``dataverse_server``,
    ``dataset_dbid`` and ``token`` set by the main script.

    Args:
        num: Maximum number of lock checks to perform.
    """
    query_str = dataverse_server + \
        '/api/datasets/' + str(dataset_dbid) + '/locks/'

    # Iterative retry instead of recursion. This also fixes an off-by-one:
    # the recursive version gave up at num == 1 *without* checking, so it
    # performed at most num - 1 polls.
    for _ in range(num):
        resp_ = requests.get(query_str, auth=(token, ""))
        locks = resp_.json()['data']
        if not locks:
            # No lock present — safe to continue uploading.
            return
        print('Lock found for dataset id ' +
              str(dataset_dbid) + '\n ... sleeping...')
        sleep(2)

    # Still locked after exhausting all attempts.
    print('Lock found for dataset id ' +
          str(dataset_dbid) + '\n Try again later!')
53+
54+
if __name__ == '__main__':

    args = parse_arguments()
    token = args.token
    # Normalize the server URL so later string concatenations don't
    # produce double slashes.
    dataverse_server = args.server.strip("/")
    print(f"Using Dataverse server: {dataverse_server}")

    api = NativeApi(dataverse_server, token)
    resp = api.get_dataset(args.doi)
    resp.raise_for_status()
    dataset = resp

    files_list = dataset.json()['data']['latestVersion']['files']
    dataset_dbid = dataset.json()['data']['id']

    if args.remove.lower() == 'true':
        # Delete all existing files in the dataset via the SWORD API
        # (used here because it allows per-file deletion by database id).
        delete_api = dataverse_server + \
            '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/'
        for f in files_list:
            fileid = f["dataFile"]["id"]
            resp = requests.delete(
                delete_api + str(fileid),
                auth=(token, ""))

    # Determine which directories to upload; default is the whole
    # repository checkout (the current working directory).
    repo_root = getcwd()
    paths = [repo_root]
    if args.dir:
        # Accept comma- and/or whitespace-separated directory lists.
        dirs = args.dir.strip().replace(",", " ").split()
        paths = [join(repo_root, d) for d in dirs]

    # Upload every file, preserving directory structure relative to the
    # repository root; skip VCS metadata directories.
    for path in paths:
        for root, subdirs, files in walk(path):
            if '.git' in subdirs:
                subdirs.remove('.git')
            if '.github' in subdirs:
                subdirs.remove('.github')
            for f in files:
                df = Datafile()
                df.set({
                    "pid": args.doi,
                    "filename": f,
                    # BUG FIX: the directory label must be computed
                    # relative to the repository root, not to a literal
                    # directory named 'repo' (which produced '../...'
                    # labels for every file).
                    "directoryLabel": relpath(root, start=repo_root),
                    "description":
                        "Uploaded with GitHub Action from {}.".format(
                            args.repo),
                })
                resp = api.upload_datafile(
                    args.doi, join(root, f), df.json())
                print(f"Uploaded: {join(root, f)} — Status: {resp.status_code}")
                # Give the server time to release its ingest lock before
                # the next upload.
                check_dataset_lock(5)

    # Extract and modify the citation metadata block of the latest version.
    full_metadata = dataset.json()["data"]["latestVersion"]["metadataBlocks"]
    citation_block = full_metadata["citation"]

    # Update the title field in place.
    for field in citation_block["fields"]:
        if field["typeName"] == "title":
            field["value"] = args.title

    # Construct the full metadata payload expected by the versions endpoint.
    updated_metadata = {
        "metadataBlocks": {
            "citation": citation_block
        }
    }

    # Build the PUT request against the draft version of the dataset.
    headers = {
        "Content-Type": "application/json",
        "X-Dataverse-key": token
    }
    url = f"{dataverse_server}/api/datasets/:persistentId/versions/:draft"
    params = {
        "persistentId": args.doi,
        "replace": "true"
    }
    resp = requests.put(url, headers=headers, params=params,
                        data=json.dumps(updated_metadata))
    print("Metadata update response code:", resp.status_code)
    print("Metadata update response body:", resp.text)

    if resp.status_code != 200:
        raise Exception("Failed to update metadata.")

    if args.publish.lower() == 'true':
        # Publish the updated dataset as a new major version.
        resp = api.publish_dataset(args.doi, release_type="major")