11import os
2+ import re
23
34def parse_description_file (file_path ):
45 """Parses a DESCRIPTION file and returns a dictionary of key-value pairs."""
@@ -33,19 +34,84 @@ def parse_description_file(file_path):
3334 # Save the last key-value pair
3435 if current_key :
3536 parsed_data [current_key ] = "\n " .join (current_value ).strip ()
37+ print (parsed_data )
38+
39+ # Parse the Authors@R part if it is present
40+ if 'Authors@R' in parsed_data :
41+ print ("Parsing Authors@R REGEX" )
42+ authors_string = parsed_data ['Authors@R' ]
43+ authors_string = authors_string .replace ('\n ' , "" )
44+ authors_string = authors_string .replace (" " , "" )
45+ authors_string = authors_string .replace ("c(person(" , "" )
46+ authors_string = authors_string [:- 2 ]
47+ temp = authors_string .split ("),person(" )
48+
49+ parsed_data ["Authors" ] = []
50+
51+ for this_author in temp :
52+ print (this_author )
53+ # Email
54+ email = re .findall ("email=\" ([\w\-\.]+@[\w\-\.]+\.+[\w\-]{2,4})\" " , this_author )
55+ if len (email ):
56+ this_author = re .sub ("email=\" ([\w\-\.]+@[\w\-\.]+\.+[\w\-]{2,4})\" " ,"" , this_author )
57+ email = email [0 ]
58+ print (email )
59+ else :
60+ email = ""
61+
62+ #ORCID
63+ orcid = re .findall ("comment=c\(ORCID=\" ([0-9]+-[0-9]+-[0-9]+-[0-9]+)\" \)" , this_author )
64+ if len (orcid ):
65+ this_author = re .sub ("comment=c\(ORCID=\" ([0-9]+-[0-9]+-[0-9]+-[0-9]+)\" \)" ,"" , this_author )
66+ orcid = orcid [0 ]
67+ print (orcid )
68+ else :
69+ orcid = ""
70+
71+ #Role
72+ role = re .findall ("role=c?\(?([\" \w+\" ,?]+)\)?" , this_author )
73+ this_author = re .sub ("role=c?\(?([\" \w+\" ,?]+)\)?" ,"" , this_author )
74+ print (role )
75+
76+ # The only thing left should be the name, either with or without given/family
77+ this_author = this_author .replace (",," ,"," )
78+ temp = this_author .split ("," )
79+ given = temp [0 ]
80+ given = given .replace ("given=" ,"" ).replace ('"' ,"" )
81+ print (given )
82+ family = temp [1 ]
83+ family = family .replace ("family=" ,"" ).replace ('"' ,"" )
84+ print (family )
85+
86+ parsed_data ["Authors" ].append ({"given" :given , "family" :family , "orcid" :orcid , "role" :role , "email" :email })
87+
88+ # Create a maintainer_string out of the relevant author
89+ if "cre" in str (role ):
90+ print ("MAINTAINER" )
91+ parsed_data ["maintainer_string" ] = f"""{ given } { family } ({ email } )"""
92+
93+ elif 'Maintainer' in parsed_data :
94+ # The old way was to have an explicit Maintainer item.
95+ parsed_data ['maintainer_string' ] = parsed_data ['Maintainer' ].replace ("<" ,"" ).replace (">" ,"" )
3696
3797 return parsed_data
3898
3999
def main():
    """Parse the DESCRIPTION file in the working directory and print it.

    Prints, in order: the full parsed dictionary, the Title field, the
    Description field, the raw Authors@R field (only when the file has
    one), a separator line, and the full parsed dictionary again.

    Raises:
        KeyError: if the parsed file has no Title or Description field.
    """
    input_file = "./DESCRIPTION"  # Path to the DESCRIPTION file

    # Parse the DESCRIPTION file
    parsed_data = parse_description_file(input_file)

    print(parsed_data)

    print(parsed_data['Title'])
    print(parsed_data['Description'])
    # Authors@R is optional: parse_description_file also handles files that
    # only carry a Maintainer field, so indexing it unconditionally would
    # raise KeyError for those files.
    if 'Authors@R' in parsed_data:
        print(parsed_data['Authors@R'])
    print("############")

    print(parsed_data)
114+
49115
50116if __name__ == "__main__" :
51117 main ()
0 commit comments