@@ -1203,32 +1203,16 @@ def yield_works(url, simple=True):
12031203 yield item
12041204
12051205
1206- def make_xml (workpath , out_dir , save = False ):
1206+ def make_xml (workpath , out_dir , dump , save = False , test = False ):
12071207 """serializes a manifestation as XML/TEI document
12081208
12091209 :param save: if set, a XML/TEI file `{workpath}` is saved
12101210 :param type: bool
12111211
12121212 :return: A lxml.etree
12131213 """
1214- data = glob .glob (os .path .join (out_dir , workpath , "data" , "*.json" ))
1215- doc = []
1216- for x in data :
1217- try :
1218- with open (x , 'r' , encoding = 'utf8' ) as f :
1219- json_dump = json .load (f )
1220- except FileNotFoundError :
1221- print (f"file { x } not found, run get_man_json_dump() function first" )
1222- json_dump ['publicationHistory' ] = []
1223- history = glob .glob (os .path .join (out_dir , workpath , "data" , "*.json" ))
1224- for x in history :
1225- try :
1226- with open (x , 'r' , encoding = 'utf8' ) as f :
1227- json_dump ['publicationHistory' ].append (
1228- json .load (f )
1229- )
1230- except FileNotFoundError :
1231- print ("no json dump found" )
1214+ if test :
1215+ json_dump = dump
12321216 templateLoader = jinja2 .PackageLoader (
12331217 "freud_api_crawler" , "templates"
12341218 )
@@ -1239,11 +1223,38 @@ def make_xml(workpath, out_dir, save=False):
12391223 tei = ET .fromstring (tei )
12401224 transform = ET .XSLT (XSL_DOC )
12411225 tei = transform (tei )
1242- if save :
1243- signatur = json_dump ["signature" ]
1244- filename = signatur .replace ("/" , "__" )
1245- savepath = os .path .join (out_dir , workpath )
1246- with open (os .path .join (savepath , f"sfe-{ filename } .xml" ), 'wb' ) as f :
1247- f .write (ET .tostring (tei , pretty_print = True , encoding = "utf-8" ))
1248- doc .append (tei )
1249- return doc [0 ]
1226+ else :
1227+ data = glob .glob (os .path .join (out_dir , workpath , "data" , "*.json" ))
1228+ for x in data :
1229+ try :
1230+ with open (x , 'r' , encoding = 'utf8' ) as f :
1231+ json_dump = json .load (f )
1232+ except FileNotFoundError :
1233+ print (f"file { x } not found, run get_man_json_dump() function first" )
1234+ json_dump ['publicationHistory' ] = []
1235+ history = glob .glob (os .path .join (out_dir , workpath , "data" , "*.json" ))
1236+ for x in history :
1237+ try :
1238+ with open (x , 'r' , encoding = 'utf8' ) as f :
1239+ json_dump ['publicationHistory' ].append (
1240+ json .load (f )
1241+ )
1242+ except FileNotFoundError :
1243+ print ("no json dump found" )
1244+ templateLoader = jinja2 .PackageLoader (
1245+ "freud_api_crawler" , "templates"
1246+ )
1247+ templateEnv = jinja2 .Environment (loader = templateLoader )
1248+ template = templateEnv .get_template ('./tei.xml' )
1249+ tei = template .render ({"objects" : [json_dump ]})
1250+ tei = re .sub (r'\s+$' , '' , tei , flags = re .MULTILINE )
1251+ tei = ET .fromstring (tei )
1252+ transform = ET .XSLT (XSL_DOC )
1253+ tei = transform (tei )
1254+ if save :
1255+ signatur = json_dump ["signature" ]
1256+ filename = signatur .replace ("/" , "__" )
1257+ savepath = os .path .join (out_dir , workpath )
1258+ with open (os .path .join (savepath , f"sfe-{ filename } .xml" ), 'wb' ) as f :
1259+ f .write (ET .tostring (tei , pretty_print = True , encoding = "utf-8" ))
1260+ return tei
0 commit comments