Skip to content

Commit 2bc0f5f

Browse files
committed
Issue-80 fix? FGDC1 and FGDC2 tools now write out non-ASCII characters stored in either the template XML or attribute table values.
1 parent d026433 commit 2bc0f5f

File tree

2 files changed

+51
-41
lines changed

2 files changed

+51
-41
lines changed

Scripts/GeMS_FGDC1_Arc10.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
###########################################
2626
def __newElement(dom,tag,text):
2727
nd = dom.createElement(tag)
28-
ndText = dom.createTextNode(text)
28+
ndText = dom.createTextNode(str(text).decode("utf-8"))
2929
nd.appendChild(ndText)
3030
return nd
3131

@@ -89,7 +89,7 @@ def writeDomToFile(workDir,dom,fileName):
8989
outf = os.path.join(workDir,fileName)
9090

9191
with codecs.open(outf, "w", encoding="utf-8", errors="xmlcharrefreplace") as out:
92-
dom.writexml(out, addindent="")
92+
dom.writexml(out, encoding="utf-8")
9393

9494
###########################################
9595
inGdb = sys.argv[1]

Scripts/GeMS_FGDC2_Arc10.py

+49-39
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
debug = False
1111

12-
versionString = 'GeMS_FGDC2_Arc10.py, version of 4 May 2021'
12+
versionString = 'GeMS_FGDC2_Arc10.py, version of 5 October 2021'
1313
rawurl = 'https://raw.githubusercontent.com/usgs/gems-tools-arcmap/master/Scripts/GeMS_FGDC2_Arc10.py'
1414
checkVersion(versionString, rawurl, 'gems-tools-arcmap')
1515

@@ -37,7 +37,9 @@ def __appendOrReplace(rootNode,newNode,nodeTag):
3737

3838
def __newElement(dom,tag,text):
3939
nd = dom.createElement(tag)
40-
ndText = dom.createTextNode(text)
40+
# dom needs unicode text. For text coming from table fields, we decode it from UTF-8 bytes to unicode in case there are non-ASCII characters
41+
# tags coming from field names will never have non-ASCII characters
42+
ndText = dom.createTextNode(str(text).decode("utf-8"))
4143
nd.appendChild(ndText)
4244
return nd
4345

@@ -339,10 +341,10 @@ def updateTableDom(dom,fc,logFile):
339341
if len(eainfo.getElementsByTagName('detailed')) == 0:
340342
#add detailed/enttyp/enttypl nodes
341343
detailed = dom.createElement('detailed')
342-
enttyp = dom.createElement('enttyp')
343-
enttypl = __newElement(dom,'enttypl',fc)
344-
enttypd = __newElement(dom,'enttypd',descText)
345-
enttypds = __newElement(dom,'enttypds',descSourceText)
344+
enttyp = dom.createElement(b'enttyp')
345+
enttypl = __newElement(dom, 'enttypl', fc)
346+
enttypd = __newElement(dom, 'enttypd', descText)
347+
enttypds = __newElement(dom, 'enttypds', descSourceText)
346348
for nd in enttypl,enttypd,enttypds:
347349
enttyp.appendChild(nd)
348350
detailed.appendChild(enttyp)
@@ -403,8 +405,11 @@ def replaceSpatialStuff(dom, arcXML):
403405
if len(spds) > 0:
404406
addMsgAndPrint(' Replacing spdoinfo and spref')
405407
__appendOrReplace(md,spds[0],'spdoinfo')
406-
spr = arcXML.getElementsByTagName('spref')[0]
407-
__appendOrReplace(md,spr,'spref')
408+
try:
409+
spr = arcXML.getElementsByTagName('spref')[0]
410+
__appendOrReplace(md,spr,'spref')
411+
except Exception as error:
412+
arcpy.AddMessage(' Spatial reference is "Unknown"')
408413
return dom
409414

410415
def replaceTitleSupplinf(objectType,aTable,gdb,dom):
@@ -427,12 +432,15 @@ def fixObjXML(objName,objType,objLoc,domMR, fdDataSourceValues=[]):
427432
arcXMLfile = wksp+'/'+objName+'.xml'
428433
testAndDelete(arcXMLfile)
429434
arcpy.ExportMetadata_conversion(objLoc,translator,arcXMLfile)
430-
#with open(xml_file) as xml:
431-
# arcXML = parse(xml)
432-
arcXML = xml.dom.minidom.parse(arcXMLfile)
435+
with open(arcXMLfile) as xml:
436+
arcXML = parse(xml)
437+
438+
#arcXML = xml.dom.minidom.parse(arcXMLfile)
433439
dom = copy.deepcopy(domMR)
440+
434441
# updateTableDom updates entity-attribute info, also returns dataSourceValues
435442
dom, dataSourceValues = updateTableDom(dom,objLoc,logFile)
443+
436444
if objType <> 'Feature dataset':
437445
# delete unused dataqual/lineage/srcinfo branches
438446
dom = pruneSrcInfo(dom,dataSourceValues)
@@ -441,14 +449,14 @@ def fixObjXML(objName,objType,objLoc,domMR, fdDataSourceValues=[]):
441449

442450
# add spdoinfo and spref from arcXML
443451
dom = replaceSpatialStuff(dom, arcXML)
452+
444453
# redo title and supplinfo
445-
#dom = replaceTitleSupplinf('Non-spatial table',objName,gdb,dom)
446454
dom = replaceTitleSupplinf(objType,objName,gdb,dom)
447455
domName = gdb[:-4]+'_'+objName+'-metadata.xml'
448456
writeDomToFile(wksp,dom,domName)
449457
if not debug:
450458
os.remove(arcXMLfile)
451-
459+
452460
return dataSourceValues
453461

454462
def writeDomToFile(workDir,dom,fileName):
@@ -484,8 +492,10 @@ def writeDomToFile(workDir,dom,fileName):
484492

485493
# read mrXML into domMR
486494
addMsgAndPrint(' Parsing '+mrXML)
487-
try:
488-
domMR = xml.dom.minidom.parse(mrXML)
495+
try:
496+
with open(mrXML) as xml:
497+
domMR = parse(xml)
498+
#domMR = xml.dom.minidom.parse(mrXML)
489499
addMsgAndPrint(' Master record parsed successfully')
490500
except:
491501
addMsgAndPrint(arcpy.GetMessages())
@@ -504,29 +514,29 @@ def writeDomToFile(workDir,dom,fileName):
504514
objLoc = inGdb+'/'+aTable
505515
fixObjXML(objName,objType,objLoc,domMR)
506516

507-
# fcs = arcpy.ListFeatureClasses()
508-
# for fc in fcs:
509-
# objName = fc
510-
# objType = 'Feature class'
511-
# objLoc = inGdb+'/'+fc
512-
# fixObjXML(objName,objType,objLoc,domMR)
513-
514-
# fds = arcpy.ListDatasets('','Feature')
515-
# for fd in fds:
516-
# arcpy.env.workspace = inGdb+'/'+fd
517-
# fcs = arcpy.ListFeatureClasses()
518-
# arcpy.env.workspace = inGdb
519-
# fdDS = [] # inventory of all DataSource_IDs used in feature dataset
520-
# for fc in fcs:
521-
# objName = fc
522-
# objType = 'Feature class'
523-
# objLoc = inGdb+'/'+fd+'/'+fc
524-
# localDS = fixObjXML(objName,objType,objLoc,domMR)
525-
# for ds in localDS:
526-
# fdDS.append(ds)
527-
# objName = fd
528-
# objType = 'Feature dataset'
529-
# objLoc = inGdb+'/'+fd
530-
# fixObjXML(objName,objType,objLoc,domMR,set(fdDS))
517+
fcs = arcpy.ListFeatureClasses()
518+
for fc in fcs:
519+
objName = fc
520+
objType = 'Feature class'
521+
objLoc = inGdb+'/'+fc
522+
fixObjXML(objName,objType,objLoc,domMR)
523+
524+
fds = arcpy.ListDatasets('','Feature')
525+
for fd in fds:
526+
arcpy.env.workspace = inGdb+'/'+fd
527+
fcs = arcpy.ListFeatureClasses()
528+
arcpy.env.workspace = inGdb
529+
fdDS = [] # inventory of all DataSource_IDs used in feature dataset
530+
for fc in fcs:
531+
objName = fc
532+
objType = 'Feature class'
533+
objLoc = inGdb+'/'+fd+'/'+fc
534+
localDS = fixObjXML(objName,objType,objLoc,domMR)
535+
for ds in localDS:
536+
fdDS.append(ds)
537+
objName = fd
538+
objType = 'Feature dataset'
539+
objLoc = inGdb+'/'+fd
540+
fixObjXML(objName,objType,objLoc,domMR,set(fdDS))
531541

532542
logFile.close()

0 commit comments

Comments
 (0)