Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 114 additions & 77 deletions xmltojson.py
Original file line number Diff line number Diff line change
@@ -1,102 +1,139 @@
#!/usr/bin/python
''' Convert XML files (.csl / .xml) to JSON for use with citeproc-js.
'''

import argparse
import datetime
import json
import os
import stat
from xml.dom import minidom
import json,re

# jsonwalker class copied from src/xmljson.js from citeproc-js project
# https://bitbucket.org/fbennett/citeproc-js
class JsonWalker:
    """Turn an ``xml.dom.minidom`` element tree into plain dicts and lists.

    Every element is represented as::

        {"name": <nodeName>, "attrs": {<attr>: <value>, ...}, "children": [...]}

    so the result can be passed directly to ``json.dumps``.
    """

    # Elements whose single text child is preserved as a string in "children".
    _TEXT_ELEMENTS = ("term", "single", "multiple")

    @staticmethod
    def make_doc(xml_string):
        """Parse *xml_string* and return the root element of the document."""
        dom = minidom.parseString(xml_string)
        return dom.documentElement

    def walk_to_json(self, elem):
        """Recursively convert *elem* and its descendants to a dict.

        Comment nodes are skipped.  A text node is kept (as a plain string)
        only when it is the sole child of a ``term``, ``single`` or
        ``multiple`` element; every other text node is dropped.  A ``term``
        element with no children gets ``[""]`` as its children.  Any other
        child node is converted recursively.
        """
        obj = {
            "name": elem.nodeName,
            "attrs": {},
            "children": [],
        }

        # Non-element nodes report ``attributes`` as None, hence the guard.
        if elem.attributes:
            for key in elem.attributes.keys():
                obj["attrs"][key] = elem.attributes[key].value

        child_nodes = elem.childNodes
        if len(child_nodes) == 0 and elem.nodeName == "term":
            obj["children"] = [""]

        keeps_text = len(child_nodes) == 1 and elem.nodeName in self._TEXT_ELEMENTS
        for child in child_nodes:
            if child.nodeName == "#comment":
                continue
            if child.nodeName == "#text":
                if keeps_text:
                    obj["children"].append(child.wholeText)
                continue
            obj["children"].append(self.walk_to_json(child))

        return obj

    # Pre-refactor method spellings, kept so existing callers keep working.
    makedoc = make_doc
    walktojson = walk_to_json


# Pre-refactor class name, kept so existing callers keep working.
jsonwalker = JsonWalker

def main():
    """Command-line entry point: convert one file or a whole directory.

    Parses ``source``, ``dest``, ``--changed N`` and ``-v/--verbose`` from the
    command line.  When ``source`` is a directory, ``dest`` is created if
    needed and the work is delegated to ``convert_directory``; when it is a
    regular file, the work is delegated to ``convert_file``.

    Raises:
        RuntimeError: if ``source`` is neither a directory nor a regular file.
        SystemExit: on invalid arguments or when ``source`` cannot be accessed.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Convert xml to json for use with citeproc-js.',
        epilog=(
            'Examples:\n'
            '\n'
            'Convert all styles in ./csl that have been modified in the last '
            '5 minutes and place them into ./csljson\n'
            '\n'
            './xmltojson.py --changed 300 ./csl ./csljson\n'
        ),
    )
    parser.add_argument(
        'source',
        type=str,
        help='Source file or directory.'
    )
    parser.add_argument(
        'dest',
        type=str,
        help='Destination file or directory.'
    )
    # No nargs='?': without a ``const`` a bare ``--changed`` would store None,
    # which later breaks the timedelta computation.  N is now required.
    parser.add_argument(
        '--changed',
        metavar="N",
        type=int,
        default=0,
        help='Convert files that have been modified within the last <N> seconds.'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Show verbose progress output.'
    )

    args = parser.parse_args()
    verbose = args.verbose

    try:
        mode = os.stat(args.source).st_mode
    except OSError as err:
        # Report a missing/unreadable source as a usage error, not a traceback.
        parser.error("cannot access source: %s" % err)

    if stat.S_ISDIR(mode):
        # It's a directory, convert all csl files inside
        if not os.path.isdir(args.dest):
            # makedirs also creates missing parent directories.
            os.makedirs(args.dest)
        convert_directory(
            source_dir=args.source,
            dest_dir=args.dest,
            changed=args.changed,
            verbose=verbose,
        )
    elif stat.S_ISREG(mode):
        # It's a file, only convert this csl file
        convert_file(
            source_file=args.source,
            dest_file=args.dest,
            verbose=verbose,
        )
    else:
        raise RuntimeError("Unknown file mode.")


def convert_file(source_file, dest_file, verbose):
    """Convert one XML file to JSON.

    Args:
        source_file: path of the XML input; must end in ``.csl`` or ``.xml``.
        dest_file: path the JSON output is written to (overwritten if present).
        verbose: when true, print a progress line before converting.

    Raises:
        RuntimeError: if ``source_file`` has an unexpected extension.
    """
    # ``.xml`` must be accepted as well: convert_directory forwards both
    # ``.csl`` and ``.xml`` files to this function.
    if not source_file.endswith(('.csl', '.xml')):
        raise RuntimeError("Unexpected file extension")

    if verbose:
        print("Converting " + source_file + " to " + dest_file)

    w = JsonWalker()
    # Read raw bytes so the XML parser applies the encoding declared in the
    # document instead of the platform's default text encoding, and close the
    # handle deterministically.
    with open(source_file, 'rb') as f:
        doc = w.make_doc(f.read())
    obj = w.walk_to_json(doc)
    with open(dest_file, 'w') as f:
        f.write(json.dumps(obj, indent=2))


def convert_directory(source_dir, dest_dir, changed, verbose):
    """Convert every ``.csl`` and ``.xml`` file directly inside *source_dir*.

    ``.csl`` files keep their name in *dest_dir*; ``.xml`` files are written
    with a ``.json`` extension instead.  Other directory entries are ignored.

    Args:
        source_dir: directory to scan (not recursive).
        dest_dir: directory receiving the converted files.
        changed: when non-zero, only convert files modified within the last
            ``changed`` seconds; ``0`` or ``None`` converts everything.
        verbose: passed through to ``convert_file``.
    """
    # Compute the cutoff only when filtering was requested, so a ``None``
    # value cannot reach timedelta().
    changed_cutoff = None
    if changed:
        changed_cutoff = datetime.datetime.now() - datetime.timedelta(seconds=changed)

    for name in os.listdir(source_dir):
        if name.endswith('.csl'):
            dest_name = name
        elif name.endswith('.xml'):
            dest_name = name[:-3] + 'json'
        else:
            continue

        source_file = os.path.join(source_dir, name)
        dest_file = os.path.join(dest_dir, dest_name)

        if changed_cutoff is not None:
            modified = datetime.datetime.fromtimestamp(os.stat(source_file).st_mtime)
            if modified < changed_cutoff:
                # not modified recently enough; continue without converting
                continue

        # NOTE(review): convert_file validates the source extension itself;
        # confirm it accepts the ``.xml`` files forwarded from here.
        convert_file(source_file, dest_file, verbose)


# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()