Skip to content

Commit b5aca4e

Browse files
committed
Updated run_mermaid script
1 parent 4e378c1 commit b5aca4e

6 files changed

Lines changed: 146 additions & 26 deletions

File tree

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ full = ["visualheist", "dataraider", "kgwizard"]
3636
[project.scripts]
3737
dataraider = "scripts.run_dataraider:main"
3838
visualheist = "scripts.run_visualheist:main"
39-
mermaid = "scripts.run_mermaid:main"
4039
kgwizard = "src.kgwizard.__main__:main"
40+
mermaid = "scripts.run_mermaid:main"
4141

4242
[tool.setuptools]
4343
# Tell setuptools to look in BOTH src/ and the current directory (where "scripts" is).
@@ -49,7 +49,8 @@ MERMaid = [
4949
"kgwizard/**/*.py",
5050
"visualheist/**/*.py",
5151
"kgwizard/prompt/assets/**/*",
52-
"kgwizard/graphdb/schemas/**/*"
52+
"kgwizard/graphdb/schemas/**/*",
53+
"scripts/startup.json"
5354
]
5455

5556
[tool.setuptools.exclude-package-data]

scripts/run_dataraider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def main():
5656
config = load_config(args.config)
5757
else:
5858
package_dir = os.path.dirname(os.path.dirname(__file__))
59-
config_path = os.path.join(package_dir, 'startup.json')
59+
config_path = os.path.join(package_dir, 'scripts/startup.json')
6060
config = load_config(config_path) if os.path.exists(config_path) else {}
6161

6262
prompt_dir = config.get('prompt_dir', "./Prompts")

scripts/run_mermaid.py

Lines changed: 115 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,37 @@
1+
import argparse
2+
from pathlib import Path
3+
import json
14
import subprocess
5+
import os
6+
from enum import auto, StrEnum
27

3-
def run_subprocess(module_name):
8+
from shutil import copyfile
9+
10+
SCRIPT_PATH = Path(os.path.abspath(__file__))
11+
CFG_PATH = SCRIPT_PATH.parent / "startup.json"
12+
13+
class Commands(StrEnum):
14+
RUN = auto()
15+
CFG = auto()
16+
17+
def run_subprocess(module_name, opt_args=None, python=True):
418
"""
519
Runs a Python script as a subprocess.
620
721
:param module_name: Name of the module to run.
822
:type module_name: str
9-
:param args: List of command-line arguments to pass to the script (optional).
10-
:type args: list, optional
23+
:param opt_args: List of command-line arguments to pass to the script (optional).
24+
:type opt_args: list, optional
25+
:param python: Whether or not to use python to execute the script
26+
:type python: bool, optional
1127
:return: None
1228
"""
13-
cmd = ["python", module_name]
29+
if python:
30+
cmd = ["python", module_name]
31+
else:
32+
cmd = [module_name]
33+
if opt_args:
34+
cmd += opt_args
1435
result = subprocess.run(cmd, capture_output=True, text=True)
1536

1637
print(f"\n===== {module_name} Output =====\n")
@@ -20,20 +41,103 @@ def run_subprocess(module_name):
2041
print(f"\n===== {module_name} Errors =====\n")
2142
print(result.stderr)
2243

44+
45+
def load_json_config(json_path):
    """Load argument settings from a JSON file.

    :param json_path: Path to the JSON configuration file.
    :type json_path: str or os.PathLike
    :return: The parsed JSON document.
    :raises OSError: If the file cannot be opened.
    :raises json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)
49+
50+
51+
def json_to_arg_list(config):
    """Convert a JSON config mapping to a list mimicking CLI arguments.

    Each key becomes a ``--key`` option. Values are rendered as follows:

    * ``list``  -> ``--key v1 v2 ...`` (every element converted with ``str``)
    * ``True``  -> ``--key`` (bare flag)
    * ``False`` -> omitted
    * ``None``  -> omitted (a JSON ``null`` means "not set"; passing the
      literal text ``"None"`` would hand the target program a bogus value)
    * anything else -> ``--key str(value)``

    :param config: Mapping of option names to values.
    :type config: dict
    :return: Flat list of command-line argument strings.
    :rtype: list[str]
    """
    arg_list = []
    for key, value in config.items():
        if value is None:
            continue
        key_arg = f"--{key}"  # Convert to argparse format
        if isinstance(value, list):
            arg_list.append(key_arg)
            arg_list.extend(str(item) for item in value)
        elif isinstance(value, bool):
            # Boolean options are store_true-style flags: present or absent.
            if value:
                arg_list.append(key_arg)
        else:
            arg_list.extend([key_arg, str(value)])
    return arg_list
64+
65+
66+
def build_main_argparser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    The parser requires one sub-command, stored in ``args.command``:

    * ``run`` -- accepts ``-c/--config`` (a ``Path``, defaulting to
      ``CFG_PATH``), the configuration file to read.
    * ``cfg`` -- takes the positional ``out_location`` (a ``Path``).

    :return: The configured top-level parser.
    :rtype: argparse.ArgumentParser
    """
    main_parser = argparse.ArgumentParser(description="MERMaid runs.")
    # ``required=True`` already makes the sub-command mandatory, so the
    # attribute does not need to be set a second time on the returned object.
    subparsers = main_parser.add_subparsers(
        title="Commands",
        description="Available commands",
        help="Description",
        dest="command",
        required=True,
    )

    run_parser = subparsers.add_parser(
        Commands.RUN,
        help="Run MERMaid pipeline",
    )
    run_parser.add_argument(
        "-c", "--config",
        type=Path,
        default=CFG_PATH,
        help="Path to the configuration file",
    )

    cfg_parser = subparsers.add_parser(
        Commands.CFG,
        help="Output a configuration file",
    )
    cfg_parser.add_argument(
        "out_location",
        type=Path,
        help="Path to the configuration file",
    )

    return main_parser
101+
102+
def exec_cfg(args):
    """Copy the bundled configuration file to ``args.out_location``.

    ``shutil.copyfile`` needs a complete target file name, so a destination
    that is an existing directory receives a file named like the bundled
    configuration. Missing parent directories are created first.

    :param args: Parsed arguments carrying ``out_location``.
    :type args: argparse.Namespace
    :return: None
    """
    destination = Path(args.out_location)
    if destination.is_dir():
        destination = destination / CFG_PATH.name
    destination.parent.mkdir(parents=True, exist_ok=True)
    copyfile(CFG_PATH, destination)
104+
105+
def exec_run(args):
106+
cfg = load_json_config(args.config)
107+
kgwizard_args = [
108+
"transform",
109+
cfg["json_dir"],
110+
"--output_dir", cfg["json_dir"] + "/results/",
111+
"--output_file", cfg["graph_dir"] + f"/{cfg["kgwizard"]["graph_name"]}.graphml",
112+
]
113+
kgwizard_args += json_to_arg_list(cfg["kgwizard"])
114+
115+
116+
117+
# print("\n### Running VisualHeist ###\n")
118+
# run_subprocess("scripts/run_visualheist.py")
119+
120+
# print("\n### Running DataRaider ###\n")
121+
# run_subprocess("scripts/run_dataraider.py")
122+
123+
print("\n### Running KGWizard ###\n")
124+
run_subprocess("kgwizard", kgwizard_args, python=False)
125+
126+
23127
def main():
24128
"""
25129
Runs VisualHeist, DataRaider and KGWizard sequentially.
26130
27131
:return: None
28132
"""
29-
print("\n### Running VisualHeist ###\n")
30-
run_subprocess("scripts/run_visualheist.py")
31-
32-
print("\n### Running DataRaider ###\n")
33-
run_subprocess("scripts/run_dataraider.py")
133+
parser = build_main_argparser()
134+
args = parser.parse_args()
34135

35-
# print("\n### Running KGWizard ###\n")
36-
# run_subprocess("scripts/run_kgwizard.py")
136+
match args.command:
137+
case Commands.RUN:
138+
exec_run(args)
139+
case Commands.CFG:
140+
exec_cfg(args)
37141

38142
if __name__ == "__main__":
39143
main()

scripts/run_visualheist.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def main():
4343

4444
else:
4545
package_dir = os.path.dirname(os.path.dirname(__file__))
46-
config_path = os.path.join(package_dir, 'startup.json')
46+
config_path = os.path.join(package_dir, 'scripts/startup.json')
4747
config = load_config(config_path) if os.path.exists(config_path) else {}
4848

4949
pdf_dir = args.pdf_dir or config.get('pdf_dir', "./pdfs")
@@ -60,4 +60,4 @@ def main():
6060
batch_pdf_to_figures_and_tables(pdf_dir, image_dir, large_model=model_size)
6161

6262
if __name__ == "__main__":
63-
main()
63+
main()

startup.json renamed to scripts/startup.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,15 @@
1515

1616
"default_image_dir": "Results/extracted_images/",
1717
"default_json_dir": "Results/jsons/",
18-
"default_graph_dir": "Results/graphs/"
19-
}
18+
"default_graph_dir": "Results/graphs/",
19+
20+
"kgwizard": {
21+
"address": "ws://localhost",
22+
"port": 8182,
23+
"graph_name": "g",
24+
"schema": "echem",
25+
"dynamic_start": 1,
26+
"dynamic_steps": 5,
27+
"dynamic_max_workers": 15
28+
}
29+
}

src/kgwizard/__main__.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,14 @@ def build_janus_argparser():
141141
Defaults to echem"""
142142
)
143143

144+
145+
parser.add_argument(
146+
"-of", "--output_file",
147+
type=Path,
148+
help="""If set, save the generated graph into the specified file after
149+
updating the database."""
150+
)
151+
144152
return parser
145153

146154

@@ -160,13 +168,6 @@ def build_parser_argparser():
160168
help="Folder where the JSON files from transform are stored."
161169
)
162170

163-
parser.add_argument(
164-
"-of", "--output_file",
165-
type=Path,
166-
help=""""If set, save the graph into the specified file after updating
167-
the database."""
168-
)
169-
170171
return parser
171172

172173

@@ -838,10 +839,14 @@ def exec_transform(
838839

839840
# Create output directory
840841
args.output_dir.mkdir(parents=True, exist_ok=True)
842+
if args.substitutions is not None:
843+
subs = dict(args.substitutions)
844+
else:
845+
subs = None
841846

842847
exec_fn_args = {
843848
"results_path": args.output_dir
844-
, "substitutions": dict(args.substitutions)
849+
, "substitutions": subs
845850
, "address": args.address
846851
, "port": args.port
847852
, "graph_name": args.graph_name

0 commit comments

Comments
 (0)