Skip to content

Commit ec3d01c

Browse files
committed
merge branch 'gromacs-example' into revisions
2 parents 5c13166 + e07f599 commit ec3d01c

7 files changed

Lines changed: 1151 additions & 4 deletions

File tree

mdcrow/agent/agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
from datetime import datetime
33

4-
from dotenv import load_dotenv
4+
# from dotenv import load_dotenv
55
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
66
from langchain.agents.structured_chat.base import StructuredChatAgent
77

@@ -10,7 +10,7 @@
1010
from .memory import MemoryManager
1111
from .prompt import openaifxn_prompt, structured_prompt
1212

13-
load_dotenv()
13+
# load_dotenv()
1414

1515

1616
class AgentType:
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .create_system import PrepareGromacsProteinSystemTool
2+
from .simulate import RunGromacsMDTool
3+
4+
__all__ = [
5+
"PrepareGromacsProteinSystemTool",
6+
"RunGromacsMDTool",
7+
]
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import os
2+
import subprocess
3+
from typing import Optional
4+
5+
from langchain.tools import BaseTool
6+
from pydantic import BaseModel, Field
7+
8+
from mdcrow.utils import FileType, PathRegistry
9+
10+
11+
class PrepareGromacsProteinInput(BaseModel):
12+
protein_file_id: str = Field(
13+
..., description="File ID of the protein PDB in the path registry"
14+
)
15+
forcefield: str = Field(
16+
default="amber99sb-ildn", description="GROMACS force field name"
17+
)
18+
water_model: str = Field(default="tip3p", description="Water model")
19+
box_padding_nm: float = Field(
20+
default=1.0, description="Padding distance to box edge (nm)"
21+
)
22+
23+
24+
class PrepareGromacsProteinSystemTool(BaseTool):
25+
name = "PrepareGromacsProteinSystem"
26+
description = (
27+
"Prepare a protein-only system for GROMACS by assigning force fields, "
28+
"adding hydrogens, solvating, and neutralizing the system."
29+
)
30+
args_schema = PrepareGromacsProteinInput
31+
32+
path_registry: Optional[PathRegistry]
33+
34+
def __init__(self, path_registry: PathRegistry):
35+
super().__init__()
36+
self.path_registry = path_registry
37+
38+
def _run(
39+
self,
40+
protein_file_id: str,
41+
forcefield: str = "amber99sb-ildn",
42+
water_model: str = "tip3p",
43+
box_padding_nm: float = 1.0,
44+
) -> str:
45+
46+
pdb_path = self.path_registry.get_mapped_path(protein_file_id)
47+
if not os.path.exists(pdb_path):
48+
return f"Failed. PDB file not found for ID {protein_file_id}"
49+
50+
workdir = self.path_registry.ckpt_dir + "/gromacs_preparation"
51+
os.makedirs(workdir, exist_ok=True)
52+
os.chdir(workdir)
53+
54+
subprocess.run(
55+
[
56+
"gmx",
57+
"pdb2gmx",
58+
"-f",
59+
pdb_path,
60+
"-o",
61+
"processed.gro",
62+
"-p",
63+
"topol.top",
64+
"-ff",
65+
forcefield,
66+
"-water",
67+
water_model,
68+
"-ignh",
69+
],
70+
check=True,
71+
)
72+
73+
subprocess.run(
74+
[
75+
"gmx",
76+
"editconf",
77+
"-f",
78+
"processed.gro",
79+
"-o",
80+
"boxed.gro",
81+
"-c",
82+
"-d",
83+
str(box_padding_nm),
84+
"-bt",
85+
"cubic",
86+
],
87+
check=True,
88+
)
89+
90+
subprocess.run(
91+
[
92+
"gmx",
93+
"solvate",
94+
"-cp",
95+
"boxed.gro",
96+
"-cs",
97+
"spc216.gro",
98+
"-o",
99+
"solvated.gro",
100+
"-p",
101+
"topol.top",
102+
],
103+
check=True,
104+
)
105+
106+
with open("ions.mdp", "w") as f:
107+
f.write("integrator = steep\nnsteps = 500\n")
108+
109+
subprocess.run(
110+
[
111+
"gmx",
112+
"grompp",
113+
"-f",
114+
"ions.mdp",
115+
"-c",
116+
"solvated.gro",
117+
"-p",
118+
"topol.top",
119+
"-o",
120+
"ions.tpr",
121+
"-maxwarn",
122+
"1",
123+
],
124+
check=True,
125+
)
126+
127+
subprocess.run(
128+
[
129+
"gmx",
130+
"genion",
131+
"-s",
132+
"ions.tpr",
133+
"-o",
134+
"solv_ions.gro",
135+
"-p",
136+
"topol.top",
137+
"-neutral",
138+
],
139+
input=b"SOL\n",
140+
check=True,
141+
)
142+
coord_name = self.path_registry.write_file_name(
143+
type=FileType.RECORD,
144+
record_type="GROMACS_COORD",
145+
protein_file_id=protein_file_id,
146+
file_format="gro",
147+
)
148+
149+
top_name = self.path_registry.write_file_name(
150+
type=FileType.RECORD,
151+
record_type="GROMACS_TOP",
152+
protein_file_id=protein_file_id,
153+
file_format="top",
154+
)
155+
156+
os.rename("solv_ions.gro", coord_name)
157+
os.rename("topol.top", top_name)
158+
159+
coord_id = self.path_registry.get_fileid(coord_name, FileType.RECORD)
160+
self.path_registry.map_path(
161+
coord_id, f"{workdir}/{coord_name}", "GROMACS solvated coordinates"
162+
)
163+
164+
top_id = self.path_registry.get_fileid(top_name, FileType.RECORD)
165+
self.path_registry.map_path(
166+
top_id, f"{workdir}/{top_name}", "GROMACS topology file"
167+
)
168+
169+
return (
170+
"Succeeded. GROMACS system prepared.\n"
171+
f"Coordinates ID: {coord_id}\nTopology ID: {top_id}"
172+
)
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import os
2+
import subprocess
3+
from typing import Optional
4+
5+
from langchain.tools import BaseTool
6+
from pydantic import BaseModel, Field
7+
8+
from mdcrow.utils import FileType, PathRegistry
9+
10+
11+
class RunGromacsMDInput(BaseModel):
12+
topology_id: str
13+
coordinates_id: str
14+
ensemble: str = Field(description="NVT or NPT")
15+
temperature: float = Field(default=300.0)
16+
nsteps: int = Field(default=500_000)
17+
18+
19+
class RunGromacsMDTool(BaseTool):
20+
name = "RunGromacsMD"
21+
description = (
22+
"Run a GROMACS MD simulation using NVT or NPT."
23+
"Requires topology (.top) and coordinate (.gro) files."
24+
)
25+
args_schema = RunGromacsMDInput
26+
27+
path_registry: Optional[PathRegistry]
28+
29+
def __init__(self, path_registry: PathRegistry):
30+
super().__init__()
31+
self.path_registry = path_registry
32+
33+
def _run(self, topology_id, coordinates_id, ensemble, temperature, nsteps):
34+
35+
top_path = self.path_registry.get_mapped_path(topology_id)
36+
gro_path = self.path_registry.get_mapped_path(coordinates_id)
37+
38+
if not os.path.exists(top_path):
39+
return f"Failed. Topology file not found: {top_path}"
40+
if not os.path.exists(gro_path):
41+
return f"Failed. Coordinate file not found: {gro_path}"
42+
43+
workdir = self.path_registry.ckpt_simulations
44+
os.chdir(workdir)
45+
46+
for stale in ["md.xtc", "md.gro"]:
47+
if os.path.exists(stale):
48+
os.remove(stale)
49+
50+
with open("minim.mdp", "w") as f:
51+
f.write("integrator = steep\nnsteps = 5000\n")
52+
53+
try:
54+
subprocess.run(
55+
[
56+
"gmx",
57+
"grompp",
58+
"-f",
59+
"minim.mdp",
60+
"-c",
61+
gro_path,
62+
"-p",
63+
top_path,
64+
"-o",
65+
"em.tpr",
66+
],
67+
check=True,
68+
capture_output=True,
69+
text=True,
70+
)
71+
subprocess.run(
72+
["gmx", "mdrun", "-deffnm", "em"],
73+
check=True,
74+
capture_output=True,
75+
text=True,
76+
)
77+
except subprocess.CalledProcessError as e:
78+
return f"Failed during energy minimization: {e.stderr}"
79+
80+
# just explicitly setting many of these. we could make this more flexible later
81+
if ensemble.upper() == "NVT":
82+
mdp = f"""
83+
integrator = md
84+
nsteps = {nsteps}
85+
dt = 0.002
86+
tcoupl = V-rescale
87+
tc-grps = Protein Non-Protein
88+
tau_t = 0.1 0.1
89+
ref_t = {temperature} {temperature}
90+
constraints = h-bonds
91+
cutoff-scheme = Verlet
92+
nstxout-compressed = 500
93+
compressed-x-grps = System
94+
"""
95+
elif ensemble.upper() == "NPT":
96+
mdp = f"""
97+
integrator = md
98+
nsteps = {nsteps}
99+
dt = 0.002
100+
tcoupl = V-rescale
101+
tc-grps = Protein Non-Protein
102+
tau_t = 0.1 0.1
103+
ref_t = {temperature} {temperature}
104+
pcoupl = Parrinello-Rahman
105+
pcoupltype = isotropic
106+
tau_p = 2.0
107+
ref_p = 1.0
108+
compressibility = 4.5e-5
109+
constraints = h-bonds
110+
cutoff-scheme = Verlet
111+
nstxout-compressed = 500
112+
compressed-x-grps = System
113+
"""
114+
else:
115+
return f"Failed. Unsupported ensemble: {ensemble}"
116+
117+
with open("md.mdp", "w") as f:
118+
f.write(mdp)
119+
120+
try:
121+
subprocess.run(
122+
[
123+
"gmx",
124+
"grompp",
125+
"-f",
126+
"md.mdp",
127+
"-c",
128+
"em.gro",
129+
"-p",
130+
top_path,
131+
"-o",
132+
"md.tpr",
133+
],
134+
check=True,
135+
capture_output=True,
136+
text=True,
137+
)
138+
subprocess.run(
139+
["gmx", "mdrun", "-deffnm", "md", "-x", "md.xtc"],
140+
check=True,
141+
capture_output=True,
142+
text=True,
143+
)
144+
except subprocess.CalledProcessError as e:
145+
return f"Failed during MD simulation: {e.stderr}"
146+
147+
traj_name = self.path_registry.write_file_name(
148+
type=FileType.RECORD,
149+
record_type="TRAJ",
150+
file_format="xtc",
151+
)
152+
gro_name = self.path_registry.write_file_name(
153+
type=FileType.RECORD,
154+
record_type="FINAL",
155+
file_format="gro",
156+
)
157+
158+
if not os.path.exists("md.xtc"):
159+
return "Failed. GROMACS did not produce md.xtc. Check md.log for errors."
160+
if not os.path.exists("md.gro"):
161+
return "Failed. GROMACS did not produce md.gro. Check md.log for errors."
162+
163+
os.rename("md.xtc", traj_name)
164+
os.rename("md.gro", gro_name)
165+
166+
traj_id = self.path_registry.get_fileid(traj_name, FileType.RECORD)
167+
self.path_registry.map_path(traj_id, f"{workdir}/{traj_name}", "MD trajectory")
168+
169+
gro_id = self.path_registry.get_fileid(gro_name, FileType.RECORD)
170+
self.path_registry.map_path(gro_id, f"{workdir}/{gro_name}", "Final structure")
171+
172+
return (
173+
f"Succeeded. MD completed using {ensemble} ensemble.\n"
174+
f"Trajectory ID: {traj_id}\nFinal structure ID: {gro_id}"
175+
)

0 commit comments

Comments
 (0)