Skip to content

Commit f41d56a

Browse files
authored
Add files via upload
hdock_batch.py
1 parent e019cdc commit f41d56a

1 file changed

Lines changed: 174 additions & 0 deletions

File tree

hdock_batch.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
import argparse
6+
import asyncio
7+
import csv
8+
import pathlib
9+
import sys
10+
import time
11+
from typing import Dict
12+
13+
import pandas as pd
14+
from playwright.async_api import async_playwright, Page, Browser, TimeoutError as PWTimeout
15+
16+
HDOCK_URL = "http://hdock.phys.hust.edu.cn/"
17+
18+
# ───────────────────────── helper functions ──────────────────────────
19+
async def attach_file(page: Page, selector: str, file_path: pathlib.Path):
    """Upload *file_path* into the file input matched by *selector*.

    The path is handed to Playwright in POSIX form, then the input element is
    queried inside the page to confirm it now holds at least one file.

    Raises:
        RuntimeError: if the input element reports no attached files.
    """
    posix_path = file_path.as_posix()
    await page.set_input_files(selector, posix_path)

    # Ask the DOM itself whether the upload registered on the element.
    has_file = await page.eval_on_selector(selector, "el => el.files.length > 0")
    if has_file:
        return
    raise RuntimeError(f"File did not attach to {selector}")
24+
25+
26+
def pick(row: Dict[str, str], *candidates) -> str:
    """Return the first non-blank value found in *row* among *candidates*.

    Column names are tried in the order given. A column counts only when it is
    present in *row* and its stringified value is non-empty after stripping
    surrounding whitespace. The stripped text is returned, or "" when no
    candidate qualifies.
    """
    for column in candidates:
        if column not in row:
            continue
        text = str(row[column]).strip()
        if text:
            return text
    return ""
31+
32+
33+
async def fill_receptor_site(page: Page, residues: str):
    """Type *residues* into the receptor binding-site field of the form.

    The "#option1" element is clicked first. A Playwright timeout on that
    click is ignored, and the site field is filled regardless.
    """
    site_field = "input[name=sitenum1]"
    toggle = "#option1"

    try:
        await page.click(toggle)
    except PWTimeout:
        # Best effort only: presumably the toggle reveals the optional inputs
        # and may already be open or absent -- TODO confirm against the page.
        pass

    await page.fill(site_field, residues)
39+
40+
41+
def _resolve_ligand(row: Dict[str, str], idx: int) -> tuple[str, pathlib.Path | None]:
    """Classify the row's ligand cell as inline FASTA text or a file on disk.

    Returns ``(sequence_text, None)`` for pasted FASTA or ``("", path)`` for a
    file. Raises ValueError when no ligand column is filled in, and
    FileNotFoundError when the value is neither FASTA-like nor an existing file.
    """
    ligand_raw = pick(
        row,
        "ligand_fasta",
        "ligand_path",
        "ligand_seq",
        "ligand_sequence",
        "ligand_pdb",
        "ligand_file",
        "ligand",
    )
    if not ligand_raw:
        raise ValueError(f"[row {idx}] Provide a ligand sequence or file path.")

    is_fasta_text = ligand_raw.startswith(">") or "\n" in ligand_raw
    if is_fasta_text and len(ligand_raw.splitlines()) >= 2:
        return ligand_raw, None

    candidate = pathlib.Path(ligand_raw).expanduser().resolve()
    # is_file() rather than exists(): a directory cannot be uploaded.
    if candidate.is_file():
        return "", candidate
    if is_fasta_text:
        # Header-only text starting with ">" that is not a real file name.
        return ligand_raw, None
    raise FileNotFoundError(f"[row {idx}] ligand file not found: {candidate}")


def _extract_token(result_url: str) -> str:
    """Derive the job token from the URL the browser landed on ("" if none)."""
    from urllib.parse import urlsplit

    if "token=" in result_url:
        return result_url.split("token=")[-1]

    # Look only at the URL *path*. Splitting the whole URL on "/" would turn
    # the host name into a "token" whenever the page stays on the site root,
    # which reported failed submissions as successful.
    tail = urlsplit(result_url).path.rstrip("/").rsplit("/", 1)[-1]
    return tail if len(tail) >= 8 else ""


async def submit_one(row: Dict[str, str], idx: int, sem: asyncio.Semaphore, pw) -> Dict[str, str]:
    """Submit one CSV row to the HDOCK web form and report the outcome.

    Args:
        row: One input record; reads "receptor_pdb", one of the ligand
            columns, and the optional "receptor_site_residues", "email",
            "jobname" / "name" columns.
        idx: 1-based row number, used in error messages and in the result.
        sem: Semaphore bounding how many browsers run at the same time.
        pw: Playwright handle used to launch Chromium.

    Returns:
        A dict with keys "row", "timestamp", "jobname", "token",
        "result_url", "ok" and "error". "ok" is true only when a token could
        be derived from the final URL.

    Raises:
        FileNotFoundError: receptor or ligand file is missing.
        ValueError: no ligand was supplied.
        Playwright errors from navigation or form interaction propagate.
    """
    async with sem:
        # Validate every input first so a bad row never costs a browser launch.
        rec_raw = pick(row, "receptor_pdb")
        rec_path = pathlib.Path(rec_raw).expanduser().resolve()
        # An empty cell would resolve to the current directory, so it is
        # rejected explicitly and the path must be a regular file.
        if not rec_raw or not rec_path.is_file():
            shown = rec_path if rec_raw else "(empty)"
            raise FileNotFoundError(f"[row {idx}] receptor_pdb not found: {shown}")

        ligand_seq, ligand_file = _resolve_ligand(row, idx)
        rsite = pick(row, "receptor_site_residues")
        mail = pick(row, "email")
        jobname = pick(row, "jobname", "name")

        browser: Browser = await pw.chromium.launch(headless=True)
        try:
            page: Page = await browser.new_page()
            await page.goto(HDOCK_URL, timeout=90_000)

            # receptor
            await attach_file(page, "#pdbfile1", rec_path)

            # ligand: uploaded file or pasted sequence
            if ligand_file is not None:
                await attach_file(page, "#pdbfile2", ligand_file)
            else:
                await page.fill("#fastaseq2", ligand_seq)
                # NOTE(review): the original indentation was lost; selecting
                # the ligand type is assumed to apply only to pasted
                # sequences -- confirm against the upstream file.
                await page.select_option("#ligtyp", value="protein")

            # optional binding site
            if rsite:
                await fill_receptor_site(page, rsite)

            # optional email/jobname
            if mail:
                await page.fill("#emailaddress", mail)
            if jobname:
                await page.fill("input[name=jobname]", jobname)

            # submit
            await page.click("input[name=upload]")
            await page.wait_for_load_state("networkidle")
            result_url = page.url
        finally:
            # Always release the browser, including when a step above raised.
            await browser.close()

        token = _extract_token(result_url)
        return {
            "row": idx,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "jobname": jobname,
            "token": token,
            "result_url": result_url,
            "ok": bool(token),
            "error": "" if token else "submission_failed",
        }
121+
122+
123+
# ───────────────────────── orchestrator function ─────────────────────
124+
async def main(args):
    """Submit every row of the input CSV and append the outcomes to a run log.

    Args:
        args: Parsed command-line namespace with ``csv`` (input file),
            ``out`` (log directory) and ``jobs`` (maximum concurrent browsers).

    The CSV must contain a "receptor_pdb" column and at least one ligand
    column; otherwise the process exits with a message. One line per finished
    row is printed, and each result is appended to ``<out>/run-log.csv``.
    """
    # dtype=str keeps cells exactly as typed; without it pandas turns values
    # such as "45" or "001" into 45.0 / 1 before they reach the web form.
    df = pd.read_csv(args.csv, dtype=str).fillna("")
    df.columns = [c.lower() for c in df.columns]
    if "receptor_pdb" not in df.columns:
        sys.exit("CSV requires 'receptor_pdb' column.")
    ligand_columns = {
        "ligand_fasta",
        "ligand_path",
        "ligand_seq",
        "ligand_sequence",
        "ligand_pdb",
        "ligand_file",
        "ligand",
    }
    if not (ligand_columns & set(df.columns)):
        sys.exit("CSV needs a ligand column (sequence text or file path).")

    out_dir = pathlib.Path(args.out).expanduser()
    out_dir.mkdir(parents=True, exist_ok=True)
    log_file = out_dir / "run-log.csv"
    log_fields = ["row", "timestamp", "jobname", "token", "result_url", "ok", "error"]

    # A semaphore of 0 would block forever and a negative value raises.
    sem = asyncio.Semaphore(max(1, args.jobs))

    async with async_playwright() as pw:

        async def guarded(row, idx):
            """Run one submission, turning a raised error into a FAIL record."""
            try:
                return await submit_one(row, idx, sem, pw)
            except Exception as exc:
                # Batch boundary: one bad row must not abort the other rows.
                # The failure is recorded in the log instead of propagating.
                return {
                    "row": idx,
                    "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                    "jobname": pick(row, "jobname", "name"),
                    "token": "",
                    "result_url": "",
                    "ok": False,
                    "error": f"{type(exc).__name__}: {exc}",
                }

        tasks = [
            guarded(row, idx)
            for idx, row in enumerate(df.to_dict(orient="records"), start=1)
        ]
        total = len(tasks)

        # The log is opened in append mode, so the header is needed only when
        # the file is new or empty; a rerun must not write a second header.
        needs_header = not log_file.exists() or log_file.stat().st_size == 0
        with open(log_file, "a", newline="", encoding="utf-8") as fh:
            writer = csv.DictWriter(fh, fieldnames=log_fields)
            if needs_header:
                writer.writeheader()
                fh.flush()
            for completed, coro in enumerate(asyncio.as_completed(tasks), start=1):
                res = await coro
                status = "OK" if res["ok"] else "FAIL"
                print(f"{completed}/{total} | row {res['row']} | {status:<4} | {res['result_url'] if res['ok'] else '-'}")
                writer.writerow(res)
                # Flush per row so an interrupted run keeps what has finished.
                fh.flush()

    print(f"Finished. Log saved to {log_file}")
167+
168+
169+
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the async batch.
    cli = argparse.ArgumentParser(
        description="Batch-submit to HDOCK with live progress.",
    )
    cli.add_argument("csv", help="Input CSV file")
    cli.add_argument("--out", help="Run-log directory", default="./hdock_logs")
    cli.add_argument(
        "-j",
        "--jobs",
        help="Concurrent browsers",
        type=int,
        default=1,
    )
    cli_args = cli.parse_args()
    asyncio.run(main(cli_args))

0 commit comments

Comments
 (0)