1+ #!/usr/bin/env python3
2+
3+ from __future__ import annotations
4+
import argparse
import asyncio
import csv
import pathlib
import sys
import time
from typing import Dict
from urllib.parse import urlsplit

import pandas as pd
from playwright.async_api import async_playwright, Page, Browser, TimeoutError as PWTimeout
15+
# Address that submit_one() opens with page.goto() before filling in the form.
HDOCK_URL = "http://hdock.phys.hust.edu.cn/"
17+
18+ # ───────────────────────── helper functions ──────────────────────────
async def attach_file(page: Page, selector: str, file_path: pathlib.Path):
    """Attach a local file to a form input and confirm the browser registered it.

    Args:
        page: Playwright page containing the input element.
        selector: Selector of the input that receives the file.
        file_path: File to attach; handed to Playwright as a POSIX-style string.

    Raises:
        RuntimeError: If the element reports an empty ``files`` list afterwards.
    """
    posix_path = file_path.as_posix()
    await page.set_input_files(selector, posix_path)

    # Ask the DOM element itself whether a file is now selected.
    attached = await page.eval_on_selector(selector, "el => el.files.length > 0")
    if attached:
        return
    raise RuntimeError(f"File did not attach to {selector} ")
24+
25+
def pick(row: Dict[str, str], *candidates) -> str:
    """Return the first non-blank value found under any of the candidate keys.

    Keys are tried in the order given. Values are converted with ``str`` and
    stripped of surrounding whitespace; keys that are absent or whose value is
    blank after stripping are skipped.

    Returns:
        The stripped value, or ``""`` when no candidate yields one.
    """
    stripped_values = (str(row[key]).strip() for key in candidates if key in row)
    return next((value for value in stripped_values if value), "")
31+
32+
async def fill_receptor_site(page: Page, residues: str):
    """Enter the receptor binding-site residues into the submission form.

    The ``#option1`` control is clicked first on a best-effort basis
    (presumably it reveals the site fields -- TODO confirm against the form).
    A click timeout no longer disappears silently: it is reported on stderr,
    and the residue field is still filled afterwards.

    Args:
        page: Playwright page showing the submission form.
        residues: Text typed verbatim into the ``sitenum1`` input.
    """
    try:
        await page.click("#option1")
    except PWTimeout:
        # Deliberately non-fatal, but leave a trace so that a later failure of
        # the fill below can be linked back to the control that never responded.
        print(
            "Warning: could not click '#option1' before filling the receptor "
            "site residues; continuing.",
            file=sys.stderr,
        )
    await page.fill("input[name=sitenum1]", residues)
39+
40+
def _resolve_ligand(row: Dict[str, str], idx: int) -> "tuple[str, pathlib.Path | None]":
    """Classify the row's ligand as inline FASTA text or as a file on disk.

    Returns:
        ``(sequence_text, None)`` for inline text, or ``("", path)`` for a file.

    Raises:
        ValueError: If none of the ligand columns holds a value.
        FileNotFoundError: If the value is not FASTA text and names no existing file.
    """
    ligand_raw = pick(
        row,
        "ligand_fasta",
        "ligand_path",
        "ligand_seq",
        "ligand_sequence",
        "ligand_pdb",
        "ligand_file",
        "ligand",
    )
    if not ligand_raw:
        raise ValueError(f"[row {idx} ] Provide a ligand sequence or file path.")

    # A leading ">" header or any embedded line break marks the value as FASTA text.
    is_fasta_text = ligand_raw.startswith(">") or "\n" in ligand_raw
    if is_fasta_text and len(ligand_raw.splitlines()) >= 2:
        return ligand_raw, None

    candidate = pathlib.Path(ligand_raw).expanduser().resolve()
    if candidate.exists():
        return "", candidate
    if is_fasta_text:
        # Single-line text starting with ">" that names no file: send it as typed.
        return ligand_raw, None
    raise FileNotFoundError(f"[row {idx} ] ligand file not found: {candidate} ")


def _extract_token(result_url: str) -> str:
    """Pull the job token out of the URL reached after submitting, or return ``""``."""
    if "token=" in result_url:
        token = result_url.split("token=")[-1]
        # Drop any further query parameters or fragment trailing the token.
        for delimiter in ("&", "#"):
            token = token.split(delimiter, 1)[0]
        return token

    # Look at the URL *path* only: splitting the whole URL on "/" would turn
    # the host name into a "token" whenever the browser is still on the
    # landing page, and a failed submission would be reported as successful.
    tail = urlsplit(result_url).path.rstrip("/").rsplit("/", 1)[-1]
    return tail if len(tail) >= 8 else ""


async def submit_one(row: Dict[str, str], idx: int, sem: asyncio.Semaphore, pw) -> Dict[str, str]:
    """Submit one CSV row through the HDOCK web form and describe the outcome.

    Local inputs are validated before a concurrency slot is taken or a browser
    is started, so a bad row fails immediately. The browser is always closed,
    including when a form step raises.

    Args:
        row: CSV record; must contain ``receptor_pdb`` plus one ligand column.
        idx: 1-based row number used in error messages and in the result.
        sem: Semaphore limiting how many browsers run at the same time.
        pw: Active Playwright instance used to launch Chromium.

    Returns:
        A record with the keys ``row``, ``timestamp``, ``jobname``, ``token``,
        ``result_url``, ``ok`` and ``error``.

    Raises:
        FileNotFoundError: If the receptor or ligand file does not exist.
        ValueError: If the row provides no ligand.
    """
    rec_path = pathlib.Path(row["receptor_pdb"]).expanduser().resolve()
    if not rec_path.exists():
        raise FileNotFoundError(f"[row {idx} ] receptor_pdb not found: {rec_path} ")
    ligand_seq, ligand_file = _resolve_ligand(row, idx)

    rsite = pick(row, "receptor_site_residues")
    jobname = pick(row, "jobname", "name")
    mail = pick(row, "email")

    async with sem:
        browser: Browser = await pw.chromium.launch(headless=True)
        try:
            page: Page = await browser.new_page()
            await page.goto(HDOCK_URL, timeout=90_000)

            # receptor
            await attach_file(page, "#pdbfile1", rec_path)

            # ligand: uploaded file or pasted sequence
            if ligand_file:
                await attach_file(page, "#pdbfile2", ligand_file)
            else:
                await page.fill("#fastaseq2", ligand_seq)
                # NOTE(review): the molecule type is set only for pasted
                # sequences here -- confirm it is not needed for uploaded files.
                await page.select_option("#ligtyp", value="protein")

            # optional binding site
            if rsite:
                await fill_receptor_site(page, rsite)

            # optional email / job name
            if mail:
                await page.fill("#emailaddress", mail)
            if jobname:
                await page.fill("input[name=jobname]", jobname)

            # submit and wait for the page the form leads to
            await page.click("input[name=upload]")
            await page.wait_for_load_state("networkidle")
            result_url = page.url
        finally:
            # Runs on success and on every error path, so no Chromium process is left behind.
            await browser.close()

    token = _extract_token(result_url)
    return {
        "row": idx,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "jobname": jobname,
        "token": token,
        "result_url": result_url,
        "ok": bool(token),
        "error": "" if token else "submission_failed",
    }
121+
122+
123+ # ───────────────────────── orchestrator function ─────────────────────
async def main(args):
    """Submit every CSV row to HDOCK and append one result line per row to the run log.

    Rows are submitted concurrently, limited by ``args.jobs``. A row that raises
    is recorded as a failed entry instead of aborting the remaining rows. The
    log header is written only when ``run-log.csv`` is new or empty, so repeated
    runs append cleanly to the same file.

    Args:
        args: Parsed command-line namespace providing ``csv`` (input file),
            ``out`` (log directory) and ``jobs`` (maximum concurrent browsers).

    Exits through ``sys.exit`` with a message when ``jobs`` is below 1 or when
    the CSV lacks the ``receptor_pdb`` column or every ligand column.
    """
    if args.jobs < 1:
        # A semaphore created with 0 would block every submission forever.
        sys.exit("--jobs must be at least 1.")

    # Read every cell as text and keep strings such as "NA" as typed, so that
    # numeric-looking cells (residue numbers, job names) are not reformatted
    # by type inference. fillna still covers rows shorter than the header.
    df = pd.read_csv(args.csv, dtype=str, keep_default_na=False).fillna("")
    df.columns = [c.lower() for c in df.columns]
    if "receptor_pdb" not in df.columns:
        sys.exit("CSV requires 'receptor_pdb' column.")
    needed = {
        "ligand_fasta",
        "ligand_path",
        "ligand_seq",
        "ligand_sequence",
        "ligand_pdb",
        "ligand_file",
        "ligand",
    }
    if not (needed & set(df.columns)):
        sys.exit("CSV needs a ligand column (sequence text or file path).")

    out_dir = pathlib.Path(args.out).expanduser()
    out_dir.mkdir(parents=True, exist_ok=True)
    log_file = out_dir / "run-log.csv"
    needs_header = not log_file.exists() or log_file.stat().st_size == 0

    # Column order of the log; the same keys that submit_one() returns.
    log_fields = ("row", "timestamp", "jobname", "token", "result_url", "ok", "error")

    sem = asyncio.Semaphore(args.jobs)
    with open(log_file, "a", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=log_fields)
        if needs_header:
            writer.writeheader()

        async with async_playwright() as pw:

            async def run_row(row, idx):
                """Run one submission; turn any error into a failed log record."""
                try:
                    return await submit_one(row, idx, sem, pw)
                except Exception as exc:
                    # Batch boundary: one bad row must not cancel the others.
                    return {
                        "row": idx,
                        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                        "jobname": pick(row, "jobname", "name"),
                        "token": "",
                        "result_url": "",
                        "ok": False,
                        "error": f"{type(exc).__name__}: {exc}",
                    }

            tasks = [
                run_row(row, idx)
                for idx, row in enumerate(df.to_dict(orient="records"), start=1)
            ]
            total = len(tasks)
            for completed, coro in enumerate(asyncio.as_completed(tasks), start=1):
                res = await coro
                status = "OK" if res["ok"] else "FAIL"
                print(f"{completed} /{total} | row {res['row']} | {status:<4} | {res['result_url'] if res['ok'] else '-'} ")
                writer.writerow(res)
                # Flush per row so the log stays current if the run is interrupted.
                fh.flush()

    print(f"Finished. Log saved to {log_file} ")
167+
168+
if __name__ == "__main__":
    # Script entry point: describe the command line, parse it, then hand the
    # parsed namespace to the asynchronous orchestrator.
    cli = argparse.ArgumentParser(
        description="Batch-submit to HDOCK with live progress.",
    )
    cli.add_argument("csv", help="Input CSV file")
    cli.add_argument("--out", default="./hdock_logs", help="Run-log directory")
    cli.add_argument("-j", "--jobs", type=int, default=1, help="Concurrent browsers")
    cli_args = cli.parse_args()
    asyncio.run(main(cli_args))
0 commit comments