Skip to content

Commit a731734

Browse files
cyangNYU authored and ndonyapour committed
Add onionnet-SFCT scoring
1 parent 8e1ae21 commit a731734

8 files changed

+381
-5
lines changed

cwl_adapters/clean_smina_pdb.cwl

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env cwl-runner
2+
cwlVersion: v1.0
3+
4+
class: CommandLineTool
5+
6+
label: Clean smina pdb file (clean the pdb and rename the resname to LIG)
7+
8+
doc: |
9+
Clean smina pdb file (clean the pdb and rename the resname to LIG)
10+
11+
baseCommand: ["python", "/clean_smina_pdb.py"]
12+
13+
hints:
14+
DockerRequirement:
15+
dockerPull: cyangnyu/clean_smina_pdb
16+
17+
requirements:
18+
InlineJavascriptRequirement: {}
19+
20+
inputs:
21+
input_pdb:
22+
label: Input pdb file
23+
type: File
24+
format:
25+
- edam:format_1476
26+
inputBinding:
27+
prefix: --input_pdb
28+
29+
output_pdb:
30+
label: Output pdb file
31+
type: string?
32+
format:
33+
- edam:format_1476
34+
inputBinding:
35+
prefix: --output_pdb
36+
37+
outputs:
38+
output_pdb:
39+
type: File
40+
format: edam:format_1476
41+
outputBinding:
42+
glob: $(inputs.output_pdb)
43+
44+
$namespaces:
45+
edam: https://edamontology.org/
46+
47+
$schemas:
48+
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl

cwl_adapters/onionnet-feature.cwl

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env cwl-runner
2+
3+
cwlVersion: v1.0
4+
class: CommandLineTool
5+
6+
label: OnionNet (version1) for feature generation of docking poses
7+
8+
baseCommand: ["python", "/onionnet/generate_features.py"]
9+
10+
hints:
11+
DockerRequirement:
12+
dockerPull: cyangnyu/onionnet
13+
14+
requirements:
15+
InlineJavascriptRequirement: {}
16+
17+
inputs:
18+
complex_path_file:
19+
label: path file of protein-ligand complexes (structures in pdb format)
20+
type: File?
21+
format:
22+
- edam:format_1476
23+
inputBinding:
24+
prefix: -inp
25+
26+
num_of_cpus:
27+
label: number of CPUs to use.
28+
type: int?
29+
format:
30+
- edam:format_2330
31+
inputBinding:
32+
prefix: -nt
33+
default: 1
34+
35+
output_feature_file:
36+
label: the output file name containing the features.
37+
type: string?
38+
format:
39+
- edam:format_3752
40+
inputBinding:
41+
prefix: -out
42+
default: "output.csv"
43+
44+
outputs:
45+
output_feature_file:
46+
type: File
47+
format: edam:format_3752
48+
outputBinding:
49+
glob: $(inputs.output_feature_file)
50+
51+
$namespaces:
52+
edam: https://edamontology.org/
53+
cwltool: http://commonwl.org/cwltool#
54+
55+
$schemas:
56+
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl

cwl_adapters/onionnet-score.cwl

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/usr/bin/env cwl-runner
2+
3+
cwlVersion: v1.0
4+
class: CommandLineTool
5+
6+
label: OnionNet (version1) for rescoring of docking poses
7+
8+
baseCommand: ["python", "/onionnet/predict.py"]
9+
10+
hints:
11+
DockerRequirement:
12+
dockerPull: cyangnyu/onionnet
13+
14+
requirements:
15+
InlineJavascriptRequirement: {}
16+
17+
inputs:
18+
input_feature_file:
19+
label: feature csv file for protein-ligand complexes
20+
type: File?
21+
format:
22+
- edam:format_3752
23+
inputBinding:
24+
prefix: -fn
25+
26+
scaler:
27+
label: the standard scaler file.
28+
type: string?
29+
format:
30+
- edam:format_2330
31+
inputBinding:
32+
prefix: -scaler
33+
default: "/onionnet/models/StandardScaler.model"
34+
35+
weights:
36+
label: the trained DNN model file.
37+
type: string?
38+
format:
39+
- edam:format_2330
40+
inputBinding:
41+
prefix: -weights
42+
default: "/onionnet/models/CNN_final_model_weights.h5"
43+
44+
output_score_file:
45+
label: the predicted pKa values file
46+
type: string?
47+
format:
48+
- edam:format_3752
49+
inputBinding:
50+
prefix: -out
51+
default: "predicted_pKa.csv"
52+
53+
onionnet_score:
54+
type: string?
55+
56+
outputs:
57+
output_score_file:
58+
type: File
59+
outputBinding:
60+
glob: $(inputs.output_score_file)
61+
format: edam:format_3752
62+
63+
onionnet_score:
64+
label: Estimated Free Energy of Binding (onionnet score)
65+
doc: |-
66+
Estimated Free Energy of Binding
67+
type: float
68+
outputBinding:
69+
glob: $(inputs.output_score_file)
70+
loadContents: true
71+
outputEval: |
72+
${
73+
const lines = self[0].contents.split("\n");
74+
// The correct line should be of the form
75+
// ,pKa_predicted
76+
// /var/lib/cwl/stg19c300d1-f7fd-4a38-80d2-0f5615e3eb8f/complex_pdbs.pdb,7.441
77+
const bfe_line = lines[1];
78+
// refactor can be used to convert pKa to binding free energy, based on deltaG = -RT*lnK
79+
const refactor = -0.73349;
80+
const docking_score_string = bfe_line.split(",").filter(function(s) {return !isNaN(parseFloat(s))})[0];
81+
const onionnet_score = parseFloat(docking_score_string)/refactor;
82+
return onionnet_score
83+
}
84+
85+
$namespaces:
86+
edam: https://edamontology.org/
87+
cwltool: http://commonwl.org/cwltool#
88+
89+
$schemas:
90+
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl

cwl_adapters/smina_docking.cwl

+12-5
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ inputs:
3333
- edam:format_3815
3434
- edam:format_3816
3535
inputBinding:
36-
position: 1
3736
prefix: -r
3837

3938
ligand_file:
@@ -50,7 +49,6 @@ inputs:
5049
- edam:format_3815
5150
- edam:format_3816
5251
inputBinding:
53-
position: 2
5452
prefix: -l
5553

5654
ligand_box:
@@ -67,14 +65,24 @@ inputs:
6765
- edam:format_3815
6866
- edam:format_3816
6967
inputBinding:
70-
position: 3
7168
prefix: --autobox_ligand
7269

70+
local_only:
71+
label: try local minimization only rather than docking
72+
type: boolean?
73+
inputBinding:
74+
prefix: --local_only
75+
76+
score_only:
77+
label: Do not do any conformational search; simply rescore.
78+
type: boolean?
79+
inputBinding:
80+
prefix: --score_only
81+
7382
scoring:
7483
label: scoring function option, default is vina, options can be (vina, vinardo, or a customized scoring function)
7584
type: string?
7685
inputBinding:
77-
position: 4
7886
prefix: --scoring
7987
default: "vina"
8088

@@ -83,7 +91,6 @@ inputs:
8391
type: string?
8492
format: edam:format_1476
8593
inputBinding:
86-
position: 5
8794
prefix: -o
8895
default: "docked.pdb"
8996

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
## Protein-ligand docking and docking poses re-ranking
2+
##
3+
## input: pdb structures from PDBbind refined dataset
4+
## output:
5+
## 1. docking poses
6+
## 2. scoring file (vina score, sfct correction, combined_score for re-ranking docking poses)
7+
8+
steps:
9+
#
10+
- extract_pdbbind_refined:
11+
in:
12+
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
13+
# "The query() method uses a slightly modified Python syntax by default.
14+
# For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or.
15+
# This is syntactically valid Python, however the semantics are different."
16+
query: '(Kd_Ki == "Kd") and (value < 0.000002)'
17+
# to obtain a broader range of experimental dGs
18+
max_row: 1
19+
convert_Kd_dG: 'True'
20+
output_txt_path: '&binding_data.txt'
21+
output_pdb_paths: '&pdbbind_pdbs'
22+
output_sdf_paths: '&pdbbind_sdfs'
23+
experimental_dGs: '&exp_dGs'
24+
25+
- fix_side_chain:
26+
scatter: [input_pdb_path]
27+
in:
28+
input_pdb_path: '*pdbbind_pdbs'
29+
output_pdb_path: '&pdbbind_pdbs.pdb'
30+
31+
- minimize_ligand_only.yml:
32+
scatter: [sdf_path]
33+
in:
34+
sdf_path: '*pdbbind_sdfs'
35+
36+
- smina_docking:
37+
scatter: [receptor_file, ligand_file, ligand_box]
38+
scatterMethod: dotproduct
39+
in:
40+
receptor_file: '*pdbbind_pdbs.pdb'
41+
ligand_file: '*ligand_min.mol2'
42+
ligand_box: '*ligand_min.mol2'
43+
scoring: 'vina'
44+
local_only: True
45+
output_dock_file: '&ligand_opt.pdb'
46+
output_path: output
47+
48+
- clean_smina_pdb:
49+
scatter: [input_pdb]
50+
in:
51+
input_pdb: '*ligand_opt.pdb'
52+
output_pdb: '&ligand_opt_clean.pdb'
53+
54+
- cat_pdb:
55+
scatter: [input_structure1, input_structure2]
56+
scatterMethod: dotproduct
57+
in:
58+
input_structure1: '*pdbbind_pdbs.pdb'
59+
input_structure2: '*ligand_opt_clean.pdb'
60+
output_structure_path: '&complex_pdbs.pdb'
61+
62+
- onionnet-feature:
63+
scatter: [complex_path_file]
64+
in:
65+
complex_path_file: '*complex_pdbs.pdb'
66+
output_feature_file: '&output_features.csv'
67+
68+
- onionnet-score:
69+
scatter: [input_feature_file]
70+
in:
71+
input_feature_file: '*output_features.csv'
72+
output_score_file: '&predicted_pKa.csv'
73+
onionnet_score: '&onionnet_score'
74+
75+
- scatter_plot:
76+
in:
77+
xs: '*exp_dGs'
78+
ys: '*onionnet_score'
79+
80+
wic:
81+
graphviz:
82+
label: Protein-ligand docking (Smina) and docking poses re-ranking (OnionNet-sfct)
83+
steps:
84+
(1, extract_pdbbind_refined):
85+
wic:
86+
graphviz:
87+
label: extract protein-ligand structure (protein.pdb and ligand.sdf) from pdbbind_refined dataset
88+
(2, fix_side_chain):
89+
wic:
90+
graphviz:
91+
label: fix_side_chain of protein structure.
92+
(3, minimize_ligand_only.yml):
93+
wic:
94+
inlineable: False
95+
graphviz:
96+
label: minimize (obminimize) ligand structure.
97+
(4, smina_docking):
98+
wic:
99+
graphviz:
100+
label: Smina docking (flexible ligand - rigid protein docking)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
FROM python
2+
3+
RUN apt-get update && apt-get install -y wget
4+
RUN apt-get clean
5+
6+
COPY clean_smina_pdb.py /

examples/scripts/Dockerfile_onionnet

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
FROM condaforge/mambaforge
2+
# NOT mambaforge-pypy3 (pandas & rdkit & mdtraj are incompatible with pypy)
3+
4+
# Install requirements
5+
RUN apt-get update && apt-get install -y wget git
6+
7+
# Create environment
8+
# Since python 3.10 is already installed in the base image condaforge/mambaforge,
9+
# if the python version requirement is not specified, the python version will conflict with openbabel <3.0.
10+
#0 23.40 Pinned packages:
11+
#0 23.40 - python 3.10.*
12+
#0 23.40 The following packages are incompatible
13+
#0 23.40 └─ openbabel <3.0 is installable with the potential options
14+
#0 23.40 ├─ openbabel 2.4.1 would require
15+
#0 23.40 │ └─ python >=2.7,<2.8.0a0 , which can be installed;
16+
#0 23.40 ├─ openbabel 2.4.1 would require
17+
#0 23.40 │ └─ python >=3.6,<3.7.0a0 , which can be installed;
18+
#0 23.40 └─ openbabel 2.4.1 would require
19+
#0 23.40 └─ python >=3.7,<3.8.0a0 , which can be installed.
20+
# So, explicitly downgrade to python=3.7.*
21+
RUN mamba install -c conda-forge "python=3.7.*" "openbabel<3.0" numpy pandas mdtraj biopandas tensorflow -y
22+
# /opt/conda/lib/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15:
23+
# FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23.
24+
RUN pip install -U "scikit-learn<0.23" rdkit-pypi
25+
26+
# cleanup
27+
RUN apt-get clean
28+
RUN mamba clean --all --yes
29+
RUN pip cache purge
30+
31+
# Install onionnet
32+
RUN git clone https://github.com/cyangNYU/onionnet.git
33+
WORKDIR /onionnet
34+
35+
# Download models
36+
## The default model of onionnet-v1 in the github repo is not correct; the actual size is around 600 MB.
37+
## The authors provided a google drive link to download it,
38+
## but their command wget "https://drive.google.com/uc?export=download&id=1cwJN44TgaVBWYEEb_SGU5JBJp6WbFdM1" -O "CNN_final_model_weights.h5" is not working.
39+
RUN cd models && rm CNN_final_model_weights.h5 && wget https://huggingface.co/cyangNYU/onionnet-v1/resolve/main/CNN_final_model_weights.h5
40+
ADD Dockerfile_onionnet .

0 commit comments

Comments
 (0)