Skip to content

Commit a1de187

Browse files
Brandon Duane WalkerBrandon Duane Walker
authored andcommitted
extract protein and ligand in same workflow step
1 parent e2a40b8 commit a1de187

File tree

9 files changed

+141
-86
lines changed

9 files changed

+141
-86
lines changed

.github/workflows/docker_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
rename_residues_mol, combine_structure,
2727
remove_terminal_residue_name_prefixes, molgan,
2828
pdbbind_refined, onionnet-sfct, smina, pdbfixer,
29-
fix_pdb_atom_column, extract_protein, generate_conformers] # No username for pdbind_refined
29+
fix_pdb_atom_column, extract_ligand_protein, generate_conformers] # No username for pdbbind_refined
3030
# skip data/ and cwl_adapters/file_format_conversions/biosimspace/
3131
runs-on: [ubuntu-latest]
3232

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@ cwlVersion: v1.0
33

44
class: CommandLineTool
55

6-
label: A tool that employs OpenMM to extract protein from a PDB file
6+
label: A tool that employs OpenMM to extract ligands and protein from a PDB file
77

88
doc: |-
9-
A tool that employs OpenMM to extract protein from a PDB file
9+
A tool that employs OpenMM to extract ligands and protein from a PDB file
1010

11-
baseCommand: ['python', '/extract_protein.py']
11+
baseCommand: ['python', '/extract_ligand_protein.py']
1212

1313
hints:
1414
DockerRequirement:
15-
dockerPull: ndonyapour/extract_protein
15+
dockerPull: mrbrandonwalker/extract_ligand_protein
1616

1717
inputs:
1818
input_pdb_path:
@@ -44,6 +44,20 @@ inputs:
4444
prefix: --output_pdb_path
4545
default: system.pdb
4646

47+
output_pdb_ligand_path:
48+
label: Output pdb ligand file path
49+
doc: |-
50+
Output pdb ligand file path
51+
Type: string
52+
File type: output
53+
Accepted formats: pdb
54+
type: string
55+
format:
56+
- edam:format_1476
57+
inputBinding:
58+
prefix: --output_pdb_ligand_path
59+
default: ligand_system.pdb
60+
4761
outputs:
4862
output_pdb_path:
4963
label: Output pdb file path
@@ -54,6 +68,16 @@ outputs:
5468
glob: $(inputs.output_pdb_path)
5569
format: edam:format_1476
5670

71+
output_pdb_ligand_path:
72+
label: Output ligand pdb file path
73+
doc: |-
74+
Output ligand pdb file path
75+
Use optional File? since ligand may not exist in complex
76+
type: File?
77+
outputBinding:
78+
glob: $(inputs.output_pdb_ligand_path)
79+
format: edam:format_1476
80+
5781
$namespaces:
5882
edam: https://edamontology.org/
5983

docker/dockerBuild.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ sudo docker build --no-cache --pull -f Dockerfile_molgan -t ndonyapour/molgan .
3333
sudo docker build --no-cache --pull -f Dockerfile_onionnet-sfct -t cyangnyu/onionnet-sfct .
3434
sudo docker build --no-cache --pull -f Dockerfile_smina -t cyangnyu/smina .
3535
sudo docker build --no-cache --pull -f Dockerfile_pdbfixer -t ndonyapour/pdbfixer .
36-
sudo docker build --no-cache --pull -f Dockerfile_extract_protein -t ndonyapour/extract_protein .
36+
sudo docker build --no-cache --pull -f Dockerfile_extract_ligand_protein -t mrbrandonwalker/extract_ligand_protein .
3737
sudo docker build --no-cache --pull -f Dockerfile_fix_pdb_atom_column -t ndonyapour/fix_pdb_atom_column .
3838
sudo docker build --no-cache --pull -f Dockerfile_generate_conformers -t ndonyapour/generate_conformers .
3939

dockerPull.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ docker pull ndonyapour/combine_structure
2727
docker pull mrbrandonwalker/diffdock_gpu
2828
docker pull mrbrandonwalker/diffdock_cpu
2929
docker pull ndonyapour/pdbfixer
30-
docker pull ndonyapour/extract_protein
30+
docker pull mrbrandonwalker/extract_ligand_protein
3131
docker pull ndonyapour/fix_pdb_atom_column
3232
docker pull ndonyapour/generate_conformers

examples/docking/vs_demo_4.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ steps:
1414
output_sdf_paths: '&pdbbind_sdfs'
1515
experimental_dGs: '&exp_dGs'
1616
pdb_ids: '&pdbids'
17-
17+
1818
- config_tag_pdb:
1919
scatter: [pdb_id]
2020
in:
@@ -39,11 +39,12 @@ steps:
3939
scatter: [input_pdb_path, input_helper_pdb_path]
4040
scatterMethod: dotproduct
4141

42-
- extract_protein:
42+
- extract_ligand_protein:
4343
scatter: [input_pdb_path]
4444
in:
4545
input_pdb_path: '*pdbbind_pdbfixer.pdb'
4646
output_pdb_path: '&protein.pdb'
47+
output_pdb_ligand_path: '&ligand.pdb'
4748

4849
# assign partial charges (ligand)
4950
- convert_pdbqt.yml:
@@ -160,7 +161,7 @@ wic:
160161
inlineable: False
161162
graphviz:
162163
label: Fix Protein Structure
163-
(5, extract_protein):
164+
(5, extract_ligand_protein):
164165
wic:
165166
inlineable: False
166167
graphviz:
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM condaforge/mambaforge
2+
# NOT mambaforge-pypy3 (mdanalysis is incompatible with pypy)
3+
RUN mamba config --add channels conda-forge
4+
RUN mamba install mdanalysis
5+
6+
ADD extract_ligand_protein.py .
7+
ADD Dockerfile_extract_ligand_protein .

examples/scripts/Dockerfile_extract_protein

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# pylint: disable=no-member
2+
import sys
3+
import os
4+
import argparse
5+
6+
import MDAnalysis as mda
7+
8+
9+
def parse_arguments(argv: 'list[str] | None' = None) -> argparse.Namespace:
    """ This function parses the arguments.

    Args:
        argv (list[str] | None): The argument strings to parse. Defaults to None,
            in which case argparse reads them from sys.argv[1:].

    Returns:
        argparse.Namespace: The command line arguments
    """
    parser = argparse.ArgumentParser()
    # There is nothing to do without an input file, so let argparse reject a
    # missing path with a usage message instead of passing None downstream.
    parser.add_argument('--input_pdb_path', type=str, required=True)
    # The output defaults are the same file names the CWL adapter of this tool uses.
    parser.add_argument('--output_pdb_path', type=str, default='system.pdb')
    parser.add_argument('--output_pdb_ligand_path', type=str, default='ligand_system.pdb')
    return parser.parse_args(argv)
21+
22+
23+
def _find_excluded_indices(universe: 'mda.Universe') -> set:
    """ Collect the indices of water atoms and of atoms that have no bonds

    Args:
        universe (mda.Universe): A universe whose atoms already carry bonds

    Returns:
        set: The indices of the atoms that must not end up in the ligand
    """
    excluded = set()
    for atom in universe.atoms:
        n_bonds = len(atom.bonds)
        if n_bonds == 0:
            # salts and waters without hydrogens have no bonds at all
            excluded.add(int(atom.index))
        elif n_bonds == 2 and atom.name == 'O':
            # Water is recognized by connectivity (an oxygen bonded to exactly two
            # hydrogens) rather than by residue name, since water residue names differ
            # between pdb files. Hydrogen names differ as well (H, H1, H2, HW1, ...),
            # so any name starting with 'H' is accepted.
            neighbors = atom.bonded_atoms
            if all(a.name.startswith('H') for a in neighbors):
                excluded.add(int(atom.index))
                excluded.update(int(a.index) for a in neighbors)
    return excluded


def extract_ligand_protein(input_pdb_path: str, output_pdb_path: str, output_pdb_ligand_path: str) -> None:
    """ Extract ligand & protein from the PDB file

    The protein is everything matched by the 'protein' selection. The ligand is
    everything else, minus waters and minus atoms without any bonds (salts,
    waters without hydrogens). The ligand file is only written when at least
    one ligand atom remains.

    Args:
        input_pdb_path (str): The path to the input pdb file
        output_pdb_path (str): The path to the output pdb file
        output_pdb_ligand_path (str): The path to the output pdb ligand file
    """

    # Load the PDB file
    u = mda.Universe(input_pdb_path)

    protein_atoms = u.select_atoms('protein')  # use simple atom selection when possible
    ligand_atoms = u.select_atoms('not protein')

    # Guess the bonds on a second universe, since the input PDB may not have bonds.
    # NOTE(review): presumably a separate universe is used so that the guessed bonds
    # are not attached to the atoms that get written -- confirm.
    bonded_u = mda.Universe(input_pdb_path)
    bonded_u.atoms.guess_bonds()

    # Both universes are read from the same file, so atom indices are interchangeable
    excluded = _find_excluded_indices(bonded_u)
    if excluded:
        index_string = ' '.join(str(i) for i in sorted(excluded))
        ligand_atoms = ligand_atoms.select_atoms(f'not index {index_string}')

    # AtomGroup.write opens and closes the target file on its own
    protein_atoms.write(output_pdb_path)
    if ligand_atoms.n_atoms > 0:  # will crash if no ligand atoms
        ligand_atoms.write(output_pdb_ligand_path)
84+
85+
86+
def main() -> None:
    """ Reads the command line arguments and extracts the protein and the ligand from the PDB file

    Exits with status 1 when the input pdb file does not exist.
    """
    args = parse_arguments()

    if not os.path.exists(args.input_pdb_path):
        # Report the failure on stderr so it is not mixed with regular output
        print(f'Error: Can not find file {args.input_pdb_path}', file=sys.stderr)
        sys.exit(1)

    extract_ligand_protein(args.input_pdb_path, args.output_pdb_path, args.output_pdb_ligand_path)


if __name__ == '__main__':
    main()

examples/scripts/extract_protein.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

0 commit comments

Comments
 (0)