Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docker_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
rename_residues_mol, combine_structure,
remove_terminal_residue_name_prefixes, molgan,
pdbbind_refined, onionnet-sfct, smina, pdbfixer,
fix_pdb_atom_column, extract_protein, generate_conformers] # No username for pdbind_refined
fix_pdb_atom_column, extract_protein, extract_ligand_protein, generate_conformers] # No username for pdbind_refined
# skip data/ and cwl_adapters/file_format_conversions/biosimspace/
runs-on: [ubuntu-latest]

Expand Down
85 changes: 85 additions & 0 deletions cwl_adapters/extract_ligand_protein.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env cwl-runner
# CWL adapter for extract_ligand_protein.py: splits a PDB file into a protein
# PDB file and an (optional) ligand PDB file.
cwlVersion: v1.0

class: CommandLineTool

label: A tool that employs MDAnalysis to extract ligands and protein from a PDB file

doc: |-
  A tool that employs MDAnalysis to extract ligands and protein from a PDB file

# The script is copied to the root of the image by Dockerfile_extract_ligand_protein.
baseCommand: ['python', '/extract_ligand_protein.py']

hints:
  DockerRequirement:
    dockerPull: mrbrandonwalker/extract_ligand_protein

inputs:
  input_pdb_path:
    label: Input pdb file path
    doc: |-
      Input pdb file path
      Type: string
      File type: input
      Accepted formats: pdb
      Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_protein.pdb
    type: File
    format:
    - edam:format_1476
    inputBinding:
      prefix: --input_pdb_path

  # The two output *names* are passed in as strings; the files themselves are
  # collected in the `outputs` section by globbing on these names.
  # NOTE(review): the CWL spec only defines `format` for File parameters; it is
  # kept on these string inputs unchanged - confirm whether other tooling reads it.
  output_pdb_path:
    label: Output pdb file path
    doc: |-
      Output pdb file path
      Type: string
      File type: output
      Accepted formats: pdb
      Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_cat_pdb.pdb
    type: string
    format:
    - edam:format_1476
    inputBinding:
      prefix: --output_pdb_path
    default: system.pdb

  output_pdb_ligand_path:
    label: Output pdb ligand file path
    doc: |-
      Output pdb ligand file path
      Type: string
      File type: output
      Accepted formats: pdb
    type: string
    format:
    - edam:format_1476
    inputBinding:
      prefix: --output_pdb_ligand_path
    default: ligand_system.pdb

outputs:
  output_pdb_path:
    label: Output pdb file path
    doc: |-
      Output pdb file path
    type: File
    outputBinding:
      glob: $(inputs.output_pdb_path)
    format: edam:format_1476

  output_pdb_ligand_path:
    label: Output ligand pdb file path
    doc: |-
      Output ligand pdb file path
      Use optional File? since ligand may not exist in complex
    type: File?
    outputBinding:
      glob: $(inputs.output_pdb_ligand_path)
    format: edam:format_1476

$namespaces:
  edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
2 changes: 1 addition & 1 deletion cwl_adapters/extract_protein.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ $namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
1 change: 1 addition & 0 deletions docker/dockerBuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ sudo docker build --no-cache --pull -f Dockerfile_onionnet-sfct -t polusai/onion
sudo docker build --no-cache --pull -f Dockerfile_smina -t cyangnyu/smina .
sudo docker build --no-cache --pull -f Dockerfile_pdbfixer -t ndonyapour/pdbfixer .
sudo docker build --no-cache --pull -f Dockerfile_extract_protein -t ndonyapour/extract_protein .
sudo docker build --no-cache --pull -f Dockerfile_extract_ligand_protein -t mrbrandonwalker/extract_ligand_protein .
sudo docker build --no-cache --pull -f Dockerfile_fix_pdb_atom_column -t ndonyapour/fix_pdb_atom_column .
sudo docker build --no-cache --pull -f Dockerfile_generate_conformers -t ndonyapour/generate_conformers .

Expand Down
1 change: 1 addition & 0 deletions dockerPull.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ docker pull mrbrandonwalker/diffdock_gpu
docker pull mrbrandonwalker/diffdock_cpu
docker pull ndonyapour/pdbfixer
docker pull ndonyapour/extract_protein
docker pull mrbrandonwalker/extract_ligand_protein
docker pull ndonyapour/fix_pdb_atom_column
docker pull ndonyapour/generate_conformers
7 changes: 7 additions & 0 deletions examples/scripts/Dockerfile_extract_ligand_protein
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# docker build -f Dockerfile_extract_ligand_protein -t mrbrandonwalker/extract_ligand_protein .
FROM condaforge/mambaforge
# NOT mambaforge-pypy3 (mdanalysis is incompatible with pypy)
# -y: the image build is non-interactive, so never wait for a confirmation.
# The clean step keeps the downloaded package cache out of the image layer.
RUN mamba install -y mdanalysis && mamba clean -afy

# COPY instead of ADD: these are plain local files, no archive extraction or
# URL fetching is wanted. The destination is spelled out as / because the CWL
# adapter invokes the script as /extract_ligand_protein.py.
COPY extract_ligand_protein.py /
COPY Dockerfile_extract_ligand_protein /
111 changes: 111 additions & 0 deletions examples/scripts/extract_ligand_protein.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# pylint: disable=no-member
import sys
import os
import argparse

import MDAnalysis as mda


def parse_arguments() -> argparse.Namespace:
    """ This function parses the arguments.

    All three paths are mandatory. Marking them as required makes argparse
    stop with a usage message (exit code 2) when one is missing, instead of
    handing ``None`` to the file handling code further down.

    Returns:
        argparse.Namespace: The command line arguments, with the attributes
        ``input_pdb_path``, ``output_pdb_path`` and ``output_pdb_ligand_path``
    """
    parser = argparse.ArgumentParser(
        description='Extract the protein and the ligand from a PDB file')
    parser.add_argument('--input_pdb_path', type=str, required=True,
                        help='Path to the input pdb file')
    parser.add_argument('--output_pdb_path', type=str, required=True,
                        help='Path to the output (protein) pdb file')
    parser.add_argument('--output_pdb_ligand_path', type=str, required=True,
                        help='Path to the output ligand pdb file')
    return parser.parse_args()


def extract_ligand_protein(input_pdb_path: str, output_pdb_path: str, output_pdb_ligand_path: str) -> None:
    """ Extract ligand & protein from the PDB file

    The protein atoms are written to ``output_pdb_path``. Every other atom is
    a ligand candidate; when bond information is available, water molecules
    and atoms without any bond (salts, waters without hydrogens) are dropped
    from the candidates. The ligand file is only written when at least one
    ligand atom is left.

    Args:
        input_pdb_path (str): The path to the input pdb file
        output_pdb_path (str): The path to the output pdb file
        output_pdb_ligand_path (str): The path to the output pdb ligand file
    """

    # Load the PDB file; the atoms that get written come from this universe
    u = mda.Universe(input_pdb_path)
    protein_atoms = u.select_atoms('protein')  # use simple atom selection when possible
    ligand_atoms = u.select_atoms('not protein')

    # Bonds are guessed on a second universe loaded from the same file, so
    # atom indices agree with `u`.
    # NOTE(review): presumably kept separate so that guessed bonds are not
    # attached to the atoms that are written out - confirm.
    dup_u = mda.Universe(input_pdb_path)
    try:
        # guess the bonds, since input PDB may not have bonds
        dup_u.atoms.guess_bonds()
    except ValueError:
        # ValueError: vdw radii for types: AS. These can be defined manually using the keyword 'vdwradii'
        print('Error: Could not guess bonds. Check the input PDB file.')

    try:
        _ = dup_u.atoms.bonds  # raises NoDataError when there is no bond information
        has_bonds = True
    except mda.exceptions.NoDataError:
        has_bonds = False
        print('No bonds found in the PDB file.')

    # Indices of atoms that must not end up in the ligand file.
    # Stays empty (nothing is filtered) when no bond information exists.
    excluded_indices = set()
    if has_bonds:
        for atom in dup_u.atoms:
            n_bonds = len(atom.bonds)
            if n_bonds == 0:
                # salts, waters without H
                excluded_indices.add(atom.index)
            elif n_bonds == 2 and atom.name == 'O':  # if hydrogens are added
                # Water is identified by connectivity (Oxygen bonded to two
                # Hydrogens) rather than by resname, since the pdb file may
                # use different water residue names.
                # NOTE(review): the comparison is on atom *names*, so water
                # hydrogens named e.g. H1/H2 do not match - confirm whether
                # comparing element/type was intended.
                if {a.name for a in atom.bonded_atoms} == {'H'}:
                    excluded_indices.add(atom.index)
                    excluded_indices.update(a.index for a in atom.bonded_atoms)

    # Remove waters and non bonded atoms with a single selection
    if excluded_indices:
        index_list = ' '.join(str(i) for i in sorted(excluded_indices))
        ligand_atoms = ligand_atoms.select_atoms(f'not index {index_list}')

    # AtomGroup.write creates the file itself from the given path; opening the
    # path beforehand only truncated the same file through a second handle.
    protein_atoms.write(output_pdb_path)
    if ligand_atoms.n_atoms > 0:  # will crash if no ligand atoms
        ligand_atoms.write(output_pdb_ligand_path)


def main() -> None:
    """ Script entry point: parse the command line, check that the input
    PDB file exists and split it into protein and ligand files.

    Prints an error message and exits with status 1 when the input file
    can not be found.
    """
    cli = parse_arguments()
    pdb_in = cli.input_pdb_path

    if os.path.exists(pdb_in):
        extract_ligand_protein(pdb_in, cli.output_pdb_path, cli.output_pdb_ligand_path)
        return

    print(f'Error: Can not find file {pdb_in}')
    sys.exit(1)


# Run main() only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()