
Commit 2d86f58

Merge pull request #27 from prehensilecode/update-2-3-1
Update for AlphaFold 2.3.1
2 parents 89e3cff + 8f52ba6 commit 2d86f58

5 files changed: +78 -35 lines

README.md

+31 -3

@@ -25,7 +25,7 @@ A prebuilt image is hosted on cloud.sylabs.io: [https://cloud.sylabs.io/library/
 N.B. The AlphaFold version and the alphafold_singularity versions must match.

 ```
-$ export ALPHAFOLD_VERSION=2.2.4
+$ export ALPHAFOLD_VERSION=2.3.1
 $ wget https://github.com/deepmind/alphafold/archive/refs/tags/v${ALPHAFOLD_VERSION}.tar.gz -O alphafold-${ALPHAFOLD_VERSION}.tar.gz
 ...
 2023-02-08 17:28:50 (1.24 MB/s) - ‘alphafold-x.x.x.tar.gz’ saved [5855095]
@@ -55,7 +55,18 @@ If your `/tmp` directory is small, you may need to set the [`SINGULARITY_TMPDIR`
 environment variable](https://sylabs.io/guides/3.3/user-guide/build_env.html#temporary-folders) to a directory on a filesystem with more free space.
 My builds have consumed up to 15 GiB of space. The resulting image file may be up to 10 GiB.

-### Install and run
+### Download genetic databases
+See [AlphaFold 2.3.1 README](https://github.com/deepmind/alphafold/tree/v2.3.1)
+for instructions on downloading genetic databases. These are necessary
+to run AlphaFold.
+
+This step requires [aria2c](https://aria2.github.io/).
+
+N.B. The difference between downloading the "reduced databases" as opposed
+to the "full databases" is that the reduced databases download "small BFD"
+instead of "BFD".
+
+### Modify run script, install, and run
 To run, modify the `$ALPHAFOLD_SRC/singularity/run_singularity.py` and change the
 section marked `USER CONFIGURATION`. At the least, you will need to modify the values
 of:
@@ -68,5 +79,22 @@ E.g.
 singularity_image = Client.load(os.path.join(os.environ['ALPHAFOLD_DIR'], 'alphafold.sif'))
 ```

+## Running on an HPC cluster
+Currently, this project only supports Slurm. Please open an issue to request
+support for other job schedulers/resource managers.
+
+
 ### Run as a Slurm job on a cluster
-See the example job script [`example_slurm_job.sh`](https://github.com/prehensilecode/alphafold_singularity/blob/main/example_slurm_job.sh)
+See the example job script [`example_slurm_job.sh`](https://github.com/prehensilecode/alphafold_singularity/blob/main/example_slurm_job.sh).
+N.B. this example must be modified to suit your specific HPC environment.
+
+The `run_singularity.py` script will use all GPUs available to the job. If
+Slurm has been set up with [`cgroups`](https://en.wikipedia.org/wiki/Cgroups),
+the job may request fewer than the total number of GPUs installed on a node.
+E.g. if the GPU nodes in the cluster have 4 GPU devices each, the job can
+do
+```bash
+#SBATCH --gpus=2
+```
+and AlphaFold Singularity will use only two of the four GPUs. This is
+because the `cgroup` for the job only shows 2 GPUs to the job.
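
As a usage sketch for the new "Download genetic databases" section (not part of this commit): the AlphaFold source tree ships download helpers under `scripts/`, and passing `reduced_dbs` fetches "small BFD" instead of the full BFD. The database directory below is a placeholder; substitute your own path.

```bash
# Sketch: fetch the reduced genetic databases with the helper bundled in the
# AlphaFold 2.3.1 source tree. Requires aria2c on PATH.
export ALPHAFOLD_VERSION=2.3.1
cd alphafold-${ALPHAFOLD_VERSION}

# /data/alphafold_databases is a placeholder for your database directory.
bash scripts/download_all_data.sh /data/alphafold_databases reduced_dbs
```

Point `ALPHAFOLD_DATADIR` (used by the example Slurm job below) at the same directory once the download finishes.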

Singularity.def

+9 -8

@@ -22,7 +22,8 @@ Stage: spython-base
 # FROM directive resets ARGS, so we specify again (the value is retained if
 # previously set).

-apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+apt-get update \
+&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
 build-essential \
 cmake \
 cuda-command-line-tools-11-1 \
@@ -48,9 +49,9 @@ wget \

 # Install Miniconda package manager.
 wget -q -P /tmp \
-https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh \
-&& bash /tmp/Miniconda3-py37_4.12.0-Linux-x86_64.sh -b -p /opt/conda \
-&& rm /tmp/Miniconda3-py37_4.12.0-Linux-x86_64.sh
+https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+&& bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
+&& rm /tmp/Miniconda3-latest-Linux-x86_64.sh

 # Install conda packages.
 PATH="/opt/conda/bin:/usr/local/cuda-11.1/bin:$PATH"
@@ -60,7 +61,7 @@ openmm=7.5.1 \
 cudatoolkit==11.1.1 \
 pdbfixer \
 pip \
-python=3.7 \
+python=3.8 \
 && conda clean --all --force-pkgs-dirs --yes

 ### /bin/cp -r . /app/alphafold
@@ -73,12 +74,12 @@ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c49412
 pip3 install --upgrade pip --no-cache-dir \
 && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \
 && pip3 install --upgrade --no-cache-dir \
-jax==0.3.17 \
-jaxlib==0.3.15+cuda11.cudnn805 \
+jax==0.3.25 \
+jaxlib==0.3.25+cuda11.cudnn805 \
 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html

 # Apply OpenMM patch.
-cd /opt/conda/lib/python3.7/site-packages
+cd /opt/conda/lib/python3.8/site-packages
 patch -p0 < /app/alphafold/docker/openmm.patch

 # Add SETUID bit to the ldconfig binary so that non-root users can run it.
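
For context (not part of this commit), a minimal build sketch for the updated `Singularity.def`, assuming Singularity/Apptainer with `--fakeroot` available and a scratch filesystem with roughly 15 GiB free; all paths are placeholders.

```bash
# Sketch: build the image and stage it where run_singularity.py expects it.
export SINGULARITY_TMPDIR=/scratch/$USER/tmp       # placeholder; needs ~15 GiB free
singularity build --fakeroot alphafold.sif Singularity.def

export ALPHAFOLD_DIR=$HOME/alphafold_singularity   # placeholder install location
mv alphafold.sif ${ALPHAFOLD_DIR}/alphafold.sif
```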

example_slurm_job.sh

+9 -12

@@ -1,44 +1,41 @@
 #!/bin/bash
-#SBATCH -p gpu
+#SBATCH --partition=gpu
 #SBATCH --time=18:00:00
 #SBATCH --gpus=4
 #SBATCH --cpus-per-gpu=12
-#SBATCH --mem=140G
+#SBATCH --mem=45G

 ### NOTE
 ### This job script cannot be used without modification for your specific environment.

-module load alphafold/2.2.4
-module load python/gcc/3.10
+module load python/gcc/3.11
+module load alphafold/2.3.1

 ### Check values of some environment variables
-echo SLURM_JOB_GPUS=$SLURM_JOB_GPUS
 echo ALPHAFOLD_DIR=$ALPHAFOLD_DIR
 echo ALPHAFOLD_DATADIR=$ALPHAFOLD_DATADIR

 ###
-### README This runs AlphaFold 2.2.2 on the T1050.fasta file
+### README This runs AlphaFold 2.3.1 on the T1050.fasta file
 ###

 # AlphaFold should use all GPU devices available to the job by default.
-# To explicitly specify use of GPUs, and the GPU devices to use, add
-# --use_gpu --gpu_devices=${SLURM_JOB_GPUS}
 #
 # To run the CASP14 evaluation, use:
 # --model_preset=monomer_casp14
+# --db_preset=full_dbs (or delete the line; default is "full_dbs")
 #
 # To benchmark, running multiple JAX model evaluations (NB this
 # significantly increases run time):
 # --benchmark

-# Run AlphaFold; default is to use GPUs, i.e. "--use_gpu" can be omitted.
+# Run AlphaFold; default is to use GPUs
 python3 ${ALPHAFOLD_DIR}/singularity/run_singularity.py \
-    --use_gpu --gpu_devices=${SLURM_JOB_GPUS} \
     --data_dir=${ALPHAFOLD_DATADIR} \
     --fasta_paths=T1050.fasta \
     --max_template_date=2020-05-14 \
-    --model_preset=monomer_casp14 \
-    --benchmark
+    --db_preset=reduced_dbs \
+    --model_preset=monomer

 echo INFO: AlphaFold returned $?

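
To illustrate the cgroups-limited GPU behaviour described in the README changes (not part of this commit): a sketch of submitting the example job and checking which GPUs Slurm actually exposes to an allocation; the partition name and GPU counts are site-specific assumptions.

```bash
# Sketch: submit the example job from the directory containing T1050.fasta.
sbatch example_slurm_job.sh

# With cgroups enforcement, an allocation requesting 2 of a node's 4 GPUs
# should see only those 2 devices.
srun --partition=gpu --gpus=2 --pty nvidia-smi -L
```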

requirements.txt

+3 -2

@@ -1,3 +1,4 @@
 # Dependencies necessary to execute run_singularity.py
-absl-py==0.13.0
-spython==0.1.16
+# absl-py version to match deepmind/alphafold
+absl-py==1.0.0
+spython==0.3.0
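
These two packages are host-side dependencies of `run_singularity.py` (absl-py for the flags, spython to drive Singularity); a minimal install sketch, with the virtual-environment path as a placeholder:

```bash
# Sketch: install the launcher's host-side dependencies.
python3 -m venv ~/venvs/alphafold-launcher     # optional; placeholder path
source ~/venvs/alphafold-launcher/bin/activate
python3 -m pip install -r requirements.txt     # absl-py==1.0.0, spython==0.3.0
```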

run_singularity.py

+26 -10

@@ -15,16 +15,19 @@
 """Singularity launch script for Alphafold Singularity image."""

 import os
+import sys
 import pathlib
 import signal
 from typing import Tuple

 from absl import app
 from absl import flags
 from absl import logging
+from spython.main import Client

 import tempfile
-from spython.main import Client
+import subprocess
+

 #### USER CONFIGURATION ####

@@ -34,11 +37,16 @@
 singularity_image = Client.load(os.path.join(os.environ['ALPHAFOLD_DIR'], 'alphafold.sif'))

 # Path to a directory that will store the results.
-if 'TMPDIR' in os.environ:
+if 'TMP' in os.environ:
+  output_dir = os.environ['TMP']
+elif 'TMPDIR' in os.environ:
   output_dir = os.environ['TMPDIR']
 else:
   output_dir = tempfile.mkdtemp(dir='/tmp', prefix='alphafold-')

+# set tmp dir the same as output dir
+tmp_dir = output_dir
+
 #### END USER CONFIGURATION ####


@@ -62,7 +70,7 @@
     'separated by commas. All FASTA paths must have a unique basename as the '
     'basename is used to name the output directories for each prediction.')
 flags.DEFINE_string(
-    'output_dir', '/tmp/alphafold',
+    'output_dir', output_dir,
     'Path to a directory that will store the results.')
 flags.DEFINE_string(
     'data_dir', None,
@@ -113,6 +121,7 @@


 def _create_bind(bind_name: str, path: str) -> Tuple[str, str]:
+  """Create a bind point for each file and directory used by the model."""
   path = os.path.abspath(path)
   source_path = os.path.dirname(path) if bind_name != 'data_dir' else path
   target_path = os.path.join(_ROOT_MOUNT_DIRECTORY, bind_name)
@@ -145,7 +154,7 @@ def main(argv):

   # Path to the MGnify database for use by JackHMMER.
   mgnify_database_path = os.path.join(
-      FLAGS.data_dir, 'mgnify', 'mgy_clusters_2018_12.fa')
+      FLAGS.data_dir, 'mgnify', 'mgy_clusters_2022_05.fa')

   # Path to the BFD database for use by HHblits.
   bfd_database_path = os.path.join(
@@ -156,9 +165,9 @@
   small_bfd_database_path = os.path.join(
       FLAGS.data_dir, 'small_bfd', 'bfd-first_non_consensus_sequences.fasta')

-  # Path to the Uniclust30 database for use by HHblits.
-  uniclust30_database_path = os.path.join(
-      FLAGS.data_dir, 'uniclust30', 'uniclust30_2018_08', 'uniclust30_2018_08')
+  # Path to the Uniref30 database for use by HHblits.
+  uniref30_database_path = os.path.join(
+      FLAGS.data_dir, 'uniref30', 'UniRef30_2021_03')

   # Path to the PDB70 database for use by HHsearch.
   pdb70_database_path = os.path.join(FLAGS.data_dir, 'pdb70', 'pdb70')
@@ -178,7 +187,7 @@
   if alphafold_path == data_dir_path or alphafold_path in data_dir_path.parents:
     raise app.UsageError(
         f'The download directory {FLAGS.data_dir} should not be a subdirectory '
-        f'in the AlphaFold repository directory. If it is, the Docker build is '
+        f'in the AlphaFold repository directory. If it is, the Singularity build is '
         f'slow since the large databases are copied during the image creation.')

   binds = []
@@ -211,7 +220,7 @@
     database_paths.append(('small_bfd_database_path', small_bfd_database_path))
   else:
     database_paths.extend([
-        ('uniclust30_database_path', uniclust30_database_path),
+        ('uniref30_database_path', uniref30_database_path),
         ('bfd_database_path', bfd_database_path),
     ])
   for name, path in database_paths:
@@ -222,6 +231,11 @@

   output_target_path = os.path.join(_ROOT_MOUNT_DIRECTORY, 'output')
   binds.append(f'{output_dir}:{output_target_path}')
+  logging.info('Binding %s -> %s', output_dir, output_target_path)
+
+  tmp_target_path = '/tmp'
+  binds.append(f'{tmp_dir}:{tmp_target_path}')
+  logging.info('Binding %s -> %s', tmp_dir, tmp_target_path)

   use_gpu_relax = FLAGS.enable_gpu_relax and FLAGS.use_gpu

@@ -240,9 +254,11 @@

   options = [
       '--bind', f'{",".join(binds)}',
+      '--env', f'NVIDIA_VISIBLE_DEVICES={FLAGS.gpu_devices}',
+      # The following flags allow us to make predictions on proteins that
+      # would typically be too long to fit into GPU memory.
       '--env', 'TF_FORCE_UNIFIED_MEMORY=1',
       '--env', 'XLA_PYTHON_CLIENT_MEM_FRACTION=4.0',
-      '--env', 'OPENMM_CPU_THREADS=12'
   ]

   # Run the container.
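
Putting the launcher changes together (not part of this commit), a sketch of a direct invocation that uses only flags visible in this diff and in the example job script; the exported paths are placeholders for your own setup.

```bash
# Sketch: run the launcher outside a scheduler, against the reduced databases.
export ALPHAFOLD_DIR=$HOME/alphafold_singularity    # contains alphafold.sif
export ALPHAFOLD_DATADIR=/data/alphafold_databases  # downloaded databases

OUTDIR=${TMPDIR:-/tmp}/alphafold-output             # overrides the TMP/TMPDIR default
mkdir -p "$OUTDIR"

python3 ${ALPHAFOLD_DIR}/singularity/run_singularity.py \
    --data_dir=${ALPHAFOLD_DATADIR} \
    --fasta_paths=T1050.fasta \
    --max_template_date=2020-05-14 \
    --db_preset=reduced_dbs \
    --model_preset=monomer \
    --output_dir="$OUTDIR"
```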
