Skip to content

Commit fd75215

Browse files
authored
Merge pull request #100 from matsengrp/99-modernize-dockerfile
Modernize Dockerfile: Update to debian:latest and remove custom base image dependency
2 parents 2f8b463 + 13b7780 commit fd75215

20 files changed

+449
-115
lines changed

.dockerignore

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Git directories (including submodules) - can be hundreds of MB
2+
.git
3+
.gitignore
4+
.gitmodules
5+
# Note: lib/revbayes/ is intentionally not excluded; it is needed for building the base image (Dockerfile.base)
6+
7+
# macOS metadata
8+
.DS_Store
9+
.AppleDouble
10+
.LSOverride
11+
12+
# Build artifacts
13+
_build/
14+
output/
15+
*.o
16+
*.a
17+
*.so
18+
*.dylib
19+
20+
# Python cache
21+
__pycache__/
22+
*.py[cod]
23+
*$py.class
24+
*.egg-info/
25+
.pytest_cache/
26+
27+
# Documentation
28+
html/
29+
latex/
30+
Doxyfile
31+
32+
# IDE and editor files
33+
.vscode/
34+
.idea/
35+
*.swp
36+
*.swo
37+
*~
38+
39+
# CI/CD
40+
.github/
41+
42+
# Docker files
43+
Dockerfile.base
44+
.dockerignore
45+
test-docker-build.sh
46+
build-base-image.sh
47+
48+
# Test outputs (but keep test scripts and data)
49+
output/
50+
*.log
51+
!test.sh
52+
!test-fast.sh
53+
!data/
54+
55+
# Markdown docs and license files (not needed in the image; README.md is kept)
56+
*.md
57+
!README.md
58+
LICENSE
59+
COPYING

.github/workflows/build.yml

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,69 @@ on: [push]
33
jobs:
44
build:
55
runs-on: ubuntu-latest
6-
env:
7-
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
86
steps:
9-
- uses: actions/checkout@v2
10-
- name: Checkout tags
11-
run: git fetch --unshallow origin +refs/tags/*:refs/tags/*
12-
- name: Set git branch variable
13-
run: echo ::set-env name=BRANCH::$(git branch --show-current)
14-
- name: Set git tag variable
15-
run: if [ $BRANCH == "main" ];then echo ::set-env name=TAG::$(git describe --tags);else echo ::set-env name=TAG::$BRANCH;fi
16-
- name: Checkout submodules
17-
shell: bash
7+
- uses: actions/checkout@v4
8+
with:
9+
fetch-depth: 0
10+
submodules: recursive
11+
12+
- name: Free up disk space
13+
run: |
14+
echo "Disk space before cleanup:"
15+
df -h
16+
# Remove unnecessary tools and packages (frees ~15-20GB)
17+
sudo rm -rf /usr/share/dotnet
18+
sudo rm -rf /opt/ghc
19+
sudo rm -rf "/usr/local/share/boost"
20+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
21+
sudo rm -rf /usr/local/lib/android
22+
sudo rm -rf /usr/local/.ghcup
23+
sudo rm -rf /usr/share/swift
24+
# Clean apt cache
25+
sudo apt-get clean
26+
# Clean Docker system
27+
docker system prune -af --volumes
28+
echo "Disk space after cleanup:"
29+
df -h
30+
31+
- name: Set git branch and tag variables
1832
run: |
19-
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
20-
git submodule sync --recursive
21-
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
33+
echo "BRANCH=$(git branch --show-current)" >> $GITHUB_ENV
34+
if [ "$(git branch --show-current)" == "main" ]; then
35+
echo "TAG=$(git describe --tags)" >> $GITHUB_ENV
36+
else
37+
echo "TAG=$(git branch --show-current)" >> $GITHUB_ENV
38+
fi
39+
40+
- name: Set up Docker Buildx
41+
uses: docker/setup-buildx-action@v3
42+
2243
- name: Login to Registry
2344
run: docker login quay.io -u ${{ secrets.QUAY_USERNAME }} --password ${{ secrets.QUAY_PASSWORD }}
45+
2446
- name: Build the Docker image
25-
run: docker build . -t quay.io/matsengrp/linearham:$TAG
47+
uses: docker/build-push-action@v5
48+
with:
49+
context: .
50+
push: false
51+
load: true
52+
tags: quay.io/matsengrp/linearham:${{ env.TAG }}
53+
no-cache: true
2654
- name: Run tests in the Docker image
27-
run: docker run quay.io/matsengrp/linearham:$TAG sh -c "/linearham/_build/test/test"
28-
- name: Run example in the Docker image
29-
run: docker run --rm quay.io/matsengrp/linearham:$TAG sh -c "/linearham/test.sh && /linearham/clean.sh"
55+
run: docker run quay.io/matsengrp/linearham:${{ env.TAG }} sh -c "/linearham/_build/test/test"
56+
57+
- name: Run fast integration test in the Docker image
58+
run: docker run --rm quay.io/matsengrp/linearham:${{ env.TAG }} sh -c "/linearham/test-fast.sh && /linearham/clean.sh"
59+
3060
#- name: publish to Registry
31-
# run: docker push quay.io/matsengrp/linearham:$TAG
61+
# run: docker push quay.io/matsengrp/linearham:${{ env.TAG }}
62+
3263
- name: Build documentation
64+
if: github.ref == 'refs/heads/main'
3365
run: docker run --rm -v $(pwd):/data nakatt/doxygen:1.8.17 Doxyfile
66+
3467
- name: Deploy to GitHub Pages
68+
if: github.ref == 'refs/heads/main'
3569
uses: JamesIves/[email protected]
3670
with:
3771
ACCESS_TOKEN: ${{ secrets.GH_PAGES_PERSONAL_ACCESS_TOKEN }}

.gitmodules

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,9 @@
1010
[submodule "lib/tclap"]
1111
path = lib/tclap
1212
url = https://github.com/mirror/tclap.git
13-
[submodule "lib/partis"]
14-
path = lib/partis
15-
url = https://github.com/psathyrella/partis.git
16-
branch = dev
17-
[submodule "lib/revbayes"]
18-
path = lib/revbayes
19-
url = https://github.com/eharkins/revbayes.git
2013
[submodule "lib/phylomd"]
2114
path = lib/phylomd
2215
url = https://github.com/dunleavy005/phylomd.git
16+
[submodule "lib/revbayes"]
17+
path = lib/revbayes
18+
url = https://github.com/eharkins/revbayes.git

Dockerfile

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,76 @@
1-
FROM quay.io/matsengrp/linearham:2022-11-29-base-image
1+
# Multi-stage Dockerfile for linearham
2+
# Stage 1: Extract pre-built RevBayes (with bundled Boost) from base image
3+
# Stage 2: Use modern Debian for Python packages with pre-built wheels
4+
#
5+
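# Target platform is linux/amd64 because partis-bcr contains x86-specific code (SSE2 intrinsics); NOTE(review): no FROM below uses --platform, so confirm the build is invoked with --platform linux/amd64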
6+
# RevBayes with bundled Boost gives ~180x faster MCMC performance than v1.3.0+ without Boost
7+
ARG TARGETPLATFORM=linux/amd64
28

9+
# Stage 1: Get pre-compiled RevBayes with bundled Boost from Buster base image
10+
FROM quay.io/matsengrp/linearham:2025-12-01-base-image AS revbayes-builder
11+
12+
# Stage 2: Modern Debian Bookworm for Python packages
13+
FROM debian:bookworm-slim
14+
15+
# Metadata labels
16+
LABEL org.opencontainers.image.source="https://github.com/matsengrp/linearham"
17+
LABEL org.opencontainers.image.description="Bayesian phylogenetic hidden Markov model for B cell receptor sequence analysis"
18+
LABEL org.opencontainers.image.licenses="GPL-3.0"
19+
LABEL org.opencontainers.image.title="linearham"
20+
21+
# Set TERM to enable colored-traceback to work in non-TTY environments (e.g., CI)
22+
ENV TERM=xterm
23+
24+
# Copy pre-built RevBayes binary with bundled Boost from the base image
25+
COPY --from=revbayes-builder /usr/local/bin/rb /usr/local/bin/rb
26+
27+
# Install system dependencies needed for linearham
328
RUN apt-get update && apt-get install -y --no-install-recommends \
429
autoconf \
530
automake \
631
bison \
7-
libblas-dev \
832
build-essential \
933
cmake \
1034
flex \
1135
gfortran \
1236
ghostscript \
1337
graphviz \
14-
libgsl0-dev \
38+
libblas-dev \
39+
libbz2-dev \
40+
libgsl-dev \
1541
liblapack-dev \
16-
libncurses-dev \
17-
python-dev \
18-
python-pip \
19-
python-setuptools \
42+
liblzma-dev \
43+
libtool \
44+
libyaml-cpp-dev \
45+
libyaml-dev \
46+
libz-dev \
47+
mafft \
48+
python3-dev \
49+
python3-pip \
50+
python3-setuptools \
2051
r-cran-ape \
2152
r-cran-coda \
2253
r-cran-data.table \
23-
r-cran-littler \
2454
scons \
25-
libtool \
26-
libyaml-dev \
27-
libyaml-cpp-dev \
28-
libz-dev \
29-
libbz2-dev \
30-
liblzma-dev \
31-
less \
32-
wget \
33-
git
55+
&& apt-get clean \
56+
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
3457

35-
RUN wget https://mafft.cbrc.jp/alignment/software/mafft_7.450-1_amd64.deb && dpkg -i mafft_7.450-1_amd64.deb
58+
# Install R packages needed for phylomd
3659
RUN Rscript --slave --vanilla -e 'install.packages(c("phylotate", "Rcpp", "RcppArmadillo"), repos = "https://cloud.r-project.org")'
3760

61+
# Copy linearham source code
3862
COPY . /linearham
3963
WORKDIR /linearham
4064

41-
RUN pip install wheel
42-
RUN pip install -r requirements.txt
43-
RUN Rscript --slave --vanilla -e 'install.packages("lib/phylomd", repos = NULL, type = "source")'
44-
RUN scons --build-partis-linearham && ./clean.sh
65+
# Install Python packages and build linearham
66+
# Use --break-system-packages for Debian Bookworm (externally-managed-environment)
67+
RUN pip3 install --break-system-packages wheel && \
68+
pip3 install --break-system-packages -r requirements.txt && \
69+
Rscript --slave --vanilla -e 'install.packages("lib/phylomd", repos = NULL, type = "source")' && \
70+
scons --build && ./clean.sh
71+
72+
# Runtime health check: the container is reported healthy while `rb --version` succeeds (rb is copied in from the base image)
73+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
74+
CMD rb --version || exit 1
4575

4676
CMD ./test.sh && ./clean.sh

Dockerfile.base

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Base image for linearham with RevBayes compiled with bundled Boost from submodule
2+
# This image should be built and pushed manually when needed
3+
# Prerequisites: Ensure lib/revbayes/ submodule is initialized (git submodule update --init --recursive)
4+
# Build: Use ./build-base-image.sh script
5+
# Or manually: docker build --platform linux/amd64 -f Dockerfile.base -t quay.io/matsengrp/linearham:YYYY-MM-DD-base-image .
6+
# Push: docker push quay.io/matsengrp/linearham:YYYY-MM-DD-base-image
7+
8+
ARG TARGETPLATFORM=linux/amd64
9+
# Use Debian Buster (oldoldstable) for g++ 8.3 compatibility with older RevBayes
10+
FROM debian:buster-slim
11+
12+
# Set TERM to enable colored-traceback to work in non-TTY environments
13+
ENV TERM=xterm
14+
15+
# Metadata labels
16+
LABEL org.opencontainers.image.source="https://github.com/matsengrp/linearham"
17+
LABEL org.opencontainers.image.description="Base image for linearham with RevBayes compiled with bundled Boost from submodule"
18+
LABEL org.opencontainers.image.licenses="GPL-3.0"
19+
20+
# Configure apt to use archive repositories (Buster is now archived)
21+
RUN echo "deb http://archive.debian.org/debian/ buster main contrib non-free" > /etc/apt/sources.list && \
22+
echo "deb http://archive.debian.org/debian-security buster/updates main contrib non-free" >> /etc/apt/sources.list && \
23+
echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99no-check-valid-until
24+
25+
# Install all dependencies needed for building RevBayes and linearham
26+
RUN apt-get update && apt-get install -y --no-install-recommends \
27+
autoconf \
28+
automake \
29+
bison \
30+
build-essential \
31+
cmake \
32+
flex \
33+
gfortran \
34+
ghostscript \
35+
graphviz \
36+
libblas-dev \
37+
libgsl0-dev \
38+
liblapack-dev \
39+
libncurses-dev \
40+
libtool \
41+
libyaml-dev \
42+
libyaml-cpp-dev \
43+
libz-dev \
44+
libbz2-dev \
45+
liblzma-dev \
46+
ncurses-base \
47+
ncurses-term \
48+
python3-dev \
49+
python3-pip \
50+
python3-setuptools \
51+
r-cran-ape \
52+
r-cran-coda \
53+
r-cran-data.table \
54+
r-cran-littler \
55+
scons \
56+
less \
57+
wget \
58+
curl \
59+
ca-certificates \
60+
mafft \
61+
git \
62+
&& apt-get clean \
63+
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
64+
65+
# Install R packages needed for phylomd
66+
RUN Rscript --slave --vanilla -e 'install.packages(c("phylotate", "Rcpp", "RcppArmadillo"), repos = "https://cloud.r-project.org")'
67+
68+
# Copy the revbayes submodule which includes bundled boost_1_60_0
69+
COPY lib/revbayes /tmp/revbayes
70+
71+
# Set BOOST_ROOT to use the bundled Boost library from the submodule
72+
ENV BOOST_ROOT=/tmp/revbayes/boost_1_60_0
73+
74+
# Build RevBayes with bundled Boost (this takes 20-40 minutes)
75+
WORKDIR /tmp/revbayes/projects/cmake
76+
RUN ./build.sh
77+
78+
# Install RevBayes to standard location
79+
RUN mkdir -p /usr/local/bin && \
80+
cp /tmp/revbayes/projects/cmake/rb /usr/local/bin/rb && \
81+
chmod +x /usr/local/bin/rb
82+
83+
# Remove the RevBayes build tree from the final filesystem (NOTE: earlier layers still contain it, so this separate RUN does not shrink the image)
84+
RUN rm -rf /tmp/revbayes
85+
86+
WORKDIR /linearham

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
- [how to run](#running-linearham)
1818
- [run partis](#--run-partis)
1919
- [run linearham](#--run-linearham)
20-
- [compile](#--build-partis-linearham)
20+
- [compile](#--build)
2121
- [run steps](#run-steps)
2222
- [output files](#output-files)
2323
- [naive sequence comparisons](#naive-sequence-comparisons)
@@ -26,7 +26,7 @@
2626

2727
Linearham's dependencies are described in the Dockerfile.
2828
We recommend that you run linearham inside a Docker container, since this will make installation much easier (if you're new to Docker, [read this](http://erick.matsen.org/2018/04/19/docker.html)).
29-
However, you can also install the dependencies by hand, in which case you should clone the repository and run each command in the [Dockerfile](https://github.com/matsengrp/linearham/blob/edit-readme/Dockerfile) that's on a line starting with `RUN` (treat `WORKDIR` as `cd`).
29+
However, you can also install the dependencies by hand, in which case you should clone the repository and run each command in the [Dockerfile](https://github.com/matsengrp/linearham/blob/main/Dockerfile) that's on a line starting with `RUN` (treat `WORKDIR` as `cd`).
3030
The more similar your system is to that described by the Dockerfile's `FROM` line (at the moment, debian), the easier this will be.
3131

3232
#### Using Docker
@@ -50,7 +50,7 @@ Note that because Docker must run as root, this means that you will be writing t
5050
## Running linearham
5151

5252
All linearham actions are run using scons in the main linearham directory.
53-
Available actions are `--run-partis`, `--run-linearham`, and `--build-partis-linearham`.
53+
Available actions are `--run-partis`, `--run-linearham`, and `--build`.
5454
Note that because of the way scons parses arguments, you must always use an `=` sign in all args: `--arg=val`.
5555
For the same reason, you also have to spell args exactly right, e.g. writing `--arg-nam` instead of `--arg-name` will silently ignore it.
5656

@@ -142,9 +142,9 @@ Other linearham-related arguments:
142142
For the arguments that can be specified as a (`,`-separated) list (see middle column), linearham will run revbayes separately, writing to separate nested output directories, for all combinations of all such parameters.
143143
For more information on these arguments, run `scons --help`.
144144

145-
#### `--build-partis-linearham`
145+
#### `--build`
146146

147-
This compiles linearham, partis, and other dependencies.
147+
This compiles linearham and other dependencies.
148148
You'll only need to run this if you've either modified some source code or you're installing without docker.
149149

150150
## Run steps
@@ -156,7 +156,7 @@ See also [below](#output-files) for more detail on the various inputs and output
156156
| get linearham info | `lib/partis/bin/partis get-linearham-info` | reformat the information in all annotations in the partis output file for use by subsequent linearham steps, writes to `partis_run.yaml` |
157157
| select single cluster | `scripts/parse_cluster.py` | pull annotation for single specified cluster out of `partis_run.yaml`, and write it to `cluster.yaml` and its sequences to `cluster_seqs.fasta` |
158158
| make revbayes input | `scripts/generate_revbayes_rev_file.py` | use seqs in `cluster_seqs.fasta` and template revbayes config `templates/revbayes_template.rev` to write revbayes config for this run to `revbayes_run.rev` |
159-
| run revbayes | `lib/revbayes/projects/cmake/rb` | run revbayes with config file `revbayes_run.rev`, writing output to `revbayes_run.stdout.log`. This step is usually by far the slowest; you can adjust e.g. the mcmc options above to trade off speed for confidence/accuracy. |
159+
| run revbayes | `rb` | run revbayes with config file `revbayes_run.rev`, writing output to `revbayes_run.stdout.log`. This step is usually by far the slowest; you can adjust e.g. the mcmc options above to trade off speed for confidence/accuracy. |
160160
| run phylo hmm | `_build/linearham/linearham --pipeline` | run actual linearham phylo hmm, using `cluster.yaml`, `<--parameter-dir>`, and `revbayes_run.trees` to write `lh_revbayes_run.trees` |
161161
| collect run statistics | `scripts/run_bootstrap_asr_ess.R` | collects info from `lh_revbayes_run.trees` and `cluster_seqs.fasta` to write three output files: `linearham_run.{trees,log,ess}` |
162162
| calculate naive seq stats | `scripts/tabulate_naive_probs.py` | collect info from `linearham_run.trees` to write `aa_naive_seqs.{png,fasta,dnamap}` |

0 commit comments

Comments
 (0)