Skip to content

Commit d6a7fcd

Browse files
jamesemery authored and droazen committed
Refactored the docker build script to only include the gatk bundle in order to shrink the docker image size (#4955)
* Refactored the docker image to contain only the runtime jars and other necessary files for space concerns
1 parent 4521d32 commit d6a7fcd

File tree

11 files changed

+366
-110
lines changed

11 files changed

+366
-110
lines changed

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
# These are files that for whatever reason we don't want to include in our distribution docker images
22
src/test/resources
33
src/test/resources/*
4+
.git/*

.travis.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ env:
2121
#gradle needs this
2222
- TERM=dumb
2323
#limit gradle jvm memory and disable daemon
24-
- GRADLE_OPTS="-Xmx1024m -Dorg.gradle.daemon=false"
24+
- GRADLE_OPTS="-Xmx2048m -Dorg.gradle.daemon=false"
2525
#google cloud stuff
2626
- CLOUDSDK_CORE_DISABLE_PROMPTS=1
2727
- GCLOUD_HOME=$HOME/gcloud/google-cloud-sdk/bin
@@ -57,7 +57,7 @@ cache:
5757
before_install:
5858
- REPORT_PATH=${TRAVIS_BRANCH}_${TRAVIS_JOB_NUMBER};
5959
- if [[ $TRAVIS_SECURE_ENV_VARS == true && $TRAVIS_EVENT_TYPE != cron ]]; then
60-
echo "Test report will be written to https://storage.googleapis.com${HELLBENDER_TEST_LOGS}${REPORT_PATH}/tests/test/index.html";
60+
echo "Test report will be written to https://storage.googleapis.com$HELLBENDER_TEST_LOGS$REPORT_PATH/tests/test/index.html";
6161
fi
6262
#setup google cloud and github authentication
6363
- if [ $TRAVIS_SECURE_ENV_VARS == true ]; then
@@ -134,18 +134,19 @@ script:
134134
elif [[ $TEST_DOCKER == true ]]; then
135135
echo "Building docker image and running appropriate tests..." ;
136136
if [ ${TRAVIS_PULL_REQUEST} != false ]; then
137-
sudo bash build_docker.sh -e FETCH_HEAD -s -u -d $PWD/temp_staging/ -t ${TRAVIS_PULL_REQUEST};
137+
sudo bash build_docker.sh -e FETCH_HEAD -s -u -t ${TRAVIS_PULL_REQUEST};
138138
DOCKER_TAG=FETCH_HEAD;
139139
else
140140
echo ${TRAVIS_COMMIT};
141-
sudo bash build_docker.sh -e ${TRAVIS_COMMIT} -s -u -d $PWD/temp_staging/;
141+
sudo bash build_docker.sh -e ${TRAVIS_COMMIT} -s -u;
142142
DOCKER_TAG=$TRAVIS_COMMIT;
143143
fi;
144144
sudo docker images;
145145
echo ${TEST_TYPE};
146146
sudo mkdir -p build/reports/;
147147
sudo chmod -R a+w build/reports/;
148-
sudo docker run -v $(pwd)/src/test/resources:/testdata -v $(pwd)/build/reports/:/gatk/build/reports/ --rm -e "TEST_VERBOSITY=minimal" -e "TEST_TYPE=${TEST_TYPE}" -t broadinstitute/gatk:${DOCKER_TAG} bash --init-file /gatk/gatkenv.rc /root/run_unit_tests.sh;
148+
cp scripts/docker/dockertest.gradle .;
149+
sudo docker run -v $(pwd):/gatkCloneMountPoint:cached -v $(pwd)/testJars:/jars:cached --rm -e "TEST_VERBOSITY=minimal" -e "TEST_TYPE=${TEST_TYPE}" -t broadinstitute/gatk:${DOCKER_TAG} bash --init-file /gatk/gatkenv.rc /root/run_unit_tests.sh && sudo mkdir build/reports/tests/test && sudo cp -rp build/reports/tests/testOnPackagedReleaseJar/* build/reports/tests/test && sudo rm -r build/reports/tests/testOnPackagedReleaseJar;
149150
else
150151
./gatk PrintReads -I src/test/resources/NA12878.chr17_69k_70k.dictFix.bam -O output.bam;
151152
travis_wait 50 ./gradlew jacocoTestReport;

Dockerfile

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,41 @@
11
# Using OpenJDK 8
2-
FROM broadinstitute/gatk:gatkbase-1.2.3
3-
ARG DRELEASE
2+
FROM broadinstitute/gatk:gatkbase-2.0.0
43

5-
ADD . /gatk
4+
# Location of the unzipped gatk bundle files
5+
ARG ZIPPATH
6+
7+
ADD $ZIPPATH /gatk
68

79
WORKDIR /gatk
8-
RUN /gatk/gradlew clean compileTestJava sparkJar localJar condaEnvironmentDefinition -Drelease=$DRELEASE
10+
11+
#Setup linked jars that may be needed for running gatk
12+
RUN ln -s $( find /gatk -name "gatk*local.jar" ) gatk.jar
13+
RUN ln -s $( find /gatk -name "gatk*local.jar" ) /root/gatk.jar
14+
RUN ln -s $( find /gatk -name "gatk*spark.jar" ) gatk-spark.jar
915

1016
WORKDIR /root
1117

12-
# Make sure we can see a help message
13-
RUN ln -sFv /gatk/build/libs/gatk.jar
18+
# Make sure we can see a help message
1419
RUN java -jar gatk.jar -h
20+
RUN mkdir /gatkCloneMountPoint
21+
RUN mkdir /jars
22+
RUN mkdir .gradle
1523

16-
#Setup test data
1724
WORKDIR /gatk
18-
# Create link to where test data is expected
19-
RUN ln -s /testdata src/test/resources
2025

2126
# Create a simple unit test runner: /root/run_unit_tests.sh.
# The first echo creates the script with ">"; every later echo must append with ">>".
# An echo without a redirect only prints during the image build and never reaches the script.
# GATK_JAR is resolved once at build time (its $( ) is unescaped); TEST_JAR and
# TEST_DEPENDENCY_JAR are resolved when the script runs (their \$( ) is escaped).
2227
ENV CI true
2328
RUN echo "source activate gatk" > /root/run_unit_tests.sh && \
24-
echo "cd /gatk/ && ./gradlew jacocoTestReport" >> /root/run_unit_tests.sh
29+
echo "export TEST_JAR=\$( find /jars -name \"gatk*test.jar\" )" >> /root/run_unit_tests.sh && \
30+
echo "export TEST_DEPENDENCY_JAR=\$( find /jars -name \"gatk*testDependencies.jar\" )" >> /root/run_unit_tests.sh && \
31+
echo "export GATK_JAR=$( find /gatk -name "gatk*local.jar" )" >> /root/run_unit_tests.sh && \
32+
echo "cp -rp /gatkCloneMountPoint/src/main/java/* /gatk/srcdir" >> /root/run_unit_tests.sh && \
33+
echo "export SOURCE_DIR=/gatk/srcdir" >> /root/run_unit_tests.sh && \
34+
echo "export GRADLE_OPTS=\"-Xmx1024m -Dorg.gradle.daemon=false\"" >> /root/run_unit_tests.sh && \
35+
echo "export CP_DIR=/gatk/testClasses" >> /root/run_unit_tests.sh && \
36+
echo "ln -s /gatkCloneMountPoint/src/ /gatkCloneMountPoint/scripts/docker/src" >> /root/run_unit_tests.sh && \
37+
echo "ln -s /gatkCloneMountPoint/build/ /gatkCloneMountPoint/scripts/docker/build" >> /root/run_unit_tests.sh && \
38+
echo "cd /gatk/ && /gatkCloneMountPoint/gradlew -b /gatkCloneMountPoint/dockertest.gradle testOnPackagedReleaseJar jacocoTestReportOnPackagedReleaseJar -a -p /gatkCloneMountPoint" >> /root/run_unit_tests.sh
2539

2640
WORKDIR /root
2741
RUN cp -r /root/run_unit_tests.sh /gatk
@@ -39,16 +53,15 @@ RUN mkdir $DOWNLOAD_DIR && \
3953
test "`md5sum $DOWNLOAD_DIR/miniconda.sh | awk -v FS=' ' '{print $1}'` = $CONDA_MD5" && \
4054
bash $DOWNLOAD_DIR/miniconda.sh -p $CONDA_PATH -b && \
4155
rm $DOWNLOAD_DIR/miniconda.sh
42-
ENV PATH $CONDA_PATH/envs/gatk/bin:$CONDA_PATH/bin:$PATH
43-
WORKDIR /gatk/build
44-
RUN conda env create -n gatk -f gatkcondaenv.yml && \
45-
echo "source activate gatk" >> /gatk/gatkenv.rc
4656
WORKDIR /gatk
57+
ENV PATH $CONDA_PATH/envs/gatk/bin:$CONDA_PATH/bin:$PATH
58+
# Create the "gatk" conda environment from the bundled yml and append its activation
# command to /gatk/gatkenv.rc. The conda and pip caches are removed in the same RUN,
# so they are never stored in an image layer.
RUN conda env create -n gatk -f /gatk/gatkcondaenv.yml && \
59+
echo "source activate gatk" >> /gatk/gatkenv.rc && \
60+
conda clean -y --all && \
61+
rm -rf /root/.cache/pip
4762

4863
CMD ["bash", "--init-file", "/gatk/gatkenv.rc"]
4964

5065
# End GATK Python environment
5166

52-
WORKDIR /gatk
53-
5467
ENV PATH /gatk:$PATH

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ echo "source <PATH_TO>/gatk-completion.sh" >> ~/.bashrc
393393
* Test report is in `build/reports/tests/test/index.html`.
394394
* What will happen depends on the value of the `TEST_TYPE` environment variable:
395395
* unset or any other value : run non-cloud unit and integration tests, this is the default
396-
* `cloud`, `unit`, `integration`, `spark` : run only the cloud, unit, integration, or Spark tests
396+
* `cloud`, `unit`, `integration`, `spark`, `python` : run only the cloud, unit, integration, Spark, or Python tests
397397
* `all` : run the entire test suite
398398
* Cloud tests require being logged into `gcloud` and authenticated with a project that has access
399399
to the cloud test data. They also require setting several certain environment variables.

build.gradle

Lines changed: 85 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -174,19 +174,6 @@ configurations.all {
174174
all*.exclude group: 'junit', module: 'junit'
175175
}
176176

177-
178-
jacocoTestReport {
179-
dependsOn test
180-
group = "Reporting"
181-
description = "Generate Jacoco coverage reports after running tests."
182-
additionalSourceDirs = files(sourceSets.main.allJava.srcDirs)
183-
184-
reports {
185-
xml.enabled = true
186-
html.enabled = true
187-
}
188-
}
189-
190177
//NOTE: we ignore contracts for now
191178
compileJava {
192179
options.compilerArgs = ['-proc:none', '-Xlint:all', '-Werror', '-Xdiags:verbose']
@@ -379,7 +366,6 @@ version = (isRelease ? gitVersion() : gitVersion() + "-SNAPSHOT").replaceAll(".d
379366
logger.info("build for version:" + version)
380367
group = 'org.broadinstitute'
381368

382-
383369
tasks.withType(Jar) {
384370
manifest {
385371
attributes 'Implementation-Title': 'The Genome Analysis Toolkit (GATK)',
@@ -390,7 +376,33 @@ tasks.withType(Jar) {
390376
}
391377
}
392378

393-
test {
379+
task wrapper(type: Wrapper) {
380+
gradleVersion = '3.1'
381+
}
382+
383+
tasks.withType(ShadowJar) {
384+
from(project.sourceSets.main.output)
385+
baseName = project.name + '-package'
386+
mergeServiceFiles()
387+
relocate 'com.google.common', 'org.broadinstitute.hellbender.relocated.com.google.common'
388+
zip64 true
389+
exclude 'log4j.properties' // from adam jar as it clashes with hellbender's log4j2.xml
390+
exclude '**/*.SF' // these are Manifest signature files and
391+
exclude '**/*.RSA' // keys which may accidentally be imported from other signed projects and then fail at runtime
392+
393+
// Suggested by the akka devs to make sure that we do not get the spark configuration error.
394+
// http://doc.akka.io/docs/akka/snapshot/general/configuration.html#When_using_JarJar__OneJar__Assembly_or_any_jar-bundler
395+
transform(com.github.jengelman.gradle.plugins.shadow.transformers.AppendingTransformer) {
396+
resource = 'reference.conf'
397+
}
398+
}
399+
400+
//============================================================================================================================
401+
// WARNING
402+
//============================================================================================================================
403+
// This is duplicated code with the build script in scripts/docker/dockertest.gradle. If you alter the tests in any way
404+
// please make sure to alter this test method there as well.
405+
tasks.withType(Test) {
394406
outputs.upToDateWhen { false } //tests will never be "up to date" so you can always rerun them
395407
String TEST_VERBOSITY = "$System.env.TEST_VERBOSITY"
396408

@@ -401,7 +413,11 @@ test {
401413
* anything else : run the non-cloud tests
402414
*/
403415
String TEST_TYPE = "$System.env.TEST_TYPE"
404-
416+
//============================================================================================================================
417+
// WARNING
418+
//============================================================================================================================
419+
// This is duplicated code with the build script in scripts/docker/dockertest.gradle. If you alter the tests in any way
420+
// please make sure to alter this test method there as well.
405421
useTestNG {
406422
if (TEST_TYPE == "cloud") {
407423
// run only the cloud tests
@@ -473,28 +489,11 @@ test {
473489
}
474490
}
475491
}
476-
}
477-
478-
479-
task wrapper(type: Wrapper) {
480-
gradleVersion = '3.1'
481-
}
482-
483-
tasks.withType(ShadowJar) {
484-
from(project.sourceSets.main.output)
485-
baseName = project.name + '-package'
486-
mergeServiceFiles()
487-
relocate 'com.google.common', 'org.broadinstitute.hellbender.relocated.com.google.common'
488-
zip64 true
489-
exclude 'log4j.properties' // from adam jar as it clashes with hellbender's log4j2.xml
490-
exclude '**/*.SF' // these are Manifest signature files and
491-
exclude '**/*.RSA' // keys which may accidentally be imported from other signed projects and then fail at runtime
492-
493-
// Suggested by the akka devs to make sure that we do not get the spark configuration error.
494-
// http://doc.akka.io/docs/akka/snapshot/general/configuration.html#When_using_JarJar__OneJar__Assembly_or_any_jar-bundler
495-
transform(com.github.jengelman.gradle.plugins.shadow.transformers.AppendingTransformer) {
496-
resource = 'reference.conf'
497-
}
492+
//============================================================================================================================
493+
// WARNING
494+
//============================================================================================================================
495+
// This is duplicated code with the build script in scripts/docker/dockertest.gradle. If you alter the tests in any way
496+
// please make sure to alter this test method there as well.
498497
}
499498

500499
shadowJar {
@@ -524,7 +523,27 @@ task sparkJar(type: ShadowJar) {
524523
}
525524
}
526525

527-
task bundle(type: Zip) {
526+
// A jar that only contains the test classes and resources (to be extracted for testing)
527+
// ShadowJar task that packages the output of the test source set (compiled test
// classes and test resources) into its own jar.
task shadowTestClassJar(type: ShadowJar){
528+
group = "Shadow"
529+
from sourceSets.test.output
530+
description = "Create a jar that packages the compiled test classes"
531+
// The "test" classifier is what build_docker.sh looks for with the "gatk*test.jar" pattern.
classifier = "test"
532+
}
533+
534+
// A minimal jar that only contains the extra dependencies needed for running the tests
535+
// ShadowJar task that packages the files present on the testRuntime configuration
// but absent from the runtime configuration.
task shadowTestJar(type: ShadowJar){
536+
group = "Shadow"
537+
description = " A minimal jar that only contains the extra dependencies needed for running the tests that arent packaged in the main shadow jar"
538+
from {
539+
// Set difference: keep only the files that testRuntime adds on top of runtime.
(project.configurations.testRuntime - project.configurations.runtime ).collect {
540+
// Directories and non-jar files are passed through as-is; jar files are expanded with zipTree.
it.isDirectory() ? it : it.getName().endsWith(".jar") ? zipTree(it) : it
541+
}
542+
}
543+
// The "testDependencies" classifier is what build_docker.sh looks for with "gatk*testDependencies.jar".
classifier = "testDependencies"
544+
}
545+
546+
task collectBundleIntoDir(type: Copy) {
528547
dependsOn shadowJar, sparkJar, 'condaEnvironmentDefinition', 'gatkTabComplete', 'gatkDoc'
529548

530549
doFirst {
@@ -534,10 +553,6 @@ task bundle(type: Zip) {
534553
assert file("src/main/resources/org/broadinstitute/hellbender/utils/config/GATKConfig.properties").exists()
535554
}
536555

537-
baseName = project.name + "-" + project.version
538-
destinationDir file("$buildDir")
539-
archiveName baseName + ".zip"
540-
541556
from(shadowJar.archivePath)
542557
from(sparkJar.archivePath)
543558
from("gatk")
@@ -552,13 +567,40 @@ task bundle(type: Zip) {
552567
from("$buildDir/$pythonPackageArchiveName")
553568
from("$buildDir/$gatkCondaYML")
554569
from("$buildDir/$gatkCondaIntelYML")
570+
from("scripts/sv", { into("scripts/sv") })
571+
from("scripts/cnv_wdl/", { into("scripts/cnv_wdl") })
572+
from("scripts/mutect2_wdl/", { into("scripts/mutect2_wdl") })
573+
into "$buildDir/bundle-files-collected"
574+
}
575+
576+
// Zips the directory produced by collectBundleIntoDir into
// <buildDir>/<project.name>-<project.version>.zip.
task bundle(type: Zip) {
577+
dependsOn collectBundleIntoDir
578+
579+
baseName = project.name + "-" + project.version
580+
destinationDir file("$buildDir")
581+
archiveName baseName + ".zip"
582+
583+
// Source is the staging directory that collectBundleIntoDir copies into.
from("$buildDir/bundle-files-collected")
555584
// Everything is placed under a top-level folder named after baseName inside the zip.
into(baseName)
556585

557586
doLast {
558587
logger.lifecycle("Created GATK distribution in ${destinationDir}/${archiveName}")
559588
}
560589
}
561590

591+
// Configures the Jacoco coverage report: it runs after the test task and
// produces both XML and HTML reports.
jacocoTestReport {
592+
dependsOn test
593+
594+
group = "Reporting"
595+
description = "Generate Jacoco coverage reports after running tests."
596+
// Adds the main source set's Java source directories to the report's source dirs.
additionalSourceDirs = files(sourceSets.main.allJava.srcDirs)
597+
598+
reports {
599+
xml.enabled = true
600+
html.enabled = true
601+
}
602+
}
603+
562604
task condaStandardEnvironmentDefinition(type: Copy) {
563605
from "scripts"
564606
into buildDir

build_docker.sh

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ Optional arguments: \n \
4242
exit 1
4343
fi
4444

45-
4645
# -z is like "not -n"
4746
if [ -z ${IS_NOT_LATEST} ] && [ -n "${IS_HASH}" ] && [ -n "${IS_PUSH}" ]; then
4847
echo -e "\n##################"
@@ -113,8 +112,22 @@ if [ -n "${IS_PUSH}" ]; then
113112
else
114113
RELEASE=false
115114
fi
115+
./gradlew clean collectBundleIntoDir shadowTestClassJar shadowTestJar -Drelease=$RELEASE
116+
ZIPPATHGATK=$( find ./build -name "*bundle-files-collected" )
117+
mv ${ZIPPATHGATK} ./unzippedJar
118+
ZIPPATHPYTHON=$( find ./unzippedJar -name "gatkPython*.zip" )
119+
unzip -o -j ${ZIPPATHPYTHON} -d ./unzippedJar/scripts
120+
121+
# Stage the test jars in the testJars directory, which the unit-test container mounts as /jars.
# -p keeps a rerun from failing when testJars is left over from an earlier build.
mkdir -p ${STAGING_ABSOLUTE_PATH:-.}/testJars
122+
mv $( find ./build/libs/ -name "gatk*test.jar" ) ${STAGING_ABSOLUTE_PATH:-.}/testJars
123+
mv $( find ./build/libs/ -name "gatk*testDependencies.jar" ) ${STAGING_ABSOLUTE_PATH:-.}/testJars
124+
116125
echo "Building image to tag ${REPO_PRJ}:${GITHUB_TAG}..."
117-
docker build -t ${REPO_PRJ}:${GITHUB_TAG} --build-arg DRELEASE=$RELEASE .
126+
if [ -n "${IS_PUSH}" ]; then
127+
docker build -t ${REPO_PRJ}:${GITHUB_TAG} --squash --build-arg ZIPPATH=./unzippedJar .
128+
else
129+
docker build -t ${REPO_PRJ}:${GITHUB_TAG} --build-arg ZIPPATH=./unzippedJar .
130+
fi
118131

119132
if [ -z "${IS_NOT_RUN_UNIT_TESTS}" ] ; then
120133

@@ -128,9 +141,13 @@ if [ -z "${IS_NOT_RUN_UNIT_TESTS}" ] ; then
128141
git lfs pull
129142
chmod -R a+w ${STAGING_ABSOLUTE_PATH}/src/test/resources
130143

131-
echo docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}/src/test/resources:/testdata -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
132-
docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}/src/test/resources:/testdata -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
144+
cp build.gradle build.gradle.backup
145+
# dockertest.gradle lives inside the repository clone, so copy it by relative path;
# the absolute path /scripts/docker/... would point at the host filesystem root.
cp scripts/docker/dockertest.gradle .
146+
147+
echo docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}:/gatkCloneMountPoint -v ${STAGING_ABSOLUTE_PATH}/testJars:/jars -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
148+
docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}:/gatkCloneMountPoint -v ${STAGING_ABSOLUTE_PATH}/testJars:/jars -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
133149
echo " Unit tests passed..."
150+
mv build.gradle.backup build.gradle
134151
fi
135152

136153
## Push

0 commit comments

Comments
 (0)