55
66SCRIPT_DIR=$( cd -- " $( dirname -- " ${BASH_SOURCE[0]} " ) " & > /dev/null && pwd )
77DOCKER_IMAGE_TAG=" example_algorithm_{{ object.slug }}"
8+ CONTAINER_NAME=" example_algorithm_{{ object.slug }}_container"
9+ PORT=37847
810
911DOCKER_NOOP_VOLUME=" ${DOCKER_IMAGE_TAG} -volume"
1012
@@ -26,6 +28,9 @@ cleanup() {
2628 $DOCKER_IMAGE_TAG \
2729 -c " chmod -R -f o+rwX /output/* || true"
2830
31+ docker rm -f " $CONTAINER_NAME " > /dev/null 2>&1 || true
32+ echo " =+= Container stopped"
33+
2934 # Ensure volume is removed
3035 docker volume rm " $DOCKER_NOOP_VOLUME " > /dev/null
3136}
@@ -56,12 +61,14 @@ docker volume create "$DOCKER_NOOP_VOLUME" > /dev/null
5661
5762trap cleanup EXIT
5863
59- run_docker_forward_pass () {
64+ start_docker_container () {
6065 local interface_dir=" $1 "
6166
62- echo " =+= Doing a forward pass on ${interface_dir} "
67+ echo " =+= Starting container "
6368
6469 # # Note the extra arguments that are passed here:
70+ # '-p ${PORT}:4743'
71+ # maps local port to container port 4743
6572 # '--network none'
6673 # entails there is no internet connection
6774 # '--gpus all'
@@ -70,20 +77,83 @@ run_docker_forward_pass() {
7077 # is added because on Grand Challenge this directory cannot be used to store permanent files
7178 # '--volume ../model:/opt/ml/model/":ro'
7279 # is added to provide access to the (optional) tarball-upload locally
73- docker run --rm {% if not no_gpus %}--gpus all {% endif %}\
80+ docker run -d {% if not no_gpus %}--gpus all {% endif %}\
81+ --name " $CONTAINER_NAME " \
7482 --platform=linux/amd64 \
75- --network none \
83+ -p ${PORT} :4743 \
7684 --volume " ${INPUT_DIR} /${interface_dir} " :/input:ro \
7785 --volume " ${OUTPUT_DIR} /${interface_dir} " :/output \
7886 --volume " $DOCKER_NOOP_VOLUME " :/tmp \
7987 --volume " ${SCRIPT_DIR} /model" :/opt/ml/model:ro \
80- " $DOCKER_IMAGE_TAG "
88+ " $DOCKER_IMAGE_TAG " \
89+ > /dev/null
90+
91+ echo " =+= Container started"
92+ }
93+
#######################################
# Poll the container's /health endpoint until it reports ready.
# Globals:
#   PORT (read) - host port on which the container's API is reachable
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout
# Returns:
#   0 as soon as the endpoint answers 200;
#   1 if it answers 302, or if no attempt ever returned 200
#######################################
check_health() {
    echo "=+= Waiting for health endpoint..."

    local max_attempts=30
    local delay=10
    local attempt
    local status

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        # With -w "%{http_code}" curl itself prints "000" when no response was
        # received, so a failing exit status must not append a second "000".
        # '|| true' only keeps a failed probe from aborting the script.
        status=$(curl -s -o /dev/null -w "%{http_code}" \
            --max-time 10 \
            "http://localhost:${PORT}/health") || true
        # Covers the case where curl printed nothing at all.
        status=${status:-000}

        echo "Health check attempt $attempt/$max_attempts returned $status"

        if [[ "$status" == "200" ]]; then
            echo "=+= API healthy"
            return 0
        fi

        # A redirect is treated as a hard failure rather than "not ready yet".
        if [[ "$status" == "302" ]]; then
            echo "Health endpoint returned 302 — failing"
            return 1
        fi

        # No point in waiting once the last attempt has been used up.
        if (( attempt < max_attempts )); then
            echo "Retrying in ${delay}s"
            sleep "$delay"
        fi
    done

    echo "Health endpoint never returned 200"
    return 1
}
124+
#######################################
# Trigger one forward pass by POSTing to the running container's /invoke
# endpoint.
# Globals:
#   PORT       (read) - host port on which the container's API is reachable
#   OUTPUT_DIR (read) - only used to report where results were written
# Arguments:
#   $1 - interface directory name (used in the progress messages)
# Outputs:
#   Progress messages on stdout
# Returns:
#   0 when the endpoint answers 201; otherwise exits the script with status 1
#######################################
run_docker_forward_pass() {
    local interface_dir="$1"
    local status

    echo "=+= Doing a forward pass on ${interface_dir}"

    echo "=+= Calling invoke endpoint"

    # With -w "%{http_code}" curl itself prints "000" when no response was
    # received, so a failing exit status must not append a second "000".
    # '|| true' only keeps the failure from aborting before it is reported.
    status=$(curl -s -o /dev/null -w "%{http_code}" \
        --max-time 300 \
        -X POST "http://localhost:${PORT}/invoke") || true
    # Covers the case where curl printed nothing at all.
    status=${status:-000}

    if [[ "$status" != "201" ]]; then
        echo "Invoke failed with status $status"
        exit 1
    fi

    echo "=+= Invoke completed"

    echo "=+= Wrote results to ${OUTPUT_DIR}/${interface_dir}"
}
81145
82- echo " =+= Wrote results to ${OUTPUT_DIR} /${interface_dir} "
#######################################
# Force-remove the algorithm container and report it as stopped.
# Globals:
#   CONTAINER_NAME (read) - name of the container to remove
# Outputs:
#   Two progress messages on stdout; docker's own output is discarded
# Returns:
#   0 - removal is best-effort and any docker failure is ignored
#######################################
stop_docker_container() {
    printf '%s\n' "=+= Stopping container"
    if ! docker rm -f "$CONTAINER_NAME" > /dev/null 2>&1; then
        : # best-effort: a failed removal is deliberately not an error
    fi
    printf '%s\n' "=+= Container stopped"
}
84151
{% for interface_name in object.algorithm_interface_names %}
# Per interface: start the container, wait until it is healthy, invoke it, then
# remove it. check_health reports failure via its return status, so abort
# explicitly instead of calling /invoke on an unhealthy container; the EXIT
# trap (cleanup) still removes the container on this path.
start_docker_container "{{ interface_name }}"
check_health || exit 1
run_docker_forward_pass "{{ interface_name }}"
stop_docker_container
{% endfor %}
88158
89159
0 commit comments