Skip to content

Commit bf82aad

Browse files
pgrill and renzodgc authored
Fault tolerance (#82)
* Add fault tolerance in Area and Video processing. * Include docker healthcheck in containers. * Change start_services.bash with supervisord. * Use supervisorctl in healthcheck. * Removing empty line in dockerfile. Fixing comment. * Update libs/area_reporting.py Co-authored-by: Renzo Gambone <42361379+renzodgc@users.noreply.github.com> * Include max_retries in threads restarts. * Add MaxThreadRestarts parameter in the config files. * Reset the restarts counter when last restart is previous than 1 minute ago. Co-authored-by: Renzo Gambone <42361379+renzodgc@users.noreply.github.com>
1 parent 01b6cb2 commit bf82aad

21 files changed

+140
-60
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ All the configurations are grouped in *sections* and some of them can vary depen
227227
- `DashboardURL`: Sets the url where the frontend is running. Unless you are using a custom domain, you should keep this value as https://beta.lanthorn.ai/.
228228
- `EnableSlackNotifications`: A boolean parameter to enable/disable the Slack integration for notifications and daily reports. We recommend not editing this parameter directly; instead, manage it from the [UI](https://beta.lanthorn.ai) so that your workspace is configured correctly.
229229
- `SlackChannel`: Configures the slack channel used by the notifications. The chosen slack channel must exist in the configured workspace.
230+
- `OccupancyAlertsMinInterval`: Sets the minimum interval (in seconds) between occupancy alerts. If the value is negative, no occupancy alerts are triggered.
231+
- `MaxThreadRestarts`: Defines the number of restarts allowed per thread.
230232

231233
- `[Api]`
232234
- `Host`: Configures the host IP of the processor's API (inside docker). We recommend not changing this value and keeping it as *0.0.0.0*.

amd64-usbtpu.Dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
8383
python3-requests \
8484
build-essential \
8585
libedgetpu1-std \
86+
supervisor \
8687
&& rm -rf /var/lib/apt/lists/* \
8788
&& python3 -m pip install --upgrade pip setuptools==41.0.0 wheel && pip install -r /requirements.txt \
8889
https://dl.google.com/coral/python/tflite_runtime-2.1.0.post1-cp37-cp37m-linux_x86_64.whl \
@@ -99,8 +100,9 @@ RUN cd / && apt-get update && apt-get install -y git python3-edgetpu && git clon
99100
https://github.com/google-coral/project-posenet.git && sed -i 's/sudo / /g' \
100101
/project-posenet/install_requirements.sh && sh /project-posenet/install_requirements.sh
101102
ENV PYTHONPATH=$PYTHONPATH:/project-posenet
103+
ENV CONFIG_FILE=config-coral.ini
102104

103105
COPY . /repo
104106
WORKDIR /repo
105-
ENTRYPOINT ["bash", "start_services.bash"]
106-
CMD ["config-coral.ini"]
107+
HEALTHCHECK --interval=30s --retries=2 --start-period=15s CMD bash healthcheck.bash
108+
CMD supervisord -c supervisord.conf -n

config-coral.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ EnableSlackNotifications = no
1111
SlackChannel = lanthorn-notifications
1212
; OccupancyAlertsMinInterval time is measured in seconds (if interval < 0 then no occupancy alerts are triggered)
1313
OccupancyAlertsMinInterval = 180
14+
MaxThreadRestarts = 5
15+
1416

1517
[API]
1618
Host = 0.0.0.0

config-jetson.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ EnableSlackNotifications = no
1414
SlackChannel = lanthorn-notifications
1515
; OccupancyAlertsMinInterval time is measured in seconds (if interval < 0 then no occupancy alerts are triggered)
1616
OccupancyAlertsMinInterval = 180
17+
MaxThreadRestarts = 5
1718

1819
[API]
1920
Host = 0.0.0.0

config-x86-gpu.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ DashboardURL = http://0.0.0.0:8000
2323
ScreenshotsDirectory = /repo/data/processor/static/screenshots
2424
EnableSlackNotifications = no
2525
SlackChannel = lanthorn-notifications
26+
OccupancyAlertsMinInterval = 180
27+
MaxThreadRestarts = 5
2628

2729
[Area_0]
2830
Id = area0

config-x86-openvino.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ EnableSlackNotifications = no
2525
SlackChannel = lanthorn-notifications
2626
; OccupancyAlertsMinInterval time is measured in seconds (if interval < 0 then no occupancy alerts are triggered)
2727
OccupancyAlertsMinInterval = 180
28+
MaxThreadRestarts = 5
2829

2930
[Area_0]
3031
Id = area0

config-x86.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ EnableSlackNotifications = no
2525
SlackChannel = lanthorn-notifications
2626
; OccupancyAlertsMinInterval time is measured in seconds (if interval < 0 then no occupancy alerts are triggered)
2727
OccupancyAlertsMinInterval = 180
28+
MaxThreadRestarts = 5
2829

2930
[Area_0]
3031
Id = area0

coral-dev-board.Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
6767
python3-pip \
6868
python3-scipy \
6969
python3-wget \
70+
supervisor \
7071
&& rm -rf /var/lib/apt/lists/* \
7172
&& python3 -m pip install --upgrade pip setuptools==41.0.0 && pip install -r /requirements.txt \
7273
https://dl.google.com/coral/python/tflite_runtime-2.1.0.post1-cp37-cp37m-linux_aarch64.whl \
@@ -82,9 +83,10 @@ RUN cd / && apt-get update && apt-get install -y git python3-edgetpu && git clon
8283
https://github.com/google-coral/project-posenet.git && sed -i 's/sudo / /g' \
8384
/project-posenet/install_requirements.sh && sh /project-posenet/install_requirements.sh
8485
ENV PYTHONPATH=$PYTHONPATH:/project-posenet
85-
86+
ENV CONFIG_FILE=config-coral.ini
8687
# Also if you use opencv: LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libgomp.so.1.0.0"
88+
8789
COPY . /repo
8890
WORKDIR /repo
89-
ENTRYPOINT ["bash", "start_services.bash"]
90-
CMD ["config-coral.ini"]
91+
HEALTHCHECK --interval=30s --retries=2 --start-period=15s CMD bash healthcheck.bash
92+
CMD supervisord -c supervisord.conf -n

healthcheck.bash

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
if fatalError=$(supervisorctl -c supervisord.conf status all | grep -i "FATAL\|UNKNOWN"); then
3+
exit 1;
4+
else
5+
exit 0;
6+
fi

jetson-nano.Dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
8282
python3-pip \
8383
python3-scipy \
8484
python3-wget \
85+
supervisor \
8586
&& rm -rf /var/lib/apt/lists/* \
8687
&& ln -sf $(which gcc) /usr/local/bin/gcc-aarch64-linux-gnu \
8788
&& ln -sf $(which g++) /usr/local/bin/g++-aarch64-linux-gnu \
@@ -96,8 +97,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
9697
ENV DEV_ALLOW_ALL_ORIGINS=true
9798
ENV AWS_SHARED_CREDENTIALS_FILE=/repo/.aws/credentials
9899
ENV AWS_CONFIG_FILE=/repo/.aws/config
100+
ENV CONFIG_FILE=config-jetson.ini
99101

100102
COPY . /repo/
101103
WORKDIR /repo
102-
ENTRYPOINT ["bash", "start_services.bash"]
103-
CMD ["config-jetson.ini"]
104+
HEALTHCHECK --interval=30s --retries=2 --start-period=15s CMD bash healthcheck.bash
105+
CMD supervisord -c supervisord.conf -n

0 commit comments

Comments
 (0)