
Commit 5f28c57

health checks for ports added

Signed-off-by: RAWx18 <rawx18.dev@gmail.com>

1 parent a488e8b

File tree

2 files changed: +366 -14 lines changed

scripts/cli

Lines changed: 364 additions & 14 deletions
@@ -516,27 +516,377 @@ function wait_for_db() {
   say "Scaffold is up and running!"
 }
 
+# Health check functions
+function check_service_health() {
+  local service_name="$1"
+  local check_command="$2"
+  local timeout="${3:-5}"
+
+  if timeout "$timeout" bash -c "$check_command" >/dev/null 2>&1; then
+    echo "${GREEN}healthy${RESET}"
+  else
+    echo "${RED}unhealthy${RESET}"
+  fi
+}
+
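Aside: a hypothetical call to the generic checker, with an illustrative command string and timeout that are not part of this commit (assumes redis-cli is on the host PATH):

    # Illustrative only: probe Redis with a 2-second budget.
    status=$(check_service_health "redis" "redis-cli -p 6379 ping" 2)
    echo -e "Redis: $status"

Note that the first argument is accepted for symmetry with the other checkers but is never referenced inside the function body.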
+function check_http_service_health() {
+  local service_name="$1"
+  local url="$2"
+  local timeout="${3:-3}"
+
+  # First check if container is running
+  local container_status=$(check_container_health "${PROJECT_NAME}.*${service_name}")
+
+  if [[ "$container_status" == *"not running"* ]]; then
+    echo "${GREY}not running${RESET}"
+    return 0
+  fi
+
+  if [[ "$container_status" == *"unhealthy"* ]]; then
+    echo "${RED}unhealthy${RESET}"
+    return 0
+  fi
+
+  # If container is running, try HTTP check
+  if timeout "$timeout" curl -f -s "$url" >/dev/null 2>&1; then
+    echo "${GREEN}healthy${RESET}"
+  elif timeout "$timeout" curl -s "$url" >/dev/null 2>&1; then
+    # HTTP connection works but might return non-200 status
+    echo "${YELLOW}starting${RESET}"
+  else
+    # Container running but HTTP not responding
+    echo "${YELLOW}starting${RESET}"
+  fi
+}
+
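Aside: the two-step probe relies on curl -f, which exits with code 22 on HTTP responses of 400 and above, while a bare curl exits 0 whenever any response arrives. A minimal sketch of the distinction, reusing the Nango endpoint from later in this diff:

    curl -f -s http://localhost:3003/health   # exit 22 on HTTP >= 400, 0 on success
    curl -s http://localhost:3003/health      # exit 0 as long as the server answers

That gap is why a reachable-but-erroring service lands in the yellow "starting" bucket rather than "unhealthy".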
+function check_container_health() {
+  local container_pattern="$1"
+
+  # Check if container exists (running or stopped)
+  local container_id=$(docker ps -a --filter "name=$container_pattern" --format "{{.ID}}" 2>/dev/null | head -n1)
+
+  if [[ -z "$container_id" ]]; then
+    # No container exists at all
+    echo "${GREY}not running${RESET}"
+    return 0
+  fi
+
+  # Container exists, check if it's running
+  local status=$(docker inspect --format='{{.State.Status}}' "$container_id" 2>/dev/null || echo "not_found")
+
+  if [[ "$status" == "running" ]]; then
+    # Check health status if available
+    local health_status=$(docker inspect --format='{{.State.Health.Status}}' "$container_id" 2>/dev/null || echo "none")
+
+    if [[ "$health_status" == "healthy" ]]; then
+      echo "${GREEN}healthy${RESET}"
+    elif [[ "$health_status" == "starting" ]]; then
+      echo "${YELLOW}starting${RESET}"
+    elif [[ "$health_status" == "unhealthy" ]]; then
+      echo "${RED}unhealthy${RESET}"
+    elif [[ "$health_status" == "none" ]]; then
+      # No health check defined, but container is running - consider it healthy
+      echo "${GREEN}healthy${RESET}"
+    else
+      echo "${RED}unhealthy${RESET}"
+    fi
+  else
+    # Container exists but is not running (stopped, exited, etc.)
+    echo "${RED}unhealthy${RESET}"
+  fi
+  return 0
+}
+
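Aside: docker inspect errors out when the template dereferences .State.Health on a container that defines no HEALTHCHECK, which is exactly what the `|| echo "none"` fallback above absorbs. An equivalent probe that sidesteps the error with a template guard (a sketch, not what this commit uses):

    docker inspect \
      --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' \
      "$container_id"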
+function check_application_service_health() {
+  local service_name="$1"
+
+  # Check if container exists (running or stopped)
+  local container_id=$(docker ps -a --filter "name=${PROJECT_NAME}_${service_name}_" --format "{{.ID}}" 2>/dev/null | head -n1)
+
+  if [[ -z "$container_id" ]]; then
+    # Try dev version
+    container_id=$(docker ps -a --filter "name=${PROJECT_NAME}_${service_name}-dev_" --format "{{.ID}}" 2>/dev/null | head -n1)
+  fi
+
+  if [[ -z "$container_id" ]]; then
+    # No container exists at all
+    echo "${GREY}not running${RESET}"
+    return 0
+  fi
+
+  # Container exists, check if it's running
+  local status=$(docker inspect --format='{{.State.Status}}' "$container_id" 2>/dev/null || echo "not_found")
+
+  if [[ "$status" == "running" ]]; then
+    # Check health status if available
+    local health_status=$(docker inspect --format='{{.State.Health.Status}}' "$container_id" 2>/dev/null || echo "none")
+
+    if [[ "$health_status" == "healthy" ]]; then
+      echo "${GREEN}healthy${RESET}"
+    elif [[ "$health_status" == "starting" ]]; then
+      echo "${YELLOW}starting${RESET}"
+    elif [[ "$health_status" == "unhealthy" ]]; then
+      echo "${RED}unhealthy${RESET}"
+    elif [[ "$health_status" == "none" ]]; then
+      # No health check defined, but container is running - consider it healthy
+      echo "${GREEN}healthy${RESET}"
+    else
+      echo "${RED}unhealthy${RESET}"
+    fi
+  else
+    # Container exists but is not running (stopped, exited, etc.)
+    echo "${RED}unhealthy${RESET}"
+  fi
+  return 0
+}
+
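Aside: apart from the container-name filters, the body above mirrors check_container_health; only the lookup differs. A hypothetical call, assuming a compose project named gitmesh (illustrative value, not from this commit):

    PROJECT_NAME=gitmesh                    # assumed for this example
    check_application_service_health "api"  # matches gitmesh_api_*, else gitmesh_api-dev_*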
+function should_check_service() {
+  local service_name="$1"
+
+  # Skip chat-orchestrator for CE edition
+  if [[ "$__EDITION" != "gitmesh-ee" && "$service_name" == "chat-orchestrator" ]]; then
+    return 1
+  fi
+
+  # Check if service is in ignored services array
+  if [[ ${#INGORED_SERVICES[@]} -ne 0 ]]; then
+    for IGNORED_SERVICE in "${INGORED_SERVICES[@]}"
+    do
+      if [[ "$service_name" == "${IGNORED_SERVICE}" ]]; then
+        return 1
+      fi
+    done
+  fi
+
+  return 0
+}
+
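Aside: a sketch of the gating in use (the array name is reproduced exactly as it is spelled in the script):

    INGORED_SERVICES=("ollama")                # normally populated elsewhere in the CLI
    should_check_service "ollama" || echo "skipping ollama checks"
    should_check_service "chat-orchestrator"   # also returns 1 unless __EDITION=gitmesh-ee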
 function print_success_and_ports() {
-  echo -e "\n${GREEN}Script ran successfully!${RESET}"
+  # Temporarily disable exit on error for health checks
+  set +e
+
+  # Ensure __EDITION is set
+  if [[ -z "$__EDITION" ]]; then
+    source_edition
+  fi
+
   echo -e "\n${YELLOW}Services and Ports:${RESET}"
   echo -e "--------------------------------------------------"
-  echo -e "${BLUE}Frontend:${RESET} http://localhost:8081"
-  echo -e "${BLUE}Backend API:${RESET} http://localhost:8080"
-  echo -e "${BLUE}Nango:${RESET} http://localhost:3003"
-  echo -e "${BLUE}CubeJS:${RESET} http://localhost:4000"
-  echo -e "${BLUE}PostgreSQL:${RESET} http://localhost:5432"
-  echo -e "${BLUE}Redis:${RESET} http://localhost:6379"
-  echo -e "${BLUE}Chat Orchestrator:${RESET} http://localhost:8001"
-  echo -e "${BLUE}Temporal UI:${RESET} http://localhost:8233"
-  echo -e "${BLUE}Kafka:${RESET} http://localhost:9094"
-  echo -e "${BLUE}OpenSearch:${RESET} http://localhost:9200"
-  echo -e "${BLUE}SQS UI:${RESET} http://localhost:9325"
-  echo -e "${BLUE}Ollama:${RESET} http://localhost:11434"
 
+  local unhealthy_count=0
+  local total_services=0
+
+  # Core Application Services
+  echo -e "${GREY}Core Services:${RESET}"
+
+  # Check Frontend
+  if should_check_service "frontend"; then
+    local frontend_status=$(check_application_service_health "frontend")
+    echo -e "${BLUE}Frontend:${RESET} http://localhost:8081 - $frontend_status"
+    [[ "$frontend_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+    [[ "$frontend_status" != *"not running"* ]] && ((total_services++))
+  fi
+
+  # Check Backend API
+  if should_check_service "api"; then
+    local backend_status=$(check_application_service_health "api")
+    echo -e "${BLUE}Backend API:${RESET} http://localhost:8080 - $backend_status"
+    [[ "$backend_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+    [[ "$backend_status" != *"not running"* ]] && ((total_services++))
+  fi
+
+  # Check Search Sync API
+  if should_check_service "search-sync-api"; then
+    local search_sync_api_status=$(check_application_service_health "search-sync-api")
+    echo -e "${BLUE}Search Sync API:${RESET} http://localhost:8082 - $search_sync_api_status"
+    [[ "$search_sync_api_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+    [[ "$search_sync_api_status" != *"not running"* ]] && ((total_services++))
+  fi
+
+  # Check Webhook API
+  if should_check_service "webhook-api"; then
+    local webhook_api_status=$(check_application_service_health "webhook-api")
+    echo -e "${BLUE}Webhook API:${RESET} http://localhost:8083 - $webhook_api_status"
+    [[ "$webhook_api_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+    [[ "$webhook_api_status" != *"not running"* ]] && ((total_services++))
+  fi
+
+  # Check Chat Orchestrator (only for EE)
+  if should_check_service "chat-orchestrator"; then
+    local chat_status=$(check_application_service_health "chat-orchestrator")
+    echo -e "${BLUE}Chat Orchestrator:${RESET} http://localhost:8001 - $chat_status"
+    [[ "$chat_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+    [[ "$chat_status" != *"not running"* ]] && ((total_services++))
+  fi
+
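Aside: the `set +e` at the top of this function is load-bearing for the counting pattern above. An arithmetic command's exit status mirrors the expression's value, so the first post-increment from 0 returns 1 and would abort an errexit shell. A minimal reproduction:

    set -e
    unhealthy_count=0
    ((unhealthy_count++))   # old value 0 -> expression is 0 -> exit status 1 -> shell exits
    echo "never reached"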
+  # echo ""
+  # echo -e "${GREY}Worker Services:${RESET}"
+
+  # # Check all worker services
+  # if should_check_service "automations-worker"; then
+  #   local automations_worker_status=$(check_application_service_health "automations-worker")
+  #   echo -e "${BLUE}Automations Worker:${RESET} - $automations_worker_status"
+  #   [[ "$automations_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$automations_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "data-sink-worker"; then
+  #   local data_sink_worker_status=$(check_application_service_health "data-sink-worker")
+  #   echo -e "${BLUE}Data Sink Worker:${RESET} - $data_sink_worker_status"
+  #   [[ "$data_sink_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$data_sink_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "emails-worker"; then
+  #   local emails_worker_status=$(check_application_service_health "emails-worker")
+  #   echo -e "${BLUE}Emails Worker:${RESET} - $emails_worker_status"
+  #   [[ "$emails_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$emails_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "integration-data-worker"; then
+  #   local integration_data_worker_status=$(check_application_service_health "integration-data-worker")
+  #   echo -e "${BLUE}Integration Data Worker:${RESET} - $integration_data_worker_status"
+  #   [[ "$integration_data_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$integration_data_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "integration-run-worker"; then
+  #   local integration_run_worker_status=$(check_application_service_health "integration-run-worker")
+  #   echo -e "${BLUE}Integration Run Worker:${RESET} - $integration_run_worker_status"
+  #   [[ "$integration_run_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$integration_run_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "integration-stream-worker"; then
+  #   local integration_stream_worker_status=$(check_application_service_health "integration-stream-worker")
+  #   echo -e "${BLUE}Integration Stream Worker:${RESET} - $integration_stream_worker_status"
+  #   [[ "$integration_stream_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$integration_stream_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "integration-sync-worker"; then
+  #   local integration_sync_worker_status=$(check_application_service_health "integration-sync-worker")
+  #   echo -e "${BLUE}Integration Sync Worker:${RESET} - $integration_sync_worker_status"
+  #   [[ "$integration_sync_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$integration_sync_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "search-sync-worker"; then
+  #   local search_sync_worker_status=$(check_application_service_health "search-sync-worker")
+  #   echo -e "${BLUE}Search Sync Worker:${RESET} - $search_sync_worker_status"
+  #   [[ "$search_sync_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$search_sync_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "nodejs-worker"; then
+  #   local nodejs_worker_status=$(check_application_service_health "nodejs-worker")
+  #   echo -e "${BLUE}NodeJS Worker:${RESET} - $nodejs_worker_status"
+  #   [[ "$nodejs_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$nodejs_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "python-worker"; then
+  #   local python_worker_status=$(check_application_service_health "python-worker")
+  #   echo -e "${BLUE}Python Worker:${RESET} - $python_worker_status"
+  #   [[ "$python_worker_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$python_worker_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "job-generator"; then
+  #   local job_generator_status=$(check_application_service_health "job-generator")
+  #   echo -e "${BLUE}Job Generator:${RESET} - $job_generator_status"
+  #   [[ "$job_generator_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$job_generator_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
+  # if should_check_service "discord-ws"; then
+  #   local discord_ws_status=$(check_application_service_health "discord-ws")
+  #   echo -e "${BLUE}Discord WebSocket:${RESET} - $discord_ws_status"
+  #   [[ "$discord_ws_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+  #   [[ "$discord_ws_status" != *"not running"* ]] && ((total_services++))
+  # fi
+
echo ""
811+
echo -e "${GREY}Infrastructure Services:${RESET}"
812+
813+
# Infrastructure services are always checked
814+
# Check Nango - has HTTP health endpoint
815+
local nango_status=$(check_http_service_health "nango" "http://localhost:3003/health")
816+
echo -e "${BLUE}Nango:${RESET} http://localhost:3003 - $nango_status"
817+
[[ "$nango_status" == *"unhealthy"* ]] && ((unhealthy_count++))
818+
[[ "$nango_status" != *"not running"* ]] && ((total_services++))
819+
820+
# Check CubeJS - has HTTP endpoint
821+
local cubejs_status=$(check_http_service_health "cubejs" "http://localhost:4000/readyz")
822+
echo -e "${BLUE}CubeJS:${RESET} http://localhost:4000 - $cubejs_status"
823+
[[ "$cubejs_status" == *"unhealthy"* ]] && ((unhealthy_count++))
824+
[[ "$cubejs_status" != *"not running"* ]] && ((total_services++))
825+
826+
# Check PostgreSQL - has built-in health check
827+
local postgres_status=$(check_container_health "${PROJECT_NAME}.*db")
828+
echo -e "${BLUE}PostgreSQL:${RESET} http://localhost:5432 - $postgres_status"
829+
[[ "$postgres_status" == *"unhealthy"* ]] && ((unhealthy_count++))
830+
[[ "$postgres_status" != *"not running"* ]] && ((total_services++))
831+
832+
# Check Redis - has built-in health check
833+
local redis_status=$(check_container_health "${PROJECT_NAME}.*redis")
834+
echo -e "${BLUE}Redis:${RESET} http://localhost:6379 - $redis_status"
835+
[[ "$redis_status" == *"unhealthy"* ]] && ((unhealthy_count++))
836+
[[ "$redis_status" != *"not running"* ]] && ((total_services++))
837+
838+
# Check Temporal UI - has HTTP endpoint
839+
local temporal_status=$(check_http_service_health "temporal" "http://localhost:8233")
840+
echo -e "${BLUE}Temporal UI:${RESET} http://localhost:8233 - $temporal_status"
841+
[[ "$temporal_status" == *"unhealthy"* ]] && ((unhealthy_count++))
842+
[[ "$temporal_status" != *"not running"* ]] && ((total_services++))
843+
844+
# Check Kafka - has built-in health check
845+
local kafka_status=$(check_container_health "${PROJECT_NAME}.*kafka")
846+
echo -e "${BLUE}Kafka:${RESET} http://localhost:9094 - $kafka_status"
847+
[[ "$kafka_status" == *"unhealthy"* ]] && ((unhealthy_count++))
848+
[[ "$kafka_status" != *"not running"* ]] && ((total_services++))
849+
850+
# Check OpenSearch - has HTTP health endpoint
851+
local opensearch_status=$(check_http_service_health "open-search" "http://localhost:9200/_cluster/health")
852+
echo -e "${BLUE}OpenSearch:${RESET} http://localhost:9200 - $opensearch_status"
853+
[[ "$opensearch_status" == *"unhealthy"* ]] && ((unhealthy_count++))
854+
[[ "$opensearch_status" != *"not running"* ]] && ((total_services++))
855+
856+
# Check SQS UI - has HTTP endpoint
857+
local sqs_status=$(check_http_service_health "sqs" "http://localhost:9325")
858+
echo -e "${BLUE}SQS UI:${RESET} http://localhost:9325 - $sqs_status"
859+
[[ "$sqs_status" == *"unhealthy"* ]] && ((unhealthy_count++))
860+
[[ "$sqs_status" != *"not running"* ]] && ((total_services++))
861+
862+
# Check Ollama - has HTTP endpoint
863+
local ollama_status=$(check_http_service_health "ollama" "http://localhost:11434")
864+
echo -e "${BLUE}Ollama:${RESET} http://localhost:11434 - $ollama_status"
865+
[[ "$ollama_status" == *"unhealthy"* ]] && ((unhealthy_count++))
866+
[[ "$ollama_status" != *"not running"* ]] && ((total_services++))
867+
868+
+  # Check Unleash (only for EE) - has HTTP endpoint
   if [[ "$__EDITION" == "gitmesh-ee" ]]; then
-    echo -e "${BLUE}Unleash:${RESET} http://localhost:4242"
+    local unleash_status=$(check_http_service_health "unleash[^-]" "http://localhost:4242/health")
+    echo -e "${BLUE}Unleash:${RESET} http://localhost:4242 - $unleash_status"
+    [[ "$unleash_status" == *"unhealthy"* ]] && ((unhealthy_count++))
+    [[ "$unleash_status" != *"not running"* ]] && ((total_services++))
   fi
+
   echo -e "--------------------------------------------------"
+
+  # Print summary message
+  if [[ $unhealthy_count -eq 0 ]]; then
+    echo -e "\n${GREEN}Script ran successfully! All $total_services services are healthy.${RESET}"
+  else
+    local healthy_count=$((total_services - unhealthy_count))
+    echo -e "\n${YELLOW}Script ran successfully but $unhealthy_count of $total_services service(s) are unhealthy.${RESET}"
+    echo -e "${GREEN}$healthy_count services are healthy.${RESET}"
+    echo -e "${GREY}Check the logs for unhealthy services: ./cli service <service-name> logs${RESET}"
+  fi
+
+  # Re-enable exit on error
+  set -e
 }
 
 function start() {
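For reference, on a CE install with every service running and nothing ignored, the reworked function prints output along these lines (hypothetical run; colors omitted, middle of the list abridged):

    Services and Ports:
    --------------------------------------------------
    Core Services:
    Frontend: http://localhost:8081 - healthy
    Backend API: http://localhost:8080 - healthy

    Infrastructure Services:
    Nango: http://localhost:3003 - healthy
    PostgreSQL: localhost:5432 - healthy
    --------------------------------------------------

    Script ran successfully! All 13 services are healthy.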
