# Workflow file for the CI run of pull request #132:
# "feat: enhance CI/CD workflows and Helm chart configurations"

# Unified pipeline: test the GenAI service, build and push every service
# image to GHCR, then deploy to Kubernetes with Terraform.
name: Unified CI/CD Pipeline

# Runs on pushes to any branch and on pull requests that target main.
on:
  push:
    branches:
      - '**'
  pull_request:
    branches:
      - main

# Least-privilege default for GITHUB_TOKEN. Jobs that need more (for example
# build-and-push, which writes packages) declare their own `permissions`.
permissions:
  contents: read

env:
  # Registry host and image namespace shared by all image tags.
  GHCR_REGISTRY: ghcr.io
  IMAGE_NAMESPACE: aet-devops25/team-cache-me-if-you-can

jobs:
test-genai:
  # Installs the GenAI service's Python dependencies and runs its pytest suite.
  name: Test GenAI
  # Always true for the push / pull_request triggers this workflow declares.
  if: github.event_name == 'push' || github.event_name == 'pull_request'
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # The version stays quoted so YAML does not read it as the float 3.1.
    - name: Set up Python
      uses: actions/setup-python@v5
      with: {python-version: '3.10'}

    - name: Install dependencies
      run: pip install -r genai/requirements.txt

    - name: Run tests
      run: pytest genai/tests
build-and-push:
  # Builds one service per matrix entry and pushes its image to GHCR, tagged
  # with both `latest` and the commit SHA.
  name: Build & Push Service Images
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write  # required to push images to GHCR with GITHUB_TOKEN
  strategy:
    matrix:
      # user/group/gateway/files are the Java services under ./server/<name>;
      # genai and client have their own top-level directories.
      service: [user, group, gateway, files, genai, client]
    # Let the remaining services finish even if one of them fails.
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # ---- Java services ---------------------------------------------------
    - name: Set up JDK 17 (for Java services)
      if: matrix.service != 'genai' && matrix.service != 'client'
      # v4 matches the major version of the other first-party actions used
      # here; v3 runs on the deprecated Node 16 runtime.
      uses: actions/setup-java@v4
      with:
        java-version: '17'
        distribution: temurin

    - name: Build Java service (skip tests)
      if: matrix.service != 'genai' && matrix.service != 'client'
      working-directory: ./server/${{ matrix.service }}
      run: |
        export GRADLE_OPTS="-Dorg.gradle.jvmargs=-Xmx2048m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8"
        ./gradlew clean build -x test

    # Tests are informational only: a failure here does not fail the job.
    - name: Run Java tests (allow failures)
      if: matrix.service != 'genai' && matrix.service != 'client'
      working-directory: ./server/${{ matrix.service }}
      continue-on-error: true
      run: ./gradlew test --info

    # ---- Client ----------------------------------------------------------
    - name: Set up Node.js (for client)
      if: matrix.service == 'client'
      uses: actions/setup-node@v4
      with:
        node-version: 22

    - name: Build client
      if: matrix.service == 'client'
      working-directory: ./client
      run: |
        npm ci
        npm run build

    # ---- Container images ------------------------------------------------
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to GHCR
      uses: docker/login-action@v3
      with:
        registry: ${{ env.GHCR_REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Exactly one of the three build-and-push steps below runs per matrix
    # entry; they differ only in build context and image name.
    - name: Build & push Java service image
      if: matrix.service != 'genai' && matrix.service != 'client'
      uses: docker/build-push-action@v5
      with:
        context: ./server/${{ matrix.service }}
        file: ./server/${{ matrix.service }}/Dockerfile
        platforms: linux/amd64
        push: true
        tags: |
          ${{ env.GHCR_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/${{ matrix.service }}-service:latest
          ${{ env.GHCR_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/${{ matrix.service }}-service:${{ github.sha }}

    - name: Build & push GenAI service image
      if: matrix.service == 'genai'
      uses: docker/build-push-action@v5
      with:
        context: ./genai
        file: ./genai/Dockerfile
        platforms: linux/amd64
        push: true
        tags: |
          ${{ env.GHCR_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/genai-app:latest
          ${{ env.GHCR_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/genai-app:${{ github.sha }}

    - name: Build & push Client service image
      if: matrix.service == 'client'
      uses: docker/build-push-action@v5
      with:
        context: ./client
        file: ./client/Dockerfile
        platforms: linux/amd64
        push: true
        tags: |
          ${{ env.GHCR_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/client:latest
          ${{ env.GHCR_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/client:${{ github.sha }}
terraform-deploy:
  # Deploys the images built for this commit to the `developmentv1`
  # namespace: adopts resources that already exist in the cluster into
  # Terraform state, then plans and applies the configuration in ./infra.
  name: Terraform Deploy to Kubernetes
  needs: [build-and-push, test-genai]
  runs-on: ubuntu-latest
  if: needs.build-and-push.result == 'success' && needs.test-genai.result == 'success'
  # Never let two runs import/apply against the namespace at the same time.
  concurrency:
    group: terraform-deploy-developmentv1
    cancel-in-progress: false
  env:
    # Every service is deployed at the commit that was just built. Declared
    # once at job level instead of being repeated in each Terraform step.
    TF_VAR_image_tag_user: ${{ github.sha }}
    TF_VAR_image_tag_group: ${{ github.sha }}
    TF_VAR_image_tag_gateway: ${{ github.sha }}
    TF_VAR_image_tag_files: ${{ github.sha }}
    TF_VAR_image_tag_genai: ${{ github.sha }}
    TF_VAR_image_tag_client: ${{ github.sha }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Terraform
      uses: hashicorp/setup-terraform@v3

    - name: Set up kubectl
      uses: azure/setup-kubectl@v4

    - name: Configure kubeconfig
      env:
        # Hand the secret over as an environment variable rather than
        # splicing it into the script text, so quotes or shell
        # metacharacters inside it cannot break or alter the script.
        KUBE_CONFIG_DATA: ${{ secrets.KUBE_CONFIG_DATA }}
      run: |
        mkdir -p "$HOME/.kube"
        printf '%s\n' "$KUBE_CONFIG_DATA" > "$HOME/.kube/config"
        chmod 600 "$HOME/.kube/config"
        # Export an absolute path for all later steps. A literal
        # "~/.kube/config" in `env:` is not tilde-expanded by Actions or
        # by kubectl, so kubectl would not find the file.
        echo "KUBECONFIG=$HOME/.kube/config" >> "$GITHUB_ENV"

    - name: Terraform Init
      working-directory: ./infra
      run: terraform init -input=false

    - name: Import existing resources with correct syntax
      working-directory: ./infra
      env:
        TF_VAR_openai_api_key: ${{ secrets.OPENAI_API_KEY }}
      run: |
        echo "=== Smart Import Strategy ==="

        # import_if_exists <kubectl-kind> <object-name> <terraform-address>
        # Imports <namespace>/<object-name> into <terraform-address> when
        # the object exists in the cluster. A failed import is reported
        # but tolerated (best effort).
        import_if_exists() {
          local resource_type=$1
          local resource_name=$2
          local terraform_resource=$3
          local namespace="developmentv1"
          echo "Checking $resource_type/$resource_name..."
          if kubectl get "$resource_type" "$resource_name" -n "$namespace" >/dev/null 2>&1; then
            echo "✅ Found $resource_type/$resource_name - attempting import"
            if terraform import "$terraform_resource" "$namespace/$resource_name"; then
              echo "✅ Successfully imported $terraform_resource"
            else
              echo "⚠️ Import failed for $terraform_resource (may already be in state)"
            fi
          else
            echo "❌ $resource_type/$resource_name not found - will be created"
          fi
        }

        # Import services
        echo "=== Importing Services ==="
        import_if_exists "service" "user-service" "kubernetes_service.user"
        import_if_exists "service" "group-service" "kubernetes_service.group"
        import_if_exists "service" "group" "kubernetes_service.group_alias"
        import_if_exists "service" "gateway-service" "kubernetes_service.gateway"
        import_if_exists "service" "files-service" "kubernetes_service.files"
        import_if_exists "service" "genai-app-service" "kubernetes_service.genai_app"
        import_if_exists "service" "redis" "kubernetes_service.redis"
        import_if_exists "service" "weaviate-service" "kubernetes_service.weaviate"
        import_if_exists "service" "prometheus" "kubernetes_service.prometheus"
        import_if_exists "service" "grafana" "kubernetes_service.grafana"
        import_if_exists "service" "loki" "kubernetes_service.loki"
        import_if_exists "service" "client-service" "kubernetes_service.client"

        # Import deployments
        echo "=== Importing Deployments ==="
        import_if_exists "deployment" "user-service" "kubernetes_deployment.user"
        import_if_exists "deployment" "group-service" "kubernetes_deployment.group"
        import_if_exists "deployment" "gateway-service" "kubernetes_deployment.gateway"
        import_if_exists "deployment" "files-service" "kubernetes_deployment.files"
        import_if_exists "deployment" "genai-app" "kubernetes_deployment.genai_app"
        import_if_exists "deployment" "genai-celery-worker" "kubernetes_deployment.genai_celery_worker"
        import_if_exists "deployment" "redis" "kubernetes_deployment.genai_redis"
        import_if_exists "deployment" "weaviate" "kubernetes_deployment.weaviate"
        import_if_exists "deployment" "prometheus" "kubernetes_deployment.prometheus"
        import_if_exists "deployment" "grafana" "kubernetes_deployment.grafana"
        import_if_exists "deployment" "loki" "kubernetes_deployment.loki"
        import_if_exists "deployment" "promtail" "kubernetes_deployment.promtail"
        import_if_exists "deployment" "client" "kubernetes_deployment.client"

        # Import secrets
        echo "=== Importing Secrets ==="
        import_if_exists "secret" "user-env-secret" "kubernetes_secret.user_env"
        import_if_exists "secret" "group-env-secret" "kubernetes_secret.group_env"
        import_if_exists "secret" "gateway-env-secret" "kubernetes_secret.gateway_env"
        import_if_exists "secret" "files-env-secret" "kubernetes_secret.files_env"
        import_if_exists "secret" "openai-credentials" "kubernetes_secret.openai_credentials"
        import_if_exists "secret" "grafana-admin" "kubernetes_secret.grafana_admin"
        import_if_exists "secret" "client-env-secret" "kubernetes_secret.client_env"

        # Import ingress resources
        echo "=== Importing Ingress ==="
        import_if_exists "ingress" "grafana-ingress" "kubernetes_ingress_v1.grafana_ingress"
        import_if_exists "ingress" "genai-dev-ingress" "kubernetes_ingress_v1.genai_ingress"
        import_if_exists "ingress" "prometheus-ingress" "kubernetes_ingress_v1.prometheus_ingress"
        import_if_exists "ingress" "client-ingress" "kubernetes_ingress_v1.client_ingress"

        echo "=== Import phase completed ==="

    - name: Terraform Plan
      working-directory: ./infra
      env:
        TF_VAR_openai_api_key: ${{ secrets.OPENAI_API_KEY }}
      run: |
        echo "=== Refreshing Terraform state ==="
        terraform refresh -input=false

        echo "=== Creating Terraform plan ==="
        # With -detailed-exitcode, 0 = no changes, 2 = changes present and
        # 1 = error. The default shell runs with `-e`, so the status must
        # be captured with `||`; otherwise exit code 2 would abort the
        # step before it can be inspected.
        PLAN_EXIT_CODE=0
        terraform plan -input=false -detailed-exitcode || PLAN_EXIT_CODE=$?

        if [ "$PLAN_EXIT_CODE" -eq 0 ]; then
          echo "No changes needed - infrastructure is up to date"
        elif [ "$PLAN_EXIT_CODE" -eq 2 ]; then
          echo "Changes detected - will proceed with apply"
        else
          echo "Plan failed with exit code $PLAN_EXIT_CODE"
          exit 1
        fi

    - name: Terraform Apply
      working-directory: ./infra
      env:
        TF_VAR_openai_api_key: ${{ secrets.OPENAI_API_KEY }}
      run: |
        # pipefail makes `terraform ... | tee` report Terraform's status
        # instead of tee's.
        set -o pipefail
        apply_log="$(mktemp)"

        echo "=== Applying Terraform configuration ==="
        # First attempt: normal apply. Output is kept so the failure
        # reason can be inspected afterwards.
        if terraform apply -auto-approve -input=false 2>&1 | tee "$apply_log"; then
          echo "✅ Terraform apply completed successfully"
        else
          echo "❌ Normal apply failed, checking for resource conflicts..."
          # "already exists" is reported by the failed apply itself (not
          # by a subsequent plan), so look for it in the apply output.
          if grep -q "already exists" "$apply_log"; then
            echo "🔄 Detected existing resources, attempting state import recovery..."
            # Re-run import for any missed resources (best effort: an
            # import that fails is ignored).
            echo "Re-attempting imports for any missed resources..."
            terraform import kubernetes_service.user developmentv1/user-service 2>/dev/null || true
            terraform import kubernetes_service.group developmentv1/group-service 2>/dev/null || true
            terraform import kubernetes_service.group_alias developmentv1/group 2>/dev/null || true
            terraform import kubernetes_service.gateway developmentv1/gateway-service 2>/dev/null || true
            terraform import kubernetes_service.files developmentv1/files-service 2>/dev/null || true
            terraform import kubernetes_service.genai_app developmentv1/genai-app-service 2>/dev/null || true
            terraform import kubernetes_service.redis developmentv1/redis 2>/dev/null || true
            terraform import kubernetes_service.weaviate developmentv1/weaviate-service 2>/dev/null || true
            terraform import kubernetes_service.prometheus developmentv1/prometheus 2>/dev/null || true
            terraform import kubernetes_service.grafana developmentv1/grafana 2>/dev/null || true
            terraform import kubernetes_service.loki developmentv1/loki 2>/dev/null || true
            terraform import kubernetes_service.client developmentv1/client-service 2>/dev/null || true
            terraform import kubernetes_secret.grafana_admin developmentv1/grafana-admin 2>/dev/null || true
            terraform import kubernetes_secret.client_env developmentv1/client-env-secret 2>/dev/null || true

            echo "Checking for specific resource failures..."
            if ! kubectl get deployment weaviate -n developmentv1 >/dev/null 2>&1; then
              echo "⚠️ Weaviate deployment missing - will be created"
            fi

            # Try apply again after re-import
            echo "Retrying apply after import recovery..."
            if terraform apply -auto-approve -input=false; then
              echo "✅ Terraform apply completed successfully after import recovery"
            else
              echo "❌ Apply still failing, using targeted approach..."
              # Last resort: apply only the deployments. A failure here
              # fails the step because the shell runs with `-e`.
              terraform apply -auto-approve -input=false \
                -target=kubernetes_deployment.user \
                -target=kubernetes_deployment.group \
                -target=kubernetes_deployment.gateway \
                -target=kubernetes_deployment.files \
                -target=kubernetes_deployment.genai_app \
                -target=kubernetes_deployment.genai_celery_worker \
                -target=kubernetes_deployment.genai_redis \
                -target=kubernetes_deployment.weaviate \
                -target=kubernetes_deployment.prometheus \
                -target=kubernetes_deployment.grafana \
                -target=kubernetes_deployment.loki \
                -target=kubernetes_deployment.promtail \
                -target=kubernetes_deployment.client
              echo "✅ Targeted apply completed - some resources may need manual reconciliation"
            fi
          else
            echo "❌ Apply failed for reasons other than resource conflicts"
            exit 1
          fi
        fi
        echo "=== Terraform apply phase completed ==="