diff --git a/test-suite/README.md b/test-suite/README.md index 9a5a1e9ce3..2d8a1a52a8 100644 --- a/test-suite/README.md +++ b/test-suite/README.md @@ -44,13 +44,16 @@ cd test-suite/fhevm # Deploy with local BuildKit cache (disables provenance attestations) ./fhevm-cli deploy --local +# Deploy with threshold 2 out of 2 coprocessors (local multicoprocessor mode) +./fhevm-cli deploy --coprocessors 2 --coprocessor-threshold 2 + # Resume a failed deploy from a specific step (keeps existing containers/volumes) ./fhevm-cli deploy --resume kms-connector # Deploy only a single step (useful for redeploying one service) ./fhevm-cli deploy --only coprocessor -# Run specific tests +# Run specific tests (works for both 1/1 and n/t topologies) ./fhevm-cli test input-proof # Skip Hardhat compile when artifacts are already up to date ./fhevm-cli test input-proof --no-hardhat-compile diff --git a/test-suite/fhevm/docker-compose/database-docker-compose.yml b/test-suite/fhevm/docker-compose/database-docker-compose.yml index deec2e549a..f51943a4ec 100644 --- a/test-suite/fhevm/docker-compose/database-docker-compose.yml +++ b/test-suite/fhevm/docker-compose/database-docker-compose.yml @@ -3,6 +3,10 @@ services: container_name: coprocessor-and-kms-db image: postgres:15.7 restart: always + command: + - postgres + - -c + - max_connections=500 env_file: - ../env/staging/.env.database.local ports: @@ -16,4 +20,4 @@ services: - db:/var/lib/postgresql/data volumes: - db: \ No newline at end of file + db: diff --git a/test-suite/fhevm/env/staging/.env.coprocessor b/test-suite/fhevm/env/staging/.env.coprocessor index 9099c5f31f..2a50e995bd 100644 --- a/test-suite/fhevm/env/staging/.env.coprocessor +++ b/test-suite/fhevm/env/staging/.env.coprocessor @@ -6,12 +6,6 @@ POSTGRES_USER=postgres POSTGRES_PASSWORD=postgres DATABASE_URL="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db:5432/coprocessor" -# ============================================================================= -# API & 
AUTHENTICATION - SENSITIVE -# ============================================================================= -# IMPORTANT: For testing only - move to secure storage in production -TENANT_API_KEY=a1503fb6-d79b-4e9e-826d-44cf262f3e05 - # ============================================================================= # AWS/MINIO CONFIGURATION # ============================================================================= diff --git a/test-suite/fhevm/fhevm-cli b/test-suite/fhevm/fhevm-cli index c5aad1435b..ced6d1903a 100755 --- a/test-suite/fhevm/fhevm-cli +++ b/test-suite/fhevm/fhevm-cli @@ -14,6 +14,25 @@ RESET='\033[0m' SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" PROJECT="fhevm" +# Portable grep helpers: prefer rg when available, fallback to grep. Arguments after the pattern are forwarded as file operands; with none, piped stdin is searched. +function has_pattern() { + local pattern="$1" + if command -v rg >/dev/null 2>&1; then + rg -q -- "$pattern" "${@:2}" + else + grep -qE -- "$pattern" "${@:2}" + fi +} + +function filter_pattern() { + local pattern="$1" + if command -v rg >/dev/null 2>&1; then + rg -- "$pattern" "${@:2}" + else + grep -E -- "$pattern" "${@:2}" + fi +} + # Default versions for the fhevm stack. # KMS connector services. 
@@ -58,10 +77,11 @@ function usage { echo -e "${BOLD}Usage:${RESET} ${YELLOW}fhevm-cli${RESET} ${CYAN}COMMAND [OPTIONS]${RESET}" echo echo -e "${BOLD}${LIGHT_BLUE}Commands:${RESET}" - echo -e " ${YELLOW}deploy${RESET} ${CYAN}[--build] [--local]${RESET} WIP: Deploy the full fhevm stack (optionally rebuild images)" + echo -e " ${YELLOW}deploy${RESET} ${CYAN}[--build] [--local] [--coprocessors N] [--coprocessor-threshold T]${RESET} Deploy fhevm stack" echo -e " ${YELLOW}pause${RESET} ${CYAN}[CONTRACTS]${RESET} Pause specific contracts (host|gateway)" echo -e " ${YELLOW}unpause${RESET} ${CYAN}[CONTRACTS]${RESET} Unpause specific contracts (host|gateway)" echo -e " ${YELLOW}test${RESET} ${CYAN}[TYPE]${RESET} Run tests (input-proof|user-decryption|public-decryption|delegated-user-decryption|random|random-subset|operators|erc20|debug)" + echo -e " ${YELLOW}smoke${RESET} ${CYAN}[PROFILE]${RESET} Run multicoproc smoke profile (multi-2-2|multi-3-5)" echo -e " ${YELLOW}upgrade${RESET} ${CYAN}[SERVICE]${RESET} Upgrade specific service (host|gateway|connector|coprocessor|relayer|test-suite)" echo -e " ${YELLOW}clean${RESET} Remove all containers and volumes" echo -e " ${YELLOW}logs${RESET} ${CYAN}[SERVICE]${RESET} View logs for a specific service" @@ -78,6 +98,9 @@ function usage { echo -e " ${PURPLE}./fhevm-cli deploy${RESET}" echo -e " ${PURPLE}./fhevm-cli deploy --build${RESET}" echo -e " ${PURPLE}./fhevm-cli deploy --local${RESET}" + echo -e " ${PURPLE}./fhevm-cli deploy --coprocessors 2 --coprocessor-threshold 2${RESET}" + echo -e " ${PURPLE}./fhevm-cli smoke multi-2-2${RESET}" + echo -e " ${PURPLE}./fhevm-cli smoke multi-3-5${RESET}" echo -e " ${PURPLE}./fhevm-cli test input-proof${RESET}" echo -e " ${PURPLE}./fhevm-cli test input-proof --no-hardhat-compile${RESET}" echo -e " ${PURPLE}./fhevm-cli test user-decryption ${RESET}" @@ -88,6 +111,77 @@ function usage { echo -e "${BLUE}============================================================${RESET}" } +# Best-effort log 
dump across all coprocessor instances for quick local triage. +function collect_coprocessor_topology_logs() { + echo -e "${LIGHT_BLUE}${BOLD}[LOGS] Collecting coprocessor topology logs (last 10m)...${RESET}" + local containers + containers=$(docker ps --format '{{.Names}}' | filter_pattern '^coprocessor[0-9-]*-(host-listener|gw-listener|tfhe-worker|sns-worker|transaction-sender)$' || true) + if [ -z "$containers" ]; then + echo -e "${YELLOW}[WARN]${RESET} No coprocessor containers found" + return + fi + + while IFS= read -r container; do + [ -z "$container" ] && continue + echo -e "\n${CYAN}===== ${container} =====${RESET}" + docker logs --since=10m "$container" 2>&1 | tail -n 160 || true + done <<< "$containers" +} + +function wait_for_coprocessor_key_bootstrap() { + local timeout_seconds=300 + local poll_interval=5 + local elapsed=0 + echo -e "${LIGHT_BLUE}${BOLD}[WAIT] Waiting for coprocessor key bootstrap...${RESET}" + while [ "$elapsed" -lt "$timeout_seconds" ]; do + local ready_count=0 + local total_count=0 + local pending="" + local containers + containers=$(docker ps --format '{{.Names}}' | filter_pattern '^coprocessor([0-9]+)?-sns-worker$' || true) + if [ -z "$containers" ]; then + echo -e "${YELLOW}[WARN]${RESET} No sns-worker containers found yet" + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + continue + fi + + while IFS= read -r container; do + [ -z "$container" ] && continue + total_count=$((total_count + 1)) + if docker logs --since=20m "$container" 2>&1 | has_pattern "Fetched keyset"; then + ready_count=$((ready_count + 1)) + else + pending="${pending} ${container}" + fi + done <<< "$containers" + + local configured_threshold="" # resolved via SCRIPT_DIR so the lookup works from any cwd; file is fed on stdin because filter_pattern only reads its pattern argument + if [ -f "${SCRIPT_DIR}/env/staging/.env.host-sc.local" ]; then + configured_threshold=$(filter_pattern '^COPROCESSOR_THRESHOLD=' < "${SCRIPT_DIR}/env/staging/.env.host-sc.local" | tail -n1 | cut -d'=' -f2 || true) + fi + if ! 
[[ "$configured_threshold" =~ ^[0-9]+$ ]] || [ "$configured_threshold" -lt 1 ]; then + configured_threshold="$total_count" + fi + + if [ "$configured_threshold" -gt "$total_count" ]; then + configured_threshold="$total_count" + fi + + if [ "$ready_count" -ge "$configured_threshold" ]; then + echo -e "${GREEN}[READY]${RESET} Key bootstrap confirmed for ${ready_count}/${total_count} sns-worker containers (threshold=${configured_threshold})" + return 0 + fi + + echo -e "${YELLOW}[WAIT]${RESET} Key bootstrap ready=${ready_count}/${total_count} threshold=${configured_threshold} pending:${pending}" + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + done + + echo -e "${RED}[ERROR]${RESET} Key bootstrap did not reach threshold within ${timeout_seconds}s" + return 1 +} + COMMAND=$1 shift @@ -112,6 +206,26 @@ case $COMMAND in DEPLOY_ARGS+=("--local") shift ;; + --coprocessors) + if [ -n "$2" ] && [ "${2:0:1}" != "-" ]; then + DEPLOY_ARGS+=("--coprocessors" "$2") + shift 2 + else + echo -e "${RED}[ERROR]${RESET} ${BOLD}Coprocessor count argument missing${RESET}" + usage + exit 1 + fi + ;; + --coprocessor-threshold) + if [ -n "$2" ] && [ "${2:0:1}" != "-" ]; then + DEPLOY_ARGS+=("--coprocessor-threshold" "$2") + shift 2 + else + echo -e "${RED}[ERROR]${RESET} ${BOLD}Coprocessor threshold argument missing${RESET}" + usage + exit 1 + fi + ;; *) echo -e "${RED}[ERROR]${RESET} ${BOLD}Unknown argument for deploy: $1${RESET}" usage @@ -167,6 +281,7 @@ case $COMMAND in GREP="" NO_RELAYER="" NO_COMPILE="" + WAIT_FOR_KEY_BOOTSTRAP="" while (( "$#" )); do case "$1" in @@ -229,6 +344,7 @@ case $COMMAND in input-proof) log_message="${LIGHT_BLUE}${BOLD}[TEST] INPUT PROOF (uint64)${RESET}" docker_args+=("-g" "test user input uint64") + WAIT_FOR_KEY_BOOTSTRAP="1" ;; input-proof-compute-decrypt) log_message="${LIGHT_BLUE}${BOLD}[TEST] INPUT PROOF (uint64)${RESET}" @@ -291,8 +407,49 @@ case $COMMAND in fi if [ "$TEST_TYPE" != "debug" ]; then echo -e "${log_message}" - docker 
exec fhevm-test-suite-e2e-debug "${docker_args[@]}" + if [ -n "$WAIT_FOR_KEY_BOOTSTRAP" ]; then + wait_for_coprocessor_key_bootstrap || { collect_coprocessor_topology_logs; exit 1; } # bootstrap timeout: dump logs and stop instead of running a test that cannot pass + fi + test_status=0 + if docker exec fhevm-test-suite-e2e-debug "${docker_args[@]}"; then + test_status=0 + else + test_status=$? + fi + if [ -n "$WAIT_FOR_KEY_BOOTSTRAP" ]; then + collect_coprocessor_topology_logs + fi + if [ "$test_status" -ne 0 ]; then + exit "$test_status" + fi + fi + ;; + + smoke) + print_logo + PROFILE=${1:-} + if [[ ! $PROFILE =~ ^(multi-2-2|multi-3-5)$ ]]; then + echo -e "${RED}[ERROR]${RESET} ${BOLD}Unknown smoke profile: ${PROFILE}${RESET}" + usage + exit 1 fi + + coprocessors="" + threshold="" + case "$PROFILE" in + multi-2-2) + coprocessors=2 + threshold=2 + ;; + multi-3-5) + coprocessors=5 + threshold=3 + ;; + esac + + echo -e "${LIGHT_BLUE}${BOLD}[SMOKE] Running profile ${PROFILE} (n=${coprocessors}, t=${threshold})...${RESET}" + "${SCRIPT_DIR}/fhevm-cli" deploy --local --coprocessors "${coprocessors}" --coprocessor-threshold "${threshold}" || exit $? # do not test against a stack that failed to deploy + "${SCRIPT_DIR}/fhevm-cli" test input-proof ;; help|-h|--help) diff --git a/test-suite/fhevm/scripts/deploy-fhevm-stack.sh b/test-suite/fhevm/scripts/deploy-fhevm-stack.sh index 24b9b47e60..3c81f39b11 100755 --- a/test-suite/fhevm/scripts/deploy-fhevm-stack.sh +++ b/test-suite/fhevm/scripts/deploy-fhevm-stack.sh @@ -99,6 +99,10 @@ RESUME_STEP="" ONLY_STEP="" RESUME_FLAG_DETECTED=false ONLY_FLAG_DETECTED=false +COPROCESSOR_COUNT=1 +COPROCESSOR_THRESHOLD_OVERRIDE="" +COPROCESSOR_COUNT_FLAG_DETECTED=false +COPROCESSOR_THRESHOLD_FLAG_DETECTED=false NEW_ARGS=() for arg in "$@"; do @@ -112,6 +116,10 @@ for arg in "$@"; do RESUME_FLAG_DETECTED=true elif [[ "$arg" == "--only" ]]; then ONLY_FLAG_DETECTED=true + elif [[ "$arg" == "--coprocessors" ]]; then + COPROCESSOR_COUNT_FLAG_DETECTED=true + elif [[ "$arg" == "--coprocessor-threshold" ]]; then + COPROCESSOR_THRESHOLD_FLAG_DETECTED=true elif [[ "$RESUME_FLAG_DETECTED" == true ]]; then RESUME_STEP="$arg" 
RESUME_FLAG_DETECTED=false @@ -132,6 +140,20 @@ for arg in "$@"; do exit 1 fi log_info "Only mode: deploying only step '$ONLY_STEP'" + elif [[ "$COPROCESSOR_COUNT_FLAG_DETECTED" == true ]]; then + COPROCESSOR_COUNT="$arg" + COPROCESSOR_COUNT_FLAG_DETECTED=false + if ! [[ "$COPROCESSOR_COUNT" =~ ^[1-9][0-9]*$ ]]; then # leading zeros rejected: [[ 08 -lt 1 ]] is an octal parse error, so 08/09 used to slip through + log_error "--coprocessors expects a positive integer" + exit 1 + fi + elif [[ "$COPROCESSOR_THRESHOLD_FLAG_DETECTED" == true ]]; then + COPROCESSOR_THRESHOLD_OVERRIDE="$arg" + COPROCESSOR_THRESHOLD_FLAG_DETECTED=false + if ! [[ "$COPROCESSOR_THRESHOLD_OVERRIDE" =~ ^[1-9][0-9]*$ ]]; then # same rule as --coprocessors so later -gt comparisons always see plain decimals + log_error "--coprocessor-threshold expects a positive integer" + exit 1 + fi else NEW_ARGS+=("$arg") fi @@ -150,12 +172,32 @@ if [[ "$ONLY_FLAG_DETECTED" == true ]]; then exit 1 fi +if [[ "$COPROCESSOR_COUNT_FLAG_DETECTED" == true ]]; then + log_error "--coprocessors requires a value" + exit 1 +fi + +if [[ "$COPROCESSOR_THRESHOLD_FLAG_DETECTED" == true ]]; then + log_error "--coprocessor-threshold requires a value" + exit 1 +fi + # Check for conflicting flags if [[ -n "$RESUME_STEP" && -n "$ONLY_STEP" ]]; then log_error "Cannot use --resume and --only together" exit 1 fi +if [[ -n "$COPROCESSOR_THRESHOLD_OVERRIDE" ]] && [[ "$COPROCESSOR_THRESHOLD_OVERRIDE" -gt "$COPROCESSOR_COUNT" ]]; then + log_error "Invalid coprocessor threshold: $COPROCESSOR_THRESHOLD_OVERRIDE (must be <= --coprocessors $COPROCESSOR_COUNT)" + exit 1 +fi + +if [[ "$COPROCESSOR_COUNT" -gt 5 ]]; then + log_error "This local multicoprocessor mode currently supports up to 5 coprocessors" + exit 1 +fi + # Overwrite original arguments with the filtered list (removes local flags from $@) set -- "${NEW_ARGS[@]}" @@ -215,6 +257,15 @@ if [ "$LOCAL_BUILD" = true ]; then fi # Function to check if services are ready based on expected state +log_contains() { + local pattern=$1 + if command -v rg >/dev/null 2>&1; then + rg -q "$pattern" 
+ else + grep -q "$pattern" + fi +} + wait_for_service() { local compose_file=$1 local service_name=$2 @@ -241,6 +292,17 @@ wait_for_service() { local status=$(docker inspect --format "{{.State.Status}}" "$container_id") local exit_code=$(docker inspect --format "{{.State.ExitCode}}" "$container_id") + # Some one-shot jobs may complete their work but keep a process alive. + # For host-sc-deploy, treat the deployment completion log as success and stop it. + if [[ "$expect_running" == "false" && "$service_name" == "host-sc-deploy" && "$status" == "running" ]]; then + if docker logs "$container_id" 2>&1 | log_contains "Contract deployment done!"; then + log_warn "$service_name reported completion marker while still running; stopping container to unblock flow" + docker stop "$container_id" >/dev/null 2>&1 || true + status=$(docker inspect --format "{{.State.Status}}" "$container_id") + exit_code=$(docker inspect --format "{{.State.ExitCode}}" "$container_id") + fi + fi + # Check if service meets the expected state if [[ "$expect_running" == "true" && "$status" == "running" ]]; then log_info "$service_name is now running" @@ -265,6 +327,30 @@ wait_for_service() { fi done } + +wait_for_relayer_ready() { + local max_retries=24 + local retry_interval=5 + local container_name="${PROJECT}-relayer" + + log_info "Waiting for $container_name readiness signal..." + + for ((i=1; i<=max_retries; i++)); do + if docker logs --since=10m "$container_name" 2>&1 | log_contains "All servers are ready and responding"; then + log_info "$container_name reported ready" + return 0 + fi + + if [ "$i" -lt "$max_retries" ]; then + log_warn "$container_name not ready yet, waiting ${retry_interval}s... 
(${i}/${max_retries})" + sleep "$retry_interval" + else + log_error "$container_name did not report ready within expected time" + docker logs --tail 200 "$container_name" || true + return 1 + fi + done +} # Function to prepare the local environment file for a component prepare_local_env_file() { local component=$1 @@ -311,6 +397,148 @@ prepare_all_env_files() { log_info "All local environment files prepared successfully" } +get_env_value() { + local file=$1 + local key=$2 + awk -F= -v k="$key" '$1 == k {print substr($0, index($0, "=") + 1); exit}' "$file" +} + +set_env_value() { + local file=$1 + local key=$2 + local value=$3 + local escaped_value # value made safe for the replacement side of the '|'-delimited sed below + escaped_value=$(printf '%s' "$value" | sed 's/[\\|&]/\\&/g') + if grep -q "^${key}=" "$file"; then + sed -i.bak "s|^${key}=.*|${key}=${escaped_value}|" "$file" + else + printf '%s=%s\n' "$key" "$value" >> "$file" + fi +} + +# Build effective n/t topology config and per-instance coprocessor env files. +configure_multicoprocessor_envs() { + local gateway_env="$SCRIPT_DIR/../env/staging/.env.gateway-sc.local" + local host_env="$SCRIPT_DIR/../env/staging/.env.host-sc.local" + local coprocessor_env="$SCRIPT_DIR/../env/staging/.env.coprocessor.local" + + local configured_threshold + configured_threshold=$(get_env_value "$gateway_env" "COPROCESSOR_THRESHOLD") + if [[ -z "$configured_threshold" ]]; then + configured_threshold=1 + fi + if [[ -n "$COPROCESSOR_THRESHOLD_OVERRIDE" ]]; then + configured_threshold="$COPROCESSOR_THRESHOLD_OVERRIDE" + fi + + if [[ "$configured_threshold" -gt "$COPROCESSOR_COUNT" ]]; then + log_error "Configured coprocessor threshold ($configured_threshold) cannot exceed number of coprocessors ($COPROCESSOR_COUNT)" + exit 1 + fi + + set_env_value "$gateway_env" "NUM_COPROCESSORS" "$COPROCESSOR_COUNT" + set_env_value "$gateway_env" "COPROCESSOR_THRESHOLD" "$configured_threshold" + set_env_value "$host_env" "NUM_COPROCESSORS" "$COPROCESSOR_COUNT" + set_env_value "$host_env" "COPROCESSOR_THRESHOLD" 
"$configured_threshold" + + # Default 1/1 topology does not require deriving extra coprocessor keys. + if [[ "$COPROCESSOR_COUNT" -eq 1 ]]; then + return 0 + fi + + local gateway_mnemonic + gateway_mnemonic=$(get_env_value "$gateway_env" "MNEMONIC") + if [[ -z "$gateway_mnemonic" ]]; then + log_error "Missing MNEMONIC in $gateway_env; cannot derive coprocessor accounts" + exit 1 + fi + + if ! command -v cast >/dev/null 2>&1; then + log_error "cast is required to derive coprocessor accounts from mnemonic" + exit 1 + fi + + local -a account_indices=(5 8 9 10 11) + if [[ "$COPROCESSOR_COUNT" -gt "${#account_indices[@]}" ]]; then + log_error "Not enough predefined account indices for $COPROCESSOR_COUNT coprocessors" + exit 1 + fi + + for ((idx=0; idx "$temp_compose" + + local db_migration_service="coprocessor${instance_idx}-db-migration" + local runtime_services=( + "coprocessor${instance_idx}-host-listener" + "coprocessor${instance_idx}-host-listener-poller" + "coprocessor${instance_idx}-gw-listener" + "coprocessor${instance_idx}-tfhe-worker" + "coprocessor${instance_idx}-zkproof-worker" + "coprocessor${instance_idx}-sns-worker" + "coprocessor${instance_idx}-transaction-sender" + ) + + log_info "Starting additional coprocessor instance #$instance_idx (db migration phase)" + if [[ "$FORCE_BUILD" == true ]]; then + docker compose -p "${PROJECT}" --env-file "$env_file" -f "$temp_compose" up --build -d "$db_migration_service" + else + docker compose -p "${PROJECT}" --env-file "$env_file" -f "$temp_compose" up -d "$db_migration_service" + fi + + wait_for_service "$temp_compose" "$db_migration_service" "false" + + log_info "Starting additional coprocessor instance #$instance_idx (runtime phase)" + if [[ "$FORCE_BUILD" == true ]]; then + docker compose -p "${PROJECT}" --env-file "$env_file" -f "$temp_compose" up --build -d "${runtime_services[@]}" + else + docker compose -p "${PROJECT}" --env-file "$env_file" -f "$temp_compose" up -d "${runtime_services[@]}" + fi + + 
wait_for_service "$temp_compose" "coprocessor${instance_idx}-host-listener" "true" + wait_for_service "$temp_compose" "coprocessor${instance_idx}-gw-listener" "true" + wait_for_service "$temp_compose" "coprocessor${instance_idx}-tfhe-worker" "true" + wait_for_service "$temp_compose" "coprocessor${instance_idx}-zkproof-worker" "true" + wait_for_service "$temp_compose" "coprocessor${instance_idx}-sns-worker" "true" + wait_for_service "$temp_compose" "coprocessor${instance_idx}-transaction-sender" "true" + + rm -f "$temp_compose" +} + # Function to start an entire docker-compose file and wait for specified services run_compose() { local component=$1 @@ -405,12 +633,19 @@ get_minio_ip() { local minio_container_name=$1 local minio_ip minio_ip=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$minio_container_name") - local coprocessor_file="$SCRIPT_DIR/../env/staging/.env.coprocessor.local" if [ -n "$minio_ip" ]; then echo "Found $minio_container_name container IP: $minio_ip" - sed -i.bak "s|AWS_ENDPOINT_URL=http://[^:]*:9000|AWS_ENDPOINT_URL=http://$minio_ip:9000|" \ - "$coprocessor_file" - echo "Updated AWS_ENDPOINT_URL to http://$minio_ip:9000" + local coprocessor_files=("$SCRIPT_DIR/../env/staging/.env.coprocessor.local") + for ((idx=1; idx