From 106476858dc25613ba4bf89ed02c19903b05e6fb Mon Sep 17 00:00:00 2001 From: Vaibhav Mittal Date: Sun, 9 Mar 2025 12:52:32 +0000 Subject: [PATCH 1/5] Enhance error handling and logging in init.sh script --- config/init.sh | 48 ++++++++++++++++++++++++++++++++++++++++-------- data/init.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++----- debug_env.sh | 43 +++++++++++++++++++++++++++++++++++++++++++ init.sh | 41 ++++++++++++++++++++++++++++++++++++++++- study/init.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 213 insertions(+), 17 deletions(-) create mode 100755 debug_env.sh diff --git a/config/init.sh b/config/init.sh index bec1d9b..db547ff 100755 --- a/config/init.sh +++ b/config/init.sh @@ -1,12 +1,33 @@ #!/usr/bin/env bash + + +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-) +# Ensure .env file exists +if [ ! -f "$SCRIPT_DIR/../.env" ]; then + echo " Error: .env file is missing in the parent directory." >&2 + exit 1 +fi + + +VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) +if [ -z "$VERSION" ]; then + echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2 + exit 1 +fi -# This is a hack. Docker run doesn't escape '&' but docker compose does. 
-sed 's/&/\\&/g' ../.env > ../.env.temp +# Create a temporary .env file to escape special characters (e.g., `&`) +TEMP_ENV_FILE="$SCRIPT_DIR/../.env.temp" +sed 's/&/\\&/g' "$SCRIPT_DIR/../.env" > "$TEMP_ENV_FILE" -docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-webapp/application.properties | + +echo "⚙️ Generating application.properties using Docker image: $VERSION" +docker run --rm -i --env-file "$TEMP_ENV_FILE" "$VERSION" bin/sh -c 'cat /cbioportal-webapp/application.properties | sed "s|spring.datasource.password=.*|spring.datasource.password=${DB_MYSQL_PASSWORD}|" | \ sed "s|spring.datasource.username=.*|spring.datasource.username=${DB_MYSQL_USERNAME}|" | \ sed "s|spring.datasource.url=.*|spring.datasource.url=${DB_MYSQL_URL}|" | \ @@ -17,8 +38,19 @@ docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-w sed "s|.*spring.datasource.clickhouse.password=.*|spring.datasource.clickhouse.password=${DB_CLICKHOUSE_PASSWORD}|" | \ sed "s|.*spring.datasource.clickhouse.url=.*|spring.datasource.clickhouse.url=${DB_CLICKHOUSE_URL}|" | \ sed "s|.*spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|" | \ - sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' \ -> application.properties + sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|" > application.properties' || { + echo "❌ Error: Failed to generate application.properties using Docker." >&2 + rm -f "$TEMP_ENV_FILE" + exit 1 +} + + +rm -f "$TEMP_ENV_FILE" + + +if [ ! -f application.properties ]; then + echo " Error: application.properties file was not created." 
>&2 + exit 1 +fi -# Cleanup for the hack above -rm ../.env.temp \ No newline at end of file +echo " application.properties generated successfully." diff --git a/data/init.sh b/data/init.sh index 7898015..1811638 100755 --- a/data/init.sh +++ b/data/init.sh @@ -1,11 +1,50 @@ #!/usr/bin/env bash + +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-) +#this Extracts Docker image version from .env +VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) +if [ -z "$VERSION" ]; then + echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2 + exit 1 +fi + +# This Fetchs the schema file (cgds.sql) +echo "⚙️ Fetching schema file (cgds.sql) from Docker image: $VERSION" +if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then + echo "Error: Failed to fetch cgds.sql from Docker image." >&2 + exit 2 +fi + +# This Validates that cgds.sql was created successfully +if [ ! -f "$SCRIPT_DIR/cgds.sql" ]; then + echo " Error: cgds.sql file was not created." >&2 + exit 3 +fi + +echo " Schema file (cgds.sql) fetched successfully." + +# This Downloads the seed database (seed.sql.gz) +SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" +echo "⚙️ Downloading seed database from: $SEED_URL" +if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then + echo " Error: Failed to download seed database from $SEED_URL." >&2 + exit 4 +fi + +# this Validates that seed.sql.gz was downloaded successfully +if [ ! -f "$SCRIPT_DIR/seed.sql.gz" ]; then + echo " Error: seed.sql.gz file was not downloaded." 
>&2 + exit 5 +fi -# Get the schema -docker run --rm -i $VERSION cat /cbioportal/db-scripts/cgds.sql > cgds.sql +echo " Seed database (seed.sql.gz) downloaded successfully." -# Download the combined hg19 + hg38 seed database -wget -O seed.sql.gz "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" +echo "=== Data initialization completed successfully ===" diff --git a/debug_env.sh b/debug_env.sh new file mode 100755 index 0000000..a988987 --- /dev/null +++ b/debug_env.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# This runs Diagnostic checks for environment setup and logs + +check_file_permissions() { + echo " Checking file permissions..." + find . -name "*.sh" ! -perm /a+x && { + echo " Missing execute permissions:" + find . -name "*.sh" ! -perm /a+x | xargs ls -l + return 1 + } +} + +check_line_endings() { + echo " Checking line endings..." + find . -name "*.sh" -exec file {} \; | grep CRLF && { + echo " CRLF line endings detected:" + find . -name "*.sh" -exec file {} \; | grep CRLF | cut -d: -f1 | xargs dos2unix + return 1 + } +} + +main() { + echo "=== Starting Environment Diagnostics ===" + + # this is for System info checks + echo "## Platform Info ##" + uname -a + + # this is for File system checks + check_file_permissions || exit 1 + check_line_endings || exit 1 + + + ./init.sh || { + echo " Initialization failed. Check logs." + exit 1 + } + + echo " All diagnostics passed successfully." 
+} + +main | tee debug.log diff --git a/init.sh b/init.sh index 2eb7130..51bbfc2 100755 --- a/init.sh +++ b/init.sh @@ -1,6 +1,45 @@ #!/usr/bin/env bash + +# Enable strict error handling +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + +PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : ' + +# Initialize debug log +DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log" +exec > >(tee -a "$DEBUG_LOG") 2>&1 + +echo "=== Starting initialization at $(date) ===" + for d in config study data; do - cd $d; ./init.sh + echo "▶ Entering directory: $d" + if [ ! -f "$d/init.sh" ]; then + echo " Error: Missing $d/init.sh" >&2 + exit 1 + fi + + # Ensure execute permissions + chmod +x "$d/init.sh" || { + echo " Failed to set execute permissions on $d/init.sh" >&2 + exit 2 + } + + # Execute the subdirectory's init.sh script + if ! cd "$d"; then + echo " Failed to enter directory $d" >&2 + exit 3 + fi + + echo "⚙️ Running init.sh in $d" + if ! ./init.sh; then + echo " Critical failure in $d/init.sh" >&2 + exit 4 + fi + cd .. 
done +echo "=== Initialization completed successfully at $(date) ===" diff --git a/study/init.sh b/study/init.sh index 0be86a4..a479bb7 100755 --- a/study/init.sh +++ b/study/init.sh @@ -1,10 +1,53 @@ #!/usr/bin/env bash -# download data hub study and import + + +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}" + +# Base URL for downloading studies +DATAHUB_BASE_URL="https://cbioportal-datahub.s3.amazonaws.com" + + for study in ${DATAHUB_STUDIES}; do - wget -O ${study}.tar.gz "https://cbioportal-datahub.s3.amazonaws.com/${study}.tar.gz" - tar xvfz ${study}.tar.gz + echo " Processing study: $study" + + + STUDY_ARCHIVE="${SCRIPT_DIR}/${study}.tar.gz" + STUDY_DIR="${SCRIPT_DIR}/${study}" + + + if [ -f "$STUDY_ARCHIVE" ]; then + echo " Archive already exists: $STUDY_ARCHIVE" + else + # Download the study archive + echo "⬇ Downloading $study from $DATAHUB_BASE_URL" + if ! wget -O "$STUDY_ARCHIVE" "${DATAHUB_BASE_URL}/${study}.tar.gz"; then + echo " Error: Failed to download ${study}.tar.gz from $DATAHUB_BASE_URL" >&2 + exit 1 + fi + echo " Download completed: $STUDY_ARCHIVE" + fi + + # Extract the archive if it hasn't been extracted yet + if [ -d "$STUDY_DIR" ]; then + echo " Study directory already exists: $STUDY_DIR" + else + echo " Extracting $STUDY_ARCHIVE to $STUDY_DIR" + if ! 
tar xvfz "$STUDY_ARCHIVE" -C "$SCRIPT_DIR"; then + echo " Error: Failed to extract $STUDY_ARCHIVE" >&2 + exit 2 + fi + echo " Extraction completed: $STUDY_DIR" + fi + done + +echo "=== All studies processed successfully ===" From bd07b3c12d61ed4f601f97fcbe67f26fd9a31817 Mon Sep 17 00:00:00 2001 From: Vaibhav Mittal Date: Sun, 9 Mar 2025 12:59:34 +0000 Subject: [PATCH 2/5] removed emojis --- data/init.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/init.sh b/data/init.sh index 1811638..0c7abc3 100755 --- a/data/init.sh +++ b/data/init.sh @@ -12,12 +12,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" #this Extracts Docker image version from .env VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) if [ -z "$VERSION" ]; then - echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2 + echo " Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2 exit 1 fi # This Fetchs the schema file (cgds.sql) -echo "⚙️ Fetching schema file (cgds.sql) from Docker image: $VERSION" +echo " Fetching schema file (cgds.sql) from Docker image: $VERSION" if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then echo "Error: Failed to fetch cgds.sql from Docker image." >&2 exit 2 @@ -33,7 +33,7 @@ echo " Schema file (cgds.sql) fetched successfully." # This Downloads the seed database (seed.sql.gz) SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" -echo "⚙️ Downloading seed database from: $SEED_URL" +echo " Downloading seed database from: $SEED_URL" if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then echo " Error: Failed to download seed database from $SEED_URL." 
>&2 exit 4 From 50abb9010c036ed62be2fa28fb442061d5b848e5 Mon Sep 17 00:00:00 2001 From: Vaibhav Mittal Date: Tue, 11 Mar 2025 20:57:58 +0000 Subject: [PATCH 3/5] Enhance network resilience with retry mechanism for downloads --- data/init.sh | 14 +++++++--- study/init.sh | 4 +++ utils.sh | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 4 deletions(-) create mode 100755 utils.sh diff --git a/data/init.sh b/data/init.sh index 0c7abc3..cc5d2b7 100755 --- a/data/init.sh +++ b/data/init.sh @@ -8,6 +8,10 @@ fi SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +ROOT_DIR="$(dirname "$SCRIPT_DIR")" + +# Source utility functions +source "$ROOT_DIR/utils.sh" #this Extracts Docker image version from .env VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) @@ -31,11 +35,13 @@ fi echo " Schema file (cgds.sql) fetched successfully." -# This Downloads the seed database (seed.sql.gz) +# Download the seed database (seed.sql.gz) with retries SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" -echo " Downloading seed database from: $SEED_URL" -if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then - echo " Error: Failed to download seed database from $SEED_URL." >&2 +echo "Downloading seed database from: $SEED_URL" + +# Use the download_with_retry function (5 retries, 15s delay) +if ! download_with_retry "$SEED_URL" "$SCRIPT_DIR/seed.sql.gz" 5 15; then + echo "Error: Failed to download seed database after multiple attempts." 
>&2 exit 4 fi diff --git a/study/init.sh b/study/init.sh index a479bb7..2be4f5c 100755 --- a/study/init.sh +++ b/study/init.sh @@ -8,6 +8,10 @@ fi SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +ROOT_DIR="$(dirname "$SCRIPT_DIR")" + +# Source utility functions +source "$ROOT_DIR/utils.sh" DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}" diff --git a/utils.sh b/utils.sh new file mode 100755 index 0000000..a382675 --- /dev/null +++ b/utils.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash + +# Function to download files with retries +# Usage: download_with_retry URL DESTINATION MAX_RETRIES RETRY_DELAY +download_with_retry() { + local url="$1" + local destination="$2" + local max_retries="${3:-3}" + local retry_delay="${4:-10}" + local attempt=1 + local http_code=0 + + echo "[INFO] Downloading $url to $destination (max $max_retries attempts)" + + while [ $attempt -le $max_retries ]; do + echo "[INFO] Download attempt $attempt of $max_retries" + + + if wget --spider --server-response "$url" 2>&1 | grep -q "200 OK"; then + if wget --no-verbose --continue --timeout=30 -O "$destination" "$url"; then + echo "[SUCCESS] Download completed successfully on attempt $attempt" + return 0 + fi + else + echo "[WARNING] URL not accessible or returned non-200 status" + fi + + attempt=$((attempt + 1)) + if [ $attempt -le $max_retries ]; then + echo "[INFO] Retrying download in $retry_delay seconds..." + sleep $retry_delay + else + echo "[ERROR] Failed to download after $max_retries attempts" >&2 + return 1 + fi + done +} + +# Function to extract archives with validation +# Usage: extract_with_validation ARCHIVE_PATH EXTRACT_DIR +extract_with_validation() { + local archive="$1" + local extract_dir="$2" + + echo "[INFO] Extracting $archive to $extract_dir" + + + if [ ! 
-s "$archive" ]; then + echo "[ERROR] Archive $archive does not exist or is empty" >&2 + return 1 + fi + + + mkdir -p "$extract_dir" + + + if [[ "$archive" == *.tar.gz ]]; then + if ! tar xzf "$archive" -C "$extract_dir"; then + echo "[ERROR] Failed to extract tar.gz archive" >&2 + return 1 + fi + elif [[ "$archive" == *.zip ]]; then + if ! unzip -q "$archive" -d "$extract_dir"; then + echo "[ERROR] Failed to extract zip archive" >&2 + return 1 + fi + else + echo "[ERROR] Unsupported archive format" >&2 + return 1 + fi + + echo "[SUCCESS] Extraction completed successfully" + return 0 +} From a1a6b15e72aa61b9a34b9d3659fd5e05fa83247e Mon Sep 17 00:00:00 2001 From: Vaibhav701161 Date: Wed, 19 Mar 2025 19:05:59 +0000 Subject: [PATCH 4/5] Enhancement: Configurable Environment Settings in utils.sh --- .env.defaults.template | 19 +++++++ README.md | 38 +++++++++++++ utils.sh | 123 +++++++++++++++++++++++++---------------- 3 files changed, 133 insertions(+), 47 deletions(-) create mode 100644 .env.defaults.template diff --git a/.env.defaults.template b/.env.defaults.template new file mode 100644 index 0000000..eea2869 --- /dev/null +++ b/.env.defaults.template @@ -0,0 +1,19 @@ +# Template configuration for cBioPortal Docker initialization +# Copy this file to .env.defaults and customize as needed +# Command: cp .env.defaults.template .env.defaults + +# Network settings +DOWNLOAD_RETRY_COUNT=3 +DOWNLOAD_RETRY_DELAY=10 +DOWNLOAD_TIMEOUT=30 + +# Study settings +DATAHUB_STUDIES=lgg_ucsf_2014 msk_impact_2017 +DATAHUB_BASE_URL=https://cbioportal-datahub.s3.amazonaws.com + +# Seed database +SEED_DB_URL=https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz + +# Logging settings +VERBOSE_LOGS=true +DEBUG_MODE=false diff --git a/README.md b/README.md index 5a3969f..5a290bf 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,44 @@ docker compose down -v If you were able to successfully set up a local installation of cBioPortal, please add it 
here: https://www.cbioportal.org/installations. Thank you! +## Configuration System + +The initialization scripts support customization through a flexible configuration system: + +### Configuration Files + +1. **Built-in Defaults**: A set of sensible defaults is included in the code if no configuration files are present. + +2. **Site-wide Configuration** (optional): + - Copy `.env.defaults.template` to `.env.defaults` + - Modify values as needed for your environment + - This file should NOT be committed to version control + +3. **User-specific Configuration** (optional): + - Create or modify `.env` file with any values you want to override + - This takes precedence over `.env.defaults` + - This file should NOT be committed to version control + +### Available Configuration Options + +| Option | Description | Default | +|--------|-------------|---------| +| `DOWNLOAD_RETRY_COUNT` | Number of retry attempts for downloads | 3 | +| `DOWNLOAD_RETRY_DELAY` | Delay in seconds between retries | 10 | +| `DOWNLOAD_TIMEOUT` | Timeout in seconds for download operations | 30 | +| `DATAHUB_STUDIES` | Space-separated list of studies to download | "lgg_ucsf_2014 msk_impact_2017" | +| `DATAHUB_BASE_URL` | Base URL for datahub studies | "https://cbioportal-datahub.s3.amazonaws.com" | +| `SEED_DB_URL` | URL for seed database | "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" | +| `VERBOSE_LOGS` | Enable verbose logging | true | +| `DEBUG_MODE` | Enable debug mode (prints all commands) | false | + + +## Setup Instructions +1. Copy `.env.defaults.template` to `.env.defaults`: + ```sh + cp .env.defaults.template .env.defaults + + ## Loading custom studies By default, the cbioportal docker compose setup comes preloaded with the `lgg_ucsf_2014` study, which is imported as part of the `DOCKER_IMAGE_MYSQL` environment variable [here](.env). If you want to load custom studies, run the following commands. 
```shell diff --git a/utils.sh b/utils.sh index a382675..fc43142 100755 --- a/utils.sh +++ b/utils.sh @@ -1,74 +1,103 @@ #!/usr/bin/env bash +# Script directory +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +# Default configuration values +DEFAULT_DOWNLOAD_RETRY_COUNT=3 +DEFAULT_DOWNLOAD_RETRY_DELAY=10 +DEFAULT_DOWNLOAD_TIMEOUT=30 +DEFAULT_DATAHUB_STUDIES="lgg_ucsf_2014 msk_impact_2017" +DEFAULT_DATAHUB_BASE_URL="https://cbioportal-datahub.s3.amazonaws.com" +DEFAULT_SEED_DB_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" +DEFAULT_VERBOSE_LOGS=true +DEFAULT_DEBUG_MODE=false + +# Initialize with default values +DOWNLOAD_RETRY_COUNT=${DEFAULT_DOWNLOAD_RETRY_COUNT} +DOWNLOAD_RETRY_DELAY=${DEFAULT_DOWNLOAD_RETRY_DELAY} +DOWNLOAD_TIMEOUT=${DEFAULT_DOWNLOAD_TIMEOUT} +DATAHUB_STUDIES=${DEFAULT_DATAHUB_STUDIES} +DATAHUB_BASE_URL=${DEFAULT_DATAHUB_BASE_URL} +SEED_DB_URL=${DEFAULT_SEED_DB_URL} +VERBOSE_LOGS=${DEFAULT_VERBOSE_LOGS} +DEBUG_MODE=${DEFAULT_DEBUG_MODE} + +# Load .env.defaults if it exists +if [ -f "$SCRIPT_DIR/.env.defaults" ]; then + echo "[INFO] Loading configuration from .env.defaults" + source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env.defaults") +fi + +# Override with .env if it exists +if [ -f "$SCRIPT_DIR/.env" ]; then + echo "[INFO] Loading configuration overrides from .env" + source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env") +fi + +# Enable debug logging if requested +if [ "$DEBUG_MODE" = "true" ]; then + set -x +fi + # Function to download files with retries -# Usage: download_with_retry URL DESTINATION MAX_RETRIES RETRY_DELAY download_with_retry() { local url="$1" local destination="$2" - local max_retries="${3:-3}" - local 
retry_delay="${4:-10}" + local max_retries="${3:-$DOWNLOAD_RETRY_COUNT}" + local retry_delay="${4:-$DOWNLOAD_RETRY_DELAY}" + local timeout="${DOWNLOAD_TIMEOUT:-30}" local attempt=1 - local http_code=0 - + + if [ -f "$destination" ]; then + echo "[INFO] File already exists: $destination (skipping download)" + return 0 + fi + echo "[INFO] Downloading $url to $destination (max $max_retries attempts)" - + while [ $attempt -le $max_retries ]; do - echo "[INFO] Download attempt $attempt of $max_retries" - - - if wget --spider --server-response "$url" 2>&1 | grep -q "200 OK"; then - if wget --no-verbose --continue --timeout=30 -O "$destination" "$url"; then - echo "[SUCCESS] Download completed successfully on attempt $attempt" - return 0 - fi - else - echo "[WARNING] URL not accessible or returned non-200 status" + echo "[INFO] Attempt $attempt of $max_retries" + + if wget --progress=dot:giga --timeout="$timeout" -O "$destination" "$url"; then + echo "[SUCCESS] Download completed successfully on attempt $attempt" + return 0 fi - + + echo "[WARNING] Download failed. Retrying in $retry_delay seconds..." + sleep "$retry_delay" attempt=$((attempt + 1)) - if [ $attempt -le $max_retries ]; then - echo "[INFO] Retrying download in $retry_delay seconds..." - sleep $retry_delay - else - echo "[ERROR] Failed to download after $max_retries attempts" >&2 - return 1 - fi done + + echo "[ERROR] Failed to download after $max_retries attempts" >&2 + return 1 } # Function to extract archives with validation -# Usage: extract_with_validation ARCHIVE_PATH EXTRACT_DIR extract_with_validation() { local archive="$1" local extract_dir="$2" - + + if [ -d "$extract_dir" ] && [ "$(ls -A "$extract_dir")" ]; then + echo "[INFO] Extraction already completed: $extract_dir (skipping)" + return 0 + fi + echo "[INFO] Extracting $archive to $extract_dir" - - + if [ ! 
-s "$archive" ]; then echo "[ERROR] Archive $archive does not exist or is empty" >&2 return 1 fi - - + mkdir -p "$extract_dir" - - - if [[ "$archive" == *.tar.gz ]]; then - if ! tar xzf "$archive" -C "$extract_dir"; then - echo "[ERROR] Failed to extract tar.gz archive" >&2 - return 1 - fi - elif [[ "$archive" == *.zip ]]; then - if ! unzip -q "$archive" -d "$extract_dir"; then - echo "[ERROR] Failed to extract zip archive" >&2 - return 1 - fi - else - echo "[ERROR] Unsupported archive format" >&2 - return 1 - fi - + + case "$archive" in + *.tar.gz) tar xzf "$archive" -C "$extract_dir" || { echo "[ERROR] Failed to extract tar.gz archive" >&2; return 1; } ;; + *.zip) unzip -q "$archive" -d "$extract_dir" || { echo "[ERROR] Failed to extract zip archive" >&2; return 1; } ;; + *) echo "[ERROR] Unsupported archive format" >&2; return 1 ;; + esac + echo "[SUCCESS] Extraction completed successfully" return 0 } From 405176e5732e53da1653455e048e2c843dc7dacc Mon Sep 17 00:00:00 2001 From: Vaibhav701161 Date: Wed, 19 Mar 2025 19:52:29 +0000 Subject: [PATCH 5/5] Enhancement: Progress monitoring and summary reporting for initialization --- init.sh | 29 ++++++++++--- utils.sh | 124 ++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 128 insertions(+), 25 deletions(-) diff --git a/init.sh b/init.sh index 51bbfc2..d8dd202 100755 --- a/init.sh +++ b/init.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Enable strict error handling + set -eo pipefail if [ "${DEBUG_MODE}" = "true" ]; then set -x @@ -8,38 +8,57 @@ fi PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : ' -# Initialize debug log + DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log" exec > >(tee -a "$DEBUG_LOG") 2>&1 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +# Source utility functions +source "$SCRIPT_DIR/utils.sh" + + +init_report + echo "=== Starting initialization at $(date) ===" for d in config study data; do - echo "▶ Entering directory: $d" + echo " Entering directory: $d" if [ ! 
-f "$d/init.sh" ]; then echo " Error: Missing $d/init.sh" >&2 + echo "FAILED: Missing $d/init.sh script" >> "$SCRIPT_DIR/init_report.txt" exit 1 fi # Ensure execute permissions chmod +x "$d/init.sh" || { echo " Failed to set execute permissions on $d/init.sh" >&2 + echo "FAILED: Setting permissions on $d/init.sh" >> "$SCRIPT_DIR/init_report.txt" exit 2 } # Execute the subdirectory's init.sh script if ! cd "$d"; then - echo " Failed to enter directory $d" >&2 + echo " Failed to enter directory $d" >&2 + echo "FAILED: Changing to directory $d" >> "$SCRIPT_DIR/init_report.txt" exit 3 fi - echo "⚙️ Running init.sh in $d" + echo " Running init.sh in $d" + start_time=$(date +%s) if ! ./init.sh; then echo " Critical failure in $d/init.sh" >&2 + echo "FAILED: Executing $d/init.sh" >> "$SCRIPT_DIR/../init_report.txt" exit 4 fi + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo "SUCCESS: Executed $d/init.sh (took ${duration}s)" >> "$SCRIPT_DIR/../init_report.txt" cd .. done echo "=== Initialization completed successfully at $(date) ===" + + +generate_summary_report | tee "init_summary_$(date +%Y%m%d%H%M%S).txt" \ No newline at end of file diff --git a/utils.sh b/utils.sh index fc43142..8f5f777 100755 --- a/utils.sh +++ b/utils.sh @@ -26,13 +26,13 @@ DEBUG_MODE=${DEFAULT_DEBUG_MODE} # Load .env.defaults if it exists if [ -f "$SCRIPT_DIR/.env.defaults" ]; then echo "[INFO] Loading configuration from .env.defaults" - source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env.defaults") + source "$SCRIPT_DIR/.env.defaults" fi # Override with .env if it exists if [ -f "$SCRIPT_DIR/.env" ]; then echo "[INFO] Loading configuration overrides from .env" - source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env") + source "$SCRIPT_DIR/.env" fi # 
Enable debug logging if requested @@ -40,7 +40,7 @@ if [ "$DEBUG_MODE" = "true" ]; then set -x fi -# Function to download files with retries +# Function to download files with retries and progress bar download_with_retry() { local url="$1" local destination="$2" @@ -48,38 +48,58 @@ download_with_retry() { local retry_delay="${4:-$DOWNLOAD_RETRY_DELAY}" local timeout="${DOWNLOAD_TIMEOUT:-30}" local attempt=1 - + local report_file="$SCRIPT_DIR/../init_report.txt" + + # Check if file already exists if [ -f "$destination" ]; then echo "[INFO] File already exists: $destination (skipping download)" + # Add to summary report + echo "SUCCESS: $(basename "$destination") already exists (skipped download)" >> "$report_file" return 0 fi - + echo "[INFO] Downloading $url to $destination (max $max_retries attempts)" - + + # Create a temporary file to track progress + local temp_log=$(mktemp) + while [ $attempt -le $max_retries ]; do - echo "[INFO] Attempt $attempt of $max_retries" - - if wget --progress=dot:giga --timeout="$timeout" -O "$destination" "$url"; then + echo "[INFO] Download attempt $attempt of $max_retries" + + # Try to download with wget and show progress bar + if wget --no-verbose --continue --timeout=$timeout \ + --progress=bar:force --show-progress \ + -O "$destination" "$url" 2>&1 | tee "$temp_log"; then echo "[SUCCESS] Download completed successfully on attempt $attempt" + # Add to summary report + echo "SUCCESS: Downloaded $(basename "$destination") on attempt $attempt" >> "$report_file" + rm -f "$temp_log" return 0 fi - - echo "[WARNING] Download failed. Retrying in $retry_delay seconds..." - sleep "$retry_delay" + attempt=$((attempt + 1)) + if [ $attempt -le $max_retries ]; then + echo "[INFO] Retrying download in $retry_delay seconds..." 
+ sleep $retry_delay + else + echo "[ERROR] Failed to download after $max_retries attempts" >&2 + # Add to summary report + echo "FAILED: Download of $(basename "$destination") after $max_retries attempts" >> "$report_file" + rm -f "$temp_log" + return 1 + fi done - - echo "[ERROR] Failed to download after $max_retries attempts" >&2 - return 1 } # Function to extract archives with validation extract_with_validation() { local archive="$1" local extract_dir="$2" + local report_file="$SCRIPT_DIR/../init_report.txt" if [ -d "$extract_dir" ] && [ "$(ls -A "$extract_dir")" ]; then echo "[INFO] Extraction already completed: $extract_dir (skipping)" + echo "SUCCESS: Extraction of $(basename "$archive") already completed (skipped)" >> "$report_file" return 0 fi @@ -87,17 +107,81 @@ extract_with_validation() { if [ ! -s "$archive" ]; then echo "[ERROR] Archive $archive does not exist or is empty" >&2 + echo "FAILED: Extraction of $(basename "$archive") - file does not exist or is empty" >> "$report_file" return 1 fi mkdir -p "$extract_dir" case "$archive" in - *.tar.gz) tar xzf "$archive" -C "$extract_dir" || { echo "[ERROR] Failed to extract tar.gz archive" >&2; return 1; } ;; - *.zip) unzip -q "$archive" -d "$extract_dir" || { echo "[ERROR] Failed to extract zip archive" >&2; return 1; } ;; - *) echo "[ERROR] Unsupported archive format" >&2; return 1 ;; + *.tar.gz) + if tar xzf "$archive" -C "$extract_dir"; then + echo "[SUCCESS] Extraction completed successfully" + echo "SUCCESS: Extracted $(basename "$archive") to $(basename "$extract_dir")" >> "$report_file" + return 0 + else + echo "[ERROR] Failed to extract tar.gz archive" >&2 + echo "FAILED: Extraction of $(basename "$archive") - tar extraction error" >> "$report_file" + return 1 + fi ;; + *.zip) + if unzip -q "$archive" -d "$extract_dir"; then + echo "[SUCCESS] Extraction completed successfully" + echo "SUCCESS: Extracted $(basename "$archive") to $(basename "$extract_dir")" >> "$report_file" + return 0 + else + 
echo "[ERROR] Failed to extract zip archive" >&2 + echo "FAILED: Extraction of $(basename "$archive") - unzip extraction error" >> "$report_file" + return 1 + fi ;; + *) + echo "[ERROR] Unsupported archive format" >&2 + echo "FAILED: Extraction of $(basename "$archive") - unsupported format" >> "$report_file" + return 1 ;; esac +} + +# Function to generate a summary report +generate_summary_report() { + local report_file="$SCRIPT_DIR/../init_report.txt" + + echo "========================================" + echo "cBioPortal Initialization Summary Report" + echo "========================================" + echo "Generated at: $(date)" + echo "" + + if [ -f "$report_file" ]; then + echo "Operation Summary:" + echo "-----------------" + grep "SUCCESS:" "$report_file" | wc -l | xargs echo "Total successful operations:" + grep "FAILED:" "$report_file" | wc -l | xargs echo "Total failed operations:" + echo "" + + if grep -q "FAILED:" "$report_file"; then + echo "Failed Operations:" + echo "-----------------" + grep "FAILED:" "$report_file" + echo "" + fi + + echo "Details:" + echo "--------" + cat "$report_file" + else + echo "No operations recorded" + fi +} - echo "[SUCCESS] Extraction completed successfully" - return 0 +# Initialize report file +init_report() { + local report_file="$SCRIPT_DIR/../init_report.txt" + echo "# cBioPortal Initialization Report - $(date)" > "$report_file" + echo "# ====================================" >> "$report_file" + echo "" >> "$report_file" } + +# Make sure the report file exists +if [ ! -f "$SCRIPT_DIR/../init_report.txt" ]; then + init_report +fi \ No newline at end of file