From 106476858dc25613ba4bf89ed02c19903b05e6fb Mon Sep 17 00:00:00 2001
From: Vaibhav Mittal <vaibhavmittal146@gmail.com>
Date: Sun, 9 Mar 2025 12:52:32 +0000
Subject: [PATCH 1/5] Enhance error handling and logging in init.sh script

---
 config/init.sh | 48 ++++++++++++++++++++++++++++++++++++++++--------
 data/init.sh   | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 debug_env.sh   | 43 +++++++++++++++++++++++++++++++++++++++++++
 init.sh        | 41 ++++++++++++++++++++++++++++++++++++++++-
 study/init.sh  | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 213 insertions(+), 17 deletions(-)
 create mode 100755 debug_env.sh

diff --git a/config/init.sh b/config/init.sh
index bec1d9b..db547ff 100755
--- a/config/init.sh
+++ b/config/init.sh
@@ -1,12 +1,33 @@
 #!/usr/bin/env bash
+
+
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-)
+# Ensure .env file exists
+if [ ! -f "$SCRIPT_DIR/../.env" ]; then
+    echo " Error: .env file is missing in the parent directory." >&2
+    exit 1
+fi
+
+
+VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-)
+if [ -z "$VERSION" ]; then
+    echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
+    exit 1
+fi
 
-# This is a hack. Docker run doesn't escape '&' but docker compose does.
-sed 's/&/\\&/g' ../.env > ../.env.temp
+# Create a temporary .env file to escape special characters (e.g., `&`)
+TEMP_ENV_FILE="$SCRIPT_DIR/../.env.temp"
+sed 's/&/\\&/g' "$SCRIPT_DIR/../.env" > "$TEMP_ENV_FILE"
 
-docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-webapp/application.properties |
+
+echo "⚙️ Generating application.properties using Docker image: $VERSION"
+docker run --rm -i --env-file "$TEMP_ENV_FILE" "$VERSION" bin/sh -c 'cat /cbioportal-webapp/application.properties |
     sed "s|spring.datasource.password=.*|spring.datasource.password=${DB_MYSQL_PASSWORD}|" | \
     sed "s|spring.datasource.username=.*|spring.datasource.username=${DB_MYSQL_USERNAME}|" | \
     sed "s|spring.datasource.url=.*|spring.datasource.url=${DB_MYSQL_URL}|" | \
@@ -17,8 +38,19 @@ docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-w
     sed "s|.*spring.datasource.clickhouse.password=.*|spring.datasource.clickhouse.password=${DB_CLICKHOUSE_PASSWORD}|" | \
     sed "s|.*spring.datasource.clickhouse.url=.*|spring.datasource.clickhouse.url=${DB_CLICKHOUSE_URL}|" | \
     sed "s|.*spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|" | \
-    sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' \
-> application.properties
+    sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' > application.properties || {  # redirect stays outside the quoted command so the file is written on the host, not inside the --rm container
+        echo "❌ Error: Failed to generate application.properties using Docker." >&2
+        rm -f "$TEMP_ENV_FILE"
+        exit 1
+}
+
+
+rm -f "$TEMP_ENV_FILE"
+
+
+if [ ! -f application.properties ]; then
+    echo " Error: application.properties file was not created." >&2
+    exit 1
+fi
 
-# Cleanup for the hack above
-rm ../.env.temp
\ No newline at end of file
+echo " application.properties generated successfully."
diff --git a/data/init.sh b/data/init.sh
index 7898015..1811638 100755
--- a/data/init.sh
+++ b/data/init.sh
@@ -1,11 +1,50 @@
 #!/usr/bin/env bash
 
+
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
+
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-)
+#this  Extracts Docker image version from .env
+VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-)
+if [ -z "$VERSION" ]; then
+    echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
+    exit 1
+fi
+
+# This Fetchs the schema file (cgds.sql)
+echo "⚙️ Fetching schema file (cgds.sql) from Docker image: $VERSION"
+if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then
+    echo "Error: Failed to fetch cgds.sql from Docker image." >&2
+    exit 2
+fi
+
+# This Validates that cgds.sql was created successfully
+if [ ! -f "$SCRIPT_DIR/cgds.sql" ]; then
+    echo " Error: cgds.sql file was not created." >&2
+    exit 3
+fi
+
+echo " Schema file (cgds.sql) fetched successfully."
+
+# This  Downloads the seed database (seed.sql.gz)
+SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
+echo "⚙️ Downloading seed database from: $SEED_URL"
+if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then
+    echo " Error: Failed to download seed database from $SEED_URL." >&2
+    exit 4
+fi
+
+# this Validates that seed.sql.gz was downloaded successfully
+if [ ! -f "$SCRIPT_DIR/seed.sql.gz" ]; then
+    echo " Error: seed.sql.gz file was not downloaded." >&2
+    exit 5
+fi
 
-# Get the schema
-docker run --rm -i $VERSION cat /cbioportal/db-scripts/cgds.sql > cgds.sql
+echo " Seed database (seed.sql.gz) downloaded successfully."
 
-# Download the combined hg19 + hg38 seed database
-wget -O seed.sql.gz "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
+echo "=== Data initialization completed successfully ==="
diff --git a/debug_env.sh b/debug_env.sh
new file mode 100755
index 0000000..a988987
--- /dev/null
+++ b/debug_env.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Diagnostic checks for the environment setup; all output is copied to debug.log.
+set -o pipefail  # let a failure inside main survive the trailing `| tee`
+
+# Return 1 (after listing them) if any *.sh under . has no execute bit at all.
+check_file_permissions() {
+    echo " Checking file permissions..."
+    local missing
+    missing=$(find . -name "*.sh" ! -perm /a+x)
+    # find exits 0 whether or not it matched, so test its output instead.
+    if [ -z "$missing" ]; then return 0; fi
+    echo " Missing execute permissions:"
+    find . -name "*.sh" ! -perm /a+x -exec ls -l {} +
+    return 1
+}
+
+# Return 1 if any *.sh has CRLF line endings (offenders are run through dos2unix).
+check_line_endings() {
+    echo " Checking line endings..."
+    # grep exiting non-zero means no CRLF file was found, i.e. the check passed.
+    find . -name "*.sh" -exec file {} \; | grep CRLF || return 0
+    echo " CRLF line endings detected:"
+    find . -name "*.sh" -exec file {} \; | grep CRLF | cut -d: -f1 | xargs dos2unix
+    return 1
+}
+
+# Print platform info, run the file checks, then ./init.sh; exit 1 on first failure.
+main() {
+    echo "=== Starting Environment Diagnostics ==="
+
+    echo "## Platform Info ##"
+    uname -a
+
+    check_file_permissions || exit 1
+    check_line_endings || exit 1
+
+    ./init.sh || {
+        echo " Initialization failed. Check logs."
+        exit 1
+    }
+    echo " All diagnostics passed successfully."
+}
+main | tee debug.log
diff --git a/init.sh b/init.sh
index 2eb7130..51bbfc2 100755
--- a/init.sh
+++ b/init.sh
@@ -1,6 +1,45 @@
 #!/usr/bin/env bash
+
+# Enable strict error handling
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
+PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : '
+
+# Initialize debug log
+DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log"
+exec > >(tee -a "$DEBUG_LOG") 2>&1
+
+echo "=== Starting initialization at $(date) ==="
+
 for d in config study data; do
-    cd $d; ./init.sh
+    echo "▶ Entering directory: $d"
+    if [ ! -f "$d/init.sh" ]; then
+        echo " Error: Missing $d/init.sh" >&2
+        exit 1
+    fi
+    
+    # Ensure execute permissions
+    chmod +x "$d/init.sh" || {
+        echo " Failed to set execute permissions on $d/init.sh" >&2
+        exit 2
+    }
+
+    # Execute the subdirectory's init.sh script
+    if ! cd "$d"; then
+        echo " Failed to enter directory $d" >&2 
+        exit 3
+    fi
+    
+    echo "⚙️ Running init.sh in $d"
+    if ! ./init.sh; then
+        echo " Critical failure in $d/init.sh" >&2
+        exit 4
+    fi
+    
     cd ..
 done
 
+echo "=== Initialization completed successfully at $(date) ==="
diff --git a/study/init.sh b/study/init.sh
index 0be86a4..a479bb7 100755
--- a/study/init.sh
+++ b/study/init.sh
@@ -1,10 +1,53 @@
 #!/usr/bin/env bash
-# download data hub study and import
+
+
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
+
 DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}"
+
+# Base URL for downloading studies
+DATAHUB_BASE_URL="https://cbioportal-datahub.s3.amazonaws.com"
+
+
 for study in ${DATAHUB_STUDIES}; do
-        wget -O ${study}.tar.gz "https://cbioportal-datahub.s3.amazonaws.com/${study}.tar.gz"
-        tar xvfz ${study}.tar.gz
+    echo " Processing study: $study"
+
+   
+    STUDY_ARCHIVE="${SCRIPT_DIR}/${study}.tar.gz"
+    STUDY_DIR="${SCRIPT_DIR}/${study}"
+
+    
+    if [ -f "$STUDY_ARCHIVE" ]; then
+        echo " Archive already exists: $STUDY_ARCHIVE"
+    else
+        # Download the study archive
+        echo "⬇ Downloading $study from $DATAHUB_BASE_URL"
+        if ! wget -O "$STUDY_ARCHIVE" "${DATAHUB_BASE_URL}/${study}.tar.gz"; then
+            echo " Error: Failed to download ${study}.tar.gz from $DATAHUB_BASE_URL" >&2
+            exit 1
+        fi
+        echo " Download completed: $STUDY_ARCHIVE"
+    fi
+
+    # Extract the archive if it hasn't been extracted yet
+    if [ -d "$STUDY_DIR" ]; then
+        echo " Study directory already exists: $STUDY_DIR"
+    else
+        echo " Extracting $STUDY_ARCHIVE to $STUDY_DIR"
+        if ! tar xvfz "$STUDY_ARCHIVE" -C "$SCRIPT_DIR"; then
+            echo " Error: Failed to extract $STUDY_ARCHIVE" >&2
+            exit 2
+        fi
+        echo " Extraction completed: $STUDY_DIR"
+    fi
+
 done
+
+echo "=== All studies processed successfully ==="

From bd07b3c12d61ed4f601f97fcbe67f26fd9a31817 Mon Sep 17 00:00:00 2001
From: Vaibhav Mittal <vaibhavmittal146@gmail.com>
Date: Sun, 9 Mar 2025 12:59:34 +0000
Subject: [PATCH 2/5] removed emojis

---
 data/init.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/data/init.sh b/data/init.sh
index 1811638..0c7abc3 100755
--- a/data/init.sh
+++ b/data/init.sh
@@ -12,12 +12,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 #this  Extracts Docker image version from .env
 VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-)
 if [ -z "$VERSION" ]; then
-    echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
+    echo " Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
     exit 1
 fi
 
 # This Fetchs the schema file (cgds.sql)
-echo "⚙️ Fetching schema file (cgds.sql) from Docker image: $VERSION"
+echo " Fetching schema file (cgds.sql) from Docker image: $VERSION"
 if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then
     echo "Error: Failed to fetch cgds.sql from Docker image." >&2
     exit 2
@@ -33,7 +33,7 @@ echo " Schema file (cgds.sql) fetched successfully."
 
 # This  Downloads the seed database (seed.sql.gz)
 SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
-echo "⚙️ Downloading seed database from: $SEED_URL"
+echo " Downloading seed database from: $SEED_URL"
 if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then
     echo " Error: Failed to download seed database from $SEED_URL." >&2
     exit 4

From 50abb9010c036ed62be2fa28fb442061d5b848e5 Mon Sep 17 00:00:00 2001
From: Vaibhav Mittal <vaibhavmittal146@gmail.com>
Date: Tue, 11 Mar 2025 20:57:58 +0000
Subject: [PATCH 3/5] Enhance network resilience with retry mechanism for
 downloads

---
 data/init.sh  | 14 +++++++---
 study/init.sh |  4 +++
 utils.sh      | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)
 create mode 100755 utils.sh

diff --git a/data/init.sh b/data/init.sh
index 0c7abc3..cc5d2b7 100755
--- a/data/init.sh
+++ b/data/init.sh
@@ -8,6 +8,10 @@ fi
 
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Source utility functions. NOTE(review): utils.sh as changed in PATCH 4/5 reassigns SCRIPT_DIR to its own directory when sourced; confirm the "$SCRIPT_DIR/..." paths below still resolve under data/.
+source "$ROOT_DIR/utils.sh"
 
 #this  Extracts Docker image version from .env
 VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-)
@@ -31,11 +35,13 @@ fi
 
 echo " Schema file (cgds.sql) fetched successfully."
 
-# This  Downloads the seed database (seed.sql.gz)
+# Download the seed database (seed.sql.gz) with retries
 SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
-echo " Downloading seed database from: $SEED_URL"
-if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then
-    echo " Error: Failed to download seed database from $SEED_URL." >&2
+echo "Downloading seed database from: $SEED_URL"
+
+# Use the download_with_retry function (5 retries, 15s delay)
+if ! download_with_retry "$SEED_URL" "$SCRIPT_DIR/seed.sql.gz" 5 15; then
+    echo "Error: Failed to download seed database after multiple attempts." >&2
     exit 4
 fi
 
diff --git a/study/init.sh b/study/init.sh
index a479bb7..2be4f5c 100755
--- a/study/init.sh
+++ b/study/init.sh
@@ -8,6 +8,10 @@ fi
 
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Source utility functions. NOTE(review): utils.sh as changed in PATCH 4/5 reassigns SCRIPT_DIR to its own directory when sourced; confirm the "${SCRIPT_DIR}/..." paths below still resolve under study/.
+source "$ROOT_DIR/utils.sh"
 
 
 DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}"
diff --git a/utils.sh b/utils.sh
new file mode 100755
index 0000000..a382675
--- /dev/null
+++ b/utils.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+
+# Function to download files with retries
+# Usage: download_with_retry URL DESTINATION MAX_RETRIES RETRY_DELAY
+download_with_retry() {
+    local url="$1"
+    local destination="$2"
+    local max_retries="${3:-3}" 
+    local retry_delay="${4:-10}" 
+    local attempt=1
+    local http_code=0
+    
+    echo "[INFO] Downloading $url to $destination (max $max_retries attempts)"
+    
+    while [ $attempt -le $max_retries ]; do
+        echo "[INFO] Download attempt $attempt of $max_retries"
+        
+       
+        if wget --spider --server-response "$url" 2>&1 | grep -q "200 OK"; then
+            if wget --no-verbose --continue --timeout=30 -O "$destination" "$url"; then
+                echo "[SUCCESS] Download completed successfully on attempt $attempt"
+                return 0
+            fi
+        else
+            echo "[WARNING] URL not accessible or returned non-200 status"
+        fi
+        
+        attempt=$((attempt + 1))
+        if [ $attempt -le $max_retries ]; then
+            echo "[INFO] Retrying download in $retry_delay seconds..."
+            sleep $retry_delay
+        else
+            echo "[ERROR] Failed to download after $max_retries attempts" >&2
+            return 1
+        fi
+    done
+}
+
+# Function to extract archives with validation
+# Usage: extract_with_validation ARCHIVE_PATH EXTRACT_DIR
+extract_with_validation() {
+    local archive="$1"
+    local extract_dir="$2"
+    
+    echo "[INFO] Extracting $archive to $extract_dir"
+    
+    
+    if [ ! -s "$archive" ]; then
+        echo "[ERROR] Archive $archive does not exist or is empty" >&2
+        return 1
+    fi
+    
+    
+    mkdir -p "$extract_dir"
+    
+    
+    if [[ "$archive" == *.tar.gz ]]; then
+        if ! tar xzf "$archive" -C "$extract_dir"; then
+            echo "[ERROR] Failed to extract tar.gz archive" >&2
+            return 1
+        fi
+    elif [[ "$archive" == *.zip ]]; then
+        if ! unzip -q "$archive" -d "$extract_dir"; then
+            echo "[ERROR] Failed to extract zip archive" >&2
+            return 1
+        fi
+    else
+        echo "[ERROR] Unsupported archive format" >&2
+        return 1
+    fi
+    
+    echo "[SUCCESS] Extraction completed successfully"
+    return 0
+}

From a1a6b15e72aa61b9a34b9d3659fd5e05fa83247e Mon Sep 17 00:00:00 2001
From: Vaibhav701161 <vaibhavmittal929@gmail.com>
Date: Wed, 19 Mar 2025 19:05:59 +0000
Subject: [PATCH 4/5] Enhancement: Configurable Environment Settings in
 utils.sh

---
 .env.defaults.template |  19 +++++++
 README.md              |  38 +++++++++++++
 utils.sh               | 123 +++++++++++++++++++++++++----------------
 3 files changed, 133 insertions(+), 47 deletions(-)
 create mode 100644 .env.defaults.template

diff --git a/.env.defaults.template b/.env.defaults.template
new file mode 100644
index 0000000..eea2869
--- /dev/null
+++ b/.env.defaults.template
@@ -0,0 +1,19 @@
+# Template configuration for cBioPortal Docker initialization
+# Copy this file to .env.defaults and customize as needed
+# Command: cp .env.defaults.template .env.defaults
+
+# Network settings
+DOWNLOAD_RETRY_COUNT=3
+DOWNLOAD_RETRY_DELAY=10
+DOWNLOAD_TIMEOUT=30
+
+# Study settings (utils.sh sources this file with bash: quote values containing spaces)
+DATAHUB_STUDIES="lgg_ucsf_2014 msk_impact_2017"
+DATAHUB_BASE_URL=https://cbioportal-datahub.s3.amazonaws.com
+
+# Seed database
+SEED_DB_URL=https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz
+
+# Logging settings
+VERBOSE_LOGS=true
+DEBUG_MODE=false
diff --git a/README.md b/README.md
index 5a3969f..5a290bf 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,44 @@ docker compose down -v
 
 If you were able to successfully set up a local installation of cBioPortal, please add it here: https://www.cbioportal.org/installations. Thank you!
 
+## Configuration System
+
+The initialization scripts support customization through a flexible configuration system:
+
+### Configuration Files
+
+1. **Built-in Defaults**: Sensible defaults are built into `utils.sh` and are used when no configuration files are present.
+
+2. **Site-wide Configuration** (optional):
+   - Copy `.env.defaults.template` to `.env.defaults`
+   - Modify values as needed for your environment
+   - This file should NOT be committed to version control
+
+3. **User-specific Configuration** (optional):
+   - Create or modify `.env` file with any values you want to override
+   - This takes precedence over `.env.defaults`
+   - This file should NOT be committed to version control
+
+### Available Configuration Options
+
+| Option | Description | Default |
+|--------|-------------|---------|
+| `DOWNLOAD_RETRY_COUNT` | Number of retry attempts for downloads | 3 |
+| `DOWNLOAD_RETRY_DELAY` | Delay in seconds between retries | 10 |
+| `DOWNLOAD_TIMEOUT` | Timeout in seconds for download operations | 30 |
+| `DATAHUB_STUDIES` | Space-separated list of studies to download | "lgg_ucsf_2014 msk_impact_2017" |
+| `DATAHUB_BASE_URL` | Base URL for datahub studies | "https://cbioportal-datahub.s3.amazonaws.com" |
+| `SEED_DB_URL` | URL for seed database | "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" |
+| `VERBOSE_LOGS` | Enable verbose logging | true |
+| `DEBUG_MODE` | Enable debug mode (prints all commands) | false |
+
+
+## Setup Instructions
+1. Copy `.env.defaults.template` to `.env.defaults`:
+   ```sh
+   cp .env.defaults.template .env.defaults
+   ```
+
 ## Loading custom studies
 By default, the cbioportal docker compose setup comes preloaded with the `lgg_ucsf_2014` study, which is imported as part of the `DOCKER_IMAGE_MYSQL` environment variable [here](.env). If you want to load custom studies, run the following commands.
 ```shell
diff --git a/utils.sh b/utils.sh
index a382675..fc43142 100755
--- a/utils.sh
+++ b/utils.sh
@@ -1,74 +1,103 @@
 #!/usr/bin/env bash
 
+# Script directory
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+# Default configuration values
+DEFAULT_DOWNLOAD_RETRY_COUNT=3
+DEFAULT_DOWNLOAD_RETRY_DELAY=10
+DEFAULT_DOWNLOAD_TIMEOUT=30
+DEFAULT_DATAHUB_STUDIES="lgg_ucsf_2014 msk_impact_2017"
+DEFAULT_DATAHUB_BASE_URL="https://cbioportal-datahub.s3.amazonaws.com"
+DEFAULT_SEED_DB_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
+DEFAULT_VERBOSE_LOGS=true
+DEFAULT_DEBUG_MODE=false
+
+# Initialize with default values
+DOWNLOAD_RETRY_COUNT=${DEFAULT_DOWNLOAD_RETRY_COUNT}
+DOWNLOAD_RETRY_DELAY=${DEFAULT_DOWNLOAD_RETRY_DELAY}
+DOWNLOAD_TIMEOUT=${DEFAULT_DOWNLOAD_TIMEOUT}
+DATAHUB_STUDIES=${DEFAULT_DATAHUB_STUDIES}
+DATAHUB_BASE_URL=${DEFAULT_DATAHUB_BASE_URL}
+SEED_DB_URL=${DEFAULT_SEED_DB_URL}
+VERBOSE_LOGS=${DEFAULT_VERBOSE_LOGS}
+DEBUG_MODE=${DEFAULT_DEBUG_MODE}
+
+# Load .env.defaults if it exists
+if [ -f "$SCRIPT_DIR/.env.defaults" ]; then
+    echo "[INFO] Loading configuration from .env.defaults"
+    source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env.defaults")
+fi
+
+# Override with .env if it exists
+if [ -f "$SCRIPT_DIR/.env" ]; then
+    echo "[INFO] Loading configuration overrides from .env"
+    source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env")
+fi
+
+# Enable debug logging if requested
+if [ "$DEBUG_MODE" = "true" ]; then
+    set -x
+fi
+
 # Function to download files with retries
-# Usage: download_with_retry URL DESTINATION MAX_RETRIES RETRY_DELAY
 download_with_retry() {
     local url="$1"
     local destination="$2"
-    local max_retries="${3:-3}" 
-    local retry_delay="${4:-10}" 
+    local max_retries="${3:-$DOWNLOAD_RETRY_COUNT}"
+    local retry_delay="${4:-$DOWNLOAD_RETRY_DELAY}"
+    local timeout="${DOWNLOAD_TIMEOUT:-30}"
     local attempt=1
-    local http_code=0
-    
+
+    if [ -f "$destination" ]; then
+        echo "[INFO] File already exists: $destination (skipping download)"
+        return 0
+    fi
+
     echo "[INFO] Downloading $url to $destination (max $max_retries attempts)"
-    
+
     while [ $attempt -le $max_retries ]; do
-        echo "[INFO] Download attempt $attempt of $max_retries"
-        
-       
-        if wget --spider --server-response "$url" 2>&1 | grep -q "200 OK"; then
-            if wget --no-verbose --continue --timeout=30 -O "$destination" "$url"; then
-                echo "[SUCCESS] Download completed successfully on attempt $attempt"
-                return 0
-            fi
-        else
-            echo "[WARNING] URL not accessible or returned non-200 status"
+        echo "[INFO] Attempt $attempt of $max_retries"
+
+        if wget --progress=dot:giga --timeout="$timeout" -O "$destination" "$url"; then
+            echo "[SUCCESS] Download completed successfully on attempt $attempt"
+            return 0
         fi
-        
+
+        echo "[WARNING] Download failed. Retrying in $retry_delay seconds..."
+        sleep "$retry_delay"
         attempt=$((attempt + 1))
-        if [ $attempt -le $max_retries ]; then
-            echo "[INFO] Retrying download in $retry_delay seconds..."
-            sleep $retry_delay
-        else
-            echo "[ERROR] Failed to download after $max_retries attempts" >&2
-            return 1
-        fi
     done
+
+    echo "[ERROR] Failed to download after $max_retries attempts" >&2
+    return 1
 }
 
 # Function to extract archives with validation
-# Usage: extract_with_validation ARCHIVE_PATH EXTRACT_DIR
 extract_with_validation() {
     local archive="$1"
     local extract_dir="$2"
-    
+
+    if [ -d "$extract_dir" ] && [ "$(ls -A "$extract_dir")" ]; then
+        echo "[INFO] Extraction already completed: $extract_dir (skipping)"
+        return 0
+    fi
+
     echo "[INFO] Extracting $archive to $extract_dir"
-    
-    
+
     if [ ! -s "$archive" ]; then
         echo "[ERROR] Archive $archive does not exist or is empty" >&2
         return 1
     fi
-    
-    
+
     mkdir -p "$extract_dir"
-    
-    
-    if [[ "$archive" == *.tar.gz ]]; then
-        if ! tar xzf "$archive" -C "$extract_dir"; then
-            echo "[ERROR] Failed to extract tar.gz archive" >&2
-            return 1
-        fi
-    elif [[ "$archive" == *.zip ]]; then
-        if ! unzip -q "$archive" -d "$extract_dir"; then
-            echo "[ERROR] Failed to extract zip archive" >&2
-            return 1
-        fi
-    else
-        echo "[ERROR] Unsupported archive format" >&2
-        return 1
-    fi
-    
+
+    case "$archive" in
+        *.tar.gz) tar xzf "$archive" -C "$extract_dir" || { echo "[ERROR] Failed to extract tar.gz archive" >&2; return 1; } ;;
+        *.zip) unzip -q "$archive" -d "$extract_dir" || { echo "[ERROR] Failed to extract zip archive" >&2; return 1; } ;;
+        *) echo "[ERROR] Unsupported archive format" >&2; return 1 ;;
+    esac
+
     echo "[SUCCESS] Extraction completed successfully"
     return 0
 }

From 405176e5732e53da1653455e048e2c843dc7dacc Mon Sep 17 00:00:00 2001
From: Vaibhav701161 <vaibhavmittal929@gmail.com>
Date: Wed, 19 Mar 2025 19:52:29 +0000
Subject: [PATCH 5/5] Enhancement: Progress monitoring and summary reporting
 for initialization

---
 init.sh  |  29 ++++++++++---
 utils.sh | 124 ++++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 128 insertions(+), 25 deletions(-)

diff --git a/init.sh b/init.sh
index 51bbfc2..d8dd202 100755
--- a/init.sh
+++ b/init.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Enable strict error handling
+
 set -eo pipefail
 if [ "${DEBUG_MODE}" = "true" ]; then
     set -x
@@ -8,38 +8,57 @@ fi
 
 PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : '
 
-# Initialize debug log
+
 DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log"
 exec > >(tee -a "$DEBUG_LOG") 2>&1
 
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+# Source utility functions
+source "$SCRIPT_DIR/utils.sh"
+
+
+init_report
+
 echo "=== Starting initialization at $(date) ==="
 
 for d in config study data; do
-    echo "▶ Entering directory: $d"
+    echo " Entering directory: $d"
     if [ ! -f "$d/init.sh" ]; then
         echo " Error: Missing $d/init.sh" >&2
+        echo "FAILED: Missing $d/init.sh script" >> "$SCRIPT_DIR/../init_report.txt"  # same report path that init_report/generate_summary_report in utils.sh use
         exit 1
     fi
     
     # Ensure execute permissions
     chmod +x "$d/init.sh" || {
         echo " Failed to set execute permissions on $d/init.sh" >&2
+        echo "FAILED: Setting permissions on $d/init.sh" >> "$SCRIPT_DIR/../init_report.txt"  # same report path that init_report/generate_summary_report in utils.sh use
         exit 2
     }
 
     # Execute the subdirectory's init.sh script
     if ! cd "$d"; then
-        echo " Failed to enter directory $d" >&2 
+        echo " Failed to enter directory $d" >&2
+        echo "FAILED: Changing to directory $d" >> "$SCRIPT_DIR/../init_report.txt"  # same report path that init_report/generate_summary_report in utils.sh use
         exit 3
     fi
     
-    echo "⚙️ Running init.sh in $d"
+    echo " Running init.sh in $d"
+    start_time=$(date +%s)
     if ! ./init.sh; then
         echo " Critical failure in $d/init.sh" >&2
+        echo "FAILED: Executing $d/init.sh" >> "$SCRIPT_DIR/../init_report.txt"
         exit 4
     fi
+    end_time=$(date +%s)
+    duration=$((end_time - start_time))
+    echo "SUCCESS: Executed $d/init.sh (took ${duration}s)" >> "$SCRIPT_DIR/../init_report.txt"
     
     cd ..
 done
 
 echo "=== Initialization completed successfully at $(date) ==="
+
+
+generate_summary_report | tee "init_summary_$(date +%Y%m%d%H%M%S).txt"
\ No newline at end of file
diff --git a/utils.sh b/utils.sh
index fc43142..8f5f777 100755
--- a/utils.sh
+++ b/utils.sh
@@ -26,13 +26,13 @@ DEBUG_MODE=${DEFAULT_DEBUG_MODE}
 # Load .env.defaults if it exists
 if [ -f "$SCRIPT_DIR/.env.defaults" ]; then
     echo "[INFO] Loading configuration from .env.defaults"
-    source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env.defaults")
+    source "$SCRIPT_DIR/.env.defaults"
 fi
 
 # Override with .env if it exists
 if [ -f "$SCRIPT_DIR/.env" ]; then
     echo "[INFO] Loading configuration overrides from .env"
-    source <(grep -E "^(DOWNLOAD_RETRY_COUNT|DOWNLOAD_RETRY_DELAY|DOWNLOAD_TIMEOUT|DATAHUB_STUDIES|DATAHUB_BASE_URL|SEED_DB_URL|VERBOSE_LOGS|DEBUG_MODE)=" "$SCRIPT_DIR/.env")
+    source "$SCRIPT_DIR/.env"
 fi
 
 # Enable debug logging if requested
@@ -40,7 +40,7 @@ if [ "$DEBUG_MODE" = "true" ]; then
     set -x
 fi
 
-# Function to download files with retries
+# Function to download files with retries and progress bar
 download_with_retry() {
     local url="$1"
     local destination="$2"
@@ -48,38 +48,58 @@ download_with_retry() {
     local retry_delay="${4:-$DOWNLOAD_RETRY_DELAY}"
     local timeout="${DOWNLOAD_TIMEOUT:-30}"
     local attempt=1
-
+    local report_file="$SCRIPT_DIR/../init_report.txt"
+    
+    # Check if file already exists
     if [ -f "$destination" ]; then
         echo "[INFO] File already exists: $destination (skipping download)"
+        # Add to summary report
+        echo "SUCCESS: $(basename "$destination") already exists (skipped download)" >> "$report_file"
         return 0
     fi
-
+    
     echo "[INFO] Downloading $url to $destination (max $max_retries attempts)"
-
+    
+    # Create a temporary file to track progress
+    local temp_log=$(mktemp)
+    
     while [ $attempt -le $max_retries ]; do
-        echo "[INFO] Attempt $attempt of $max_retries"
-
-        if wget --progress=dot:giga --timeout="$timeout" -O "$destination" "$url"; then
+        echo "[INFO] Download attempt $attempt of $max_retries"
+        
+        # Try to download with wget and show progress bar
+        if wget --no-verbose --continue --timeout=$timeout \
+                --progress=bar:force --show-progress \
+                -O "$destination" "$url" 2>&1 | tee "$temp_log"; then
             echo "[SUCCESS] Download completed successfully on attempt $attempt"
+            # Add to summary report
+            echo "SUCCESS: Downloaded $(basename "$destination") on attempt $attempt" >> "$report_file"
+            rm -f "$temp_log"
             return 0
         fi
-
-        echo "[WARNING] Download failed. Retrying in $retry_delay seconds..."
-        sleep "$retry_delay"
+        
         attempt=$((attempt + 1))
+        if [ $attempt -le $max_retries ]; then
+            echo "[INFO] Retrying download in $retry_delay seconds..."
+            sleep $retry_delay
+        else
+            echo "[ERROR] Failed to download after $max_retries attempts" >&2
+            # Add to summary report
+            echo "FAILED: Download of $(basename "$destination") after $max_retries attempts" >> "$report_file"
+            rm -f "$temp_log"
+            return 1
+        fi
     done
-
-    echo "[ERROR] Failed to download after $max_retries attempts" >&2
-    return 1
 }
 
 # Function to extract archives with validation
 extract_with_validation() {
     local archive="$1"
     local extract_dir="$2"
+    local report_file="$SCRIPT_DIR/../init_report.txt"
 
     if [ -d "$extract_dir" ] && [ "$(ls -A "$extract_dir")" ]; then
         echo "[INFO] Extraction already completed: $extract_dir (skipping)"
+        echo "SUCCESS: Extraction of $(basename "$archive") already completed (skipped)" >> "$report_file"
         return 0
     fi
 
@@ -87,17 +107,81 @@ extract_with_validation() {
 
     if [ ! -s "$archive" ]; then
         echo "[ERROR] Archive $archive does not exist or is empty" >&2
+        echo "FAILED: Extraction of $(basename "$archive") - file does not exist or is empty" >> "$report_file"
         return 1
     fi
 
     mkdir -p "$extract_dir"
 
     case "$archive" in
-        *.tar.gz) tar xzf "$archive" -C "$extract_dir" || { echo "[ERROR] Failed to extract tar.gz archive" >&2; return 1; } ;;
-        *.zip) unzip -q "$archive" -d "$extract_dir" || { echo "[ERROR] Failed to extract zip archive" >&2; return 1; } ;;
-        *) echo "[ERROR] Unsupported archive format" >&2; return 1 ;;
+        *.tar.gz) 
+            if tar xzf "$archive" -C "$extract_dir"; then
+                echo "[SUCCESS] Extraction completed successfully"
+                echo "SUCCESS: Extracted $(basename "$archive") to $(basename "$extract_dir")" >> "$report_file"
+                return 0
+            else
+                echo "[ERROR] Failed to extract tar.gz archive" >&2
+                echo "FAILED: Extraction of $(basename "$archive") - tar extraction error" >> "$report_file"
+                return 1
+            fi ;;
+        *.zip) 
+            if unzip -q "$archive" -d "$extract_dir"; then
+                echo "[SUCCESS] Extraction completed successfully"
+                echo "SUCCESS: Extracted $(basename "$archive") to $(basename "$extract_dir")" >> "$report_file"
+                return 0
+            else
+                echo "[ERROR] Failed to extract zip archive" >&2
+                echo "FAILED: Extraction of $(basename "$archive") - unzip extraction error" >> "$report_file"
+                return 1
+            fi ;;
+        *) 
+            echo "[ERROR] Unsupported archive format" >&2
+            echo "FAILED: Extraction of $(basename "$archive") - unsupported format" >> "$report_file"
+            return 1 ;;
     esac
+}
+
+# Print a summary of the operations recorded in init_report.txt to stdout:
+# success/failure counts, the failed entries (if any), then the full report.
+generate_summary_report() {
+    local report_file="$SCRIPT_DIR/../init_report.txt"
+    echo "========================================"
+    echo "cBioPortal Initialization Summary Report"
+    echo "========================================"
+    echo "Generated at: $(date)"
+    echo ""
+    if [ -f "$report_file" ]; then
+        echo "Operation Summary:"
+        echo "-----------------"
+        # grep -c exits 1 on a zero count; inside $(...) that status is ignored,
+        # so callers running with `set -eo pipefail` are not aborted here.
+        echo "Total successful operations: $(grep -c "SUCCESS:" "$report_file")"
+        echo "Total failed operations: $(grep -c "FAILED:" "$report_file")"
+        echo ""
+
+        if grep -q "FAILED:" "$report_file"; then
+            echo "Failed Operations:"
+            echo "-----------------"
+            grep "FAILED:" "$report_file"
+            echo ""
+        fi
+        echo "Details:"
+        echo "--------"
+        cat "$report_file"
+    else
+        echo "No operations recorded"
+    fi
+}
 
-    echo "[SUCCESS] Extraction completed successfully"
-    return 0
+# Initialize report file
+init_report() {
+    local report_file="$SCRIPT_DIR/../init_report.txt"
+    echo "# cBioPortal Initialization Report - $(date)" > "$report_file"
+    echo "# ====================================" >> "$report_file"
+    echo "" >> "$report_file"
 }
+
+# Make sure the report file exists
+if [ ! -f "$SCRIPT_DIR/../init_report.txt" ]; then
+    init_report
+fi
\ No newline at end of file