cBioPortal · Vaibhav701161 · Mar 9, 2025 · Mar 9, 2025 · Mar 11, 2025 · Mar 19, 2025
diff --git a/.env.defaults.template b/.env.defaults.template
@@ -0,0 +1,19 @@
+# Template configuration for cBioPortal Docker initialization
+# Copy this file to .env.defaults and customize as needed
+# Command: cp .env.defaults.template .env.defaults
+
+# Network settings
+DOWNLOAD_RETRY_COUNT=3
+DOWNLOAD_RETRY_DELAY=10
+DOWNLOAD_TIMEOUT=30
+
+# Study settings
+DATAHUB_STUDIES=lgg_ucsf_2014 msk_impact_2017
+DATAHUB_BASE_URL=https://cbioportal-datahub.s3.amazonaws.com
+
+# Seed database
+SEED_DB_URL=https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz
+
+# Logging settings
+VERBOSE_LOGS=true
+DEBUG_MODE=false
diff --git a/README.md b/README.md
@@ -25,6 +25,44 @@ docker compose down -v
 
 If you were able to successfully set up a local installation of cBioPortal, please add it here: https://www.cbioportal.org/installations. Thank you!
 
+## Configuration System
+
+The initialization scripts support customization through a flexible configuration system:
+
+### Configuration Files
+
+1. **Built-in Defaults**: Sensible defaults are built into the scripts and are used when no configuration files are present.
+
+2. **Site-wide Configuration** (optional):
+   - Copy `.env.defaults.template` to `.env.defaults`
+   - Modify values as needed for your environment
+   - This file should NOT be committed to version control
+
+3. **User-specific Configuration** (optional):
+   - Create or modify `.env` file with any values you want to override
+   - This takes precedence over `.env.defaults`
+   - This file should NOT be committed to version control
+
+### Available Configuration Options
+
+| Option | Description | Default |
+|--------|-------------|---------|
+| `DOWNLOAD_RETRY_COUNT` | Number of retry attempts for downloads | 3 |
+| `DOWNLOAD_RETRY_DELAY` | Delay in seconds between retries | 10 |
+| `DOWNLOAD_TIMEOUT` | Timeout in seconds for download operations | 30 |
+| `DATAHUB_STUDIES` | Space-separated list of studies to download | "lgg_ucsf_2014 msk_impact_2017" |
+| `DATAHUB_BASE_URL` | Base URL for datahub studies | "https://cbioportal-datahub.s3.amazonaws.com" |
+| `SEED_DB_URL` | URL for seed database | "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" |
+| `VERBOSE_LOGS` | Enable verbose logging | true |
+| `DEBUG_MODE` | Enable debug mode (prints all commands) | false |
+
+
+## Setup Instructions
+1. Copy `.env.defaults.template` to `.env.defaults`:
+   ```sh
+   cp .env.defaults.template .env.defaults
+   ```
+
 ## Loading custom studies
 By default, the cbioportal docker compose setup comes preloaded with the `lgg_ucsf_2014` study, which is imported as part of the `DOCKER_IMAGE_MYSQL` environment variable [here](.env). If you want to load custom studies, run the following commands.
 ```shell

diff --git a/config/init.sh b/config/init.sh
@@ -1,12 +1,33 @@
 #!/usr/bin/env bash
+# Generates application.properties from the template inside the cBioPortal
+# Docker image named by DOCKER_IMAGE_CBIOPORTAL in ../.env.
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-)
+# Ensure .env file exists
+if [ ! -f "$SCRIPT_DIR/../.env" ]; then
+    echo " Error: .env file is missing in the parent directory." >&2
+    exit 1
+fi
+
+# `|| true`: with pipefail, a grep miss would otherwise abort right here and skip the message below.
+VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) || true
+if [ -z "$VERSION" ]; then
+    echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
+    exit 1
+fi
 
-# This is a hack. Docker run doesn't escape '&' but docker compose does.
-sed 's/&/\\&/g' ../.env > ../.env.temp
+# Write a copy of .env with every `&` backslash-escaped and hand that copy to `docker run --env-file`.
+TEMP_ENV_FILE="$SCRIPT_DIR/../.env.temp"
+sed 's/&/\\&/g' "$SCRIPT_DIR/../.env" > "$TEMP_ENV_FILE"
 
-docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-webapp/application.properties |
+# The redirect must stay OUTSIDE the quoted command so the file lands on the host, not in the --rm container.
+echo "⚙️ Generating application.properties using Docker image: $VERSION"
+docker run --rm -i --env-file "$TEMP_ENV_FILE" "$VERSION" bin/sh -c 'cat /cbioportal-webapp/application.properties |
     sed "s|spring.datasource.password=.*|spring.datasource.password=${DB_MYSQL_PASSWORD}|" | \
     sed "s|spring.datasource.username=.*|spring.datasource.username=${DB_MYSQL_USERNAME}|" | \
     sed "s|spring.datasource.url=.*|spring.datasource.url=${DB_MYSQL_URL}|" | \
@@ -17,8 +38,19 @@ docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-w
     sed "s|.*spring.datasource.clickhouse.password=.*|spring.datasource.clickhouse.password=${DB_CLICKHOUSE_PASSWORD}|" | \
     sed "s|.*spring.datasource.clickhouse.url=.*|spring.datasource.clickhouse.url=${DB_CLICKHOUSE_URL}|" | \
     sed "s|.*spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|" | \
-    sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' \
-> application.properties
+    sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' > application.properties || {
+        echo "❌ Error: Failed to generate application.properties using Docker." >&2
+        rm -f "$TEMP_ENV_FILE"
+        exit 1
+}
+
+# Remove the escaped copy of .env now that Docker no longer needs it.
+rm -f "$TEMP_ENV_FILE"
+
+# -s (exists AND non-empty): the in-container pipeline has no pipefail, so a failed `cat` can still exit 0 with empty output.
+if [ ! -s application.properties ]; then
+    echo " Error: application.properties file was not created or is empty." >&2
+    exit 1
+fi
 
-# Cleanup for the hack above
-rm ../.env.temp
+echo " application.properties generated successfully."
diff --git a/data/init.sh b/data/init.sh
@@ -1,11 +1,56 @@
 #!/usr/bin/env bash
 
+# Fetches the database schema (cgds.sql) and the seed database (seed.sql.gz) into this script's directory.
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
+# Resolve paths from the script's own location rather than the caller's working directory.
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Source utility functions (presumably the origin of download_with_retry, which is not defined in this file)
+source "$ROOT_DIR/utils.sh"
+
+# Extract the Docker image from .env; `|| true` keeps a grep miss or a missing file from aborting before the message below.
+VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) || true
+if [ -z "$VERSION" ]; then
+    echo " Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
+    exit 1
+fi
+
+# Fetch the schema file (cgds.sql) from the cBioPortal image
+echo " Fetching schema file (cgds.sql) from Docker image: $VERSION"
+if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then
+    echo "Error: Failed to fetch cgds.sql from Docker image." >&2
+    exit 2
+fi
+
+# Validate with -s, not -f: the redirect above always creates the file, so only non-emptiness is informative.
+if [ ! -s "$SCRIPT_DIR/cgds.sql" ]; then
+    echo " Error: cgds.sql file was not created or is empty." >&2
+    exit 3
+fi
+
+echo " Schema file (cgds.sql) fetched successfully."
+
+# Download the seed database (seed.sql.gz) with retries; SEED_DB_URL overrides the built-in URL
+SEED_URL="${SEED_DB_URL:-https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz}"
+echo "Downloading seed database from: $SEED_URL"
+
+# Retry count / delay come from DOWNLOAD_RETRY_COUNT / DOWNLOAD_RETRY_DELAY, falling back to 5 retries, 15s delay
+if ! download_with_retry "$SEED_URL" "$SCRIPT_DIR/seed.sql.gz" "${DOWNLOAD_RETRY_COUNT:-5}" "${DOWNLOAD_RETRY_DELAY:-15}"; then
+    echo "Error: Failed to download seed database after multiple attempts." >&2
+    exit 4
+fi
 
-VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-)
+# Validate that seed.sql.gz was downloaded successfully
+if [ ! -f "$SCRIPT_DIR/seed.sql.gz" ]; then
+    echo " Error: seed.sql.gz file was not downloaded." >&2
+    exit 5
+fi
 
-# Get the schema
-docker run --rm -i $VERSION cat /cbioportal/db-scripts/cgds.sql > cgds.sql
+echo " Seed database (seed.sql.gz) downloaded successfully."
 
-# Download the combined hg19 + hg38 seed database
-wget -O seed.sql.gz "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
+echo "=== Data initialization completed successfully ==="
diff --git a/debug_env.sh b/debug_env.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# This runs Diagnostic checks for environment setup and logs
+# Return 1 if any *.sh file has no execute bit; tests find's output because find exits 0 even with no match.
+check_file_permissions() {
+    echo " Checking file permissions..."
+    if [ -n "$(find . -name "*.sh" ! -perm /a+x -print -quit)" ]; then
+        echo " Missing execute permissions:"
+        find . -name "*.sh" ! -perm /a+x -exec ls -l {} +
+        return 1
+    fi
+}
+# Return 1 if any *.sh has CRLF endings (converting them via dos2unix); else 0, since a grep miss is not an error here.
+check_line_endings() {
+    echo " Checking line endings..."
+    if find . -name "*.sh" -exec file {} \; | grep CRLF; then
+        echo " CRLF line endings detected:"
+        find . -name "*.sh" -exec file {} \; | grep CRLF | cut -d: -f1 | xargs dos2unix
+        return 1
+    fi
+}
+# Print platform info, run the file checks, then ./init.sh; exits non-zero at the first failure.
+main() {
+    echo "=== Starting Environment Diagnostics ==="
+
+    # System information
+    echo "## Platform Info ##"
+    uname -a
+
+    # File system checks
+    check_file_permissions || exit 1
+    check_line_endings || exit 1
+
+    # Run the real initialization (path is relative to the current directory).
+    ./init.sh || {
+        echo " Initialization failed. Check logs."
+        exit 1
+    }
+
+    echo " All diagnostics passed successfully."
+}
+# main runs as a pipeline stage, so `exit` inside it only ends that stage; re-raise its status (not tee's) here.
+main | tee debug.log; exit "${PIPESTATUS[0]}"
diff --git a/init.sh b/init.sh
@@ -1,6 +1,64 @@
 #!/usr/bin/env bash
+# Top-level initializer: runs init.sh in config/, study/ and data/ in turn and records each outcome.
+# Fail fast on errors (including inside pipelines); DEBUG_MODE=true additionally traces every command.
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+# Prefix each trace line produced by `set -x` with a timestamp.
+PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : '
+
+# From here on, copy both stdout and stderr into a timestamped log file in the current directory.
+DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log"
+exec > >(tee -a "$DEBUG_LOG") 2>&1
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+# Source utility functions
+source "$SCRIPT_DIR/utils.sh"
+
+# init_report is not defined in this file; presumably utils.sh provides it and it starts init_report.txt — TODO confirm.
+init_report
+
+echo "=== Starting initialization at $(date) ==="
+
 for d in config study data; do
-    cd $d; ./init.sh
+    echo " Entering directory: $d"
+    if [ ! -f "$d/init.sh" ]; then
+        echo " Error: Missing $d/init.sh" >&2
+        echo "FAILED: Missing $d/init.sh script" >> "$SCRIPT_DIR/init_report.txt"
+        exit 1
+    fi
+
+    # Ensure execute permissions
+    chmod +x "$d/init.sh" || {
+        echo " Failed to set execute permissions on $d/init.sh" >&2
+        echo "FAILED: Setting permissions on $d/init.sh" >> "$SCRIPT_DIR/init_report.txt"
+        exit 2
+    }
+
+    # Execute the subdirectory's init.sh script
+    if ! cd "$d"; then
+        echo " Failed to enter directory $d" >&2
+        echo "FAILED: Changing to directory $d" >> "$SCRIPT_DIR/init_report.txt"
+        exit 3
+    fi
+    # SCRIPT_DIR is absolute, so the report path stays the same after the cd above (no "../" needed).
+    echo " Running init.sh in $d"
+    start_time=$(date +%s)
+    if ! ./init.sh; then
+        echo " Critical failure in $d/init.sh" >&2
+        echo "FAILED: Executing $d/init.sh" >> "$SCRIPT_DIR/init_report.txt"
+        exit 4
+    fi
+    end_time=$(date +%s)
+    duration=$((end_time - start_time))
+    echo "SUCCESS: Executed $d/init.sh (took ${duration}s)" >> "$SCRIPT_DIR/init_report.txt"
+
     cd ..
 done
 
+echo "=== Initialization completed successfully at $(date) ==="
+
+# generate_summary_report is not defined in this file (presumably from utils.sh); its output is shown and saved to a timestamped file.
+generate_summary_report | tee "init_summary_$(date +%Y%m%d%H%M%S).txt"
diff --git a/study/init.sh b/study/init.sh
@@ -1,10 +1,57 @@
 #!/usr/bin/env bash
-# download data hub study and import
+# Downloads each datahub study archive listed in DATAHUB_STUDIES and extracts it next to this script.
+
+set -eo pipefail
+if [ "${DEBUG_MODE}" = "true" ]; then
+    set -x
+fi
+
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Source utility functions
+source "$ROOT_DIR/utils.sh"
+
 
 DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}"
+
+# Base URL for downloading studies (overridable from the environment, like DATAHUB_STUDIES above)
+DATAHUB_BASE_URL="${DATAHUB_BASE_URL:-https://cbioportal-datahub.s3.amazonaws.com}"
+
+
 for study in ${DATAHUB_STUDIES}; do
-        wget -O ${study}.tar.gz "https://cbioportal-datahub.s3.amazonaws.com/${study}.tar.gz"
-        tar xvfz ${study}.tar.gz
+    echo " Processing study: $study"
+
+    STUDY_ARCHIVE="${SCRIPT_DIR}/${study}.tar.gz"
+    STUDY_DIR="${SCRIPT_DIR}/${study}"
+
+    # Download unless cached. A failed download removes its partial file; otherwise
+    # the next run would mistake it for a complete archive and skip the download.
+    if [ -f "$STUDY_ARCHIVE" ]; then
+        echo " Archive already exists: $STUDY_ARCHIVE"
+    else
+        echo "⬇ Downloading $study from $DATAHUB_BASE_URL"
+        if ! wget -O "$STUDY_ARCHIVE" "${DATAHUB_BASE_URL}/${study}.tar.gz"; then
+            rm -f -- "$STUDY_ARCHIVE"
+            echo " Error: Failed to download ${study}.tar.gz from $DATAHUB_BASE_URL" >&2
+            exit 1
+        fi
+        echo " Download completed: $STUDY_ARCHIVE"
+    fi
+
+    # Extract unless already extracted; a failed extraction is removed so a rerun retries it instead of skipping.
+    if [ -d "$STUDY_DIR" ]; then
+        echo " Study directory already exists: $STUDY_DIR"
+    else
+        echo " Extracting $STUDY_ARCHIVE to $STUDY_DIR"
+        if ! tar xvfz "$STUDY_ARCHIVE" -C "$SCRIPT_DIR"; then
+            rm -rf -- "${STUDY_DIR:?}"
+            echo " Error: Failed to extract $STUDY_ARCHIVE" >&2
+            exit 2
+        fi
+        echo " Extraction completed: $STUDY_DIR"
+    fi
 done
+
+echo "=== All studies processed successfully ==="