Skip to content

[Enhancement] Network Resilience Enhancement for Downloads #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions config/init.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
#!/usr/bin/env bash


set -eo pipefail
if [ "${DEBUG_MODE}" = "true" ]; then
set -x
fi

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-)
# Ensure .env file exists
if [ ! -f "$SCRIPT_DIR/../.env" ]; then
echo " Error: .env file is missing in the parent directory." >&2
exit 1
fi


VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-)
if [ -z "$VERSION" ]; then
echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
exit 1
fi

# This is a hack. Docker run doesn't escape '&' but docker compose does.
sed 's/&/\\&/g' ../.env > ../.env.temp
# Create a temporary .env file to escape special characters (e.g., `&`)
TEMP_ENV_FILE="$SCRIPT_DIR/../.env.temp"
sed 's/&/\\&/g' "$SCRIPT_DIR/../.env" > "$TEMP_ENV_FILE"

docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-webapp/application.properties |

echo "⚙️ Generating application.properties using Docker image: $VERSION"
docker run --rm -i --env-file "$TEMP_ENV_FILE" "$VERSION" bin/sh -c 'cat /cbioportal-webapp/application.properties |
sed "s|spring.datasource.password=.*|spring.datasource.password=${DB_MYSQL_PASSWORD}|" | \
sed "s|spring.datasource.username=.*|spring.datasource.username=${DB_MYSQL_USERNAME}|" | \
sed "s|spring.datasource.url=.*|spring.datasource.url=${DB_MYSQL_URL}|" | \
Expand All @@ -17,8 +38,19 @@ docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-w
sed "s|.*spring.datasource.clickhouse.password=.*|spring.datasource.clickhouse.password=${DB_CLICKHOUSE_PASSWORD}|" | \
sed "s|.*spring.datasource.clickhouse.url=.*|spring.datasource.clickhouse.url=${DB_CLICKHOUSE_URL}|" | \
sed "s|.*spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|" | \
sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' \
> application.properties
sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|" > application.properties' || {
echo "❌ Error: Failed to generate application.properties using Docker." >&2
rm -f "$TEMP_ENV_FILE"
exit 1
}


rm -f "$TEMP_ENV_FILE"


if [ ! -f application.properties ]; then
echo " Error: application.properties file was not created." >&2
exit 1
fi

# Cleanup for the hack above
rm ../.env.temp
echo " application.properties generated successfully."
55 changes: 50 additions & 5 deletions data/init.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,56 @@
#!/usr/bin/env bash


set -eo pipefail
if [ "${DEBUG_MODE}" = "true" ]; then
set -x
fi


SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"

# Source utility functions
source "$ROOT_DIR/utils.sh"

# Extract the Docker image version from .env
VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-)
if [ -z "$VERSION" ]; then
echo " Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2
exit 1
fi

# Fetch the schema file (cgds.sql)
echo " Fetching schema file (cgds.sql) from Docker image: $VERSION"
if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then
echo "Error: Failed to fetch cgds.sql from Docker image." >&2
exit 2
fi

# Validate that cgds.sql was created successfully
if [ ! -f "$SCRIPT_DIR/cgds.sql" ]; then
echo " Error: cgds.sql file was not created." >&2
exit 3
fi

echo " Schema file (cgds.sql) fetched successfully."

# Download the seed database (seed.sql.gz) with retries
SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
echo "Downloading seed database from: $SEED_URL"

# Use the download_with_retry function (5 retries, 15s delay)
if ! download_with_retry "$SEED_URL" "$SCRIPT_DIR/seed.sql.gz" 5 15; then
echo "Error: Failed to download seed database after multiple attempts." >&2
exit 4
fi

VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-)
# Validate that seed.sql.gz was downloaded successfully
if [ ! -f "$SCRIPT_DIR/seed.sql.gz" ]; then
echo " Error: seed.sql.gz file was not downloaded." >&2
exit 5
fi

# Get the schema
docker run --rm -i $VERSION cat /cbioportal/db-scripts/cgds.sql > cgds.sql
echo " Seed database (seed.sql.gz) downloaded successfully."

# Download the combined hg19 + hg38 seed database
wget -O seed.sql.gz "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz"
echo "=== Data initialization completed successfully ==="
43 changes: 43 additions & 0 deletions debug_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

# Run diagnostic checks on the environment setup and log the results.

#######################################
# Verify that every *.sh file below the current directory has at least one
# execute bit set.
# Outputs: progress text on stdout; an `ls -l` listing of offending files.
# Returns: 0 when all scripts are executable, non-zero otherwise (including
#          when find itself fails).
#######################################
check_file_permissions() {
  local listing

  echo " Checking file permissions..."

  # find exits 0 whether or not anything matched, so its exit status cannot be
  # used as the "found a problem" signal; the captured listing is what counts.
  listing=$(find . -name "*.sh" ! -perm /a+x -exec ls -l {} +) || return 1

  if [ -n "$listing" ]; then
    echo " Missing execute permissions:"
    printf '%s\n' "$listing"
    return 1
  fi

  return 0
}

#######################################
# Detect *.sh files below the current directory that contain CRLF line
# endings and, when dos2unix is installed, convert them in place.
# Outputs: progress text and the list of offending files on stdout.
# Returns: 0 when no CRLF endings are found; 1 when any were detected
#          (even if they were converted, so the caller can re-run).
#######################################
check_line_endings() {
  local crlf_files script

  echo " Checking line endings..."

  # grep -l exits non-zero when nothing matches and find propagates that, so
  # the exit status is deliberately ignored; only the file list matters.
  crlf_files=$(find . -name "*.sh" -type f -exec grep -l $'\r$' {} +) || true

  if [ -z "$crlf_files" ]; then
    return 0
  fi

  echo " CRLF line endings detected:"
  printf '%s\n' "$crlf_files"

  if command -v dos2unix >/dev/null 2>&1; then
    # Read line by line so paths containing spaces are passed intact.
    while IFS= read -r script; do
      dos2unix "$script"
    done <<< "$crlf_files"
  else
    echo " dos2unix not found; convert the files listed above manually." >&2
  fi

  return 1
}

#######################################
# Run the environment diagnostics and then the top-level init.sh.
# Outputs: platform information and progress messages on stdout.
# Returns: 0 when every check and ./init.sh succeed, 1 at the first failure.
#######################################
main() {
  echo "=== Starting Environment Diagnostics ==="

  # System information
  echo "## Platform Info ##"
  uname -a

  # File system checks
  check_file_permissions || return 1
  check_line_endings || return 1

  # ./init.sh is resolved relative to the current working directory.
  if ! ./init.sh; then
    echo " Initialization failed. Check logs."
    return 1
  fi

  echo " All diagnostics passed successfully."
}

# main runs in a pipeline subshell here; without pipefail the script would
# always exit with tee's status and hide any diagnostic failure.
set -o pipefail
main | tee debug.log
41 changes: 40 additions & 1 deletion init.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,45 @@
#!/usr/bin/env bash

# Enable strict error handling
set -eo pipefail
if [ "${DEBUG_MODE}" = "true" ]; then
set -x
fi

PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : '

# Initialize debug log
DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log"
exec > >(tee -a "$DEBUG_LOG") 2>&1

echo "=== Starting initialization at $(date) ==="

for d in config study data; do
cd $d; ./init.sh
echo "▶ Entering directory: $d"
if [ ! -f "$d/init.sh" ]; then
echo " Error: Missing $d/init.sh" >&2
exit 1
fi

# Ensure execute permissions
chmod +x "$d/init.sh" || {
echo " Failed to set execute permissions on $d/init.sh" >&2
exit 2
}

# Execute the subdirectory's init.sh script
if ! cd "$d"; then
echo " Failed to enter directory $d" >&2
exit 3
fi

echo "⚙️ Running init.sh in $d"
if ! ./init.sh; then
echo " Critical failure in $d/init.sh" >&2
exit 4
fi

cd ..
done

echo "=== Initialization completed successfully at $(date) ==="
53 changes: 50 additions & 3 deletions study/init.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,57 @@
#!/usr/bin/env bash
# download data hub study and import


set -eo pipefail
if [ "${DEBUG_MODE}" = "true" ]; then
set -x
fi


SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"

# Source utility functions
source "$ROOT_DIR/utils.sh"


DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}"

# Base URL for downloading studies
DATAHUB_BASE_URL="https://cbioportal-datahub.s3.amazonaws.com"


for study in ${DATAHUB_STUDIES}; do
wget -O ${study}.tar.gz "https://cbioportal-datahub.s3.amazonaws.com/${study}.tar.gz"
tar xvfz ${study}.tar.gz
echo " Processing study: $study"


STUDY_ARCHIVE="${SCRIPT_DIR}/${study}.tar.gz"
STUDY_DIR="${SCRIPT_DIR}/${study}"


if [ -f "$STUDY_ARCHIVE" ]; then
echo " Archive already exists: $STUDY_ARCHIVE"
else
# Download the study archive
echo "⬇ Downloading $study from $DATAHUB_BASE_URL"
if ! wget -O "$STUDY_ARCHIVE" "${DATAHUB_BASE_URL}/${study}.tar.gz"; then
echo " Error: Failed to download ${study}.tar.gz from $DATAHUB_BASE_URL" >&2
exit 1
fi
echo " Download completed: $STUDY_ARCHIVE"
fi

# Extract the archive if it hasn't been extracted yet
if [ -d "$STUDY_DIR" ]; then
echo " Study directory already exists: $STUDY_DIR"
else
echo " Extracting $STUDY_ARCHIVE to $STUDY_DIR"
if ! tar xvfz "$STUDY_ARCHIVE" -C "$SCRIPT_DIR"; then
echo " Error: Failed to extract $STUDY_ARCHIVE" >&2
exit 2
fi
echo " Extraction completed: $STUDY_DIR"
fi

done

echo "=== All studies processed successfully ==="
74 changes: 74 additions & 0 deletions utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env bash

#######################################
# Download a file with wget, retrying on failure.
# Usage: download_with_retry URL DESTINATION [MAX_RETRIES] [RETRY_DELAY]
# Arguments:
#   URL          - address to fetch
#   DESTINATION  - path the payload is written to (passed to wget -O)
#   MAX_RETRIES  - total number of attempts, positive integer (default 3)
#   RETRY_DELAY  - seconds to sleep between attempts (default 10)
# Outputs: progress on stdout, warnings and errors on stderr.
# Returns: 0 as soon as one attempt succeeds; 1 on invalid arguments or
#          when every attempt fails.
#######################################
download_with_retry() {
  local url="$1"
  local destination="$2"
  local max_retries="${3:-3}"
  local retry_delay="${4:-10}"
  local attempt=1

  if [ -z "$url" ] || [ -z "$destination" ]; then
    echo "[ERROR] Usage: download_with_retry URL DESTINATION [MAX_RETRIES] [RETRY_DELAY]" >&2
    return 1
  fi

  # A zero or non-numeric attempt count would skip the loop entirely; reject
  # it instead of reporting success without downloading anything.
  case "$max_retries" in
    '' | *[!0-9]*)
      echo "[ERROR] MAX_RETRIES must be a positive integer (got '$max_retries')" >&2
      return 1
      ;;
  esac
  if [ "$max_retries" -lt 1 ]; then
    echo "[ERROR] MAX_RETRIES must be a positive integer (got '$max_retries')" >&2
    return 1
  fi

  echo "[INFO] Downloading $url to $destination (max $max_retries attempts)"

  while [ "$attempt" -le "$max_retries" ]; do
    echo "[INFO] Download attempt $attempt of $max_retries"

    # wget's own exit status already reports HTTP and network errors, so no
    # separate `--spider | grep -q` probe is made: that pipe can kill wget
    # with SIGPIPE, which `set -o pipefail` then reports as a failure.
    if wget --no-verbose --continue --timeout=30 -O "$destination" "$url"; then
      echo "[SUCCESS] Download completed successfully on attempt $attempt"
      return 0
    fi
    echo "[WARNING] Download attempt $attempt failed" >&2

    attempt=$((attempt + 1))
    if [ "$attempt" -le "$max_retries" ]; then
      echo "[INFO] Retrying download in $retry_delay seconds..."
      sleep "$retry_delay"
    fi
  done

  echo "[ERROR] Failed to download after $max_retries attempts" >&2
  return 1
}

#######################################
# Extract a .tar.gz/.tgz or .zip archive into a directory, validating the
# inputs first.
# Usage: extract_with_validation ARCHIVE_PATH EXTRACT_DIR
# Arguments:
#   ARCHIVE_PATH - archive to unpack; must exist and be non-empty
#   EXTRACT_DIR  - destination directory, created if missing
# Outputs: progress on stdout, errors on stderr.
# Returns: 0 on success; 1 on missing arguments, a missing/empty archive,
#          an unsupported format, or a failed mkdir/tar/unzip.
#######################################
extract_with_validation() {
  local archive="$1"
  local extract_dir="$2"

  if [ -z "$archive" ] || [ -z "$extract_dir" ]; then
    echo "[ERROR] Usage: extract_with_validation ARCHIVE_PATH EXTRACT_DIR" >&2
    return 1
  fi

  echo "[INFO] Extracting $archive to $extract_dir"

  if [ ! -s "$archive" ]; then
    echo "[ERROR] Archive $archive does not exist or is empty" >&2
    return 1
  fi

  # Reject unknown formats before creating anything on disk.
  case "$archive" in
    *.tar.gz | *.tgz | *.zip) ;;
    *)
      echo "[ERROR] Unsupported archive format" >&2
      return 1
      ;;
  esac

  if ! mkdir -p -- "$extract_dir"; then
    echo "[ERROR] Failed to create extraction directory $extract_dir" >&2
    return 1
  fi

  case "$archive" in
    *.zip)
      if ! unzip -q "$archive" -d "$extract_dir"; then
        echo "[ERROR] Failed to extract zip archive" >&2
        return 1
      fi
      ;;
    *)
      if ! tar xzf "$archive" -C "$extract_dir"; then
        echo "[ERROR] Failed to extract tar.gz archive" >&2
        return 1
      fi
      ;;
  esac

  echo "[SUCCESS] Extraction completed successfully"
  return 0
}