diff --git a/config/init.sh b/config/init.sh index bec1d9b..db547ff 100755 --- a/config/init.sh +++ b/config/init.sh @@ -1,12 +1,33 @@ #!/usr/bin/env bash + + +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-) +# Ensure .env file exists +if [ ! -f "$SCRIPT_DIR/../.env" ]; then + echo " Error: .env file is missing in the parent directory." >&2 + exit 1 +fi + + +VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) +if [ -z "$VERSION" ]; then + echo "❌ Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2 + exit 1 +fi -# This is a hack. Docker run doesn't escape '&' but docker compose does. -sed 's/&/\\&/g' ../.env > ../.env.temp +# Create a temporary .env file to escape special characters (e.g., `&`) +TEMP_ENV_FILE="$SCRIPT_DIR/../.env.temp" +sed 's/&/\\&/g' "$SCRIPT_DIR/../.env" > "$TEMP_ENV_FILE" -docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-webapp/application.properties | + +echo "⚙️ Generating application.properties using Docker image: $VERSION" +docker run --rm -i --env-file "$TEMP_ENV_FILE" "$VERSION" bin/sh -c 'cat /cbioportal-webapp/application.properties | sed "s|spring.datasource.password=.*|spring.datasource.password=${DB_MYSQL_PASSWORD}|" | \ sed "s|spring.datasource.username=.*|spring.datasource.username=${DB_MYSQL_USERNAME}|" | \ sed "s|spring.datasource.url=.*|spring.datasource.url=${DB_MYSQL_URL}|" | \ @@ -17,8 +38,19 @@ docker run --rm -i --env-file ../.env.temp $VERSION bin/sh -c 'cat /cbioportal-w sed "s|.*spring.datasource.clickhouse.password=.*|spring.datasource.clickhouse.password=${DB_CLICKHOUSE_PASSWORD}|" | \ sed "s|.*spring.datasource.clickhouse.url=.*|spring.datasource.clickhouse.url=${DB_CLICKHOUSE_URL}|" | \ sed "s|.*spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|spring.datasource.mysql.driver-class-name=com.mysql.jdbc.Driver|" | \ - sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|"' \ -> application.properties + sed "s|.*spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|spring.datasource.clickhouse.driver-class-name=com.clickhouse.jdbc.ClickHouseDriver|" > application.properties' || { + echo "❌ Error: Failed to generate application.properties using Docker." >&2 + rm -f "$TEMP_ENV_FILE" + exit 1 +} + + +rm -f "$TEMP_ENV_FILE" + + +if [ ! -f application.properties ]; then + echo " Error: application.properties file was not created." >&2 + exit 1 +fi -# Cleanup for the hack above -rm ../.env.temp \ No newline at end of file +echo " application.properties generated successfully." diff --git a/data/init.sh b/data/init.sh index 7898015..0c7abc3 100755 --- a/data/init.sh +++ b/data/init.sh @@ -1,11 +1,50 @@ #!/usr/bin/env bash + +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL ../.env | tail -n 1 | cut -d '=' -f 2-) +#this Extracts Docker image version from .env +VERSION=$(grep DOCKER_IMAGE_CBIOPORTAL "$SCRIPT_DIR/../.env" | tail -n 1 | cut -d '=' -f 2-) +if [ -z "$VERSION" ]; then + echo " Error: Unable to extract DOCKER_IMAGE_CBIOPORTAL version from .env." >&2 + exit 1 +fi + +# This Fetchs the schema file (cgds.sql) +echo " Fetching schema file (cgds.sql) from Docker image: $VERSION" +if ! docker run --rm -i "$VERSION" cat /cbioportal/db-scripts/cgds.sql > "$SCRIPT_DIR/cgds.sql"; then + echo "Error: Failed to fetch cgds.sql from Docker image." >&2 + exit 2 +fi + +# This Validates that cgds.sql was created successfully +if [ ! -f "$SCRIPT_DIR/cgds.sql" ]; then + echo " Error: cgds.sql file was not created." >&2 + exit 3 +fi + +echo " Schema file (cgds.sql) fetched successfully." + +# This Downloads the seed database (seed.sql.gz) +SEED_URL="https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" +echo " Downloading seed database from: $SEED_URL" +if ! wget -O "$SCRIPT_DIR/seed.sql.gz" "$SEED_URL"; then + echo " Error: Failed to download seed database from $SEED_URL." >&2 + exit 4 +fi + +# this Validates that seed.sql.gz was downloaded successfully +if [ ! -f "$SCRIPT_DIR/seed.sql.gz" ]; then + echo " Error: seed.sql.gz file was not downloaded." >&2 + exit 5 +fi -# Get the schema -docker run --rm -i $VERSION cat /cbioportal/db-scripts/cgds.sql > cgds.sql +echo " Seed database (seed.sql.gz) downloaded successfully." -# Download the combined hg19 + hg38 seed database -wget -O seed.sql.gz "https://github.com/cBioPortal/datahub/raw/master/seedDB/seed-cbioportal_hg19_hg38_v2.13.1.sql.gz" +echo "=== Data initialization completed successfully ===" diff --git a/debug_env.sh b/debug_env.sh new file mode 100755 index 0000000..a988987 --- /dev/null +++ b/debug_env.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# This runs Diagnostic checks for environment setup and logs + +check_file_permissions() { + echo " Checking file permissions..." + find . -name "*.sh" ! -perm /a+x && { + echo " Missing execute permissions:" + find . -name "*.sh" ! -perm /a+x | xargs ls -l + return 1 + } +} + +check_line_endings() { + echo " Checking line endings..." + find . -name "*.sh" -exec file {} \; | grep CRLF && { + echo " CRLF line endings detected:" + find . -name "*.sh" -exec file {} \; | grep CRLF | cut -d: -f1 | xargs dos2unix + return 1 + } +} + +main() { + echo "=== Starting Environment Diagnostics ===" + + # this is for System info checks + echo "## Platform Info ##" + uname -a + + # this is for File system checks + check_file_permissions || exit 1 + check_line_endings || exit 1 + + + ./init.sh || { + echo " Initialization failed. Check logs." + exit 1 + } + + echo " All diagnostics passed successfully." +} + +main | tee debug.log diff --git a/init.sh b/init.sh index 2eb7130..51bbfc2 100755 --- a/init.sh +++ b/init.sh @@ -1,6 +1,45 @@ #!/usr/bin/env bash + +# Enable strict error handling +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + +PS4='+ $(date "+%Y-%m-%d %H:%M:%S") : ' + +# Initialize debug log +DEBUG_LOG="debug_$(date +%Y%m%d%H%M%S).log" +exec > >(tee -a "$DEBUG_LOG") 2>&1 + +echo "=== Starting initialization at $(date) ===" + for d in config study data; do - cd $d; ./init.sh + echo "▶ Entering directory: $d" + if [ ! -f "$d/init.sh" ]; then + echo " Error: Missing $d/init.sh" >&2 + exit 1 + fi + + # Ensure execute permissions + chmod +x "$d/init.sh" || { + echo " Failed to set execute permissions on $d/init.sh" >&2 + exit 2 + } + + # Execute the subdirectory's init.sh script + if ! cd "$d"; then + echo " Failed to enter directory $d" >&2 + exit 3 + fi + + echo "⚙️ Running init.sh in $d" + if ! ./init.sh; then + echo " Critical failure in $d/init.sh" >&2 + exit 4 + fi + cd .. done +echo "=== Initialization completed successfully at $(date) ===" diff --git a/study/init.sh b/study/init.sh index 0be86a4..a479bb7 100755 --- a/study/init.sh +++ b/study/init.sh @@ -1,10 +1,53 @@ #!/usr/bin/env bash -# download data hub study and import + + +set -eo pipefail +if [ "${DEBUG_MODE}" = "true" ]; then + set -x +fi + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + DATAHUB_STUDIES="${DATAHUB_STUDIES:-lgg_ucsf_2014 msk_impact_2017}" + +# Base URL for downloading studies +DATAHUB_BASE_URL="https://cbioportal-datahub.s3.amazonaws.com" + + for study in ${DATAHUB_STUDIES}; do - wget -O ${study}.tar.gz "https://cbioportal-datahub.s3.amazonaws.com/${study}.tar.gz" - tar xvfz ${study}.tar.gz + echo " Processing study: $study" + + + STUDY_ARCHIVE="${SCRIPT_DIR}/${study}.tar.gz" + STUDY_DIR="${SCRIPT_DIR}/${study}" + + + if [ -f "$STUDY_ARCHIVE" ]; then + echo " Archive already exists: $STUDY_ARCHIVE" + else + # Download the study archive + echo "⬇ Downloading $study from $DATAHUB_BASE_URL" + if ! wget -O "$STUDY_ARCHIVE" "${DATAHUB_BASE_URL}/${study}.tar.gz"; then + echo " Error: Failed to download ${study}.tar.gz from $DATAHUB_BASE_URL" >&2 + exit 1 + fi + echo " Download completed: $STUDY_ARCHIVE" + fi + + # Extract the archive if it hasn't been extracted yet + if [ -d "$STUDY_DIR" ]; then + echo " Study directory already exists: $STUDY_DIR" + else + echo " Extracting $STUDY_ARCHIVE to $STUDY_DIR" + if ! tar xvfz "$STUDY_ARCHIVE" -C "$SCRIPT_DIR"; then + echo " Error: Failed to extract $STUDY_ARCHIVE" >&2 + exit 2 + fi + echo " Extraction completed: $STUDY_DIR" + fi + done + +echo "=== All studies processed successfully ==="