Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
545 changes: 545 additions & 0 deletions CLIENT_INTEGRATION_GUIDE.md

Large diffs are not rendered by default.

170 changes: 127 additions & 43 deletions app/fel/fel.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,44 +13,114 @@ var fel = function (socket, stream, params) {
self.stream = stream;
self.params = params;

logger.info("Initializing FEL job with params: " + JSON.stringify({
id: params.analysis && params.analysis._id,
msaid: params.msa && params.msa._id
}));
// Check if this is a check-only operation
var isCheckOnly = params.checkOnly || false;

logger.info("FEL constructor called with:", {
stream_type: typeof stream,
stream_length: stream ? stream.length : 0,
stream_content: stream ? (stream.length > 100 ? stream.substring(0, 100) + "..." : stream) : "null",
params_keys: Object.keys(params),
params_full: JSON.stringify(params),
checkOnly: isCheckOnly
});

// object specific attributes
self.type = "fel";

// New attributes for multiple hits and site multihit
self.multiple_hits = self.params.analysis.multiple_hits || "None"; // e.g., [Double, Double+Triple, None]
self.site_multihit = self.params.analysis.site_multihit || "Estimate"; // e.g., [Estimate, Global]

// For check operations, we only need minimal initialization
if (isCheckOnly) {
// Set defaults for required fields
self.multiple_hits = params.multiple_hits || "None";
self.site_multihit = params.site_multihit || "Estimate";
self.branches = params.branches || "All";
self.bootstrap = params.bootstrap || false;
self.resample = params.resample || 1;
self.id = "check-" + Date.now();
self.msaid = "check";
self.genetic_code = params.genetic_code || "Universal";
self.rate_variation = "No";
self.ci = "No";
self.fn = __dirname + "/output/" + self.id;
self.output_dir = path.dirname(self.fn);
self.status_fn = self.fn + ".status";
self.results_short_fn = self.fn + ".fel";
self.results_fn = self.fn + ".FEL.json";
self.progress_fn = self.fn + ".fel.progress";
self.tree_fn = self.fn + ".tre";
} else {
// Normal operation with full parameters
var analysisParams = self.params.analysis || self.params;
self.multiple_hits = analysisParams.multiple_hits || "None";
self.site_multihit = analysisParams.site_multihit || "Estimate";
self.branches = analysisParams.branches || "All";

// bootstrap attributes
self.bootstrap = analysisParams.bootstrap || false;
self.resample = analysisParams.resample || 1;

// parameter attributes
if (self.params.msa) {
self.msaid = self.params.msa._id;
self.genetic_code = self.params.msa[0] ? code[self.params.msa[0].gencodeid + 1] : "Universal";
} else {
self.msaid = self.params.msaid || "unknown";
self.genetic_code = self.params.genetic_code || "Universal";
}

if (self.params.analysis) {
self.id = self.params.analysis._id || self.params.id || "unknown-" + Date.now();
self.nwk_tree = self.params.analysis.tagged_nwk_tree || self.params.nwk_tree || self.params.tree;
self.rate_variation = self.params.analysis.ds_variation == 1 ? "Yes" : "No";
self.ci = self.params.analysis.ci == true ? "Yes" : "No";
} else {
self.id = self.params.id || "unknown-" + Date.now();
self.nwk_tree = self.params.nwk_tree || self.params.tree || "";
self.rate_variation = self.params.rate_variation || "No";
self.ci = self.params.ci || "No";
}

// parameter-derived attributes
self.fn = __dirname + "/output/" + self.id;
self.output_dir = path.dirname(self.fn);
self.status_fn = self.fn + ".status";
self.results_short_fn = self.fn + ".fel";
self.results_fn = self.fn + ".FEL.json";
self.progress_fn = self.fn + ".fel.progress";
self.tree_fn = self.fn + ".tre";
}

// Set treemode with default value
self.treemode = self.params.treemode || "0";

self.qsub_script_name = "fel.sh";
self.qsub_script = __dirname + "/" + self.qsub_script_name;

// bootstrap attributes
self.bootstrap = self.params.analysis.bootstrap;
self.resample = self.params.analysis.resample;

// parameter attributes
self.msaid = self.params.msa._id;
self.id = self.params.analysis._id;
self.genetic_code = code[self.params.msa[0].gencodeid + 1];
self.nwk_tree = self.params.analysis.tagged_nwk_tree;
self.rate_variation = self.params.analysis.ds_variation == 1 ? "Yes" : "No";
self.ci = self.params.analysis.ci == true ? "Yes" : "No";

// parameter-derived attributes
self.fn = __dirname + "/output/" + self.id;
self.output_dir = path.dirname(self.fn);
self.status_fn = self.fn + ".status";
self.results_short_fn = self.fn + ".fel";
self.results_fn = self.fn + ".FEL.json";
self.progress_fn = self.fn + ".fel.progress";
self.tree_fn = self.fn + ".tre";

// Define parameters for job submission (different formats for qsub vs slurm)
if (config.submit_type === "slurm") {
// Define parameters for job submission (different formats for qsub vs slurm vs local)
if (config.submit_type === "local") {
// For local execution, the script path must be first
self.qsub_params = [
self.qsub_script,
"fn=" + self.fn,
"tree_fn=" + self.tree_fn,
"sfn=" + self.status_fn,
"pfn=" + self.progress_fn,
"rfn=" + self.results_short_fn,
"treemode=" + self.treemode,
"bootstrap=" + self.bootstrap,
"resample=" + self.resample,
"genetic_code=" + self.genetic_code,
"analysis_type=" + self.type,
"rate_variation=" + self.rate_variation,
"ci=" + self.ci,
"cwd=" + __dirname,
"msaid=" + self.msaid,
"procs=" + (config.fel_procs || 1),
"multiple_hits=" + self.multiple_hits,
"site_multihit=" + self.site_multihit,
"branches=" + self.branches
];
} else if (config.submit_type === "slurm") {
// Convert walltime from PBS format (DD:HH:MM:SS) to SLURM format (HH:MM:SS or minutes)
let slurmTime = "72:00:00"; // Default 3 days
if (config.fel_walltime) {
Expand Down Expand Up @@ -111,7 +181,9 @@ var fel = function (socket, stream, params) {
",multiple_hits=" +
self.multiple_hits +
",site_multihit=" +
self.site_multihit,
self.site_multihit +
",branches=" +
self.branches,
`--output=${self.output_dir}/fel_${self.id}_%j.out`,
`--error=${self.output_dir}/fel_${self.id}_%j.err`,
self.qsub_script
Expand Down Expand Up @@ -158,7 +230,9 @@ var fel = function (socket, stream, params) {
",multiple_hits=" +
self.multiple_hits +
",site_multihit=" +
self.site_multihit,
self.site_multihit +
",branches=" +
self.branches,
"-o",
self.output_dir,
"-e",
Expand All @@ -170,18 +244,28 @@ var fel = function (socket, stream, params) {
// Log the parameters being used
logger.info(`FEL job ${self.id}: Using ${config.submit_type} parameters: ${JSON.stringify(self.qsub_params)}`);

// Write tree to a file
fs.writeFile(self.tree_fn, self.nwk_tree, function (err) {
if (err) throw err;
});
// Skip file operations for check-only mode
if (!isCheckOnly) {
// Write tree to a file
logger.info(`FEL job ${self.id}: Writing tree file to ${self.tree_fn}`, {
tree_content: self.nwk_tree ? (self.nwk_tree.length > 100 ? self.nwk_tree.substring(0, 100) + "..." : self.nwk_tree) : "null"
});
fs.writeFile(self.tree_fn, self.nwk_tree, function (err) {
if (err) {
logger.error(`FEL job ${self.id}: Error writing tree file: ${err.message}`);
throw err;
}
logger.info(`FEL job ${self.id}: Tree file written successfully`);
});

// Ensure output directory exists
logger.info(`FEL job ${self.id}: Ensuring output directory exists at ${self.output_dir}`);
utilities.ensureDirectoryExists(self.output_dir);
// Ensure output directory exists
logger.info(`FEL job ${self.id}: Ensuring output directory exists at ${self.output_dir}`);
utilities.ensureDirectoryExists(self.output_dir);

// Ensure the progress file exists
logger.info(`FEL job ${self.id}: Creating progress file at ${self.progress_fn}`);
fs.openSync(self.progress_fn, "w");
// Ensure the progress file exists
logger.info(`FEL job ${self.id}: Creating progress file at ${self.progress_fn}`);
fs.openSync(self.progress_fn, "w");
}

logger.info(`FEL job ${self.id}: Initializing job`);
self.init();
Expand Down
128 changes: 110 additions & 18 deletions app/fel/fel.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,67 @@
# Set the PATH but skip module loading - system specific
export PATH=/usr/local/bin:$PATH

# Import KEY=VALUE command line arguments into same-named shell variables.
# Local execution passes the job parameters this way (e.g. "fn=/path/to/file").
# The value is everything after the first '='. Arguments whose key is not
# listed below are ignored.
for arg in "$@"; do
    case "$arg" in
        fn=*)             fn="${arg#*=}" ;;
        tree_fn=*)        tree_fn="${arg#*=}" ;;
        sfn=*)            sfn="${arg#*=}" ;;
        pfn=*)            pfn="${arg#*=}" ;;
        rfn=*)            rfn="${arg#*=}" ;;
        treemode=*)       treemode="${arg#*=}" ;;
        bootstrap=*)      bootstrap="${arg#*=}" ;;
        resample=*)       resample="${arg#*=}" ;;
        genetic_code=*)   genetic_code="${arg#*=}" ;;
        analysis_type=*)  analysis_type="${arg#*=}" ;;
        rate_variation=*) rate_variation="${arg#*=}" ;;
        ci=*)             ci="${arg#*=}" ;;
        cwd=*)            cwd="${arg#*=}" ;;
        msaid=*)          msaid="${arg#*=}" ;;
        procs=*)          procs="${arg#*=}" ;;
        multiple_hits=*)  multiple_hits="${arg#*=}" ;;
        site_multihit=*)  site_multihit="${arg#*=}" ;;
        branches=*)       branches="${arg#*=}" ;;
    esac
done

# Try to load modules if they exist, but don't fail if they don't
if [ -f /etc/profile.d/modules.sh ]; then
source /etc/profile.d/modules.sh
Expand Down Expand Up @@ -39,16 +100,40 @@ GENETIC_CODE=$genetic_code
RATE_VARIATION=$rate_variation
MULTIPLE_HITS=$multiple_hits
SITE_MULTIHIT=$site_multihit
BRANCHES=${branches:-"All"}
PROCS=$procs

HYPHY=$CWD/../../.hyphy/HYPHYMPI
# Select the HyPhy executable to run and store its path in HYPHY.
#
# Three candidate binaries are looked up under $CWD/../../.hyphy:
#   hyphy     - preferred when not running under SLURM (SLURM_JOB_ID empty)
#   HYPHYMP   - next choice when not running under SLURM
#   HYPHYMPI  - used under SLURM, or when neither of the above exists
# If none of the three files exists, fall back to a `hyphy` found on PATH,
# and finally to the default `hyphy` location so HYPHY is never empty.
HYPHY_REGULAR=$CWD/../../.hyphy/hyphy
HYPHY_NON_MPI=$CWD/../../.hyphy/HYPHYMP
HYPHY_MPI=$CWD/../../.hyphy/HYPHYMPI

# Check which HYPHY version to use
if [ -z "$SLURM_JOB_ID" ] && [ -f "$HYPHY_REGULAR" ]; then
    # Local execution and regular hyphy exists - use it
    HYPHY=$HYPHY_REGULAR
    echo "Using regular HYPHY for local execution: $HYPHY"
elif [ -z "$SLURM_JOB_ID" ] && [ -f "$HYPHY_NON_MPI" ]; then
    # Local execution and non-MPI version exists - use it
    HYPHY=$HYPHY_NON_MPI
    echo "Using non-MPI HYPHY for local execution: $HYPHY"
elif [ -f "$HYPHY_MPI" ]; then
    # Use MPI version (for cluster execution or if others not available)
    HYPHY=$HYPHY_MPI
    echo "Using MPI HYPHY: $HYPHY"
else
    # Fallback - try to find any HYPHY executable on PATH.
    # `command -v` is the POSIX built-in lookup; the external `which` is not
    # guaranteed to be installed and some versions print errors on stdout.
    HYPHY=$(command -v hyphy 2>/dev/null || echo "$HYPHY_REGULAR")
    echo "Using fallback HYPHY: $HYPHY"
fi

HYPHY_PATH=$CWD/../../.hyphy/res/
FEL=$HYPHY_PATH/TemplateBatchFiles/SelectionAnalyses/FEL.bf
RESULTS_FILE=$fn.FEL.json

export HYPHY_PATH=$HYPHY_PATH

trap 'echo "Error" > $STATUS_FILE; exit 1' ERR
trap 'echo "Error" > "$STATUS_FILE"; exit 1' ERR

# We don't need the MPI_COMMAND variable anymore as we're using direct commands
if [ -n "$SLURM_JOB_ID" ]; then
Expand All @@ -63,8 +148,13 @@ fi
echo "PROCS: $PROCS"
echo "SLURM_JOB_ID: $SLURM_JOB_ID"
echo "slurm_mpi_type: $slurm_mpi_type"
echo "PROGRESS_FILE: '$PROGRESS_FILE'"
echo "STATUS_FILE: '$STATUS_FILE'"
echo "FN: '$FN'"
echo "TREE_FN: '$TREE_FN'"
echo "RESULTS_FILE: '$RESULTS_FILE'"

if [ $BOOTSTRAP = "true" ]
if [ "$BOOTSTRAP" != "false" ] && [ "$BOOTSTRAP" != "0" ] && [ -n "$BOOTSTRAP" ]
then
echo "Running with bootstrap"
if [ -n "$SLURM_JOB_ID" ]; then
Expand All @@ -77,19 +167,20 @@ then
if [ -f "$HYPHY_NON_MPI" ]; then
echo "Using non-MPI HYPHY: $HYPHY_NON_MPI"
export TOLERATE_NUMERICAL_ERRORS=1
echo "$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches FG --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE"
$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches FG --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE
echo "$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE""
$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE"
else
echo "Non-MPI HYPHY not found at $HYPHY_NON_MPI, attempting to use MPI version"
export TOLERATE_NUMERICAL_ERRORS=1
echo "srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches FG --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE"
srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches FG --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE
echo "srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE""
srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE"
fi
else
# Using mpirun for non-SLURM environments
# For local execution, use the HYPHY executable determined above
echo "Using local HYPHY execution: $HYPHY"
export TOLERATE_NUMERICAL_ERRORS=1
echo "mpirun -np $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches FG --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE"
mpirun -np $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches FG --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE
echo "$HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> \"$PROGRESS_FILE\""
$HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --resample $RESAMPLE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE"
fi
else
echo "Running without bootstrap"
Expand All @@ -103,20 +194,21 @@ else
if [ -f "$HYPHY_NON_MPI" ]; then
echo "Using non-MPI HYPHY: $HYPHY_NON_MPI"
export TOLERATE_NUMERICAL_ERRORS=1
echo "$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches All --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE"
$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches All --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE
echo "$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE""
$HYPHY_NON_MPI LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE"
else
echo "Non-MPI HYPHY not found at $HYPHY_NON_MPI, attempting to use MPI version"
export TOLERATE_NUMERICAL_ERRORS=1
echo "srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches All --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE"
srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches All --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE
echo "srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE""
srun --mpi=$MPI_TYPE -n $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE"
fi
else
# Using mpirun for non-SLURM environments
# For local execution, use the HYPHY executable determined above
echo "Using local HYPHY execution: $HYPHY"
export TOLERATE_NUMERICAL_ERRORS=1
echo "mpirun -np $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches All --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE"
mpirun -np $PROCS $HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches All --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> $PROGRESS_FILE
echo "$HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> \"$PROGRESS_FILE\""
$HYPHY LIBPATH=$HYPHY_PATH $FEL --alignment $FN --tree $TREE_FN --code $GENETIC_CODE --branches $BRANCHES --srv $RATE_VARIATION --output $RESULTS_FILE --ci $CI --multiple-hits $MULTIPLE_HITS --site-multihit $SITE_MULTIHIT >> "$PROGRESS_FILE"
fi
fi

echo "Completed" > $STATUS_FILE
echo "Completed" > "$STATUS_FILE"
Loading