Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotfix for seqc_mama primary bash script #2448

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions src/analysis/persephone/SeqC_pipeline/BASH_seqc_mama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Title: mama script: take run commands, est db link, run stepgen
# Program by: Wiley Barton - 2022.02.27
# Modified for conda/docker pipeline - 2024.02.22
# last update - 2025.01.30
# last update - 2025.02.25
# Modified code sources:
# https://stackoverflow.com/questions/2043453/executing-multi-line-statements-in-the-one-line-command-line
# Notes: generate bash files according to user input for the completion of pipeline
Expand Down Expand Up @@ -36,6 +36,7 @@
# flesh out func_demo to build complete demo run within /DB/DEPO_demo
# auto compile file list from input dir in absence of provided list
# expand splash to include system params: cpu, mem, du of key directories
# implement pv for progress bar... tar -I pigz -xvf stuff.tar.gz | pv
#refs
# prodigal:https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119#citeas
# https://www.cyberciti.biz/tips/bash-shell-parameter-substitution-2.html
Expand All @@ -53,6 +54,8 @@ var_rep_len=${#var_rep_ary[@]}
#size check -db
v_vol_real=$(du -bs $v_dir_db | cut -f1 )
v_vol_none=60000
#system max cpu
v_sys_mem=$(nproc)
v_scrp_check=0
#var_rep_ary[var_rep_len]='x/y/z/cat'*'.kitty'
#var_rep_out=${var_rep_ary[@]}
Expand Down Expand Up @@ -1518,6 +1521,7 @@ if (( ${vopt_log} ));then
fi
printf 'Input file type: %s\n%s\n' "${vopt_file_type}" "${v_logblock1}" >> ${v_logfile}
# Check zip status - CRIT - extend to .tar files in dir
# pot. just exclude .tar
vt_L=$( printf '%s\n' "${v_file_in_good[@]}" | grep -E "\.gz|gzip$" | wc -l )
vt_R='0'
if [[ ${vt_L} -gt ${vt_R} ]];then
Expand Down Expand Up @@ -1724,12 +1728,21 @@ for istep in "${vopt_step[@]}";do
vin_head=''
vin_tail=''
fi
# proc check - ensure that too many processes are not called
if [[ -z ${venv_proc_max} ]];then
venv_proc_max=$(( ${venv_cpu_max} / 2 ))
fi
if [[ $(printf %.0f $(echo "${venv_cpu_max} * ${venv_proc_max}" | bc -l)) -gt "${v_sys_mem}" ]];then
vt="${venv_proc_max}"
while [[ $(printf %.0f $(echo "${venv_cpu_max} * ${vt}" | bc -l)) -gt "${v_sys_mem}" ]];do
((vt--))
#printf 'in%s\n' "${vt}"
done
#printf 'out%s\n' "${vt}"
venv_proc_max="${vt}"
fi
if [[ ${vopt_com_log} -eq 0 ]];then
#insert as awk print block - ex: vopt_com='{ printf "--output-format tsv --output-basename %s",va_nID }'
#improve with adjustment to :--processes <1>
if [[ -z ${venv_proc_max} ]];then
venv_proc_max=$(( ${venv_cpu_max} / 2 ))
fi
vin_com='{ printf " --remove-intermediate-output --reference-db %s --threads %s --processes %s --max-memory %sg --trimmomatic /opt/conda/envs/env_s1_kneaddata/share/trimmomatic --reorder","'${vin_host_rm}'",'${venv_cpu_max}','${venv_proc_max}','${venv_mem_max}' }'
fi
#WORKING EX
Expand Down Expand Up @@ -1986,7 +1999,7 @@ for istep in "${vopt_step[@]}";do
v_k2mpa_out=${v_kjoin_out/out/mpa_out_RA}
cat ${v_kjoin_out/out/mpa_out} | cut --fields 2${v_bk_col_frac} > "${v_k2mpa_out}"
#drop col type for match with meta data
sed -i "1 s/_$v_match//" "${v_k2mpa_out}"
sed -i "1,1s/_$v_match//g" "${v_k2mpa_out}"
#counts
v_match='num'
v_bk_cols=( $( head ${v_kjoin_out/out/mpa_out} -n 1 ) )
Expand All @@ -2006,7 +2019,7 @@ for istep in "${vopt_step[@]}";do
v_k2mpa_out=${v_kjoin_out/out/mpa_out_RC}
cat ${v_kjoin_out/out/mpa_out} | cut --fields 2${v_bk_col_num} > "${v_k2mpa_out}"
#drop col type for match with meta data
sed -i "1 s/_$v_match//" "${v_k2mpa_out}"
sed -i "1,1s/_$v_match//g" "${v_k2mpa_out}"
#default mars input is now "${v_k2mpa_out}" = /DB/DEPO_proc/step2_kraken/KB_S_mpa_out_RC.txt
vout_sX=${vout_s2}
# step keep-clean
Expand Down