Skip to content

Commit dd3cbdb

Browse files
authored
Merge pull request #520 from BU-ISCIII/develop
Develop merge for 2.2.9 release
2 parents a4a578d + 891af7e commit dd3cbdb

File tree

9 files changed

+121
-15
lines changed

9 files changed

+121
-15
lines changed

CHANGELOG.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,36 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [2.2.9] - 2025-05-13 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.9
8+
9+
### Credits
10+
11+
- [Victor Lopez](https://github.com/victor5lm)
12+
- [Alejandro Bernabeu](https://github.com/Aberdur)
13+
- [Juan Ledesma](https://github.com/juanledesma78)
14+
15+
### Template fixes and updates
16+
17+
- Updated create_summary_report.sh to properly handle single end reads [#509](https://github.com/BU-ISCIII/buisciii-tools/pull/509).
18+
- Fix relative path handling in snpeff/snpsift annotation [#509](https://github.com/BU-ISCIII/buisciii-tools/pull/509).
19+
- Added sed to lablog_bam2fq so that _R1.bam is removed and the variable sample is created properly for those sample IDs having several underscores (i.e. EPI_ISL_666) [#490](https://github.com/BU-ISCIII/buisciii-tools/pull/490).
20+
- Update IRMA 99-stats lablog to raise Error if taxprofiler results are missing [#515](https://github.com/BU-ISCIII/buisciii-tools/pull/515).
21+
- Added a new lablog to create a .csv file for software versions in IRMA's template [#514](https://github.com/BU-ISCIII/buisciii-tools/pull/514).
22+
- Fixed wrong variable definition in IRMA's 99-stats lablog and added Nextclade's info into viralrecon's create_summary_report.sh script to be added into the mapping_illumina report [#518](https://github.com/BU-ISCIII/buisciii-tools/pull/518).
23+
- Added virus_sequence variable into IRMA's 99-stats lablog for the creation of the summary stats report [#519](https://github.com/BU-ISCIII/buisciii-tools/pull/519).
24+
25+
### Modules
26+
27+
#### Added enhancements
28+
29+
#### Fixes
30+
31+
#### Changed
32+
33+
#### Removed
34+
35+
### Requirements
36+
737
## [2.2.8] - 2025-04-29 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.8
838

939
### Credits
@@ -27,6 +57,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2757
- Fixed errors in IRMA template and fixed errors in irma2vcf script [#500](https://github.com/BU-ISCIII/buisciii-tools/pull/500)
2858
- Modified artic bed version in lablog_viralrecon for SARS-CoV-2 analysis [#505](https://github.com/BU-ISCIII/buisciii-tools/pull/505)
2959

60+
3061
### Modules
3162

3263
#### Added enhancements

buisciii/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def run_buisciii():
5757
)
5858

5959
# stderr.print("[green] `._,._,'\n", highlight=False)
60-
__version__ = "2.2.8"
60+
__version__ = "2.2.9"
6161
stderr.print(
6262
"[grey39] BU-ISCIII-tools version {}".format(__version__), highlight=False
6363
)

buisciii/templates/IRMA/ANALYSIS/ANALYSIS01_IRMA/05-nextclade/lablog

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ echo "
3131
3232
# Run nextclade for all sequences from A_HA.txt or B_HA.txt, in order to determine their clade.
3333
echo \"Running nextclade for \$subtype_folder with file \$input_file...\"
34-
srun --chdir ${scratch_dir} --output logs/NEXTCLADE_RUN_\$(basename \$input_file)%j.log --job-name NEXTCLADE_RUN_\$(basename \$input_file) --partition short_idx --time 2:00:00 singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/nextclade\:3.9.1--h9ee0642_0 nextclade run \"\$input_file\" -d \"\$nextclade_dataset\" -O \"./\$subtype_folder/\" -v &
34+
srun --chdir ${scratch_dir} --output logs/NEXTCLADE_RUN_\${subtype_folder}_%j.log --job-name NEXTCLADE_RUN_\${subtype_folder} --partition short_idx --time 2:00:00 singularity exec -B ${scratch_dir}/../../../ /data/ucct/bi/pipelines/singularity-images/nextclade\:3.9.1--h9ee0642_0 nextclade run \"\$input_file\" -d \"\$nextclade_dataset\" -O \"./\$subtype_folder/\" --verbosity info &
3535
else
3636
echo \"Warning: Folder \$folder_path does not exist, skipping...\"
3737
fi

buisciii/templates/IRMA/ANALYSIS/ANALYSIS01_IRMA/06-variant-calling/lablog

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,12 @@ cat ./sample_type_ref.txt | xargs -I {} echo "
9494
micromamba activate refgenie_v0.12.1
9595
dataDir=$(refgenie seek snpeff/flu_surveillance_db -c /data/ucct/bi/references/refgenie/genome_config.yaml)
9696
echo -e \"-----Annotating \$sample_id \${fragment} vcf file-----\"
97-
singularity exec -B /data/ucct/bi /data/ucct/bi/pipelines/singularity-images/snpeff:5.2--hdfd78af_1.1 snpEff \
97+
singularity exec -B /data/ucct/bi -B ${scratch_dir} /data/ucct/bi/pipelines/singularity-images/snpeff:5.2--hdfd78af_1.1 snpEff \
9898
-v \
9999
-dataDir "\$dataDir" \
100100
-c genome.config \
101101
"\$ref" \
102-
vcf_files/\${sample_id}/\${sample_id}_\${fragment}.vcf > annotated_vcfs/\${sample_id}/\${sample_id}_\${fragment}.snpeff.vcf
102+
${scratch_dir}/vcf_files/\${sample_id}/\${sample_id}_\${fragment}.vcf > annotated_vcfs/\${sample_id}/\${sample_id}_\${fragment}.snpeff.vcf
103103
echo -e \"-----Finished annotating \$sample_id \${fragment} vcf file-----\"
104104
" > _03_snpeff.sh
105105

@@ -115,11 +115,11 @@ cat ./sample_type_ref.txt | xargs -I {} echo "
115115
micromamba activate refgenie_v0.12.1
116116
dataDir=$(refgenie seek snpeff/flu_surveillance_db -c /data/ucct/bi/references/refgenie/genome_config.yaml)
117117
echo -e \"-----Extracting relevant fields for \$sample_id \${fragment} vcf file-----\"
118-
singularity exec -B /data/ucct/bi /data/ucct/bi/pipelines/singularity-images/snpsift\:5.2--hdfd78af_0 SnpSift \
118+
singularity exec -B /data/ucct/bi -B ${scratch_dir} /data/ucct/bi/pipelines/singularity-images/snpsift\:5.2--hdfd78af_0 SnpSift \
119119
extractFields \
120120
-s "," \
121121
-e "." \
122-
annotated_vcfs/\${sample_id}/\${sample_id}_\${fragment}.snpeff.vcf \
122+
${scratch_dir}/annotated_vcfs/\${sample_id}/\${sample_id}_\${fragment}.snpeff.vcf \
123123
CHROM \
124124
POS \
125125
REF \

buisciii/templates/IRMA/ANALYSIS/ANALYSIS01_IRMA/99-stats/lablog

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,18 @@ echo "
7272

7373
# Create a table with relevant statistics.
7474
echo "
75+
# Verify that Taxprofiler results exist before proceeding
76+
TAXPROFILER_DIR=\$(find ../../ -type d -name '*_TAXPROFILER' | head -n 1)
77+
78+
if [[ -z \"\$TAXPROFILER_DIR\" ]] || ! compgen -G \"\$TAXPROFILER_DIR/kraken2/*/*.kraken2.report.txt\" > /dev/null; then
79+
echo \"ERROR: Taxprofiler results not found in path: \$TAXPROFILER_DIR/kraken2/*/*.kraken2.report.txt. Please execute Taxprofiler before running this script.\"
80+
exit 1
81+
fi
82+
7583
# Activate the conda environment
7684
eval \"\$(micromamba shell hook --shell bash)\"
7785
micromamba activate refgenie_v0.12.1
78-
HEADER=\"sample\tflu_type\tflu_subtype\tclade\ttotalreads\tqc_filtered_reads\treads_host\t%readshost\treads_virus\t%readsvirus\tunmapped_reads\t%unmappedreads\tmedianDPcoveragevirus\tCoverage>10x(%)\t%Ns10x\tVariantsinconsensusx10\tMissenseVariants\tTotal_Unambiguous_Bases\tTotal_Ns_count\tread_length\tanalysis_date\tcov_HA\tcov_MP\tcov_NA\tcov_NP\tcov_NS\tcov_PA\tcov_PB1\tcov_PB2\t10xcov_HA(%)\t10xcov_MP(%)\t10xcov_NA(%)\t10xcov_NP(%)\t10xcov_NS(%)\t10xcov_PA(%)\t10xcov_PB1(%)\t10xcov_PB2(%)\tperNs_HA\tperNs_MP\tperNs_NA\tperNs_NP\tperNs_NS\tperNs_PA\tperNs_PB1\tperNs_PB2\tvariants_HA\tvariants_MP\tvariants_NA\tvariants_NP\tvariants_NS\tvariants_PA\tvariants_PB1\tvariants_PB2\"
86+
HEADER=\"sample\tVirussequence\tflu_type\tflu_subtype\tclade\tclade_assignment_date\tclade_assignment_software_database_version\ttotalreads\tqc_filtered_reads\treads_host\t%readshost\treads_virus\t%readsvirus\tunmapped_reads\t%unmappedreads\tmedianDPcoveragevirus\tCoverage>10x(%)\t%Ns10x\tVariantsinconsensusx10\tMissenseVariants\tTotal_Unambiguous_Bases\tTotal_Ns_count\tread_length\tanalysis_date\tcov_HA\tcov_MP\tcov_NA\tcov_NP\tcov_NS\tcov_PA\tcov_PB1\tcov_PB2\t10xcov_HA(%)\t10xcov_MP(%)\t10xcov_NA(%)\t10xcov_NP(%)\t10xcov_NS(%)\t10xcov_PA(%)\t10xcov_PB1(%)\t10xcov_PB2(%)\tperNs_HA\tperNs_MP\tperNs_NA\tperNs_NP\tperNs_NS\tperNs_PA\tperNs_PB1\tperNs_PB2\tvariants_HA\tvariants_MP\tvariants_NA\tvariants_NP\tvariants_NS\tvariants_PA\tvariants_PB1\tvariants_PB2\"
7987
echo -e \${HEADER} > summary_stats_\$(date \"+%Y%m%d\").tab
8088
8189
cat ../samples_id.txt | while read in; do
@@ -149,6 +157,7 @@ echo "
149157
variants_in_consensus=\$(printf \"%s\\n\" \"\${variants_consensus[@]}\" | awk '{sum+=\$1} END{print sum}')
150158
variants_with_effect=\$(awk -F, -v sample=\"\$in\" '\$1 == sample && \$13 == \"missense_variant\" {count++} END {print count+0}' variants_long_table.csv)
151159
pc_genome_greater_10x=\$(printf \"%s\\n\" \"\${pc10x[@]}\" | sort -n | awk '{sum+=\$1} END {printf \"%.2f\", (NR ? sum/NR : 0)}')
160+
virus_sequence=\$(awk -v sample=\"\${in}\" '\$1 == sample {ref = ref ? ref \",\" \$4 : \$4} END {if (ref) print ref}' ../06-variant-calling/sample_type_ref.txt)
152161
total_reads=\$(grep \"\\\"total_reads\\\"\" ../02-preprocessing/\${in}/\${in}_fastp.json | head -n1 | cut -d \":\" -f2 | sed \"s/,//g\")
153162
reads_hostR1=\$(cat ../../*_TAXPROFILER/kraken2/*/\${in}_*.kraken2.report.txt | grep \"Homo sapiens\" | awk '{print \$2}')
154163
reads_host_x2=\$((reads_hostR1 * 2))
@@ -162,6 +171,8 @@ echo "
162171
number_unambiguous_bases=\$(awk -F \"\t\" -v id=\"\$in\" '\$1 == id {print \$2}' qc_metrics.tsv)
163172
number_Ns=\$(awk -F \"\t\" -v id=\"\$in\" '\$1 == id {print \$3}' qc_metrics.tsv)
164173
clade=\$(awk -F \";\" -v sample=\"\$(echo \$in | sed \"s/-/\\//g\")_HA\" '\$2 == sample {print \$3}' ../05-nextclade/nextclade_combined.csv)
174+
clade_assignment_date=\$(cat ../05-nextclade/\${flu_type}*/nextclade.json | awk -F'\"' '/createdAt/ {print \$4}' | cut -d 'T' -f 1 | tr -d '-' | head -n 1)
175+
clade_assignment_software_database_version=\$(cat ../05-nextclade/logs/NEXTCLADE_RUN_\${flu_type}_\$(echo \${flu_subtype} | cut -d \"N\" -f 1)*.log | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}--[0-9]{2}-[0-9]{2}-[0-9]{2}Z' | head -n 1)
165176
cov_HA=\${gene_coverage[HA]}
166177
cov_MP=\${gene_coverage[MP]}
167178
cov_NA=\${gene_coverage[NA]}
@@ -195,8 +206,11 @@ echo "
195206
variants_PB1=\${variants[PB1]}
196207
variants_PB2=\${variants[PB2]}
197208
198-
echo -e \"\${in}\t\$flu_type\t\$flu_subtype\t\$clade\t\$total_reads\t\$qc_filtered\t\$reads_host_x2\t\$pc_reads_host\t\$reads_virus\t\$pc_reads_virus\t\$unmapped_reads\t\$pc_unmapped\t\$coverage_depth\t\$pc_genome_greater_10x\t\$pc_Ns\t\$variants_in_consensus\t\$variants_with_effect\t\$number_unambiguous_bases\t\$number_Ns\t\$read_length\t\$analysis_date\t\$cov_HA\t\$cov_MP\t\$cov_NA\t\$cov_NP\t\$cov_NS\t\$cov_PA\t\$cov_PB1\t\$cov_PB2\t\$cov10x_HA\t\$cov10x_MP\t\$cov10x_NA\t\$cov10x_NP\t\$cov10x_NS\t\$cov10x_PA\t\$cov10x_PB1\t\$cov10x_PB2\t\$perNs_HA\t\$perNs_MP\t\$perNs_NA\t\$perNs_NP\t\$perNs_NS\t\$perNs_PA\t\$perNs_PB1\t\$perNs_PB2\t\$variants_HA\t\$variants_MP\t\$variants_NA\t\$variants_NP\t\$variants_NS\t\$variants_PA\t\$variants_PB1\t\$variants_PB2\" >> summary_stats_\$(date \"+%Y%m%d\").tab
209+
echo -e \"\${in}\t\$virus_sequence\t\$flu_type\t\$flu_subtype\t\$clade\t\$clade_assignment_date\t\$clade_assignment_software_database_version\t\$total_reads\t\$qc_filtered\t\$reads_host_x2\t\$pc_reads_host\t\$reads_virus\t\$pc_reads_virus\t\$unmapped_reads\t\$pc_unmapped\t\$coverage_depth\t\$pc_genome_greater_10x\t\$pc_Ns\t\$variants_in_consensus\t\$variants_with_effect\t\$number_unambiguous_bases\t\$number_Ns\t\$read_length\t\$analysis_date\t\$cov_HA\t\$cov_MP\t\$cov_NA\t\$cov_NP\t\$cov_NS\t\$cov_PA\t\$cov_PB1\t\$cov_PB2\t\$cov10x_HA\t\$cov10x_MP\t\$cov10x_NA\t\$cov10x_NP\t\$cov10x_NS\t\$cov10x_PA\t\$cov10x_PB1\t\$cov10x_PB2\t\$perNs_HA\t\$perNs_MP\t\$perNs_NA\t\$perNs_NP\t\$perNs_NS\t\$perNs_PA\t\$perNs_PB1\t\$perNs_PB2\t\$variants_HA\t\$variants_MP\t\$variants_NA\t\$variants_NP\t\$variants_NS\t\$variants_PA\t\$variants_PB1\t\$variants_PB2\" >> summary_stats_\$(date \"+%Y%m%d\").tab
199210
echo -e \"-----Statistics for \$in correctly added into summary_stats_\$(date \"+%Y%m%d\").tab-----\n\"
200211
unset gene_coverage coverages_10x per_Ns variants
201212
done
202-
" > _05_create_mapping_stats.sh
213+
" > _05_create_summary_stats.sh
214+
215+
# Run _05_create_summary_stats.sh
216+
echo "srun --partition middle_idx --chdir ${scratch_dir} --output logs/SUMMARY_STATS.%j.log --job-name SUMMARY_STATS --time 01:00:00 bash ${scratch_dir}/_05_create_summary_stats.sh &" > _05_run_create_summary_stats.sh
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
3+
# Create a table with software versions.
4+
echo "
5+
HEADER=\"software_name,software_version\"
6+
echo -e \${HEADER} > versions.csv
7+
8+
IRMA_version=\$(/data/ucct/bi/pipelines/flu-amd/flu-amd-1.2.0/IRMA | grep -o \"v[0-9]\+\.[0-9]\+\.[0-9]\+\" | head -n 1)
9+
dehosting_method_software_version=\$(cat ../*_ANALYSIS02_TAXPROFILER/pipeline_info/nf_core_pipeline_software_mqc_versions.yml | grep \"nf-core/taxprofiler:\" | awk '{print \$NF}')
10+
preprocessing_software_version=\$(find 02-preprocessing/ -type f -name \"*_fastp.html\" | shuf -n 1 | xargs grep -oP 'fastp version:</td><td[^>]*>\K[0-9.]+' | sed 's/^/v/')
11+
clade_assignment_software_version=\$(find ./05-nextclade/ -type f -name \"nextclade.json\" | shuf -n 1 | xargs grep -o '\"nextcladeAlgoVersion\": *\"[^\"]*\"' | awk -F'\"' '{print \"v\"\$4}')
12+
mafft_version=\$(find 06-variant-calling/logs/ -type f -name \"ALIGN-*\" | shuf -n 1 | xargs grep -o \"Version [0-9.]\+\" | head -n 1 | cut -d \" \" -f 2 | sed \"s/^/v/g\")
13+
snpeff_version=\$(find 06-variant-calling/logs/ -type f -name \"SNPEFF-*\" | shuf -n 1 | xargs grep -o -m 1 \"SnpEff version SnpEff [0-9.]\+\" | head -n 1 | cut -d \" \" -f 4 | sed \"s/^/v/g\")
14+
snpsift_version=\$snpeff_version
15+
16+
declare -A software
17+
software=(
18+
[\"IRMA (Iterative Refinement Meta-Assembler)\"]=\$IRMA_version
19+
[\"nf-core/taxprofiler\"]=\$dehosting_method_software_version
20+
[\"fastp\"]=\$preprocessing_software_version
21+
[\"nextclade\"]=\$clade_assignment_software_version
22+
[\"mafft\"]=\$mafft_version
23+
[\"snpEff\"]=\$snpeff_version
24+
[\"snpSift\"]=\$snpsift_version)
25+
26+
for tool in \"\${!software[@]}\"; do
27+
echo -e \"\${tool}\t\${software[\$tool]}\" >> versions.csv
28+
done
29+
30+
# Convert tabs to commas for proper CSV format
31+
sed -i 's/\t/,/g' versions.csv
32+
33+
" > get_versions_table.sh

buisciii/templates/viralrecon/ANALYSIS/create_summary_report.sh

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ USER=$(pwd | cut -d '/' -f7 | cut -d '_' -f4)
66
HOST=$(pwd | cut -d '/' -f9 | cut -d '_' -f4 | tr '[:upper:]' '[:lower:]' | sed 's/.*/\u&/')
77

88
# Define header for output file
9-
HEADER="run\tuser\thost\tVirussequence\tsample\ttotalreads\treadshostR1\treadshost\t%readshost\treadsvirus\t%readsvirus\tunmappedreads\t%unmapedreads\tmedianDPcoveragevirus\tCoverage>10x(%)\tVariantsinconsensusx10\tMissenseVariants\t%Ns10x\tLineage\tread_length\tanalysis_date"
9+
HEADER="run\tuser\thost\tVirussequence\tsample\ttotalreads\treadshostR1\treadshost\t%readshost\treadsvirus\t%readsvirus\tunmappedreads\t%unmappedreads\tmedianDPcoveragevirus\tCoverage>10x(%)\tVariantsinconsensusx10\tMissenseVariants\t%Ns10x\tLineage\tclade_assignment\tclade_assignment_software_database_version\tclade_assignment_date\tread_length\tanalysis_date"
1010

1111
# Print header to output file
1212
echo -e $HEADER > mapping_illumina_$(date '+%Y%m%d').tab
@@ -21,7 +21,15 @@ do
2121
total_reads=$(grep 'total_reads' ${arr[1]}*/fastp/${arr[0]}.fastp.json | head -n2 | tail -n1 | cut -d ':' -f2 | sed 's/,//g')
2222

2323
reads_hostR1=$(cat ${arr[1]}*/kraken2/${arr[0]}.kraken2.report.txt | grep -v 'unclassified' | cut -f3 | awk '{s+=$1}END{print s}')
24-
reads_host_x2=$(echo $((reads_hostR1 * 2)) )
24+
25+
if [ -f "../../RAW/${arr[0]}_R2.fastq.gz" ]; then
26+
# Paired-end reads
27+
reads_host_x2=$(echo $((reads_hostR1 * 2)) )
28+
else
29+
# Single-end reads
30+
reads_host_x2=$reads_hostR1
31+
fi
32+
2533
perc_host=$(echo $(awk -v v1=$total_reads -v v2=$reads_host_x2 'BEGIN {print (v2*100)/v1}') )
2634

2735
reads_virus=$(cat ${arr[1]}*/variants/bowtie2/samtools_stats/${arr[0]}.sorted.bam.flagstat | grep '+ 0 mapped' | cut -d ' ' -f1)
@@ -46,6 +54,26 @@ do
4654

4755
analysis_date=$(date '+%Y%m%d')
4856

57+
clade=$(tail -n +2 */variants/ivar/consensus/bcftools/nextclade/${arr[0]}.csv | cut -d ";" -f 3)
58+
clade_assignment_date=$analysis_date
59+
clade_assignment_software_database_version=$(cat *_viralrecon.log | grep 'nextclade_dataset_tag' | awk -F ': ' '{print $2}')
60+
lineage_analysis_date=$analysis_date
61+
lineage_algorithm_software_version=$(cat /data/ucct/bi/references/pangolin/$lineage_analysis_date/*_pangolin.log | grep -oP 'pangolin-data updated to \K[^ ]+')
62+
63+
# Updating the pangolin csv files
64+
temp_file="${arr[0]}_tmp.csv"
65+
touch $temp_file
66+
67+
# Read and update the CSV
68+
{
69+
IFS= read -r header
70+
echo "${header},lineage_assignment_date,lineage_assignment_database_version"
71+
IFS= read -r row
72+
echo "${row},$lineage_analysis_date,$lineage_algorithm_software_version"
73+
} < ./*/variants/ivar/consensus/bcftools/pangolin/${arr[0]}.pangolin.csv > $temp_file
74+
75+
mv $temp_file ./*/variants/ivar/consensus/bcftools/pangolin/${arr[0]}.pangolin.csv
76+
4977
# Introduce data row into output file
50-
echo -e "${RUN}\t${USER}\t${HOST}\t${arr[1]}\t${arr[0]}\t$total_reads\t$reads_hostR1\t$reads_host_x2\t$perc_host\t$reads_virus\t$reads_virus_perc\t$unmapped_reads\t$perc_unmapped\t$medianDPcov\t$cov10x\t$vars_in_cons10x\t$missense\t$ns_10x_perc\t$lineage\t$read_length\t$analysis_date" >> mapping_illumina_$(date '+%Y%m%d').tab
51-
done
78+
echo -e "${RUN}\t${USER}\t${HOST}\t${arr[1]}\t${arr[0]}\t$total_reads\t$reads_hostR1\t$reads_host_x2\t$perc_host\t$reads_virus\t$reads_virus_perc\t$unmapped_reads\t$perc_unmapped\t$medianDPcov\t$cov10x\t$vars_in_cons10x\t$missense\t$ns_10x_perc\t$lineage\t$clade\t$clade_assignment_software_database_version\t$clade_assignment_date\t$read_length\t$analysis_date" >> mapping_illumina_$(date '+%Y%m%d').tab
79+
done

buisciii/templates/viralrecon/RAW/lablog_bam2fq

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ fi
1111

1212
find . -maxdepth 1 -type f -name "*.bam" | while read -r filepath; do
1313
filename=$(basename "$filepath")
14-
sample=$(echo "$filename" | cut -d '_' -f1)
14+
sample=$(echo "$filename"| sed -E 's/_R[12]\.bam//g')
1515

1616
output_log="logs/BAM2FQ.${sample}.%j.log"
1717

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "buisciii-tools"
7-
version = "2.2.8"
7+
version = "2.2.9"
88
dynamic = ["dependencies"]
99

1010
authors = [

0 commit comments

Comments
 (0)