Merge pull request #566 from BU-ISCIII/develop

victor5lm · web-flow · commit a0ded9e8f762 · 2025-07-18T12:17:10.000+02:00
Develop merge for 2.2.12 release
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,32 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.2.12] - 2025-07-18 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.12
+
+### Credits
+
+- [Victor Lopez](https://github.com/victor5lm)
+
+### Template fixes and updates
+
+- Fixed IRMA's 99-stats lablog to take host reads from the samtools alignment stats (MultiQC) instead of the Kraken2 report [#564](https://github.com/BU-ISCIII/buisciii-tools/pull/564).
+- Fixed sgene_metrics.sh to handle warnings properly [#565](https://github.com/BU-ISCIII/buisciii-tools/pull/565).
+
+### Modules
+
+- Fixed finish module so that the clean module is run correctly [#564](https://github.com/BU-ISCIII/buisciii-tools/pull/564).
+- Fixed bioinfo_doc module so that a text file can properly be used for email notes [#564](https://github.com/BU-ISCIII/buisciii-tools/pull/564).
+
+#### Added enhancements
+
+#### Fixes
+
+#### Changed
+
+#### Removed
+
+### Requirements
+
 ## [2.2.11] - 2025-07-11 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.11
 
 ### Credits
diff --git a/buisciii/__main__.py b/buisciii/__main__.py
@@ -57,7 +57,7 @@ def run_buisciii():
     )
 
     # stderr.print("[green]                                          `._,._,'\n", highlight=False)
-    __version__ = "2.2.11"
+    __version__ = "2.2.12"
     stderr.print(
         "[grey39]    BU-ISCIII-tools version {}".format(__version__), highlight=False
     )
@@ -419,10 +419,15 @@ def finish(ctx, resolution, path, ask_path, sftp_folder, tmp_dir):
     """
     Service cleaning, remove big files, rename folders before copy and copy resolution FOLDER to sftp.
     """
-    print("Starting cleaning scratch directory: " + tmp_dir)
+
+    clean_tmp_dir = tmp_dir
+    if tmp_dir == "/scratch/bi/":
+        clean_tmp_dir = "/data/ucct/bi/scratch_tmp/bi"
+
+    print("Starting cleaning scratch directory: " + clean_tmp_dir)
     clean_scratch = buisciii.clean.CleanUp(
         resolution,
-        tmp_dir,
+        clean_tmp_dir,
         ask_path,
         "clean",
         ctx.obj["api_user"],
diff --git a/buisciii/bioinfo_doc.py b/buisciii/bioinfo_doc.py
@@ -256,7 +256,7 @@ def create_structure(self):
         if os.path.exists(self.service_folder):
             log.info("Already creted the service folder for %s", self.service_folder)
             stderr.print(
-                "[green] Skiping folder creation for service "
+                "[green] Skipping folder creation for service "
                 + self.service_folder
                 + ". Trying with subfolders"
             )
@@ -267,7 +267,7 @@ def create_structure(self):
                         self.service_folder,
                     )
                     stderr.print(
-                        "[green] Skiping folder creation for service "
+                        "[green] Skipping folder creation for service "
                         + self.service_folder
                         + "/"
                         + folder
@@ -547,7 +547,7 @@ def copy_images(self):
             "images",
         )
         if not os.path.exists(file_path):
-            stderr.print("[green] Coping images folder temporarylly to " + file_path)
+            stderr.print("[green] Copying images folder temporarily to " + file_path)
             images_folder = os.path.join(
                 os.path.dirname(os.path.realpath(__file__)), "assets/reports/md/images"
             )
@@ -678,11 +678,13 @@ def email_creation(self):
                             stderr.print(
                                 "No more attempts. Email notes will be given by prompt"
                             )
-                            email_data["email_notes"] = (
-                                buisciii.utils.ask_for_some_text(
-                                    msg="Write email notes"
-                                ).replace("\n", "<br />")
-                            )
+                            email_data["email_notes"] = None
+                    else:
+                        email_data["email_notes"] = None
+
+                    if email_data["email_notes"]:
+                        with open(os.path.expanduser(email_data["email_notes"])) as f:
+                            email_data["email_notes"] = f.read().replace("\n", "<br />")
                     else:
                         email_data["email_notes"] = buisciii.utils.ask_for_some_text(
                             msg="Write email notes"
diff --git a/buisciii/templates/IRMA/ANALYSIS/ANALYSIS01_IRMA/99-stats/lablog b/buisciii/templates/IRMA/ANALYSIS/ANALYSIS01_IRMA/99-stats/lablog
@@ -159,12 +159,11 @@ echo "
     pc_genome_greater_10x=\$(printf \"%s\\n\" \"\${pc10x[@]}\" | sort -n | awk '{sum+=\$1} END {printf \"%.2f\", (NR ? sum/NR : 0)}')
     virus_sequence=\$(awk -v sample=\"\${in}\" '\$1 == sample {ref = ref ? ref \",\" \$4 : \$4} END {if (ref) print ref}' ../06-variant-calling/sample_type_ref.txt)
     total_reads=\$(grep \"\\\"total_reads\\\"\" ../02-preprocessing/\${in}/\${in}_fastp.json | head -n1 | cut -d \":\" -f2 | sed \"s/,//g\")
-    reads_hostR1=\$(cat ../../*_TAXPROFILER/kraken2/*/\${in}_*.kraken2.report.txt | grep \"Homo sapiens\" | awk '{print \$2}')
-    reads_host_x2=\$((reads_hostR1 * 2))
-    pc_reads_host=\$(awk -v v1=\$total_reads -v v2=\$reads_host_x2 'BEGIN {printf \"%.2f\", (v2*100)/v1}')
+    reads_host=\$(awk -v sample=\"\${in}_run1\" '\$1 == sample {print int(\$2)}' ../../*_TAXPROFILER/multiqc/multiqc_data/samtools_alignment_plot.txt)
+    pc_reads_host=\$(awk -v v1=\$total_reads -v v2=\$reads_host 'BEGIN {printf \"%.2f\", (v2*100)/v1}')
     reads_virus=\$(awk -F\"\t\" -v id=\"\$in\" '\$1 == id {print \$3}' ../04-irma/irma_stats_flu.txt)
     pc_reads_virus=\$(awk -v v1=\$reads_virus -v v2=\$total_reads 'BEGIN {if (v2 > 0) printf \"%.2f\", (v1 / v2) * 100; else print \"NA\"}')
-    unmapped_reads=\$((total_reads - (reads_host_x2+reads_virus)))
+    unmapped_reads=\$((total_reads - (reads_host+reads_virus)))
     pc_unmapped=\$(awk -v v1=\$total_reads -v v2=\$unmapped_reads  'BEGIN {printf \"%.2f\", (v2/v1)*100}')
     qc_filtered=\$(grep \"\\\"total_reads\\\"\" ../02-preprocessing/\${in}/\${in}_fastp.json | head -n2 | tail -n1 | cut -d \":\" -f2 | sed \"s/,//g\")
     read_length=\$(unzip -p ../03-procQC/\${in}/\${in}_R1_filtered_fastqc.zip */fastqc_data.txt | grep \"Sequence length\" | cut -d \"-\" -f2)
@@ -206,7 +205,7 @@ echo "
     variants_PB1=\${variants[PB1]}
     variants_PB2=\${variants[PB2]}
 
-    echo -e \"\${in}\t\$virus_sequence\t\$flu_type\t\$flu_subtype\t\$clade\t\$clade_assignment_date\t\$clade_assignment_software_database_version\t\$total_reads\t\$qc_filtered\t\$reads_host_x2\t\$pc_reads_host\t\$reads_virus\t\$pc_reads_virus\t\$unmapped_reads\t\$pc_unmapped\t\$coverage_depth\t\$pc_genome_greater_10x\t\$pc_Ns\t\$variants_in_consensus\t\$variants_with_effect\t\$number_unambiguous_bases\t\$number_Ns\t\$read_length\t\$analysis_date\t\$cov_HA\t\$cov_MP\t\$cov_NA\t\$cov_NP\t\$cov_NS\t\$cov_PA\t\$cov_PB1\t\$cov_PB2\t\$cov10x_HA\t\$cov10x_MP\t\$cov10x_NA\t\$cov10x_NP\t\$cov10x_NS\t\$cov10x_PA\t\$cov10x_PB1\t\$cov10x_PB2\t\$perNs_HA\t\$perNs_MP\t\$perNs_NA\t\$perNs_NP\t\$perNs_NS\t\$perNs_PA\t\$perNs_PB1\t\$perNs_PB2\t\$variants_HA\t\$variants_MP\t\$variants_NA\t\$variants_NP\t\$variants_NS\t\$variants_PA\t\$variants_PB1\t\$variants_PB2\" >> summary_stats_\$(date \"+%Y%m%d\").tab
+    echo -e \"\${in}\t\$virus_sequence\t\$flu_type\t\$flu_subtype\t\$clade\t\$clade_assignment_date\t\$clade_assignment_software_database_version\t\$total_reads\t\$qc_filtered\t\$reads_host\t\$pc_reads_host\t\$reads_virus\t\$pc_reads_virus\t\$unmapped_reads\t\$pc_unmapped\t\$coverage_depth\t\$pc_genome_greater_10x\t\$pc_Ns\t\$variants_in_consensus\t\$variants_with_effect\t\$number_unambiguous_bases\t\$number_Ns\t\$read_length\t\$analysis_date\t\$cov_HA\t\$cov_MP\t\$cov_NA\t\$cov_NP\t\$cov_NS\t\$cov_PA\t\$cov_PB1\t\$cov_PB2\t\$cov10x_HA\t\$cov10x_MP\t\$cov10x_NA\t\$cov10x_NP\t\$cov10x_NS\t\$cov10x_PA\t\$cov10x_PB1\t\$cov10x_PB2\t\$perNs_HA\t\$perNs_MP\t\$perNs_NA\t\$perNs_NP\t\$perNs_NS\t\$perNs_PA\t\$perNs_PB1\t\$perNs_PB2\t\$variants_HA\t\$variants_MP\t\$variants_NA\t\$variants_NP\t\$variants_NS\t\$variants_PA\t\$variants_PB1\t\$variants_PB2\" >> summary_stats_\$(date \"+%Y%m%d\").tab
     echo -e \"-----Statistics for \$in correctly added into summary_stats_\$(date \"+%Y%m%d\").tab-----\n\"
     unset gene_coverage coverages_10x per_Ns variants
   done
diff --git a/buisciii/templates/viralrecon/ANALYSIS/sgene_metrics.sh b/buisciii/templates/viralrecon/ANALYSIS/sgene_metrics.sh
@@ -1,5 +1,20 @@
 #!/bin/bash
 
+# Activate the micromamba environment
+eval "$(micromamba shell hook --shell bash)"
+micromamba activate outbreakinfo
+
+# Ensure required tools are available
+for tool in blastn samtools bcftools; do
+  if ! command -v "$tool" >/dev/null 2>&1; then
+    echo "Error: $tool is not available. Make sure you have activated the corresponding micromamba environment. Aborting." >&2
+    exit 1
+  fi
+done
+
+# Initialize warning counter
+warning_count=0
+
 # Define directories for variant analysis
 input_fasta_dir=$(echo ./*_viralrecon_mapping/variants/ivar/consensus/bcftools)
 input_bam_dir=$(echo ./*_viralrecon_mapping/variants/bowtie2)
@@ -24,14 +39,16 @@ for fasta_file in $input_fasta_dir/*.consensus.fa; do
   
   ref_genome=${ref_map["$sample_name"]}
   if [ -z "$ref_genome" ]; then
-    echo "Warning: No reference found for sample $sample_name" >&2
+    echo "Warning: No reference found for sample $sample_name!" >&2
+    ((warning_count++))
     continue
   fi
 
   # Run BLASTn to get S-gene coordinates in the consensus sequence
   blast_output=$(blastn -query "$SGENE_OUTPUT" -subject "$fasta_file" -outfmt "6 sstart send" | sort -k2,2nr | head -1)
   if [ -z "$blast_output" ]; then
-    echo "Error: No BLAST hit found for sample $sample_name" >&2
+    echo "Warning: No BLAST hit found for sample $sample_name!" >&2
+    ((warning_count++))
     continue
   fi
 
@@ -68,11 +85,15 @@ for fasta_file in $input_fasta_dir/*.consensus.fa; do
   coverage_percentage=$(printf "%.2f" "$coverage_percentage")
 
   # Count S-Gene Frameshifts
-  vcf_file="$input_vcf_dir/consensus/${sample_name}.vcf.gz"
+  vcf_file="$input_vcf_dir/consensus/bcftools/${sample_name}.vcf.gz"
   indels=$(bcftools view -r "$sample_name:$s_gene_start-$s_gene_end" -i 'TYPE="indel"' "$vcf_file" 2>/dev/null | wc -l)
 
   # Save results to output file
   echo -e "$sample_name\t$ambiguous_percentage\t$coverage_percentage\t$indels\t$total_unambiguous_count\t$total_ns_count" >> $output_file
 done
 
-echo "Process completed. File generated: $output_file"
+if [[ "$warning_count" -eq 0 ]]; then
+  echo "✅ Process completed successfully. File generated: $output_file"
+else
+  echo "⚠️ Process completed with $warning_count warning(s). Check logs above. File generated: $output_file"
+fi
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "buisciii-tools"
-version = "2.2.11"
+version = "2.2.12"
 dynamic = ["dependencies"]
 
 authors = [