Skip to content

Commit e23816b

Browse files
committed
refactor/remove
1 parent d921e8e commit e23816b

File tree

1 file changed

+23
-39
lines changed

1 file changed

+23
-39
lines changed

tasks/skylab/StarAlign.wdl

Lines changed: 23 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -355,19 +355,17 @@ task STARsoloFastq {
355355
samtools reheader header.txt Aligned.sortedByCoord.out.bam > Aligned.sortedByCoord.out.reheader.bam
356356
357357
echo "UMI LEN " $UMILen
358+
touch barcodes_sn_rna.tsv features_sn_rna.tsv matrix_sn_rna.mtx CellReads_sn_rna.stats Features_sn_rna.stats Summary_sn_rna.csv UMIperCellSorted_sn_rna.txt
358359
359360
###########################################################################
360361
# SAVE OUTPUT FILES
361362
###########################################################################
362-
touch barcodes_sn_rna.tsv features_sn_rna.tsv matrix_sn_rna.mtx CellReads_sn_rna.stats Features_sn_rna.stats Summary_sn_rna.csv UMIperCellSorted_sn_rna.txt
363-
364363
# Function to move .mtx files to /cromwell_root/
365364
# move_mtx_files DIRECTORY
#   $1 - directory to process.
# Prints "Processing DIRECTORY", then uses find (-maxdepth 1, -type f,
# -name "*.mtx") to locate .mtx files and moves each one into /cromwell_root/,
# echoing "Moving <path>" per file.
# This is a rendered diff, so both sides of the change appear in the body:
# the find after the "368-" gutter is the removed version (searches DIRECTORY
# itself); the find after the "367+" gutter is the added version (searches
# DIRECTORY/raw).
# NOTE(review): xargs -I{} substitutes each path textually into the sh -c
# script, so a path containing whitespace or shell metacharacters would be
# re-parsed by sh -- confirm the paths involved can never contain such characters.
move_mtx_files() {
366365
local directory=$1
367366
echo "Processing $directory"
368-
find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'echo Moving {}; mv {} /cromwell_root/'
367+
find "${directory}/raw" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'echo Moving {}; mv {} /cromwell_root/'
369368
}
370-
371369
# Function to move and rename common files
372370
move_common_files() {
373371
local src_dir=$1
@@ -395,7 +393,6 @@ task STARsoloFastq {
395393
echo "Warning: Missing file in $src_dir or $src_dir/raw: $file"
396394
fi
397395
done
398-
399396
}
400397
401398
if [[ "~{counting_mode}" == "sc_rna" ]]
@@ -406,9 +403,6 @@ task STARsoloFastq {
406403
move_mtx_files "$SoloDirectory"
407404
move_common_files "$SoloDirectory" ""
408405
409-
echo "Listing the files in the current directory:"
410-
ls -l
411-
412406
elif [[ "~{counting_mode}" == "sn_rna" ]]
413407
then
414408
SoloDirectory="Solo.out/GeneFull_Ex50pAS"
@@ -418,7 +412,6 @@ task STARsoloFastq {
418412
# Additional processing for sn_rna with exon counting
419413
SoloDirectory2="Solo.out/Gene"
420414
find "$SoloDirectory2" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo Renaming {}; mv {} "/cromwell_root/$new_name"'
421-
422415
move_common_files "$SoloDirectory2" "sn_rna_" # Add snRNA prefix for renaming
423416
fi
424417
@@ -430,23 +423,19 @@ task STARsoloFastq {
430423
# List the final directory contents
431424
echo "Final directory listing:"
432425
ls -l
433-
# Rename BAM file
434-
mv Aligned.sortedByCoord.out.reheader.bam ~{input_id}.bam
435-
###########################################################################
426+
mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam
436427
437428
###########################################################################
438-
# FROM MERGED BAM TASK
429+
# FROM MERGE STAR OUTPUT TASK
439430
###########################################################################
440-
INPUT_ID="~{input_id}"
441-
442431
# Function to process a matrix (regular or snRNA)
443432
process_matrix() {
444433
local MATRIX_NAME=$1 # matrix or matrix_sn_rna
445434
local BARCODE_FILE=$2
446435
local FEATURE_FILE=$3
447436
local MATRIX_FILE=$4
448437
local OUTPUT_DIR=$5
449-
local FILTERED_TAR="${INPUT_ID}_filtered_${MATRIX_NAME}_mtx_files.tar"
438+
local FILTERED_TAR="~{input_id}_filtered_${MATRIX_NAME}_mtx_files.tar"
450439
451440
echo "Processing $MATRIX_NAME data..."
452441
@@ -457,7 +446,7 @@ task STARsoloFastq {
457446
cp $FEATURE_FILE ./$MATRIX_NAME/features.tsv
458447
459448
# Compress matrix files
460-
tar -zcvf ${INPUT_ID}_${MATRIX_NAME}.mtx_files.tar -C ./$MATRIX_NAME .
449+
tar -zcvf ~{input_id}_${MATRIX_NAME}.mtx_files.tar -C ./$MATRIX_NAME .
461450
462451
# Run STAR soloCellFiltering
463452
STAR --runMode soloCellFiltering ./$MATRIX_NAME $OUTPUT_DIR --soloCellFilter EmptyDrops_CR
@@ -471,35 +460,23 @@ task STARsoloFastq {
471460
--barcodes $BARCODE_FILE \
472461
--features $FEATURE_FILE \
473462
--matrix $MATRIX_FILE \
474-
--input_id $INPUT_ID
463+
--input_id ~{input_id}
475464
476-
# Tar up filtered matrix files
465+
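# Tar up filtered matrix files. TODO(review): the member names below are hard-coded and do not depend on $OUTPUT_DIR or $MATRIX_NAME -- confirm they match what the filtering step writes for every call.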
477466
echo "Tarring up filtered $MATRIX_NAME matrix files"
478467
tar -cvf $FILTERED_TAR outputbarcodes.tsv outputfeatures.tsv outputmatrix.mtx
479468
echo "Done processing $MATRIX_NAME"
480-
}
481-
482-
mkdir matrix
483-
cp matrix.mtx ./matrix/matrix.mtx && cp barcodes.tsv ./matrix/barcodes.tsv && cp features.tsv ./matrix/features.tsv
484-
tar -zcvf ~{input_id}.mtx_files.tar ./matrix/*
485-
STAR --runMode soloCellFiltering ./matrix ./output --soloCellFilter EmptyDrops_CR
469+
}
486470
487-
#list files
488-
echo "listing files"
489-
ls
490-
491-
# create the compressed raw count matrix with the counts, gene names and the barcodes
492-
python3 /scripts/scripts/create-merged-npz-output.py \
493-
--barcodes barcodes.tsv \
494-
--features features.tsv \
495-
--matrix matrix.mtx \
496-
--input_id ~{input_id}
471+
# Process main matrix
472+
process_matrix "matrix" "barcodes.tsv" "features.tsv" "matrix.mtx" "./output"
497473
498-
# tar up filtered matrix outputbarcodes.tsv, outputfeatures.tsv, outputmatrix.mtx
499-
echo "Tarring up filtered matrix files"
500-
tar -cvf ~{input_id}_filtered_mtx_files.tar outputbarcodes.tsv outputfeatures.tsv outputmatrix.mtx
501-
echo "Done"
474+
# Process snRNA matrix only if files exist
475+
if [ -s "barcodes_sn_rna.tsv" ]; then
476+
process_matrix "matrix_sn_rna" "barcodes_sn_rna.tsv" "features_sn_rna.tsv" "matrix_sn_rna.mtx" "./outputsnrna"
477+
fi
502478
479+
ls -lR
503480
>>>
504481

505482
runtime {
@@ -534,6 +511,13 @@ task STARsoloFastq {
534511
File? multimappers_Uniform_matrix = "UniqueAndMult-Uniform.mtx"
535512
File? multimappers_Rescue_matrix = "UniqueAndMult-Rescue.mtx"
536513
File? multimappers_PropUnique_matrix = "UniqueAndMult-PropUnique.mtx"
514+
# Output files previously produced by the separate merge STAR output task
515+
File row_index = "~{input_id}_sparse_counts_row_index.npy"
516+
File col_index = "~{input_id}_sparse_counts_col_index.npy"
517+
File sparse_counts = "~{input_id}_sparse_counts.npz"
518+
File? library_metrics="~{input_id}_library_metrics.csv"
519+
File? mtx_files ="~{input_id}.mtx_files.tar"
520+
File? filtered_mtx_files = "~{input_id}_filtered_mtx_files.tar"
537521
}
538522
}
539523

0 commit comments

Comments
 (0)