@@ -71,12 +71,15 @@ import htsjdk.samtools.{ReservedTagConstants, SAMFileHeader, SAMReadGroupRecord}
71
71
|For more information on read structures see the
72
72
|[Read Structure Wiki Page](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures)
73
73
|
74
- |UMIs may be extracted from the read sequences, the read names, or both. If `--extract-umis-from-read-names` is
74
+ |UMIs may be extracted from the read sequences, the read names (or read name comments), or both. If `--extract-umis-from-read-names` is
75
75
|specified, any UMIs present in the read names are extracted; read names are expected to be `:`-separated with
76
76
|any UMIs present in the 8th field. If this option is specified, the `--umi-qual-tag` option may not be used as
77
77
|qualities are not available for UMIs in the read name. If UMI segments are present in the read structures those
78
78
|will also be extracted. If UMIs are present in both, the final UMIs are constructed by first taking the UMIs
79
- |from the read names, then adding a hyphen, then the UMIs extracted from the reads.
79
+ |from the read names, then adding a hyphen, then the UMIs extracted from the reads. If `--extract-umis-from-read-comment` is
80
+ |specified, any UMIs present in the read name comments are extracted; the read name comment is the text _after_
81
+ |the first whitespace in the read name (as in a FASTA header line). If the comment is `:`-separated, then the UMI will be
82
+ |extracted from the last field, otherwise the full comment will be used.
80
83
|
81
84
|The same number of input files and read structures must be provided, with one exception: if supplying exactly
82
85
|1 or 2 fastq files, both of which are solely template reads, no read structures need be provided.
@@ -93,7 +96,10 @@ class FastqToBam
93
96
@ arg(flag= 'u' , doc= " Tag in which to store molecular barcodes/UMIs." ) val umiTag : String = ConsensusTags .UmiBases ,
94
97
@ arg(flag= 'q' , doc= " Tag in which to store molecular barcode/UMI qualities." ) val umiQualTag : Option [String ] = None ,
95
98
@ arg(flag= 'Q' , doc= " Store the sample barcode qualities in the QT Tag." ) val storeSampleBarcodeQualities : Boolean = false ,
96
- @ arg(flag= 'n' , doc= " Extract UMI(s) from read names and prepend to UMIs from reads." ) val extractUmisFromReadNames : Boolean = false ,
99
+ @ arg(flag= 'n' , doc= " Extract UMI(s) from read names and prepend to UMIs from reads." , mutex= Array (" extractUmisFromReadComment" ))
100
+ val extractUmisFromReadNames : Boolean = false ,
101
+ @ arg(flag= 'c' , doc= " Extract UMI(s) from read name comment and prepend to UMIs from reads." , mutex= Array (" extractUmisFromReadNames" ))
102
+ val extractUmisFromReadComment : Boolean = false ,
97
103
@ arg( doc= " Read group ID to use in the file header." ) val readGroupId : String = " A" ,
98
104
@ arg( doc= " The name of the sequenced sample." ) val sample : String ,
99
105
@ arg( doc= " The name/ID of the sequenced library." ) val library : String ,
@@ -117,6 +123,7 @@ class FastqToBam
117
123
validate(input.length == actualReadStructures.length, " input and read-structure must be supplied the same number of times." )
118
124
validate(1 to 2 contains actualReadStructures.flatMap(_.templateSegments).size, " read structures must contain 1-2 template reads total." )
119
125
validate(! extractUmisFromReadNames || umiQualTag.isEmpty, " Cannot extract UMI qualities when also extracting UMI from read names." )
126
+ validate(! extractUmisFromReadComment || umiQualTag.isEmpty, " Cannot extract UMI qualities when also extracting UMI from read description." )
120
127
121
128
override def execute (): Unit = {
122
129
val encoding = qualityEncoding
@@ -166,7 +173,11 @@ class FastqToBam
166
173
val templates = subs.iterator.filter(_.kind == Template ).toList
167
174
168
175
// If requested, pull out the UMI(s) from the read name
169
- val umiFromReadName = if (extractUmisFromReadNames) Umis .extractUmisFromReadName(fqs.head.name, strict= true ) else None
176
+ val umiFromReadName = {
177
+ if (extractUmisFromReadNames) Umis .extractUmisFromReadName(fqs.head.name, strict= true )
178
+ else if (extractUmisFromReadComment) fqs.head.comment.flatMap(comment => Umis .extractUmisFromReadComment(comment, strict= true ))
179
+ else None
180
+ }
170
181
171
182
templates.zipWithIndex.map { case (read, index) =>
172
183
// If the template read had no bases, we'll substitute in a single N @ Q2 below to keep htsjdk happy
0 commit comments