t-neumann · isaacvock · May 1, 2024 · May 2, 2024 · May 2, 2024 · May 6, 2024
diff --git a/slamdunk/dunks/tcounter.py b/slamdunk/dunks/tcounter.py
@@ -24,7 +24,6 @@
 import os
 import re
 
-### NEW CODE
 import csv
 
 from os.path import basename
@@ -151,11 +150,10 @@ def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputC
     print("#annotation:", os.path.basename(bed), bedMD5, sep="\t", file=fileCSV)
     print(SlamSeqInterval.Header, file=fileCSV)
 
-    ### NEW CODE
     ## Create cB output file if necessary
     if (makeCB):
 
-        header = ['chromosome', 'UTR_name', 'UTR_start', 'UTR_end', 'UTR_strand', 'TC', 'nT', 'n']
+        header = ['Chromosome', 'Name', 'Start', 'End', 'Strand', 'TC', 'nT', 'n']
         fileCB = open(outputCB, 'w')
         wr = csv.writer(fileCB)
         wr.writerow(header)
@@ -216,7 +214,6 @@ def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputC
         multiMapFwd = 0
         multiMapRev = 0
 
-        ### NEW CODE
         ## Dictionary to track number of unique combos
         ## of T-to-C counts and T counts
         if (makeCB):
@@ -225,6 +222,12 @@ def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputC
 
         for read in readIterator:
 
+            # Total number of T-to-C mutations in the read
+            totalTCcnt = read.tcCount
+
+            # Total number of reference Ts overlapped (minus those mutated to something other than C)
+            totalTcnt = read.getTcount()
+
             # Overwrite any conversions for non-TC reads (reads with < 2 TC conversions)
             if (not read.isTcRead) :
                 read.tcCount = 0
@@ -255,13 +258,17 @@ def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputC
                 if(mismatch.referencePosition >= 0 and mismatch.referencePosition < utr.getLength()):
                     if(mismatch.isT(read.direction == ReadDirection.Reverse)):
                         testN += 1
+                        totalTcnt += 1
                     if(mismatch.isTCMismatch(read.direction == ReadDirection.Reverse)):
                         testk += 1
+                else:
+                    if(mismatch.isT(read.direction == ReadDirection.Reverse)):
+                        totalTcnt += 1
 
-            ### NEW CODE
+            # Increment TC and nT counts for cB
             if (makeCB):
 
-                key = (testk, testN)
+                key = (totalTCcnt, totalTcnt)
 
                 if key in cB:
                     cB[key] += 1
@@ -278,7 +285,7 @@ def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputC
                 if(i >= 0 and i < utr.getLength()):
                     coverageUtr[i] += 1
 
-        ### NEW CODE
+        # Write to cB file
         if (makeCB):
 
             for (tc, nt), count in cB.items():
@@ -365,7 +372,7 @@ def computeTconversions(ref, bed, snpsFile, bam, maxReadLength, minQual, outputC
     fileBedgraphPlus.close()
     fileBedgraphMinus.close()
 
-    ### NEW CODE
+    # Close cB file connection
     if (makeCB):
 
         fileCB.close()

diff --git a/slamdunk/slamdunk.py b/slamdunk/slamdunk.py
@@ -430,6 +430,7 @@ def run():
     allparser.add_argument('-st', "--sampleTime", type=int, dest="sampleTime", required = False, default = 0, help="Use this sample time for all supplied samples")
     allparser.add_argument("-i", "--sample-index", type=int, required=False, default=-1, dest="sampleIndex", help="Run analysis only for sample <i>. Use for distributing slamdunk analysis on a cluster (index is 1-based).")
     allparser.add_argument("-ss", "--skip-sam", action='store_true', dest="skipSAM", help="Output BAM while mapping. Slower but, uses less hard disk.")
+    allparser.add_argument('-cb', "--makeCB", action='store_true', dest="makeCB", help="Make cB file that can be used for mixture modeling")
 
     args = parser.parse_args()