-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontact.py
More file actions
70 lines (62 loc) · 3.61 KB
/
contact.py
File metadata and controls
70 lines (62 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import sys
import copy
# input:
# output:
# read IO locations from arguments
inputFile=open(sys.argv[1],"r")
outputFile=open(sys.argv[2],"w")
# parameters
maxMergeDistance = 1000
inputData = {}
for inputLine in inputFile:
inputLineData = inputLine.strip().split("\t")
inputData[inputLineData[0]] = []
# order the alignments by query start position, and put breakpoints in left-to-right order
for inputAlignment in inputLineData[1:]:
inputAlignmentData = inputAlignment.split(",")
# re-order breakpoints based on read 1/2 and strand
if (inputAlignmentData[0] == "0" and inputAlignmentData[6] == "-") or (inputAlignmentData[0] == "1" and inputAlignmentData[6] == "+"):
tempString = inputAlignmentData[4]
inputAlignmentData[4] = inputAlignmentData[5]
inputAlignmentData[5] = tempString
# re-order query positions based on read 1/2
if inputAlignmentData[0] == "1":
tempString = inputAlignmentData[1]
inputAlignmentData[1] = inputAlignmentData[2]
inputAlignmentData[2] = tempString
# insertion sort of alignments by query start position
i = len(inputData[inputLineData[0]])
for i in range(len(inputData[inputLineData[0]])-1,-1,-1):
currentData = inputData[inputLineData[0]][i]
if (currentData[0] == "0" and inputAlignmentData[0] == "0" and int(currentData[1]) < int(inputAlignmentData[1])) or (currentData[0] == "1" and inputAlignmentData[0] == "1" and int(currentData[2]) > int(inputAlignmentData[2])) or (currentData[0] == "0" and inputAlignmentData[0] == "1"):
i += 1
break
inputData[inputLineData[0]].insert(i,inputAlignmentData)
# merge the two adjacent alignments between mates, to utilize phasing information
for i in range(len(inputData[inputLineData[0]])-1):
leftData = inputData[inputLineData[0]][i]
rightData = inputData[inputLineData[0]][i+1]
if leftData[0] == "0" and rightData[0] == "1" and leftData[3] == rightData[3] and leftData[6] != rightData[6] and abs(int(leftData[5])-int(rightData[4])) <= maxMergeDistance:
# merge phasing information
mergedHaplotype = max(int(leftData[7]),int(rightData[7]))
if leftData[7] == "-2" or rightData[7] == "-2" or (leftData[7] == "1" and rightData[7] == "2") or (leftData[7] == "2" and rightData[7] == "1"):
mergedHaplotype = -2
mergedData = ["2",leftData[1],rightData[2],leftData[3],leftData[4],rightData[5],leftData[6],str(mergedHaplotype)]
# replace original with merged alignment
inputData[inputLineData[0]].pop(i)
inputData[inputLineData[0]].pop(i)
inputData[inputLineData[0]].insert(i,mergedData)
break
# read out contact information
for i in range(len(inputData[inputLineData[0]])-1):
leftData = inputData[inputLineData[0]][i]
rightData = inputData[inputLineData[0]][i+1]
# flip if chromosomes and positions are out of order
if leftData[3] > rightData[3] or (leftData[3] == rightData[3] and int(leftData[5]) > int(rightData[4])):
outputFile.write(rightData[3]+'\t'+rightData[4]+'\t'+rightData[7]+'\t'+leftData[3]+'\t'+leftData[5]+'\t'+leftData[7])
else:
outputFile.write(leftData[3]+'\t'+leftData[5]+'\t'+leftData[7]+'\t'+rightData[3]+'\t'+rightData[4]+'\t'+rightData[7])
if leftData[0] == "0" and rightData[0] == "1": # paired-end contact
outputFile.write('\tPE\n')
else:
outputFile.write('\tSR\n')