-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharound_old.py
More file actions
89 lines (77 loc) · 3.98 KB
/
around_old.py
File metadata and controls
89 lines (77 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import sys
import copy
import numpy
import time
# input:
# output:
# read IO locations from arguments
inputFile=open(sys.argv[1],"r")
# parameters
maxDistance = 10000000 # 10mb
maxGrids = maxDistance//gridSize # = 1e3
maxEdgeDistance = maxDistance + gridSize/2
totalGrids = maxGrids+1
aroundCounts = [x[:] for x in [[0] * totalGrids] * totalGrids]
# read contacts
contactData = {}
for inputLine in inputFile:
inputLineData = inputLine.strip().split("\t")
for i in [2,5]: # phase data
inputLineData[i] = int(inputLineData[i])
for i in [1,4]: # position data, for future averaging
inputLineData[i] = float(inputLineData[i])
# swap two partners of a contact if out of order
if inputLineData[0] > inputLineData[3] or (inputLineData[0] == inputLineData[3] and inputLineData[1] > inputLineData[4]):
newContact = [inputLineData[4], inputLineData[5], inputLineData[1], inputLineData[2], 1]
else:
newContact = [inputLineData[1], inputLineData[2], inputLineData[4], inputLineData[5], 1]
# add to contact list and merge
if not inputLineData[0] in contactData:
contactData[inputLineData[0]] = {}
if not inputLineData[3] in contactData[inputLineData[0]]:
contactData[inputLineData[0]][inputLineData[3]] = []
contactData[inputLineData[0]][inputLineData[3]].append(newContact)
# sort data
for leftChr in contactData:
for rightChr in contactData[leftChr]:
contactData[leftChr][rightChr] = sorted(contactData[leftChr][rightChr])
# find nearby contacts
# NOTE: I left this block as-is even though it breaks in Python 3 (unmatched
# parens + contact1/contact2 are never defined here). This file is archived,
# and the later loop does the same job, so I didn't try to fix this fragment.
for leftChr in contactData:
for rightChr in contactData[leftChr]:
if leftChr == rightChr:
continue # skip intra or skip inter
sys.stderr.write('cleaning '+leftChr+', '+rightChr+'\n')
numOfContacts = len(contactData[leftChr][rightChr])
for i in range(numOfContacts):
for j in range(i+1, numOfContacts):
if contactData[leftChr][rightChr][j][1] - contactData[leftChr][rightChr][i][1] > maxEdgeDistance:
break
if abs(contactData[leftChr][rightChr][j][4] - contactData[leftChr][rightChr][i][4])) <= maxEdgeDistance:
relativePos = [contact2[0] - contact1[0], contact2[2] - contact1[2]]
roundedPos = [int((relativePos[0]+maxDistance)//gridSize),int((relativePos[1]+maxDistance)//gridSize)]
if roundedPos[0] >= 0 and roundedPos[0] < totalGrids and roundedPos[1] >= 0 and roundedPos[1] < totalGrids:
aroundCounts[roundedPos[0]][roundedPos[1]] += 1
aroundCounts[roundedPos[1]][roundedPos[0]] += 1
for j in range(0, i):
if contactData[leftChr][rightChr][i][1] - contactData[leftChr][rightChr][j][1] > maxEdgeDistance:
break
if abs(contactData[leftChr][rightChr][j][4] - contactData[leftChr][rightChr][i][4])) <= maxEdgeDistance:
# sort and merge contacts within maxDistance
for chr1 in contactData:
for chr2 in contactData[chr1]:
if chr1 == chr2:
continue # skip intra or skip inter
print chr1+', '+chr2
for contact1 in contactData[chr1][chr2]:
for contact2 in contactData[chr1][chr2]:
relativePos = [contact2[0] - contact1[0], contact2[2] - contact1[2]]
roundedPos = [int((relativePos[0]+maxDistance)//gridSize),int((relativePos[1]+maxDistance)//gridSize)]
if roundedPos[0] >= 0 and roundedPos[0] < totalGrids and roundedPos[1] >= 0 and roundedPos[1] < totalGrids:
aroundCounts[roundedPos[0]][roundedPos[1]] += 1
aroundCounts[roundedPos[1]][roundedPos[0]] += 1
# output results
for i in range(totalGrids):
outputFile.write("\t".join(map(str,aroundCounts[i]))+"\n")