-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathloop.py
More file actions
75 lines (69 loc) · 2.89 KB
/
loop.py
File metadata and controls
75 lines (69 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import sys
import copy
from scipy import interpolate
import math
import numpy as np
import bisect
# input:
# output:
# read IO locations from arguments
inputFile=open(sys.argv[1],"r")
inputPdbFile=open(sys.argv[2],"r")
resolution = 20e3
# load structure data
inputPdbData = {}
inputPdbLoci = {}
for inputPdbLine in inputPdbFile:
inputPdbLineData = inputPdbLine.strip().split()
inputPdbChr = int(inputPdbLineData[0])
inputPdbLocus = int(inputPdbLineData[1])
inputPdbPos = [float(inputPdbLineData[2]),float(inputPdbLineData[3]),float(inputPdbLineData[4])]
if not inputPdbChr in inputPdbData:
inputPdbData[inputPdbChr]=[]
inputPdbLoci[inputPdbChr]=[]
inputPdbData[inputPdbChr].append((inputPdbLocus,inputPdbPos))
inputPdbLoci[inputPdbChr].append(inputPdbLocus)
# sort
for inputPdbChr in inputPdbLoci:
inputPdbLoci[inputPdbChr] = sorted(inputPdbLoci[inputPdbChr])
# linear interpolate
inputPdbLinearInterpolate = {}
for inputPdbChr in inputPdbData:
inputPdbLinearInterpolate[inputPdbChr] = interpolate.interp1d(np.array([x[0] for x in inputPdbData[inputPdbChr]]),np.array([x[1] for x in inputPdbData[inputPdbChr]]),axis=0)
# assign phase to each dedup-ed contact
inputFile.readline() # skip header
for inputLine in inputFile:
inputLineData = inputLine.strip().split("\t")
if inputLineData[0] == 'X':
inputChr = 23
elif inputLineData[0] == 'Y':
inputChr = 24
else:
inputChr = int(inputLineData[0])
inputPos1 = (int(inputLineData[1]) + int(inputLineData[2])) // 2
inputPos2 = (int(inputLineData[4]) + int(inputLineData[5])) // 2
outputString = str(inputChr)+'\t'+str(inputPos1)+'\t'+str(inputPos2)
isOk = True
for hap in range(2):
# check if the loci is in the structure
#if inputChr*2+hap not in inputPdbLoci:
#isOk = False
#break
#index1 = bisect.bisect(inputPdbLoci[inputChr*2+hap],inputPos1)
#index2 = bisect.bisect(inputPdbLoci[inputChr*2+hap],inputPos2)
#if max(index1,index2) + 1 >= len(inputPdbLoci[inputChr*2+hap]):
#isOk = False
#break
#print inputPdbLoci[inputChr*2+hap][index1+1] - inputPdbLoci[inputChr*2+hap][index1], inputPdbLoci[inputChr*2+hap][index2+1] - inputPdbLoci[inputChr*2+hap][index2]
#if inputPdbLoci[inputChr*2+hap][index1+1] - inputPdbLoci[inputChr*2+hap][index1] > resolution or inputPdbLoci[inputChr*2+hap][index2+1] - inputPdbLoci[inputChr*2+hap][index2] > resolution:
#isOk = False
#break
# calculate distance
try:
loopDistance = np.linalg.norm(inputPdbLinearInterpolate[inputChr*2+hap](inputPos1) - inputPdbLinearInterpolate[inputChr*2+hap](inputPos2))
except (KeyError, ValueError) as error:
isOk = False
break
outputString += '\t'+str(loopDistance)
if isOk:
sys.stdout.write(outputString+'\n')