4646import numpy as np
4747import cython
4848import cython .cimports .numpy as cnp
49+ from cython .cimports .libc .stdio import FILE , fopen , fclose , fprintf
4950
5051# ------------------------------------
5152# from cython.cimports.libc.stdlib import malloc, free, qsort
@@ -73,6 +74,225 @@ def clean_up_ndarray(x: cnp.ndarray):
7374 return
7475
7576
77+ @cython .cfunc
78+ def fix_coordinates (poss : cnp .ndarray , rlength : cython .int ) -> cnp .ndarray :
79+ """Clip a sorted coordinate array to [0, rlength]."""
80+ i : cython .long
81+ ptr : cython .pointer (cython .int ) = cython .cast (cython .pointer (cython .int ),
82+ poss .data )
83+
84+ for i in range (poss .shape [0 ]):
85+ if ptr [i ] < 0 :
86+ ptr [i ] = 0
87+ else :
88+ break
89+
90+ for i in range (poss .shape [0 ]- 1 , - 1 , - 1 ):
91+ if ptr [i ] > rlength :
92+ ptr [i ] = rlength
93+ else :
94+ break
95+ return poss
96+
97+
98+ @cython .cfunc
99+ def _pileup_sorted_unit (start_poss : cnp .ndarray ,
100+ end_poss : cnp .ndarray ) -> cnp .ndarray :
101+ """Fast sweep for unit-weight sorted start/end positions."""
102+ p : cython .int
103+ pre_p : cython .int = 0
104+ z : cython .float = 0
105+ pre_z : cython .float = - 10000
106+ i_s : cython .long = 0
107+ i_e : cython .long = 0
108+ c : cython .long = 0
109+ ls : cython .long = start_poss .shape [0 ]
110+ le : cython .long = end_poss .shape [0 ]
111+ ret : cnp .ndarray
112+ ret_p : cnp .ndarray
113+ ret_v : cnp .ndarray
114+ start_ptr : cython .pointer (cython .int )
115+ end_ptr : cython .pointer (cython .int )
116+ ret_p_ptr : cython .pointer (cython .int )
117+ ret_v_ptr : cython .pointer (cython .float )
118+
119+ if ls + le == 0 :
120+ return np .zeros (shape = 0 , dtype = [('p' , 'i4' ), ('v' , 'f4' )])
121+
122+ ret_p = np .zeros (shape = ls + le , dtype = "i4" )
123+ ret_v = np .zeros (shape = ls + le , dtype = "f4" )
124+ start_ptr = cython .cast (cython .pointer (cython .int ), start_poss .data )
125+ end_ptr = cython .cast (cython .pointer (cython .int ), end_poss .data )
126+ ret_p_ptr = cython .cast (cython .pointer (cython .int ), ret_p .data )
127+ ret_v_ptr = cython .cast (cython .pointer (cython .float ), ret_v .data )
128+
129+ while i_s < ls or i_e < le :
130+ if i_s < ls and (i_e >= le or start_ptr [i_s ] < end_ptr [i_e ]):
131+ p = start_ptr [i_s ]
132+ elif i_e < le and (i_s >= ls or end_ptr [i_e ] < start_ptr [i_s ]):
133+ p = end_ptr [i_e ]
134+ else :
135+ p = start_ptr [i_s ]
136+
137+ if p != pre_p :
138+ if z == pre_z :
139+ ret_p_ptr [c - 1 ] = p
140+ else :
141+ ret_p_ptr [c ] = p
142+ ret_v_ptr [c ] = z
143+ c += 1
144+ pre_z = z
145+ pre_p = p
146+
147+ while i_s < ls and start_ptr [i_s ] == p :
148+ z += 1
149+ i_s += 1
150+ while i_e < le and end_ptr [i_e ] == p :
151+ z -= 1
152+ i_e += 1
153+
154+ ret_p .resize (c , refcheck = False )
155+ ret_v .resize (c , refcheck = False )
156+ ret = np .zeros (shape = c , dtype = [('p' , 'i4' ), ('v' , 'f4' )])
157+ ret ['p' ] = ret_p
158+ ret ['v' ] = ret_v
159+ clean_up_ndarray (ret_p )
160+ clean_up_ndarray (ret_v )
161+ return ret
162+
163+
164+ @cython .cfunc
165+ def _pileup_sorted_weighted (start_poss : cnp .ndarray ,
166+ start_weights : cnp .ndarray ,
167+ end_poss : cnp .ndarray ,
168+ end_weights : cnp .ndarray ) -> cnp .ndarray :
169+ """Fast sweep for weighted sorted start/end positions."""
170+ p : cython .int
171+ pre_p : cython .int = 0
172+ z : cython .float = 0
173+ pre_z : cython .float = - 10000
174+ i_s : cython .long = 0
175+ i_e : cython .long = 0
176+ c : cython .long = 0
177+ ls : cython .long = start_poss .shape [0 ]
178+ le : cython .long = end_poss .shape [0 ]
179+ ret : cnp .ndarray
180+ ret_p : cnp .ndarray
181+ ret_v : cnp .ndarray
182+ start_ptr : cython .pointer (cython .int )
183+ end_ptr : cython .pointer (cython .int )
184+ start_w_ptr : cython .pointer (cython .float )
185+ end_w_ptr : cython .pointer (cython .float )
186+ ret_p_ptr : cython .pointer (cython .int )
187+ ret_v_ptr : cython .pointer (cython .float )
188+
189+ if ls + le == 0 :
190+ return np .zeros (shape = 0 , dtype = [('p' , 'i4' ), ('v' , 'f4' )])
191+
192+ ret_p = np .zeros (shape = ls + le , dtype = "i4" )
193+ ret_v = np .zeros (shape = ls + le , dtype = "f4" )
194+ start_ptr = cython .cast (cython .pointer (cython .int ), start_poss .data )
195+ end_ptr = cython .cast (cython .pointer (cython .int ), end_poss .data )
196+ start_w_ptr = cython .cast (cython .pointer (cython .float ),
197+ start_weights .data )
198+ end_w_ptr = cython .cast (cython .pointer (cython .float ), end_weights .data )
199+ ret_p_ptr = cython .cast (cython .pointer (cython .int ), ret_p .data )
200+ ret_v_ptr = cython .cast (cython .pointer (cython .float ), ret_v .data )
201+
202+ while i_s < ls or i_e < le :
203+ if i_s < ls and (i_e >= le or start_ptr [i_s ] < end_ptr [i_e ]):
204+ p = start_ptr [i_s ]
205+ elif i_e < le and (i_s >= ls or end_ptr [i_e ] < start_ptr [i_s ]):
206+ p = end_ptr [i_e ]
207+ else :
208+ p = start_ptr [i_s ]
209+
210+ if p != pre_p :
211+ if z == pre_z :
212+ ret_p_ptr [c - 1 ] = p
213+ else :
214+ ret_p_ptr [c ] = p
215+ ret_v_ptr [c ] = z
216+ c += 1
217+ pre_z = z
218+ pre_p = p
219+
220+ while i_s < ls and start_ptr [i_s ] == p :
221+ z += start_w_ptr [i_s ]
222+ i_s += 1
223+ while i_e < le and end_ptr [i_e ] == p :
224+ z -= end_w_ptr [i_e ]
225+ i_e += 1
226+
227+ ret_p .resize (c , refcheck = False )
228+ ret_v .resize (c , refcheck = False )
229+ ret = np .zeros (shape = c , dtype = [('p' , 'i4' ), ('v' , 'f4' )])
230+ ret ['p' ] = ret_p
231+ ret ['v' ] = ret_v
232+ clean_up_ndarray (ret_p )
233+ clean_up_ndarray (ret_v )
234+ return ret
235+
236+
237+ @cython .cfunc
238+ def _pileup_from_PN_shifted (P_array : cnp .ndarray ,
239+ N_array : cnp .ndarray ,
240+ five_shift : cython .long ,
241+ three_shift : cython .long ,
242+ rlength : cython .int ) -> cnp .ndarray :
243+ """Pile up plus/minus read ends using V1-compatible shifts."""
244+ start_poss : cnp .ndarray
245+ end_poss : cnp .ndarray
246+
247+ start_poss = np .concatenate ((P_array - five_shift , N_array - three_shift ))
248+ end_poss = np .concatenate ((P_array + three_shift , N_array + five_shift ))
249+ start_poss .sort ()
250+ end_poss .sort ()
251+ start_poss = fix_coordinates (start_poss , rlength )
252+ end_poss = fix_coordinates (end_poss , rlength )
253+ return _pileup_sorted_unit (start_poss , end_poss )
254+
255+
256+ @cython .cfunc
257+ def _write_pv_to_bedGraph (pv : cnp .ndarray ,
258+ chrom : bytes ,
259+ output_filename : bytes ,
260+ scale_factor : cython .float ,
261+ baseline_value : cython .float ):
262+ """Append one chromosome PV array to a bedGraph file."""
263+ fh : cython .pointer (FILE )
264+ p : cnp .ndarray
265+ v : cnp .ndarray
266+ p_ptr : cython .pointer (cython .int )
267+ v_ptr : cython .pointer (cython .float )
268+ i : cython .long
269+ l_pv : cython .long = pv .shape [0 ]
270+ pre : cython .int = 0
271+ pos : cython .int
272+ value : cython .float
273+ py_bytes : bytes = chrom
274+ chrom_char : cython .pointer (cython .char ) = py_bytes
275+
276+ fh = fopen (output_filename , b"a" )
277+ if fh == cython .NULL :
278+ raise OSError ("Unable to open bedGraph output file" )
279+
280+ p = np .ascontiguousarray (pv ['p' ])
281+ v = np .ascontiguousarray (pv ['v' ])
282+ p_ptr = cython .cast (cython .pointer (cython .int ), p .data )
283+ v_ptr = cython .cast (cython .pointer (cython .float ), v .data )
284+
285+ for i in range (l_pv ):
286+ pos = p_ptr [i ]
287+ value = v_ptr [i ] * scale_factor
288+ if value < baseline_value :
289+ value = baseline_value
290+ fprintf (fh , b"%s\t %d\t %d\t %.5f\n " , chrom_char , pre , pos , value )
291+ pre = pos
292+ fclose (fh )
293+ return
294+
295+
76296@cython .cfunc
77297def make_PV_from_LR (LR_array : cnp .ndarray ,
78298 mapping_func = mapping_function_always_1 ) -> cnp .ndarray :
@@ -314,6 +534,16 @@ def pileup_from_LR(LR_array: cnp.ndarray,
314534 """
315535 PV_array : cnp .ndarray
316536 pileup : cnp .ndarray
537+ start_poss : cnp .ndarray
538+ end_poss : cnp .ndarray
539+
540+ if mapping_func is mapping_function_always_1 :
541+ start_poss = np .sort (LR_array ['l' ])
542+ end_poss = np .sort (LR_array ['r' ])
543+ pileup = _pileup_sorted_unit (start_poss , end_poss )
544+ clean_up_ndarray (start_poss )
545+ clean_up_ndarray (end_poss )
546+ return pileup
317547
318548 PV_array = make_PV_from_LR (LR_array , mapping_func = mapping_func )
319549 pileup = pileup_PV (PV_array )
@@ -338,6 +568,29 @@ def pileup_from_LRC(LRC_array: cnp.ndarray,
338568 """
339569 PV_array : cnp .ndarray
340570 pileup : cnp .ndarray
571+ start_poss : cnp .ndarray
572+ end_poss : cnp .ndarray
573+ start_weights : cnp .ndarray
574+ end_weights : cnp .ndarray
575+ indices : cnp .ndarray
576+ start_indices : cnp .ndarray
577+
578+ if mapping_func is mapping_function_always_1 :
579+ start_indices = np .argsort (LRC_array ['l' ])
580+ start_poss = LRC_array ['l' ][start_indices ]
581+ start_weights = LRC_array ['c' ][start_indices ].astype ("f4" , copy = False )
582+ indices = np .argsort (LRC_array ['r' ])
583+ end_poss = LRC_array ['r' ][indices ]
584+ end_weights = LRC_array ['c' ][indices ].astype ("f4" , copy = False )
585+ pileup = _pileup_sorted_weighted (start_poss ,
586+ start_weights ,
587+ end_poss ,
588+ end_weights )
589+ clean_up_ndarray (start_poss )
590+ clean_up_ndarray (start_weights )
591+ clean_up_ndarray (end_poss )
592+ clean_up_ndarray (end_weights )
593+ return pileup
341594
342595 PV_array = make_PV_from_LRC (LRC_array , mapping_func = mapping_func )
343596 pileup = pileup_PV (PV_array )
@@ -354,10 +607,103 @@ def pileup_from_PN(P_array: cnp.ndarray, N_array: cnp.ndarray,
354607 pileup of a single chromosome is needed.
355608
356609 """
357- PV_array : cnp .ndarray
358610 pileup : cnp .ndarray
359-
360- PV_array = make_PV_from_PN (P_array , N_array , extsize )
361- pileup = pileup_PV (PV_array )
362- clean_up_ndarray (PV_array )
611+ start_poss : cnp .ndarray
612+ end_poss : cnp .ndarray
613+
614+ start_poss = np .concatenate ((P_array , N_array - extsize ))
615+ end_poss = np .concatenate ((P_array + extsize , N_array ))
616+ start_poss .sort ()
617+ end_poss .sort ()
618+ pileup = _pileup_sorted_unit (start_poss , end_poss )
619+ clean_up_ndarray (start_poss )
620+ clean_up_ndarray (end_poss )
363621 return pileup
622+
623+
624+ @cython .ccall
625+ def pileup_and_write_se (trackI ,
626+ output_filename : bytes ,
627+ d : cython .int ,
628+ scale_factor : cython .float ,
629+ baseline_value : cython .float = 0.0 ,
630+ directional : cython .bint = True ,
631+ halfextension : cython .bint = True ):
632+ """Pile up a single-end track and write a bedGraph file."""
633+ five_shift : cython .long
634+ three_shift : cython .long
635+ rlength : cython .long
636+ chrom : bytes
637+ plus_tags : cnp .ndarray
638+ minus_tags : cnp .ndarray
639+ chrlengths : dict = trackI .get_rlengths ()
640+ pv : cnp .ndarray
641+ fh : cython .pointer (FILE )
642+
643+ if directional :
644+ if halfextension :
645+ five_shift = d // - 4
646+ three_shift = d * 3 // 4
647+ else :
648+ five_shift = 0
649+ three_shift = d
650+ else :
651+ if halfextension :
652+ five_shift = d // 4
653+ three_shift = five_shift
654+ else :
655+ five_shift = d // 2
656+ three_shift = d - five_shift
657+
658+ fh = fopen (output_filename , b"w" )
659+ if fh == cython .NULL :
660+ raise OSError ("Unable to open bedGraph output file" )
661+ fclose (fh )
662+
663+ for chrom in list (chrlengths .keys ()):
664+ (plus_tags , minus_tags ) = trackI .get_locations_by_chr (chrom )
665+ rlength = cython .cast (cython .long , chrlengths [chrom ])
666+ pv = _pileup_from_PN_shifted (plus_tags ,
667+ minus_tags ,
668+ five_shift ,
669+ three_shift ,
670+ rlength )
671+ _write_pv_to_bedGraph (pv ,
672+ chrom ,
673+ output_filename ,
674+ scale_factor ,
675+ baseline_value )
676+ clean_up_ndarray (pv )
677+ return
678+
679+
680+ @cython .ccall
681+ def pileup_and_write_pe (petrackI ,
682+ output_filename : bytes ,
683+ scale_factor : cython .float = 1.0 ,
684+ baseline_value : cython .float = 0.0 ):
685+ """Pile up a paired-end track and write a bedGraph file."""
686+ chrom : bytes
687+ locs : cnp .ndarray
688+ pv : cnp .ndarray
689+ chrlengths : dict = petrackI .get_rlengths ()
690+ fh : cython .pointer (FILE )
691+
692+ fh = fopen (output_filename , b"w" )
693+ if fh == cython .NULL :
694+ raise OSError ("Unable to open bedGraph output file" )
695+ fclose (fh )
696+
697+ for chrom in sorted (chrlengths .keys ()):
698+ locs = petrackI .get_locations_by_chr (chrom )
699+ if locs .dtype .names is not None and "c" in locs .dtype .names :
700+ pv = pileup_from_LRC (locs )
701+ else :
702+ pv = pileup_from_LR (locs )
703+ _write_pv_to_bedGraph (pv ,
704+ chrom ,
705+ output_filename ,
706+ scale_factor ,
707+ baseline_value )
708+ clean_up_ndarray (pv )
709+ return
0 commit comments