4747 # 'No GPU capabilities available')
4848 has_pycuda = False
4949
def _empty_like(gpuarray):
    '''Create a new GPUArray matching the shape and dtype of gpuarray.

    The array is created with pycuda.gpuarray.empty and its memory is
    requested through gpu_utils.memory_pool.allocate.

    Arguments:
        gpuarray: array whose .shape and .dtype are copied
    Returns the newly created array (contents are not set here).
    '''
    return pycuda.gpuarray.empty(
        shape=gpuarray.shape,
        dtype=gpuarray.dtype,
        allocator=gpu_utils.memory_pool.allocate,
    )
54+
5055
5156if has_pycuda :
5257 # define all compilation depending functions (e.g. ElementwiseKernel)
5762 )
def sub_scalar(gpuarr, scalar, out=None, stream=None):
    '''Apply the _sub_1dgpuarr kernel to gpuarr and scalar.

    Presumably computes out = gpuarr - scalar elementwise; the kernel
    itself is defined elsewhere -- confirm there.

    Arguments:
        gpuarr: input array
        scalar: value handed to the kernel as the subtrahend
        out: optional result array; when None a new one is allocated
             with _empty_like(gpuarr)
        stream: forwarded unchanged to the kernel call
    Returns the result array (out, or the newly allocated one).
    '''
    result = _empty_like(gpuarr) if out is None else out
    _sub_1dgpuarr(result, gpuarr, scalar, stream=stream)
    return result
6368
@@ -71,14 +76,14 @@ def _mul_scalar(gpuarr, scalar, out=None, stream=None):
7176 to specify a stream
7277 '''
7378 if out is None :
74- out = pycuda . gpuarray . empty_like (gpuarr )
79+ out = _empty_like (gpuarr )
7580 _mul_with_factor (out , gpuarr , scalar , stream = stream )
7681
7782 def _multiply (a , b , out = None , stream = None ):
7883 '''Elementwise multiply of two gpuarray specifying a stream
7984 Required because gpuarray.__mul__ has no stream argument'''
8085 if out is None :
81- out = pycuda . gpuarray . empty_like (a )
86+ out = _empty_like (a )
8287 func = pycuda .elementwise .get_binary_op_kernel (a .dtype , b .dtype ,
8388 out .dtype , "*" )
8489 func .prepared_async_call (a ._grid , a ._block , stream , a .gpudata ,
def _compute_sigma(a, b, c, d, out=None, stream=None):
    '''Elementwise a - b*c/d, the expression needed by compute sigma
    for the emittance.

    Arguments:
        a, b, c, d: operands passed to the _comp_sigma kernel
        out: optional result array; when None a new one is allocated
             with _empty_like(a)
        stream: forwarded unchanged to the kernel call
    Returns the result array (out, or the newly allocated one).
    '''
    result = out
    if result is None:
        result = _empty_like(a)
    _comp_sigma(result, a, b, c, d, stream=stream)
    return result
142147
def _emittance_dispersion(
        n, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp,
        cov_dp2, out=None, stream=None):
    '''Launch the _emitt_disp kernel on the given covariance arrays.

    Arguments:
        n: passed to the kernel converted to np.float64
        cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2:
            arrays handed to the kernel in this order
        out: optional result array; when None a new one is allocated
             with _empty_like(cov_u2)
        stream: forwarded unchanged to the kernel call
    Returns the result array (out, or the newly allocated one).
    '''
    result = _empty_like(cov_u2) if out is None else out
    _emitt_disp(
        result, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2,
        np.float64(n), stream=stream)
    return result
@@ -176,7 +181,7 @@ def _emittance_dispersion(
def _emittance_no_dispersion(
        n, cov_u2, cov_u_up, cov_up2, out=None, stream=None):
    '''Launch the _emitt_nodisp kernel on the given covariance arrays.

    Arguments:
        n: passed to the kernel converted to np.float64
        cov_u2, cov_u_up, cov_up2: arrays handed to the kernel in this
            order
        out: optional result array; when None a new one is allocated
             with _empty_like(cov_u2)
        stream: forwarded unchanged to the kernel call
    Returns the result array (out, or the newly allocated one).
    '''
    result = _empty_like(cov_u2) if out is None else out
    _emitt_nodisp(
        result, cov_u2, cov_u_up, cov_up2, np.float64(n), stream=stream)
    return result
@@ -194,9 +199,9 @@ def wofz(in_real, in_imag, out_real=None, out_imag=None, stream=None):
194199 part of z.
195200 '''
196201 if out_real is None :
197- out_real = pycuda . gpuarray . empty_like (in_real )
202+ out_real = _empty_like (in_real )
198203 if out_imag is None :
199- out_imag = pycuda . gpuarray . empty_like (in_imag )
204+ out_imag = _empty_like (in_imag )
200205 _wofz (in_real , in_imag , out_real , out_imag , stream = stream )
201206 return out_real , out_imag
202207
@@ -207,7 +212,7 @@ def wofz(in_real, in_imag, out_real=None, out_imag=None, stream=None):
207212 )
def sign(array, out=None, stream=None):
    '''Apply the _sign kernel to array.

    Presumably writes the elementwise sign of array into out; the
    kernel itself is defined elsewhere -- confirm there.

    Arguments:
        array: input array
        out: optional result array; when None a new one is allocated
             with _empty_like(array)
        stream: forwarded unchanged to the kernel call
    Returns the result array (out, or the newly allocated one).
    '''
    result = out
    if result is None:
        result = _empty_like(array)
    # note the argument order: input first, then output
    _sign(array, result, stream=stream)
    return result
213218
@@ -260,13 +265,9 @@ def thrust_mean_and_std_per_slice(sliceset, u, stream=None):
260265 p_sids = sliceset .slice_index_of_particle
261266 # slice_index_of_particle may have slice indices outside of slicing area,
262267 # the following arrays therefore can comprise non valid slice entries
263- slice_ids_noncontained = pycuda .gpuarray .empty (
264- p_sids .shape , dtype = p_sids .dtype ,
265- allocator = gpu_utils .memory_pool .allocate )
266- slice_means_noncontained = pycuda .gpuarray .empty (
267- u .shape , dtype = u .dtype , allocator = gpu_utils .memory_pool .allocate )
268- slice_stds_noncontained = pycuda .gpuarray .empty (
269- u .shape , dtype = u .dtype , allocator = gpu_utils .memory_pool .allocate )
268+ slice_ids_noncontained = _empty_like (p_sids )
269+ slice_means_noncontained = _empty_like (u )
270+ slice_stds_noncontained = _empty_like (u )
270271
271272 (_ , _ , _ , new_end ) = thrust .thrust_stats_per_slice (
272273 p_sids , u , slice_ids_noncontained , slice_means_noncontained ,
@@ -368,8 +369,8 @@ def covariance(a,b, stream=None):
368369 b: pycuda.GPUArray
369370 '''
370371 n = len (a )
371- x = pycuda . gpuarray . empty_like (a )
372- y = pycuda . gpuarray . empty_like (b )
372+ x = _empty_like (a )
373+ y = _empty_like (b )
373374 mean_a = skcuda .misc .mean (a )
374375 #x -= mean_a
375376 _sub_1dgpuarr (x , a , mean_a , stream = stream )
@@ -397,7 +398,7 @@ def std(a, stream=None):
397398 #return skcuda.misc.std(a, ddof=1)
398399 n = len (a )
399400 #mean_a = skcuda.misc.mean(a)
400- x = pycuda . gpuarray . empty_like (a )
401+ x = _empty_like (a )
401402 mean_a = mean (a , stream = stream )
402403 _sub_1dgpuarr (x , a , mean_a , stream = stream )
403404 _inplace_pow (x , 2 , stream = stream )
@@ -523,7 +524,7 @@ def emittance(u, up, dp, stream=None):
523524 n = len (u )
524525 mean_u = mean (u , stream = stream )
525526 mean_up = mean (up , stream = stream )
526- out = pycuda . gpuarray . empty_like (mean_u )
527+ out = _empty_like (mean_u )
527528 tmp_u = sub_scalar (u , mean_u , stream = stream )
528529 tmp_up = sub_scalar (up , mean_up , stream = stream )
529530 tmp_space = _multiply (tmp_u , tmp_u , stream = stream )
@@ -568,7 +569,7 @@ def emittance_multistream(u, up, dp, stream=None):
568569 tmp_u = sub_scalar (u , mean_u , stream = streams [0 ])
569570 tmp_space = _multiply (tmp_u , tmp_u , stream = streams [0 ])
570571 cov_u2 = pycuda .gpuarray .sum (tmp_space , stream = streams [0 ])
571- out = pycuda . gpuarray . empty_like (mean_u )
572+ out = _empty_like (mean_u )
572573 tmp_up = sub_scalar (up , mean_up , stream = streams [1 ])
573574 streams [0 ].synchronize ()
574575 streams [1 ].synchronize ()
@@ -601,11 +602,11 @@ def cumsum(array, dest=None):
601602 '''
602603 if array .dtype == np .int32 :
603604 if dest is None :
604- dest = pycuda . gpuarray . empty_like (array )
605+ dest = _empty_like (array )
605606 thrust_interface .thrust_cumsum_int (array , dest )
606607 elif array .dtype == np .float64 :
607608 if dest is None :
608- dest = pycuda . gpuarray . empty_like (array )
609+ dest = _empty_like (array )
609610 thrust_interface .thrust_cumsum_double (array , dest )
610611 else :
611612 dest = array .copy ()
@@ -660,7 +661,7 @@ def apply_permutation(array, permutation):
660661 permutation permutation array: must be np.int32 (or int32), is asserted
661662 '''
662663 assert (permutation .dtype .itemsize == 4 and permutation .dtype .kind is 'i' )
663- tmp = pycuda . gpuarray . empty_like (array )
664+ tmp = _empty_like (array )
664665 dtype = array .dtype
665666 if dtype .itemsize == 8 and dtype .kind is 'f' :
666667 thrust .apply_sort_perm_double (array , tmp , permutation )
@@ -832,7 +833,7 @@ def sorted_emittance_per_slice(sliceset, u, up, dp=None, stream=None):
832833 cov_u2 = sorted_cov_per_slice (sliceset , u , u , stream = streams [0 ])
833834 cov_up2 = sorted_cov_per_slice (sliceset , up , up , stream = streams [1 ])
834835 cov_u_up = sorted_cov_per_slice (sliceset , u , up , stream = streams [2 ])
835- out = pycuda . gpuarray . empty_like (cov_u2 )
836+ out = _empty_like (cov_u2 )
836837 # use this factor in emitt_disp: the code has a 1/(n*n+n) factor which is not
837838 # required here since the scaling is done in the cov_per_slice
838839 # --> 1/(n*n + n) must be 1. ==> n = sqrt(5)/2 -0.5
0 commit comments