@@ -200,20 +200,21 @@ template <typename Key, typename Hash, typename Eq>
/// Return the number of elements counted in each bucket.
///
/// A temporary device vector with impl_.bucket_count_ entries is zeroed,
/// populated by CountElemsPerBucketKernel, and then copied into a host
/// vector. Every operation is issued on the stream obtained from
/// CUDAStream::GetInstance(), and cuda::Synchronize is called on that stream
/// before the host vector is returned.
///
/// \return Host vector holding impl_.bucket_count_ per-bucket counts.
std::vector<int64_t> SlabHashBackend<Key, Hash, Eq>::BucketSizes() const {
    CUDAScopedDevice scoped_device(this->device_);
    const auto stream = CUDAStream::GetInstance().Get();

    thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
    thrust::fill(thrust::cuda::par.on(stream), elems_per_bucket.begin(),
                 elems_per_bucket.end(), 0);

    // Ceil-divide so that every buffer slot is visited by one thread.
    const int64_t num_blocks =
            (impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
            kThreadsPerBlock;
    CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
            impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
    cuda::Synchronize(CUDAStream::GetInstance());
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    std::vector<int64_t> result(impl_.bucket_count_);
    if (!result.empty()) {
        // `result` is host memory. A thrust algorithm launched with a CUDA
        // execution policy requires device-accessible iterators on both
        // sides, so the device-to-host transfer is done with an explicit
        // stream-ordered memcpy instead of thrust::copy(par.on(stream), ...).
        OPEN3D_CUDA_CHECK(cudaMemcpyAsync(
                result.data(),
                thrust::raw_pointer_cast(elems_per_bucket.data()),
                result.size() * sizeof(int64_t), cudaMemcpyDeviceToHost,
                stream));
    }
    // Make sure the copy has landed before the host reads `result`.
    cuda::Synchronize(CUDAStream::GetInstance());
    return result;
}
219220
@@ -236,8 +237,8 @@ void SlabHashBackend<Key, Hash, Eq>::Insert(
236237    // / Increase heap_top to pre-allocate potential memory increment and
237238    // / avoid atomicAdd in kernel.
238239    int  prev_heap_top = this ->buffer_ ->GetHeapTopIndex ();
239-     *thrust::device_ptr< int >(impl_. buffer_accessor_ . heap_top_ ) = 
240-             prev_heap_top + count ;
240+     int  new_value = prev_heap_top + count; 
241+     thrust::fill_n (thrust::cuda::par. on ( CUDAStream::GetInstance (). Get ()),  thrust::device_pointer_cast (impl_. buffer_accessor_ . heap_top_ ),  1 , new_value) ;
241242
242243    const  int64_t  num_blocks =
243244            (count + kThreadsPerBlock  - 1 ) / kThreadsPerBlock ;
@@ -248,8 +249,9 @@ void SlabHashBackend<Key, Hash, Eq>::Insert(
248249                        core::CUDAStream::GetInstance ().Get ()>>>(
249250            impl_, input_keys, output_buf_indices, output_masks, count);
250251
251-     thrust::device_vector<const  void *> input_values_soa_device (
252-             input_values_soa.begin (), input_values_soa.end ());
252+     thrust::device_vector<const  void *> input_values_soa_device (input_values_soa.size ());
253+     thrust::copy (thrust::cuda::par.on (CUDAStream::GetInstance ().Get ()),
254+             input_values_soa.begin (), input_values_soa.end (), input_values_soa_device.begin ());
253255
254256    int64_t  n_values = input_values_soa.size ();
255257    const  void * const * ptr_input_values_soa =
0 commit comments