6
6
#include < thrust/random.h>
7
7
#include < random>
8
8
9
- #include < curand_kernel.h>
10
-
11
9
#include " Generator.hpp"
12
10
#include " KmMatrix.hpp"
13
11
#include " ../../utils/utils.cuh"
16
14
namespace h2o4gpu {
17
15
namespace Matrix {
18
16
19
// Removed side of the diff: forward declarations of the cuRAND-based kernels
// (setup_random_states plus float and double overloads of
// generate_uniform_kernel). Only declarations appear here; no definitions.
- namespace kernel {
20
- // Split the definition to avoid multiple definition.
21
- __global__ void setup_random_states (int _seed, curandState *_state,
22
- size_t _size);
23
-
24
- __global__ void generate_uniform_kernel (float *_res,
25
- curandState *_state,
26
- int _size);
27
-
28
- __global__ void generate_uniform_kernel (double *_res,
29
- curandState *_state,
30
- int _size);
31
- }
32
-
33
17
template <typename T>
34
18
struct UniformGenerator : public GeneratorBase <T> {
35
19
private:
36
- // FIXME: Use KmMatrix
37
- curandState *dev_states_;
38
20
size_t size_;
39
21
// FIXME: Cache random_numbers_ in a safer way.
40
22
KmMatrix<T> random_numbers_;
@@ -43,45 +25,44 @@ struct UniformGenerator : public GeneratorBase<T> {
43
25
void initialize (size_t _size) {
44
26
size_ = _size;
45
27
random_numbers_ = KmMatrix<T> (1 , size_);
46
-
47
- if (dev_states_ != nullptr ) {
48
- safe_cuda (cudaFree (dev_states_));
49
- }
50
- safe_cuda (cudaMalloc ((void **)&dev_states_, size_ * sizeof (curandState)));
51
- kernel::setup_random_states<<<div_roundup(size_, 256 ), 256 >>> (
52
- seed_, dev_states_, size_);
53
28
}
54
29
55
30
public:
56
- UniformGenerator () : dev_states_ ( nullptr ), size_ (0 ) {
31
+ UniformGenerator () : size_ (0 ) {
57
32
std::random_device rd;
58
33
seed_ = rd ();
59
34
}
60
35
61
- UniformGenerator (size_t _size, int _seed) {
36
+ UniformGenerator (size_t _size, int _seed) : seed_(_seed) {
62
37
if (_size == 0 ) {
63
38
h2o4gpu_error (" Zero size for generate is not allowed." );
64
39
}
65
40
initialize (_size);
66
41
}
67
42
68
43
// Construct with an explicit seed only; size_ starts at 0 and
// initialize () is not called here.
UniformGenerator (int _seed) : seed_ (_seed), size_ (0) {}
70
45
71
// Destructor. The removed (-) version freed dev_states_ with cudaFree when it
// was non-null; the added (+) version has an empty body.
- ~UniformGenerator () {
72
- if (dev_states_ != nullptr ) {
73
- safe_cuda (cudaFree (dev_states_));
74
- }
75
- }
46
+ ~UniformGenerator () {}
76
47
77
48
// Copy/move construction and copy/move assignment are all deleted, so a
// UniformGenerator can be neither copied nor moved.
UniformGenerator (const UniformGenerator<T>& _rhs) = delete ;
78
49
UniformGenerator (UniformGenerator<T>&& _rhs) = delete ;
79
50
void operator =(const UniformGenerator<T>& _rhs) = delete ;
80
51
void operator =(UniformGenerator<T>&& _rhs) = delete ;
81
52
82
53
// Fill random_numbers_ with random values and return it by value.
//
// For every index idx in [0, size_), a thrust::default_random_engine is
// seeded with seed_, idx draws are discarded, and one sample is taken from a
// default-constructed thrust::uniform_real_distribution<T>. The value written
// at idx therefore depends only on the seed and on idx.
KmMatrix<T> generate () override {
  // Copy the seed into a local before building the lambda. Reading the
  // member `seed_` inside a `[=] __device__` lambda captures the host `this`
  // pointer, which device code must not dereference.
  auto seed = seed_;

  thrust::device_ptr<T> rn_ptr (random_numbers_.dev_ptr ());
  thrust::transform (
      thrust::make_counting_iterator ((size_t)0),
      thrust::make_counting_iterator (size_),
      rn_ptr,
      // The index type matches the size_t counting iterator so that large
      // indices are not narrowed to int.
      [seed] __device__ (size_t idx) {
        thrust::default_random_engine rng (seed);
        thrust::uniform_real_distribution<T> dist;
        rng.discard (idx);
        return dist (rng);
      });

  return random_numbers_;
}
87
68
@@ -95,6 +76,6 @@ struct UniformGenerator : public GeneratorBase<T> {
95
76
return generate ();
96
77
}
97
78
};
98
-
79
+
99
80
} // namespace Matrix
100
81
} // namespace h2o4gpu
0 commit comments