|
11 | 11 | #include "owl_ndarray_contract.h" |
12 | 12 | #include <stdio.h> // DEBUG |
13 | 13 |
|
| 14 | +#ifdef __cplusplus |
| 15 | +extern "C" |
| 16 | +{ |
| 17 | +#endif |
14 | 18 |
|
15 | | -/** Core function declaration **/ |
| 19 | + /** Core function declaration **/ |
16 | 20 |
|
| 21 | + extern int64_t c_ndarray_numel(struct caml_ba_array *X); |
17 | 22 |
|
18 | | -extern int64_t c_ndarray_numel (struct caml_ba_array *X); |
| 23 | + extern int64_t c_ndarray_stride_dim(struct caml_ba_array *X, int d); |
19 | 24 |
|
20 | | -extern int64_t c_ndarray_stride_dim (struct caml_ba_array *X, int d); |
| 25 | + extern int64_t c_ndarray_slice_dim(struct caml_ba_array *X, int d); |
21 | 26 |
|
22 | | -extern int64_t c_ndarray_slice_dim (struct caml_ba_array *X, int d); |
| 27 | + extern void c_float32_ndarray_transpose(struct slice_pair *sp); |
23 | 28 |
|
24 | | -extern void c_float32_ndarray_transpose (struct slice_pair *sp); |
| 29 | + extern void c_float64_ndarray_transpose(struct slice_pair *sp); |
25 | 30 |
|
26 | | -extern void c_float64_ndarray_transpose (struct slice_pair *sp); |
| 31 | + extern void c_complex32_ndarray_transpose(struct slice_pair *sp); |
27 | 32 |
|
28 | | -extern void c_complex32_ndarray_transpose (struct slice_pair *sp); |
| 33 | + extern void c_complex64_ndarray_transpose(struct slice_pair *sp); |
29 | 34 |
|
30 | | -extern void c_complex64_ndarray_transpose (struct slice_pair *sp); |
| 35 | + extern void c_float32_ndarray_contract_one(struct contract_pair *sp); |
31 | 36 |
|
32 | | -extern void c_float32_ndarray_contract_one (struct contract_pair *sp); |
| 37 | + extern void c_float64_ndarray_contract_one(struct contract_pair *sp); |
33 | 38 |
|
34 | | -extern void c_float64_ndarray_contract_one (struct contract_pair *sp); |
| 39 | + extern void c_complex32_ndarray_contract_one(struct contract_pair *sp); |
35 | 40 |
|
36 | | -extern void c_complex32_ndarray_contract_one (struct contract_pair *sp); |
| 41 | + extern void c_complex64_ndarray_contract_one(struct contract_pair *sp); |
37 | 42 |
|
38 | | -extern void c_complex64_ndarray_contract_one (struct contract_pair *sp); |
| 43 | + extern void c_float32_ndarray_contract_two(struct contract_pair *sp); |
39 | 44 |
|
40 | | -extern void c_float32_ndarray_contract_two (struct contract_pair *sp); |
| 45 | + extern void c_float64_ndarray_contract_two(struct contract_pair *sp); |
41 | 46 |
|
42 | | -extern void c_float64_ndarray_contract_two (struct contract_pair *sp); |
| 47 | + extern void c_complex32_ndarray_contract_two(struct contract_pair *sp); |
43 | 48 |
|
44 | | -extern void c_complex32_ndarray_contract_two (struct contract_pair *sp); |
| 49 | + extern void c_complex64_ndarray_contract_two(struct contract_pair *sp); |
45 | 50 |
|
46 | | -extern void c_complex64_ndarray_contract_two (struct contract_pair *sp); |
| 51 | + extern void c_float32_matrix_swap_rows(float *x, int m, int n, int i, int j); |
47 | 52 |
|
48 | | -extern void c_float32_matrix_swap_rows (float *x, int m, int n, int i, int j); |
| 53 | + extern void c_float64_matrix_swap_rows(double *x, int m, int n, int i, int j); |
49 | 54 |
|
50 | | -extern void c_float64_matrix_swap_rows (double *x, int m, int n, int i, int j); |
| 55 | + extern void c_complex32_matrix_swap_rows(_Complex float *x, int m, int n, int i, int j); |
51 | 56 |
|
52 | | -extern void c_complex32_matrix_swap_rows (_Complex float *x, int m, int n, int i, int j); |
| 57 | + extern void c_complex64_matrix_swap_rows(_Complex double *x, int m, int n, int i, int j); |
53 | 58 |
|
54 | | -extern void c_complex64_matrix_swap_rows (_Complex double *x, int m, int n, int i, int j); |
| 59 | + extern void c_float32_matrix_swap_cols(float *x, int m, int n, int i, int j); |
55 | 60 |
|
56 | | -extern void c_float32_matrix_swap_cols (float *x, int m, int n, int i, int j); |
| 61 | + extern void c_float64_matrix_swap_cols(double *x, int m, int n, int i, int j); |
57 | 62 |
|
58 | | -extern void c_float64_matrix_swap_cols (double *x, int m, int n, int i, int j); |
| 63 | + extern void c_complex32_matrix_swap_cols(_Complex float *x, int m, int n, int i, int j); |
59 | 64 |
|
60 | | -extern void c_complex32_matrix_swap_cols (_Complex float *x, int m, int n, int i, int j); |
| 65 | + extern void c_complex64_matrix_swap_cols(_Complex double *x, int m, int n, int i, int j); |
61 | 66 |
|
62 | | -extern void c_complex64_matrix_swap_cols (_Complex double *x, int m, int n, int i, int j); |
| 67 | + extern void c_float32_matrix_transpose(float *x, float *y, int m, int n); |
63 | 68 |
|
64 | | -extern void c_float32_matrix_transpose (float *x, float *y, int m, int n); |
| 69 | + extern void c_float64_matrix_transpose(double *x, double *y, int m, int n); |
65 | 70 |
|
66 | | -extern void c_float64_matrix_transpose (double *x, double *y, int m, int n); |
| 71 | + extern void c_complex32_matrix_transpose(_Complex float *x, _Complex float *y, int m, int n); |
67 | 72 |
|
68 | | -extern void c_complex32_matrix_transpose (_Complex float *x, _Complex float *y, int m, int n); |
| 73 | + extern void c_complex64_matrix_transpose(_Complex double *x, _Complex double *y, int m, int n); |
69 | 74 |
|
70 | | -extern void c_complex64_matrix_transpose (_Complex double *x, _Complex double *y, int m, int n); |
| 75 | + extern void c_ndarray_stride(struct caml_ba_array *X, int64_t *stride); |
71 | 76 |
|
72 | | -extern void c_ndarray_stride (struct caml_ba_array *X, int64_t *stride); |
| 77 | + extern void c_ndarray_slice(struct caml_ba_array *X, int64_t *slice); |
73 | 78 |
|
74 | | -extern void c_ndarray_slice (struct caml_ba_array *X, int64_t *slice); |
| 79 | + extern void c_slicing_stride(struct caml_ba_array *X, int64_t *slice, int64_t *stride); |
75 | 80 |
|
76 | | -extern void c_slicing_stride (struct caml_ba_array *X, int64_t *slice, int64_t *stride); |
| 81 | + extern void c_slicing_offset(struct caml_ba_array *X, int64_t *slice, int64_t *offset); |
77 | 82 |
|
78 | | -extern void c_slicing_offset (struct caml_ba_array *X, int64_t *slice, int64_t *offset); |
| 83 | + extern void c_float32_ndarray_get_slice(struct slice_pair *sp); |
79 | 84 |
|
80 | | -extern void c_float32_ndarray_get_slice (struct slice_pair *sp); |
| 85 | + extern void c_float64_ndarray_get_slice(struct slice_pair *sp); |
81 | 86 |
|
82 | | -extern void c_float64_ndarray_get_slice (struct slice_pair *sp); |
| 87 | + extern void c_complex32_ndarray_get_slice(struct slice_pair *sp); |
83 | 88 |
|
84 | | -extern void c_complex32_ndarray_get_slice (struct slice_pair *sp); |
| 89 | + extern void c_complex64_ndarray_get_slice(struct slice_pair *sp); |
85 | 90 |
|
86 | | -extern void c_complex64_ndarray_get_slice (struct slice_pair *sp); |
| 91 | + extern void c_float32_ndarray_set_slice(struct slice_pair *sp); |
87 | 92 |
|
88 | | -extern void c_float32_ndarray_set_slice (struct slice_pair *sp); |
| 93 | + extern void c_float64_ndarray_set_slice(struct slice_pair *sp); |
89 | 94 |
|
90 | | -extern void c_float64_ndarray_set_slice (struct slice_pair *sp); |
| 95 | + extern void c_complex32_ndarray_set_slice(struct slice_pair *sp); |
91 | 96 |
|
92 | | -extern void c_complex32_ndarray_set_slice (struct slice_pair *sp); |
| 97 | + extern void c_complex64_ndarray_set_slice(struct slice_pair *sp); |
93 | 98 |
|
94 | | -extern void c_complex64_ndarray_set_slice (struct slice_pair *sp); |
| 99 | + extern void c_float32_ndarray_get_fancy(struct fancy_pair *sp); |
95 | 100 |
|
96 | | -extern void c_float32_ndarray_get_fancy (struct fancy_pair *sp); |
| 101 | + extern void c_float64_ndarray_get_fancy(struct fancy_pair *sp); |
97 | 102 |
|
98 | | -extern void c_float64_ndarray_get_fancy (struct fancy_pair *sp); |
| 103 | + extern void c_complex32_ndarray_get_fancy(struct fancy_pair *sp); |
99 | 104 |
|
100 | | -extern void c_complex32_ndarray_get_fancy (struct fancy_pair *sp); |
| 105 | + extern void c_complex64_ndarray_get_fancy(struct fancy_pair *sp); |
101 | 106 |
|
102 | | -extern void c_complex64_ndarray_get_fancy (struct fancy_pair *sp); |
| 107 | + extern void c_float32_ndarray_set_fancy(struct fancy_pair *sp); |
103 | 108 |
|
104 | | -extern void c_float32_ndarray_set_fancy (struct fancy_pair *sp); |
| 109 | + extern void c_float64_ndarray_set_fancy(struct fancy_pair *sp); |
105 | 110 |
|
106 | | -extern void c_float64_ndarray_set_fancy (struct fancy_pair *sp); |
| 111 | + extern void c_complex32_ndarray_set_fancy(struct fancy_pair *sp); |
107 | 112 |
|
108 | | -extern void c_complex32_ndarray_set_fancy (struct fancy_pair *sp); |
| 113 | + extern void c_complex64_ndarray_set_fancy(struct fancy_pair *sp); |
109 | 114 |
|
110 | | -extern void c_complex64_ndarray_set_fancy (struct fancy_pair *sp); |
| 115 | + // compare two numbers (real & complex & int) |
111 | 116 |
|
| 117 | +#define CEQF(X, Y) ((crealf(X) == crealf(Y)) && (cimagf(X) == cimagf(Y))) |
112 | 118 |
|
113 | | -// compare two numbers (real & complex & int) |
| 119 | +#define CEQ(X, Y) ((creal(X) == creal(Y)) && (cimag(X) == cimag(Y))) |
114 | 120 |
|
115 | | -#define CEQF(X,Y) ((crealf(X) == crealf(Y)) && (cimagf(X) == cimagf(Y))) |
| 121 | +#define CNEQF(X, Y) ((crealf(X) != crealf(Y)) || (cimagf(X) != cimagf(Y))) |
116 | 122 |
|
117 | | -#define CEQ(X,Y) ((creal(X) == creal(Y)) && (cimag(X) == cimag(Y))) |
| 123 | +#define CNEQ(X, Y) ((creal(X) != creal(Y)) || (cimag(X) != cimag(Y))) |
118 | 124 |
|
119 | | -#define CNEQF(X,Y) ((crealf(X) != crealf(Y)) || (cimagf(X) != cimagf(Y))) |
| 125 | +#define CLTF(X, Y) ((cabsf(X) < cabsf(Y)) || ((cabsf(X) == cabsf(Y)) && (cargf(X) < cargf(Y)))) |
120 | 126 |
|
121 | | -#define CNEQ(X,Y) ((creal(X) != creal(Y)) || (cimag(X) != cimag(Y))) |
| 127 | +#define CGTF(X, Y) ((cabsf(X) > cabsf(Y)) || ((cabsf(X) == cabsf(Y)) && (cargf(X) > cargf(Y)))) |
122 | 128 |
|
123 | | -#define CLTF(X,Y) ((cabsf(X) < cabsf(Y)) || ((cabsf(X) == cabsf(Y)) && (cargf(X) < cargf(Y)))) |
| 129 | +#define CLEF(X, Y) !CGTF(X, Y) |
124 | 130 |
|
125 | | -#define CGTF(X,Y) ((cabsf(X) > cabsf(Y)) || ((cabsf(X) == cabsf(Y)) && (cargf(X) > cargf(Y)))) |
| 131 | +#define CGEF(X, Y) !CLTF(X, Y) |
126 | 132 |
|
127 | | -#define CLEF(X,Y) !CGTF(X,Y) |
| 133 | +#define CLT(X, Y) ((cabs(X) < cabs(Y)) || ((cabs(X) == cabs(Y)) && (carg(X) < carg(Y)))) |
128 | 134 |
|
129 | | -#define CGEF(X,Y) !CLTF(X,Y) |
| 135 | +#define CGT(X, Y) ((cabs(X) > cabs(Y)) || ((cabs(X) == cabs(Y)) && (carg(X) > carg(Y)))) |
130 | 136 |
|
131 | | -#define CLT(X,Y) ((cabs(X) < cabs(Y)) || ((cabs(X) == cabs(Y)) && (carg(X) < carg(Y)))) |
| 137 | +#define CLE(X, Y) !CGT(X, Y) |
132 | 138 |
|
133 | | -#define CGT(X,Y) ((cabs(X) > cabs(Y)) || ((cabs(X) == cabs(Y)) && (carg(X) > carg(Y)))) |
| 139 | +#define CGE(X, Y) !CLT(X, Y) |
134 | 140 |
|
135 | | -#define CLE(X,Y) !CGT(X,Y) |
| 141 | + extern int float32_cmp(const void *a, const void *b); |
136 | 142 |
|
137 | | -#define CGE(X,Y) !CLT(X,Y) |
| 143 | + extern int float64_cmp(const void *a, const void *b); |
138 | 144 |
|
139 | | -extern int float32_cmp (const void * a, const void * b); |
| 145 | + extern int complex32_cmp(const void *a, const void *b); |
140 | 146 |
|
141 | | -extern int float64_cmp (const void * a, const void * b); |
| 147 | + extern int complex64_cmp(const void *a, const void *b); |
142 | 148 |
|
143 | | -extern int complex32_cmp (const void * a, const void * b); |
| 149 | + extern int int8_cmp(const void *a, const void *b); |
144 | 150 |
|
145 | | -extern int complex64_cmp (const void * a, const void * b); |
| 151 | + extern int uint8_cmp(const void *a, const void *b); |
146 | 152 |
|
147 | | -extern int int8_cmp (const void * a, const void * b); |
| 153 | + extern int int16_cmp(const void *a, const void *b); |
148 | 154 |
|
149 | | -extern int uint8_cmp (const void * a, const void * b); |
| 155 | + extern int uint16_cmp(const void *a, const void *b); |
150 | 156 |
|
151 | | -extern int int16_cmp (const void * a, const void * b); |
| 157 | + extern int int32_cmp(const void *a, const void *b); |
152 | 158 |
|
153 | | -extern int uint16_cmp (const void * a, const void * b); |
| 159 | + extern int int64_cmp(const void *a, const void *b); |
154 | 160 |
|
155 | | -extern int int32_cmp (const void * a, const void * b); |
| 161 | + extern int float32_cmp_r(const void *a, const void *b, const void *z); |
156 | 162 |
|
157 | | -extern int int64_cmp (const void * a, const void * b); |
| 163 | + extern int float64_cmp_r(const void *a, const void *b, const void *z); |
158 | 164 |
|
159 | | -extern int float32_cmp_r (const void * a, const void * b, const void * z); |
| 165 | + extern int complex32_cmp_r(const void *a, const void *b, const void *z); |
160 | 166 |
|
161 | | -extern int float64_cmp_r (const void * a, const void * b, const void * z); |
| 167 | + extern int complex64_cmp_r(const void *a, const void *b, const void *z); |
162 | 168 |
|
163 | | -extern int complex32_cmp_r (const void * a, const void * b, const void * z); |
| 169 | + extern int int8_cmp_r(const void *a, const void *b, const void *z); |
164 | 170 |
|
165 | | -extern int complex64_cmp_r (const void * a, const void * b, const void * z); |
| 171 | + extern int uint8_cmp_r(const void *a, const void *b, const void *z); |
166 | 172 |
|
167 | | -extern int int8_cmp_r (const void * a, const void * b, const void * z); |
| 173 | + extern int int16_cmp_r(const void *a, const void *b, const void *z); |
168 | 174 |
|
169 | | -extern int uint8_cmp_r (const void * a, const void * b, const void * z); |
| 175 | + extern int uint16_cmp_r(const void *a, const void *b, const void *z); |
170 | 176 |
|
171 | | -extern int int16_cmp_r (const void * a, const void * b, const void * z); |
| 177 | + extern int int32_cmp_r(const void *a, const void *b, const void *z); |
172 | 178 |
|
173 | | -extern int uint16_cmp_r (const void * a, const void * b, const void * z); |
| 179 | + extern int int64_cmp_r(const void *a, const void *b, const void *z); |
174 | 180 |
|
175 | | -extern int int32_cmp_r (const void * a, const void * b, const void * z); |
| 181 | + // acquire CPU cache sizes |
176 | 182 |
|
177 | | -extern int int64_cmp_r (const void * a, const void * b, const void * z); |
| 183 | + extern void query_cache_sizes(int *l1p, int *l2p, int *l3p); |
178 | 184 |
|
| 185 | + // copy two double type numbers, for interfacing to foreign functions |
| 186 | + OWL_INLINE value cp_two_doubles(double d0, double d1) |
| 187 | + { |
| 188 | + value res = caml_alloc_small(2 * Double_wosize, Double_array_tag); |
| 189 | + Store_double_field(res, 0, d0); |
| 190 | + Store_double_field(res, 1, d1); |
| 191 | + return res; |
| 192 | + } |
179 | 193 |
|
180 | | -// acquire CPU cache sizes |
181 | | - |
182 | | -extern void query_cache_sizes(int* l1p, int* l2p, int* l3p); |
183 | | - |
184 | | - |
185 | | -// copy two double type numbers, for interfacing to foreign functions |
186 | | -OWL_INLINE value cp_two_doubles(double d0, double d1) { |
187 | | - value res = caml_alloc_small(2 * Double_wosize, Double_array_tag); |
188 | | - Store_double_field(res, 0, d0); |
189 | | - Store_double_field(res, 1, d1); |
190 | | - return res; |
191 | | -} |
192 | | - |
193 | | -// copy x to y with given offset and stride |
194 | | -OWL_INLINE void owl_float32_copy (int N, float* x, int ofsx, int incx, float* y, int ofsy, int incy) { |
195 | | - for (int i = 0; i < N; i++) { |
196 | | - *(y + ofsy) = *(x + ofsx); |
197 | | - ofsx += incx; |
198 | | - ofsy += incy; |
| 194 | + // copy x to y with given offset and stride |
| 195 | + OWL_INLINE void owl_float32_copy(int N, float *x, int ofsx, int incx, float *y, int ofsy, int incy) |
| 196 | + { |
| 197 | + for (int i = 0; i < N; i++) |
| 198 | + { |
| 199 | + *(y + ofsy) = *(x + ofsx); |
| 200 | + ofsx += incx; |
| 201 | + ofsy += incy; |
| 202 | + } |
199 | 203 | } |
200 | | -} |
201 | 204 |
|
202 | | -// copy x to y with given offset and stride |
203 | | -OWL_INLINE void owl_float64_copy (int N, double* x, int ofsx, int incx, double* y, int ofsy, int incy) { |
204 | | - for (int i = 0; i < N; i++) { |
205 | | - *(y + ofsy) = *(x + ofsx); |
206 | | - ofsx += incx; |
207 | | - ofsy += incy; |
| 205 | + // copy x to y with given offset and stride |
| 206 | + OWL_INLINE void owl_float64_copy(int N, double *x, int ofsx, int incx, double *y, int ofsy, int incy) |
| 207 | + { |
| 208 | + for (int i = 0; i < N; i++) |
| 209 | + { |
| 210 | + *(y + ofsy) = *(x + ofsx); |
| 211 | + ofsx += incx; |
| 212 | + ofsy += incy; |
| 213 | + } |
208 | 214 | } |
209 | | -} |
210 | 215 |
|
211 | | -// copy x to y with given offset and stride |
212 | | -OWL_INLINE void owl_complex32_copy (int N, _Complex float* x, int ofsx, int incx, _Complex float* y, int ofsy, int incy) { |
213 | | - for (int i = 0; i < N; i++) { |
214 | | - *(y + ofsy) = *(x + ofsx); |
215 | | - ofsx += incx; |
216 | | - ofsy += incy; |
| 216 | + // copy x to y with given offset and stride |
| 217 | + OWL_INLINE void owl_complex32_copy(int N, _Complex float *x, int ofsx, int incx, _Complex float *y, int ofsy, int incy) |
| 218 | + { |
| 219 | + for (int i = 0; i < N; i++) |
| 220 | + { |
| 221 | + *(y + ofsy) = *(x + ofsx); |
| 222 | + ofsx += incx; |
| 223 | + ofsy += incy; |
| 224 | + } |
217 | 225 | } |
218 | | -} |
219 | 226 |
|
220 | | -// copy x to y with given offset and stride |
221 | | -OWL_INLINE void owl_complex64_copy (int N, _Complex double* x, int ofsx, int incx, _Complex double* y, int ofsy, int incy) { |
222 | | - for (int i = 0; i < N; i++) { |
223 | | - *(y + ofsy) = *(x + ofsx); |
224 | | - ofsx += incx; |
225 | | - ofsy += incy; |
| 227 | + // copy x to y with given offset and stride |
| 228 | + OWL_INLINE void owl_complex64_copy(int N, _Complex double *x, int ofsx, int incx, _Complex double *y, int ofsy, int incy) |
| 229 | + { |
| 230 | + for (int i = 0; i < N; i++) |
| 231 | + { |
| 232 | + *(y + ofsy) = *(x + ofsx); |
| 233 | + ofsx += incx; |
| 234 | + ofsy += incy; |
| 235 | + } |
226 | 236 | } |
227 | | -} |
228 | 237 |
|
| 238 | +#ifdef __cplusplus |
| 239 | +} |
| 240 | +#endif |
229 | 241 |
|
230 | | -#endif /* OWL_CORE_H */ |
| 242 | +#endif /* OWL_CORE_H */ |
0 commit comments