|
| 1 | +//------------------------------------------------------------------------------ |
| 2 | +// CUDA/GB_cuda_kernel.h: definitions for all GraphBLAS CUDA kernels |
| 3 | +//------------------------------------------------------------------------------ |
| 4 | + |
| 5 | +// SPDX-License-Identifier: Apache-2.0 |
| 6 | + |
| 7 | +//------------------------------------------------------------------------------ |
| 8 | + |
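// This file is #include'd into all CUDA kernels for GraphBLAS.  It provides
// the macros that fetch entries of A and B and apply the multiply/add
// operators, together with subsets of GraphBLAS.h and GB.h.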
| 11 | + |
| 12 | +#pragma once |
// Assertions are compiled out here: any ASSERT macro defined before this point
// is discarded and replaced by one that expands to nothing, so the argument of
// ASSERT is never evaluated.
#ifdef ASSERT
    #undef ASSERT
#endif
#define ASSERT(x)
| 15 | + |
| 16 | +//------------------------------------------------------------------------------ |
// TODO: this will be in the jit code.  Until then 128 is the default; the
// guard lets a chunksize defined before this header is included take
// precedence instead of colliding with this definition.
#ifndef chunksize
    #define chunksize 128
#endif
| 19 | + |
| 20 | +//------------------------------------------------------------------------------ |
| 21 | +// GETA, GETB: get entries from input matrices A and B |
| 22 | +//------------------------------------------------------------------------------ |
| 23 | + |
// For each input matrix, three macros are defined:
//
//      GB_DECLAREx (val)       declare a scalar to hold one entry
//      GB_SHAREDx (val)        the same declaration, qualified __shared__
//      GB_GETx (val, x, p)     val = x [p], typecast (x [0] if GB_x_ISO is set)
//
// All three expand to nothing when GB_x_IS_PATTERN is set.  The scalar type
// depends on GB_FLIPXY: when set, A uses T_Y and B uses T_X; otherwise A uses
// T_X and B uses T_Y.  Note that GB_GETA and GB_GETB already end with a
// semicolon.

//--------------------------------------
// entries of A
//--------------------------------------

#if GB_A_IS_PATTERN

    #define GB_DECLAREA(aval)
    #define GB_SHAREDA(aval)
    #define GB_GETA( aval, ax, p)

#elif GB_FLIPXY

    #define GB_DECLAREA(aval) T_Y aval
    #define GB_SHAREDA(aval) __shared__ T_Y aval
    #if GB_A_ISO
        #define GB_GETA( aval, ax, p) aval = (T_Y) (ax [0]) ;
    #else
        #define GB_GETA( aval, ax, p) aval = (T_Y) (ax [p]) ;
    #endif

#else

    #define GB_DECLAREA(aval) T_X aval
    #define GB_SHAREDA(aval) __shared__ T_X aval
    #if GB_A_ISO
        #define GB_GETA( aval, ax, p) aval = (T_X) (ax [0]) ;
    #else
        #define GB_GETA( aval, ax, p) aval = (T_X) (ax [p]) ;
    #endif

#endif

//--------------------------------------
// entries of B
//--------------------------------------

#if GB_B_IS_PATTERN

    #define GB_DECLAREB(bval)
    #define GB_SHAREDB(bval)
    #define GB_GETB( bval, bx, p)

#elif GB_FLIPXY

    #define GB_DECLAREB(bval) T_X bval
    #define GB_SHAREDB(bval) __shared__ T_X bval
    #if GB_B_ISO
        #define GB_GETB( bval, bx, p) bval = (T_X) (bx [0]) ;
    #else
        #define GB_GETB( bval, bx, p) bval = (T_X) (bx [p]) ;
    #endif

#else

    #define GB_DECLAREB(bval) T_Y bval
    #define GB_SHAREDB(bval) __shared__ T_Y bval
    #if GB_B_ISO
        #define GB_GETB( bval, bx, p) bval = (T_Y) (bx [0]) ;
    #else
        #define GB_GETB( bval, bx, p) bval = (T_Y) (bx [p]) ;
    #endif

#endif
| 85 | + |
| 86 | +//------------------------------------------------------------------------------ |
| 87 | +// operators |
| 88 | +//------------------------------------------------------------------------------ |
| 89 | + |
// Operator macros used by the dot-product kernels.  GB_ADD and GB_MULT are not
// defined in this file; they must be provided before these macros are used.
//
//      GB_ADD_F (f, s)         f = GB_ADD (f, s)
//      GB_C_MULT (c, a, b)     c = GB_MULT (a, b)
//      GB_MULTADD (c, a, b)    c = GB_ADD (c, GB_MULT (a, b))
//      GB_DOT_TERMINAL (c)     placeholder for a terminal-value check (no-op)
//      GB_DOT_MERGE            one step of the merge: uses cij_exists, and in
//                              the non-iso case also cij, aki, bkj, Ax, Bx,
//                              pA, and pB from the enclosing scope.
//
// GB_DOT_TERMINAL must be defined with no space between its name and the
// opening parenthesis; otherwise it is an object-like macro whose replacement
// text is "( c )", and GB_DOT_TERMINAL (cij) would expand to "( c ) (cij)".

#if GB_C_ISO

    // GB_C_ISO is set: the operator macros expand to nothing, and the merge
    // only records that the entry cij exists.
    #define GB_ADD_F( f , s)
    #define GB_C_MULT( c, a, b)
    #define GB_MULTADD( c, a ,b )
    #define GB_DOT_TERMINAL( c )
    #define GB_DOT_MERGE                                                    \
    {                                                                       \
        cij_exists = true ;                                                 \
    }

#else

    #define GB_ADD_F( f , s) f = GB_ADD ( f, s )
    #define GB_C_MULT( c, a, b) c = GB_MULT( (a), (b) )
    #define GB_MULTADD( c, a ,b ) GB_ADD_F( (c), GB_MULT( (a),(b) ) )

    // not yet implemented; the intended check is:
    //      if ( c == TERMINAL_VALUE) break;
    #define GB_DOT_TERMINAL( c )

    // cij += A(k,i) * B(k,j), for merge operation
    #define GB_DOT_MERGE                                                    \
    {                                                                       \
        GB_GETA ( aki, Ax, pA) ;            /* aki = A(k,i) */              \
        GB_GETB ( bkj, Bx, pB) ;            /* bkj = B(k,j) */              \
        if (cij_exists)                                                     \
        {                                                                   \
            GB_MULTADD (cij, aki, bkj) ;    /* cij += aki * bkj */          \
        }                                                                   \
        else                                                                \
        {                                                                   \
            /* cij = A(k,i) * B(k,j), and add to the pattern */             \
            cij_exists = true ;                                             \
            GB_C_MULT (cij, aki, bkj) ;     /* cij = aki * bkj */           \
        }                                                                   \
    }

#endif
| 127 | + |
| 128 | +//------------------------------------------------------------------------------ |
| 129 | +// subset of GraphBLAS.h |
| 130 | +//------------------------------------------------------------------------------ |
| 131 | + |
| 132 | +#ifndef GRAPHBLAS_H |
| 133 | +#define GRAPHBLAS_H |
| 134 | + |
// "restrict" is routed through GB_restrict, which is __restrict__ when either
// GB_CUDA_KERNEL or __NVCC__ is defined, and empty otherwise.  Any earlier
// definition of either macro is discarded first.
#undef GB_restrict
#undef restrict
#if !defined ( GB_CUDA_KERNEL ) && !defined ( __NVCC__ )
    #define GB_restrict
#else
    #define GB_restrict __restrict__
#endif
#define restrict GB_restrict
| 143 | + |
| 144 | +#include <stdint.h> |
| 145 | +#include <stdbool.h> |
| 146 | +#include <stddef.h> |
| 147 | +#include <string.h> |
| 148 | + |
// GB_STR (x) turns x into the string literal "x", exactly as written.
// GB_XSTR (x) macro-expands x first and then stringifies the result.
#define GB_STR(x) #x
#define GB_XSTR(x) GB_STR(x)

// GxB_MAX_NAME_LEN is fixed at 128 and GB_PUBLIC is "extern"; any earlier
// definition of either is discarded.
#undef GxB_MAX_NAME_LEN
#define GxB_MAX_NAME_LEN 128
#undef GB_PUBLIC
#define GB_PUBLIC extern
| 157 | + |
// row/column index: an unsigned 64-bit integer
typedef uint64_t GrB_Index ;

// Object handles.  Each is a pointer to a struct that is only named here; the
// struct contents are not defined by these typedefs.

// containers
typedef struct GB_Matrix_opaque *GrB_Matrix ;
typedef struct GB_Vector_opaque *GrB_Vector ;
typedef struct GB_Scalar_opaque *GrB_Scalar ;

// types and operators
typedef struct GB_Type_opaque *GrB_Type ;
typedef struct GB_UnaryOp_opaque *GrB_UnaryOp ;
typedef struct GB_BinaryOp_opaque *GrB_BinaryOp ;
typedef struct GB_IndexUnaryOp_opaque *GrB_IndexUnaryOp ;
typedef struct GB_SelectOp_opaque *GxB_SelectOp ;

// algebraic objects
typedef struct GB_Monoid_opaque *GrB_Monoid ;
typedef struct GB_Semiring_opaque *GrB_Semiring ;

// descriptor
typedef struct GB_Descriptor_opaque *GrB_Descriptor ;
| 170 | + |
// matrix storage formats; each constant is a distinct power of two
#define GxB_FULL 8          // full: all entries must be present
#define GxB_BITMAP 4        // bitmap
#define GxB_SPARSE 2        // sparse (compressed vector)
#define GxB_HYPERSPARSE 1   // hypersparse
| 175 | + |
// pointer to a function taking one writable and one read-only untyped pointer
typedef void (*GxB_unary_function)
(
    void *,
    const void *
) ;

// pointer to a function taking one writable and two read-only untyped pointers
typedef void (*GxB_binary_function)
(
    void *,
    const void *,
    const void *
) ;
| 178 | + |
| 179 | +typedef bool (*GxB_select_function) // return true if A(i,j) is kept |
| 180 | +( |
| 181 | + GrB_Index i, // row index of A(i,j) |
| 182 | + GrB_Index j, // column index of A(i,j) |
| 183 | + const void *x, // value of A(i,j) |
| 184 | + const void *thunk // optional input for select function |
| 185 | +) ; |
| 186 | + |
| 187 | +typedef void (*GxB_index_unary_function) |
| 188 | +( |
| 189 | + void *z, // output value z, of type ztype |
| 190 | + const void *x, // input value x of type xtype; value of v(i) or A(i,j) |
| 191 | + GrB_Index i, // row index of A(i,j) |
| 192 | + GrB_Index j, // column index of A(i,j), or zero for v(i) |
| 193 | + const void *y // input scalar y |
| 194 | +) ; |
| 195 | + |
// Values that a descriptor field can take.  Every enumerator has an explicit
// value, so the order in which they are listed does not matter.
typedef enum
{
    // any GrB_Descriptor field:
    GxB_DEFAULT = 0,            // default behavior of the method

    // GrB_OUTP only:
    GrB_REPLACE = 1,            // clear the output before assigning to it

    // GrB_MASK only:
    GrB_COMP = 2,               // use the structural complement of the input
    GrB_SCMP = 2,               // historical alias of GrB_COMP; use GrB_COMP
    GrB_STRUCTURE = 4,          // use only the pattern of the mask, not its
                                // values

    // GrB_INP0 and GrB_INP1 only:
    GrB_TRAN = 3,               // use the transpose of the input

    // GxB_AxB_METHOD only:
    GxB_AxB_GUSTAVSON = 1001,   // gather-scatter saxpy method
    GxB_AxB_DOT = 1003,         // dot product
    GxB_AxB_HASH = 1004,        // hash-based saxpy method
    GxB_AxB_SAXPY = 1005,       // saxpy method (any kind)

    // GxB_GPU_CONTROL only (DRAFT: in progress, do not use):
    GxB_GPU_ALWAYS = 2001,
    GxB_GPU_NEVER = 2002
}
GrB_Desc_Value ;
| 223 | + |
| 224 | +#include "GB_opaque.h" |
| 225 | +#endif |
| 226 | + |
| 227 | +//------------------------------------------------------------------------------ |
| 228 | +// subset of GB.h |
| 229 | +//------------------------------------------------------------------------------ |
| 230 | + |
| 231 | +#include "GB_imin.h" |
| 232 | +#include "GB_zombie.h" |
| 233 | +#include "GB_nnz.h" |
| 234 | +#include "GB_partition.h" |
| 235 | +#include "GB_binary_search.h" |
| 236 | +#include "GB_search_for_vector_template.c" |
| 237 | + |
0 commit comments