@@ -1209,18 +1209,18 @@ inline void Cusparse::bsrmv( cusparseHandle_t handle, cusparseDirection_t dir, c
12091209 double *y,
12101210 const cudaStream_t& stream)
12111211{
1212- // Run cuSparse on selected stream
1213- cusparseSetStream (handle, stream);
1214-
12151212 #ifndef DISABLE_MIXED_PRECISION
1213+ // Run cuSparse on selected stream
1214+ cusparseSetStream (handle, stream);
1215+
12161216 const double *d_bsrVal = reinterpret_cast <const double *>(const_cast <float *>(bsrVal)); // this works due to private API call in the matrix initialization which sets cusparse matrix description in the half precision mode
12171217 cusparseCheckError (cusparseDbsrxmv (handle, dir, trans, mb, mb, nb, nnzb, alpha, descr, d_bsrVal, bsrMaskPtr, bsrRowPtr, bsrRowPtr + 1 , bsrColInd, blockDim , x, beta, y));
1218+
1219+ // Reset cuSparse to default stream
1220+ cusparseSetStream (handle, 0 );
12181221 #else
12191222 FatalError (" Mixed precision modes not currently supported for CUDA 10.1 or later." , AMGX_ERR_NOT_IMPLEMENTED);
12201223 #endif
1221-
1222- // Reset cuSparse to default stream
1223- cusparseSetStream (handle, 0 );
12241224}
12251225
12261226// Custom implementation of matrix-vector product to replace the original bsrxmv,
@@ -1491,18 +1491,18 @@ inline void Cusparse::bsrmv( cusparseHandle_t handle, cusparseDirection_t dir, c
14911491 cuDoubleComplex *y,
14921492 const cudaStream_t& stream)
14931493{
1494- // Run cuSparse on selected stream
1495- cusparseSetStream (handle, stream);
1496-
14971494 #ifndef DISABLE_MIXED_PRECISION
1495+ // Run cuSparse on selected stream
1496+ cusparseSetStream (handle, stream);
1497+
14981498 const cuDoubleComplex *d_bsrVal = reinterpret_cast <cuDoubleComplex *>(const_cast <cuComplex *>(bsrVal));
14991499 cusparseCheckError (cusparseZbsrxmv (handle, dir, trans, mb, mb, nb, nnzb, alpha, descr, d_bsrVal, bsrMaskPtr, bsrRowPtr, bsrRowPtr + 1 , bsrColInd, blockDim , x, beta, y));
1500+
1501+ // Reset cuSparse to default stream
1502+ cusparseSetStream (handle, 0 );
15001503 #else
15011504 FatalError (" Mixed precision modes not currently supported for CUDA 10.1 or later." , AMGX_ERR_NOT_IMPLEMENTED);
15021505 #endif
1503-
1504- // Reset cuSparse to default stream
1505- cusparseSetStream (handle, 0 );
15061506}
15071507
15081508
0 commit comments