Skip to content

Commit 783dd24

Browse files
committed
Fix a bug where the GPU implementation was not guarded by the HYDROGEN_HAVE_CUDA preprocessor macro
1 parent f94e16b commit 783dd24

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

include/El/blas_like/level1/Copy/TransposeDist.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ namespace copy
15 15
{
16 16

17 17
// TODO(poulson): Generalize the below implementation
18-
// FIXME (trb 03/06/18) -- Need to do the GPU impl
19 18
template<typename T,Dist U,Dist V,typename>
20 19
void TransposeDist(DistMatrix<T,U,V,ELEMENT,Device::CPU> const& A,
21 20
DistMatrix<T,V,U,ELEMENT,Device::CPU>& B)
@@ -214,6 +213,8 @@ void TransposeDist(DistMatrix<T,U,V,ELEMENT,Device::CPU> const& A,
214 213
}
215 214
}
216 215

216+
#ifdef HYDROGEN_HAVE_CUDA
217+
217 218
// FIXME (trb): This should work just fine, but it might not have
218 219
// optimal performance for row/column vectors (A.Height() or A.Width()
219 220
// equal to 1). See CPU impl above for what would have to happen for
@@ -267,6 +268,8 @@ void TransposeDist(DistMatrix<T,U,V,ELEMENT,Device::GPU> const& A,
267 268

268 269
}
269 270

271+
#endif // HYDROGEN_HAVE_CUDA
272+
270 273
template<typename T,Dist U,Dist V,Device D,typename,typename>
271 274
void TransposeDist(DistMatrix<T,U,V,ELEMENT,D> const& A,
272 275
DistMatrix<T,V,U,ELEMENT,D>& B)

0 commit comments

Comments
 (0)