@@ -15,7 +15,6 @@ namespace copy
1515{
1616
1717// TODO(poulson): Generalize the implementation below
18- // FIXME (trb 03/06/18) -- Need to do the GPU impl
1918template <typename T,Dist U,Dist V,typename >
2019void TransposeDist (DistMatrix<T,U,V,ELEMENT,Device::CPU> const & A,
2120 DistMatrix<T,V,U,ELEMENT,Device::CPU>& B)
@@ -214,6 +213,8 @@ void TransposeDist(DistMatrix<T,U,V,ELEMENT,Device::CPU> const& A,
214213 }
215214}
216215
216+ #ifdef HYDROGEN_HAVE_CUDA
217+
217218// FIXME (trb): This should work just fine, but it might not have
218219// optimal performance for row/column vectors (A.Height() or A.Width()
219220// equal to 1). See CPU impl above for what would have to happen for
@@ -267,6 +268,8 @@ void TransposeDist(DistMatrix<T,U,V,ELEMENT,Device::GPU> const& A,
267268
268269}
269270
271+ #endif // HYDROGEN_HAVE_CUDA
272+
270273template <typename T,Dist U,Dist V,Device D,typename ,typename >
271274void TransposeDist (DistMatrix<T,U,V,ELEMENT,D> const & A,
272275 DistMatrix<T,V,U,ELEMENT,D>& B)
0 commit comments