Skip to content

Commit 934ea03

Browse files
committed
Add support for device vectors through a workaround moving them back to host to execute transfer before sending back to device.
1 parent bb11d08 commit 934ea03

File tree

3 files changed

+726
-0
lines changed

3 files changed

+726
-0
lines changed

include/deal.II/multigrid/mg_transfer_matrix_free.h

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,6 +1745,178 @@ MGTransferMatrixFree<dim, Number, MemorySpace>::interpolate_to_mg(
17451745
}
17461746

17471747

1748+
/**
1749+
* Template specialization for device vectors.
1750+
* Currently works by transferring device vectors back to the host and
1751+
* performing the transfer operation on the host. Eventually this should be
1752+
* replaced by all operations occurring on the device.
1753+
*/
1754+
1755+
template <int dim, typename Number>
1756+
class MGTransferMatrixFree<dim, Number, MemorySpace::Default>
1757+
: public MGTransferBase<
1758+
LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>>
1759+
{
1760+
public:
1761+
using VectorType =
1762+
LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>;
1763+
using VectorTypeHost =
1764+
LinearAlgebra::distributed::Vector<Number, dealii::MemorySpace::Host>;
1765+
1766+
MGTransferMatrixFree()
1767+
: transfer()
1768+
{}
1769+
1770+
MGTransferMatrixFree(const MGConstrainedDoFs &mg_constrained_dofs)
1771+
: transfer(mg_constrained_dofs)
1772+
{}
1773+
1774+
MGTransferMatrixFree(
1775+
const MGLevelObject<MGTwoLevelTransfer<dim, VectorTypeHost>> &mg_transfers,
1776+
const std::function<void(const unsigned int, VectorTypeHost &)>
1777+
&initialize_dof_vector)
1778+
: transfer(mg_transfers, initialize_dof_vector)
1779+
{}
1780+
1781+
void
1782+
build(const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>>
1783+
&external_partitioners = {})
1784+
{
1785+
transfer.build(external_partitioners);
1786+
}
1787+
1788+
void
1789+
build(const std::function<void(const unsigned int, VectorType &)>
1790+
&initialize_dof_vector)
1791+
{
1792+
transfer.build(initialize_dof_vector);
1793+
}
1794+
1795+
void
1796+
build(const DoFHandler<dim> &dof_handler,
1797+
const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>>
1798+
&external_partitioners = {})
1799+
{
1800+
transfer.build(dof_handler, external_partitioners);
1801+
}
1802+
1803+
void
1804+
build(const DoFHandler<dim> &dof_handler,
1805+
const std::function<void(const unsigned int, VectorType &)>
1806+
&initialize_dof_vector)
1807+
{
1808+
transfer.build(dof_handler, initialize_dof_vector);
1809+
}
1810+
1811+
1812+
template <typename Number2>
1813+
void
1814+
copy_to_mg(
1815+
const DoFHandler<dim> &dof_handler,
1816+
MGLevelObject<VectorType> &dst,
1817+
const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default>
1818+
&src) const
1819+
{
1820+
MGLevelObject<VectorTypeHost> dst_host(dst.min_level(), dst.max_level());
1821+
LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
1822+
src_host;
1823+
1824+
copy_to_host(src_host, src);
1825+
1826+
transfer.copy_to_mg(dof_handler, dst_host, src_host);
1827+
1828+
for (unsigned int l = dst.min_level(); l <= dst.max_level(); ++l)
1829+
copy_from_host(dst[l], dst_host[l]);
1830+
}
1831+
1832+
template <typename Number2>
1833+
void
1834+
copy_from_mg(
1835+
const DoFHandler<dim> &dof_handler,
1836+
LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst,
1837+
const MGLevelObject<VectorType> &src) const
1838+
{
1839+
LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
1840+
dst_host;
1841+
MGLevelObject<VectorTypeHost> src_host(src.min_level(), src.max_level());
1842+
1843+
dst_host.reinit(dst.get_partitioner());
1844+
for (unsigned int l = src.min_level(); l <= src.max_level(); ++l)
1845+
copy_to_host(src_host[l], src[l]);
1846+
1847+
transfer.copy_from_mg(dof_handler, dst_host, src_host);
1848+
1849+
copy_from_host(dst, dst_host);
1850+
}
1851+
1852+
void
1853+
prolongate(const unsigned int to_level,
1854+
VectorType &dst,
1855+
const VectorType &src) const override
1856+
{
1857+
VectorTypeHost dst_host;
1858+
VectorTypeHost src_host;
1859+
1860+
dst_host.reinit(dst.get_partitioner());
1861+
copy_to_host(src_host, src);
1862+
1863+
transfer.prolongate(to_level, dst_host, src_host);
1864+
1865+
copy_from_host(dst, dst_host);
1866+
}
1867+
1868+
void
1869+
restrict_and_add(const unsigned int from_level,
1870+
VectorType &dst,
1871+
const VectorType &src) const override
1872+
{
1873+
VectorTypeHost dst_host;
1874+
VectorTypeHost src_host;
1875+
1876+
copy_to_host(dst_host, dst);
1877+
copy_to_host(src_host, src);
1878+
1879+
transfer.restrict_and_add(from_level, dst_host, src_host);
1880+
1881+
copy_from_host(dst, dst_host);
1882+
}
1883+
1884+
private:
1885+
MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Host> transfer;
1886+
1887+
template <typename Number2>
1888+
void
1889+
copy_to_host(
1890+
LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> &dst,
1891+
const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default>
1892+
&src) const
1893+
{
1894+
LinearAlgebra::ReadWriteVector<Number2> rw_vector(
1895+
src.get_partitioner()->locally_owned_range());
1896+
rw_vector.import_elements(src, VectorOperation::insert);
1897+
1898+
dst.reinit(src.get_partitioner());
1899+
dst.import_elements(rw_vector, VectorOperation::insert);
1900+
}
1901+
1902+
template <typename Number2>
1903+
void
1904+
copy_from_host(
1905+
LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst,
1906+
const LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
1907+
&src) const
1908+
{
1909+
LinearAlgebra::ReadWriteVector<Number2> rw_vector(
1910+
src.get_partitioner()->locally_owned_range());
1911+
rw_vector.import_elements(src, VectorOperation::insert);
1912+
1913+
if (dst.size() == 0)
1914+
dst.reinit(src.get_partitioner());
1915+
dst.import_elements(rw_vector, VectorOperation::insert);
1916+
}
1917+
};
1918+
1919+
17481920

17491921
template <int dim, typename Number, typename TransferType>
17501922
template <typename BlockVectorType2>
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// ------------------------------------------------------------------------
2+
//
3+
// SPDX-License-Identifier: LGPL-2.1-or-later
4+
// Copyright (C) 2016 - 2023 by the deal.II authors
5+
//
6+
// This file is part of the deal.II library.
7+
//
8+
// Part of the source code is dual licensed under Apache-2.0 WITH
9+
// LLVM-exception OR LGPL-2.1-or-later. Detailed license information
10+
// governing the source code and code contributions can be found in
11+
// LICENSE.md and CONTRIBUTING.md at the top level directory of deal.II.
12+
//
13+
// ------------------------------------------------------------------------
14+
15+
16+
// Check the MemorySpace::Default version of MGTransferMatrixFree by
17+
// comparison with the MemorySpace::Host version on a series of uniform meshes for FE_Q
18+
19+
#include <deal.II/distributed/tria.h>
20+
21+
#include <deal.II/fe/fe_q.h>
22+
23+
#include <deal.II/grid/grid_generator.h>
24+
25+
#include <deal.II/lac/la_parallel_vector.h>
26+
27+
#include <deal.II/multigrid/mg_base.h>
28+
#include <deal.II/multigrid/mg_base.templates.h>
29+
#include <deal.II/multigrid/mg_transfer.h>
30+
#include <deal.II/multigrid/mg_transfer_matrix_free.h>
31+
32+
#include "../tests.h"
33+
34+
using namespace dealii;
35+
36+
template <typename Number2>
37+
void
38+
copy_to_host(
39+
LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> &dst,
40+
const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &src)
41+
{
42+
LinearAlgebra::ReadWriteVector<Number2> rw_vector(
43+
src.get_partitioner()->locally_owned_range());
44+
rw_vector.import_elements(src, VectorOperation::insert);
45+
46+
dst.reinit(src.get_partitioner());
47+
dst.import_elements(rw_vector, VectorOperation::insert);
48+
}
49+
50+
template <typename Number2>
51+
void
52+
copy_from_host(
53+
LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst,
54+
const LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
55+
&src)
56+
{
57+
LinearAlgebra::ReadWriteVector<Number2> rw_vector(
58+
src.get_partitioner()->locally_owned_range());
59+
rw_vector.import_elements(src, VectorOperation::insert);
60+
61+
if (dst.size() == 0)
62+
dst.reinit(src.get_partitioner());
63+
dst.import_elements(rw_vector, VectorOperation::insert);
64+
}
65+
66+
template <int dim, typename Number>
67+
void
68+
check(const unsigned int fe_degree)
69+
{
70+
FE_Q<dim> fe(fe_degree);
71+
deallog << "FE: " << fe.get_name() << std::endl;
72+
73+
// run a few different sizes...
74+
unsigned int sizes[] = {1, 2, 3, 4, 5, 6, 8};
75+
for (unsigned int cycle = 0; cycle < sizeof(sizes) / sizeof(unsigned int);
76+
++cycle)
77+
{
78+
unsigned int n_refinements = 0;
79+
unsigned int n_subdiv = sizes[cycle];
80+
if (n_subdiv > 1)
81+
while (n_subdiv % 2 == 0)
82+
{
83+
n_refinements += 1;
84+
n_subdiv /= 2;
85+
}
86+
n_refinements += 3 - dim;
87+
if (fe_degree < 3)
88+
n_refinements += 1;
89+
parallel::distributed::Triangulation<dim> tr(
90+
MPI_COMM_WORLD,
91+
Triangulation<dim>::limit_level_difference_at_vertices,
92+
parallel::distributed::Triangulation<
93+
dim>::construct_multigrid_hierarchy);
94+
GridGenerator::subdivided_hyper_cube(tr, n_subdiv);
95+
tr.refine_global(n_refinements);
96+
deallog << "no. cells: " << tr.n_global_active_cells() << std::endl;
97+
98+
DoFHandler<dim> mgdof(tr);
99+
mgdof.distribute_dofs(fe);
100+
mgdof.distribute_mg_dofs();
101+
102+
MGConstrainedDoFs mg_constrained_dofs;
103+
mg_constrained_dofs.initialize(mgdof);
104+
mg_constrained_dofs.make_zero_boundary_constraints(mgdof, {0});
105+
106+
// build host reference
107+
MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Host>
108+
transfer_host(mg_constrained_dofs);
109+
transfer_host.build(mgdof);
110+
111+
// build device transfer
112+
MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Default>
113+
transfer_device(mg_constrained_dofs);
114+
transfer_device.build(mgdof);
115+
116+
// check prolongation for all levels using random vector
117+
for (unsigned int level = 1;
118+
level < mgdof.get_triangulation().n_global_levels();
119+
++level)
120+
{
121+
LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> v1, v2;
122+
LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>
123+
v1_cpy, v2_cpy, v3;
124+
v1.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD);
125+
v2.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD);
126+
v3.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD);
127+
for (unsigned int i = 0; i < v1.locally_owned_size(); ++i)
128+
v1.local_element(i) = random_value<double>();
129+
130+
copy_from_host(v1_cpy, v1);
131+
132+
transfer_host.prolongate(level, v2, v1);
133+
transfer_device.prolongate(level, v3, v1_cpy);
134+
135+
copy_from_host(v2_cpy, v2);
136+
137+
v3 -= v2_cpy;
138+
deallog << "Diff prolongate l" << level << ": " << v3.l2_norm()
139+
<< std::endl;
140+
}
141+
142+
// check restriction for all levels using random vector
143+
for (unsigned int level = 1;
144+
level < mgdof.get_triangulation().n_global_levels();
145+
++level)
146+
{
147+
LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> v1, v2;
148+
LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>
149+
v1_cpy, v2_cpy, v3;
150+
v1.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD);
151+
v2.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD);
152+
v3.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD);
153+
for (unsigned int i = 0; i < v1.locally_owned_size(); ++i)
154+
v1.local_element(i) = random_value<double>();
155+
copy_from_host(v1_cpy, v1);
156+
transfer_host.restrict_and_add(level, v2, v1);
157+
transfer_device.restrict_and_add(level, v3, v1_cpy);
158+
copy_from_host(v2_cpy, v2);
159+
v3 -= v2_cpy;
160+
deallog << "Diff restrict l" << level << ": " << v3.l2_norm()
161+
<< std::endl;
162+
163+
v2 = 1.;
164+
v3 = 1.;
165+
transfer_host.restrict_and_add(level, v2, v1);
166+
transfer_device.restrict_and_add(level, v3, v1_cpy);
167+
copy_from_host(v2_cpy, v2);
168+
v3 -= v2_cpy;
169+
deallog << "Diff restrict add l" << level << ": " << v3.l2_norm()
170+
<< std::endl;
171+
}
172+
deallog << std::endl;
173+
}
174+
}
175+
176+
177+
int
178+
main(int argc, char **argv)
179+
{
180+
// no threading in this test...
181+
Utilities::MPI::MPI_InitFinalize mpi(argc, argv, 1);
182+
mpi_initlog();
183+
184+
check<2, double>(1);
185+
check<2, double>(3);
186+
check<3, double>(1);
187+
check<3, double>(3);
188+
check<2, float>(2);
189+
check<3, float>(2);
190+
}

0 commit comments

Comments
 (0)