46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP 47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 54 #include <Teuchos_Comm.hpp> 55 #include <Teuchos_CommHelpers.hpp> 57 #include <Xpetra_Vector.hpp> 61 #include "MueLu_Aggregates_kokkos.hpp" 63 #include "MueLu_LWGraph_kokkos.hpp" 66 #include "KokkosGraph_graph_color.hpp" 70 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
71 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
72 BuildAggregates(
const ParameterList& params,
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates, std::vector<unsigned>& aggStat,
73 LO& numNonAggregatedNodes)
const {
74 Monitor m(*
this,
"BuildAggregates");
76 std::string orderingStr = params.get<std::string>(
"aggregation: ordering");
77 int maxNeighAlreadySelected = params.get<
int> (
"aggregation: max selected neighbors");
78 int minNodesPerAggregate = params.get<
int> (
"aggregation: min agg size");
79 int maxNodesPerAggregate = params.get<
int> (
"aggregation: max agg size");
81 Algorithm algorithm = Algorithm::Serial;
82 std::string algoParamName =
"aggregation: phase 1 algorithm";
83 if(params.isParameter(algoParamName))
85 algorithm = algorithmFromName(params.get<std::string>(
"aggregation: phase 1 algorithm"));
89 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
94 if(algorithm == Algorithm::Distance2)
96 BuildAggregatesDistance2(graph, aggregates, aggStat, numNonAggregatedNodes, maxNodesPerAggregate);
100 BuildAggregatesSerial(graph, aggregates, aggStat, numNonAggregatedNodes,
101 minNodesPerAggregate, maxNodesPerAggregate, maxNeighAlreadySelected, orderingStr);
106 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
107 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
108 BuildAggregatesSerial(
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates,
109 std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes,
110 LO minNodesPerAggregate, LO maxNodesPerAggregate,
111 LO maxNeighAlreadySelected, std::string& orderingStr)
const 119 ordering = O_NATURAL;
120 if (orderingStr ==
"natural") ordering = O_NATURAL;
121 if (orderingStr ==
"random" ) ordering = O_RANDOM;
122 if (orderingStr ==
"graph" ) ordering = O_GRAPH;
124 const LO numRows = graph.GetNodeNumVertices();
125 const int myRank = graph.GetComm()->getRank();
127 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
128 ArrayRCP<LO> procWinner = aggregates.GetProcWinner() ->getDataNonConst(0);
130 LO numLocalAggregates = aggregates.GetNumAggregates();
132 ArrayRCP<LO> randomVector;
133 if (ordering == O_RANDOM) {
134 randomVector = arcp<LO>(numRows);
135 for (LO i = 0; i < numRows; i++)
137 RandomReorder(randomVector);
142 std::vector<int> aggList(graph.getNodeMaxNumRowEntries());
144 std::queue<LO> graphOrderQueue;
147 for (LO i = 0; i < numRows; i++) {
149 LO rootCandidate = 0;
150 if (ordering == O_NATURAL) rootCandidate = i;
151 else if (ordering == O_RANDOM) rootCandidate = randomVector[i];
152 else if (ordering == O_GRAPH) {
154 if (graphOrderQueue.size() == 0) {
156 for (LO jnode = 0; jnode < numRows; jnode++)
157 if (aggStat[jnode] ==
READY) {
158 graphOrderQueue.push(jnode);
162 if (graphOrderQueue.size() == 0) {
166 rootCandidate = graphOrderQueue.front();
167 graphOrderQueue.pop();
170 if (aggStat[rootCandidate] !=
READY)
175 aggList[aggSize++] = rootCandidate;
177 auto neighOfINode = graph.getNeighborVertices(rootCandidate);
183 if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
184 as<int>(neighOfINode.length) < minNodesPerAggregate) {
188 LO numAggregatedNeighbours = 0;
190 for (
int j = 0; j < as<int>(neighOfINode.length); j++) {
191 LO neigh = neighOfINode(j);
193 if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {
195 if (aggStat[neigh] ==
READY || aggStat[neigh] ==
NOTSEL) {
204 if (aggSize < as<size_t>(maxNodesPerAggregate))
205 aggList[aggSize++] = neigh;
208 numAggregatedNeighbours++;
214 if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&
215 (aggSize >= as<size_t>(minNodesPerAggregate))) {
218 aggregates.SetIsRoot(rootCandidate);
219 aggIndex = numLocalAggregates++;
221 for (
size_t k = 0; k < aggSize; k++) {
223 vertex2AggId[aggList[k]] = aggIndex;
224 procWinner [aggList[k]] = myRank;
227 numNonAggregatedNodes -= aggSize;
231 aggStat[rootCandidate] =
NOTSEL;
238 if (ordering == O_GRAPH) {
243 for (
size_t k = 0; k < aggSize; k++) {
244 auto neighOfJNode = graph.getNeighborVertices(aggList[k]);
246 for (
int j = 0; j < as<int>(neighOfJNode.length); j++) {
247 LO neigh = neighOfJNode(j);
249 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] ==
READY)
250 graphOrderQueue.push(neigh);
258 for (LO i = 0; i < numRows; i++)
263 aggregates.SetNumAggregates(numLocalAggregates);
266 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
267 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
268 BuildAggregatesDistance2(
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates,
269 std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes, LO maxAggSize)
const 271 const LO numRows = graph.GetNodeNumVertices();
272 const int myRank = graph.GetComm()->getRank();
274 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
275 ArrayRCP<LO> procWinner = aggregates.GetProcWinner() ->getDataNonConst(0);
277 LO numLocalAggregates = aggregates.GetNumAggregates();
280 std::vector<LocalOrdinal> rowptrs;
281 rowptrs.reserve(numRows + 1);
282 std::vector<LocalOrdinal> colinds;
283 colinds.reserve(graph.GetNodeNumEdges());
285 rowptrs.push_back(0);
286 for(LocalOrdinal row = 0; row < numRows; row++)
288 auto entries = graph.getNeighborVertices(row);
289 for(LocalOrdinal i = 0; i < entries.length; i++)
291 colinds.push_back(entries.colidx(i));
293 rowptrs.push_back(colinds.size());
298 typedef typename graph_t::device_type device_t;
299 typedef typename device_t::memory_space memory_space;
300 typedef typename device_t::execution_space execution_space;
301 typedef typename graph_t::row_map_type::non_const_type rowptrs_view;
302 typedef typename rowptrs_view::HostMirror host_rowptrs_view;
303 typedef typename graph_t::entries_type::non_const_type colinds_view;
304 typedef typename colinds_view::HostMirror host_colinds_view;
306 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
307 typename rowptrs_view::const_value_type,
typename colinds_view::const_value_type,
typename colinds_view::const_value_type,
308 execution_space, memory_space, memory_space> KernelHandle;
312 kh.create_graph_coloring_handle();
315 rowptrs_view aRowptrs(
"A device rowptrs", rowptrs.size());
316 colinds_view aColinds(
"A device colinds", colinds.size());
319 host_rowptrs_view aHostRowptrs = Kokkos::create_mirror_view(aRowptrs);
320 for(
size_t i = 0; i < rowptrs.size(); i++)
322 aHostRowptrs(i) = rowptrs[i];
324 Kokkos::deep_copy(aRowptrs, aHostRowptrs);
325 host_colinds_view aHostColinds = Kokkos::create_mirror_view(aColinds);
326 for(
size_t i = 0; i < colinds.size(); i++)
328 aHostColinds(i) = colinds[i];
330 Kokkos::deep_copy(aColinds, aHostColinds);
334 KokkosGraph::Experimental::d2_graph_color(&kh, numRows, numRows, aRowptrs, aColinds, aRowptrs, aColinds);
337 auto coloringHandle = kh.get_graph_coloring_handle();
338 auto colorsDevice = coloringHandle->get_vertex_colors();
340 auto colors = Kokkos::create_mirror_view(colorsDevice);
341 Kokkos::deep_copy(colors, colorsDevice);
344 kh.destroy_graph_coloring_handle();
347 LocalOrdinal aggCount = 0;
348 for(LocalOrdinal i = 0; i < numRows; i++)
350 if(colors(i) == 1 && aggStat[i] ==
READY)
352 vertex2AggId[i] = aggCount++;
354 numLocalAggregates++;
355 procWinner[i] = myRank;
358 numNonAggregatedNodes = 0;
359 std::vector<LocalOrdinal> aggSizes(numLocalAggregates, 0);
360 for(
int i = 0; i < numRows; i++)
362 if(vertex2AggId[i] >= 0)
363 aggSizes[vertex2AggId[i]]++;
366 for(LocalOrdinal i = 0; i < numRows; i++)
368 if(colors(i) != 1 && (aggStat[i] ==
READY || aggStat[i] ==
NOTSEL))
372 auto neighbors = graph.getNeighborVertices(i);
373 for(LocalOrdinal j = 0; j < neighbors.length; j++)
375 auto nei = neighbors.colidx(j);
376 LocalOrdinal agg = vertex2AggId[nei];
377 if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat[nei] ==
AGGREGATED && aggSizes[agg] < maxAggSize)
380 vertex2AggId[i] = agg;
383 procWinner[i] = myRank;
390 numNonAggregatedNodes++;
396 aggregates.SetNumAggregates(numLocalAggregates);
399 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
400 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomReorder(ArrayRCP<LO> list)
const {
403 for(
int i = 0; i < n-1; i++)
404 std::swap(list[i], list[RandomOrdinal(i,n-1)]);
407 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
408 int AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomOrdinal(
int min,
int max)
const {
409 return min + as<int>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
414 #endif // HAVE_MUELU_KOKKOS_REFACTOR 415 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Namespace for MueLu classes and methods.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type