TTK
MPIUtils.h
Go to the documentation of this file.
1
7
8#pragma once
9
10#include <BaseClass.h>
11#include <Timer.h>
12#include <iostream>
13
14#include <algorithm>
15#include <array>
16#include <limits>
17#include <unordered_map>
18#include <unordered_set>
19#include <vector>
20
21#ifdef TTK_ENABLE_MPI
22
23// disable the MPI C++ API
24#define OMPI_SKIP_MPICXX 1
25
26#include <mpi.h>
27
28namespace ttk {
29
30 inline MPI_Datatype getMPIType(const float ttkNotUsed(val)) {
31 return MPI_FLOAT;
32 };
33 inline MPI_Datatype getMPIType(const int ttkNotUsed(val)) {
34 return MPI_INT;
35 };
36 inline MPI_Datatype getMPIType(const unsigned int ttkNotUsed(val)) {
37 return MPI_UNSIGNED;
38 };
39 inline MPI_Datatype getMPIType(const double ttkNotUsed(val)) {
40 return MPI_DOUBLE;
41 };
42 inline MPI_Datatype getMPIType(const long double ttkNotUsed(val)) {
43 return MPI_LONG_DOUBLE;
44 };
45 inline MPI_Datatype getMPIType(const long ttkNotUsed(val)) {
46 return MPI_LONG;
47 };
48 inline MPI_Datatype getMPIType(const unsigned long ttkNotUsed(val)) {
49 return MPI_UNSIGNED_LONG;
50 };
51 inline MPI_Datatype getMPIType(const long long ttkNotUsed(val)) {
52 return MPI_LONG_LONG;
53 };
54 inline MPI_Datatype getMPIType(const unsigned long long ttkNotUsed(val)) {
55 return MPI_UNSIGNED_LONG_LONG;
56 };
57 inline MPI_Datatype getMPIType(const unsigned char ttkNotUsed(val)) {
58 return MPI_UNSIGNED_CHAR;
59 };
60
61 template <typename DT, typename IT>
62 struct value {
63 DT scalar;
64 IT globalId;
65
66 value(DT _scalar, IT _globalId) : scalar(_scalar), globalId(_globalId) {
67 }
68 };
69
70 inline bool isRunningWithMPI() {
71 return ttk::MPIsize_ > 1;
72 };
73
74 inline bool hasInitializedMPI() {
75 return ttk::MPIsize_ > 0;
76 };
77
78 inline int startMPITimer(Timer &t, int rank, int size) {
79 if(size > 0) {
80 MPI_Barrier(ttk::MPIcomm_);
81 if(rank == 0) {
82 t.reStart();
83 }
84 }
85 return 0;
86 };
87
88 inline double endMPITimer(Timer &t, int rank, int size) {
89 double elapsedTime = 0;
90 if(size > 0) {
91 MPI_Barrier(ttk::MPIcomm_);
92 if(rank == 0) {
93 elapsedTime = t.getElapsedTime();
94 }
95 }
96
97 return elapsedTime;
98 };
99
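  // Editorial usage sketch -- not part of the original TTK header. It shows
  // how the timer helpers above are meant to be combined; the function name
  // exampleTimedMPISection and the MPI_Barrier standing in for real work are
  // hypothetical.
  inline double exampleTimedMPISection() {
    ttk::Timer t;
    // barrier over ttk::MPIcomm_, then restart the timer on rank 0 only
    startMPITimer(t, ttk::MPIrank_, ttk::MPIsize_);
    MPI_Barrier(ttk::MPIcomm_); // placeholder for distributed work
    // barrier again, then read the elapsed time on rank 0 (0 elsewhere)
    return endMPITimer(t, ttk::MPIrank_, ttk::MPIsize_);
  }
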
112 template <typename DT, typename triangulationType>
113 int getGhostCellScalars(DT *scalarArray,
114 const triangulationType *triangulation,
115 const int rankToSend,
116 MPI_Comm communicator,
117 const int dimensionNumber) {
118 const std::vector<int> &neighbors = triangulation->getNeighborRanks();
119 if(!ttk::isRunningWithMPI()) {
120 return -1;
121 }
122 MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
123 // we need unique tags for each rankToSend, otherwise messages might become
124 // entangled
125 int tagMultiplier = rankToSend + 1;
126 int valuesTag = 103 * tagMultiplier;
127 if(rankToSend == ttk::MPIrank_) {
128 int neighborNumber = neighbors.size();
129 const auto &ghostCellsPerOwner = triangulation->getGhostCellsPerOwner();
130 // receive the scalar values
131 for(int r = 0; r < neighborNumber; r++) {
132 ttk::SimplexId nValues = ghostCellsPerOwner[neighbors[r]].size();
133 std::vector<DT> receivedValues(nValues * dimensionNumber);
134 if(nValues > 0) {
135 MPI_Recv(receivedValues.data(), nValues * dimensionNumber, MPI_DT,
136 neighbors.at(r), valuesTag, communicator, MPI_STATUS_IGNORE);
137
138 for(ttk::SimplexId i = 0; i < nValues; i++) {
139 for(int j = 0; j < dimensionNumber; j++) {
140 DT receivedVal = receivedValues[i * dimensionNumber + j];
141 ttk::SimplexId globalId = ghostCellsPerOwner[neighbors[r]][i];
142 ttk::SimplexId localId = triangulation->getCellLocalId(globalId);
143 scalarArray[localId * dimensionNumber + j] = receivedVal;
144 }
145 }
146 }
147 }
148 } else { // owner ranks
149 // if rankToSend is not a neighbor of the current rank, we do not need
150 // to do anything
151 if(std::find(neighbors.begin(), neighbors.end(), rankToSend)
152 != neighbors.end()) {
153 // get the needed globalids from the triangulation
154 const auto &ghostCellsForThisRank
155 = triangulation->getRemoteGhostCells()[rankToSend];
156 ttk::SimplexId nValues = ghostCellsForThisRank.size();
157 if(nValues > 0) {
158 // assemble the scalar values
159 std::vector<DT> valuesToSend(nValues * dimensionNumber);
160 for(ttk::SimplexId i = 0; i < nValues; i++) {
161 for(int j = 0; j < dimensionNumber; j++) {
162 ttk::SimplexId globalId = ghostCellsForThisRank[i];
163 ttk::SimplexId localId = triangulation->getCellLocalId(globalId);
164 valuesToSend[i * dimensionNumber + j]
165 = scalarArray[localId * dimensionNumber + j];
166 }
167 }
168
169 // send the scalar values
170 MPI_Send(valuesToSend.data(), nValues * dimensionNumber, MPI_DT,
171 rankToSend, valuesTag, communicator);
172 }
173 }
174 }
175
176 return 0;
177 }
178
179 template <typename DT, typename triangulationType>
180 int getGhostVertexScalars(DT *scalarArray,
181 const triangulationType *triangulation,
182 const int rankToSend,
183 MPI_Comm communicator,
184 const int dimensionNumber) {
185 const std::vector<int> &neighbors = triangulation->getNeighborRanks();
186 if(!ttk::isRunningWithMPI()) {
187 return -1;
188 }
189 MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
190 // we need unique tags for each rankToSend, otherwise messages might become
191 // entangled
192 int tagMultiplier = rankToSend + 1;
193 int valuesTag = 103 * tagMultiplier;
194 if(rankToSend == ttk::MPIrank_) {
195 int neighborNumber = neighbors.size();
196 const auto &ghostVerticesPerOwner
197 = triangulation->getGhostVerticesPerOwner();
198 // receive the scalar values
199 for(int r = 0; r < neighborNumber; r++) {
200 ttk::SimplexId nValues = ghostVerticesPerOwner[neighbors[r]].size();
201 std::vector<DT> receivedValues(nValues * dimensionNumber);
202 if(nValues > 0) {
203 MPI_Recv(receivedValues.data(), nValues * dimensionNumber, MPI_DT,
204 neighbors.at(r), valuesTag, communicator, MPI_STATUS_IGNORE);
205 for(ttk::SimplexId i = 0; i < nValues; i++) {
206 for(int j = 0; j < dimensionNumber; j++) {
207 DT receivedVal = receivedValues[i * dimensionNumber + j];
208 ttk::SimplexId globalId = ghostVerticesPerOwner[neighbors[r]][i];
209 ttk::SimplexId localId
210 = triangulation->getVertexLocalId(globalId);
211 scalarArray[localId * dimensionNumber + j] = receivedVal;
212 }
213 }
214 }
215 }
216 } else { // owner ranks
217 // if rankToSend is not a neighbor of the current rank, we do not need
218 // to do anything
219 if(std::find(neighbors.begin(), neighbors.end(), rankToSend)
220 != neighbors.end()) {
221 // get the needed globalids from the triangulation
222 const auto &ghostVerticesForThisRank
223 = triangulation->getRemoteGhostVertices()[rankToSend];
224 ttk::SimplexId nValues = ghostVerticesForThisRank.size();
225 if(nValues > 0) {
226 // assemble the scalar values
227 std::vector<DT> valuesToSend(nValues * dimensionNumber);
228 for(ttk::SimplexId i = 0; i < nValues; i++) {
229 for(int j = 0; j < dimensionNumber; j++) {
230 ttk::SimplexId globalId = ghostVerticesForThisRank[i];
231 ttk::SimplexId localId
232 = triangulation->getVertexLocalId(globalId);
233 valuesToSend[i * dimensionNumber + j]
234 = scalarArray[localId * dimensionNumber + j];
235 }
236 }
237
238 // send the scalar values
239 MPI_Send(valuesToSend.data(), nValues * dimensionNumber, MPI_DT,
240 rankToSend, valuesTag, communicator);
241 }
242 }
243 }
244
245 return 0;
246 }
247
269 template <typename DT,
270 typename IT,
271 typename GVGID,
272 typename GVR,
273 typename GVLID>
274 int getGhostDataScalarsWithoutTriangulation(DT *scalarArray,
275 const GVR &getVertexRank,
276 const GVGID &getVertexGlobalId,
277 const GVLID &getVertexLocalId,
278 const std::vector<int> &neighbors,
279 const int rankToSend,
280 const IT nVerts,
281 MPI_Comm communicator,
282 const int dimensionNumber) {
283 int neighborNumber = neighbors.size();
284 if(!ttk::isRunningWithMPI()) {
285 return -1;
286 }
287 MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
288 MPI_Datatype MPI_IT = getMPIType(static_cast<IT>(0));
289 using globalIdType = decltype(getVertexGlobalId(0));
290 MPI_Datatype MPI_GIT = getMPIType(static_cast<globalIdType>(0));
291 // we need unique tags for each rankToSend, otherwise messages might become
292 // entangled
293 int tagMultiplier = rankToSend + 1;
294 int amountTag = 101 * tagMultiplier;
295 int idsTag = 102 * tagMultiplier;
296 int valuesTag = 103 * tagMultiplier;
297 if(rankToSend == ttk::MPIrank_) {
298 // initialize the inner vectors with size 0
299 std::vector<std::vector<globalIdType>> rankVectors(
300 ttk::MPIsize_, std::vector<globalIdType>(0));
301 // aggregate the needed ids
302
303 for(IT i = 0; i < nVerts; i++) {
304 if(ttk::MPIrank_ != getVertexRank(i)) {
305 rankVectors[getVertexRank(i)].push_back(getVertexGlobalId(i));
306 }
307 }
308 // send the amount of ids and the needed ids themselves
309 for(int r = 0; r < neighborNumber; r++) {
310 IT nValues = rankVectors[neighbors[r]].size();
311 MPI_Send(&nValues, 1, MPI_IT, neighbors[r], amountTag, communicator);
312 if(nValues > 0) {
313 MPI_Send(rankVectors[neighbors[r]].data(), nValues, MPI_GIT,
314 neighbors[r], idsTag, communicator);
315 }
316 }
317 // receive the scalar values
318 for(int r = 0; r < neighborNumber; r++) {
319 IT nValues = rankVectors[neighbors[r]].size();
320 std::vector<DT> receivedValues(nValues * dimensionNumber);
321 if(nValues > 0) {
322 MPI_Recv(receivedValues.data(), nValues * dimensionNumber, MPI_DT,
323 neighbors[r], valuesTag, communicator, MPI_STATUS_IGNORE);
324 for(IT i = 0; i < nValues; i++) {
325 for(int j = 0; j < dimensionNumber; j++) {
326 DT receivedVal = receivedValues[i * dimensionNumber + j];
327 const auto globalId = rankVectors[neighbors[r]][i];
328 IT localId = getVertexLocalId(globalId);
329 scalarArray[localId * dimensionNumber + j] = receivedVal;
330 }
331 }
332 }
333 }
334 } else { // owner ranks
335 // if rankToSend is not a neighbor of the current rank, we do not need
336 // to do anything
337 if(std::find(neighbors.begin(), neighbors.end(), rankToSend)
338 != neighbors.end()) {
339 // receive the amount of ids and the needed ids themselves
340 IT nValues;
341
342 MPI_Recv(&nValues, 1, MPI_IT, rankToSend, amountTag, communicator,
343 MPI_STATUS_IGNORE);
344
345 if(nValues > 0) {
346 std::vector<globalIdType> receivedIds(nValues);
347 MPI_Recv(receivedIds.data(), nValues, MPI_GIT, rankToSend, idsTag,
348 communicator, MPI_STATUS_IGNORE);
349
350 // assemble the scalar values
351 std::vector<DT> valuesToSend(nValues * dimensionNumber);
352 for(IT i = 0; i < nValues; i++) {
353 for(int j = 0; j < dimensionNumber; j++) {
354 IT globalId = receivedIds[i];
355 IT localId = getVertexLocalId(globalId);
356 valuesToSend[i * dimensionNumber + j]
357 = scalarArray[localId * dimensionNumber + j];
358 }
359 }
360
361 // send the scalar values
362 MPI_Send(valuesToSend.data(), nValues * dimensionNumber, MPI_DT,
363 rankToSend, valuesTag, communicator);
364 }
365 }
366 }
367
368 return 0;
369 }
370
380 template <typename IT, typename GVR>
381 int preconditionNeighborsUsingRankArray(std::vector<int> &neighbors,
382 const GVR &getVertexRank,
383 const IT nVerts,
384 MPI_Comm communicator) {
385 std::unordered_set<int> neighborSet{};
386 for(IT i = 0; i < nVerts; i++) {
387 if(getVertexRank(i) != ttk::MPIrank_) {
388 neighborSet.emplace(getVertexRank(i));
389 }
390 }
391 std::vector<int> sendVector(neighborSet.begin(), neighborSet.end());
392 int localSize = neighborSet.size();
393 std::vector<int> sizes(ttk::MPIsize_);
394 std::vector<int> displacements(ttk::MPIsize_);
395 MPI_Gather(
396 &localSize, 1, MPI_INT, sizes.data(), 1, MPI_INT, 0, communicator);
397 int totalSize = 0;
398 if(ttk::MPIrank_ == 0) {
399 for(int i = 0; i < ttk::MPIsize_; i++) {
400 totalSize += sizes[i];
401 if(i == 0) {
402 displacements[i] = 0;
403 } else {
404 displacements[i] = displacements[i - 1] + sizes[i - 1];
405 }
406 }
407 }
408 std::vector<int> rootVector(totalSize);
409
410 MPI_Gatherv(sendVector.data(), sendVector.size(), MPI_INT,
411 rootVector.data(), sizes.data(), displacements.data(), MPI_INT,
412 0, communicator);
413 std::vector<int> scatterVector;
414
415 if(ttk::MPIrank_ == 0) {
416 // now we transform this 1d vector into a vector of sets, which we will
417 // later flatten again in order to scatter it
418 std::vector<std::unordered_set<int>> setsFromRanks(ttk::MPIsize_);
419 auto begin = rootVector.begin();
420 auto end = rootVector.begin();
421 for(int i = 0; i < ttk::MPIsize_; i++) {
422 end = begin + sizes[i];
423 std::unordered_set<int> s(begin, end);
424 setsFromRanks[i] = s;
425 begin = end;
426 }
427 // now we check, for each rank, whether any other rank lists it as a
428 // neighbor; if so, that other rank is added to its neighbor set as well
429 for(int i = 0; i < ttk::MPIsize_; i++) {
430 for(int j = 0; j < ttk::MPIsize_; j++) {
431 if(setsFromRanks[j].find(i) != setsFromRanks[j].end()) {
432 setsFromRanks[i].emplace(j);
433 }
434 }
435 }
436 // now we transform this vector of sets back into a 1d vector to scatter
437 // it
438 for(int i = 0; i < ttk::MPIsize_; i++) {
439 sizes[i] = setsFromRanks[i].size();
440 if(i == 0) {
441 displacements[i] = 0;
442 } else {
443 displacements[i] = displacements[i - 1] + sizes[i - 1];
444 }
445 scatterVector.insert(scatterVector.end(), setsFromRanks[i].begin(),
446 setsFromRanks[i].end());
447 }
448 }
449
450 // scatter first the size and then the vector itself
451 int receivedSize;
452 MPI_Scatter(
453 sizes.data(), 1, MPI_INT, &receivedSize, 1, MPI_INT, 0, communicator);
454
455 // and then the actual neighbors
456 std::vector<int> receivedNeighbors(receivedSize);
457 MPI_Scatterv(scatterVector.data(), sizes.data(), displacements.data(),
458 MPI_INT, receivedNeighbors.data(), receivedSize, MPI_INT, 0,
459 communicator);
460 // then we turn the vector back into a set
461 std::unordered_set<int> finalSet(
462 receivedNeighbors.begin(), receivedNeighbors.end());
463 neighborSet = finalSet;
464
465 // we copy the set into the output vector
466 neighbors.clear();
467 for(int neighbor : neighborSet) {
468 neighbors.push_back(neighbor);
469 }
470 std::sort(neighbors.begin(), neighbors.end());
471
472 return 0;
473 }
474
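  // Editorial usage sketch -- not part of the original TTK header. It derives
  // the neighbor ranks from a per-vertex rank array (for instance the one
  // filled by produceRankArray() further below); the wrapper name and the raw
  // pointer parameter are placeholders.
  template <typename IT>
  int exampleNeighborsFromRankArray(std::vector<int> &neighbors,
                                    const int *rankArray,
                                    const IT nVerts) {
    // accessor expected by preconditionNeighborsUsingRankArray()
    const auto getVertexRank = [rankArray](const IT i) { return rankArray[i]; };
    return preconditionNeighborsUsingRankArray<IT>(
      neighbors, getVertexRank, nVerts, ttk::MPIcomm_);
  }
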
486 template <typename DT, typename triangulationType>
487 int exchangeGhostCells(DT *scalarArray,
488 const triangulationType *triangulation,
489 MPI_Comm communicator,
490 const int dimensionNumber = 1) {
491 if(!ttk::isRunningWithMPI()) {
492 return -1;
493 }
494 if(!triangulation->hasPreconditionedDistributedCells()) {
495 return -1;
496 }
497 for(int r = 0; r < ttk::MPIsize_; r++) {
498 getGhostCellScalars<DT, triangulationType>(
499 scalarArray, triangulation, r, communicator, dimensionNumber);
500 MPI_Barrier(communicator);
501 }
502 return 0;
503 }
504
505 template <typename DT, typename triangulationType>
506 int exchangeGhostVertices(DT *scalarArray,
507 const triangulationType *triangulation,
508 MPI_Comm communicator,
509 const int dimensionNumber = 1) {
510 if(!ttk::isRunningWithMPI()) {
511 return -1;
512 }
513 if(!triangulation->hasPreconditionedDistributedVertices()) {
514 return -1;
515 }
516 for(int r = 0; r < ttk::MPIsize_; r++) {
517 getGhostVertexScalars<DT, triangulationType>(
518 scalarArray, triangulation, r, communicator, dimensionNumber);
519 MPI_Barrier(communicator);
520 }
521 return 0;
522 }
523
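  // Editorial usage sketch -- not part of the original TTK header. It updates
  // the ghost copies of a one-component, vertex-attached scalar field after
  // the locally owned values have been recomputed; exchangeGhostCells() is
  // used the same way for cell data. The wrapper name is hypothetical.
  template <typename DT, typename triangulationType>
  int exampleRefreshGhostVertexField(DT *scalarArray,
                                     const triangulationType *triangulation) {
    // returns -1 unless triangulation->hasPreconditionedDistributedVertices()
    return exchangeGhostVertices<DT, triangulationType>(
      scalarArray, triangulation, ttk::MPIcomm_, 1);
  }
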
545 template <typename DT,
546 typename IT,
547 typename GVGID,
548 typename GVR,
549 typename GVLID>
550 int exchangeGhostDataWithoutTriangulation(DT *scalarArray,
551 const GVR &getVertexRank,
552 const GVGID &getVertexGlobalId,
553 const GVLID &getVertexLocalId,
554 const IT nVerts,
555 MPI_Comm communicator,
556 const std::vector<int> &neighbors,
557 const int dimensionNumber = 1) {
558 if(!ttk::isRunningWithMPI()) {
559 return -1;
560 }
561 for(int r = 0; r < ttk::MPIsize_; r++) {
562 getGhostDataScalarsWithoutTriangulation(
563 scalarArray, getVertexRank, getVertexGlobalId, getVertexLocalId,
564 neighbors, r, nVerts, communicator, dimensionNumber);
565 MPI_Barrier(communicator);
566 }
567 return 0;
568 }
569
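  // Editorial usage sketch -- not part of the original TTK header. It shows
  // how the accessor functors can be plain lambdas over raw arrays when no
  // triangulation is available; the parameter names (globalIds, rankArray,
  // gidToLid) and the wrapper itself are placeholders.
  template <typename DT, typename IT>
  int exampleExchangeWithoutTriangulation(
    DT *scalarArray,
    const IT *globalIds,
    const int *rankArray,
    const std::unordered_map<IT, IT> &gidToLid,
    const IT nVerts,
    const std::vector<int> &neighbors) {
    const auto getVertexRank = [rankArray](const IT i) { return rankArray[i]; };
    const auto getVertexGlobalId
      = [globalIds](const IT i) { return globalIds[i]; };
    const auto getVertexLocalId
      = [&gidToLid](const IT gid) { return gidToLid.at(gid); };
    return exchangeGhostDataWithoutTriangulation<DT, IT>(
      scalarArray, getVertexRank, getVertexGlobalId, getVertexLocalId, nVerts,
      ttk::MPIcomm_, neighbors, 1);
  }
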
570 // returns true if bounding boxes intersect, false if not
571 bool inline checkForIntersection(double *myBB, double *theirBB) {
572 return !(
573 myBB[0] > theirBB[1] // my left side is right of their right side
574 || myBB[1] < theirBB[0] // my right side is left of their left side
575 || myBB[2] > theirBB[3] // my bottom side is above their top side
576 || myBB[3] < theirBB[2] // my top side is under their bottom side
577 || myBB[4] > theirBB[5] // my front side is behind their back side
578 || myBB[5] < theirBB[4] // my back side is in front of their front side
579 );
580 }
581
582 void inline preconditionNeighborsUsingBoundingBox(
583 double *boundingBox, std::vector<int> &neighbors) {
584
585 std::vector<std::array<double, 6>> rankBoundingBoxes(ttk::MPIsize_);
586 std::copy(
587 boundingBox, boundingBox + 6, rankBoundingBoxes[ttk::MPIrank_].begin());
588 for(int r = 0; r < ttk::MPIsize_; r++) {
589 MPI_Bcast(rankBoundingBoxes[r].data(), 6, MPI_DOUBLE, r, ttk::MPIcomm_);
590 }
591
592 double epsilon = 0.00001;
593 // inflate our own bounding box by epsilon
594 for(int i = 0; i < 6; i++) {
595 if(i % 2 == 0)
596 boundingBox[i] -= epsilon;
597 if(i % 2 == 1)
598 boundingBox[i] += epsilon;
599 }
600
601 for(int i = 0; i < ttk::MPIsize_; i++) {
602 if(i != ttk::MPIrank_) {
603 double *theirBoundingBox = rankBoundingBoxes[i].data();
604 if(checkForIntersection(boundingBox, theirBoundingBox)) {
605 neighbors.push_back(i);
606 }
607 }
608 }
609 }
610
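  // Editorial usage sketch -- not part of the original TTK header. The bounds
  // are laid out as {xmin, xmax, ymin, ymax, zmin, zmax}, matching
  // checkForIntersection() above; the numeric values below are arbitrary
  // placeholders for the bounds of this rank's sub-domain.
  inline std::vector<int> exampleNeighborsFromBounds() {
    std::vector<int> neighbors{};
    std::array<double, 6> localBounds{0.0, 1.0, 0.0, 1.0, 0.0, 1.0};
    // inflates our box by a small epsilon and keeps every rank whose
    // broadcast box intersects it
    preconditionNeighborsUsingBoundingBox(localBounds.data(), neighbors);
    return neighbors;
  }
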
620 void inline produceRankArray(std::vector<int> &rankArray,
621 const LongSimplexId *globalIds,
622 const unsigned char *ghostCells,
623 int nVertices,
624 double *boundingBox,
625 std::vector<int> &neighbors) {
626 if(neighbors.empty()) {
627 ttk::preconditionNeighborsUsingBoundingBox(boundingBox, neighbors);
628 }
629 MPI_Datatype MIT = ttk::getMPIType(ttk::SimplexId{});
630 std::vector<ttk::SimplexId> currentRankUnknownIds;
631 std::vector<std::vector<ttk::SimplexId>> allUnknownIds(ttk::MPIsize_);
632 std::unordered_set<ttk::SimplexId> gIdSet;
633 std::unordered_map<ttk::SimplexId, ttk::SimplexId> gIdToLocalMap;
634
635 for(int i = 0; i < nVertices; i++) {
636 int ghostCellVal = ghostCells[i];
637 ttk::SimplexId globalId = globalIds[i];
638 if(ghostCellVal == 0) {
639 // if the ghost cell value is 0, then this vertex is owned by
640 // this rank
641 rankArray[i] = ttk::MPIrank_;
642 gIdSet.insert(globalId);
643 } else {
644 // otherwise the vertex belongs to another rank and we need to find
645 // out to which one; this is done by sending the global id to the
646 // neighbor ranks and waiting for one of them to answer
647 currentRankUnknownIds.push_back(globalId);
648 gIdToLocalMap[globalId] = i;
649 }
650 }
651
652 ttk::SimplexId sizeOfCurrentRank = currentRankUnknownIds.size();
653 std::vector<ttk::SimplexId> gIdsToSend;
654 std::vector<ttk::SimplexId> receivedGlobals;
655 receivedGlobals.resize(sizeOfCurrentRank);
656 ttk::SimplexId sizeOfNeighbor;
657 std::vector<ttk::SimplexId> neighborUnknownIds;
658 for(int neighbor : neighbors) {
659 // we first send the size and then all needed ids to the neighbor
660 MPI_Sendrecv(&sizeOfCurrentRank, 1, MIT, neighbor, ttk::MPIrank_,
661 &sizeOfNeighbor, 1, MIT, neighbor, neighbor, ttk::MPIcomm_,
662 MPI_STATUS_IGNORE);
663 neighborUnknownIds.resize(sizeOfNeighbor);
664 gIdsToSend.reserve(sizeOfNeighbor);
665
666 MPI_Sendrecv(currentRankUnknownIds.data(), sizeOfCurrentRank, MIT,
667 neighbor, ttk::MPIrank_, neighborUnknownIds.data(),
668 sizeOfNeighbor, MIT, neighbor, neighbor, ttk::MPIcomm_,
669 MPI_STATUS_IGNORE);
670
671 // then we check whether the requested global ids are present in the local
672 // global id set; if so, we send them back so the requester can record our rank
673 for(ttk::SimplexId gId : neighborUnknownIds) {
674 if(gIdSet.count(gId)) {
675 // add the value to the vector which will be sent
676 gIdsToSend.push_back(gId);
677 }
678 }
679 MPI_Status status;
680 int amount;
681
682 MPI_Sendrecv(gIdsToSend.data(), gIdsToSend.size(), MIT, neighbor,
683 ttk::MPIrank_, receivedGlobals.data(),
684 currentRankUnknownIds.size(), MIT, neighbor, neighbor,
685 ttk::MPIcomm_, &status);
686
687 MPI_Get_count(&status, MIT, &amount);
688 receivedGlobals.resize(amount);
689
690 for(ttk::SimplexId receivedGlobal : receivedGlobals) {
691 ttk::SimplexId localVal = gIdToLocalMap[receivedGlobal];
692 rankArray[localVal] = neighbor;
693 }
694 // cleanup
695 gIdsToSend.clear();
696 receivedGlobals.resize(sizeOfCurrentRank);
697 }
698 }
699
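  // Editorial usage sketch -- not part of the original TTK header. It fills a
  // per-vertex rank array from global id and ghost arrays (e.g. the ones VTK
  // attaches to distributed point data); pointer names and the wrapper are
  // placeholders supplied by the caller.
  inline void exampleProduceRankArray(std::vector<int> &rankArray,
                                      const LongSimplexId *globalIds,
                                      const unsigned char *ghostCells,
                                      const int nVertices,
                                      double *boundingBox) {
    rankArray.resize(nVertices);
    // an empty neighbor list is recomputed from the bounding box internally
    std::vector<int> neighbors{};
    produceRankArray(
      rankArray, globalIds, ghostCells, nVertices, boundingBox, neighbors);
  }
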
708 template <typename DT, typename IT>
709 void returnVectorForBurstsize(std::vector<value<DT, IT>> &outVector,
710 std::vector<value<DT, IT>> &values,
711 size_t burstSize) {
712
713 if(burstSize > values.size()) {
714 outVector.resize(values.size(), {0, 0});
715 outVector.assign(values.begin(), values.end());
716 values.clear();
717 } else {
718 outVector.resize(burstSize, {0, 0});
719 outVector.assign(values.end() - burstSize, values.end());
720 values.erase(values.end() - burstSize, values.end());
721 }
722 }
723
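  // Editorial usage sketch -- not part of the original TTK header. After the
  // ascending sort of sortVerticesDistributed() below, this pops one batch of
  // at most burstSize of the largest remaining values; the wrapper name is
  // hypothetical.
  template <typename DT, typename IT>
  std::vector<value<DT, IT>>
    examplePopLargestBatch(std::vector<value<DT, IT>> &sortingValues,
                           const size_t burstSize) {
    std::vector<value<DT, IT>> batch{};
    returnVectorForBurstsize<DT, IT>(batch, sortingValues, burstSize);
    return batch; // sortingValues shrank by batch.size()
  }
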
733 template <typename DT, typename IT>
734 void ReceiveAndAddToVector(
735 int rankFrom,
736 int structTag,
737 std::vector<std::vector<value<DT, IT>>> &unsortedReceivedValues) {
738 std::vector<value<DT, IT>> &receivedValues
739 = unsortedReceivedValues[rankFrom];
740 // probe to get the correct size to receive
741 int amount;
742 MPI_Status status;
743 MPI_Probe(rankFrom, structTag * rankFrom, ttk::MPIcomm_, &status);
744 MPI_Get_count(&status, MPI_CHAR, &amount);
745 receivedValues.resize(amount / sizeof(value<DT, IT>), {0, 0});
746 MPI_Recv(receivedValues.data(), amount, MPI_CHAR, rankFrom,
747 structTag * rankFrom, ttk::MPIcomm_, MPI_STATUS_IGNORE);
748 }
749
774 template <typename DT, typename IT>
775 void getMax(int intTag,
776 int structTag,
777 IT &currentOrder,
778 int burstSize,
779 MPI_Datatype MPI_IT,
780 IT &processedValueCounter,
781 std::vector<std::vector<value<DT, IT>>> &unsortedReceivedValues,
782 std::vector<std::vector<IT>> &orderResendValues,
783 std::vector<IT> &orderedValuesForRank,
784 std::vector<value<DT, IT>> &sortingValues) {
785 // take the current maximum scalar over all ranks
786 int rankIdOfMaxScalar = -1;
787 DT maxScalar = std::numeric_limits<DT>::lowest();
788 IT maxGId = -1;
789 for(size_t i = 0; i < unsortedReceivedValues.size(); i++) {
790 if(unsortedReceivedValues[i].size() > 0) {
791 const auto &v = unsortedReceivedValues[i].back();
792 if(v.scalar == maxScalar ? v.globalId > maxGId : v.scalar > maxScalar) {
793 maxScalar = v.scalar;
794 maxGId = v.globalId;
795 rankIdOfMaxScalar = i;
796 }
797 }
798 }
799
800 value<DT, IT> currentValue
801 = unsortedReceivedValues[rankIdOfMaxScalar].back();
802 // we send the globalId and the order via one send command,
803 // therefore we need to read two consecutive values at the same time
804 // later on
805 orderResendValues[rankIdOfMaxScalar].push_back(currentValue.globalId);
806 orderResendValues[rankIdOfMaxScalar].push_back(currentOrder);
807 currentOrder--;
808 processedValueCounter++;
809 unsortedReceivedValues[rankIdOfMaxScalar].pop_back();
810 if(unsortedReceivedValues[rankIdOfMaxScalar].size() == 0) {
811 if(rankIdOfMaxScalar == 0) {
812 // append the ordered values to the correct vector
813 orderedValuesForRank.insert(
814 orderedValuesForRank.end(),
815 orderResendValues[rankIdOfMaxScalar].begin(),
816 orderResendValues[rankIdOfMaxScalar].end());
817 orderResendValues[rankIdOfMaxScalar].clear();
818
819 if(!sortingValues.empty()) {
820 std::vector<value<DT, IT>> ownValues;
821 returnVectorForBurstsize<DT, IT>(ownValues, sortingValues, burstSize);
822
823 unsortedReceivedValues[rankIdOfMaxScalar] = ownValues;
824 }
825 } else {
826 // this rank's buffer ran out: send the pending ordering back to it
827 // and request the next batch of values
828 MPI_Send(orderResendValues[rankIdOfMaxScalar].data(),
829 orderResendValues[rankIdOfMaxScalar].size(), MPI_IT,
830 rankIdOfMaxScalar, intTag * rankIdOfMaxScalar, ttk::MPIcomm_);
831 orderResendValues[rankIdOfMaxScalar].clear();
832 // we receive more values; if the size is still zero afterwards, we are
833 // finished with this rank
834 ReceiveAndAddToVector(
835 rankIdOfMaxScalar, structTag, unsortedReceivedValues);
836 }
837 }
838 }
839
851 template <typename IT, typename GVLID>
852 void buildArrayForReceivedData(const size_t nInts,
853 const IT *const orderedValuesForRank,
854 const GVLID &getVertexLocalId,
855 SimplexId *const order,
856 const int nThreads) {
857
858 TTK_FORCE_USE(nThreads);
859
860#ifdef TTK_ENABLE_OPENMP
861#pragma omp parallel for num_threads(nThreads)
862#endif // TTK_ENABLE_OPENMP
863 for(size_t i = 0; i < nInts; i += 2) {
864 order[getVertexLocalId(orderedValuesForRank[i])]
865 = orderedValuesForRank[i + 1];
866 }
867 }
868
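  // Editorial usage sketch -- not part of the original TTK header. The
  // received buffer interleaves (global id, order) pairs, which is why the
  // loop above advances in steps of two; gidToLid and the wrapper name are
  // placeholders.
  template <typename IT>
  void exampleScatterReceivedOrders(const std::vector<IT> &orderedValues,
                                    const std::unordered_map<IT, IT> &gidToLid,
                                    SimplexId *const order) {
    buildArrayForReceivedData<IT>(
      orderedValues.size(), orderedValues.data(),
      [&gidToLid](const IT gid) { return gidToLid.at(gid); }, order,
      ttk::globalThreadNumber_);
  }
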
882 template <typename DT, typename IT, typename GVGID, typename GVR>
883 void populateVector(std::vector<value<DT, IT>> &valuesToSortVector,
884 const size_t nVerts,
885 const DT *const scalars,
886 const GVGID &getVertexGlobalId,
887 const GVR &getVertexRank) {
888 for(size_t i = 0; i < nVerts; i++) {
889 IT globalId = getVertexGlobalId(i);
890 if(getVertexRank(i) == ttk::MPIrank_) {
891 valuesToSortVector.emplace_back(scalars[i], globalId);
892 }
893 }
894 }
895
903 template <typename DT, typename IT>
904 void sortVerticesDistributed(std::vector<value<DT, IT>> &values,
905 const int nThreads) {
906
907 TTK_FORCE_USE(nThreads);
908
909 TTK_PSORT(nThreads, values.begin(), values.end(),
910 [](value<DT, IT> v1, value<DT, IT> v2) {
911 return (v1.scalar < v2.scalar)
912 || (v1.scalar == v2.scalar && v1.globalId < v2.globalId);
913 });
914 }
915
927 template <typename DT, typename GVGID, typename GVR, typename GVLID>
928 void produceOrdering(SimplexId *orderArray,
929 const DT *scalarArray,
930 const GVGID &getVertexGlobalId,
931 const GVR &getVertexRank,
932 const GVLID &getVertexLocalId,
933 const size_t nVerts,
934 const int burstSize,
935 std::vector<int> &neighbors) {
936 int intTag = 101;
937 int structTag = 102;
938 if(neighbors.empty()) {
939 ttk::preconditionNeighborsUsingRankArray(
940 neighbors, getVertexRank, nVerts, ttk::MPIcomm_);
941 }
942 MPI_Barrier(ttk::MPIcomm_);
943
944 using IT = decltype(getVertexGlobalId(0));
945 MPI_Datatype MPI_IT = ttk::getMPIType(static_cast<IT>(0));
946
947 ttk::Timer fillAndSortTimer;
948 std::vector<value<DT, IT>> sortingValues;
949 populateVector(
950 sortingValues, nVerts, scalarArray, getVertexGlobalId, getVertexRank);
951
952 // each rank sorts its local values, first by the scalar value itself,
953 // then by the global id
954 sortVerticesDistributed<DT, IT>(sortingValues, ttk::globalThreadNumber_);
955
956 // when all are done sorting, rank 0 requests the highest values and
957 // merges them
958 MPI_Barrier(ttk::MPIcomm_);
959
960 std::vector<IT> orderedValuesForRank;
961 IT processedValueCounter = 0;
962 ttk::Timer mergeTimer;
963 IT localSize = sortingValues.size();
964 IT totalSize;
965 // get the complete size of the dataset by summing up the local sizes
966 MPI_Reduce(&localSize, &totalSize, 1, MPI_IT, MPI_SUM, 0, ttk::MPIcomm_);
967 if(ttk::MPIrank_ == 0) {
968 IT currentOrder = totalSize - 1;
969 std::vector<std::vector<value<DT, IT>>> unsortedReceivedValues;
970 unsortedReceivedValues.resize(ttk::MPIsize_);
971 std::vector<std::vector<IT>> orderResendValues;
972 orderResendValues.resize(ttk::MPIsize_);
973 for(int i = 0; i < ttk::MPIsize_; i++) {
974 orderResendValues[i].reserve(burstSize);
975 }
976
977 // receive the first batch of values
978 for(int i = 0; i < ttk::MPIsize_; i++) {
979 if(i == 0) {
980 std::vector<value<DT, IT>> ownValues;
981 returnVectorForBurstsize<DT, IT>(ownValues, sortingValues, burstSize);
982 unsortedReceivedValues[i] = ownValues;
983 } else {
984 ReceiveAndAddToVector<DT, IT>(i, structTag, unsortedReceivedValues);
985 }
986 }
987 while(processedValueCounter < totalSize) {
988 getMax<DT, IT>(intTag, structTag, currentOrder, burstSize, MPI_IT,
989 processedValueCounter, unsortedReceivedValues,
990 orderResendValues, orderedValuesForRank, sortingValues);
991 }
992
993 } else { // other Ranks
994 // send the next burstsize values and then wait for an answer from the
995 // root rank
996 while(!sortingValues.empty()) {
997 std::vector<value<DT, IT>> sendValues;
998 returnVectorForBurstsize<DT, IT>(sendValues, sortingValues, burstSize);
999 int size = sendValues.size();
1000 MPI_Send(sendValues.data(), size * sizeof(value<DT, IT>), MPI_CHAR, 0,
1001 structTag * ttk::MPIrank_, ttk::MPIcomm_);
1002 std::vector<IT> receivedValues;
1003
1004 // be prepared to receive up to burstSize * 2 values (id/order pairs),
1005 // then resize to the actual count after receiving
1006 receivedValues.resize(burstSize * 2);
1007 MPI_Status status;
1008 int amount;
1009 MPI_Recv(receivedValues.data(), burstSize * 2, MPI_IT, 0,
1010 intTag * ttk::MPIrank_, ttk::MPIcomm_, &status);
1011 MPI_Get_count(&status, MPI_IT, &amount);
1012
1013 receivedValues.resize(amount);
1014 orderedValuesForRank.insert(orderedValuesForRank.end(),
1015 receivedValues.begin(),
1016 receivedValues.end());
1017 }
1018 // afterwards, send a zero-length message to the root rank to signal
1019 // that we are done
1020 MPI_Send(sortingValues.data(), 0, MPI_CHAR, 0, structTag * ttk::MPIrank_,
1021 ttk::MPIcomm_);
1022 }
1023
1024 // all ranks do the following
1025 MPI_Barrier(ttk::MPIcomm_);
1026
1027 ttk::Timer orderTimer;
1028 buildArrayForReceivedData<IT>(orderedValuesForRank.size(),
1029 orderedValuesForRank.data(), getVertexLocalId,
1030 orderArray, ttk::globalThreadNumber_);
1031
1032 // we receive the order values for the ghost vertices through the
1033 // generic exchangeGhostDataWithoutTriangulation method
1034 ttk::exchangeGhostDataWithoutTriangulation<ttk::SimplexId, IT>(
1035 orderArray, getVertexRank, getVertexGlobalId, getVertexLocalId, nVerts,
1036 ttk::MPIcomm_, neighbors);
1037 }
1038
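  // Editorial usage sketch -- not part of the original TTK header. It computes
  // a global order array for a distributed scalar field with the accessors
  // expressed as lambdas over raw arrays; all parameter names, the map and the
  // burst size of 100 are placeholders (IT is expected to be a global id type
  // such as ttk::LongSimplexId).
  template <typename DT, typename IT>
  void exampleProduceOrdering(SimplexId *orderArray,
                              const DT *scalarArray,
                              const IT *globalIds,
                              const int *rankArray,
                              const std::unordered_map<IT, IT> &gidToLid,
                              const size_t nVerts) {
    const auto getVertexGlobalId
      = [globalIds](const IT i) { return globalIds[i]; };
    const auto getVertexRank = [rankArray](const IT i) { return rankArray[i]; };
    const auto getVertexLocalId
      = [&gidToLid](const IT gid) { return gidToLid.at(gid); };
    // an empty neighbor list is recomputed from the rank accessor internally
    std::vector<int> neighbors{};
    produceOrdering<DT>(orderArray, scalarArray, getVertexGlobalId,
                        getVertexRank, getVertexLocalId, nVerts, 100,
                        neighbors);
  }
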
1048 template <typename dataType>
1049 void sendVector(std::vector<dataType> &sendBuffer,
1050 MPI_Datatype messageType,
1051 int neighbor) {
1052 ttk::SimplexId dataSize = sendBuffer.size();
1053 MPI_Send(&dataSize, 1, getMPIType(dataSize), neighbor, ttk::MPIrank_,
1054 ttk::MPIcomm_);
1055 MPI_Send(sendBuffer.data(), dataSize, messageType, neighbor, ttk::MPIrank_,
1056 ttk::MPIcomm_);
1057 }
1058
1068 template <typename dataType>
1069 void recvVector(std::vector<dataType> &receiveBuffer,
1070 ttk::SimplexId &recvMessageSize,
1071 MPI_Datatype messageType,
1072 int neighbor) {
1073 MPI_Recv(&recvMessageSize, 1, getMPIType(recvMessageSize), neighbor,
1074 neighbor, ttk::MPIcomm_, MPI_STATUS_IGNORE);
1075 receiveBuffer.resize(recvMessageSize);
1076 MPI_Recv(receiveBuffer.data(), recvMessageSize, messageType, neighbor,
1077 neighbor, ttk::MPIcomm_, MPI_STATUS_IGNORE);
1078 }
1079
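  // Editorial usage sketch -- not part of the original TTK header. It pairs
  // sendVector() and recvVector() between the current rank and one neighbor,
  // ordering the two calls by rank so that the blocking sends and receives
  // cannot deadlock; buffer names and the wrapper are placeholders.
  inline void exampleExchangeWithNeighbor(std::vector<double> &sendBuffer,
                                          std::vector<double> &recvBuffer,
                                          const int neighbor) {
    ttk::SimplexId recvMessageSize{0};
    if(ttk::MPIrank_ < neighbor) {
      sendVector<double>(sendBuffer, MPI_DOUBLE, neighbor);
      recvVector<double>(recvBuffer, recvMessageSize, MPI_DOUBLE, neighbor);
    } else {
      recvVector<double>(recvBuffer, recvMessageSize, MPI_DOUBLE, neighbor);
      sendVector<double>(sendBuffer, MPI_DOUBLE, neighbor);
    }
  }
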
1080} // namespace ttk
1081
1082#endif // TTK_ENABLE_MPI
#define TTK_FORCE_USE(x)
Force the compiler to use the function/method parameter.
Definition: BaseClass.h:57
#define ttkNotUsed(x)
Mark function/method parameters that are not used in the function body at all.
Definition: BaseClass.h:47
#define TTK_PSORT(NTHREADS,...)
Parallel sort macro.
Definition: OpenMP.h:46
The Topology ToolKit.
COMMON_EXPORTS int MPIsize_
Definition: BaseClass.cpp:10
COMMON_EXPORTS int globalThreadNumber_
Definition: BaseClass.cpp:6
COMMON_EXPORTS int MPIrank_
Definition: BaseClass.cpp:9
long long int LongSimplexId
Identifier type for simplices of any dimension.
Definition: DataTypes.h:15
int SimplexId
Identifier type for simplices of any dimension.
Definition: DataTypes.h:22