MPIUtils.h

#pragma once

#include <BaseClass.h>
#include <Timer.h>
#include <iostream>

#include <algorithm>
#include <array>
#include <limits>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#ifdef TTK_ENABLE_MPI

// disable the MPI C++ API
#define OMPI_SKIP_MPICXX 1

#include <mpi.h>

namespace ttk {

  inline MPI_Datatype getMPIType(const float ttkNotUsed(val)) {
    return MPI_FLOAT;
  };
  inline MPI_Datatype getMPIType(const int ttkNotUsed(val)) {
    return MPI_INT;
  };
  inline MPI_Datatype getMPIType(const unsigned int ttkNotUsed(val)) {
    return MPI_UNSIGNED;
  };
  inline MPI_Datatype getMPIType(const double ttkNotUsed(val)) {
    return MPI_DOUBLE;
  };
  inline MPI_Datatype getMPIType(const long double ttkNotUsed(val)) {
    return MPI_LONG_DOUBLE;
  };
  inline MPI_Datatype getMPIType(const long ttkNotUsed(val)) {
    return MPI_LONG;
  };
  inline MPI_Datatype getMPIType(const unsigned long ttkNotUsed(val)) {
    return MPI_UNSIGNED_LONG;
  };
  inline MPI_Datatype getMPIType(const long long ttkNotUsed(val)) {
    return MPI_LONG_LONG;
  };
  inline MPI_Datatype getMPIType(const unsigned long long ttkNotUsed(val)) {
    return MPI_UNSIGNED_LONG_LONG;
  };
  inline MPI_Datatype getMPIType(const unsigned char ttkNotUsed(val)) {
    return MPI_UNSIGNED_CHAR;
  };

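  // Illustrative usage sketch (not part of the original header): the
  // getMPIType overloads let templated code pick the matching MPI datatype
  // from a C++ value, so generic communication code does not need to spell
  // out MPI_FLOAT, MPI_INT, etc. The helper name below is hypothetical.
  template <typename DT>
  inline DT exampleGlobalSum(const DT localValue, MPI_Comm communicator) {
    DT globalValue{};
    // deduce the MPI datatype from a dummy value of type DT
    MPI_Allreduce(&localValue, &globalValue, 1,
                  getMPIType(static_cast<DT>(0)), MPI_SUM, communicator);
    return globalValue;
  }
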
  template <typename DT, typename IT>
  struct value {
    DT scalar;
    IT globalId;

    value(DT _scalar, IT _globalId) : scalar(_scalar), globalId(_globalId) {
    }
  };

  inline bool isRunningWithMPI() {
    return ttk::MPIsize_ > 1;
  };

  inline bool hasInitializedMPI() {
    return ttk::MPIsize_ > 0;
  };

  inline int startMPITimer(Timer &t, int rank, int size) {
    if(size > 0) {
      MPI_Barrier(ttk::MPIcomm_);
      if(rank == 0) {
        t.reStart();
      }
    }
    return 0;
  };

  inline double endMPITimer(Timer &t, int rank, int size) {
    double elapsedTime = 0;
    if(size > 0) {
      MPI_Barrier(ttk::MPIcomm_);
      if(rank == 0) {
        elapsedTime = t.getElapsedTime();
      }
    }

    return elapsedTime;
  };

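  // Illustrative usage sketch (not part of the original header): the two
  // timer helpers above synchronize all ranks around a timed region so that
  // rank 0 measures the slowest rank rather than only its own work. The
  // surrounding function and message are hypothetical.
  inline void exampleTimedRegion() {
    ttk::Timer t;
    startMPITimer(t, ttk::MPIrank_, ttk::MPIsize_);
    // ... distributed work to be timed goes here ...
    const double elapsed = endMPITimer(t, ttk::MPIrank_, ttk::MPIsize_);
    if(ttk::MPIrank_ == 0) {
      std::cout << "Distributed work took " << elapsed << " s" << std::endl;
    }
  }
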
  template <typename DT, typename triangulationType>
  int getGhostCellScalars(DT *scalarArray,
                          const triangulationType *triangulation,
                          const int rankToSend,
                          MPI_Comm communicator,
                          const int dimensionNumber) {
    const std::vector<int> &neighbors = triangulation->getNeighborRanks();
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }
    MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
    // we need unique tags for each rankToSend, otherwise messages might get
    // mixed up
    const int tagMultiplier = rankToSend + 1;
    int valuesTag = 103 * tagMultiplier;
    if(rankToSend == ttk::MPIrank_) {
      int neighborNumber = neighbors.size();
      const auto &ghostCellsPerOwner = triangulation->getGhostCellsPerOwner();
      // receive the scalar values
      for(int r = 0; r < neighborNumber; r++) {
        ttk::SimplexId nValues = ghostCellsPerOwner[neighbors[r]].size();
        std::vector<DT> receivedValues(nValues * dimensionNumber);
        if(nValues > 0) {
          MPI_Recv(receivedValues.data(), nValues * dimensionNumber, MPI_DT,
                   neighbors.at(r), valuesTag, communicator, MPI_STATUS_IGNORE);

          for(ttk::SimplexId i = 0; i < nValues; i++) {
            for(int j = 0; j < dimensionNumber; j++) {
              DT receivedVal = receivedValues[i * dimensionNumber + j];
              ttk::SimplexId globalId = ghostCellsPerOwner[neighbors[r]][i];
              ttk::SimplexId localId = triangulation->getCellLocalId(globalId);
              scalarArray[localId * dimensionNumber + j] = receivedVal;
            }
          }
        }
      }
    } else { // owner ranks
      // if rankToSend is not a neighbor of the current rank, there is nothing
      // to do
      if(std::find(neighbors.begin(), neighbors.end(), rankToSend)
         != neighbors.end()) {
        // get the needed global ids from the triangulation
        const auto &ghostCellsForThisRank
          = triangulation->getRemoteGhostCells()[rankToSend];
        ttk::SimplexId nValues = ghostCellsForThisRank.size();
        if(nValues > 0) {
          // assemble the scalar values
          std::vector<DT> valuesToSend(nValues * dimensionNumber);
          for(ttk::SimplexId i = 0; i < nValues; i++) {
            for(int j = 0; j < dimensionNumber; j++) {
              ttk::SimplexId globalId = ghostCellsForThisRank[i];
              ttk::SimplexId localId = triangulation->getCellLocalId(globalId);
              valuesToSend[i * dimensionNumber + j]
                = scalarArray[localId * dimensionNumber + j];
            }
          }

          // send the scalar values
          MPI_Send(valuesToSend.data(), nValues * dimensionNumber, MPI_DT,
                   rankToSend, valuesTag, communicator);
        }
      }
    }

    return 0;
  }

  template <typename DT, typename triangulationType>
  int getGhostVertexScalars(DT *scalarArray,
                            const triangulationType *triangulation,
                            const int rankToSend,
                            MPI_Comm communicator,
                            const int dimensionNumber) {
    const std::vector<int> &neighbors = triangulation->getNeighborRanks();
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }
    MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
    // we need unique tags for each rankToSend, otherwise messages might get
    // mixed up
    const int tagMultiplier = rankToSend + 1;
    int valuesTag = 103 * tagMultiplier;
    if(rankToSend == ttk::MPIrank_) {
      int neighborNumber = neighbors.size();
      const auto &ghostVerticesPerOwner
        = triangulation->getGhostVerticesPerOwner();
      // receive the scalar values
      for(int r = 0; r < neighborNumber; r++) {
        ttk::SimplexId nValues = ghostVerticesPerOwner[neighbors[r]].size();
        std::vector<DT> receivedValues(nValues * dimensionNumber);
        if(nValues > 0) {
          MPI_Recv(receivedValues.data(), nValues * dimensionNumber, MPI_DT,
                   neighbors.at(r), valuesTag, communicator, MPI_STATUS_IGNORE);
#pragma omp parallel for
          for(ttk::SimplexId i = 0; i < nValues; i++) {
            for(int j = 0; j < dimensionNumber; j++) {
              DT receivedVal = receivedValues[i * dimensionNumber + j];
              ttk::SimplexId globalId = ghostVerticesPerOwner[neighbors[r]][i];
              ttk::SimplexId localId
                = triangulation->getVertexLocalId(globalId);
              scalarArray[localId * dimensionNumber + j] = receivedVal;
            }
          }
        }
      }
    } else { // owner ranks
      // if rankToSend is not a neighbor of the current rank, there is nothing
      // to do
      if(std::find(neighbors.begin(), neighbors.end(), rankToSend)
         != neighbors.end()) {
        // get the needed global ids from the triangulation
        const auto &ghostVerticesForThisRank
          = triangulation->getRemoteGhostVertices()[rankToSend];
        ttk::SimplexId nValues = ghostVerticesForThisRank.size();
        if(nValues > 0) {
          // assemble the scalar values
          std::vector<DT> valuesToSend(nValues * dimensionNumber);
#pragma omp parallel for
          for(ttk::SimplexId i = 0; i < nValues; i++) {
            for(int j = 0; j < dimensionNumber; j++) {
              ttk::SimplexId globalId = ghostVerticesForThisRank[i];
              ttk::SimplexId localId
                = triangulation->getVertexLocalId(globalId);
              valuesToSend[i * dimensionNumber + j]
                = scalarArray[localId * dimensionNumber + j];
            }
          }

          // send the scalar values
          MPI_Send(valuesToSend.data(), nValues * dimensionNumber, MPI_DT,
                   rankToSend, valuesTag, communicator);
        }
      }
    }

    return 0;
  }

  template <typename DT,
            typename IT,
            typename GVGID,
            typename GVR,
            typename GVLID>
  int getGhostDataScalarsWithoutTriangulation(DT *scalarArray,
                                              const GVR &getVertexRank,
                                              const GVGID &getVertexGlobalId,
                                              const GVLID &getVertexLocalId,
                                              const std::vector<int> &neighbors,
                                              const int rankToSend,
                                              const IT nVerts,
                                              MPI_Comm communicator,
                                              const int dimensionNumber) {
    const int neighborNumber = neighbors.size();
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }
    MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
    MPI_Datatype MPI_IT = getMPIType(static_cast<IT>(0));
    using globalIdType = decltype(getVertexGlobalId(0));
    MPI_Datatype MPI_GIT = getMPIType(static_cast<globalIdType>(0));
    // we need unique tags for each rankToSend, otherwise messages might get
    // mixed up
    const int tagMultiplier = rankToSend + 1;
    int amountTag = 101 * tagMultiplier;
    int idsTag = 102 * tagMultiplier;
    int valuesTag = 103 * tagMultiplier;
    if(rankToSend == ttk::MPIrank_) {
      // initialize the inner vectors with size 0
      std::vector<std::vector<globalIdType>> rankVectors(
        ttk::MPIsize_, std::vector<globalIdType>(0));
      // aggregate the needed ids

      for(IT i = 0; i < nVerts; i++) {
        if(ttk::MPIrank_ != getVertexRank(i)) {
          rankVectors[getVertexRank(i)].push_back(getVertexGlobalId(i));
        }
      }
      // send the amount of ids and the needed ids themselves
      for(int r = 0; r < neighborNumber; r++) {
        IT nValues = rankVectors[neighbors[r]].size();
        MPI_Send(&nValues, 1, MPI_IT, neighbors[r], amountTag, communicator);
        if(nValues > 0) {
          MPI_Send(rankVectors[neighbors[r]].data(), nValues, MPI_GIT,
                   neighbors[r], idsTag, communicator);
        }
      }
      // receive the scalar values
      for(int r = 0; r < neighborNumber; r++) {
        IT nValues = rankVectors[neighbors[r]].size();
        std::vector<DT> receivedValues(nValues * dimensionNumber);
        if(nValues > 0) {
          MPI_Recv(receivedValues.data(), nValues * dimensionNumber, MPI_DT,
                   neighbors[r], valuesTag, communicator, MPI_STATUS_IGNORE);
          for(IT i = 0; i < nValues; i++) {
            for(int j = 0; j < dimensionNumber; j++) {
              DT receivedVal = receivedValues[i * dimensionNumber + j];
              const auto globalId = rankVectors[neighbors[r]][i];
              IT localId = getVertexLocalId(globalId);
              scalarArray[localId * dimensionNumber + j] = receivedVal;
            }
          }
        }
      }
    } else { // owner ranks
      // if rankToSend is not a neighbor of the current rank, there is nothing
      // to do
      if(std::find(neighbors.begin(), neighbors.end(), rankToSend)
         != neighbors.end()) {
        // receive the amount of ids and the needed ids themselves
        IT nValues;

        MPI_Recv(&nValues, 1, MPI_IT, rankToSend, amountTag, communicator,
                 MPI_STATUS_IGNORE);

        if(nValues > 0) {
          std::vector<globalIdType> receivedIds(nValues);
          MPI_Recv(receivedIds.data(), nValues, MPI_GIT, rankToSend, idsTag,
                   communicator, MPI_STATUS_IGNORE);

          // assemble the scalar values
          std::vector<DT> valuesToSend(nValues * dimensionNumber);
          for(IT i = 0; i < nValues; i++) {
            for(int j = 0; j < dimensionNumber; j++) {
              IT globalId = receivedIds[i];
              IT localId = getVertexLocalId(globalId);
              valuesToSend[i * dimensionNumber + j]
                = scalarArray[localId * dimensionNumber + j];
            }
          }

          // send the scalar values
          MPI_Send(valuesToSend.data(), nValues * dimensionNumber, MPI_DT,
                   rankToSend, valuesTag, communicator);
        }
      }
    }

    return 0;
  }

  template <typename IT, typename GVR>
  int preconditionNeighborsUsingRankArray(std::vector<int> &neighbors,
                                          std::map<int, int> &neighborsToId,
                                          const GVR &getVertexRank,
                                          const IT nVerts,
                                          MPI_Comm communicator) {
    std::unordered_set<int> neighborSet{};
    for(IT i = 0; i < nVerts; i++) {
      if(getVertexRank(i) != ttk::MPIrank_) {
        neighborSet.emplace(getVertexRank(i));
      }
    }
    std::vector<int> sendVector(neighborSet.begin(), neighborSet.end());
    int localSize = neighborSet.size();
    std::vector<int> sizes(ttk::MPIsize_);
    std::vector<int> displacements(ttk::MPIsize_);
    MPI_Gather(
      &localSize, 1, MPI_INT, sizes.data(), 1, MPI_INT, 0, communicator);
    int totalSize = 0;
    if(ttk::MPIrank_ == 0) {
      for(int i = 0; i < ttk::MPIsize_; i++) {
        totalSize += sizes[i];
        if(i == 0) {
          displacements[i] = 0;
        } else {
          displacements[i] = displacements[i - 1] + sizes[i - 1];
        }
      }
    }
    std::vector<int> rootVector(totalSize);

    MPI_Gatherv(sendVector.data(), sendVector.size(), MPI_INT,
                rootVector.data(), sizes.data(), displacements.data(), MPI_INT,
                0, communicator);
    std::vector<int> scatterVector;

    if(ttk::MPIrank_ == 0) {
      // transform this 1D vector into a vector of sets, which we will later
      // flatten again in order to scatter it
      std::vector<std::unordered_set<int>> setsFromRanks(ttk::MPIsize_);
      auto begin = rootVector.begin();
      auto end = rootVector.begin();
      for(int i = 0; i < ttk::MPIsize_; i++) {
        end = begin + sizes[i];
        const std::unordered_set<int> s(begin, end);
        setsFromRanks[i] = s;
        begin = end;
      }
      // now we check, for each rank, whether it appears in the neighbor set
      // of any other rank; if so, we add that other rank to its neighbors
      for(int i = 0; i < ttk::MPIsize_; i++) {
        for(int j = 0; j < ttk::MPIsize_; j++) {
          if(setsFromRanks[j].find(i) != setsFromRanks[j].end()) {
            setsFromRanks[i].emplace(j);
          }
        }
      }
      // now we flatten this vector of sets back into a 1D vector to scatter
      // it
      for(int i = 0; i < ttk::MPIsize_; i++) {
        sizes[i] = setsFromRanks[i].size();
        if(i == 0) {
          displacements[i] = 0;
        } else {
          displacements[i] = displacements[i - 1] + sizes[i - 1];
        }
        scatterVector.insert(scatterVector.end(), setsFromRanks[i].begin(),
                             setsFromRanks[i].end());
      }
    }

    // first scatter the sizes ...
    int receivedSize;
    MPI_Scatter(
      sizes.data(), 1, MPI_INT, &receivedSize, 1, MPI_INT, 0, communicator);

    // ... and then the actual neighbors
    std::vector<int> receivedNeighbors(receivedSize);
    MPI_Scatterv(scatterVector.data(), sizes.data(), displacements.data(),
                 MPI_INT, receivedNeighbors.data(), receivedSize, MPI_INT, 0,
                 communicator);
    // then we turn the vector back into a set
    const std::unordered_set<int> finalSet(
      receivedNeighbors.begin(), receivedNeighbors.end());
    neighborSet = finalSet;

    // copy the set into a sorted vector
    neighbors.clear();
    for(const int neighbor : neighborSet) {
      neighbors.push_back(neighbor);
    }
    std::sort(neighbors.begin(), neighbors.end());
    int neighborNumber = neighbors.size();
    neighborsToId.clear();
    for(int i = 0; i < neighborNumber; i++) {
      neighborsToId[neighbors[i]] = i;
    }

    return 0;
  }

  template <typename DT, typename triangulationType>
  int exchangeGhostCells(DT *scalarArray,
                         const triangulationType *triangulation,
                         MPI_Comm communicator,
                         const int dimensionNumber = 1) {
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }
    if(!triangulation->hasPreconditionedDistributedCells()) {
      return -1;
    }
    const std::vector<int> &neighbors = triangulation->getNeighborRanks();
    const int neighborNumber = neighbors.size();
    const std::map<int, int> &neighborsToId = triangulation->getNeighborsToId();
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }

    std::vector<std::vector<DT>> receivedValues(
      neighborNumber, std::vector<DT>());
    std::vector<std::vector<DT>> valuesToSend(
      neighborNumber, std::vector<DT>());
    MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
    const auto &ghostCellsPerOwner = triangulation->getGhostCellsPerOwner();
    const auto &remoteGhostCells = triangulation->getRemoteGhostCells();
    for(int i = 0; i < neighborNumber; i++) {
      receivedValues[i].resize(ghostCellsPerOwner[neighbors[i]].size()
                               * dimensionNumber);
      valuesToSend[i].resize(remoteGhostCells[neighbors[i]].size()
                             * dimensionNumber);
    }
    for(int i = 0; i < neighborNumber; i++) {
      ttk::SimplexId nValues = remoteGhostCells[neighbors[i]].size();
#pragma omp parallel for
      for(ttk::SimplexId j = 0; j < nValues; j++) {
        for(int k = 0; k < dimensionNumber; k++) {
          ttk::SimplexId globalId = remoteGhostCells[neighbors[i]][j];
          ttk::SimplexId localId = triangulation->getCellLocalId(globalId);
          valuesToSend[i].at(j * dimensionNumber + k)
            = scalarArray[localId * dimensionNumber + k];
        }
      }
    }

    std::vector<MPI_Request> sendRequests(neighborNumber);
    std::vector<MPI_Request> recvRequests(neighborNumber);
    for(int i = 0; i < neighborNumber; i++) {
      MPI_Isend(valuesToSend[i].data(), valuesToSend[i].size(), MPI_DT,
                neighbors[i], 0, communicator, &sendRequests[i]);
      MPI_Irecv(receivedValues[i].data(), receivedValues[i].size(), MPI_DT,
                neighbors[i], 0, communicator, &recvRequests[i]);
    }

    std::vector<MPI_Status> recvStatus(neighborNumber);
    std::vector<int> recvCompleted(neighborNumber, 0);
    int recvPerformedCount = 0;
    int recvPerformedCountTotal = 0;
    int r;
    int rId;
    while(recvPerformedCountTotal < neighborNumber) {
      MPI_Waitsome(neighborNumber, recvRequests.data(), &recvPerformedCount,
                   recvCompleted.data(), recvStatus.data());
      if(recvPerformedCount > 0) {
        for(int i = 0; i < recvPerformedCount; i++) {
          r = recvStatus[i].MPI_SOURCE;
          rId = neighborsToId.at(r);
          ttk::SimplexId nValues = ghostCellsPerOwner[r].size();
#pragma omp parallel for
          for(ttk::SimplexId j = 0; j < nValues; j++) {
            for(int k = 0; k < dimensionNumber; k++) {
              DT receivedVal = receivedValues[rId][j * dimensionNumber + k];
              ttk::SimplexId globalId = ghostCellsPerOwner[r][j];
              ttk::SimplexId localId = triangulation->getCellLocalId(globalId);
              scalarArray[localId * dimensionNumber + k] = receivedVal;
            }
          }
        }
        recvPerformedCountTotal += recvPerformedCount;
      }
    }
    MPI_Waitall(neighborNumber, sendRequests.data(), MPI_STATUSES_IGNORE);

    return 0;
  }

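  // Illustrative usage sketch (not part of the original header): once the
  // triangulation has been preconditioned for distributed cell data, a single
  // call propagates the owned cell scalars into the ghost cells of all
  // neighboring ranks. The helper name and array are placeholders.
  template <typename triangulationType>
  inline void exampleExchangeCellScalars(std::vector<double> &cellScalars,
                                         const triangulationType *triangulation) {
    // one scalar per cell (dimensionNumber defaults to 1); ghost cells of
    // cellScalars are overwritten with the values computed by their owners
    ttk::exchangeGhostCells<double>(
      cellScalars.data(), triangulation, ttk::MPIcomm_);
  }
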
  template <typename DT, typename triangulationType>
  int exchangeGhostVertices(DT *scalarArray,
                            const triangulationType *triangulation,
                            MPI_Comm communicator,
                            const int dimensionNumber = 1) {
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }
    if(!triangulation->hasPreconditionedDistributedVertices()) {
      return -1;
    }
    const std::vector<int> &neighbors = triangulation->getNeighborRanks();
    const int neighborNumber = neighbors.size();
    const std::map<int, int> &neighborsToId = triangulation->getNeighborsToId();

    if(!ttk::isRunningWithMPI()) {
      return -1;
    }

    std::vector<std::vector<DT>> receivedValues(
      neighborNumber, std::vector<DT>());
    std::vector<std::vector<DT>> valuesToSend(
      neighborNumber, std::vector<DT>());
    MPI_Datatype MPI_DT = getMPIType(static_cast<DT>(0));
    const auto &ghostVerticesPerOwner
      = triangulation->getGhostVerticesPerOwner();
    const auto &remoteGhostVertices = triangulation->getRemoteGhostVertices();
    for(int i = 0; i < neighborNumber; i++) {
      receivedValues[i].resize(ghostVerticesPerOwner[neighbors[i]].size()
                               * dimensionNumber);
      valuesToSend[i].resize(remoteGhostVertices[neighbors[i]].size()
                             * dimensionNumber);
    }
    for(int i = 0; i < neighborNumber; i++) {
      ttk::SimplexId nValues = remoteGhostVertices[neighbors[i]].size();
#pragma omp parallel for
      for(ttk::SimplexId j = 0; j < nValues; j++) {
        for(int k = 0; k < dimensionNumber; k++) {
          ttk::SimplexId globalId = remoteGhostVertices[neighbors[i]][j];
          ttk::SimplexId localId = triangulation->getVertexLocalId(globalId);
          valuesToSend[i].at(j * dimensionNumber + k)
            = scalarArray[localId * dimensionNumber + k];
        }
      }
    }

    std::vector<MPI_Request> sendRequests(neighborNumber);
    std::vector<MPI_Request> recvRequests(neighborNumber);
    for(int i = 0; i < neighborNumber; i++) {
      MPI_Isend(valuesToSend[i].data(), valuesToSend[i].size(), MPI_DT,
                neighbors[i], 0, communicator, &sendRequests[i]);
      MPI_Irecv(receivedValues[i].data(), receivedValues[i].size(), MPI_DT,
                neighbors[i], 0, communicator, &recvRequests[i]);
    }

    std::vector<MPI_Status> recvStatus(neighborNumber);
    std::vector<int> recvCompleted(neighborNumber, 0);
    int recvPerformedCount = 0;
    int recvPerformedCountTotal = 0;
    int r;
    int rId;
    while(recvPerformedCountTotal < neighborNumber) {
      MPI_Waitsome(neighborNumber, recvRequests.data(), &recvPerformedCount,
                   recvCompleted.data(), recvStatus.data());
      if(recvPerformedCount > 0) {
        for(int i = 0; i < recvPerformedCount; i++) {
          r = recvStatus[i].MPI_SOURCE;
          rId = neighborsToId.at(r);
          ttk::SimplexId nValues = ghostVerticesPerOwner[r].size();
#pragma omp parallel for
          for(ttk::SimplexId j = 0; j < nValues; j++) {
            for(int k = 0; k < dimensionNumber; k++) {
              DT receivedVal = receivedValues[rId][j * dimensionNumber + k];
              ttk::SimplexId globalId = ghostVerticesPerOwner[r][j];
              ttk::SimplexId localId
                = triangulation->getVertexLocalId(globalId);
              scalarArray[localId * dimensionNumber + k] = receivedVal;
            }
          }
        }
        recvPerformedCountTotal += recvPerformedCount;
      }
    }
    MPI_Waitall(neighborNumber, sendRequests.data(), MPI_STATUSES_IGNORE);

    return 0;
  }

  template <typename DT,
            typename IT,
            typename GVGID,
            typename GVR,
            typename GVLID>
  int exchangeGhostDataWithoutTriangulation(DT *scalarArray,
                                            const GVR &getVertexRank,
                                            const GVGID &getVertexGlobalId,
                                            const GVLID &getVertexLocalId,
                                            const IT nVerts,
                                            MPI_Comm communicator,
                                            const std::vector<int> &neighbors,
                                            const int dimensionNumber = 1) {
    if(!ttk::isRunningWithMPI()) {
      return -1;
    }
    for(int r = 0; r < ttk::MPIsize_; r++) {
      getGhostDataScalarsWithoutTriangulation(
        scalarArray, getVertexRank, getVertexGlobalId, getVertexLocalId,
        neighbors, r, nVerts, communicator, dimensionNumber);
      MPI_Barrier(communicator);
    }
    return 0;
  }

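  // Illustrative usage sketch (not part of the original header): the
  // triangulation-free variant only needs accessor callables, which makes it
  // usable on plain arrays. The helper name and arrays below are placeholders;
  // rankArray, globalIds and globalToLocal are assumed to be set up elsewhere
  // (e.g. via produceRankArray and preconditionNeighborsUsingRankArray).
  inline void exampleExchangeWithLambdas(
    std::vector<double> &scalars,
    const std::vector<int> &rankArray,
    const std::vector<ttk::LongSimplexId> &globalIds,
    const std::unordered_map<ttk::LongSimplexId, ttk::SimplexId> &globalToLocal,
    const std::vector<int> &neighbors) {
    const ttk::SimplexId nVerts = scalars.size();
    ttk::exchangeGhostDataWithoutTriangulation<double, ttk::SimplexId>(
      scalars.data(),
      [&rankArray](const ttk::SimplexId a) { return rankArray[a]; },
      [&globalIds](const ttk::SimplexId a) { return globalIds[a]; },
      [&globalToLocal](const ttk::LongSimplexId a) { return globalToLocal.at(a); },
      nVerts, ttk::MPIcomm_, neighbors);
  }
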
  // returns true if the bounding boxes intersect, false if not
  bool inline checkForIntersection(double *myBB, double *theirBB) {
    return !(
      myBB[0] > theirBB[1] // my left side is right of their right side
      || myBB[1] < theirBB[0] // my right side is left of their left side
      || myBB[2] > theirBB[3] // my bottom side is above their top side
      || myBB[3] < theirBB[2] // my top side is under their bottom side
      || myBB[4] > theirBB[5] // my front side is behind their back side
      || myBB[5] < theirBB[4] // my back side is in front of their front side
    );
  }

  void inline preconditionNeighborsUsingBoundingBox(
    double *boundingBox,
    std::vector<int> &neighbors,
    std::map<int, int> &neighborsToId) {

    std::vector<std::array<double, 6>> rankBoundingBoxes(ttk::MPIsize_);
    std::copy(
      boundingBox, boundingBox + 6, rankBoundingBoxes[ttk::MPIrank_].begin());
    for(int r = 0; r < ttk::MPIsize_; r++) {
      MPI_Bcast(rankBoundingBoxes[r].data(), 6, MPI_DOUBLE, r, ttk::MPIcomm_);
    }

    const double epsilon = 0.00001;
    // inflate our own bounding box by epsilon
    for(int i = 0; i < 6; i++) {
      if(i % 2 == 0)
        boundingBox[i] -= epsilon;
      if(i % 2 == 1)
        boundingBox[i] += epsilon;
    }

    for(int i = 0; i < ttk::MPIsize_; i++) {
      if(i != ttk::MPIrank_) {
        double *theirBoundingBox = rankBoundingBoxes[i].data();
        if(checkForIntersection(boundingBox, theirBoundingBox)) {
          neighbors.push_back(i);
        }
      }
    }
    neighborsToId.clear();
    int neighborNumber = neighbors.size();
    for(int i = 0; i < neighborNumber; i++) {
      neighborsToId[neighbors[i]] = i;
    }
  }

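  // Illustrative usage sketch (not part of the original header): each rank
  // passes the axis-aligned bounding box of its local block, ordered as
  // {xmin, xmax, ymin, ymax, zmin, zmax} (as suggested by the comments in
  // checkForIntersection), and receives the ranks whose slightly inflated
  // boxes overlap its own. The helper name and box values are placeholders.
  inline void exampleBoundingBoxNeighbors() {
    double boundingBox[6] = {0.0, 1.0, 0.0, 1.0, 0.0, 1.0};
    std::vector<int> neighbors{};
    std::map<int, int> neighborsToId{};
    ttk::preconditionNeighborsUsingBoundingBox(
      boundingBox, neighbors, neighborsToId);
    // neighbors now lists the ranks adjacent to this block; neighborsToId
    // maps each neighbor rank to its index in that list
  }
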
  void inline produceRankArray(std::vector<int> &rankArray,
                               const LongSimplexId *globalIds,
                               const unsigned char *ghostCells,
                               int nVertices,
                               double *boundingBox,
                               std::vector<int> &neighbors,
                               std::map<int, int> &neighborsToId) {
    if(neighbors.empty()) {
      ttk::preconditionNeighborsUsingBoundingBox(
        boundingBox, neighbors, neighborsToId);
    }
    MPI_Datatype MIT = ttk::getMPIType(ttk::SimplexId{});
    std::vector<ttk::SimplexId> currentRankUnknownIds;
    std::unordered_set<ttk::SimplexId> gIdSet;
    std::unordered_map<ttk::SimplexId, ttk::SimplexId> gIdToLocalMap;

    for(int i = 0; i < nVertices; i++) {
      const int ghostCellVal = ghostCells[i];
      const ttk::SimplexId globalId = globalIds[i];
      if(ghostCellVal == 0) {
        // if the ghost cell value is 0, this vertex belongs to the current
        // rank
        rankArray[i] = ttk::MPIrank_;
        gIdSet.insert(globalId);
      } else {
        // otherwise the vertex belongs to another rank and we need to find
        // out which one; this is done by broadcasting the global id and
        // waiting for some other rank to answer
        currentRankUnknownIds.push_back(globalId);
        gIdToLocalMap[globalId] = i;
      }
    }

    ttk::SimplexId sizeOfCurrentRank = currentRankUnknownIds.size();
    std::vector<ttk::SimplexId> gIdsToSend;
    std::vector<ttk::SimplexId> receivedGlobals;
    receivedGlobals.resize(sizeOfCurrentRank);
    ttk::SimplexId sizeOfNeighbor;
    std::vector<ttk::SimplexId> neighborUnknownIds;
    for(const int neighbor : neighbors) {
      // we first send the size and then all needed ids to the neighbor
      MPI_Sendrecv(&sizeOfCurrentRank, 1, MIT, neighbor, ttk::MPIrank_,
                   &sizeOfNeighbor, 1, MIT, neighbor, neighbor, ttk::MPIcomm_,
                   MPI_STATUS_IGNORE);
      neighborUnknownIds.resize(sizeOfNeighbor);
      gIdsToSend.reserve(sizeOfNeighbor);

      MPI_Sendrecv(currentRankUnknownIds.data(), sizeOfCurrentRank, MIT,
                   neighbor, ttk::MPIrank_, neighborUnknownIds.data(),
                   sizeOfNeighbor, MIT, neighbor, neighbor, ttk::MPIcomm_,
                   MPI_STATUS_IGNORE);

      // then we check if the needed global id values are present in the
      // local global id set; if so, we send the rank value to the requesting
      // rank
      for(const ttk::SimplexId gId : neighborUnknownIds) {
        if(gIdSet.count(gId)) {
          // add the value to the vector which will be sent
          gIdsToSend.push_back(gId);
        }
      }
      MPI_Status status;
      int amount;

      MPI_Sendrecv(gIdsToSend.data(), gIdsToSend.size(), MIT, neighbor,
                   ttk::MPIrank_, receivedGlobals.data(),
                   currentRankUnknownIds.size(), MIT, neighbor, neighbor,
                   ttk::MPIcomm_, &status);

      MPI_Get_count(&status, MIT, &amount);
      receivedGlobals.resize(amount);

      for(const ttk::SimplexId receivedGlobal : receivedGlobals) {
        const ttk::SimplexId localVal = gIdToLocalMap[receivedGlobal];
        rankArray[localVal] = neighbor;
      }
      // cleanup
      gIdsToSend.clear();
      receivedGlobals.resize(sizeOfCurrentRank);
    }
  }

  template <typename DT, typename IT>
  void returnVectorForBurstsize(std::vector<value<DT, IT>> &outVector,
                                std::vector<value<DT, IT>> &values,
                                size_t burstSize) {

    if(burstSize > values.size()) {
      outVector.resize(values.size(), {0, 0});
      outVector.assign(values.begin(), values.end());
      values.clear();
    } else {
      outVector.resize(burstSize, {0, 0});
      outVector.assign(values.end() - burstSize, values.end());
      values.erase(values.end() - burstSize, values.end());
    }
  }

  template <typename DT, typename IT>
  void ReceiveAndAddToVector(
    int rankFrom,
    int structTag,
    std::vector<std::vector<value<DT, IT>>> &unsortedReceivedValues) {
    std::vector<value<DT, IT>> &receivedValues
      = unsortedReceivedValues[rankFrom];
    // probe to get the correct size to receive
    int amount;
    MPI_Status status;
    MPI_Probe(rankFrom, structTag * rankFrom, ttk::MPIcomm_, &status);
    MPI_Get_count(&status, MPI_CHAR, &amount);
    receivedValues.resize(amount / sizeof(value<DT, IT>), {0, 0});
    MPI_Recv(receivedValues.data(), amount, MPI_CHAR, rankFrom,
             structTag * rankFrom, ttk::MPIcomm_, MPI_STATUS_IGNORE);
  }

  template <typename DT, typename IT>
  void getMax(int intTag,
              int structTag,
              IT &currentOrder,
              int burstSize,
              MPI_Datatype MPI_IT,
              IT &processedValueCounter,
              std::vector<std::vector<value<DT, IT>>> &unsortedReceivedValues,
              std::vector<std::vector<IT>> &orderResendValues,
              std::vector<IT> &orderedValuesForRank,
              std::vector<value<DT, IT>> &sortingValues) {
    // take the current maximum scalar over all ranks
    int rankIdOfMaxScalar = -1;
    DT maxScalar = std::numeric_limits<DT>::lowest();
    IT maxGId = -1;
    for(size_t i = 0; i < unsortedReceivedValues.size(); i++) {
      if(unsortedReceivedValues[i].size() > 0) {
        const auto &v = unsortedReceivedValues[i].back();
        if(v.scalar == maxScalar ? v.globalId > maxGId : v.scalar > maxScalar) {
          maxScalar = v.scalar;
          maxGId = v.globalId;
          rankIdOfMaxScalar = i;
        }
      }
    }

    value<DT, IT> currentValue
      = unsortedReceivedValues[rankIdOfMaxScalar].back();
    // we send the globalId and the order via one send command, therefore we
    // need to check two consecutive values at the same time later on
    orderResendValues[rankIdOfMaxScalar].push_back(currentValue.globalId);
    orderResendValues[rankIdOfMaxScalar].push_back(currentOrder);
    currentOrder--;
    processedValueCounter++;
    unsortedReceivedValues[rankIdOfMaxScalar].pop_back();
    if(unsortedReceivedValues[rankIdOfMaxScalar].size() == 0) {
      if(rankIdOfMaxScalar == 0) {
        // append the ordered values to the correct vector
        orderedValuesForRank.insert(
          orderedValuesForRank.end(),
          orderResendValues[rankIdOfMaxScalar].begin(),
          orderResendValues[rankIdOfMaxScalar].end());
        orderResendValues[rankIdOfMaxScalar].clear();

        if(!sortingValues.empty()) {
          std::vector<value<DT, IT>> ownValues;
          returnVectorForBurstsize<DT, IT>(ownValues, sortingValues, burstSize);

          unsortedReceivedValues[rankIdOfMaxScalar] = ownValues;
        }
      } else {
        // this rank's buffer is empty: send the computed orders back to it
        // and request more values
        MPI_Send(orderResendValues[rankIdOfMaxScalar].data(),
                 orderResendValues[rankIdOfMaxScalar].size(), MPI_IT,
                 rankIdOfMaxScalar, intTag * rankIdOfMaxScalar, ttk::MPIcomm_);
        orderResendValues[rankIdOfMaxScalar].clear();
        // we receive more values; if the size is still zero afterwards, we
        // are finished with this rank
        ReceiveAndAddToVector(
          rankIdOfMaxScalar, structTag, unsortedReceivedValues);
      }
    }
  }

  template <typename IT, typename GVLID>
  void buildArrayForReceivedData(const size_t nInts,
                                 const IT *const orderedValuesForRank,
                                 const GVLID &getVertexLocalId,
                                 SimplexId *const order,
                                 const int nThreads) {

    TTK_FORCE_USE(nThreads);

#ifdef TTK_ENABLE_OPENMP
#pragma omp parallel for num_threads(nThreads)
#endif // TTK_ENABLE_OPENMP
    for(size_t i = 0; i < nInts; i += 2) {
      order[getVertexLocalId(orderedValuesForRank[i])]
        = orderedValuesForRank[i + 1];
    }
  }

  template <typename DT, typename IT, typename GVGID, typename GVR>
  void populateVector(std::vector<value<DT, IT>> &valuesToSortVector,
                      const size_t nVerts,
                      const DT *const scalars,
                      const GVGID &getVertexGlobalId,
                      const GVR &getVertexRank) {
    for(size_t i = 0; i < nVerts; i++) {
      IT globalId = getVertexGlobalId(i);
      if(getVertexRank(i) == ttk::MPIrank_) {
        valuesToSortVector.emplace_back(scalars[i], globalId);
      }
    }
  }

  template <typename DT, typename IT>
  void sortVerticesDistributed(std::vector<value<DT, IT>> &values,
                               const int nThreads) {

    TTK_FORCE_USE(nThreads);

    TTK_PSORT(nThreads, values.begin(), values.end(),
              [](value<DT, IT> v1, value<DT, IT> v2) {
                return (v1.scalar < v2.scalar)
                       || (v1.scalar == v2.scalar && v1.globalId < v2.globalId);
              });
  }

  template <typename DT, typename GVGID, typename GVR, typename GVLID>
  void produceOrdering(SimplexId *orderArray,
                       const DT *scalarArray,
                       const GVGID &getVertexGlobalId,
                       const GVR &getVertexRank,
                       const GVLID &getVertexLocalId,
                       const size_t nVerts,
                       const int burstSize,
                       std::vector<int> &neighbors,
                       std::map<int, int> &neighborsToId) {
    int intTag = 101;
    int structTag = 102;
    if(neighbors.empty()) {
      ttk::preconditionNeighborsUsingRankArray(
        neighbors, neighborsToId, getVertexRank, nVerts, ttk::MPIcomm_);
    }
    MPI_Barrier(ttk::MPIcomm_);

    using IT = decltype(getVertexGlobalId(0));
    MPI_Datatype MPI_IT = ttk::getMPIType(static_cast<IT>(0));

    std::vector<value<DT, IT>> sortingValues;
    populateVector(
      sortingValues, nVerts, scalarArray, getVertexGlobalId, getVertexRank);

    // each rank sorts its local values, first by the scalar value itself,
    // then by the global id
    sortVerticesDistributed<DT, IT>(sortingValues, ttk::globalThreadNumber_);

    // when all ranks are done sorting, rank 0 requests the highest values
    // and merges them
    MPI_Barrier(ttk::MPIcomm_);

    std::vector<IT> orderedValuesForRank;
    IT processedValueCounter = 0;
    IT localSize = sortingValues.size();
    IT totalSize;
    // get the complete size of the dataset by summing up the local sizes
    MPI_Reduce(&localSize, &totalSize, 1, MPI_IT, MPI_SUM, 0, ttk::MPIcomm_);
    if(ttk::MPIrank_ == 0) {
      IT currentOrder = totalSize - 1;
      std::vector<std::vector<value<DT, IT>>> unsortedReceivedValues;
      unsortedReceivedValues.resize(ttk::MPIsize_);
      std::vector<std::vector<IT>> orderResendValues;
      orderResendValues.resize(ttk::MPIsize_);
      for(int i = 0; i < ttk::MPIsize_; i++) {
        orderResendValues[i].reserve(burstSize);
      }

      // receive the first batch of values
      for(int i = 0; i < ttk::MPIsize_; i++) {
        if(i == 0) {
          std::vector<value<DT, IT>> ownValues;
          returnVectorForBurstsize<DT, IT>(ownValues, sortingValues, burstSize);
          unsortedReceivedValues[i] = ownValues;
        } else {
          ReceiveAndAddToVector<DT, IT>(i, structTag, unsortedReceivedValues);
        }
      }
      while(processedValueCounter < totalSize) {
        getMax<DT, IT>(intTag, structTag, currentOrder, burstSize, MPI_IT,
                       processedValueCounter, unsortedReceivedValues,
                       orderResendValues, orderedValuesForRank, sortingValues);
      }

    } else { // other ranks
      // send the next burstSize values and then wait for an answer from the
      // root rank
      while(!sortingValues.empty()) {
        std::vector<value<DT, IT>> sendValues;
        returnVectorForBurstsize<DT, IT>(sendValues, sortingValues, burstSize);
        int size = sendValues.size();
        MPI_Send(sendValues.data(), size * sizeof(value<DT, IT>), MPI_CHAR, 0,
                 structTag * ttk::MPIrank_, ttk::MPIcomm_);
        std::vector<IT> receivedValues;

        // be prepared to receive burstSize pairs of elements, resize to the
        // actual size after receiving
        receivedValues.resize(burstSize * 2);
        MPI_Status status;
        int amount;
        MPI_Recv(receivedValues.data(), burstSize * 2, MPI_IT, 0,
                 intTag * ttk::MPIrank_, ttk::MPIcomm_, &status);
        MPI_Get_count(&status, MPI_IT, &amount);

        receivedValues.resize(amount);
        orderedValuesForRank.insert(orderedValuesForRank.end(),
                                    receivedValues.begin(),
                                    receivedValues.end());
      }
      // afterwards, send a single message of length 0 to the root to signal
      // that we are done
      MPI_Send(sortingValues.data(), 0, MPI_CHAR, 0, structTag * ttk::MPIrank_,
               ttk::MPIcomm_);
    }

    // all ranks do the following
    MPI_Barrier(ttk::MPIcomm_);

    buildArrayForReceivedData<IT>(orderedValuesForRank.size(),
                                  orderedValuesForRank.data(), getVertexLocalId,
                                  orderArray, ttk::globalThreadNumber_);

    // fill the order values of the ghost vertices through the generic
    // exchangeGhostDataWithoutTriangulation method
    ttk::exchangeGhostDataWithoutTriangulation<ttk::SimplexId, IT>(
      orderArray, getVertexRank, getVertexGlobalId, getVertexLocalId, nVerts,
      ttk::MPIcomm_, neighbors);
  }

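  // Illustrative usage sketch (not part of the original header): computing a
  // global order array for a scalar field given as plain arrays. The helper
  // name, arrays and burst size are placeholders; globalToLocal is assumed to
  // map global vertex ids to local indices.
  inline void exampleProduceOrdering(
    std::vector<ttk::SimplexId> &orderArray,
    const std::vector<float> &scalars,
    const std::vector<int> &rankArray,
    const std::vector<ttk::LongSimplexId> &globalIds,
    const std::unordered_map<ttk::LongSimplexId, ttk::SimplexId> &globalToLocal) {
    std::vector<int> neighbors{};
    std::map<int, int> neighborsToId{};
    ttk::produceOrdering<float>(
      orderArray.data(), scalars.data(),
      [&globalIds](const ttk::SimplexId a) { return globalIds[a]; },
      [&rankArray](const ttk::SimplexId a) { return rankArray[a]; },
      [&globalToLocal](const ttk::LongSimplexId a) { return globalToLocal.at(a); },
      scalars.size(), /*burstSize=*/100, neighbors, neighborsToId);
  }
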
  template <typename dataType>
  void sendVector(std::vector<dataType> &sendBuffer,
                  MPI_Datatype messageType,
                  int neighbor) {
    ttk::SimplexId dataSize = sendBuffer.size();
    MPI_Send(&dataSize, 1, getMPIType(dataSize), neighbor, ttk::MPIrank_,
             ttk::MPIcomm_);
    MPI_Send(sendBuffer.data(), dataSize, messageType, neighbor, ttk::MPIrank_,
             ttk::MPIcomm_);
  }

  template <typename dataType>
  void recvVector(std::vector<dataType> &receiveBuffer,
                  ttk::SimplexId &recvMessageSize,
                  MPI_Datatype messageType,
                  int neighbor) {
    MPI_Recv(&recvMessageSize, 1, getMPIType(recvMessageSize), neighbor,
             neighbor, ttk::MPIcomm_, MPI_STATUS_IGNORE);
    receiveBuffer.resize(recvMessageSize);
    MPI_Recv(receiveBuffer.data(), recvMessageSize, messageType, neighbor,
             neighbor, ttk::MPIcomm_, MPI_STATUS_IGNORE);
  }

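  // Illustrative usage sketch (not part of the original header): sendVector
  // and recvVector form a simple size-prefixed point-to-point exchange. The
  // helper name and payload are placeholders; both ranks of a pair call this
  // with the other rank's id, so the lower rank sends and the higher rank
  // receives.
  inline void exampleSizePrefixedExchange(const int neighbor) {
    MPI_Datatype messageType = ttk::getMPIType(ttk::SimplexId{});
    if(ttk::MPIrank_ < neighbor) {
      std::vector<ttk::SimplexId> sendBuffer{0, 1, 2, 3};
      ttk::sendVector(sendBuffer, messageType, neighbor);
    } else {
      std::vector<ttk::SimplexId> receiveBuffer{};
      ttk::SimplexId recvMessageSize{};
      ttk::recvVector(receiveBuffer, recvMessageSize, messageType, neighbor);
    }
  }
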
} // namespace ttk

#endif // TTK_ENABLE_MPI