76 triangulation->preconditionExchangeGhostVertices();
98 template <
typename DT,
typename triangulationType>
100 postprocessing(
const triangulationType *triangulation,
105 std::vector<globalOrder::vertexToSort<DT>> &verticesToSort,
106 MPI_Datatype &MPI_sortedVertexType,
107 MPI_Datatype &MPI_SimplexId,
111 std::vector<std::vector<globalOrder::sortedVertex>> verticesSorted(
113 if(itNumber < localNbChunk + 1) {
115#ifdef TTK_ENABLE_OPENMP
116 std::vector<std::vector<std::vector<globalOrder::sortedVertex>>>
117 verticesSortedThread(
119 std::vector<std::vector<globalOrder::sortedVertex>>(
121#pragma omp parallel firstprivate(rank) num_threads(threadNumber_)
123 int threadNumber = omp_get_thread_num();
124#pragma omp for schedule(static)
126 rank = verticesToSort.at(i).rank;
128 orderArray[triangulation->getVertexLocalId(
129 verticesToSort.at(i).globalId)]
132 verticesSortedThread.at(threadNumber)
134 .push_back(globalOrder::sortedVertex{
135 verticesToSort.at(i).globalId, orderOffset + i});
140 verticesToSort.resize(
begin + 1);
141 verticesToSort.shrink_to_fit();
143#pragma omp parallel for schedule(static, 1)
147 verticesSorted.at(j).insert(
148 verticesSorted.at(j).end(),
149 verticesSortedThread.at(i).at(j).begin(),
150 verticesSortedThread.at(i).at(j).end());
154 verticesSortedThread.clear();
155 verticesSortedThread.shrink_to_fit();
158 rank = verticesToSort.at(i).rank;
160 orderArray[triangulation->getVertexLocalId(
161 verticesToSort.at(i).globalId)]
164 verticesSorted.at(rank).push_back(globalOrder::sortedVertex{
165 verticesToSort.at(i).globalId, orderOffset + i});
176 std::vector<ttk::SimplexId> sendMessageSize(
ttk::MPIsize_, 0);
177 std::vector<ttk::SimplexId> recvMessageSize(
ttk::MPIsize_, 0);
180 int sendPerformedCount = 0;
181 int recvPerformedCount = 0;
182 int sendPerformedCountTotal = 0;
183 int recvPerformedCountTotal = 0;
188 sendMessageSize[i] = verticesSorted.at(i).size();
189 MPI_Isend(&sendMessageSize[i], 1, MPI_SimplexId, i, 0, ttk::MPIcomm_,
190 &sendRequests[count]);
191 MPI_Irecv(&recvMessageSize[i], 1, MPI_SimplexId, i, 0, ttk::MPIcomm_,
192 &recvRequests[count]);
196 std::vector<std::vector<globalOrder::sortedVertex>> recvVerticesSorted(
198 std::vector<MPI_Request> sendRequestsData(
ttk::MPIsize_ - 1);
199 std::vector<MPI_Request> recvRequestsData(
ttk::MPIsize_ - 1);
208 &sendPerformedCount, sendCompleted.data(),
210 if(sendPerformedCount > 0) {
211 for(
int i = 0; i < sendPerformedCount; i++) {
212 r = sendCompleted[i];
216 if((sendMessageSize[r] > 0)) {
217 MPI_Isend(verticesSorted.at(r).data(), sendMessageSize[r],
218 MPI_sortedVertexType, r, 1, ttk::MPIcomm_,
219 &sendRequestsData[sendCount]);
223 sendPerformedCountTotal += sendPerformedCount;
228 &recvPerformedCount, recvCompleted.data(),
230 if(recvPerformedCount > 0) {
231 for(
int i = 0; i < recvPerformedCount; i++) {
232 r = recvStatus[i].MPI_SOURCE;
233 if((recvMessageSize[r] > 0)) {
234 recvVerticesSorted.at(r).resize(recvMessageSize[r]);
235 MPI_Irecv(recvVerticesSorted.at(r).data(), recvMessageSize[r],
236 MPI_sortedVertexType, r, 1, ttk::MPIcomm_,
237 &recvRequestsData[recvCount]);
242 recvPerformedCountTotal += recvPerformedCount;
246 recvPerformedCountTotal = 0;
247 while(recvPerformedCountTotal < recvCount) {
248 MPI_Waitsome(recvCount, recvRequestsData.data(), &recvPerformedCount,
249 recvCompleted.data(), recvStatusData.data());
250 if(recvPerformedCount > 0) {
251 for(
int i = 0; i < recvPerformedCount; i++) {
252 r = recvStatusData[i].MPI_SOURCE;
253#pragma omp parallel for schedule(static)
254 for(
int j = 0; j < recvMessageSize[r]; j++) {
255 orderArray[triangulation->getVertexLocalId(
256 recvVerticesSorted.at(r).at(j).globalId)]
257 = recvVerticesSorted.at(r).at(j).order;
260 recvPerformedCountTotal += recvPerformedCount;
263 MPI_Waitall(sendCount, sendRequestsData.data(), MPI_STATUSES_IGNORE);
269 template <
typename DT,
typename triangulationType>
272 const DT *scalarArray,
273 const size_t nVerts)
const {
280 {
"#Threads", std::to_string(this->threadNumber_)},
281 {
"#Vertices", std::to_string(nVerts)},
289 if(ttk::isRunningWithMPI()) {
291 MPI_Datatype MPI_SimplexId = getMPIType(
id);
293 std::vector<globalOrder::vertexToSort<DT>> verticesToSort;
294 verticesToSort.reserve(nVerts);
295#ifdef TTK_ENABLE_OPENMP
296#pragma omp declare reduction( \
297 merge : std::vector<globalOrder::vertexToSort<DT>> : omp_out.insert( \
298 omp_out.end(), omp_in.begin(), omp_in.end()))
299#pragma omp parallel for reduction(merge : verticesToSort) schedule(static)
301 for(
size_t i = 0; i < nVerts; i++) {
303 verticesToSort.emplace_back(globalOrder::vertexToSort<DT>{
304 triangulation->getVertexGlobalId(i), scalarArray[i],
309 MPI_Datatype MPI_vertexToSortType;
316 MPI_Type_contiguous(
sizeof(globalOrder::vertexToSort<DT>), MPI_CHAR,
317 &MPI_vertexToSortType);
318 MPI_Type_commit(&MPI_vertexToSortType);
320 std::vector<ttk::SimplexId> vertexDistribution(
ttk::MPIsize_);
322 MPI_Allgather(&localVertexNumber, 1, MPI_SimplexId,
323 vertexDistribution.data(), 1, MPI_SimplexId,
327 p_sort::parallel_sort<globalOrder::vertexToSort<DT>>(
328 verticesToSort, globalOrder::comp<DT>, globalOrder::oppositeComp<DT>,
329 vertexDistribution, MPI_vertexToSortType, MPI_SimplexId,
335 MPI_Datatype types[] = {MPI_SimplexId, MPI_SimplexId};
336 int lengths[] = {1, 1};
337 MPI_Datatype MPI_sortedVertexType;
338 const long int mpi_offsets[]
339 = {offsetof(globalOrder::sortedVertex, globalId),
340 offsetof(globalOrder::sortedVertex, order)};
341 MPI_Type_create_struct(
342 2, lengths, mpi_offsets, types, &MPI_sortedVertexType);
343 MPI_Type_commit(&MPI_sortedVertexType);
349 orderOffset += vertexDistribution[i];
359 MPI_Allreduce(MPI_IN_PLACE, &nbChunkTotal, 1, MPI_SimplexId, MPI_MAX,
368 postprocessing<DT, triangulationType>(
369 triangulation, orderArray, nbChunk *
ChunkSize,
370 nbChunk *
ChunkSize + rest, orderOffset, verticesToSort,
371 MPI_sortedVertexType, MPI_SimplexId, nbChunk, nbChunk);
373 for(
int chunk = nbChunk - 1; chunk >= 0; chunk--) {
374 postprocessing<DT, triangulationType>(
375 triangulation, orderArray, chunk *
ChunkSize,
376 (chunk + 1) *
ChunkSize, orderOffset, verticesToSort,
377 MPI_sortedVertexType, MPI_SimplexId, chunk, nbChunk);
382 for(
int chunk = beginningChunk; chunk < nbChunkTotal; chunk++) {
383 postprocessing<DT, triangulationType>(
384 triangulation, orderArray, nbChunk *
ChunkSize + rest,
385 nbChunk *
ChunkSize + rest, orderOffset, verticesToSort,
386 MPI_sortedVertexType, MPI_SimplexId, chunk, nbChunk);
390 ttk::exchangeGhostVertices<ttk::SimplexId, triangulationType>(
391 orderArray, triangulation, ttk::MPIcomm_, 1);