class ArrayPreconditioning : virtual public Debug {

public:
  int preconditionTriangulation(AbstractTriangulation *triangulation) {
    // ghost vertex exchange is needed to propagate the order array later on
    triangulation->preconditionExchangeGhostVertices();
    return 0;
  }

  void setGlobalOrder(const bool order) {
    this->GlobalOrder = order;
  }
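
  /**
   * Handles one chunk [begin, end) of the globally sorted vertices: the order
   * of vertices owned by this process is written into orderArray, while the
   * orders of vertices owned by other processes are sent to their owners.
   */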
  template <typename DT, typename triangulationType>
  void postprocessing(const triangulationType *triangulation,
                      ttk::SimplexId *orderArray,
                      const ttk::SimplexId begin,
                      const ttk::SimplexId end,
                      const ttk::SimplexId orderOffset,
                      std::vector<globalOrder::vertexToSort<DT>> &verticesToSort,
                      MPI_Datatype &MPI_sortedVertexType,
                      MPI_Datatype &MPI_SimplexId,
                      const ttk::SimplexId itNumber,
                      const ttk::SimplexId localNbChunk) const {
    // one bucket of sorted vertices per destination MPI rank
    ttk::SimplexId rank{0};
    std::vector<std::vector<globalOrder::sortedVertex>> verticesSorted(
      ttk::MPIsize_);
    if(itNumber < localNbChunk + 1) {
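      // Write the order of this chunk's vertices: the order of locally owned
      // vertices goes straight into orderArray, the others are buffered per
      // destination rank (and per thread when OpenMP is enabled).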
#ifdef TTK_ENABLE_OPENMP
      // one buffer per thread and per destination rank avoids write contention
      std::vector<std::vector<std::vector<globalOrder::sortedVertex>>>
        verticesSortedThread(
          this->threadNumber_,
          std::vector<std::vector<globalOrder::sortedVertex>>(ttk::MPIsize_));
#pragma omp parallel firstprivate(rank) num_threads(threadNumber_)
      {
        int threadNumber = omp_get_thread_num();
#pragma omp for schedule(static)
        for(ttk::SimplexId i = begin; i < end; i++) {
          rank = verticesToSort.at(i).rank;
          if(rank == ttk::MPIrank_) {
            orderArray[triangulation->getVertexLocalId(
              verticesToSort.at(i).globalId)]
              = orderOffset + i;
          } else {
            verticesSortedThread.at(threadNumber)
              .at(rank)
              .push_back(globalOrder::sortedVertex{
                verticesToSort.at(i).globalId, orderOffset + i});
          }
        }
      }
      // this chunk is fully processed: shrink verticesToSort to free memory
      // (chunks are handled from the last one to the first one)
      verticesToSort.resize(begin + 1);
      verticesToSort.shrink_to_fit();
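      // Merge the per-thread buffers into a single buffer per destination
      // rank (one rank per OpenMP iteration, so no synchronization is needed).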
#pragma omp parallel for schedule(static, 1)
      for(int j = 0; j < ttk::MPIsize_; j++) {
        for(int i = 0; i < this->threadNumber_; i++) {
          verticesSorted.at(j).insert(
            verticesSorted.at(j).end(),
            verticesSortedThread.at(i).at(j).begin(),
            verticesSortedThread.at(i).at(j).end());
        }
      }
      verticesSortedThread.clear();
      verticesSortedThread.shrink_to_fit();
#else
      for(ttk::SimplexId i = begin; i < end; i++) {
        rank = verticesToSort.at(i).rank;
        if(rank == ttk::MPIrank_) {
          orderArray[triangulation->getVertexLocalId(
            verticesToSort.at(i).globalId)]
            = orderOffset + i;
        } else {
          verticesSorted.at(rank).push_back(globalOrder::sortedVertex{
            verticesToSort.at(i).globalId, orderOffset + i});
        }
      }
#endif // TTK_ENABLE_OPENMP
    }
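    // Exchange, with every other process, the number of sortedVertex entries
    // that will be transferred in each direction.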
    std::vector<ttk::SimplexId> sendMessageSize(ttk::MPIsize_, 0);
    std::vector<ttk::SimplexId> recvMessageSize(ttk::MPIsize_, 0);
    std::vector<MPI_Request> sendRequests(ttk::MPIsize_ - 1);
    std::vector<MPI_Request> recvRequests(ttk::MPIsize_ - 1);
    std::vector<int> sendCompleted(ttk::MPIsize_ - 1);
    std::vector<int> recvCompleted(ttk::MPIsize_ - 1);
    std::vector<MPI_Status> recvStatus(ttk::MPIsize_ - 1);
    int sendPerformedCount = 0;
    int recvPerformedCount = 0;
    int sendPerformedCountTotal = 0;
    int recvPerformedCountTotal = 0;
    int count = 0;
    for(int i = 0; i < ttk::MPIsize_; i++) {
      if(i != ttk::MPIrank_) {
        sendMessageSize[i] = verticesSorted.at(i).size();
        MPI_Isend(&sendMessageSize[i], 1, MPI_SimplexId, i, 0, ttk::MPIcomm_,
                  &sendRequests[count]);
        MPI_Irecv(&recvMessageSize[i], 1, MPI_SimplexId, i, 0, ttk::MPIcomm_,
                  &recvRequests[count]);
        count++;
      }
    }
    std::vector<std::vector<globalOrder::sortedVertex>> recvVerticesSorted(
      ttk::MPIsize_);
    std::vector<MPI_Request> sendRequestsData(ttk::MPIsize_ - 1);
    std::vector<MPI_Request> recvRequestsData(ttk::MPIsize_ - 1);
    std::vector<MPI_Status> recvStatusData(ttk::MPIsize_ - 1);
    int sendCount = 0;
    int recvCount = 0;
    int r = 0;
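    // Overlap completion of the size exchange with posting the data messages:
    // as soon as a size request completes, the corresponding data send or
    // receive is posted.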
    while(sendPerformedCountTotal < ttk::MPIsize_ - 1
          || recvPerformedCountTotal < ttk::MPIsize_ - 1) {
      if(sendPerformedCountTotal < ttk::MPIsize_ - 1) {
        MPI_Waitsome(ttk::MPIsize_ - 1, sendRequests.data(),
                     &sendPerformedCount, sendCompleted.data(),
                     MPI_STATUSES_IGNORE);
        if(sendPerformedCount > 0) {
          for(int i = 0; i < sendPerformedCount; i++) {
            // sendCompleted stores indices into sendRequests, which skip the
            // local rank: convert the index back to a destination rank
            r = sendCompleted[i];
            if(r >= ttk::MPIrank_) {
              r++;
            }
            if(sendMessageSize[r] > 0) {
              MPI_Isend(verticesSorted.at(r).data(), sendMessageSize[r],
                        MPI_sortedVertexType, r, 1, ttk::MPIcomm_,
                        &sendRequestsData[sendCount]);
              sendCount++;
            }
          }
          sendPerformedCountTotal += sendPerformedCount;
        }
      }
      if(recvPerformedCountTotal < ttk::MPIsize_ - 1) {
        MPI_Waitsome(ttk::MPIsize_ - 1, recvRequests.data(),
                     &recvPerformedCount, recvCompleted.data(),
                     recvStatus.data());
        if(recvPerformedCount > 0) {
          for(int i = 0; i < recvPerformedCount; i++) {
            // the source rank is read directly from the message status
            r = recvStatus[i].MPI_SOURCE;
            if(recvMessageSize[r] > 0) {
              recvVerticesSorted.at(r).resize(recvMessageSize[r]);
              MPI_Irecv(recvVerticesSorted.at(r).data(), recvMessageSize[r],
                        MPI_sortedVertexType, r, 1, ttk::MPIcomm_,
                        &recvRequestsData[recvCount]);
              recvCount++;
            }
          }
          recvPerformedCountTotal += recvPerformedCount;
        }
      }
    }
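    // Complete the data receives and write the received global orders into
    // the local order array.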
    recvPerformedCountTotal = 0;
    while(recvPerformedCountTotal < recvCount) {
      MPI_Waitsome(recvCount, recvRequestsData.data(), &recvPerformedCount,
                   recvCompleted.data(), recvStatusData.data());
      if(recvPerformedCount > 0) {
        for(int i = 0; i < recvPerformedCount; i++) {
          r = recvStatusData[i].MPI_SOURCE;
#pragma omp parallel for schedule(static)
          for(int j = 0; j < recvMessageSize[r]; j++) {
            orderArray[triangulation->getVertexLocalId(
              recvVerticesSorted.at(r).at(j).globalId)]
              = recvVerticesSorted.at(r).at(j).order;
          }
        }
        recvPerformedCountTotal += recvPerformedCount;
      }
    }
    // make sure all posted data sends have completed before returning
    MPI_Waitall(sendCount, sendRequestsData.data(), MPI_STATUSES_IGNORE);
  }
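
  /**
   * Computes the global order of the vertices associated with scalarArray:
   * the vertices of all processes are sorted together (with
   * globalOrder::comp<DT>) and the position of each vertex in this global
   * sorted sequence is written into orderArray.
   */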
  template <typename DT, typename triangulationType>
  int processScalarArray(const triangulationType *triangulation,
                         ttk::SimplexId *orderArray,
                         const DT *scalarArray,
                         const size_t nVerts) const {
    this->printMsg({{"#Threads", std::to_string(this->threadNumber_)},
                    {"#Vertices", std::to_string(nVerts)}});
    // Distributed path: collect the locally owned vertices, sort them
    // globally across all processes, then scatter the resulting orders back.
    if(ttk::isRunningWithMPI()) {
      ttk::SimplexId id{0};
      MPI_Datatype MPI_SimplexId = getMPIType(id);
      // one entry per locally owned (non-ghost) vertex
      std::vector<globalOrder::vertexToSort<DT>> verticesToSort;
      verticesToSort.reserve(nVerts);
#ifdef TTK_ENABLE_OPENMP
#pragma omp declare reduction (merge : std::vector<globalOrder::vertexToSort<DT>> : omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end()))
#pragma omp parallel for reduction(merge : verticesToSort) schedule(static)
#endif // TTK_ENABLE_OPENMP
      for(size_t i = 0; i < nVerts; i++) {
        if(triangulation->getVertexRank(i) == ttk::MPIrank_) {
          verticesToSort.emplace_back(globalOrder::vertexToSort<DT>{
            triangulation->getVertexGlobalId(i), scalarArray[i],
            (ttk::SimplexId)ttk::MPIrank_});
        }
      }
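      // MPI datatype for vertexToSort elements, exchanged as raw bytes by the
      // distributed sort.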
      MPI_Datatype MPI_vertexToSortType;
      MPI_Type_contiguous(sizeof(globalOrder::vertexToSort<DT>), MPI_CHAR,
                          &MPI_vertexToSortType);
      MPI_Type_commit(&MPI_vertexToSortType);
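      // Gather the number of vertices owned by each process, then run the
      // distributed sort over all vertexToSort elements.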
      std::vector<ttk::SimplexId> vertexDistribution(ttk::MPIsize_);
      ttk::SimplexId localVertexNumber = verticesToSort.size();
      MPI_Allgather(&localVertexNumber, 1, MPI_SimplexId,
                    vertexDistribution.data(), 1, MPI_SimplexId,
                    ttk::MPIcomm_);
      p_sort::parallel_sort<globalOrder::vertexToSort<DT>>(
        verticesToSort, globalOrder::comp<DT>, globalOrder::oppositeComp<DT>,
        vertexDistribution, MPI_vertexToSortType, MPI_SimplexId,
        this->threadNumber_);
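      // MPI struct type matching globalOrder::sortedVertex (globalId, order),
      // used to send the computed orders back to the owning processes.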
      MPI_Datatype types[] = {MPI_SimplexId, MPI_SimplexId};
      int lengths[] = {1, 1};
      MPI_Datatype MPI_sortedVertexType;
      const MPI_Aint mpi_offsets[]
        = {offsetof(globalOrder::sortedVertex, globalId),
           offsetof(globalOrder::sortedVertex, order)};
      MPI_Type_create_struct(
        2, lengths, mpi_offsets, types, &MPI_sortedVertexType);
      MPI_Type_commit(&MPI_sortedVertexType);
      ttk::SimplexId verticesToSortSize = verticesToSort.size();
      // global order of the first vertex held locally after the sort
      ttk::SimplexId orderOffset
        = std::accumulate(vertexDistribution.begin(),
                          vertexDistribution.begin() + ttk::MPIrank_,
                          static_cast<ttk::SimplexId>(0));
      ttk::SimplexId nbChunk = std::floor(verticesToSortSize / ChunkSize);
      ttk::SimplexId rest = verticesToSortSize % ChunkSize;
      ttk::SimplexId nbChunkTotal = nbChunk + (rest > 0 ? 1 : 0);
      MPI_Allreduce(MPI_IN_PLACE, &nbChunkTotal, 1, MPI_SimplexId, MPI_MAX,
                    ttk::MPIcomm_);
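      // The sorted vertices are post-processed chunk by chunk so that the
      // send/receive buffers stay bounded; every process runs the same number
      // of rounds (nbChunkTotal) so that all exchanges are matched.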
      if(rest > 0) {
        postprocessing<DT, triangulationType>(
          triangulation, orderArray, nbChunk * ChunkSize,
          nbChunk * ChunkSize + rest, orderOffset, verticesToSort,
          MPI_sortedVertexType, MPI_SimplexId, nbChunk, nbChunk);
      }
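      // Full chunks are processed from the last one to the first one, which
      // lets postprocessing() shrink verticesToSort as it goes.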
      for(int chunk = nbChunk - 1; chunk >= 0; chunk--) {
        postprocessing<DT, triangulationType>(
          triangulation, orderArray, chunk * ChunkSize,
          (chunk + 1) * ChunkSize, orderOffset, verticesToSort,
          MPI_sortedVertexType, MPI_SimplexId, chunk, nbChunk);
      }
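      // Processes that have fewer chunks than nbChunkTotal keep participating
      // in the remaining rounds, with an empty local range.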
      const int beginningChunk = nbChunk + (rest > 0 ? 1 : 0);
      for(int chunk = beginningChunk; chunk < nbChunkTotal; chunk++) {
        postprocessing<DT, triangulationType>(
          triangulation, orderArray, nbChunk * ChunkSize + rest,
          nbChunk * ChunkSize + rest, orderOffset, verticesToSort,
          MPI_sortedVertexType, MPI_SimplexId, chunk, nbChunk);
      }
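      // Finally, the order of ghost vertices is fetched from their owning
      // processes.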
      ttk::exchangeGhostVertices<ttk::SimplexId, triangulationType>(
        orderArray, triangulation, ttk::MPIcomm_, 1);
    }
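
  // GlobalOrder: controls whether a global vertex order is computed across
  // all MPI processes (set through setGlobalOrder()).
  // ChunkSize: bounds the number of sorted vertices handled per
  // postprocessing round.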
  bool GlobalOrder{false};
  int ChunkSize{1000000000};