class ArrayPreconditioning : virtual public Debug {
public:
  template <typename triangulationType>
  int preconditionTriangulation(triangulationType *triangulation) {
    // Ghost vertices must be exchangeable before orders can be shared
    // across MPI ranks.
    if(triangulation != nullptr) {
      triangulation->preconditionExchangeGhostVertices();
    }
    return 0;
  }

  void setGlobalOrder(const bool order) {
    this->GlobalOrder = order;
  }
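  // Routes the globally sorted orders of the chunk [begin, end) back to
  // the ranks owning the corresponding vertices: locally owned vertices
  // are written straight into orderArray, the others are buffered per
  // destination rank and exchanged with non-blocking point-to-point
  // messages. (Parameter roles inferred from the call sites below.)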
  template <typename DT, typename triangulationType>
  void postprocessing(const triangulationType *triangulation,
                      ttk::SimplexId *orderArray,
                      const ttk::SimplexId begin,
                      const ttk::SimplexId end,
                      const ttk::SimplexId orderOffset,
                      std::vector<globalOrder::vertexToSort<DT>> &verticesToSort,
                      MPI_Datatype &MPI_sortedVertexType,
                      MPI_Datatype &MPI_SimplexId,
                      const ttk::SimplexId itNumber,
                      const ttk::SimplexId localNbChunk) const {
    int rank{-1};
    std::vector<std::vector<globalOrder::sortedVertex>> verticesSorted(
      ttk::MPIsize_, std::vector<globalOrder::sortedVertex>());
    // Only process a chunk if this rank still has one left; later rounds
    // (itNumber > localNbChunk) only take part in the communication.
    if(itNumber < localNbChunk + 1) {
#ifdef TTK_ENABLE_OPENMP
      // One bucket per (thread, destination rank), so that threads can
      // append without synchronization.
      std::vector<std::vector<std::vector<globalOrder::sortedVertex>>>
        verticesSortedThread(
          this->threadNumber_,
          std::vector<std::vector<globalOrder::sortedVertex>>(ttk::MPIsize_));
#pragma omp parallel firstprivate(rank) num_threads(threadNumber_)
      {
        int threadNumber = omp_get_thread_num();
#pragma omp for schedule(static)
        for(ttk::SimplexId i = begin; i < end; i++) {
          rank = verticesToSort.at(i).rank;
          if(rank == ttk::MPIrank_) {
            // Locally owned vertex: write its order directly.
            orderArray[triangulation->getVertexLocalId(
              verticesToSort.at(i).globalId)]
              = orderOffset + i;
          } else {
            // Otherwise, buffer the order for its owner rank.
            verticesSortedThread.at(threadNumber)
              .at(rank)
              .push_back(globalOrder::sortedVertex{
                verticesToSort.at(i).globalId, orderOffset + i});
          }
        }
      }
      // The processed tail of verticesToSort is not needed anymore:
      // release its memory right away.
      verticesToSort.resize(begin + 1);
      verticesToSort.shrink_to_fit();
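      // Merge the per-thread buckets into a single send buffer per
      // destination rank (one destination per iteration, so the inserts
      // below do not race).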
#pragma omp parallel for schedule(static, 1)
      for(int j = 0; j < ttk::MPIsize_; j++) {
        for(int i = 0; i < this->threadNumber_; i++) {
          verticesSorted.at(j).insert(
            verticesSorted.at(j).end(),
            verticesSortedThread.at(i).at(j).begin(),
            verticesSortedThread.at(i).at(j).end());
        }
      }
      verticesSortedThread.clear();
      verticesSortedThread.shrink_to_fit();
#else
      // Serial fallback: same routing logic, without per-thread buckets.
      for(ttk::SimplexId i = begin; i < end; i++) {
        rank = verticesToSort.at(i).rank;
        if(rank == ttk::MPIrank_) {
          orderArray[triangulation->getVertexLocalId(
            verticesToSort.at(i).globalId)]
            = orderOffset + i;
        } else {
          verticesSorted.at(rank).push_back(globalOrder::sortedVertex{
            verticesToSort.at(i).globalId, orderOffset + i});
        }
      }
#endif // TTK_ENABLE_OPENMP
    }
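    // Two-phase exchange: first the per-destination message sizes (tag 0),
    // then the payloads (tag 1), overlapped through MPI_Waitsome below.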
    std::vector<ttk::SimplexId> sendMessageSize(ttk::MPIsize_, 0);
    std::vector<ttk::SimplexId> recvMessageSize(ttk::MPIsize_, 0);
    std::vector<MPI_Request> sendRequests(ttk::MPIsize_ - 1);
    std::vector<MPI_Request> recvRequests(ttk::MPIsize_ - 1);
    int sendPerformedCount = 0;
    int recvPerformedCount = 0;
    int sendPerformedCountTotal = 0;
    int recvPerformedCountTotal = 0;
    int count = 0;
    for(int i = 0; i < ttk::MPIsize_; i++) {
      if(i != ttk::MPIrank_) {
        sendMessageSize[i] = verticesSorted.at(i).size();
        MPI_Isend(&sendMessageSize[i], 1, MPI_SimplexId, i, 0, ttk::MPIcomm_,
                  &sendRequests[count]);
        MPI_Irecv(&recvMessageSize[i], 1, MPI_SimplexId, i, 0, ttk::MPIcomm_,
                  &recvRequests[count]);
        count++;
      }
    }
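    // As soon as a size message completes, the matching payload transfer
    // is posted, overlapping allocation and communication across peers.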
    std::vector<std::vector<globalOrder::sortedVertex>> recvVerticesSorted(
      ttk::MPIsize_, std::vector<globalOrder::sortedVertex>());
    std::vector<MPI_Request> sendRequestsData(ttk::MPIsize_ - 1);
    std::vector<MPI_Request> recvRequestsData(ttk::MPIsize_ - 1);
    std::vector<MPI_Status> recvStatus(ttk::MPIsize_ - 1);
    std::vector<MPI_Status> recvStatusData(ttk::MPIsize_ - 1);
    std::vector<int> sendCompleted(ttk::MPIsize_ - 1);
    std::vector<int> recvCompleted(ttk::MPIsize_ - 1);
    int sendCount = 0;
    int recvCount = 0;
    int r{-1};
    while(sendPerformedCountTotal < ttk::MPIsize_ - 1
          || recvPerformedCountTotal < ttk::MPIsize_ - 1) {
      if(sendPerformedCountTotal < ttk::MPIsize_ - 1) {
        MPI_Waitsome(ttk::MPIsize_ - 1, sendRequests.data(),
                     &sendPerformedCount, sendCompleted.data(),
                     MPI_STATUSES_IGNORE);
        if(sendPerformedCount > 0) {
          for(int i = 0; i < sendPerformedCount; i++) {
            r = sendCompleted[i];
            // The request array skips the local rank: shift the completed
            // index to recover the destination rank.
            if(r >= ttk::MPIrank_) {
              r++;
            }
            if(sendMessageSize[r] > 0) {
              MPI_Isend(verticesSorted.at(r).data(), sendMessageSize[r],
                        MPI_sortedVertexType, r, 1, ttk::MPIcomm_,
                        &sendRequestsData[sendCount]);
              sendCount++;
            }
          }
          sendPerformedCountTotal += sendPerformedCount;
        }
      }
      if(recvPerformedCountTotal < ttk::MPIsize_ - 1) {
        MPI_Waitsome(ttk::MPIsize_ - 1, recvRequests.data(),
                     &recvPerformedCount, recvCompleted.data(),
                     recvStatus.data());
        if(recvPerformedCount > 0) {
          for(int i = 0; i < recvPerformedCount; i++) {
            r = recvStatus[i].MPI_SOURCE;
            if(recvMessageSize[r] > 0) {
              recvVerticesSorted.at(r).resize(recvMessageSize[r]);
              MPI_Irecv(recvVerticesSorted.at(r).data(), recvMessageSize[r],
                        MPI_sortedVertexType, r, 1, ttk::MPIcomm_,
                        &recvRequestsData[recvCount]);
              recvCount++;
            }
          }
          recvPerformedCountTotal += recvPerformedCount;
        }
      }
    }
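    // Final phase: as the payload messages complete, scatter the received
    // orders into the local order array.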
    recvPerformedCountTotal = 0;
    while(recvPerformedCountTotal < recvCount) {
      MPI_Waitsome(recvCount, recvRequestsData.data(), &recvPerformedCount,
                   recvCompleted.data(), recvStatusData.data());
      if(recvPerformedCount > 0) {
        for(int i = 0; i < recvPerformedCount; i++) {
          r = recvStatusData[i].MPI_SOURCE;
#pragma omp parallel for schedule(static)
          for(int j = 0; j < recvMessageSize[r]; j++) {
            orderArray[triangulation->getVertexLocalId(
              recvVerticesSorted.at(r).at(j).globalId)]
              = recvVerticesSorted.at(r).at(j).order;
          }
        }
        recvPerformedCountTotal += recvPerformedCount;
      }
    }
    MPI_Waitall(sendCount, sendRequestsData.data(), MPI_STATUSES_IGNORE);
  }
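  // Fills orderArray with the global order of every local vertex: the
  // vertices are sorted across all ranks by scalar value (using the
  // comparators globalOrder::comp and globalOrder::oppositeComp defined
  // elsewhere in this file), and the resulting positions are routed back
  // to their owner ranks chunk by chunk. Minimal usage sketch
  // (hypothetical caller, assuming an allocated order buffer and a
  // preconditioned triangulation):
  //
  //   ttk::ArrayPreconditioning ap;
  //   ap.preconditionTriangulation(&triangulation);
  //   ap.processScalarArray(&triangulation, orderArray, scalarArray, nVerts);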
  template <typename DT, typename triangulationType>
  int processScalarArray(const triangulationType *triangulation,
                         ttk::SimplexId *orderArray,
                         const DT *scalarArray,
                         const size_t nVerts) const {
    if(ttk::isRunningWithMPI()) {
      // Deduce the MPI type matching ttk::SimplexId from a dummy value.
      ttk::SimplexId id{0};
      MPI_Datatype MPI_SimplexId = getMPIType(id);
      std::vector<globalOrder::vertexToSort<DT>> verticesToSort;
      verticesToSort.reserve(nVerts);
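      // Each OpenMP thread appends to a private copy of verticesToSort;
      // the user-declared "merge" reduction below concatenates the
      // per-thread copies at the end of the parallel loop.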
#ifdef TTK_ENABLE_OPENMP
#pragma omp declare reduction (merge : std::vector<globalOrder::vertexToSort<DT>> : omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end()))
#pragma omp parallel for reduction(merge : verticesToSort) schedule(static)
#endif
      for(size_t i = 0; i < nVerts; i++) {
        // Only vertices owned by this rank enter the distributed sort.
        if(triangulation->getVertexRank(i) == ttk::MPIrank_) {
          verticesToSort.emplace_back(globalOrder::vertexToSort<DT>{
            triangulation->getVertexGlobalId(i), scalarArray[i],
            ttk::MPIrank_});
        }
      }
      MPI_Datatype MPI_vertexToSortType;
      // vertexToSort mixes a DT scalar with integer ids, so its exchange
      // type is declared as a plain byte sequence of the right size.
      MPI_Type_contiguous(sizeof(globalOrder::vertexToSort<DT>), MPI_CHAR,
                          &MPI_vertexToSortType);
      MPI_Type_commit(&MPI_vertexToSortType);
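      // Gather the number of vertices contributed by every rank: the
      // distributed sort needs this distribution, and each rank's order
      // offset is derived from it further below.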
      std::vector<ttk::SimplexId> vertexDistribution(ttk::MPIsize_);
      ttk::SimplexId localVertexNumber = verticesToSort.size();
      MPI_Allgather(&localVertexNumber, 1, MPI_SimplexId,
                    vertexDistribution.data(), 1, MPI_SimplexId,
                    ttk::MPIcomm_);
      p_sort::parallel_sort<globalOrder::vertexToSort<DT>>(
        verticesToSort, globalOrder::comp<DT>, globalOrder::oppositeComp<DT>,
        vertexDistribution, MPI_vertexToSortType, MPI_SimplexId,
        this->threadNumber_);
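      // After the distributed sort, verticesToSort holds this rank's
      // slice of the globally sorted sequence. Each element still records
      // the rank owning the vertex, which postprocessing() uses to route
      // the computed orders back.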
      // Exchange type for the (globalId, order) pairs sent back to the
      // owner ranks.
      MPI_Datatype types[] = {MPI_SimplexId, MPI_SimplexId};
      int lengths[] = {1, 1};
      MPI_Datatype MPI_sortedVertexType;
      const MPI_Aint mpi_offsets[]
        = {offsetof(globalOrder::sortedVertex, globalId),
           offsetof(globalOrder::sortedVertex, order)};
      MPI_Type_create_struct(
        2, lengths, mpi_offsets, types, &MPI_sortedVertexType);
      MPI_Type_commit(&MPI_sortedVertexType);
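      // The global order of this rank's first sorted element equals the
      // number of sorted elements held by all lower ranks.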
      const ttk::SimplexId orderOffset
        = std::accumulate(vertexDistribution.begin(),
                          vertexDistribution.begin() + ttk::MPIrank_,
                          static_cast<ttk::SimplexId>(0));
      // Route the orders back in fixed-size chunks to bound the size of
      // the communication buffers.
      const ttk::SimplexId verticesToSortSize = verticesToSort.size();
      const ttk::SimplexId nbChunk = verticesToSortSize / ChunkSize;
      const ttk::SimplexId rest = verticesToSortSize % ChunkSize;
      // Every rank must join the same number of communication rounds,
      // even after exhausting its own chunks.
      ttk::SimplexId nbChunkTotal{nbChunk + (rest > 0 ? 1 : 0)};
      MPI_Allreduce(MPI_IN_PLACE, &nbChunkTotal, 1, MPI_SimplexId, MPI_MAX,
                    ttk::MPIcomm_);
      // Handle the remainder chunk first: it is the tail of the locally
      // held sorted range.
      if(rest > 0) {
        postprocessing<DT, triangulationType>(
          triangulation, orderArray, nbChunk * ChunkSize,
          nbChunk * ChunkSize + rest, orderOffset, verticesToSort,
          MPI_sortedVertexType, MPI_SimplexId, nbChunk, nbChunk);
      }
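      // Full chunks are processed from the last one down to the first, so
      // that postprocessing() can shrink verticesToSort after every round
      // and progressively release memory.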
      for(ttk::SimplexId chunk = nbChunk - 1; chunk >= 0; chunk--) {
        postprocessing<DT, triangulationType>(
          triangulation, orderArray, chunk * ChunkSize,
          (chunk + 1) * ChunkSize, orderOffset, verticesToSort,
          MPI_sortedVertexType, MPI_SimplexId, chunk, nbChunk);
      }
      // Extra rounds for ranks holding fewer chunks than the global
      // maximum: they contribute empty ranges but still communicate.
      const ttk::SimplexId beginningChunk = nbChunk + (rest > 0 ? 1 : 0);
      for(ttk::SimplexId chunk = beginningChunk; chunk < nbChunkTotal;
          chunk++) {
        postprocessing<DT, triangulationType>(
          triangulation, orderArray, nbChunk * ChunkSize + rest,
          nbChunk * ChunkSize + rest, orderOffset, verticesToSort,
          MPI_sortedVertexType, MPI_SimplexId, chunk, nbChunk);
      }
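      // Release the committed exchange types, then propagate the computed
      // orders to the ghost copies so that every rank ends up with a
      // complete order array.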
      MPI_Type_free(&MPI_vertexToSortType);
      MPI_Type_free(&MPI_sortedVertexType);
      ttk::exchangeGhostVertices<ttk::SimplexId, triangulationType>(
        orderArray, triangulation, ttk::MPIcomm_, 1);
    }
    return 0;
  }
protected:
  // If true, a globally consistent order is computed across all MPI
  // ranks (rather than a rank-local one).
  bool GlobalOrder{false};
  // Number of sorted vertices handled per postprocessing round.
  int ChunkSize{1000000000};
};