// Excerpt from TTK's psort.h (adapted from the psort library). This
// excerpt relies on <mpi.h>, <algorithm>, <cassert>, <climits>, <numeric>
// and <vector>, plus the TTK headers that declare ttk::SimplexId,
// ttk::MPIsize_, ttk::MPIrank_, ttk::MPIcomm_ and the TTK_PSORT macro.

// Compare two indices through the weights they point to.
template <typename T, class _Compare>
class PermCompare {
private:
  T *weights;
  _Compare comp;

public:
  PermCompare(T *w, _Compare c) : weights(w), comp(c) {
  }
  bool operator()(int a, int b) {
    return comp(weights[a], weights[b]);
  }
};
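/* Example (illustration only, not part of psort.h): PermCompare is the
   usual argsort idiom; it sorts a set of indices while comparing the
   values those indices point to.

     #include <algorithm>
     #include <functional>
     #include <numeric>
     #include <vector>

     int main() {
       std::vector<float> weights{3.f, 1.f, 2.f};
       std::vector<int> perm(weights.size());
       std::iota(perm.begin(), perm.end(), 0); // perm = {0, 1, 2}

       // Order the indices by the weights they refer to: perm becomes
       // {1, 2, 0}, i.e. the permutation that sorts `weights`.
       std::sort(perm.begin(), perm.end(),
                 PermCompare<float, std::less<float>>(
                   weights.data(), std::less<float>()));
     }
*/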
// Find where the globally sorted data has to be split. On output,
// right_ends[i + 1][p] is the number of values of process p that belong
// to the left of splitter i.
template <class _Compare, typename _RandomAccessIter>
void psort_split(_RandomAccessIter first,
                 _RandomAccessIter last,
                 ttk::SimplexId *dist,
                 _Compare comp,
                 std::vector<std::vector<ttk::SimplexId>> &right_ends,
                 MPI_Datatype &MPI_valueType,
                 MPI_Datatype &MPI_distanceType,
                 int nThreads) {

  using dataType =
    typename std::iterator_traits<_RandomAccessIter>::value_type;

  // Number of processes that actually hold data.
  int n_real = ttk::MPIsize_;
  for(int i = 0; i < ttk::MPIsize_; ++i) {
    if(dist[i] == 0) {
      n_real = i;
      break;
    }
  }

  std::copy(dist, dist + ttk::MPIsize_, right_ends[ttk::MPIsize_].begin());

  // Global target positions of the splitters.
  std::vector<ttk::SimplexId> targets(ttk::MPIsize_ - 1);
  std::partial_sum(dist, dist + (ttk::MPIsize_ - 1), targets.data());

  // Active ranges: d_ranges are the local data ranges still being
  // searched, t_ranges the splitter targets that fall inside them.
  std::vector<std::pair<_RandomAccessIter, _RandomAccessIter>> d_ranges(
    ttk::MPIsize_ - 1);
  std::vector<std::pair<ttk::SimplexId *, ttk::SimplexId *>> t_ranges(
    ttk::MPIsize_ - 1);
  d_ranges[0] = std::make_pair(first, last);
  t_ranges[0]
    = std::make_pair(targets.data(), targets.data() + ttk::MPIsize_ - 1);

  // subdist[k][p]: number of values process p still has in range k.
  std::vector<std::vector<ttk::SimplexId>> subdist(
    ttk::MPIsize_ - 1, std::vector<ttk::SimplexId>(ttk::MPIsize_));
  std::copy(dist, dist + ttk::MPIsize_, subdist[0].begin());

  // outleft[k][p]: number of values of process p already known to lie
  // left of range k.
  std::vector<std::vector<ttk::SimplexId>> outleft(
    ttk::MPIsize_ - 1, std::vector<ttk::SimplexId>(ttk::MPIsize_, 0));

  for(int n_act = 1; n_act > 0;) {

    for(int k = 0; k < n_act; ++k) {
      assert(subdist[k][ttk::MPIrank_]
             == d_ranges[k].second - d_ranges[k].first);
    }

    // Generate n_act splitter guesses: gather the local median of every
    // active range on all processes (a flat array of ttk::MPIsize_ chunks
    // of n_act values each).
    std::vector<dataType> medians(ttk::MPIsize_ * n_act);
    for(int k = 0; k < n_act; ++k) {
      if(d_ranges[k].first != last) {
        dataType *ptr = &(*d_ranges[k].first);
        medians[ttk::MPIrank_ * n_act + k]
          = ptr[subdist[k][ttk::MPIrank_] / 2];
      } else {
        medians[ttk::MPIrank_ * n_act + k] = *(last - 1);
      }
    }

    MPI_Allgather(MPI_IN_PLACE, n_act, MPI_valueType, &medians[0], n_act,
                  MPI_valueType, ttk::MPIcomm_);

    // For each active range, pick the weighted median of the gathered
    // medians as this round's query value.
    std::vector<dataType> queries(n_act);

    std::vector<ttk::SimplexId> ms_perm(n_real);
    for(int k = 0; k < n_act; ++k) {
      // Sort a permutation of the medians of range k.
      for(int i = 0; i < n_real; ++i)
        ms_perm[i] = i * n_act + k;
      TTK_PSORT(nThreads, ms_perm.data(), ms_perm.data() + n_real,
                PermCompare<dataType, _Compare>(&medians[0], comp));

      // Walk the sorted medians until half of the values remaining in
      // range k are covered.
      ttk::SimplexId mid
        = std::accumulate(subdist[k].begin(), subdist[k].end(),
                          static_cast<ttk::SimplexId>(0))
          / 2;
      ttk::SimplexId query_ind = -1;
      for(int i = 0; i < n_real; ++i) {
        if(subdist[k][ms_perm[i] / n_act] == 0)
          continue;

        mid -= subdist[k][ms_perm[i] / n_act];
        if(mid <= 0) {
          query_ind = ms_perm[i];
          break;
        }
      }

      assert(query_ind >= 0);
      queries[k] = medians[query_ind];
    }
    // Locate the queries locally: for every active range, store the
    // bounds of the run of values equal to the query.
    std::vector<ttk::SimplexId> ind_local(2 * n_act);
    for(int k = 0; k < n_act; ++k) {
      std::pair<_RandomAccessIter, _RandomAccessIter> ind_local_p
        = std::equal_range(
          d_ranges[k].first, d_ranges[k].second, queries[k], comp);

      ind_local[2 * k] = ind_local_p.first - first;
      ind_local[2 * k + 1] = ind_local_p.second - first;
    }

    std::vector<ttk::SimplexId> ind_all(2 * n_act * ttk::MPIsize_);
    MPI_Allgather(ind_local.data(), 2 * n_act, MPI_distanceType,
                  ind_all.data(), 2 * n_act, MPI_distanceType, ttk::MPIcomm_);

    // Sum the local bounds over all processes to get the global position
    // of every query.
    std::vector<std::pair<ttk::SimplexId, ttk::SimplexId>> ind_global(n_act);
    for(int k = 0; k < n_act; ++k) {
      ind_global[k] = std::make_pair(0, 0);
      for(int i = 0; i < ttk::MPIsize_; ++i) {
        ind_global[k].first += ind_all[2 * (i * n_act + k)];
        ind_global[k].second += ind_all[2 * (i * n_act + k) + 1];
      }
    }

    // Sub-ranges that remain active for the next round.
    std::vector<std::pair<_RandomAccessIter, _RandomAccessIter>> d_ranges_x(
      ttk::MPIsize_ - 1);
    std::vector<std::pair<ttk::SimplexId *, ttk::SimplexId *>> t_ranges_x(
      ttk::MPIsize_ - 1);
    std::vector<std::vector<ttk::SimplexId>> subdist_x(
      ttk::MPIsize_ - 1, std::vector<ttk::SimplexId>(ttk::MPIsize_));
    std::vector<std::vector<ttk::SimplexId>> outleft_x(
      ttk::MPIsize_ - 1, std::vector<ttk::SimplexId>(ttk::MPIsize_, 0));
    int n_act_x = 0;

    for(int k = 0; k < n_act; ++k) {
      // Which splitter targets are hit by the query's global bounds?
      ttk::SimplexId *split_low = std::lower_bound(
        t_ranges[k].first, t_ranges[k].second, ind_global[k].first);
      ttk::SimplexId *split_high = std::upper_bound(
        t_ranges[k].first, t_ranges[k].second, ind_global[k].second);

      // These targets are done: distribute the data between the query's
      // global bounds over the processes.
      for(ttk::SimplexId *s = split_low; s != split_high; ++s) {
        ttk::SimplexId excess = *s - ind_global[k].first;
        for(int i = 0; i < ttk::MPIsize_; ++i) {
          ttk::SimplexId amount
            = std::min(ind_all[2 * (i * n_act + k)] + excess,
                       ind_all[2 * (i * n_act + k) + 1]);
          right_ends[(s - targets.data()) + 1][i] = amount;
          excess -= amount - ind_all[2 * (i * n_act + k)];
        }
      }

      // Keep searching left of the query if targets remain there.
      if((split_low - t_ranges[k].first) > 0) {
        t_ranges_x[n_act_x] = std::make_pair(t_ranges[k].first, split_low);
        d_ranges_x[n_act_x]
          = std::make_pair(d_ranges[k].first, first + ind_local[2 * k]);
        for(int i = 0; i < ttk::MPIsize_; ++i) {
          subdist_x[n_act_x][i]
            = ind_all[2 * (i * n_act + k)] - outleft[k][i];
          outleft_x[n_act_x][i] = outleft[k][i];
        }
        ++n_act_x;
      }

      // Keep searching right of the query if targets remain there.
      if((t_ranges[k].second - split_high) > 0) {
        t_ranges_x[n_act_x] = std::make_pair(split_high, t_ranges[k].second);
        d_ranges_x[n_act_x]
          = std::make_pair(first + ind_local[2 * k + 1], d_ranges[k].second);
        for(int i = 0; i < ttk::MPIsize_; ++i) {
          subdist_x[n_act_x][i] = outleft[k][i] + subdist[k][i]
                                  - ind_all[2 * (i * n_act + k) + 1];
          outleft_x[n_act_x][i] = ind_all[2 * (i * n_act + k) + 1];
        }
        ++n_act_x;
      }
    }

    t_ranges = t_ranges_x;
    d_ranges = d_ranges_x;
    subdist = subdist_x;
    outleft = outleft_x;
    n_act = n_act_x;
  }
}
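/* Example (illustration only, not part of psort.h): the std::equal_range
   call above returns, on a sorted range, the half-open run of values
   equivalent to the query, which is exactly what the two offsets stored
   in ind_local record.

     #include <algorithm>
     #include <vector>

     int main() {
       std::vector<int> v{1, 2, 2, 2, 5};
       auto p = std::equal_range(v.begin(), v.end(), 2);
       // p.first - v.begin() == 1 and p.second - v.begin() == 4:
       // the three 2s occupy the half-open index range [1, 4).
     }
*/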
// Redistribute the sorted local blocks according to the splitter table
// right_ends, so that each process receives exactly the values assigned
// to it; also return the chunk boundaries inside the received data.
template <typename dataType>
static void alltoall(std::vector<std::vector<ttk::SimplexId>> &right_ends,
                     std::vector<dataType> &data,
                     std::vector<dataType> &trans_data,
                     ttk::SimplexId *boundaries,
                     MPI_Datatype &MPI_valueType,
                     MPI_Datatype &MPI_distanceType) {

  // MPI transfers use int counts; check the local size first.
  const ttk::SimplexId n_loc_ = data.size();
  if(n_loc_ > INT_MAX) {
    // ... (error path elided in this excerpt)
  }
  int n_loc = static_cast<int>(n_loc_);

  // Per-destination send and receive counts, from the splitter table.
  std::vector<ttk::SimplexId> send_counts(ttk::MPIsize_);
  std::vector<ttk::SimplexId> recv_counts(ttk::MPIsize_);
  ttk::SimplexId chunkSize = 0;
  char overflowInt = 0;

#pragma omp parallel for reduction(+ : overflowInt)
  for(int i = 0; i < ttk::MPIsize_; ++i) {
    const ttk::SimplexId scount
      = right_ends[i + 1][ttk::MPIrank_] - right_ends[i][ttk::MPIrank_];
    const ttk::SimplexId rcount
      = right_ends[ttk::MPIrank_ + 1][i] - right_ends[ttk::MPIrank_][i];
    if(scount > INT_MAX || rcount > INT_MAX) {
      overflowInt = 1;
    }
    send_counts[i] = scount;
    recv_counts[i] = rcount;
  }
  // ... (chunkSize is set to the largest local count)

  MPI_Allreduce(
    MPI_IN_PLACE, &overflowInt, 1, MPI_CHAR, MPI_SUM, ttk::MPIcomm_);
  MPI_Allreduce(
    MPI_IN_PLACE, &chunkSize, 1, MPI_distanceType, MPI_MAX, ttk::MPIcomm_);

  if(overflowInt == 0) {
    // Every count fits in an int: one MPI_Alltoallv does the job.
    std::vector<int> send_counts_int(ttk::MPIsize_);
    std::vector<int> recv_counts_int(ttk::MPIsize_);
    for(int i = 0; i < ttk::MPIsize_; ++i) {
      send_counts_int[i] = static_cast<int>(send_counts[i]);
      recv_counts_int[i] = static_cast<int>(recv_counts[i]);
    }

    std::vector<int> send_disps_int(ttk::MPIsize_);
    std::vector<int> recv_disps_int(ttk::MPIsize_);
    recv_disps_int[0] = 0;
    std::partial_sum(recv_counts_int.data(),
                     recv_counts_int.data() + ttk::MPIsize_ - 1,
                     recv_disps_int.data() + 1);
    send_disps_int[0] = 0;
    std::partial_sum(send_counts_int.data(),
                     send_counts_int.data() + ttk::MPIsize_ - 1,
                     send_disps_int.data() + 1);

    MPI_Alltoallv(data.data(), send_counts_int.data(), send_disps_int.data(),
                  MPI_valueType, trans_data.data(), recv_counts_int.data(),
                  recv_disps_int.data(), MPI_valueType, ttk::MPIcomm_);
    for(int i = 0; i < ttk::MPIsize_; ++i) {
      boundaries[i] = recv_disps_int[i];
    }
  } else {
    // At least one count exceeds INT_MAX: compute 64-bit displacements
    // and exchange the data in several rounds of bounded size.
    std::vector<ttk::SimplexId> send_disps(ttk::MPIsize_);
    std::vector<ttk::SimplexId> recv_disps(ttk::MPIsize_);
    send_disps[0] = 0;
    std::partial_sum(send_counts.data(),
                     send_counts.data() + ttk::MPIsize_ - 1,
                     send_disps.data() + 1);
    recv_disps[0] = 0;
    std::partial_sum(recv_counts.data(),
                     recv_counts.data() + ttk::MPIsize_ - 1,
                     recv_disps.data() + 1);

    // Staging buffers for one round of exchange.
    std::vector<dataType> send_buffer_64bits(INT_MAX);
    std::vector<dataType> recv_buffer_64bits(INT_MAX);
    // At most messageSize values go to each destination per round, so a
    // full round always fits in the staging buffers and every partial
    // count fits in MPI's int type.
    const ttk::SimplexId messageSize = INT_MAX / ttk::MPIsize_;
    std::vector<int> partial_send_count(ttk::MPIsize_, 0);
    std::vector<int> partial_recv_count(ttk::MPIsize_, 0);
    std::vector<int> partial_send_displs(ttk::MPIsize_, 0);
    std::vector<int> partial_recv_displs(ttk::MPIsize_, 0);
    ttk::SimplexId count = 0;
    int moreToSend = 1;

    while(moreToSend != 0) {
      moreToSend = 0;
      send_buffer_64bits.resize(0);
      for(int i = 0; i < ttk::MPIsize_; ++i) {
        if(i > 0) {
          partial_send_displs[i]
            = partial_send_displs[i - 1] + partial_send_count[i - 1];
          partial_recv_displs[i]
            = partial_recv_displs[i - 1] + partial_recv_count[i - 1];
        }
        // Stage at most messageSize of the values still owed to rank i.
        if(send_counts[i] - count * messageSize > 0) {
          if(send_counts[i] - count * messageSize > messageSize) {
            partial_send_count[i] = messageSize;
            moreToSend = 1;
          } else {
            partial_send_count[i] = send_counts[i] - count * messageSize;
          }
          std::copy(data.begin() + send_disps[i] + count * messageSize,
                    data.begin() + send_disps[i] + count * messageSize
                      + partial_send_count[i],
                    send_buffer_64bits.begin() + partial_send_displs[i]);
        } else {
          partial_send_count[i] = 0;
        }
        // Same bookkeeping on the receive side.
        if(recv_counts[i] - count * messageSize > 0) {
          if(recv_counts[i] - count * messageSize > messageSize) {
            partial_recv_count[i] = messageSize;
          } else {
            partial_recv_count[i] = recv_counts[i] - count * messageSize;
          }
        } else {
          partial_recv_count[i] = 0;
        }
      }

      MPI_Alltoallv(send_buffer_64bits.data(), partial_send_count.data(),
                    partial_send_displs.data(), MPI_valueType,
                    recv_buffer_64bits.data(), partial_recv_count.data(),
                    partial_recv_displs.data(), MPI_valueType, ttk::MPIcomm_);

      // Unpack this round's chunks at their final place in trans_data.
      for(int i = 0; i < ttk::MPIsize_; ++i) {
        if(partial_recv_count[i] > 0) {
          std::copy(recv_buffer_64bits.begin() + partial_recv_displs[i],
                    recv_buffer_64bits.begin() + partial_recv_displs[i]
                      + partial_recv_count[i],
                    trans_data.begin() + recv_disps[i] + count * messageSize);
        }
      }

      count++;
      MPI_Allreduce(
        MPI_IN_PLACE, &moreToSend, 1, MPI_INTEGER, MPI_SUM, ttk::MPIcomm_);
    }

    for(int i = 0; i < ttk::MPIsize_; ++i) {
      boundaries[i] = recv_disps[i];
    }
  }
  // The merge step needs the total local size as the last boundary.
  boundaries[ttk::MPIsize_] = n_loc;
}
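/* Example (illustration only, not part of psort.h): both branches above
   reduce to MPI_Alltoallv, whose counts/displacements contract the
   function relies on. A minimal standalone run, where every rank sends
   one value to every rank:

     #include <mpi.h>
     #include <numeric>
     #include <vector>

     int main(int argc, char **argv) {
       MPI_Init(&argc, &argv);
       int rank, size;
       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
       MPI_Comm_size(MPI_COMM_WORLD, &size);

       std::vector<int> sendbuf(size, rank), recvbuf(size);
       std::vector<int> counts(size, 1), displs(size);
       std::iota(displs.begin(), displs.end(), 0);

       // Block i of sendbuf goes to rank i; afterwards recvbuf on every
       // rank is {0, 1, ..., size - 1}.
       MPI_Alltoallv(sendbuf.data(), counts.data(), displs.data(), MPI_INT,
                     recvbuf.data(), counts.data(), displs.data(), MPI_INT,
                     MPI_COMM_WORLD);

       MPI_Finalize();
     }
*/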
// Merge the ttk::MPIsize_ sorted runs received by alltoall (in, with run
// boundaries in disps) into out, pairwise up a tree.
template <class _Compare, typename _RandomAccessIter>
void psort_merge(_RandomAccessIter in,
                 _RandomAccessIter out,
                 ttk::SimplexId *disps,
                 _Compare comp,
                 _Compare oppositeComp) {

  // bufs[locs[i]] is the buffer currently holding the run that starts at
  // disps[i]; runs ping-pong between the two buffers as they are merged.
  _RandomAccessIter bufs[2] = {in, out};
  std::vector<int> locs(ttk::MPIsize_ + 1, 0);

  int next = 1;
  while(true) {
    const int stride = next * 2;
    if(stride >= ttk::MPIsize_)
      break;

    for(int i = 0; i + next < ttk::MPIsize_; i += stride) {
      const int end_ind = std::min(i + stride, ttk::MPIsize_);

      std::merge(bufs[locs[i]] + disps[i], bufs[locs[i]] + disps[i + next],
                 bufs[locs[i + next]] + disps[i + next],
                 bufs[locs[i + next]] + disps[end_ind],
                 bufs[1 - locs[i]] + disps[i], comp);
      locs[i] = 1 - locs[i];
    }

    next = stride;
  }

  // Two sorted runs remain, split at disps[next]; merge them into out,
  // whichever buffers they ended up in.
  if(locs[0] == 0) {
    // The first run is still in the input buffer.
    std::merge(in, in + disps[next], bufs[locs[next]] + disps[next],
               bufs[locs[next]] + disps[ttk::MPIsize_], out, comp);
  } else if(locs[next] == 0) {
    // The first run is already in out and the second one still in the
    // input buffer: merge backwards, with reverse iterators and the
    // opposite comparator, so no value of out is overwritten before it
    // has been read.
    std::merge(
      std::reverse_iterator<_RandomAccessIter>(in + disps[ttk::MPIsize_]),
      std::reverse_iterator<_RandomAccessIter>(in + disps[next]),
      std::reverse_iterator<_RandomAccessIter>(out + disps[next]),
      std::reverse_iterator<_RandomAccessIter>(out),
      std::reverse_iterator<_RandomAccessIter>(out + disps[ttk::MPIsize_]),
      oppositeComp);
  } else {
    // Both runs already sit in out: merge them into the spare buffer,
    // then copy the result back.
    std::merge(out, out + disps[next], out + disps[next],
               out + disps[ttk::MPIsize_], in, comp);
    std::copy(in, in + disps[ttk::MPIsize_], out);
  }
}
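/* Example (illustration only, not part of psort.h): the reverse-iterator
   merge above is the standard trick for merging "from the back". Merging
   the reversed inputs with the opposite comparator fills the output back
   to front and yields the same result as a forward merge:

     #include <algorithm>
     #include <functional>
     #include <vector>

     int main() {
       std::vector<int> a{1, 4, 7};
       std::vector<int> b{2, 3, 9};
       std::vector<int> out(6);

       std::merge(a.rbegin(), a.rend(), b.rbegin(), b.rend(), out.rbegin(),
                  std::greater<int>());
       // out == {1, 2, 3, 4, 7, 9}
     }
*/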
// Entry point: sort the distributed array data in place. dist[i] holds
// the number of values initially owned by process i; this distribution is
// preserved, so process i also ends up with dist[i] (globally sorted)
// values.
template <typename dataType, typename _Compare>
void parallel_sort(std::vector<dataType> &data,
                   _Compare comp,
                   _Compare oppositeComp,
                   std::vector<ttk::SimplexId> &dist,
                   MPI_Datatype &MPI_valueType,
                   MPI_Datatype &MPI_distanceType,
                   int nThreads) {

  // Sort the local data.
  TTK_PSORT(nThreads, data.begin(), data.end(), comp);

  // Find the splitters.
  std::vector<std::vector<ttk::SimplexId>> right_ends(
    ttk::MPIsize_ + 1, std::vector<ttk::SimplexId>(ttk::MPIsize_, 0));
  psort_split<_Compare>(data.begin(), data.end(), dist.data(), comp,
                        right_ends, MPI_valueType, MPI_distanceType,
                        nThreads);

  // Exchange the data so that each process receives its chunk.
  const ttk::SimplexId n_loc = data.size();
  std::vector<dataType> trans_data(n_loc);
  std::vector<ttk::SimplexId> boundaries(ttk::MPIsize_ + 1);
  alltoall(right_ends, data, trans_data, boundaries.data(), MPI_valueType,
           MPI_distanceType);

  // Merge the received sorted runs back into data.
  psort_merge<_Compare>(
    trans_data.data(), data.data(), boundaries.data(), comp, oppositeComp);
}
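/* Example (illustration only, not part of psort.h): a hypothetical driver
   for parallel_sort. It assumes ttk::SimplexId is 64 bits here, so that
   MPI_LONG_LONG can serve as both the value and the distance type, and it
   uses function pointers so that comp and oppositeComp share one type.

     #include <mpi.h>
     #include <vector>

     void sortDistributed(std::vector<long long> &data) {
       // dist[i] = number of values initially held by rank i; psort keeps
       // this distribution, so rank i ends up with dist[i] sorted values.
       std::vector<ttk::SimplexId> dist(ttk::MPIsize_);
       ttk::SimplexId localSize = data.size();
       MPI_Allgather(&localSize, 1, MPI_LONG_LONG, dist.data(), 1,
                     MPI_LONG_LONG, ttk::MPIcomm_);

       using CmpFn = bool (*)(const long long &, const long long &);
       CmpFn lt = [](const long long &a, const long long &b) { return a < b; };
       CmpFn gt = [](const long long &a, const long long &b) { return a > b; };

       MPI_Datatype valueType = MPI_LONG_LONG;
       MPI_Datatype distanceType = MPI_LONG_LONG;

       parallel_sort<long long, CmpFn>(
         data, lt, gt, dist, valueType, distanceType, 1);
     }
*/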