TTK
Loading...
Searching...
No Matches
MergeTreeClustering.h
Go to the documentation of this file.
1
24
25#define treesMatchingVector \
26 std::vector<std::vector<std::tuple<ftm::idNode, ftm::idNode, double>>>
27#define matchingVectorType std::vector<treesMatchingVector>
28
29#pragma once
30
31#include <random>
32
33// ttk common includes
34#include <Debug.h>
35
36#include "MergeTreeBarycenter.h"
37
38namespace ttk {
39
44 // TODO rename dataType2 to dataType and remove template everywhere else in
45 // this class
46 template <class dataType2>
47 class MergeTreeClustering : virtual public Debug, public MergeTreeBarycenter {
48
49 private:
50 bool parallelizeUpdate_ = true;
51
52 unsigned int noCentroids_ = 2;
53
54 // Progressive parameters
55 int noIterationC_ = 0;
56 double addDeletedNodesTime_ = 0;
57
58 // Accelerated KMeans
59 bool acceleratedInitialized_ = false;
60 std::vector<std::vector<double>> lowerBound_;
61 std::vector<double> upperBound_;
62 std::vector<int> bestCentroid_, oldBestCentroid_;
63 std::vector<double> bestDistance_;
64 std::vector<bool> recompute_;
65 std::vector<ftm::MergeTree<dataType2>> oldCentroids_, oldCentroids2_;
66
67 // Clean correspondence
68 std::vector<std::vector<int>> trees2NodeCorr_;
69
70 public:
73 "MergeTreeClustering"); // inherited from Debug: prefix will be printed
74 // at the beginning of every msg
75 }
76 ~MergeTreeClustering() override = default;
77
78 void setNoCentroids(unsigned int noCentroidsT) {
79 noCentroids_ = noCentroidsT;
80 }
81
82 void setMixtureCoefficient(double coef) {
84 }
85
86 std::vector<std::vector<int>> getTrees2NodeCorr() {
87 return trees2NodeCorr_;
88 }
89
94 // ------------------------------------------------------------------------
95 // Initialization
96 // ------------------------------------------------------------------------
97 // KMeans++ init
98 template <class dataType>
100 std::vector<ftm::FTMTree_MT *> &trees,
101 std::vector<ftm::FTMTree_MT *> &trees2,
102 std::vector<std::vector<ftm::MergeTree<dataType>>> &allCentroids) {
103 allCentroids.resize(
104 2, std::vector<ftm::MergeTree<dataType>>(noCentroids_));
105 std::vector<dataType> distances(
106 trees.size(), std::numeric_limits<dataType>::max());
107
108 // Manage size limited trees
109 double limitPercent = barycenterSizeLimitPercent_ / noCentroids_;
110 std::vector<ftm::MergeTree<dataType>> mTreesLimited, mTrees2Limited;
111 bool doSizeLimit
112 = (limitPercent > 0.0 or barycenterMaximumNumberOfPairs_ > 0);
113 if(doSizeLimit) {
114 getSizeLimitedTrees<dataType>(
115 trees, barycenterMaximumNumberOfPairs_, limitPercent, mTreesLimited);
116 if(trees2.size() != 0)
117 getSizeLimitedTrees<dataType>(trees2, barycenterMaximumNumberOfPairs_,
118 limitPercent, mTrees2Limited);
119 }
120
121 // Init centroids
122 for(unsigned int i = 0; i < noCentroids_; ++i) {
123 int bestIndex = -1;
124 if(i == 0) {
125 bestIndex = getBestInitTreeIndex<dataType>(
126 trees, trees2, limitPercent, false);
127 } else {
128 // Create vector of probabilities
129 double sum = 0;
130 for(auto val : distances)
131 sum += val;
132 double bestValue = std::numeric_limits<double>::lowest();
133 std::vector<double> probabilities(trees.size());
134 for(unsigned int j = 0; j < distances.size(); ++j) {
135 probabilities[j]
136 = (sum != 0 ? distances[j] / sum : 1.0 / distances.size());
137 if(probabilities[j] > bestValue) {
138 bestValue = probabilities[j];
139 bestIndex = j;
140 }
141 }
142 if(not deterministic_) {
143 std::random_device rd;
144 std::default_random_engine generator(rd());
145 std::discrete_distribution<int> distribution(
146 probabilities.begin(), probabilities.end());
147 bestIndex = distribution(generator);
148 }
149 }
150 printMsg(
151 "Init index : " + std::to_string(bestIndex), debug::Priority::DETAIL);
152 // Create new centroid
153 allCentroids[0][i]
154 = ftm::copyMergeTree<dataType>(trees[bestIndex], true);
155 limitSizeBarycenter(allCentroids[0][i], trees, limitPercent);
156 ftm::cleanMergeTree<dataType>(allCentroids[0][i]);
157 if(trees2.size() != 0) {
158 allCentroids[1][i]
159 = ftm::copyMergeTree<dataType>(trees2[bestIndex], true);
160 limitSizeBarycenter(allCentroids[1][i], trees2, limitPercent);
161 ftm::cleanMergeTree<dataType>(allCentroids[1][i]);
162 }
163
164 if(i == noCentroids_ - 1)
165 continue;
166#ifdef TTK_ENABLE_OPENMP
167#pragma omp parallel for schedule(dynamic) shared(allCentroids) \
168 num_threads(this->threadNumber_) if(parallelize_)
169#endif
170 for(unsigned int j = 0; j < trees.size(); ++j) {
171 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>> matching,
172 matching2;
173 dataType distanceT, distanceT2;
174 ftm::FTMTree_MT *treeToUse
175 = (doSizeLimit ? &(mTreesLimited[j].tree) : trees[j]);
176 computeOneDistance<dataType>(treeToUse, allCentroids[0][i], matching,
177 distanceT, useDoubleInput_);
178 if(trees2.size() != 0) {
179 ftm::FTMTree_MT *tree2ToUse
180 = (doSizeLimit ? &(mTrees2Limited[j].tree) : trees2[j]);
181 computeOneDistance<dataType>(tree2ToUse, allCentroids[1][i],
182 matching2, distanceT2, useDoubleInput_,
183 false);
184 distanceT = mixDistances<dataType>(distanceT, distanceT2);
185 }
186 distances[j] = std::min(distances[j], distanceT);
187 }
188 }
189 }
190
191 template <class dataType>
192 void initNewCentroid(std::vector<ftm::FTMTree_MT *> &trees,
193 ftm::MergeTree<dataType> &centroid,
194 int noNewCentroid) {
195 std::vector<std::tuple<double, int>> distancesAndIndexes(
196 bestDistance_.size());
197 for(unsigned int i = 0; i < bestDistance_.size(); ++i)
198 distancesAndIndexes[i] = std::make_tuple(-bestDistance_[i], i);
199 std::sort(distancesAndIndexes.begin(), distancesAndIndexes.end());
200 int bestIndex = std::get<1>(distancesAndIndexes[noNewCentroid]);
201 centroid = ftm::copyMergeTree<dataType>(trees[bestIndex], true);
202 limitSizeBarycenter(centroid, trees);
203 ftm::cleanMergeTree<dataType>(centroid);
204 }
205
206 template <class dataType>
208 std::vector<ftm::FTMTree_MT *> &trees,
209 std::vector<ftm::MergeTree<dataType>> &centroids,
210 std::vector<ftm::FTMTree_MT *> &ttkNotUsed(trees2)) {
211 lowerBound_.clear();
212 lowerBound_.resize(
213 trees.size(), std::vector<double>(centroids.size(), 0));
214 upperBound_.clear();
215 upperBound_.resize(trees.size(), std::numeric_limits<double>::max());
216 bestCentroid_.clear();
217 bestCentroid_.resize(trees.size(), -1);
218 oldBestCentroid_.clear();
219 oldBestCentroid_.resize(trees.size(), -1);
220 bestDistance_.clear();
221 bestDistance_.resize(trees.size(), std::numeric_limits<double>::max());
222 recompute_.clear();
223 recompute_.resize(trees.size(), true);
224 }
225
226 template <class dataType>
227 void
228 initAcceleratedKMeans(std::vector<ftm::FTMTree_MT *> &trees,
229 std::vector<ftm::MergeTree<dataType>> &centroids,
230 std::vector<ftm::FTMTree_MT *> &trees2,
231 std::vector<ftm::MergeTree<dataType>> &centroids2) {
232 acceleratedInitialized_ = true;
233 std::vector<std::tuple<int, int>> assignmentC;
234 std::vector<dataType> bestDistanceT(
235 trees.size(), std::numeric_limits<dataType>::max());
236 assignmentCentroidsNaive<dataType>(
237 trees, centroids, assignmentC, bestDistanceT, trees2, centroids2);
238 for(unsigned int i = 0; i < bestDistanceT.size(); ++i)
239 bestDistance_[i] = bestDistanceT[i];
240 for(auto asgn : assignmentC)
241 bestCentroid_[std::get<1>(asgn)] = std::get<0>(asgn);
242 for(unsigned int i = 0; i < bestDistance_.size(); ++i)
243 upperBound_[i] = bestDistance_[i];
244 }
245
246 template <class dataType>
247 void copyCentroids(std::vector<ftm::MergeTree<dataType>> &centroids,
248 std::vector<ftm::MergeTree<dataType>> &oldCentroids) {
249 oldCentroids.clear();
250 for(unsigned int i = 0; i < centroids.size(); ++i)
251 oldCentroids.push_back(ftm::copyMergeTree<dataType>(centroids[i]));
252 }
253
254 // ------------------------------------------------------------------------
255 // Assignment
256 // ------------------------------------------------------------------------
257 template <class dataType>
259 std::vector<ftm::FTMTree_MT *> &trees,
260 std::vector<ftm::MergeTree<dataType>> &centroids,
261 std::vector<std::tuple<int, int>> &assignmentC,
262 std::vector<dataType> &bestDistanceT,
263 std::vector<ftm::FTMTree_MT *> &trees2,
264 std::vector<ftm::MergeTree<dataType>> &centroids2) {
265 if(not acceleratedInitialized_) {
266 initAcceleratedKMeans<dataType>(trees, centroids, trees2, centroids2);
267 } else {
268 // Compute distance between old and new corresponding centroids
269 std::vector<dataType> distanceShift(centroids.size()),
270 distanceShift2(centroids2.size());
271#ifdef TTK_ENABLE_OPENMP
272#pragma omp parallel for schedule(dynamic) \
273 shared(centroids, centroids2, oldCentroids_, oldCentroids2_) \
274 num_threads(this->threadNumber_) if(parallelize_)
275#endif
276 for(unsigned int i = 0; i < centroids.size(); ++i) {
277 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>> matching,
278 matching2;
279 computeOneDistance<dataType>(centroids[i], oldCentroids_[i], matching,
280 distanceShift[i], useDoubleInput_);
281 if(trees2.size() != 0) {
282 computeOneDistance<dataType>(centroids2[i], oldCentroids2_[i],
283 matching2, distanceShift2[i],
284 useDoubleInput_, false);
285 distanceShift[i]
286 = mixDistances<dataType>(distanceShift[i], distanceShift2[i]);
287 }
288 }
289
290 // Step 5
291 for(unsigned int i = 0; i < trees.size(); ++i)
292 for(unsigned int c = 0; c < centroids.size(); ++c)
293 lowerBound_[i][c]
294 = std::max(lowerBound_[i][c] - distanceShift[c], 0.0);
295
296 // Step 6
297 for(unsigned int i = 0; i < trees.size(); ++i) {
298 upperBound_[i] = upperBound_[i] + distanceShift[bestCentroid_[i]];
299 recompute_[i] = true;
300 }
301 }
302
303 // Step 1
304 std::vector<std::vector<double>> centroidsDistance, centroidsDistance2;
305 getCentroidsDistanceMatrix<dataType>(
306 centroids, centroidsDistance, useDoubleInput_);
307 if(trees2.size() != 0) {
308 getCentroidsDistanceMatrix<dataType>(
309 centroids2, centroidsDistance2, useDoubleInput_, false);
310 mixDistancesMatrix(centroidsDistance, centroidsDistance2);
311 }
312 std::vector<double> centroidScore(
313 centroids.size(), std::numeric_limits<double>::max());
314 for(unsigned int i = 0; i < centroids.size(); ++i)
315 for(unsigned int j = i + 1; j < centroids.size(); ++j) {
316 if(0.5 * centroidsDistance[i][j] < centroidScore[i])
317 centroidScore[i] = 0.5 * centroidsDistance[i][j];
318 if(0.5 * centroidsDistance[i][j] < centroidScore[j])
319 centroidScore[j] = 0.5 * centroidsDistance[i][j];
320 }
321
322 // Step 2
323 std::vector<bool> identified(trees.size());
324 for(unsigned int i = 0; i < trees.size(); ++i)
325 identified[i] = (upperBound_[i] <= centroidScore[bestCentroid_[i]]);
326
327 // Step 3
328#ifdef TTK_ENABLE_OPENMP
329#pragma omp parallel for schedule(dynamic) shared(centroids, centroids2) \
330 num_threads(this->threadNumber_) if(parallelize_)
331#endif
332 for(unsigned int i = 0; i < trees.size(); ++i)
333 for(unsigned int c = 0; c < centroids.size(); ++c) {
334 if(not identified[i] and (int) c != bestCentroid_[i]
335 and upperBound_[i] > lowerBound_[i][c]
336 and upperBound_[i]
337 > 0.5 * centroidsDistance[bestCentroid_[i]][c]) {
338 // Step 3a
339 if(recompute_[i]) {
340 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>>
341 matching, matching2;
342 dataType distance, distance2;
343 computeOneDistance<dataType>(trees[i],
344 centroids[bestCentroid_[i]],
345 matching, distance, useDoubleInput_);
346 if(trees2.size() != 0) {
347 computeOneDistance<dataType>(
348 trees2[i], centroids2[bestCentroid_[i]], matching2, distance2,
349 useDoubleInput_, false);
350 distance = mixDistances<dataType>(distance, distance2);
351 }
352 recompute_[i] = false;
353 lowerBound_[i][bestCentroid_[i]] = distance;
354 upperBound_[i] = distance;
355 bestDistance_[i] = distance;
356 } else {
357 bestDistance_[i] = upperBound_[i];
358 }
359 // Step 3b
360 if(bestDistance_[i] > lowerBound_[i][c]
361 and bestDistance_[i]
362 > 0.5 * centroidsDistance[bestCentroid_[i]][c]) {
363 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>>
364 matching, matching2;
365 dataType distance, distance2;
366 computeOneDistance<dataType>(
367 trees[i], centroids[c], matching, distance, useDoubleInput_);
368 if(trees2.size() != 0) {
369 computeOneDistance<dataType>(trees2[i], centroids2[c],
370 matching2, distance2,
371 useDoubleInput_, false);
372 distance = mixDistances<dataType>(distance, distance2);
373 }
374 lowerBound_[i][c] = distance;
375 if(distance < bestDistance_[i]) {
376 bestCentroid_[i] = c;
377 upperBound_[i] = distance;
378 bestDistance_[i] = distance;
379 }
380 }
381 }
382 }
383
384 // Copy centroids for next step
385 copyCentroids<dataType>(centroids, oldCentroids_);
386 if(trees2.size() != 0)
387 copyCentroids<dataType>(centroids2, oldCentroids2_);
388
389 // Manage output
390 for(unsigned int i = 0; i < bestDistance_.size(); ++i)
391 bestDistanceT[i] = bestDistance_[i];
392 for(unsigned int i = 0; i < bestCentroid_.size(); ++i)
393 assignmentC.emplace_back(bestCentroid_[i], i);
394 }
395
396 template <class dataType>
397 void
398 assignmentCentroids(std::vector<ftm::FTMTree_MT *> &trees,
399 std::vector<ftm::MergeTree<dataType>> &centroids,
400 std::vector<std::tuple<int, int>> &assignmentC,
401 std::vector<dataType> &bestDistanceT,
402 std::vector<ftm::FTMTree_MT *> &trees2,
403 std::vector<ftm::MergeTree<dataType>> &centroids2) {
404 oldBestCentroid_ = bestCentroid_;
405 assignmentCentroidsAccelerated<dataType>(
406 trees, centroids, assignmentC, bestDistanceT, trees2, centroids2);
407 }
408
409 template <class dataType>
411 std::vector<ftm::FTMTree_MT *> &trees,
412 std::vector<ftm::MergeTree<dataType>> &centroids,
413 matchingVectorType &matchingsC,
414 std::vector<std::tuple<int, int>> &assignmentC,
415 std::vector<dataType> &bestDistanceT,
416 std::vector<ftm::FTMTree_MT *> &trees2,
417 std::vector<ftm::MergeTree<dataType>> &centroids2,
418 matchingVectorType &matchingsC2) {
419 int noC = centroids.size();
420 std::vector<std::vector<ftm::FTMTree_MT *>> assignedTrees(noC),
421 assignedTrees2(noC);
422 std::vector<std::vector<int>> assignedTreesIndex(noC);
423
424 for(auto asgn : assignmentC) {
425 assignedTreesIndex[std::get<0>(asgn)].push_back(std::get<1>(asgn));
426 assignedTrees[std::get<0>(asgn)].push_back(trees[std::get<1>(asgn)]);
427 if(trees2.size() != 0)
428 assignedTrees2[std::get<0>(asgn)].push_back(
429 trees2[std::get<1>(asgn)]);
430 }
431
432#ifdef TTK_ENABLE_OPENMP
433#pragma omp parallel for schedule(dynamic) shared(centroids, centroids2) \
434 num_threads(this->threadNumber_) if(parallelize_)
435#endif
436 for(unsigned int i = 0; i < centroids.size(); ++i) {
437 std::vector<dataType> distances(assignedTrees[i].size(), 0);
438 std::vector<dataType> distances2(assignedTrees[i].size(), 0);
439 treesMatchingVector matching(trees.size()), matching2(trees2.size());
440 assignment<dataType>(
441 assignedTrees[i], centroids[i], matching, distances, useDoubleInput_);
442 matchingsC[i] = matching;
443 if(trees2.size() != 0) {
444 assignment<dataType>(assignedTrees2[i], centroids2[i], matching2,
445 distances2, useDoubleInput_, false);
446 matchingsC2[i] = matching2;
447 for(unsigned int j = 0; j < assignedTreesIndex[i].size(); ++j)
448 distances[j] = mixDistances<dataType>(distances[j], distances2[j]);
449 }
450 for(unsigned int j = 0; j < assignedTreesIndex[i].size(); ++j) {
451 int index = assignedTreesIndex[i][j];
452 bestDistanceT[index] = distances[j];
453 }
454 }
455 }
456
457 template <class dataType>
459 std::vector<ftm::FTMTree_MT *> &trees,
460 std::vector<ftm::MergeTree<dataType>> &centroids,
461 std::vector<std::tuple<int, int>> &assignmentC,
462 std::vector<dataType> &bestDistanceT,
463 std::vector<ftm::FTMTree_MT *> &trees2,
464 std::vector<ftm::MergeTree<dataType>> &centroids2) {
465 std::vector<int> bestCentroidT(trees.size(), -1);
466
467#ifdef TTK_ENABLE_OPENMP
468#pragma omp parallel for schedule(dynamic) shared(centroids, centroids2) \
469 num_threads(this->threadNumber_) if(parallelize_)
470#endif
471 for(unsigned int i = 0; i < trees.size(); ++i) {
472 for(unsigned int j = 0; j < centroids.size(); ++j) {
473 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>> matching;
474 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>> matching2;
475 dataType distance, distance2;
476 computeOneDistance<dataType>(
477 trees[i], centroids[j], matching, distance, useDoubleInput_);
478 if(trees2.size() != 0) {
479 computeOneDistance<dataType>(trees2[i], centroids2[j], matching2,
480 distance2, useDoubleInput_, false);
481 distance = mixDistances<dataType>(distance, distance2);
482 }
483 if(distance < bestDistanceT[i]) {
484 bestDistanceT[i] = distance;
485 bestDistance_[i] = distance;
486 bestCentroidT[i] = j;
487 bestCentroid_[i] = j;
488 }
489 }
490 }
491
492 for(unsigned int i = 0; i < bestCentroidT.size(); ++i)
493 assignmentC.emplace_back(bestCentroidT[i], i);
494 }
495
496 template <class dataType>
498 std::vector<ftm::MergeTree<dataType>> &centroids,
499 std::vector<std::vector<double>> &distanceMatrix,
500 bool useDoubleInput = false,
501 bool isFirstInput = true) {
502 std::vector<ftm::FTMTree_MT *> trees(centroids.size());
503 for(size_t i = 0; i < centroids.size(); ++i) {
504 trees[i] = &(centroids[i].tree);
505 }
506 getDistanceMatrix<dataType>(
507 trees, distanceMatrix, useDoubleInput, isFirstInput);
508 }
509
511 std::vector<int> &nodeCorr,
512 std::vector<int> &assignedTreesIndex) {
513 for(int i : assignedTreesIndex) {
514 std::vector<std::tuple<ftm::idNode, ftm::idNode, double>> newMatching;
515 for(auto tup : matchingT[i])
516 newMatching.emplace_back(
517 nodeCorr[std::get<0>(tup)], std::get<1>(tup), std::get<2>(tup));
518 matchingT[i] = newMatching;
519 }
520 }
521
522 // ------------------------------------------------------------------------
523 // Update
524 // ------------------------------------------------------------------------
525 bool samePreviousAssignment(int clusterId) {
526 for(unsigned int i = 0; i < bestCentroid_.size(); ++i)
527 if(bestCentroid_[i] == clusterId
528 and bestCentroid_[i] != oldBestCentroid_[i])
529 return false;
530 return true;
531 }
532
533 template <class dataType>
534 bool updateCentroids(std::vector<ftm::FTMTree_MT *> &trees,
535 std::vector<ftm::MergeTree<dataType>> &centroids,
536 std::vector<double> &alphas,
537 std::vector<std::tuple<int, int>> &assignmentC) {
538 bool oneCentroidUpdated = false;
539 int noC = centroids.size();
540 std::vector<std::vector<ftm::FTMTree_MT *>> assignedTrees(noC);
541 std::vector<std::vector<int>> assignedTreesIndex(noC);
542 std::vector<std::vector<double>> assignedAlphas(noC);
543
544 for(auto asgn : assignmentC) {
545 assignedTrees[std::get<0>(asgn)].push_back(trees[std::get<1>(asgn)]);
546 assignedTreesIndex[std::get<0>(asgn)].push_back(std::get<1>(asgn));
547 assignedAlphas[std::get<0>(asgn)].push_back(alphas[std::get<1>(asgn)]);
548 }
549
550 int cpt = 0;
551 std::vector<int> noNewCentroid(centroids.size(), -1);
552 for(unsigned int i = 0; i < centroids.size(); ++i)
553 if(assignedTrees[i].size() == 0) {
554 noNewCentroid[i] = cpt;
555 ++cpt;
556 }
557
558#ifdef TTK_ENABLE_OPENMP
559#pragma omp parallel num_threads(this->threadNumber_) \
560 shared(centroids) if(parallelize_ and parallelizeUpdate_)
561 {
562#pragma omp single nowait
563 {
564#endif
565 for(unsigned int i = 0; i < centroids.size(); ++i) {
566#ifdef TTK_ENABLE_OPENMP
567#pragma omp task firstprivate(i) shared(centroids)
568 {
569#endif
570 if(assignedTrees[i].size() == 0) {
571 // Init new centroid if no trees are assigned to it
572 initNewCentroid<dataType>(
573 trees, centroids[i], noNewCentroid[i]);
574 for(unsigned int t = 0; t < trees.size(); ++t)
575 lowerBound_[t][i] = 0;
576 } else if(assignedTrees[i].size() == 1) {
577 centroids[i]
578 = ftm::copyMergeTree<dataType>(assignedTrees[i][0], true);
579 limitSizeBarycenter(centroids[i], assignedTrees[i]);
580 ftm::cleanMergeTree<dataType>(centroids[i]);
581 } else if(not samePreviousAssignment(i)) {
582 // Do not update if same previous assignment
583 // And compute barycenter of the assigned trees otherwise
584 oneCentroidUpdated = true;
585 double alphasSum = 0;
586 for(unsigned int j = 0; j < assignedAlphas[i].size(); ++j)
587 alphasSum += assignedAlphas[i][j];
588 for(unsigned int j = 0; j < assignedAlphas[i].size(); ++j)
589 assignedAlphas[i][j] /= alphasSum;
590 treesMatchingVector matching(assignedTrees[i].size());
591 computeOneBarycenter<dataType>(
592 assignedTrees[i], centroids[i], assignedAlphas[i], matching);
593 std::vector<ftm::idNode> deletedNodesT;
594 persistenceThresholding<dataType>(
595 &(centroids[i].tree), 0, deletedNodesT);
596 ftm::cleanMergeTree<dataType>(centroids[i]);
597 }
598#ifdef TTK_ENABLE_OPENMP
599 } // pragma omp task
600#endif
601 }
602#ifdef TTK_ENABLE_OPENMP
603#pragma omp taskwait
604 } // pragma omp single nowait
605 } // pragma omp parallel
606#endif
607 return oneCentroidUpdated;
608 }
609
610 template <class dataType>
612 std::vector<ftm::FTMTree_MT *> &trees,
613 ftm::MergeTree<dataType> &baryMergeTree,
614 std::vector<double> &alphas,
615 std::vector<std::vector<std::tuple<ftm::idNode, ftm::idNode, double>>>
616 &finalMatchings) {
617 MergeTreeBarycenter mergeTreeBary;
618 mergeTreeBary.setDebugLevel(std::min(debugLevel_, 2));
619 mergeTreeBary.setBranchDecomposition(true);
621 mergeTreeBary.setKeepSubtree(keepSubtree_);
623 mergeTreeBary.setIsCalled(true);
624 mergeTreeBary.setThreadNumber(this->threadNumber_);
625 mergeTreeBary.setDistanceSquaredRoot(true); // squared root
627 mergeTreeBary.setDeterministic(deterministic_);
628 mergeTreeBary.setTol(tol_);
632
633 mergeTreeBary.computeBarycenter<dataType>(
634 trees, baryMergeTree, alphas, finalMatchings);
635
636 addDeletedNodesTime_ += mergeTreeBary.getAddDeletedNodesTime();
637 }
638
639 // ------------------------------------------------------------------------
640 // Main Functions
641 // ------------------------------------------------------------------------
642 template <class dataType>
643 void computeCentroids(std::vector<ftm::FTMTree_MT *> &trees,
644 std::vector<ftm::MergeTree<dataType>> &centroids,
645 matchingVectorType &outputMatching,
646 std::vector<double> &alphas,
647 std::vector<int> &clusteringAssignment,
648 std::vector<ftm::FTMTree_MT *> &trees2,
649 std::vector<ftm::MergeTree<dataType>> &centroids2,
650 matchingVectorType &outputMatching2) {
651 Timer t_clust;
652
653 printCentroidsStats(centroids, centroids2);
654
655 // Run
656 int noCentroidsT = centroids.size();
657 bool converged = false;
658 dataType inertia = -1;
659 dataType minInertia = std::numeric_limits<dataType>::max();
660 int cptBlocked = 0;
661 noIterationC_ = 0;
662 std::vector<std::tuple<int, int>> assignmentC;
663 std::vector<dataType> bestDistanceT(
664 trees.size(), std::numeric_limits<dataType>::max());
665 while(not converged) {
666 ++noIterationC_;
667
669 std::stringstream ssIter;
670 ssIter << "Iteration " << noIterationC_;
671 printMsg(ssIter.str());
672
673 // --- Assignment
674 Timer t_assignment;
675 assignmentCentroids<dataType>(
676 trees, centroids, assignmentC, bestDistanceT, trees2, centroids2);
677 auto t_assignment_time = t_assignment.getElapsedTime();
678 printMsg("Assignment", 1, t_assignment_time, this->threadNumber_);
679
680 // --- Update
681 Timer t_update;
682 bool trees1Updated = true, trees2Updated = true;
683 trees1Updated
684 = updateCentroids<dataType>(trees, centroids, alphas, assignmentC);
685 if(trees2.size() != 0)
686 trees2Updated = updateCentroids<dataType>(
687 trees2, centroids2, alphas, assignmentC);
688 auto t_update_time = t_update.getElapsedTime();
689 printMsg("Update", 1, t_update_time, this->threadNumber_);
690 printCentroidsStats(centroids, centroids2);
691
692 // --- Check convergence
693 dataType currentInertia = 0;
694 for(auto distance : bestDistanceT)
695 currentInertia += distance * distance;
696 converged = std::abs((double)(inertia - currentInertia)) < 0.01;
697 inertia = currentInertia;
698 std::stringstream ss3;
699 ss3 << "Inertia : " << inertia;
700 printMsg(ss3.str());
701
702 minInertia = std::min(minInertia, inertia);
703 if(not converged) {
704 cptBlocked += (minInertia < inertia) ? 1 : 0;
705 converged = (cptBlocked >= 10);
706 }
707
708 // Converged if barycenters were not updated (same assignment than last
709 // iteration)
710 converged = converged or (not trees1Updated and not trees2Updated);
711
712 // --- Reset vectors
713 if(not converged) {
714 assignmentC.clear();
715 bestDistanceT.clear();
716 bestDistanceT.resize(
717 trees.size(), std::numeric_limits<dataType>::max());
718 }
719 }
720
721 // Final processing
723 printMsg("Final assignment");
724 matchingVectorType matchingsC(noCentroidsT);
725 matchingVectorType matchingsC2(noCentroidsT);
726 finalAssignmentCentroids<dataType>(trees, centroids, matchingsC,
727 assignmentC, bestDistanceT, trees2,
728 centroids2, matchingsC2);
729 for(auto dist : bestDistanceT)
730 finalDistances_.push_back(dist);
731 dataType currentInertia = 0;
732 for(auto distance : bestDistanceT)
733 currentInertia += distance * distance;
734 std::stringstream ss;
735 ss << "Inertia : " << currentInertia;
736 printMsg(ss.str());
737
738 // Manage output
739 std::vector<int> cptCentroid(centroids.size(), 0);
740 for(auto asgn : assignmentC) {
741 int centroid = std::get<0>(asgn);
742 int tree = std::get<1>(asgn);
743 // std::cout << centroid << " " << tree << std::endl;
744 clusteringAssignment[tree] = centroid;
745 outputMatching[centroid][tree]
746 = matchingsC[centroid][cptCentroid[centroid]];
747 if(trees2.size() != 0)
748 outputMatching2[centroid][tree]
749 = matchingsC2[centroid][cptCentroid[centroid]];
750 ++cptCentroid[centroid];
751 }
752
753 auto clusteringTime = t_clust.getElapsedTime() - addDeletedNodesTime_;
754 printMsg("Total", 1, clusteringTime, this->threadNumber_);
755 }
756
757 template <class dataType>
758 void execute(std::vector<ftm::MergeTree<dataType>> &trees,
759 matchingVectorType &outputMatching,
760 std::vector<double> &alphas,
761 std::vector<int> &clusteringAssignment,
762 std::vector<ftm::MergeTree<dataType>> &trees2,
763 matchingVectorType &outputMatching2,
764 std::vector<ftm::MergeTree<dataType>> &centroids,
765 std::vector<ftm::MergeTree<dataType>> &centroids2) {
766 // --- Preprocessing
767 // std::vector<ftm::FTMTree_MT*> oldTrees, oldTrees2;
768 treesNodeCorr_.resize(trees.size());
769 preprocessingClustering<dataType>(trees, treesNodeCorr_);
770 if(trees2.size() != 0) {
771 trees2NodeCorr_.resize(trees2.size());
772 preprocessingClustering<dataType>(trees2, trees2NodeCorr_, false);
773 }
774 std::vector<ftm::FTMTree_MT *> treesT;
775 ftm::mergeTreeToFTMTree<dataType>(trees, treesT);
776 std::vector<ftm::FTMTree_MT *> treesT2;
777 ftm::mergeTreeToFTMTree<dataType>(trees2, treesT2);
778 useDoubleInput_ = (trees2.size() != 0);
779
780 // --- Init centroids
781 std::vector<std::vector<ftm::MergeTree<dataType>>> allCentroids;
782 initCentroids<dataType>(treesT, treesT2, allCentroids);
783 centroids = allCentroids[0];
784 if(trees2.size() != 0)
785 centroids2 = allCentroids[1];
786 /*for(unsigned int i = 0; i < centroids.size(); ++i){
787 verifyBranchDecompositionInconsistency<dataType>(centroids[i]->tree);
788 if(trees2.size() != 0)
789 verifyBranchDecompositionInconsistency<dataType>(centroids2[i]->tree);
790 }*/
791
792 // --- Init accelerated kmeans
793 initAcceleratedKMeansVectors<dataType>(treesT, centroids, treesT2);
794
795 // --- Execute
796 computeCentroids<dataType>(treesT, centroids, outputMatching, alphas,
797 clusteringAssignment, treesT2, centroids2,
798 outputMatching2);
799
800 // --- Postprocessing
801 if(postprocess_) {
802 // fixMergedRootOriginClustering<dataType>(centroids);
803 postprocessingClustering<dataType>(
804 trees, centroids, outputMatching, clusteringAssignment);
805 /*if(trees2.size() != 0){
806 putBackMinMaxPair<dataType>(centroids, centroids2);
807 postprocessingClustering<dataType>(trees2, centroids2,
808 outputMatching2, clusteringAssignment);
809 }*/
810 }
811 }
812
813 template <class dataType>
814 void execute(std::vector<ftm::MergeTree<dataType>> &trees,
815 matchingVectorType &outputMatching,
816 std::vector<int> &clusteringAssignment,
817 std::vector<ftm::MergeTree<dataType>> &trees2,
818 matchingVectorType &outputMatching2,
819 std::vector<ftm::MergeTree<dataType>> &centroids,
820 std::vector<ftm::MergeTree<dataType>> &centroids2) {
821 if(trees2.size() != 0)
822 printMsg("Use join and split trees");
823
824 std::vector<double> alphas;
825 for(unsigned int i = 0; i < trees.size(); ++i)
826 alphas.push_back(1.0 / trees.size());
827
828 execute<dataType>(trees, outputMatching, alphas, clusteringAssignment,
829 trees2, outputMatching2, centroids, centroids2);
830 }
831
832 template <class dataType>
833 void execute(std::vector<ftm::MergeTree<dataType>> &trees,
834 matchingVectorType &outputMatching,
835 std::vector<int> &clusteringAssignment,
836 std::vector<ftm::MergeTree<dataType>> &centroids) {
837 std::vector<ftm::MergeTree<dataType>> trees2, centroids2;
838 matchingVectorType outputMatching2 = matchingVectorType();
839 execute<dataType>(trees, outputMatching, clusteringAssignment, trees2,
840 outputMatching2, centroids, centroids2);
841 }
842
843 // ------------------------------------------------------------------------
844 // Preprocessing
845 // ------------------------------------------------------------------------
846 template <class dataType>
848 std::vector<std::vector<int>> &nodeCorr,
849 bool useMinMaxPairT = true) {
850 for(unsigned int i = 0; i < trees.size(); ++i) {
851 preprocessingPipeline<dataType>(
853 branchDecomposition_, useMinMaxPairT, cleanTree_, nodeCorr[i]);
854 if(trees.size() < 40)
855 printTreeStats(trees[i]);
856 }
857 printTreesStats(trees);
858 }
859
860 // ------------------------------------------------------------------------
861 // Postprocessing
862 // ------------------------------------------------------------------------
863 template <class dataType>
865 std::vector<ftm::MergeTree<dataType>> &centroids) {
866 for(unsigned int i = 0; i < centroids.size(); ++i)
867 fixMergedRootOriginBarycenter<dataType>(centroids[i]);
868 }
869
870 template <class dataType>
871 void putBackMinMaxPair(std::vector<ftm::MergeTree<dataType>> &centroids,
872 std::vector<ftm::MergeTree<dataType>> &centroids2) {
873 for(unsigned int i = 0; i < centroids2.size(); ++i)
874 copyMinMaxPair(centroids[i], centroids2[i]);
875 }
876
877 template <class dataType>
878 void
880 std::vector<ftm::MergeTree<dataType>> &centroids,
881 matchingVectorType &outputMatching,
882 std::vector<int> &clusteringAssignment) {
883 for(unsigned int i = 0; i < trees.size(); ++i)
884 postprocessingPipeline<dataType>(&(trees[i].tree));
885 for(unsigned int i = 0; i < centroids.size(); ++i)
886 postprocessingPipeline<dataType>(&(centroids[i].tree));
887 for(unsigned int c = 0; c < centroids.size(); ++c)
888 for(unsigned int i = 0; i < trees.size(); ++i)
889 if(clusteringAssignment[i] == (int)c)
890 convertBranchDecompositionMatching<dataType>(
891 &(centroids[c].tree), &(trees[i].tree), outputMatching[c][i]);
892 }
893
894 // ------------------------------------------------------------------------
895 // Utils
896 // ------------------------------------------------------------------------
897 template <class dataType>
898 void
900 std::vector<ftm::MergeTree<dataType>> &centroids2) {
901 for(auto &centroid : centroids)
902 printBaryStats(&(centroid.tree), debug::Priority::DETAIL);
903 for(auto &centroid : centroids2)
904 printBaryStats(&(centroid.tree), debug::Priority::DETAIL);
905 }
906
907 }; // MergeTreeClustering class
908
909} // namespace ttk
#define ttkNotUsed(x)
Mark function/method parameters that are not used in the function body at all.
Definition: BaseClass.h:47
#define matchingVectorType
#define treesMatchingVector
int threadNumber_
Definition: BaseClass.h:95
virtual int setThreadNumber(const int threadNumber)
Definition: BaseClass.h:80
Minimalist debugging class.
Definition: Debug.h:88
int debugLevel_
Definition: Debug.h:379
void setDebugMsgPrefix(const std::string &prefix)
Definition: Debug.h:364
virtual int setDebugLevel(const int &debugLevel)
Definition: Debug.cpp:147
int printMsg(const std::string &msg, const debug::Priority &priority=debug::Priority::INFO, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cout) const
Definition: Debug.h:118
void limitSizeBarycenter(ftm::MergeTree< dataType > &bary, std::vector< ftm::FTMTree_MT * > &trees, unsigned int barycenterMaximumNumberOfPairs, double percent, bool useBD=true)
void computeBarycenter(std::vector< ftm::FTMTree_MT * > &trees, ftm::MergeTree< dataType > &baryMergeTree, std::vector< double > &alphas, std::vector< std::vector< std::tuple< ftm::idNode, ftm::idNode, double > > > &finalMatchings, bool finalAsgnDoubleInput=false, bool finalAsgnFirstInput=true)
unsigned int barycenterMaximumNumberOfPairs_
void setDeterministic(bool deterministicT)
std::vector< double > finalDistances_
void printBaryStats(ftm::FTMTree_MT *baryTree, const debug::Priority &priority=debug::Priority::INFO)
void setProgressiveBarycenter(bool progressive)
void setBarycenterMaximumNumberOfPairs(unsigned int maxi)
void setBarycenterSizeLimitPercent(double percent)
void setBranchDecomposition(bool useBD)
void setNormalizedWasserstein(bool normalizedWasserstein)
void setDistanceSquaredRoot(bool distanceSquaredRoot)
double mixtureCoefficient_
Definition: MergeTreeBase.h:51
void setAssignmentSolver(int assignmentSolver)
Definition: MergeTreeBase.h:69
void printTreesStats(std::vector< ftm::FTMTree_MT * > &trees)
void copyMinMaxPair(ftm::MergeTree< dataType > &mTree1, ftm::MergeTree< dataType > &mTree2, bool setOrigins=false)
std::vector< std::vector< int > > treesNodeCorr_
Definition: MergeTreeBase.h:60
void setKeepSubtree(bool keepSubtree)
void mixDistancesMatrix(std::vector< std::vector< dataType > > &distanceMatrix, std::vector< std::vector< dataType > > &distanceMatrix2)
void computeCentroids(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, matchingVectorType &outputMatching, std::vector< double > &alphas, std::vector< int > &clusteringAssignment, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< ftm::MergeTree< dataType > > &centroids2, matchingVectorType &outputMatching2)
void assignmentCentroidsNaive(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< std::tuple< int, int > > &assignmentC, std::vector< dataType > &bestDistanceT, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< ftm::MergeTree< dataType > > &centroids2)
void copyCentroids(std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::MergeTree< dataType > > &oldCentroids)
void setMixtureCoefficient(double coef)
void postprocessingClustering(std::vector< ftm::MergeTree< dataType > > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, matchingVectorType &outputMatching, std::vector< int > &clusteringAssignment)
void initAcceleratedKMeans(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< ftm::MergeTree< dataType > > &centroids2)
void fixMergedRootOriginClustering(std::vector< ftm::MergeTree< dataType > > &centroids)
void getCentroidsDistanceMatrix(std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< std::vector< double > > &distanceMatrix, bool useDoubleInput=false, bool isFirstInput=true)
void initAcceleratedKMeansVectors(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::FTMTree_MT * > &ttkNotUsed(trees2))
void printCentroidsStats(std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::MergeTree< dataType > > &centroids2)
void computeOneBarycenter(std::vector< ftm::FTMTree_MT * > &trees, ftm::MergeTree< dataType > &baryMergeTree, std::vector< double > &alphas, std::vector< std::vector< std::tuple< ftm::idNode, ftm::idNode, double > > > &finalMatchings)
void setNoCentroids(unsigned int noCentroidsT)
void preprocessingClustering(std::vector< ftm::MergeTree< dataType > > &trees, std::vector< std::vector< int > > &nodeCorr, bool useMinMaxPairT=true)
void finalAssignmentCentroids(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, matchingVectorType &matchingsC, std::vector< std::tuple< int, int > > &assignmentC, std::vector< dataType > &bestDistanceT, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< ftm::MergeTree< dataType > > &centroids2, matchingVectorType &matchingsC2)
void execute(std::vector< ftm::MergeTree< dataType > > &trees, matchingVectorType &outputMatching, std::vector< double > &alphas, std::vector< int > &clusteringAssignment, std::vector< ftm::MergeTree< dataType > > &trees2, matchingVectorType &outputMatching2, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::MergeTree< dataType > > &centroids2)
std::vector< std::vector< int > > getTrees2NodeCorr()
void assignmentCentroidsAccelerated(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< std::tuple< int, int > > &assignmentC, std::vector< dataType > &bestDistanceT, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< ftm::MergeTree< dataType > > &centroids2)
void execute(std::vector< ftm::MergeTree< dataType > > &trees, matchingVectorType &outputMatching, std::vector< int > &clusteringAssignment, std::vector< ftm::MergeTree< dataType > > &centroids)
void putBackMinMaxPair(std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::MergeTree< dataType > > &centroids2)
bool samePreviousAssignment(int clusterId)
bool updateCentroids(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< double > &alphas, std::vector< std::tuple< int, int > > &assignmentC)
void initCentroids(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< std::vector< ftm::MergeTree< dataType > > > &allCentroids)
void execute(std::vector< ftm::MergeTree< dataType > > &trees, matchingVectorType &outputMatching, std::vector< int > &clusteringAssignment, std::vector< ftm::MergeTree< dataType > > &trees2, matchingVectorType &outputMatching2, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< ftm::MergeTree< dataType > > &centroids2)
~MergeTreeClustering() override=default
void matchingCorrespondence(treesMatchingVector &matchingT, std::vector< int > &nodeCorr, std::vector< int > &assignedTreesIndex)
void initNewCentroid(std::vector< ftm::FTMTree_MT * > &trees, ftm::MergeTree< dataType > &centroid, int noNewCentroid)
void assignmentCentroids(std::vector< ftm::FTMTree_MT * > &trees, std::vector< ftm::MergeTree< dataType > > &centroids, std::vector< std::tuple< int, int > > &assignmentC, std::vector< dataType > &bestDistanceT, std::vector< ftm::FTMTree_MT * > &trees2, std::vector< ftm::MergeTree< dataType > > &centroids2)
double getElapsedTime()
Definition: Timer.h:15
The Topology ToolKit.