TTK
Loading...
Searching...
No Matches
ClusteringMetrics.cpp
Go to the documentation of this file.
1#include <ClusteringMetrics.h>
2#include <Geometry.h> // To check wheter a double is zero.
3#include <cmath> // For the log2 function
4#include <map>
5#include <vector>
6
8 // inherited from Debug: prefix will be printed at the beginning of every msg
9 this->setDebugMsgPrefix("ClusteringMetrics");
10}
11
12inline int nChoose2(int x) {
13 return x * (x - 1) / 2;
14}
15
17 const std::vector<std::vector<int>> &matrix,
18 const size_t nPoint) {
19 if(nPoint == 0) {
20 object->printErr("Error: clustering on zero points.");
21 return 0;
22 }
23
24 size_t nLin = matrix.size();
25 if(nLin == 0) {
26 object->printErr("The provided contingency matrix is empty.\n");
27 return 0;
28 }
29 size_t nCol = matrix[0].size();
30
31 for(size_t i = 0; i < nLin; i++) {
32 size_t curNCol = matrix[i].size();
33 if(curNCol == 0) {
34 object->printErr("Line " + std::to_string(i)
35 + " of the contingency matrix is empty.\n");
36 return 0;
37 } else if(curNCol != nCol) {
38 object->printErr(
39 "Line " + std::to_string(i)
40 + " of the contingency matrix has wrong number of columns : "
41 + std::to_string(curNCol) + " instead of " + std::to_string(nCol)
42 + ".\n");
43 return 0;
44 }
45 }
46 return 1;
47}
48
50 const int *clust1,
51 const int *clust2,
52 const size_t nPoint,
53 std::vector<std::vector<int>> &contingencyMatrix,
54 std::vector<int> &sumLin,
55 std::vector<int> &sumCol) const {
56 if(nPoint == 0) {
57 this->printErr("Error: clustering on zero points.");
58 return 0;
59 }
60
61 std::map<int, int> values1ToId, values2ToId;
62 size_t nbVal1 = 0, nbVal2 = 0;
63 for(size_t i = 0; i < nPoint; i++) {
64 int x1 = clust1[i], x2 = clust2[i];
65 auto found1 = values1ToId.find(x1), found2 = values2ToId.find(x2);
66
67 if(found1 == values1ToId.end()) {
68 values1ToId[x1] = nbVal1;
69 nbVal1++;
70 }
71
72 if(found2 == values2ToId.end()) {
73 values2ToId[x2] = nbVal2;
74 nbVal2++;
75 }
76 }
77
78 size_t nCluster1 = nbVal1, nCluster2 = nbVal2;
79 contingencyMatrix.resize(nCluster1);
80 for(size_t i = 0; i < nCluster1; i++)
81 contingencyMatrix[i].resize(nCluster2, 0);
82 sumLin.resize(nCluster1);
83 sumCol.resize(nCluster2, 0);
84
85 for(size_t i = 0; i < nPoint; i++) {
86 int x1 = values1ToId[clust1[i]], x2 = values2ToId[clust2[i]];
87 contingencyMatrix[x1][x2]++;
88 }
89
90 for(size_t i1 = 0; i1 < nCluster1; i1++) {
91 int sum = 0;
92 for(size_t i2 = 0; i2 < nCluster2; i2++) {
93 sumCol[i2] += contingencyMatrix[i1][i2];
94 sum += contingencyMatrix[i1][i2];
95 }
96
97 sumLin[i1] = sum;
98 }
99
100 return 0;
101}
102
104 const std::vector<std::vector<int>> &contingencyMatrix,
105 const std::vector<int> &sumLin,
106 const std::vector<int> &sumCol,
107 const size_t nPoint,
108 double &ariValue) const {
109 if(!checkContingencyMatSize(this, contingencyMatrix, nPoint))
110 return 0;
111
112 size_t nCluster1 = contingencyMatrix.size();
113 size_t nCluster2 = contingencyMatrix[0].size();
114
115 double sumNChooseContingency = 0;
116#ifdef TTK_ENABLE_OPENMP
117#pragma omp parallel for num_threads(this->threadNumber_) reduction(+:sumNChooseContingency)
118#endif // TTK_ENABLE_OPENMP
119 for(size_t i1 = 0; i1 < nCluster1; i1++) {
120 for(size_t i2 = 0; i2 < nCluster2; i2++)
121 sumNChooseContingency += nChoose2(contingencyMatrix[i1][i2]);
122 }
123
124 double sumNChoose2_1 = 0, sumNChoose2_2 = 0;
125 for(size_t i = 0; i < nCluster1; i++) {
126 if(sumLin[i] == 0) {
127 this->printErr("Error: the sum of a line in the contingency matrix is "
128 "zero. This should not happen.");
129 }
130 sumNChoose2_1 += nChoose2(sumLin[i]);
131 }
132 for(size_t i = 0; i < nCluster2; i++) {
133 if(sumCol[i] == 0) {
134 this->printErr("Error: the sum of a column in the contingency matrix is "
135 "zero. This should not happen.");
136 }
137 sumNChoose2_2 += nChoose2(sumCol[i]);
138 }
139
140 double numerator = sumNChooseContingency
141 - (sumNChoose2_1 * sumNChoose2_2) / nChoose2(nPoint);
142 double denominator = 0.5 * (sumNChoose2_1 + sumNChoose2_2)
143 - (sumNChoose2_1 * sumNChoose2_2) / nChoose2(nPoint);
144 if(denominator < ttk::Geometry::powIntTen(-DBL_DIG))
145 ariValue = 1;
146 else
147 ariValue = numerator / denominator;
148
149 return 0;
150}
151
153 const std::vector<std::vector<int>> &contingencyMatrix,
154 const std::vector<int> &sumLin,
155 const std::vector<int> &sumCol,
156 const size_t nPoint,
157 double &nmiValue) const {
158 if(!checkContingencyMatSize(this, contingencyMatrix, nPoint))
159 return 0;
160
161 size_t nCluster1 = contingencyMatrix.size();
162 size_t nCluster2 = contingencyMatrix[0].size();
163
164 double mutualInfo = 0;
165 bool invalidCell = false;
166#ifdef TTK_ENABLE_OPENMP
167#pragma omp parallel for num_threads(this->threadNumber_) reduction(+:mutualInfo)
168#endif // TTK_ENABLE_OPENMP
169 for(size_t i1 = 0; i1 < nCluster1; i1++) {
170 for(size_t i2 = 0; i2 < nCluster2; i2++) {
171 if(contingencyMatrix[i1][i2] == 0)
172 continue;
173 if(sumLin[i1] == 0 || sumCol[i2] == 0) {
174 this->printErr("Error: a sum of a line or a column of the contingency "
175 "matrix is zero. This should not happen.");
176 invalidCell = true;
177 continue;
178 }
179
180 double logArg = (double)nPoint * contingencyMatrix[i1][i2]
181 / (sumLin[i1] * sumCol[i2]);
182 double curAdd = contingencyMatrix[i1][i2] * log2(logArg) / (nPoint);
183 mutualInfo += curAdd;
184 }
185 }
186 if(invalidCell)
187 return 0;
188
189 double entropy1 = 0, entropy2 = 0;
190 for(size_t i = 0; i < nCluster1; i++) {
191 double eltLin = (double)sumLin[i] / nPoint;
192 entropy1 -= eltLin * log2(eltLin);
193 }
194 for(size_t i = 0; i < nCluster2; i++) {
195 double eltCol = (double)sumCol[i] / nPoint;
196 entropy2 -= eltCol * log2(eltCol);
197 }
198
199 nmiValue = 2 * mutualInfo / (entropy1 + entropy2);
200
201 return 0;
202}
203
204int ttk::ClusteringMetrics::execute(const int *clustering1,
205 const int *clustering2,
206 const size_t n,
207 double &nmiValue,
208 double &ariValue) const {
209 ttk::Timer timer;
210
212
213 std::vector<std::vector<int>> contingencyMatrix;
214 std::vector<int> sumLines, sumColumns;
215 computeContingencyTables(
216 clustering1, clustering2, n, contingencyMatrix, sumLines, sumColumns);
217
218 computeARI(contingencyMatrix, sumLines, sumColumns, n, ariValue);
219 computeNMI(contingencyMatrix, sumLines, sumColumns, n, nmiValue);
220
221 this->printMsg("Size of output in ttk/base = 0\n");
222
223 this->printMsg("Computed NMI value: " + std::to_string(nmiValue) + "\n");
224 this->printMsg("Computed ARI value: " + std::to_string(ariValue) + "\n");
225 this->printMsg(ttk::debug::Separator::L2); // horizontal '-' separator
226 this->printMsg("Complete", 1, timer.getElapsedTime());
227 this->printMsg(ttk::debug::Separator::L1); // horizontal '=' separator
228 return 0;
229}
int nChoose2(int x)
int checkContingencyMatSize(const ttk::ClusteringMetrics *object, const std::vector< std::vector< int > > &matrix, const size_t nPoint)
int computeARI(const std::vector< std::vector< int > > &contingencyMatrix, const std::vector< int > &sumLin, const std::vector< int > &sumCol, const size_t nPoint, double &ariValue) const
int computeContingencyTables(const int *clust1, const int *clust2, const size_t nPoint, std::vector< std::vector< int > > &contingencyMatrix, std::vector< int > &sumLin, std::vector< int > &sumCol) const
int computeNMI(const std::vector< std::vector< int > > &contingencyMatrix, const std::vector< int > &sumLin, const std::vector< int > &sumCol, const size_t nPoint, double &nmiValue) const
int execute(const int *clustering1, const int *clustering2, const size_t n, double &nmiValue, double &ariValue) const
void setDebugMsgPrefix(const std::string &prefix)
Definition: Debug.h:364
double getElapsedTime()
Definition: Timer.h:15
T powIntTen(const int n)
Compute the nth power of ten.
Definition: Geometry.h:360
printMsg(debug::output::GREEN+"                           "+debug::output::ENDCOLOR+debug::output::GREEN+"▒"+debug::output::ENDCOLOR+debug::output::GREEN+"▒▒▒▒▒▒▒▒▒▒▒▒▒░"+debug::output::ENDCOLOR, debug::Priority::PERFORMANCE, debug::LineMode::NEW, stream)