TTK
Loading...
Searching...
No Matches
ClusteringMetrics.cpp
Go to the documentation of this file.
1#include <ClusteringMetrics.h>
2#include <Geometry.h> // To check wheter a double is zero.
3#include <cmath> // For the log2 function
4#include <map>
5#include <vector>
6
8 // inherited from Debug: prefix will be printed at the beginning of every msg
9 this->setDebugMsgPrefix("ClusteringMetrics");
10}
11
12inline int nChoose2(int x) {
13 return x * (x - 1) / 2;
14}
15
17 const std::vector<std::vector<int>> &matrix,
18 const size_t nPoint) {
19 if(nPoint == 0) {
20 object->printErr("Error: clustering on zero points.");
21 return 0;
22 }
23
24 const size_t nLin = matrix.size();
25 if(nLin == 0) {
26 object->printErr("The provided contingency matrix is empty.\n");
27 return 0;
28 }
29 const size_t nCol = matrix[0].size();
30
31 for(size_t i = 0; i < nLin; i++) {
32 const size_t curNCol = matrix[i].size();
33 if(curNCol == 0) {
34 object->printErr("Line " + std::to_string(i)
35 + " of the contingency matrix is empty.\n");
36 return 0;
37 } else if(curNCol != nCol) {
38 object->printErr(
39 "Line " + std::to_string(i)
40 + " of the contingency matrix has wrong number of columns : "
41 + std::to_string(curNCol) + " instead of " + std::to_string(nCol)
42 + ".\n");
43 return 0;
44 }
45 }
46 return 1;
47}
48
50 const int *clust1,
51 const int *clust2,
52 const size_t nPoint,
53 std::vector<std::vector<int>> &contingencyMatrix,
54 std::vector<int> &sumLin,
55 std::vector<int> &sumCol) const {
56 if(nPoint == 0) {
57 this->printErr("Error: clustering on zero points.");
58 return 0;
59 }
60
61 std::map<int, int> values1ToId, values2ToId;
62 size_t nbVal1 = 0, nbVal2 = 0;
63 for(size_t i = 0; i < nPoint; i++) {
64 const int x1 = clust1[i], x2 = clust2[i];
65 auto found1 = values1ToId.find(x1), found2 = values2ToId.find(x2);
66
67 if(found1 == values1ToId.end()) {
68 values1ToId[x1] = nbVal1;
69 nbVal1++;
70 }
71
72 if(found2 == values2ToId.end()) {
73 values2ToId[x2] = nbVal2;
74 nbVal2++;
75 }
76 }
77
78 const size_t nCluster1 = nbVal1, nCluster2 = nbVal2;
79 contingencyMatrix.resize(nCluster1);
80 for(size_t i = 0; i < nCluster1; i++)
81 contingencyMatrix[i].resize(nCluster2, 0);
82 sumLin.resize(nCluster1);
83 sumCol.resize(nCluster2, 0);
84
85 for(size_t i = 0; i < nPoint; i++) {
86 const int x1 = values1ToId[clust1[i]], x2 = values2ToId[clust2[i]];
87 contingencyMatrix[x1][x2]++;
88 }
89
90 for(size_t i1 = 0; i1 < nCluster1; i1++) {
91 int sum = 0;
92 for(size_t i2 = 0; i2 < nCluster2; i2++) {
93 sumCol[i2] += contingencyMatrix[i1][i2];
94 sum += contingencyMatrix[i1][i2];
95 }
96
97 sumLin[i1] = sum;
98 }
99
100 return 0;
101}
102
104 const std::vector<std::vector<int>> &contingencyMatrix,
105 const std::vector<int> &sumLin,
106 const std::vector<int> &sumCol,
107 const size_t nPoint,
108 double &ariValue) const {
109 if(!checkContingencyMatSize(this, contingencyMatrix, nPoint))
110 return 0;
111
112 const size_t nCluster1 = contingencyMatrix.size();
113 const size_t nCluster2 = contingencyMatrix[0].size();
114
115 double sumNChooseContingency = 0;
116#ifdef TTK_ENABLE_OPENMP
117#pragma omp parallel for num_threads(this->threadNumber_) reduction(+:sumNChooseContingency)
118#endif // TTK_ENABLE_OPENMP
119 for(size_t i1 = 0; i1 < nCluster1; i1++) {
120 for(size_t i2 = 0; i2 < nCluster2; i2++)
121 sumNChooseContingency += nChoose2(contingencyMatrix[i1][i2]);
122 }
123
124 double sumNChoose2_1 = 0, sumNChoose2_2 = 0;
125 for(size_t i = 0; i < nCluster1; i++) {
126 if(sumLin[i] == 0) {
127 this->printErr("Error: the sum of a line in the contingency matrix is "
128 "zero. This should not happen.");
129 }
130 sumNChoose2_1 += nChoose2(sumLin[i]);
131 }
132 for(size_t i = 0; i < nCluster2; i++) {
133 if(sumCol[i] == 0) {
134 this->printErr("Error: the sum of a column in the contingency matrix is "
135 "zero. This should not happen.");
136 }
137 sumNChoose2_2 += nChoose2(sumCol[i]);
138 }
139
140 const double numerator = sumNChooseContingency
141 - (sumNChoose2_1 * sumNChoose2_2) / nChoose2(nPoint);
142 const double denominator
143 = 0.5 * (sumNChoose2_1 + sumNChoose2_2)
144 - (sumNChoose2_1 * sumNChoose2_2) / nChoose2(nPoint);
145 if(denominator < ttk::Geometry::powIntTen(-DBL_DIG))
146 ariValue = 1;
147 else
148 ariValue = numerator / denominator;
149
150 return 0;
151}
152
154 const std::vector<std::vector<int>> &contingencyMatrix,
155 const std::vector<int> &sumLin,
156 const std::vector<int> &sumCol,
157 const size_t nPoint,
158 double &nmiValue) const {
159 if(!checkContingencyMatSize(this, contingencyMatrix, nPoint))
160 return 0;
161
162 const size_t nCluster1 = contingencyMatrix.size();
163 const size_t nCluster2 = contingencyMatrix[0].size();
164
165 double mutualInfo = 0;
166 bool invalidCell = false;
167#ifdef TTK_ENABLE_OPENMP
168#pragma omp parallel for num_threads(this->threadNumber_) reduction(+:mutualInfo)
169#endif // TTK_ENABLE_OPENMP
170 for(size_t i1 = 0; i1 < nCluster1; i1++) {
171 for(size_t i2 = 0; i2 < nCluster2; i2++) {
172 if(contingencyMatrix[i1][i2] == 0)
173 continue;
174 if(sumLin[i1] == 0 || sumCol[i2] == 0) {
175 this->printErr("Error: a sum of a line or a column of the contingency "
176 "matrix is zero. This should not happen.");
177 invalidCell = true;
178 continue;
179 }
180
181 const double logArg = (double)nPoint * contingencyMatrix[i1][i2]
182 / (sumLin[i1] * sumCol[i2]);
183 const double curAdd = contingencyMatrix[i1][i2] * log2(logArg) / (nPoint);
184 mutualInfo += curAdd;
185 }
186 }
187 if(invalidCell)
188 return 0;
189
190 double entropy1 = 0, entropy2 = 0;
191 for(size_t i = 0; i < nCluster1; i++) {
192 const double eltLin = (double)sumLin[i] / nPoint;
193 entropy1 -= eltLin * log2(eltLin);
194 }
195 for(size_t i = 0; i < nCluster2; i++) {
196 const double eltCol = (double)sumCol[i] / nPoint;
197 entropy2 -= eltCol * log2(eltCol);
198 }
199
200 nmiValue = 2 * mutualInfo / (entropy1 + entropy2);
201
202 return 0;
203}
204
205int ttk::ClusteringMetrics::execute(const int *clustering1,
206 const int *clustering2,
207 const size_t n,
208 double &nmiValue,
209 double &ariValue) const {
210 ttk::Timer timer;
211
213
214 std::vector<std::vector<int>> contingencyMatrix;
215 std::vector<int> sumLines, sumColumns;
216 computeContingencyTables(
217 clustering1, clustering2, n, contingencyMatrix, sumLines, sumColumns);
218
219 computeARI(contingencyMatrix, sumLines, sumColumns, n, ariValue);
220 computeNMI(contingencyMatrix, sumLines, sumColumns, n, nmiValue);
221
222 this->printMsg("Size of output in ttk/base = 0\n");
223
224 this->printMsg("Computed NMI value: " + std::to_string(nmiValue) + "\n");
225 this->printMsg("Computed ARI value: " + std::to_string(ariValue) + "\n");
226 this->printMsg(ttk::debug::Separator::L2); // horizontal '-' separator
227 this->printMsg("Complete", 1, timer.getElapsedTime());
228 this->printMsg(ttk::debug::Separator::L1); // horizontal '=' separator
229 return 0;
230}
int nChoose2(int x)
int checkContingencyMatSize(const ttk::ClusteringMetrics *object, const std::vector< std::vector< int > > &matrix, const size_t nPoint)
int computeARI(const std::vector< std::vector< int > > &contingencyMatrix, const std::vector< int > &sumLin, const std::vector< int > &sumCol, const size_t nPoint, double &ariValue) const
int computeContingencyTables(const int *clust1, const int *clust2, const size_t nPoint, std::vector< std::vector< int > > &contingencyMatrix, std::vector< int > &sumLin, std::vector< int > &sumCol) const
int computeNMI(const std::vector< std::vector< int > > &contingencyMatrix, const std::vector< int > &sumLin, const std::vector< int > &sumCol, const size_t nPoint, double &nmiValue) const
int execute(const int *clustering1, const int *clustering2, const size_t n, double &nmiValue, double &ariValue) const
void setDebugMsgPrefix(const std::string &prefix)
Definition Debug.h:364
double getElapsedTime()
Definition Timer.h:15
T powIntTen(const int n)
Compute the nth power of ten.
Definition Geometry.h:405
printMsg(debug::output::BOLD+" | | | | | . \\ | | (__| | / __/| |_| / __/|__ _|"+debug::output::ENDCOLOR, debug::Priority::PERFORMANCE, debug::LineMode::NEW, stream)