TTK
Loading...
Searching...
No Matches
ClusteringMetrics.cpp
Go to the documentation of this file.
1#include <ClusteringMetrics.h>
2#include <Geometry.h> // To check whether a double is zero.
3#include <cmath> // For the log2 function
4#include <map>
5#include <vector>
6
8 // inherited from Debug: prefix will be printed at the beginning of every msg
9 this->setDebugMsgPrefix("ClusteringMetrics");
10}
11
12inline int nChoose2(int x) {
13 return x * (x - 1) / 2;
14}
15
17 const std::vector<std::vector<int>> &matrix,
18 const size_t nPoint) {
19 if(nPoint == 0) {
20 object->printErr("Error: clustering on zero points.");
21 return 0;
22 }
23
24 const size_t nLin = matrix.size();
25 if(nLin == 0) {
26 object->printErr("The provided contingency matrix is empty.\n");
27 return 0;
28 }
29 const size_t nCol = matrix[0].size();
30
31 for(size_t i = 0; i < nLin; i++) {
32 const size_t curNCol = matrix[i].size();
33 if(curNCol == 0) {
34 object->printErr("Line " + std::to_string(i)
35 + " of the contingency matrix is empty.\n");
36 return 0;
37 } else if(curNCol != nCol) {
38 object->printErr(
39 "Line " + std::to_string(i)
40 + " of the contingency matrix has wrong number of columns : "
41 + std::to_string(curNCol) + " instead of " + std::to_string(nCol)
42 + ".\n");
43 return 0;
44 }
45 }
46 return 1;
47}
48
50 const int *clust1,
51 const int *clust2,
52 const size_t nPoint,
53 std::vector<std::vector<int>> &contingencyMatrix,
54 std::vector<int> &sumLin,
55 std::vector<int> &sumCol) const {
56 if(nPoint == 0) {
57 this->printErr("Error: clustering on zero points.");
58 return 0;
59 }
60
61 std::map<int, int> values1ToId, values2ToId;
62 size_t nbVal1 = 0, nbVal2 = 0;
63 for(size_t i = 0; i < nPoint; i++) {
64 const int x1 = clust1[i], x2 = clust2[i];
65 auto found1 = values1ToId.find(x1), found2 = values2ToId.find(x2);
66
67 if(found1 == values1ToId.end()) {
68 values1ToId[x1] = nbVal1;
69 nbVal1++;
70 }
71
72 if(found2 == values2ToId.end()) {
73 values2ToId[x2] = nbVal2;
74 nbVal2++;
75 }
76 }
77
78 const size_t nCluster1 = nbVal1, nCluster2 = nbVal2;
79 contingencyMatrix.resize(nCluster1);
80 for(size_t i = 0; i < nCluster1; i++)
81 contingencyMatrix[i].resize(nCluster2, 0);
82 sumLin.resize(nCluster1);
83 sumCol.resize(nCluster2, 0);
84
85 for(size_t i = 0; i < nPoint; i++) {
86 const int x1 = values1ToId[clust1[i]], x2 = values2ToId[clust2[i]];
87 contingencyMatrix[x1][x2]++;
88 }
89
90 for(size_t i1 = 0; i1 < nCluster1; i1++) {
91 int sum = 0;
92 for(size_t i2 = 0; i2 < nCluster2; i2++) {
93 sumCol[i2] += contingencyMatrix[i1][i2];
94 sum += contingencyMatrix[i1][i2];
95 }
96
97 sumLin[i1] = sum;
98 }
99
100 return 0;
101}
102
104 const std::vector<std::vector<int>> &contingencyMatrix,
105 const std::vector<int> &sumLin,
106 const std::vector<int> &sumCol,
107 const size_t nPoint,
108 double &ariValue) const {
109 if(!checkContingencyMatSize(this, contingencyMatrix, nPoint))
110 return 0;
111
112 const size_t nCluster1 = contingencyMatrix.size();
113 const size_t nCluster2 = contingencyMatrix[0].size();
114
115 double sumNChooseContingency = 0;
116#ifdef TTK_ENABLE_OPENMP
117#pragma omp parallel for num_threads(this->threadNumber_) \
118 reduction(+ : sumNChooseContingency)
119#endif // TTK_ENABLE_OPENMP
120 for(size_t i1 = 0; i1 < nCluster1; i1++) {
121 for(size_t i2 = 0; i2 < nCluster2; i2++)
122 sumNChooseContingency += nChoose2(contingencyMatrix[i1][i2]);
123 }
124
125 double sumNChoose2_1 = 0, sumNChoose2_2 = 0;
126 for(size_t i = 0; i < nCluster1; i++) {
127 if(sumLin[i] == 0) {
128 this->printErr("Error: the sum of a line in the contingency matrix is "
129 "zero. This should not happen.");
130 }
131 sumNChoose2_1 += nChoose2(sumLin[i]);
132 }
133 for(size_t i = 0; i < nCluster2; i++) {
134 if(sumCol[i] == 0) {
135 this->printErr("Error: the sum of a column in the contingency matrix is "
136 "zero. This should not happen.");
137 }
138 sumNChoose2_2 += nChoose2(sumCol[i]);
139 }
140
141 const double numerator = sumNChooseContingency
142 - (sumNChoose2_1 * sumNChoose2_2) / nChoose2(nPoint);
143 const double denominator
144 = 0.5 * (sumNChoose2_1 + sumNChoose2_2)
145 - (sumNChoose2_1 * sumNChoose2_2) / nChoose2(nPoint);
146 if(denominator < ttk::Geometry::powIntTen(-DBL_DIG))
147 ariValue = 1;
148 else
149 ariValue = numerator / denominator;
150
151 return 0;
152}
153
155 const std::vector<std::vector<int>> &contingencyMatrix,
156 const std::vector<int> &sumLin,
157 const std::vector<int> &sumCol,
158 const size_t nPoint,
159 double &nmiValue) const {
160 if(!checkContingencyMatSize(this, contingencyMatrix, nPoint))
161 return 0;
162
163 const size_t nCluster1 = contingencyMatrix.size();
164 const size_t nCluster2 = contingencyMatrix[0].size();
165
166 double mutualInfo = 0;
167 bool invalidCell = false;
168#ifdef TTK_ENABLE_OPENMP
169#pragma omp parallel for num_threads(this->threadNumber_) \
170 reduction(+ : mutualInfo)
171#endif // TTK_ENABLE_OPENMP
172 for(size_t i1 = 0; i1 < nCluster1; i1++) {
173 for(size_t i2 = 0; i2 < nCluster2; i2++) {
174 if(contingencyMatrix[i1][i2] == 0)
175 continue;
176 if(sumLin[i1] == 0 || sumCol[i2] == 0) {
177 this->printErr("Error: a sum of a line or a column of the contingency "
178 "matrix is zero. This should not happen.");
179 invalidCell = true;
180 continue;
181 }
182
183 const double logArg = (double)nPoint * contingencyMatrix[i1][i2]
184 / (sumLin[i1] * sumCol[i2]);
185 const double curAdd = contingencyMatrix[i1][i2] * log2(logArg) / (nPoint);
186 mutualInfo += curAdd;
187 }
188 }
189 if(invalidCell)
190 return 0;
191
192 double entropy1 = 0, entropy2 = 0;
193 for(size_t i = 0; i < nCluster1; i++) {
194 const double eltLin = (double)sumLin[i] / nPoint;
195 entropy1 -= eltLin * log2(eltLin);
196 }
197 for(size_t i = 0; i < nCluster2; i++) {
198 const double eltCol = (double)sumCol[i] / nPoint;
199 entropy2 -= eltCol * log2(eltCol);
200 }
201
202 nmiValue = 2 * mutualInfo / (entropy1 + entropy2);
203
204 return 0;
205}
206
207int ttk::ClusteringMetrics::execute(const int *clustering1,
208 const int *clustering2,
209 const size_t n,
210 double &nmiValue,
211 double &ariValue) const {
212 ttk::Timer timer;
213
215
216 std::vector<std::vector<int>> contingencyMatrix;
217 std::vector<int> sumLines, sumColumns;
219 clustering1, clustering2, n, contingencyMatrix, sumLines, sumColumns);
220
221 computeARI(contingencyMatrix, sumLines, sumColumns, n, ariValue);
222 computeNMI(contingencyMatrix, sumLines, sumColumns, n, nmiValue);
223
224 this->printMsg("Size of output in ttk/base = 0\n");
225
226 this->printMsg("Computed NMI value: " + std::to_string(nmiValue) + "\n");
227 this->printMsg("Computed ARI value: " + std::to_string(ariValue) + "\n");
228 this->printMsg(ttk::debug::Separator::L2); // horizontal '-' separator
229 this->printMsg("Complete", 1, timer.getElapsedTime());
230 this->printMsg(ttk::debug::Separator::L1); // horizontal '=' separator
231 return 0;
232}
int nChoose2(int x)
int checkContingencyMatSize(const ttk::ClusteringMetrics *object, const std::vector< std::vector< int > > &matrix, const size_t nPoint)
int computeARI(const std::vector< std::vector< int > > &contingencyMatrix, const std::vector< int > &sumLin, const std::vector< int > &sumCol, const size_t nPoint, double &ariValue) const
int computeContingencyTables(const int *clust1, const int *clust2, const size_t nPoint, std::vector< std::vector< int > > &contingencyMatrix, std::vector< int > &sumLin, std::vector< int > &sumCol) const
int computeNMI(const std::vector< std::vector< int > > &contingencyMatrix, const std::vector< int > &sumLin, const std::vector< int > &sumCol, const size_t nPoint, double &nmiValue) const
int execute(const int *clustering1, const int *clustering2, const size_t n, double &nmiValue, double &ariValue) const
void setDebugMsgPrefix(const std::string &prefix)
Definition Debug.h:364
int printErr(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition Debug.h:149
double getElapsedTime()
Definition Timer.h:15
T powIntTen(const int n)
Compute the nth power of ten.
Definition Geometry.h:405
printMsg(debug::output::BOLD+" | | | | | . \\ | | (__| | / __/| |_| / __/| (_) |"+debug::output::ENDCOLOR, debug::Priority::PERFORMANCE, debug::LineMode::NEW, stream)