53 std::vector<std::vector<int>> &contingencyMatrix,
54 std::vector<int> &sumLin,
55 std::vector<int> &sumCol)
const {
57 this->printErr(
"Error: clustering on zero points.");
61 std::map<int, int> values1ToId, values2ToId;
62 size_t nbVal1 = 0, nbVal2 = 0;
63 for(
size_t i = 0; i < nPoint; i++) {
64 const int x1 = clust1[i], x2 = clust2[i];
65 auto found1 = values1ToId.find(x1), found2 = values2ToId.find(x2);
67 if(found1 == values1ToId.end()) {
68 values1ToId[x1] = nbVal1;
72 if(found2 == values2ToId.end()) {
73 values2ToId[x2] = nbVal2;
78 const size_t nCluster1 = nbVal1, nCluster2 = nbVal2;
79 contingencyMatrix.resize(nCluster1);
80 for(
size_t i = 0; i < nCluster1; i++)
81 contingencyMatrix[i].resize(nCluster2, 0);
82 sumLin.resize(nCluster1);
83 sumCol.resize(nCluster2, 0);
85 for(
size_t i = 0; i < nPoint; i++) {
86 const int x1 = values1ToId[clust1[i]], x2 = values2ToId[clust2[i]];
87 contingencyMatrix[x1][x2]++;
90 for(
size_t i1 = 0; i1 < nCluster1; i1++) {
92 for(
size_t i2 = 0; i2 < nCluster2; i2++) {
93 sumCol[i2] += contingencyMatrix[i1][i2];
94 sum += contingencyMatrix[i1][i2];
104 const std::vector<std::vector<int>> &contingencyMatrix,
105 const std::vector<int> &sumLin,
106 const std::vector<int> &sumCol,
108 double &ariValue)
const {
// Computes an adjusted-Rand-style score from a contingency matrix and its
// row sums (sumLin) and column sums (sumCol), and writes it to ariValue.
// With A = sum of nChoose2(sumLin[i]), B = sum of nChoose2(sumCol[j]) and
// C = sum of nChoose2(contingencyMatrix[i][j]), the value written is
//   (C - A*B/nChoose2(nPoint)) / (0.5*(A+B) - A*B/nChoose2(nPoint)).
// NOTE(review): nChoose2 and nPoint are declared outside this view;
// presumably nChoose2(x) is x*(x-1)/2 and nPoint is the number of points —
// confirm, including the integer width nChoose2 uses for large counts.
// NOTE(review): row 0 of the matrix is indexed just below; no emptiness
// check is visible in this listing — confirm one exists before this point.
112 const size_t nCluster1 = contingencyMatrix.size();
113 const size_t nCluster2 = contingencyMatrix[0].size();
// C: accumulated over every cell of the matrix. When TTK_ENABLE_OPENMP is
// defined the outer (row) loop is run as an OpenMP parallel for with a sum
// reduction on the accumulator.
115 double sumNChooseContingency = 0;
116#ifdef TTK_ENABLE_OPENMP
117#pragma omp parallel for num_threads(this->threadNumber_) reduction(+:sumNChooseContingency)
119 for(
size_t i1 = 0; i1 < nCluster1; i1++) {
120 for(
size_t i2 = 0; i2 < nCluster2; i2++)
121 sumNChooseContingency +=
nChoose2(contingencyMatrix[i1][i2]);
// A and B: accumulated sequentially over the row sums and the column sums.
// The error messages below report a zero row/column sum; the condition that
// triggers them is not visible in this listing.
124 double sumNChoose2_1 = 0, sumNChoose2_2 = 0;
125 for(
size_t i = 0; i < nCluster1; i++) {
127 this->printErr(
"Error: the sum of a line in the contingency matrix is "
128 "zero. This should not happen.");
130 sumNChoose2_1 +=
nChoose2(sumLin[i]);
132 for(
size_t i = 0; i < nCluster2; i++) {
134 this->printErr(
"Error: the sum of a column in the contingency matrix is "
135 "zero. This should not happen.");
137 sumNChoose2_2 +=
nChoose2(sumCol[i]);
// Both terms subtract the same correction A*B/nChoose2(nPoint).
140 const double numerator = sumNChooseContingency
141 - (sumNChoose2_1 * sumNChoose2_2) /
nChoose2(nPoint);
142 const double denominator
143 = 0.5 * (sumNChoose2_1 + sumNChoose2_2)
144 - (sumNChoose2_1 * sumNChoose2_2) /
nChoose2(nPoint);
// NOTE(review): no zero-denominator guard is visible between the lines
// above and this division — confirm how a zero denominator is handled.
148 ariValue = numerator / denominator;
154 const std::vector<std::vector<int>> &contingencyMatrix,
155 const std::vector<int> &sumLin,
156 const std::vector<int> &sumCol,
158 double &nmiValue)
const {
// Computes a normalized mutual information score from a contingency matrix
// and its row sums (sumLin) and column sums (sumCol), and writes
//   2 * mutualInfo / (entropy1 + entropy2)
// to nmiValue. All logarithms are base 2.
// NOTE(review): nPoint is declared outside this view; presumably the total
// number of points — confirm.
// NOTE(review): row 0 of the matrix is indexed just below; no emptiness
// check is visible in this listing — confirm one exists before this point.
162 const size_t nCluster1 = contingencyMatrix.size();
163 const size_t nCluster2 = contingencyMatrix[0].size();
165 double mutualInfo = 0;
// NOTE(review): invalidCell is declared before the parallel loop and is not
// part of the reduction clause, so it is shared between threads when OpenMP
// is enabled. The statement that writes it is not visible here — confirm
// that concurrent writes are acceptable.
166 bool invalidCell =
false;
// Mutual information: summed over matrix cells. When TTK_ENABLE_OPENMP is
// defined the outer (row) loop is run as an OpenMP parallel for with a sum
// reduction on mutualInfo.
167#ifdef TTK_ENABLE_OPENMP
168#pragma omp parallel for num_threads(this->threadNumber_) reduction(+:mutualInfo)
170 for(
size_t i1 = 0; i1 < nCluster1; i1++) {
171 for(
size_t i2 = 0; i2 < nCluster2; i2++) {
// Zero cells are tested first; the statement taken for them is not visible
// in this listing (presumably they are skipped).
172 if(contingencyMatrix[i1][i2] == 0)
// A zero row or column sum is reported as an error before it can be used
// as a divisor below.
174 if(sumLin[i1] == 0 || sumCol[i2] == 0) {
175 this->printErr(
"Error: a sum of a line or a column of the contingency "
176 "matrix is zero. This should not happen.");
// Per-cell term: (cell / nPoint) * log2(nPoint * cell / (rowSum * colSum)).
// NOTE(review): sumLin[i1] * sumCol[i2] is an int * int product evaluated
// before the division; it exceeds the int range once the product passes
// INT_MAX (e.g. two sums above ~46341 on a 32-bit int). Consider promoting
// one operand to double.
181 const double logArg = (double)nPoint * contingencyMatrix[i1][i2]
182 / (sumLin[i1] * sumCol[i2]);
183 const double curAdd = contingencyMatrix[i1][i2] * log2(logArg) / (nPoint);
184 mutualInfo += curAdd;
// Entropy of each clustering, from the row-sum and column-sum fractions:
// each is the sum of -p * log2(p).
190 double entropy1 = 0, entropy2 = 0;
191 for(
size_t i = 0; i < nCluster1; i++) {
192 const double eltLin = (double)sumLin[i] / nPoint;
193 entropy1 -= eltLin * log2(eltLin);
195 for(
size_t i = 0; i < nCluster2; i++) {
196 const double eltCol = (double)sumCol[i] / nPoint;
197 entropy2 -= eltCol * log2(eltCol);
// NOTE(review): no guard for entropy1 + entropy2 == 0 is visible here; that
// sum is zero when every fraction equals 1 (a single row and a single
// column) — confirm how that case is handled.
200 nmiValue = 2 * mutualInfo / (entropy1 + entropy2);
206 const int *clustering2,
209 double &ariValue)
const {
// Driver: builds the contingency matrix and its row/column sums from the
// two clusterings, then fills ariValue via computeARI and nmiValue via
// computeNMI, and prints both values.
// NOTE(review): clustering1, n and nmiValue are declared on lines that are
// not visible in this listing; presumably they are parameters of this
// method — confirm.
214 std::vector<std::vector<int>> contingencyMatrix;
215 std::vector<int> sumLines, sumColumns;
// NOTE(review): the value returned by computeContingencyTables is discarded
// here, so an error reported by it does not stop the two computations
// below — confirm this is intended.
216 computeContingencyTables(
217 clustering1, clustering2, n, contingencyMatrix, sumLines, sumColumns);
// Both metrics are derived from the same tables and the same point count n.
219 computeARI(contingencyMatrix, sumLines, sumColumns, n, ariValue);
220 computeNMI(contingencyMatrix, sumLines, sumColumns, n, nmiValue);
// Report through the printMsg facility: a fixed output-size line, then the
// two computed values.
222 this->
printMsg(
"Size of output in ttk/base = 0\n");
224 this->
printMsg(
"Computed NMI value: " + std::to_string(nmiValue) +
"\n");
225 this->
printMsg(
"Computed ARI value: " + std::to_string(ariValue) +
"\n");
// Builds the contingency matrix of two clusterings given as label arrays of
// nPoint entries. Each distinct label of clust1 (resp. clust2) is mapped to
// a row (resp. column) index, contingencyMatrix is resized to
// (#labels in clust1) x (#labels in clust2), and cell [r][c] counts the
// points whose labels map to row r and column c. sumCol accumulates the
// per-column totals; sumLin is sized to the number of rows and presumably
// receives the per-row totals (the assignment is not visible in this
// listing — confirm). Reports "Error: clustering on zero points." through
// printErr; the return values are not visible here.
int computeContingencyTables(const int *clust1, const int *clust2, const size_t nPoint, std::vector< std::vector< int > > &contingencyMatrix, std::vector< int > &sumLin, std::vector< int > &sumCol) const
printMsg(debug::output::BOLD+" | | | | | . \\ | | (__| | / __/| |_| / __/|__ _|"+debug::output::ENDCOLOR, debug::Priority::PERFORMANCE, debug::LineMode::NEW, stream)