TTK
Loading...
Searching...
No Matches
DimensionReduction.cpp
Go to the documentation of this file.
2#include <TopoMap.h>
3
4#define VALUE_TO_STRING(x) #x
5#define VALUE(x) VALUE_TO_STRING(x)
6
7#ifdef TTK_ENABLE_SCIKIT_LEARN
8#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
9#include <Python.h>
10#include <numpy/arrayobject.h>
11#endif
12
13using namespace std;
14using namespace ttk;
15
17 this->setDebugMsgPrefix("DimensionReduction");
18
19 // default backend
21
22#ifdef TTK_ENABLE_SCIKIT_LEARN
23 auto finalize_callback = []() { Py_Finalize(); };
24
25 if(!Py_IsInitialized()) {
26 Py_Initialize();
27 atexit(finalize_callback);
28 }
29
30 const char *version = Py_GetVersion();
31 if(version[0] >= '3') {
32 this->printMsg("Initializing Python " + std::to_string(version[0])
33 + std::to_string(version[1]) + std::to_string(version[2]));
34 } else {
35 this->printErr("Python 3 + is required :" + std::string{version}
36 + " is provided.");
37 }
38
39 majorVersion_ = version[0];
40#endif
41}
42
44 std::vector<std::vector<double>> &outputEmbedding,
45 const std::vector<double> &inputMatrix,
46 const int nRows,
47 const int nColumns,
48 int *insertionTimeForTopomap) const {
49
50#ifndef TTK_ENABLE_SCIKIT_LEARN
51 TTK_FORCE_USE(nColumns);
52#endif
53
54 Timer t;
55
56 if(this->Method == METHOD::TOPOMAP) {
57 TopoMap topomap(
59 topomap.setDebugLevel(this->debugLevel_);
60 topomap.setThreadNumber(this->threadNumber_);
61
62 std::vector<double> coordsTopomap(2 * nRows);
63 topomap.execute<double>(coordsTopomap.data(), insertionTimeForTopomap,
64 inputMatrix, IsInputADistanceMatrix, nRows);
65 outputEmbedding.resize(2);
66 outputEmbedding[0].resize(nRows);
67 outputEmbedding[1].resize(nRows);
68 for(int i = 0; i < nRows; i++) {
69 outputEmbedding[0][i] = coordsTopomap[2 * i];
70 outputEmbedding[1][i] = coordsTopomap[2 * i + 1];
71 }
72
73 this->printMsg(
74 "Computed TopoMap", 1.0, t.getElapsedTime(), this->threadNumber_);
75 return 0;
76 }
77
78 if(this->Method == METHOD::AE) {
79#ifdef TTK_ENABLE_TORCH
87
88 outputEmbedding.resize(NumberOfComponents);
89 for(int d = 0; d < NumberOfComponents; d++)
90 outputEmbedding[d].resize(nRows);
91
92 tcdr.execute(outputEmbedding, inputMatrix, nRows);
93
94 this->printMsg("Computed AE dimension reduction", 1.0, t.getElapsedTime(),
96 return 0;
97#else
98 this->printErr("Unavailable backend: Torch is required.");
99 return 1;
100#endif
101 }
102
103#ifdef TTK_ENABLE_SCIKIT_LEARN
104#ifndef TTK_ENABLE_KAMIKAZE
105 if(majorVersion_ < '3')
106 return -1;
107 if(ModulePath.empty())
108 return -2;
109 if(ModuleName.empty())
110 return -3;
111 if(FunctionName.empty())
112 return -4;
113#endif
114
115 const int numberOfComponents = std::max(2, this->NumberOfComponents);
116
117 const int numberOfNeighbors = std::max(1, this->NumberOfNeighbors);
118
119 // declared here to avoid crossing initialization with goto
120 vector<PyObject *> gc;
121 PyObject *pArray;
122 PyObject *pPath;
123 PyObject *pSys;
124 PyObject *pName;
125 PyObject *pModule;
126 PyObject *pFunc;
127 PyObject *pMethod;
128 PyObject *pNumberOfComponents;
129 PyObject *pNumberOfNeighbors;
130 PyObject *pJobs;
131 PyObject *pIsDeterministic;
132 PyObject *pReturn;
133 PyObject *pNRows;
134 PyObject *pNColumns;
135 PyObject *pEmbedding;
136 PyObject *pSEParams;
137 PyObject *pLLEParams;
138 PyObject *pMDSParams;
139 PyObject *pTSNEParams;
140 PyObject *pISOParams;
141 PyObject *pPCAParams;
142 PyObject *pParams;
143 PyArrayObject *npArr;
144 PyArrayObject *npEmbedding;
145
146 string modulePath;
147
148 if(PyArray_API == nullptr) {
149#ifndef __clang_analyzer__
150 import_array1(-1);
151#endif // __clang_analyzer__
152 }
153 if(PyArray_API == nullptr) {
154 return -5;
155 }
156
157 // convert the input matrix into a NumPy array.
158 const int numberOfDimensions = 2;
159 npy_intp dimensions[2]{nRows, nColumns};
160
161 std::vector<std::string> methodToString{
162 "SE", "LLE", "MDS", "t-SNE", "IsoMap", "PCA"};
163
164 pArray = PyArray_SimpleNewFromData(numberOfDimensions, dimensions, NPY_DOUBLE,
165 const_cast<double *>(inputMatrix.data()));
166#ifndef TTK_ENABLE_KAMIKAZE
167 if(!pArray) {
168 this->printErr("Python: failed to convert the array.");
169 goto collect_garbage;
170 }
171#endif
172 gc.push_back(pArray);
173
174 npArr = reinterpret_cast<PyArrayObject *>(pArray);
175
176 pSys = PyImport_ImportModule("sys");
177#ifndef TTK_ENABLE_KAMIKAZE
178 if(!pSys) {
179 this->printErr("Python: failed to load the sys module.");
180 goto collect_garbage;
181 }
182#endif
183 gc.push_back(pSys);
184
185 pPath = PyObject_GetAttrString(pSys, "path");
186#ifndef TTK_ENABLE_KAMIKAZE
187 if(!pPath) {
188 this->printErr("Python: failed to get the path variable.");
189 goto collect_garbage;
190 }
191#endif
192 gc.push_back(pPath);
193
194 if(ModulePath == "default")
195 modulePath = VALUE(TTK_SCRIPTS_PATH);
196 else
197 modulePath = ModulePath;
198
199 this->printMsg("Loading Python script from: " + modulePath);
200 PyList_Append(pPath, PyUnicode_FromString(modulePath.data()));
201
202 // set other parameters
203 pNumberOfComponents = PyLong_FromLong(numberOfComponents);
204#ifndef TTK_ENABLE_KAMIKAZE
205 if(!pNumberOfComponents) {
206 this->printErr("Python: cannot convert pNumberOfComponents.");
207 goto collect_garbage;
208 }
209#endif
210 gc.push_back(pNumberOfComponents);
211
212 pNumberOfNeighbors = PyLong_FromLong(numberOfNeighbors);
213#ifndef TTK_ENABLE_KAMIKAZE
214 if(!pNumberOfNeighbors) {
215 this->printErr("Python: cannot convert pNumberOfNeighbors.");
216 goto collect_garbage;
217 }
218#endif
219 gc.push_back(pNumberOfNeighbors);
220
221 pMethod = PyLong_FromLong(static_cast<long>(this->Method));
222#ifndef TTK_ENABLE_KAMIKAZE
223 if(!pMethod) {
224 this->printErr("Python: cannot convert pMethod.");
225 goto collect_garbage;
226 }
227#endif
228 gc.push_back(pMethod);
229
230 if(threadNumber_ > 1 && this->Method == METHOD::MDS) { // MDS
231 this->printWrn(
232 "MDS is known to be instable when used with multiple threads");
233 }
234 pJobs = PyLong_FromLong(threadNumber_);
235#ifndef TTK_ENABLE_KAMIKAZE
236 if(!pJobs) {
237 this->printErr("Python: cannot convert pJobs.");
238 goto collect_garbage;
239 }
240#endif
241
242 pIsDeterministic = PyLong_FromLong(static_cast<long>(this->IsDeterministic));
243#ifndef TTK_ENABLE_KAMIKAZE
244 if(!pIsDeterministic) {
245 this->printErr("Python: cannot convert pIsDeterministic.");
246 goto collect_garbage;
247 }
248#endif
249
250 // load module
251 pName = PyUnicode_FromString(ModuleName.data());
252#ifndef TTK_ENABLE_KAMIKAZE
253 if(!pName) {
254 this->printErr("Python: moduleName parsing failed.");
255 goto collect_garbage;
256 }
257#endif
258 gc.push_back(pName);
259
260 pModule = PyImport_Import(pName);
261#ifndef TTK_ENABLE_KAMIKAZE
262 if(!pModule) {
263 this->printErr("Python: module import failed.");
264 goto collect_garbage;
265 }
266#endif
267 gc.push_back(pModule);
268
269 // configure function
270 pFunc = PyObject_GetAttrString(pModule, FunctionName.data());
271#ifndef TTK_ENABLE_KAMIKAZE
272 if(!pFunc) {
273 this->printErr("Python: functionName parsing failed.");
274 goto collect_garbage;
275 }
276
277 if(!PyCallable_Check(pFunc)) {
278 this->printErr("Python: function call failed.");
279 goto collect_garbage;
280 }
281#endif
282
283 pSEParams = PyList_New(0);
284 PyList_Append(pSEParams, PyUnicode_FromString(se_Affinity.data()));
285 PyList_Append(pSEParams, PyFloat_FromDouble(se_Gamma));
286 PyList_Append(pSEParams, PyUnicode_FromString(se_EigenSolver.data()));
287
288 pLLEParams = PyList_New(0);
289 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_Regularization));
290 PyList_Append(pLLEParams, PyUnicode_FromString(lle_EigenSolver.data()));
291 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_Tolerance));
292 PyList_Append(pLLEParams, PyLong_FromLong(lle_MaxIteration));
293 PyList_Append(pLLEParams, PyUnicode_FromString(lle_Method.data()));
294 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_HessianTolerance));
295 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_ModifiedTolerance));
296 PyList_Append(
297 pLLEParams, PyUnicode_FromString(lle_NeighborsAlgorithm.data()));
298
299 pMDSParams = PyList_New(0);
300 PyList_Append(pMDSParams, PyBool_FromLong(mds_Metric));
301 PyList_Append(pMDSParams, PyLong_FromLong(mds_Init));
302 PyList_Append(pMDSParams, PyLong_FromLong(mds_MaxIteration));
303 PyList_Append(pMDSParams, PyLong_FromLong(mds_Verbose));
304 PyList_Append(pMDSParams, PyFloat_FromDouble(mds_Epsilon));
305 PyList_Append(pMDSParams, PyUnicode_FromString(mds_Dissimilarity.data()));
306
307 pTSNEParams = PyList_New(0);
308 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Perplexity));
309 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Exaggeration));
310 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_LearningRate));
311 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_MaxIteration));
312 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_MaxIterationProgress));
313 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_GradientThreshold));
314 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Metric.data()));
315 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Init.data()));
316 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_Verbose));
317 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Method.data()));
318 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Angle));
319
320 pISOParams = PyList_New(0);
321 PyList_Append(pISOParams, PyUnicode_FromString(iso_EigenSolver.data()));
322 PyList_Append(pISOParams, PyFloat_FromDouble(iso_Tolerance));
323 PyList_Append(pISOParams, PyLong_FromLong(iso_MaxIteration));
324 PyList_Append(pISOParams, PyUnicode_FromString(iso_PathMethod.data()));
325 PyList_Append(
326 pISOParams, PyUnicode_FromString(iso_NeighborsAlgorithm.data()));
327 PyList_Append(pISOParams, PyUnicode_FromString(iso_Metric.data()));
328
329 pPCAParams = PyList_New(0);
330 PyList_Append(pPCAParams, PyBool_FromLong(pca_Copy));
331 PyList_Append(pPCAParams, PyBool_FromLong(pca_Whiten));
332 PyList_Append(pPCAParams, PyUnicode_FromString(pca_SVDSolver.data()));
333 PyList_Append(pPCAParams, PyFloat_FromDouble(pca_Tolerance));
334 PyList_Append(pPCAParams, PyUnicode_FromString(pca_MaxIteration.data()));
335
336 pParams = PyList_New(0);
337 gc.push_back(pParams);
338
339 PyList_Append(pParams, pSEParams);
340 PyList_Append(pParams, pLLEParams);
341 PyList_Append(pParams, pMDSParams);
342 PyList_Append(pParams, pTSNEParams);
343 PyList_Append(pParams, pISOParams);
344 PyList_Append(pParams, pPCAParams);
345
346 pReturn = PyObject_CallFunctionObjArgs(
347 pFunc, npArr, pMethod, pNumberOfComponents, pNumberOfNeighbors, pJobs,
348 pIsDeterministic, pParams, NULL);
349#ifndef TTK_ENABLE_KAMIKAZE
350 if(!pReturn) {
351 this->printErr("Python: function returned invalid object.");
352 goto collect_garbage;
353 }
354#endif
355 gc.push_back(pReturn);
356
// Unpack the value returned by the Python function. It is read as a list
// whose items 0, 1 and 2 are used as row count, column count and embedding
// data respectively.
// NOTE(review): per the CPython API, PyList_GetItem() yields borrowed
// references, which would explain why none of the three items below is
// registered in gc -- confirm before changing the ownership handling.
357 pNRows = PyList_GetItem(pReturn, 0);
358#ifndef TTK_ENABLE_KAMIKAZE
359 if(!pNRows) {
360 this->printErr("Python: function returned invalid number of rows");
361 goto collect_garbage;
362 }
363#endif
364
365 pNColumns = PyList_GetItem(pReturn, 1);
366#ifndef TTK_ENABLE_KAMIKAZE
367 if(!pNColumns) {
368 this->printErr("Python: function returned invalid number of columns.");
369 goto collect_garbage;
370 }
371#endif
372
373 pEmbedding = PyList_GetItem(pReturn, 2);
374#ifndef TTK_ENABLE_KAMIKAZE
375 if(!pEmbedding) {
376 this->printErr("Python: function returned invalid embedding data.");
377 goto collect_garbage;
378 }
379#endif
380
// Copy the embedding only when the reported sizes match the request. On a
// mismatch outputEmbedding is left untouched and no error is reported by this
// span.
381 if(PyLong_AsLong(pNRows) == nRows
382 and PyLong_AsLong(pNColumns) == numberOfComponents) {
// NOTE(review): pEmbedding is reinterpreted as a NumPy array without a type
// check in this span -- presumably the Python script guarantees it; verify.
383 npEmbedding = reinterpret_cast<PyArrayObject *>(pEmbedding);
384
// One output vector per component, each holding nRows values.
385 outputEmbedding.resize(numberOfComponents);
386 for(int i = 0; i < numberOfComponents; ++i) {
387 outputEmbedding[i].resize(nRows);
// The buffer is indexed as c_out[i * nRows + j], i.e. the nRows values of
// component i are read from consecutive positions.
// Only NPY_FLOAT and NPY_DOUBLE buffers are copied; for any other element
// type the vector is resized but nothing is written into it.
388 if(PyArray_TYPE(npEmbedding) == NPY_FLOAT) {
389 float *c_out = reinterpret_cast<float *>(PyArray_DATA(npEmbedding));
390 for(int j = 0; j < nRows; ++j)
391 outputEmbedding[i][j] = c_out[i * nRows + j];
392 } else if(PyArray_TYPE(npEmbedding) == NPY_DOUBLE) {
393 double *c_out = reinterpret_cast<double *>(PyArray_DATA(npEmbedding));
394 for(int j = 0; j < nRows; ++j)
395 outputEmbedding[i][j] = c_out[i * nRows + j];
396 }
397 }
398 }
399
400 // normal control-flow
401 for(auto i : gc)
402 Py_DECREF(i);
403
404 this->printMsg("Computed " + methodToString[static_cast<int>(this->Method)],
405 1.0, t.getElapsedTime(), this->threadNumber_);
406
407 return 0;
408
409 // error control-flow
410#ifndef TTK_ENABLE_KAMIKAZE
411collect_garbage:
412#endif
413 for(auto i : gc)
414 Py_DECREF(i);
415 return -6;
416
417#endif
418
419 return 0;
420}
#define TTK_FORCE_USE(x)
Force the compiler to use the function/method parameter.
Definition BaseClass.h:57
#define VALUE(x)
virtual int setThreadNumber(const int threadNumber)
Definition BaseClass.h:80
int debugLevel_
Definition Debug.h:379
int printWrn(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition Debug.h:159
void setDebugMsgPrefix(const std::string &prefix)
Definition Debug.h:364
virtual int setDebugLevel(const int &debugLevel)
Definition Debug.cpp:147
int printErr(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition Debug.h:149
TopologicalDimensionReduction::REGUL ae_Method
TopologicalDimensionReduction::OPTIMIZER ae_Optimizer
TopoMap::STRATEGY topomap_Strategy
TopologicalDimensionReduction::MODEL ae_Model
void setInputMethod(METHOD method)
int execute(std::vector< std::vector< double > > &outputEmbedding, const std::vector< double > &inputMatrix, const int nRows, const int nColumns, int *insertionTimeForTopoMap=nullptr) const
double getElapsedTime()
Definition Timer.h:15
int execute(T *outputCoords, int *insertionTime, const std::vector< T > &inputMatrix, bool isDistMat, size_t n)
Computes the TopoMap projection.
Definition TopoMap.h:183
TTK base class that embeds points into 2D, under topological constraints.
TTK base package defining the standard types.
printMsg(debug::output::BOLD+" | | | | | . \\ | | (__| | / __/| |_| / __/| (_) |"+debug::output::ENDCOLOR, debug::Priority::PERFORMANCE, debug::LineMode::NEW, stream)