TTK
Loading...
Searching...
No Matches
DimensionReduction.cpp
Go to the documentation of this file.
2#include <TopoMap.h>
3
4#include <map>
5
6#define VALUE_TO_STRING(x) #x
7#define VALUE(x) VALUE_TO_STRING(x)
8
9#ifdef TTK_ENABLE_SCIKIT_LEARN
10#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
11#include <Python.h>
12#include <numpy/arrayobject.h>
13#endif
14
15using namespace std;
16using namespace ttk;
17
19 this->setDebugMsgPrefix("DimensionReduction");
20
21 // default backend
23
24#ifdef TTK_ENABLE_SCIKIT_LEARN
25 auto finalize_callback = []() { Py_Finalize(); };
26
27 if(!Py_IsInitialized()) {
28 Py_Initialize();
29 atexit(finalize_callback);
30 }
31
32 const char *version = Py_GetVersion();
33 if(version[0] >= '3') {
34 this->printMsg("Initializing Python " + std::to_string(version[0])
35 + std::to_string(version[1]) + std::to_string(version[2]));
36 } else {
37 this->printErr("Python 3 + is required :" + std::string{version}
38 + " is provided.");
39 }
40
41 majorVersion_ = version[0];
42#endif
43}
44
46 std::vector<std::vector<double>> &outputEmbedding,
47 const std::vector<double> &inputMatrix,
48 const int nRows,
49 const int nColumns,
50 int *insertionTimeForTopomap) const {
51
52#ifndef TTK_ENABLE_SCIKIT_LEARN
53 TTK_FORCE_USE(nColumns);
54#endif
55
56 Timer t;
57
58 if(this->Method == METHOD::TOPOMAP) {
59 TopoMap topomap(
61 topomap.setDebugLevel(this->debugLevel_);
62 topomap.setThreadNumber(this->threadNumber_);
63
64 std::vector<double> coordsTopomap(2 * nRows);
65 topomap.execute<double>(coordsTopomap.data(), insertionTimeForTopomap,
66 inputMatrix, IsInputADistanceMatrix, nRows);
67 outputEmbedding.resize(2);
68 outputEmbedding[0].resize(nRows);
69 outputEmbedding[1].resize(nRows);
70 for(int i = 0; i < nRows; i++) {
71 outputEmbedding[0][i] = coordsTopomap[2 * i];
72 outputEmbedding[1][i] = coordsTopomap[2 * i + 1];
73 }
74
75 this->printMsg(
76 "Computed TopoMap", 1.0, t.getElapsedTime(), this->threadNumber_);
77 return 0;
78 }
79
80#ifdef TTK_ENABLE_SCIKIT_LEARN
81#ifndef TTK_ENABLE_KAMIKAZE
82 if(majorVersion_ < '3')
83 return -1;
84 if(ModulePath.empty())
85 return -2;
86 if(ModuleName.empty())
87 return -3;
88 if(FunctionName.empty())
89 return -4;
90#endif
91
92 const int numberOfComponents = std::max(2, this->NumberOfComponents);
93
94 const int numberOfNeighbors = std::max(1, this->NumberOfNeighbors);
95
96 // declared here to avoid crossing initialization with goto
97 vector<PyObject *> gc;
98 PyObject *pArray;
99 PyObject *pPath;
100 PyObject *pSys;
101 PyObject *pName;
102 PyObject *pModule;
103 PyObject *pFunc;
104 PyObject *pMethod;
105 PyObject *pNumberOfComponents;
106 PyObject *pNumberOfNeighbors;
107 PyObject *pJobs;
108 PyObject *pIsDeterministic;
109 PyObject *pReturn;
110 PyObject *pNRows;
111 PyObject *pNColumns;
112 PyObject *pEmbedding;
113 PyObject *pSEParams;
114 PyObject *pLLEParams;
115 PyObject *pMDSParams;
116 PyObject *pTSNEParams;
117 PyObject *pISOParams;
118 PyObject *pPCAParams;
119 PyObject *pParams;
120 PyArrayObject *npArr;
121 PyArrayObject *npEmbedding;
122
123 string modulePath;
124
125 if(PyArray_API == nullptr) {
126#ifndef __clang_analyzer__
127 import_array1(-1);
128#endif // __clang_analyzer__
129 }
130 if(PyArray_API == nullptr) {
131 return -5;
132 }
133
134 // convert the input matrix into a NumPy array.
135 const int numberOfDimensions = 2;
136 npy_intp dimensions[2]{nRows, nColumns};
137
138 std::vector<std::string> methodToString{
139 "SE", "LLE", "MDS", "t-SNE", "IsoMap", "PCA"};
140
141 pArray = PyArray_SimpleNewFromData(numberOfDimensions, dimensions, NPY_DOUBLE,
142 const_cast<double *>(inputMatrix.data()));
143#ifndef TTK_ENABLE_KAMIKAZE
144 if(!pArray) {
145 this->printErr("Python: failed to convert the array.");
146 goto collect_garbage;
147 }
148#endif
149 gc.push_back(pArray);
150
151 npArr = reinterpret_cast<PyArrayObject *>(pArray);
152
153 pSys = PyImport_ImportModule("sys");
154#ifndef TTK_ENABLE_KAMIKAZE
155 if(!pSys) {
156 this->printErr("Python: failed to load the sys module.");
157 goto collect_garbage;
158 }
159#endif
160 gc.push_back(pSys);
161
162 pPath = PyObject_GetAttrString(pSys, "path");
163#ifndef TTK_ENABLE_KAMIKAZE
164 if(!pPath) {
165 this->printErr("Python: failed to get the path variable.");
166 goto collect_garbage;
167 }
168#endif
169 gc.push_back(pPath);
170
171 if(ModulePath == "default")
172 modulePath = VALUE(TTK_SCRIPTS_PATH);
173 else
174 modulePath = ModulePath;
175
176 this->printMsg("Loading Python script from: " + modulePath);
177 PyList_Append(pPath, PyUnicode_FromString(modulePath.data()));
178
179 // set other parameters
180 pNumberOfComponents = PyLong_FromLong(numberOfComponents);
181#ifndef TTK_ENABLE_KAMIKAZE
182 if(!pNumberOfComponents) {
183 this->printErr("Python: cannot convert pNumberOfComponents.");
184 goto collect_garbage;
185 }
186#endif
187 gc.push_back(pNumberOfComponents);
188
189 pNumberOfNeighbors = PyLong_FromLong(numberOfNeighbors);
190#ifndef TTK_ENABLE_KAMIKAZE
191 if(!pNumberOfNeighbors) {
192 this->printErr("Python: cannot convert pNumberOfNeighbors.");
193 goto collect_garbage;
194 }
195#endif
196 gc.push_back(pNumberOfNeighbors);
197
198 pMethod = PyLong_FromLong(static_cast<long>(this->Method));
199#ifndef TTK_ENABLE_KAMIKAZE
200 if(!pMethod) {
201 this->printErr("Python: cannot convert pMethod.");
202 goto collect_garbage;
203 }
204#endif
205 gc.push_back(pMethod);
206
207 if(threadNumber_ > 1 && this->Method == METHOD::MDS) { // MDS
208 this->printWrn(
209 "MDS is known to be instable when used with multiple threads");
210 }
211 pJobs = PyLong_FromLong(threadNumber_);
212#ifndef TTK_ENABLE_KAMIKAZE
213 if(!pJobs) {
214 this->printErr("Python: cannot convert pJobs.");
215 goto collect_garbage;
216 }
217#endif
218
219 pIsDeterministic = PyLong_FromLong(static_cast<long>(this->IsDeterministic));
220#ifndef TTK_ENABLE_KAMIKAZE
221 if(!pIsDeterministic) {
222 this->printErr("Python: cannot convert pIsDeterministic.");
223 goto collect_garbage;
224 }
225#endif
226
227 // load module
228 pName = PyUnicode_FromString(ModuleName.data());
229#ifndef TTK_ENABLE_KAMIKAZE
230 if(!pName) {
231 this->printErr("Python: moduleName parsing failed.");
232 goto collect_garbage;
233 }
234#endif
235 gc.push_back(pName);
236
237 pModule = PyImport_Import(pName);
238#ifndef TTK_ENABLE_KAMIKAZE
239 if(!pModule) {
240 this->printErr("Python: module import failed.");
241 goto collect_garbage;
242 }
243#endif
244 gc.push_back(pModule);
245
246 // configure function
247 pFunc = PyObject_GetAttrString(pModule, FunctionName.data());
248#ifndef TTK_ENABLE_KAMIKAZE
249 if(!pFunc) {
250 this->printErr("Python: functionName parsing failed.");
251 goto collect_garbage;
252 }
253
254 if(!PyCallable_Check(pFunc)) {
255 this->printErr("Python: function call failed.");
256 goto collect_garbage;
257 }
258#endif
259
260 pSEParams = PyList_New(0);
261 PyList_Append(pSEParams, PyUnicode_FromString(se_Affinity.data()));
262 PyList_Append(pSEParams, PyFloat_FromDouble(se_Gamma));
263 PyList_Append(pSEParams, PyUnicode_FromString(se_EigenSolver.data()));
264
265 pLLEParams = PyList_New(0);
266 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_Regularization));
267 PyList_Append(pLLEParams, PyUnicode_FromString(lle_EigenSolver.data()));
268 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_Tolerance));
269 PyList_Append(pLLEParams, PyLong_FromLong(lle_MaxIteration));
270 PyList_Append(pLLEParams, PyUnicode_FromString(lle_Method.data()));
271 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_HessianTolerance));
272 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_ModifiedTolerance));
273 PyList_Append(
274 pLLEParams, PyUnicode_FromString(lle_NeighborsAlgorithm.data()));
275
276 pMDSParams = PyList_New(0);
277 PyList_Append(pMDSParams, PyBool_FromLong(mds_Metric));
278 PyList_Append(pMDSParams, PyLong_FromLong(mds_Init));
279 PyList_Append(pMDSParams, PyLong_FromLong(mds_MaxIteration));
280 PyList_Append(pMDSParams, PyLong_FromLong(mds_Verbose));
281 PyList_Append(pMDSParams, PyFloat_FromDouble(mds_Epsilon));
282 PyList_Append(pMDSParams, PyUnicode_FromString(mds_Dissimilarity.data()));
283
284 pTSNEParams = PyList_New(0);
285 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Perplexity));
286 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Exaggeration));
287 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_LearningRate));
288 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_MaxIteration));
289 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_MaxIterationProgress));
290 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_GradientThreshold));
291 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Metric.data()));
292 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Init.data()));
293 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_Verbose));
294 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Method.data()));
295 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Angle));
296
297 pISOParams = PyList_New(0);
298 PyList_Append(pISOParams, PyUnicode_FromString(iso_EigenSolver.data()));
299 PyList_Append(pISOParams, PyFloat_FromDouble(iso_Tolerance));
300 PyList_Append(pISOParams, PyLong_FromLong(iso_MaxIteration));
301 PyList_Append(pISOParams, PyUnicode_FromString(iso_PathMethod.data()));
302 PyList_Append(
303 pISOParams, PyUnicode_FromString(iso_NeighborsAlgorithm.data()));
304 PyList_Append(pISOParams, PyUnicode_FromString(iso_Metric.data()));
305
306 pPCAParams = PyList_New(0);
307 PyList_Append(pPCAParams, PyBool_FromLong(pca_Copy));
308 PyList_Append(pPCAParams, PyBool_FromLong(pca_Whiten));
309 PyList_Append(pPCAParams, PyUnicode_FromString(pca_SVDSolver.data()));
310 PyList_Append(pPCAParams, PyFloat_FromDouble(pca_Tolerance));
311 PyList_Append(pPCAParams, PyUnicode_FromString(pca_MaxIteration.data()));
312
313 pParams = PyList_New(0);
314 gc.push_back(pParams);
315
316 PyList_Append(pParams, pSEParams);
317 PyList_Append(pParams, pLLEParams);
318 PyList_Append(pParams, pMDSParams);
319 PyList_Append(pParams, pTSNEParams);
320 PyList_Append(pParams, pISOParams);
321 PyList_Append(pParams, pPCAParams);
322
323 pReturn = PyObject_CallFunctionObjArgs(
324 pFunc, npArr, pMethod, pNumberOfComponents, pNumberOfNeighbors, pJobs,
325 pIsDeterministic, pParams, NULL);
326#ifndef TTK_ENABLE_KAMIKAZE
327 if(!pReturn) {
328 this->printErr("Python: function returned invalid object.");
329 goto collect_garbage;
330 }
331#endif
332 gc.push_back(pReturn);
333
334 pNRows = PyList_GetItem(pReturn, 0);
335#ifndef TTK_ENABLE_KAMIKAZE
336 if(!pNRows) {
337 this->printErr("Python: function returned invalid number of rows");
338 goto collect_garbage;
339 }
340#endif
341
342 pNColumns = PyList_GetItem(pReturn, 1);
343#ifndef TTK_ENABLE_KAMIKAZE
344 if(!pNColumns) {
345 this->printErr("Python: function returned invalid number of columns.");
346 goto collect_garbage;
347 }
348#endif
349
350 pEmbedding = PyList_GetItem(pReturn, 2);
351#ifndef TTK_ENABLE_KAMIKAZE
352 if(!pEmbedding) {
353 this->printErr("Python: function returned invalid embedding data.");
354 goto collect_garbage;
355 }
356#endif
357
358 if(PyLong_AsLong(pNRows) == nRows
359 and PyLong_AsLong(pNColumns) == numberOfComponents) {
360 npEmbedding = reinterpret_cast<PyArrayObject *>(pEmbedding);
361
362 outputEmbedding.resize(numberOfComponents);
363 for(int i = 0; i < numberOfComponents; ++i) {
364 outputEmbedding[i].resize(nRows);
365 if(PyArray_TYPE(npEmbedding) == NPY_FLOAT) {
366 float *c_out = reinterpret_cast<float *>(PyArray_DATA(npEmbedding));
367 for(int j = 0; j < nRows; ++j)
368 outputEmbedding[i][j] = c_out[i * nRows + j];
369 } else if(PyArray_TYPE(npEmbedding) == NPY_DOUBLE) {
370 double *c_out = reinterpret_cast<double *>(PyArray_DATA(npEmbedding));
371 for(int j = 0; j < nRows; ++j)
372 outputEmbedding[i][j] = c_out[i * nRows + j];
373 }
374 }
375 }
376
377 // normal control-flow
378 for(auto i : gc)
379 Py_DECREF(i);
380
381 this->printMsg("Computed " + methodToString[static_cast<int>(this->Method)],
382 1.0, t.getElapsedTime(), this->threadNumber_);
383
384 return 0;
385
386 // error control-flow
387#ifndef TTK_ENABLE_KAMIKAZE
388collect_garbage:
389#endif
390 for(auto i : gc)
391 Py_DECREF(i);
392 return -6;
393
394#endif
395
396 return 0;
397}
#define TTK_FORCE_USE(x)
Force the compiler to use the function/method parameter.
Definition BaseClass.h:57
#define VALUE(x)
virtual int setThreadNumber(const int threadNumber)
Definition BaseClass.h:80
int debugLevel_
Definition Debug.h:379
int printWrn(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition Debug.h:159
void setDebugMsgPrefix(const std::string &prefix)
Definition Debug.h:364
virtual int setDebugLevel(const int &debugLevel)
Definition Debug.cpp:147
int printErr(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition Debug.h:149
TopoMap::STRATEGY topomap_Strategy
void setInputMethod(METHOD method)
int execute(std::vector< std::vector< double > > &outputEmbedding, const std::vector< double > &inputMatrix, const int nRows, const int nColumns, int *insertionTimeForTopoMap=nullptr) const
double getElapsedTime()
Definition Timer.h:15
int execute(T *outputCoords, int *insertionTime, const std::vector< T > &inputMatrix, bool isDistMat, size_t n)
Computes the TopoMap projection.
Definition TopoMap.h:183
std::string to_string(__int128)
Definition ripserpy.cpp:99
The Topology ToolKit.
printMsg(debug::output::BOLD+" | | | | | . \\ | | (__| | / __/| |_| / __/|__ _|"+debug::output::ENDCOLOR, debug::Priority::PERFORMANCE, debug::LineMode::NEW, stream)