TTK
Loading...
Searching...
No Matches
DimensionReduction.cpp
Go to the documentation of this file.
2
3#include <map>
4
5#define VALUE_TO_STRING(x) #x
6#define VALUE(x) VALUE_TO_STRING(x)
7
8#ifdef TTK_ENABLE_SCIKIT_LEARN
9#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
10#include <Python.h>
11#include <numpy/arrayobject.h>
12#endif
13
14using namespace std;
15using namespace ttk;
16
18 this->setDebugMsgPrefix("DimensionReduction");
19
20#ifdef TTK_ENABLE_SCIKIT_LEARN
21 auto finalize_callback = []() { Py_Finalize(); };
22
23 if(!Py_IsInitialized()) {
24 Py_Initialize();
25 atexit(finalize_callback);
26 }
27
28 const char *version = Py_GetVersion();
29 if(version[0] >= '3') {
30 this->printMsg("Initializing Python " + std::to_string(version[0])
31 + std::to_string(version[1]) + std::to_string(version[2]));
32 } else {
33 this->printErr("Python 3 + is required :" + std::string{version}
34 + " is provided.");
35 }
36
37 majorVersion_ = version[0];
38#endif
39}
40
// NOTE(review): the signature line of this definition is missing from the
// listing; only the body below is visible. The body returns a boolean.
// Returns true when the file is compiled with TTK_ENABLE_SCIKIT_LEARN.
42#ifdef TTK_ENABLE_SCIKIT_LEARN
43 return true;
44#else
// Built without scikit-learn support: emit two messages through printErr()
// and return false.
45 this->printErr("Warning: scikit-learn support disabled: Python/Numpy may "
46 "not be installed properly");
47 this->printErr("Module features disabled.");
48 return false;
49#endif
50}
51
53 std::vector<std::vector<double>> &outputEmbedding,
54 const std::vector<double> &inputMatrix,
55 const int nRows,
56 const int nColumns) const {
57
58#ifdef TTK_ENABLE_SCIKIT_LEARN
59#ifndef TTK_ENABLE_KAMIKAZE
60 if(majorVersion_ < '3')
61 return -1;
62 if(ModulePath.empty())
63 return -2;
64 if(ModuleName.empty())
65 return -3;
66 if(FunctionName.empty())
67 return -4;
68#endif
69
70 Timer t;
71
72 const int numberOfComponents = std::max(2, this->NumberOfComponents);
73 const int numberOfNeighbors = std::max(1, this->NumberOfNeighbors);
74
75 // declared here to avoid crossing initialization with goto
76 vector<PyObject *> gc;
77 PyObject *pArray;
78 PyObject *pPath;
79 PyObject *pSys;
80 PyObject *pName;
81 PyObject *pModule;
82 PyObject *pFunc;
83 PyObject *pMethod;
84 PyObject *pNumberOfComponents;
85 PyObject *pNumberOfNeighbors;
86 PyObject *pJobs;
87 PyObject *pIsDeterministic;
88 PyObject *pReturn;
89 PyObject *pNRows;
90 PyObject *pNColumns;
91 PyObject *pEmbedding;
92 PyObject *pSEParams;
93 PyObject *pLLEParams;
94 PyObject *pMDSParams;
95 PyObject *pTSNEParams;
96 PyObject *pISOParams;
97 PyObject *pPCAParams;
98 PyObject *pParams;
99 PyArrayObject *npArr;
100 PyArrayObject *npEmbedding;
101
102 string modulePath;
103
104 if(PyArray_API == nullptr) {
105#ifndef __clang_analyzer__
106 import_array1(-1);
107#endif // __clang_analyzer__
108 }
109 if(PyArray_API == nullptr) {
110 return -5;
111 }
112
113 // convert the input matrix into a NumPy array.
114 const int numberOfDimensions = 2;
115 npy_intp dimensions[2]{nRows, nColumns};
116
117 std::vector<std::string> methodToString{
118 "SE", "LLE", "MDS", "t-SNE", "IsoMap", "PCA"};
119
120 pArray = PyArray_SimpleNewFromData(numberOfDimensions, dimensions, NPY_DOUBLE,
121 const_cast<double *>(inputMatrix.data()));
122#ifndef TTK_ENABLE_KAMIKAZE
123 if(!pArray) {
124 this->printErr("Python: failed to convert the array.");
125 goto collect_garbage;
126 }
127#endif
128 gc.push_back(pArray);
129
130 npArr = reinterpret_cast<PyArrayObject *>(pArray);
131
132 pSys = PyImport_ImportModule("sys");
133#ifndef TTK_ENABLE_KAMIKAZE
134 if(!pSys) {
135 this->printErr("Python: failed to load the sys module.");
136 goto collect_garbage;
137 }
138#endif
139 gc.push_back(pSys);
140
141 pPath = PyObject_GetAttrString(pSys, "path");
142#ifndef TTK_ENABLE_KAMIKAZE
143 if(!pPath) {
144 this->printErr("Python: failed to get the path variable.");
145 goto collect_garbage;
146 }
147#endif
148 gc.push_back(pPath);
149
150 if(ModulePath == "default")
151 modulePath = VALUE(TTK_SCRIPTS_PATH);
152 else
153 modulePath = ModulePath;
154
155 this->printMsg("Loading Python script from: " + modulePath);
156 PyList_Append(pPath, PyUnicode_FromString(modulePath.data()));
157
158 // set other parameters
159 pNumberOfComponents = PyLong_FromLong(numberOfComponents);
160#ifndef TTK_ENABLE_KAMIKAZE
161 if(!pNumberOfComponents) {
162 this->printErr("Python: cannot convert pNumberOfComponents.");
163 goto collect_garbage;
164 }
165#endif
166 gc.push_back(pNumberOfComponents);
167
168 pNumberOfNeighbors = PyLong_FromLong(numberOfNeighbors);
169#ifndef TTK_ENABLE_KAMIKAZE
170 if(!pNumberOfNeighbors) {
171 this->printErr("Python: cannot convert pNumberOfNeighbors.");
172 goto collect_garbage;
173 }
174#endif
175 gc.push_back(pNumberOfNeighbors);
176
177 pMethod = PyLong_FromLong(static_cast<long>(this->Method));
178#ifndef TTK_ENABLE_KAMIKAZE
179 if(!pMethod) {
180 this->printErr("Python: cannot convert pMethod.");
181 goto collect_garbage;
182 }
183#endif
184 gc.push_back(pMethod);
185
186 if(threadNumber_ > 1 && this->Method == METHOD::MDS) { // MDS
187 this->printWrn(
188 "MDS is known to be instable when used with multiple threads");
189 }
190 pJobs = PyLong_FromLong(threadNumber_);
191#ifndef TTK_ENABLE_KAMIKAZE
192 if(!pJobs) {
193 this->printErr("Python: cannot convert pJobs.");
194 goto collect_garbage;
195 }
196#endif
197
198 pIsDeterministic = PyLong_FromLong(static_cast<long>(this->IsDeterministic));
199#ifndef TTK_ENABLE_KAMIKAZE
200 if(!pIsDeterministic) {
201 this->printErr("Python: cannot convert pIsDeterministic.");
202 goto collect_garbage;
203 }
204#endif
205
206 // load module
207 pName = PyUnicode_FromString(ModuleName.data());
208#ifndef TTK_ENABLE_KAMIKAZE
209 if(!pName) {
210 this->printErr("Python: moduleName parsing failed.");
211 goto collect_garbage;
212 }
213#endif
214 gc.push_back(pName);
215
216 pModule = PyImport_Import(pName);
217#ifndef TTK_ENABLE_KAMIKAZE
218 if(!pModule) {
219 this->printErr("Python: module import failed.");
220 goto collect_garbage;
221 }
222#endif
223 gc.push_back(pModule);
224
225 // configure function
226 pFunc = PyObject_GetAttrString(pModule, FunctionName.data());
227#ifndef TTK_ENABLE_KAMIKAZE
228 if(!pFunc) {
229 this->printErr("Python: functionName parsing failed.");
230 goto collect_garbage;
231 }
232
233 if(!PyCallable_Check(pFunc)) {
234 this->printErr("Python: function call failed.");
235 goto collect_garbage;
236 }
237#endif
238
239 pSEParams = PyList_New(0);
240 PyList_Append(pSEParams, PyUnicode_FromString(se_Affinity.data()));
241 PyList_Append(pSEParams, PyFloat_FromDouble(se_Gamma));
242 PyList_Append(pSEParams, PyUnicode_FromString(se_EigenSolver.data()));
243
244 pLLEParams = PyList_New(0);
245 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_Regularization));
246 PyList_Append(pLLEParams, PyUnicode_FromString(lle_EigenSolver.data()));
247 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_Tolerance));
248 PyList_Append(pLLEParams, PyLong_FromLong(lle_MaxIteration));
249 PyList_Append(pLLEParams, PyUnicode_FromString(lle_Method.data()));
250 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_HessianTolerance));
251 PyList_Append(pLLEParams, PyFloat_FromDouble(lle_ModifiedTolerance));
252 PyList_Append(
253 pLLEParams, PyUnicode_FromString(lle_NeighborsAlgorithm.data()));
254
255 pMDSParams = PyList_New(0);
256 PyList_Append(pMDSParams, PyBool_FromLong(mds_Metric));
257 PyList_Append(pMDSParams, PyLong_FromLong(mds_Init));
258 PyList_Append(pMDSParams, PyLong_FromLong(mds_MaxIteration));
259 PyList_Append(pMDSParams, PyLong_FromLong(mds_Verbose));
260 PyList_Append(pMDSParams, PyFloat_FromDouble(mds_Epsilon));
261 PyList_Append(pMDSParams, PyUnicode_FromString(mds_Dissimilarity.data()));
262
263 pTSNEParams = PyList_New(0);
264 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Perplexity));
265 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Exaggeration));
266 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_LearningRate));
267 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_MaxIteration));
268 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_MaxIterationProgress));
269 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_GradientThreshold));
270 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Metric.data()));
271 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Init.data()));
272 PyList_Append(pTSNEParams, PyLong_FromLong(tsne_Verbose));
273 PyList_Append(pTSNEParams, PyUnicode_FromString(tsne_Method.data()));
274 PyList_Append(pTSNEParams, PyFloat_FromDouble(tsne_Angle));
275
276 pISOParams = PyList_New(0);
277 PyList_Append(pISOParams, PyUnicode_FromString(iso_EigenSolver.data()));
278 PyList_Append(pISOParams, PyFloat_FromDouble(iso_Tolerance));
279 PyList_Append(pISOParams, PyLong_FromLong(iso_MaxIteration));
280 PyList_Append(pISOParams, PyUnicode_FromString(iso_PathMethod.data()));
281 PyList_Append(
282 pISOParams, PyUnicode_FromString(iso_NeighborsAlgorithm.data()));
283 PyList_Append(pISOParams, PyUnicode_FromString(iso_Metric.data()));
284
285 pPCAParams = PyList_New(0);
286 PyList_Append(pPCAParams, PyBool_FromLong(pca_Copy));
287 PyList_Append(pPCAParams, PyBool_FromLong(pca_Whiten));
288 PyList_Append(pPCAParams, PyUnicode_FromString(pca_SVDSolver.data()));
289 PyList_Append(pPCAParams, PyFloat_FromDouble(pca_Tolerance));
290 PyList_Append(pPCAParams, PyUnicode_FromString(pca_MaxIteration.data()));
291
292 pParams = PyList_New(0);
293 gc.push_back(pParams);
294
295 PyList_Append(pParams, pSEParams);
296 PyList_Append(pParams, pLLEParams);
297 PyList_Append(pParams, pMDSParams);
298 PyList_Append(pParams, pTSNEParams);
299 PyList_Append(pParams, pISOParams);
300 PyList_Append(pParams, pPCAParams);
301
302 pReturn = PyObject_CallFunctionObjArgs(
303 pFunc, npArr, pMethod, pNumberOfComponents, pNumberOfNeighbors, pJobs,
304 pIsDeterministic, pParams, NULL);
305#ifndef TTK_ENABLE_KAMIKAZE
306 if(!pReturn) {
307 this->printErr("Python: function returned invalid object.");
308 goto collect_garbage;
309 }
310#endif
311 gc.push_back(pReturn);
312
313 pNRows = PyList_GetItem(pReturn, 0);
314#ifndef TTK_ENABLE_KAMIKAZE
315 if(!pNRows) {
316 this->printErr("Python: function returned invalid number of rows");
317 goto collect_garbage;
318 }
319#endif
320
321 pNColumns = PyList_GetItem(pReturn, 1);
322#ifndef TTK_ENABLE_KAMIKAZE
323 if(!pNColumns) {
324 this->printErr("Python: function returned invalid number of columns.");
325 goto collect_garbage;
326 }
327#endif
328
329 pEmbedding = PyList_GetItem(pReturn, 2);
330#ifndef TTK_ENABLE_KAMIKAZE
331 if(!pEmbedding) {
332 this->printErr("Python: function returned invalid embedding data.");
333 goto collect_garbage;
334 }
335#endif
336
337 if(PyLong_AsLong(pNRows) == nRows
338 and PyLong_AsLong(pNColumns) == numberOfComponents) {
339 npEmbedding = reinterpret_cast<PyArrayObject *>(pEmbedding);
340
341 outputEmbedding.resize(numberOfComponents);
342 for(int i = 0; i < numberOfComponents; ++i) {
343 outputEmbedding[i].resize(nRows);
344 if(PyArray_TYPE(npEmbedding) == NPY_FLOAT) {
345 float *c_out = reinterpret_cast<float *>(PyArray_DATA(npEmbedding));
346 for(int j = 0; j < nRows; ++j)
347 outputEmbedding[i][j] = c_out[i * nRows + j];
348 } else if(PyArray_TYPE(npEmbedding) == NPY_DOUBLE) {
349 double *c_out = reinterpret_cast<double *>(PyArray_DATA(npEmbedding));
350 for(int j = 0; j < nRows; ++j)
351 outputEmbedding[i][j] = c_out[i * nRows + j];
352 }
353 }
354 }
355
356 // normal control-flow
357 for(auto i : gc)
358 Py_DECREF(i);
359
360 this->printMsg("Computed " + methodToString[static_cast<int>(this->Method)],
361 1.0, t.getElapsedTime(), this->threadNumber_);
362
363 return 0;
364
365 // error control-flow
366#ifndef TTK_ENABLE_KAMIKAZE
367collect_garbage:
368#endif
369 for(auto i : gc)
370 Py_DECREF(i);
371 return -6;
372
373#endif
374
375 return 0;
376}
#define VALUE(x)
int threadNumber_
Definition: BaseClass.h:95
int printWrn(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition: Debug.h:159
void setDebugMsgPrefix(const std::string &prefix)
Definition: Debug.h:364
int printMsg(const std::string &msg, const debug::Priority &priority=debug::Priority::INFO, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cout) const
Definition: Debug.h:118
int printErr(const std::string &msg, const debug::LineMode &lineMode=debug::LineMode::NEW, std::ostream &stream=std::cerr) const
Definition: Debug.h:149
int execute(std::vector< std::vector< double > > &outputEmbedding, const std::vector< double > &inputMatrix, const int nRows, const int nColumns) const
double getElapsedTime()
Definition: Timer.h:15
The Topology ToolKit.