GestureRecognitionToolkit  Version: 0.2.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
PrincipalComponentAnalysis.cpp
Go to the documentation of this file.
1 
26 #define GRT_DLL_EXPORTS
28 
29 GRT_BEGIN_NAMESPACE
30 
// NOTE(review): the line that opens this definition is missing from this listing; the body
// below looks like the default constructor of PrincipalComponentAnalysis - confirm against the real file.
// Start in the untrained state with every setting zeroed.
32  trained = false;
33  normData = false;
34  numInputDimensions = 0;
35  numPrincipalComponents = 0;
36  maxVariance = 0;
37 
// Set the class type name and the prefixes used by the error and warning logs.
38  classType = "PrincipalComponentAnalysis";
39  errorLog.setProceedingText("[ERROR PrincipalComponentAnalysis]");
40  warningLog.setProceedingText("[WARNING PrincipalComponentAnalysis]");
41 }
42 
44 
45 }
46 
47 bool PrincipalComponentAnalysis::computeFeatureVector(const MatrixFloat &data,double maxVariance,bool normData){
48  trained = false;
49  this->maxVariance = maxVariance;
50  this->normData = normData;
51  return computeFeatureVector_(data,MAX_VARIANCE);
52 }
53 
54 bool PrincipalComponentAnalysis::computeFeatureVector(const MatrixFloat &data,UINT numPrincipalComponents,bool normData){
55  trained = false;
56  if( numPrincipalComponents > data.getNumCols() ){
57  errorLog << "computeFeatureVector(const MatrixFloat &data,UINT numPrincipalComponents,bool normData) - The number of principal components (";
58  errorLog << numPrincipalComponents << ") is greater than the number of columns in your data (" << data.getNumCols() << ")" << std::endl;
59  return false;
60  }
61  this->numPrincipalComponents = numPrincipalComponents;
62  this->normData = normData;
63  return computeFeatureVector_(data,MAX_NUM_PCS);
64 }
65 
66 bool PrincipalComponentAnalysis::computeFeatureVector_(const MatrixFloat &data,const UINT analysisMode){
67 
68  trained = false;
69  const UINT M = data.getNumRows();
70  const UINT N = data.getNumCols();
71  this->numInputDimensions = N;
72 
73  MatrixFloat msData( M, N );
74 
75  //Compute the mean and standard deviation of the input data
76  mean = data.getMean();
77  stdDev = data.getStdDev();
78 
79  if( normData ){
80  //Normalize the data
81  for(UINT i=0; i<M; i++)
82  for(UINT j=0; j<N; j++)
83  msData[i][j] = (data[i][j]-mean[j]) / stdDev[j];
84 
85  }else{
86  //Mean Subtract Data
87  for(UINT i=0; i<M; i++)
88  for(UINT j=0; j<N; j++)
89  msData[i][j] = data[i][j] - mean[j];
90  }
91 
92  //Get the covariance matrix
93  MatrixFloat cov = msData.getCovarianceMatrix();
94 
95  //Use Eigen Value Decomposition to find eigenvectors of the covariance matrix
97 
98  if( !eig.decompose( cov ) ){
99  mean.clear();
100  stdDev.clear();
101  componentWeights.clear();
102  sortedEigenvalues.clear();
103  eigenvectors.clear();
104  errorLog << "computeFeatureVector(const MatrixFloat &data,UINT analysisMode) - Failed to decompose input matrix!" << std::endl;
105  return false;
106  }
107 
108  //Get the eigenvectors and eigenvalues
109  eigenvectors = eig.getEigenvectors();
110  eigenvalues = eig.getRealEigenvalues();
111 
112  //Any eigenvalues less than 0 are not worth anything so set to 0
113  for(UINT i=0; i<eigenvalues.size(); i++){
114  if( eigenvalues[i] < 0 )
115  eigenvalues[i] = 0;
116  }
117 
118  //Sort the eigenvalues and compute the component weights
119  Float sum = 0;
120  UINT componentIndex = 0;
121  sortedEigenvalues.clear();
122  componentWeights.resize(N,0);
123 
124  while( true ){
125  Float maxValue = 0;
126  UINT index = 0;
127  for(UINT i=0; i<eigenvalues.size(); i++){
128  if( eigenvalues[i] > maxValue ){
129  maxValue = eigenvalues[i];
130  index = i;
131  }
132  }
133  if( maxValue == 0 || componentIndex >= eigenvalues.size() ){
134  break;
135  }
136  sortedEigenvalues.push_back( IndexedDouble(index,maxValue) );
137  componentWeights[ componentIndex++ ] = eigenvalues[ index ];
138  sum += eigenvalues[ index ];
139  eigenvalues[ index ] = 0; //Set the maxValue to zero so it won't be used again
140  }
141 
142  Float cumulativeVariance = 0;
143  switch( analysisMode ){
144  case MAX_VARIANCE:
145  //Normalize the component weights and workout how many components we need to use to reach the maxVariance
146  numPrincipalComponents = 0;
147  for(UINT k=0; k<N; k++){
148  componentWeights[k] /= sum;
149  cumulativeVariance += componentWeights[k];
150  if( cumulativeVariance >= maxVariance && numPrincipalComponents==0 ){
151  numPrincipalComponents = k+1;
152  }
153  }
154  break;
155  case MAX_NUM_PCS:
156  //Normalize the component weights and compute the maxVariance
157  maxVariance = 0;
158  for(UINT k=0; k<N; k++){
159  componentWeights[k] /= sum;
160  if( k < numPrincipalComponents ){
161  maxVariance += componentWeights[k];
162  }
163  }
164  break;
165  default:
166  errorLog << "computeFeatureVector(const MatrixFloat &data,UINT analysisMode) - Unknown analysis mode!" << std::endl;
167  break;
168  }
169 
170  //Get the raw eigenvalues (encase the user asks for these later)
171  eigenvalues = eig.getRealEigenvalues();
172 
173  //Flag that the features have been computed
174  trained = true;
175 
176  return true;
177 }
178 
180 
// NOTE(review): the signature line of this definition is missing from this listing; judging by the
// log messages below this is project(const MatrixFloat &data,MatrixFloat &prjData).
// Projects every row of data onto the first numPrincipalComponents components and writes the
// result to prjData. Returns false (with a warning) if the model is not trained or the column
// count of data does not match numInputDimensions, true otherwise.
181  if( !trained ){
182  warningLog << "project(const MatrixFloat &data,MatrixFloat &prjData) - The PrincipalComponentAnalysis module has not been trained!" << std::endl;
183  return false;
184  }
185 
186  if( data.getNumCols() != numInputDimensions ){
187  warningLog << "project(const MatrixFloat &data,MatrixFloat &prjData) - The number of columns in the input vector (" << data.getNumCols() << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
188  return false;
189  }
190 
// Work on a copy so the input is left untouched; the output gets one row per input row and one column per component
191  MatrixFloat msData( data );
192  prjData.resize(data.getNumRows(),numPrincipalComponents);
193 
194  if( normData ){
195  //Subtract the mean and divide by the standard deviation
// NOTE(review): stdDev[j] is not checked before the division - confirm it can never be zero or empty here
196  for(UINT i=0; i<data.getNumRows(); i++)
197  for(UINT j=0; j<numInputDimensions; j++)
198  msData[i][j] = (msData[i][j]-mean[j])/stdDev[j];
199  }else{
200  //Mean subtract the data
201  for(UINT i=0; i<data.getNumRows(); i++)
202  for(UINT j=0; j<numInputDimensions; j++)
203  msData[i][j] -= mean[j];
204  }
205 
206  //Projected Data
// Each output value is the dot product of a preprocessed row with one column of eigenvectors;
// the column used for component i is sortedEigenvalues[i].index
207  for(UINT row=0; row<msData.getNumRows(); row++){//For each row in the final data
208  for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
209  prjData[row][i]=0;
210  for(UINT j=0; j<data.getNumCols(); j++)//For each feature
211  prjData[row][i] += msData[row][j] * eigenvectors[j][sortedEigenvalues[i].index];
212  }
213  }
214 
215  return true;
216 }
217 
219 
// NOTE(review): the signature line of this definition is missing from this listing; judging by the
// log messages below this is project(const VectorFloat &data,VectorFloat &prjData).
// Projects a single sample onto the first numPrincipalComponents components and writes the
// result to prjData. Returns false (with a warning) if the model is not trained or the size
// of data does not match numInputDimensions, true otherwise.
220  const unsigned int N = (unsigned int)data.size();
221 
222  if( !trained ){
223  warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The PrincipalComponentAnalysis module has not been trained!" << std::endl;
224  return false;
225  }
226 
227  if( N != numInputDimensions ){
228  warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The size of the input vector (" << N << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
229  return false;
230  }
231 
// Work on a copy so the input is left untouched
232  VectorFloat msData = data;
233 
234  if( normData ){
235  //Subtract the mean and divide by the standard deviation
// NOTE(review): stdDev[j] is not checked before the division - confirm it can never be zero or empty here
236  for(UINT j=0; j<numInputDimensions; j++)
237  msData[j] = (msData[j]-mean[j])/stdDev[j];
238  }else{
239  //Mean subtract the data
240  for(UINT j=0; j<numInputDimensions; j++)
241  msData[j] -= mean[j];
242  }
243 
244  //Projected Data
// Each output value is the dot product of the preprocessed sample with one column of eigenvectors;
// the column used for component i is sortedEigenvalues[i].index
245  prjData.resize( numPrincipalComponents );
246  for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
247  prjData[i]=0;
248  for(UINT j=0; j<N; j++)//For each feature
249  prjData[i] += msData[j] * eigenvectors[j][sortedEigenvalues[i].index];
250  }
251 
252  return true;
253 }
254 
255 bool PrincipalComponentAnalysis::save( std::fstream &file ) const {
256 
257  //Write the header info
258  file << "GRT_PCA_MODEL_FILE_V1.0\n";
259 
260  if( !MLBase::saveBaseSettingsToFile( file ) ) return false;
261 
262  file << "NumPrincipalComponents: " << numPrincipalComponents << std::endl;
263  file << "NormData: " << normData << std::endl;
264  file << "MaxVariance: " << maxVariance << std::endl;
265 
266  if( trained ){
267  file << "Mean: ";
268  for(unsigned int i=0; i<numInputDimensions; i++){
269  file << mean[i] << " ";
270  }
271  file << std::endl;
272 
273  file << "StdDev: ";
274  for(unsigned int i=0; i<numInputDimensions; i++){
275  file << stdDev[i] << " ";
276  }
277  file << std::endl;
278 
279  file << "ComponentWeights: ";
280  for(unsigned int i=0; i<numInputDimensions; i++){
281  file << componentWeights[i] << " ";
282  }
283  file << std::endl;
284 
285  file << "Eigenvalues: ";
286  for(unsigned int i=0; i<numInputDimensions; i++){
287  file << eigenvalues[i] << " ";
288  }
289  file << std::endl;
290 
291  file << "SortedEigenvalues: ";
292  for(unsigned int i=0; i<numInputDimensions; i++){
293  file << sortedEigenvalues[i].index << " ";
294  file << sortedEigenvalues[i].value << " ";
295  }
296  file << std::endl;
297 
298  file << "Eigenvectors: ";
299  file << eigenvectors.getNumRows() << " " << eigenvectors.getNumCols() << std::endl;
300  for(unsigned int i=0; i<eigenvectors.getNumRows(); i++){
301  for(unsigned int j=0; j<eigenvectors.getNumCols(); j++){
302  file << eigenvectors[i][j];
303  if( j+1 < eigenvectors.getNumCols() ) file << " ";
304  else file << std::endl;
305  }
306  }
307  file << std::endl;
308  }
309 
310  return true;
311 }
312 
313 bool PrincipalComponentAnalysis::load( std::fstream &file ) {
314 
315  std::string word;
316 
317  //Read the header info
318  file >> word;
319  if( word != "GRT_PCA_MODEL_FILE_V1.0" ){
320  return false;
321  }
322 
323  if( !MLBase::loadBaseSettingsFromFile( file ) ) return false;
324 
325  //Read the num components
326  file >> word;
327  if( word != "NumPrincipalComponents:" ){
328  return false;
329  }
330  file >> numPrincipalComponents;
331 
332  //Read the normData
333  file >> word;
334  if( word != "NormData:" ){
335  return false;
336  }
337  file >> normData;
338 
339  //Read the MaxVariance
340  file >> word;
341  if( word != "MaxVariance:" ){
342  return false;
343  }
344  file >> maxVariance;
345 
346  if( trained ){
347  //Read the mean vector
348  file >> word;
349  if( word != "Mean:" ){
350  trained = false;
351  return false;
352  }
353  mean.resize( numInputDimensions );
354 
355  for(unsigned int i=0; i<numInputDimensions; i++){
356  file >> mean[i];
357  }
358 
359  //Read the stddev vector
360  file >> word;
361  if( word != "StdDev:" ){
362  trained = false;
363  return false;
364  }
365  stdDev.resize( numInputDimensions );
366 
367  for(unsigned int i=0; i<numInputDimensions; i++){
368  file >> stdDev[i];
369  }
370 
371  //Read the ComponentWeights vector
372  file >> word;
373  if( word != "ComponentWeights:" ){
374  trained = false;
375  return false;
376  }
377  componentWeights.resize( numInputDimensions );
378 
379  for(unsigned int i=0; i<numInputDimensions; i++){
380  file >> componentWeights[i];
381  }
382 
383  //Read the Eigenvalues vector
384  file >> word;
385  if( word != "Eigenvalues:" ){
386  trained = false;
387  return false;
388  }
389  eigenvalues.resize( numInputDimensions );
390 
391  for(unsigned int i=0; i<numInputDimensions; i++){
392  file >> eigenvalues[i];
393  }
394 
395  //Read the SortedEigenvalues vector
396  file >> word;
397  if( word != "SortedEigenvalues:" ){
398  trained = false;
399  return false;
400  }
401  sortedEigenvalues.resize( numInputDimensions );
402 
403  for(unsigned int i=0; i<numInputDimensions; i++){
404  file >> sortedEigenvalues[i].index;
405  file >> sortedEigenvalues[i].value;
406  }
407 
408  //Read the Eigenvectors vector
409  file >> word;
410  if( word != "Eigenvectors:" ){
411  trained = false;
412  return false;
413  }
414  UINT numRows;
415  UINT numCols;
416  file >> numRows;
417  file >> numCols;
418  eigenvectors.resize( numRows, numCols );
419 
420  for(unsigned int i=0; i<eigenvectors.getNumRows(); i++){
421  for(unsigned int j=0; j<eigenvectors.getNumCols(); j++){
422  file >> eigenvectors[i][j];
423  }
424  }
425  }
426 
427  return true;
428 }
429 
430 bool PrincipalComponentAnalysis::print( std::string title ) const{
431 
432  if( title != "" ){
433  std::cout << title << std::endl;
434  }
435  if( !trained ){
436  std::cout << "Not Trained!\n";
437  return false;
438  }
439  std::cout << "NumInputDimensions: " << numInputDimensions << " NumPrincipalComponents: " << numPrincipalComponents << std::endl;
440  std::cout << "ComponentWeights: ";
441  for(UINT k=0; k<componentWeights.size(); k++){
442  std::cout << "\t" << componentWeights[k];
443  }
444  std::cout << std::endl;
445  std::cout << "SortedEigenValues: ";
446  for(UINT k=0; k<sortedEigenvalues.size(); k++){
447  std::cout << "\t" << sortedEigenvalues[k].value;
448  }
449  std::cout << std::endl;
450  eigenvectors.print("Eigenvectors:");
451 
452  return true;
453 }
454 
// NOTE(review): the signature line of this definition is missing from this listing - presumably the eigenvector getter; confirm.
// Hands back the eigenvectors member as it is currently stored.
456  return eigenvectors;
457 }
458 
459 bool PrincipalComponentAnalysis::setModel( const VectorFloat &mean, const MatrixFloat &eigenvectors ){
460 
461  if( (UINT)mean.size() != eigenvectors.getNumCols() ){
462  return false;
463  }
464 
465  trained = true;
466  numInputDimensions = eigenvectors.getNumCols();
467  numPrincipalComponents = eigenvectors.getNumRows();
468  this->mean = mean;
469  stdDev.clear();
470  componentWeights.clear();
471  eigenvalues.clear();
472  sortedEigenvalues.clear();
473  this->eigenvectors = eigenvectors;
474 
475  //The eigenvectors are already sorted, so the sorted eigenvalues just holds the default index
476  for(UINT i=0; i<numPrincipalComponents; i++){
477  sortedEigenvalues.push_back( IndexedDouble(i,0.0) );
478  }
479  return true;
480 }
481 
482 GRT_END_NAMESPACE
void clear()
Definition: Matrix.h:522
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: MLBase.cpp:375
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool save(std::fstream &file) const
virtual bool load(std::fstream &file)
bool print(const std::string title="") const
virtual bool print() const
Definition: MLBase.cpp:141
VectorFloat getMean() const
MatrixFloat getCovarianceMatrix() const
unsigned int getNumRows() const
Definition: Matrix.h:542
unsigned int getNumCols() const
Definition: Matrix.h:549
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: MLBase.cpp:398
This class runs the Principal Component Analysis (PCA) algorithm, a dimensionality reduction algorith...
bool project(const MatrixFloat &data, MatrixFloat &prjData)
virtual bool resize(const unsigned int r, const unsigned int c)
Definition: Matrix.h:232
bool computeFeatureVector(const MatrixFloat &data, Float maxVariance=0.95, bool normData=false)
VectorFloat getStdDev() const