GestureRecognitionToolkit  Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
PrincipalComponentAnalysis.cpp
Go to the documentation of this file.
1 
27 
28 GRT_BEGIN_NAMESPACE
29 
31  trained = false;
32  normData = false;
33  numInputDimensions = 0;
34  numPrincipalComponents = 0;
35  maxVariance = 0;
36 
37  classType = "PrincipalComponentAnalysis";
38  errorLog.setProceedingText("[ERROR PrincipalComponentAnalysis]");
39  warningLog.setProceedingText("[WARNING PrincipalComponentAnalysis]");
40 }
41 
43 
44 }
45 
46 bool PrincipalComponentAnalysis::computeFeatureVector(const MatrixFloat &data,double maxVariance,bool normData){
47  trained = false;
48  this->maxVariance = maxVariance;
49  this->normData = normData;
50  return computeFeatureVector_(data,MAX_VARIANCE);
51 }
52 
53 bool PrincipalComponentAnalysis::computeFeatureVector(const MatrixFloat &data,UINT numPrincipalComponents,bool normData){
54  trained = false;
55  if( numPrincipalComponents > data.getNumCols() ){
56  errorLog << "computeFeatureVector(const MatrixFloat &data,UINT numPrincipalComponents,bool normData) - The number of principal components (";
57  errorLog << numPrincipalComponents << ") is greater than the number of columns in your data (" << data.getNumCols() << ")" << std::endl;
58  return false;
59  }
60  this->numPrincipalComponents = numPrincipalComponents;
61  this->normData = normData;
62  return computeFeatureVector_(data,MAX_NUM_PCS);
63 }
64 
65 bool PrincipalComponentAnalysis::computeFeatureVector_(const MatrixFloat &data,const UINT analysisMode){
66 
67  trained = false;
68  const UINT M = data.getNumRows();
69  const UINT N = data.getNumCols();
70  this->numInputDimensions = N;
71 
72  MatrixFloat msData( M, N );
73 
74  //Compute the mean and standard deviation of the input data
75  mean = data.getMean();
76  stdDev = data.getStdDev();
77 
78  if( normData ){
79  //Normalize the data
80  for(UINT i=0; i<M; i++)
81  for(UINT j=0; j<N; j++)
82  msData[i][j] = (data[i][j]-mean[j]) / stdDev[j];
83 
84  }else{
85  //Mean Subtract Data
86  for(UINT i=0; i<M; i++)
87  for(UINT j=0; j<N; j++)
88  msData[i][j] = data[i][j] - mean[j];
89  }
90 
91  //Get the covariance matrix
92  MatrixFloat cov = msData.getCovarianceMatrix();
93 
94  //Use Eigen Value Decomposition to find eigenvectors of the covariance matrix
96 
97  if( !eig.decompose( cov ) ){
98  mean.clear();
99  stdDev.clear();
100  componentWeights.clear();
101  sortedEigenvalues.clear();
102  eigenvectors.clear();
103  errorLog << "computeFeatureVector(const MatrixFloat &data,UINT analysisMode) - Failed to decompose input matrix!" << std::endl;
104  return false;
105  }
106 
107  //Get the eigenvectors and eigenvalues
108  eigenvectors = eig.getEigenvectors();
109  eigenvalues = eig.getRealEigenvalues();
110 
111  //Any eigenvalues less than 0 are not worth anything so set to 0
112  for(UINT i=0; i<eigenvalues.size(); i++){
113  if( eigenvalues[i] < 0 )
114  eigenvalues[i] = 0;
115  }
116 
117  //Sort the eigenvalues and compute the component weights
118  Float sum = 0;
119  UINT componentIndex = 0;
120  sortedEigenvalues.clear();
121  componentWeights.resize(N,0);
122 
123  while( true ){
124  Float maxValue = 0;
125  UINT index = 0;
126  for(UINT i=0; i<eigenvalues.size(); i++){
127  if( eigenvalues[i] > maxValue ){
128  maxValue = eigenvalues[i];
129  index = i;
130  }
131  }
132  if( maxValue == 0 || componentIndex >= eigenvalues.size() ){
133  break;
134  }
135  sortedEigenvalues.push_back( IndexedDouble(index,maxValue) );
136  componentWeights[ componentIndex++ ] = eigenvalues[ index ];
137  sum += eigenvalues[ index ];
138  eigenvalues[ index ] = 0; //Set the maxValue to zero so it won't be used again
139  }
140 
141  Float cumulativeVariance = 0;
142  switch( analysisMode ){
143  case MAX_VARIANCE:
144  //Normalize the component weights and workout how many components we need to use to reach the maxVariance
145  numPrincipalComponents = 0;
146  for(UINT k=0; k<N; k++){
147  componentWeights[k] /= sum;
148  cumulativeVariance += componentWeights[k];
149  if( cumulativeVariance >= maxVariance && numPrincipalComponents==0 ){
150  numPrincipalComponents = k+1;
151  }
152  }
153  break;
154  case MAX_NUM_PCS:
155  //Normalize the component weights and compute the maxVariance
156  maxVariance = 0;
157  for(UINT k=0; k<N; k++){
158  componentWeights[k] /= sum;
159  if( k < numPrincipalComponents ){
160  maxVariance += componentWeights[k];
161  }
162  }
163  break;
164  default:
165  errorLog << "computeFeatureVector(const MatrixFloat &data,UINT analysisMode) - Unknown analysis mode!" << std::endl;
166  break;
167  }
168 
169  //Get the raw eigenvalues (encase the user asks for these later)
170  eigenvalues = eig.getRealEigenvalues();
171 
172  //Flag that the features have been computed
173  trained = true;
174 
175  return true;
176 }
177 
179 
180  if( !trained ){
181  warningLog << "project(const MatrixFloat &data,MatrixFloat &prjData) - The PrincipalComponentAnalysis module has not been trained!" << std::endl;
182  return false;
183  }
184 
185  if( data.getNumCols() != numInputDimensions ){
186  warningLog << "project(const MatrixFloat &data,MatrixFloat &prjData) - The number of columns in the input vector (" << data.getNumCols() << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
187  return false;
188  }
189 
190  MatrixFloat msData( data );
191  prjData.resize(data.getNumRows(),numPrincipalComponents);
192 
193  if( normData ){
194  //Mean subtract the data
195  for(UINT i=0; i<data.getNumRows(); i++)
196  for(UINT j=0; j<numInputDimensions; j++)
197  msData[i][j] = (msData[i][j]-mean[j])/stdDev[j];
198  }else{
199  //Mean subtract the data
200  for(UINT i=0; i<data.getNumRows(); i++)
201  for(UINT j=0; j<numInputDimensions; j++)
202  msData[i][j] -= mean[j];
203  }
204 
205  //Projected Data
206  for(UINT row=0; row<msData.getNumRows(); row++){//For each row in the final data
207  for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
208  prjData[row][i]=0;
209  for(UINT j=0; j<data.getNumCols(); j++)//For each feature
210  prjData[row][i] += msData[row][j] * eigenvectors[j][sortedEigenvalues[i].index];
211  }
212  }
213 
214  return true;
215 }
216 
218 
219  const unsigned int N = (unsigned int)data.size();
220 
221  if( !trained ){
222  warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The PrincipalComponentAnalysis module has not been trained!" << std::endl;
223  return false;
224  }
225 
226  if( N != numInputDimensions ){
227  warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The size of the input vector (" << N << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
228  return false;
229  }
230 
231  VectorFloat msData = data;
232 
233  if( normData ){
234  //Mean subtract the data
235  for(UINT j=0; j<numInputDimensions; j++)
236  msData[j] = (msData[j]-mean[j])/stdDev[j];
237  }else{
238  //Mean subtract the data
239  for(UINT j=0; j<numInputDimensions; j++)
240  msData[j] -= mean[j];
241  }
242 
243  //Projected Data
244  prjData.resize( numPrincipalComponents );
245  for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
246  prjData[i]=0;
247  for(UINT j=0; j<N; j++)//For each feature
248  prjData[i] += msData[j] * eigenvectors[j][sortedEigenvalues[i].index];
249  }
250 
251  return true;
252 }
253 
254 bool PrincipalComponentAnalysis::saveModelToFile( std::fstream &file ) const {
255 
256  //Write the header info
257  file << "GRT_PCA_MODEL_FILE_V1.0\n";
258 
259  if( !MLBase::saveBaseSettingsToFile( file ) ) return false;
260 
261  file << "NumPrincipalComponents: " << numPrincipalComponents << std::endl;
262  file << "NormData: " << normData << std::endl;
263  file << "MaxVariance: " << maxVariance << std::endl;
264 
265  if( trained ){
266  file << "Mean: ";
267  for(unsigned int i=0; i<numInputDimensions; i++){
268  file << mean[i] << " ";
269  }
270  file << std::endl;
271 
272  file << "StdDev: ";
273  for(unsigned int i=0; i<numInputDimensions; i++){
274  file << stdDev[i] << " ";
275  }
276  file << std::endl;
277 
278  file << "ComponentWeights: ";
279  for(unsigned int i=0; i<numInputDimensions; i++){
280  file << componentWeights[i] << " ";
281  }
282  file << std::endl;
283 
284  file << "Eigenvalues: ";
285  for(unsigned int i=0; i<numInputDimensions; i++){
286  file << eigenvalues[i] << " ";
287  }
288  file << std::endl;
289 
290  file << "SortedEigenvalues: ";
291  for(unsigned int i=0; i<numInputDimensions; i++){
292  file << sortedEigenvalues[i].index << " ";
293  file << sortedEigenvalues[i].value << " ";
294  }
295  file << std::endl;
296 
297  file << "Eigenvectors: ";
298  file << eigenvectors.getNumRows() << " " << eigenvectors.getNumCols() << std::endl;
299  for(unsigned int i=0; i<eigenvectors.getNumRows(); i++){
300  for(unsigned int j=0; j<eigenvectors.getNumCols(); j++){
301  file << eigenvectors[i][j];
302  if( j+1 < eigenvectors.getNumCols() ) file << " ";
303  else file << std::endl;
304  }
305  }
306  file << std::endl;
307  }
308 
309  return true;
310 }
311 
313 
314  std::string word;
315 
316  //Read the header info
317  file >> word;
318  if( word != "GRT_PCA_MODEL_FILE_V1.0" ){
319  return false;
320  }
321 
322  if( !MLBase::loadBaseSettingsFromFile( file ) ) return false;
323 
324  //Read the num components
325  file >> word;
326  if( word != "NumPrincipalComponents:" ){
327  return false;
328  }
329  file >> numPrincipalComponents;
330 
331  //Read the normData
332  file >> word;
333  if( word != "NormData:" ){
334  return false;
335  }
336  file >> normData;
337 
338  //Read the MaxVariance
339  file >> word;
340  if( word != "MaxVariance:" ){
341  return false;
342  }
343  file >> maxVariance;
344 
345  if( trained ){
346  //Read the mean vector
347  file >> word;
348  if( word != "Mean:" ){
349  trained = false;
350  return false;
351  }
352  mean.resize( numInputDimensions );
353 
354  for(unsigned int i=0; i<numInputDimensions; i++){
355  file >> mean[i];
356  }
357 
358  //Read the stddev vector
359  file >> word;
360  if( word != "StdDev:" ){
361  trained = false;
362  return false;
363  }
364  stdDev.resize( numInputDimensions );
365 
366  for(unsigned int i=0; i<numInputDimensions; i++){
367  file >> stdDev[i];
368  }
369 
370  //Read the ComponentWeights vector
371  file >> word;
372  if( word != "ComponentWeights:" ){
373  trained = false;
374  return false;
375  }
376  componentWeights.resize( numInputDimensions );
377 
378  for(unsigned int i=0; i<numInputDimensions; i++){
379  file >> componentWeights[i];
380  }
381 
382  //Read the Eigenvalues vector
383  file >> word;
384  if( word != "Eigenvalues:" ){
385  trained = false;
386  return false;
387  }
388  eigenvalues.resize( numInputDimensions );
389 
390  for(unsigned int i=0; i<numInputDimensions; i++){
391  file >> eigenvalues[i];
392  }
393 
394  //Read the SortedEigenvalues vector
395  file >> word;
396  if( word != "SortedEigenvalues:" ){
397  trained = false;
398  return false;
399  }
400  sortedEigenvalues.resize( numInputDimensions );
401 
402  for(unsigned int i=0; i<numInputDimensions; i++){
403  file >> sortedEigenvalues[i].index;
404  file >> sortedEigenvalues[i].value;
405  }
406 
407  //Read the Eigenvectors vector
408  file >> word;
409  if( word != "Eigenvectors:" ){
410  trained = false;
411  return false;
412  }
413  UINT numRows;
414  UINT numCols;
415  file >> numRows;
416  file >> numCols;
417  eigenvectors.resize( numRows, numCols );
418 
419  for(unsigned int i=0; i<eigenvectors.getNumRows(); i++){
420  for(unsigned int j=0; j<eigenvectors.getNumCols(); j++){
421  file >> eigenvectors[i][j];
422  }
423  }
424  }
425 
426  return true;
427 }
428 
429 bool PrincipalComponentAnalysis::print( std::string title ) const{
430 
431  if( title != "" ){
432  std::cout << title << std::endl;
433  }
434  if( !trained ){
435  std::cout << "Not Trained!\n";
436  return false;
437  }
438  std::cout << "NumInputDimensions: " << numInputDimensions << " NumPrincipalComponents: " << numPrincipalComponents << std::endl;
439  std::cout << "ComponentWeights: ";
440  for(UINT k=0; k<componentWeights.size(); k++){
441  std::cout << "\t" << componentWeights[k];
442  }
443  std::cout << std::endl;
444  std::cout << "SortedEigenValues: ";
445  for(UINT k=0; k<sortedEigenvalues.size(); k++){
446  std::cout << "\t" << sortedEigenvalues[k].value;
447  }
448  std::cout << std::endl;
449  eigenvectors.print("Eigenvectors:");
450 
451  return true;
452 }
453 
455  return eigenvectors;
456 }
457 
458 bool PrincipalComponentAnalysis::setModel( const VectorFloat &mean, const MatrixFloat &eigenvectors ){
459 
460  if( (UINT)mean.size() != eigenvectors.getNumCols() ){
461  return false;
462  }
463 
464  trained = true;
465  numInputDimensions = eigenvectors.getNumCols();
466  numPrincipalComponents = eigenvectors.getNumRows();
467  this->mean = mean;
468  stdDev.clear();
469  componentWeights.clear();
470  eigenvalues.clear();
471  sortedEigenvalues.clear();
472  this->eigenvectors = eigenvectors;
473 
474  //The eigenvectors are already sorted, so the sorted eigenvalues just holds the default index
475  for(UINT i=0; i<numPrincipalComponents; i++){
476  sortedEigenvalues.push_back( IndexedDouble(i,0.0) );
477  }
478  return true;
479 }
480 
481 GRT_END_NAMESPACE
void clear()
Definition: Matrix.h:522
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: MLBase.cpp:370
virtual bool loadModelFromFile(std::fstream &file)
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool saveModelToFile(std::fstream &file) const
bool print(const std::string title="") const
virtual bool print() const
Definition: MLBase.cpp:140
VectorFloat getMean() const
MatrixFloat getCovarianceMatrix() const
unsigned int getNumRows() const
Definition: Matrix.h:542
unsigned int getNumCols() const
Definition: Matrix.h:549
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: MLBase.cpp:393
This class runs the Principal Component Analysis (PCA) algorithm, a dimensionality reduction algorith...
bool project(const MatrixFloat &data, MatrixFloat &prjData)
virtual bool resize(const unsigned int r, const unsigned int c)
Definition: Matrix.h:232
bool computeFeatureVector(const MatrixFloat &data, Float maxVariance=0.95, bool normData=false)
VectorFloat getStdDev() const