GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
PrincipalComponentAnalysis.cpp
Go to the documentation of this file.
1 
26 #define GRT_DLL_EXPORTS
28 
29 GRT_BEGIN_NAMESPACE
30 
32 {
33  trained = false;
34  normData = false;
35  numInputDimensions = 0;
36  numPrincipalComponents = 0;
37  maxVariance = 0;
38 }
39 
41 
42 }
43 
44 bool PrincipalComponentAnalysis::computeFeatureVector(const MatrixFloat &data,double maxVariance,bool normData){
45  trained = false;
46  this->maxVariance = maxVariance;
47  this->normData = normData;
48  return computeFeatureVector_(data,MAX_VARIANCE);
49 }
50 
51 bool PrincipalComponentAnalysis::computeFeatureVector(const MatrixFloat &data,UINT numPrincipalComponents,bool normData){
52  trained = false;
53  if( numPrincipalComponents > data.getNumCols() ){
54  errorLog << __GRT_LOG__ << " The number of principal components (";
55  errorLog << numPrincipalComponents << ") is greater than the number of columns in your data (" << data.getNumCols() << ")" << std::endl;
56  return false;
57  }
58  this->numPrincipalComponents = numPrincipalComponents;
59  this->normData = normData;
60  return computeFeatureVector_(data,MAX_NUM_PCS);
61 }
62 
63 bool PrincipalComponentAnalysis::computeFeatureVector_(const MatrixFloat &data,const UINT analysisMode){
64 
65  trained = false;
66  const UINT M = data.getNumRows();
67  const UINT N = data.getNumCols();
68  this->numInputDimensions = N;
69 
70  MatrixFloat msData( M, N );
71 
72  //Compute the mean and standard deviation of the input data
73  mean = data.getMean();
74  stdDev = data.getStdDev();
75 
76  if( normData ){
77  //Normalize the data
78  for(UINT i=0; i<M; i++)
79  for(UINT j=0; j<N; j++)
80  msData[i][j] = (data[i][j]-mean[j]) / stdDev[j];
81 
82  }else{
83  //Mean Subtract Data
84  for(UINT i=0; i<M; i++)
85  for(UINT j=0; j<N; j++)
86  msData[i][j] = data[i][j] - mean[j];
87  }
88 
89  //Get the covariance matrix
90  MatrixFloat cov = msData.getCovarianceMatrix();
91 
92  //Use Eigen Value Decomposition to find eigenvectors of the covariance matrix
94 
95  if( !eig.decompose( cov ) ){
96  mean.clear();
97  stdDev.clear();
98  componentWeights.clear();
99  sortedEigenvalues.clear();
100  eigenvectors.clear();
101  errorLog << __GRT_LOG__ << " Failed to decompose input matrix!" << std::endl;
102  return false;
103  }
104 
105  //Get the eigenvectors and eigenvalues
106  eigenvectors = eig.getEigenvectors();
107  eigenvalues = eig.getRealEigenvalues();
108 
109  //Any eigenvalues less than 0 are not worth anything so set to 0
110  for(UINT i=0; i<eigenvalues.size(); i++){
111  if( eigenvalues[i] < 0 )
112  eigenvalues[i] = 0;
113  }
114 
115  //Sort the eigenvalues and compute the component weights
116  Float sum = 0;
117  UINT componentIndex = 0;
118  sortedEigenvalues.clear();
119  componentWeights.resize(N,0);
120 
121  while( true ){
122  Float maxValue = 0;
123  UINT index = 0;
124  for(UINT i=0; i<eigenvalues.size(); i++){
125  if( eigenvalues[i] > maxValue ){
126  maxValue = eigenvalues[i];
127  index = i;
128  }
129  }
130  if( maxValue == 0 || componentIndex >= eigenvalues.size() ){
131  break;
132  }
133  sortedEigenvalues.push_back( IndexedDouble(index,maxValue) );
134  componentWeights[ componentIndex++ ] = eigenvalues[ index ];
135  sum += eigenvalues[ index ];
136  eigenvalues[ index ] = 0; //Set the maxValue to zero so it won't be used again
137  }
138 
139  Float cumulativeVariance = 0;
140  switch( analysisMode ){
141  case MAX_VARIANCE:
142  //Normalize the component weights and workout how many components we need to use to reach the maxVariance
143  numPrincipalComponents = 0;
144  for(UINT k=0; k<N; k++){
145  componentWeights[k] /= sum;
146  cumulativeVariance += componentWeights[k];
147  if( cumulativeVariance >= maxVariance && numPrincipalComponents==0 ){
148  numPrincipalComponents = k+1;
149  }
150  }
151  break;
152  case MAX_NUM_PCS:
153  //Normalize the component weights and compute the maxVariance
154  maxVariance = 0;
155  for(UINT k=0; k<N; k++){
156  componentWeights[k] /= sum;
157  if( k < numPrincipalComponents ){
158  maxVariance += componentWeights[k];
159  }
160  }
161  break;
162  default:
163  errorLog << __GRT_LOG__ << " Unknown analysis mode!" << std::endl;
164  break;
165  }
166 
167  //Get the raw eigenvalues (encase the user asks for these later)
168  eigenvalues = eig.getRealEigenvalues();
169 
170  //Flag that the features have been computed
171  trained = true;
172 
173  return true;
174 }
175 
177 
178  if( !trained ){
179  warningLog << __GRT_LOG__ << " The PrincipalComponentAnalysis module has not been trained!" << std::endl;
180  return false;
181  }
182 
183  if( data.getNumCols() != numInputDimensions ){
184  warningLog << __GRT_LOG__ << " The number of columns in the input vector (" << data.getNumCols() << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
185  return false;
186  }
187 
188  MatrixFloat msData( data );
189  prjData.resize(data.getNumRows(),numPrincipalComponents);
190 
191  if( normData ){
192  //Mean subtract the data
193  for(UINT i=0; i<data.getNumRows(); i++)
194  for(UINT j=0; j<numInputDimensions; j++)
195  msData[i][j] = (msData[i][j]-mean[j])/stdDev[j];
196  }else{
197  //Mean subtract the data
198  for(UINT i=0; i<data.getNumRows(); i++)
199  for(UINT j=0; j<numInputDimensions; j++)
200  msData[i][j] -= mean[j];
201  }
202 
203  //Projected Data
204  for(UINT row=0; row<msData.getNumRows(); row++){//For each row in the final data
205  for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
206  prjData[row][i]=0;
207  for(UINT j=0; j<data.getNumCols(); j++)//For each feature
208  prjData[row][i] += msData[row][j] * eigenvectors[j][sortedEigenvalues[i].index];
209  }
210  }
211 
212  return true;
213 }
214 
216 
217  const unsigned int N = data.getSize();
218 
219  if( !trained ){
220  warningLog << __GRT_LOG__ << " The PrincipalComponentAnalysis module has not been trained!" << std::endl;
221  return false;
222  }
223 
224  if( N != numInputDimensions ){
225  warningLog << __GRT_LOG__ << " The size of the input vector (" << N << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
226  return false;
227  }
228 
229  VectorFloat msData = data;
230 
231  if( normData ){
232  //Mean subtract the data
233  for(UINT j=0; j<numInputDimensions; j++)
234  msData[j] = (msData[j]-mean[j])/stdDev[j];
235  }else{
236  //Mean subtract the data
237  for(UINT j=0; j<numInputDimensions; j++)
238  msData[j] -= mean[j];
239  }
240 
241  //Projected Data
242  prjData.resize( numPrincipalComponents );
243  for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
244  prjData[i]=0;
245  for(UINT j=0; j<N; j++)//For each feature
246  prjData[i] += msData[j] * eigenvectors[j][sortedEigenvalues[i].index];
247  }
248 
249  return true;
250 }
251 
252 bool PrincipalComponentAnalysis::save( std::fstream &file ) const {
253 
254  //Write the header info
255  file << "GRT_PCA_MODEL_FILE_V1.0\n";
256 
257  if( !MLBase::saveBaseSettingsToFile( file ) ) return false;
258 
259  file << "NumPrincipalComponents: " << numPrincipalComponents << std::endl;
260  file << "NormData: " << normData << std::endl;
261  file << "MaxVariance: " << maxVariance << std::endl;
262 
263  if( trained ){
264  file << "Mean: ";
265  for(unsigned int i=0; i<numInputDimensions; i++){
266  file << mean[i] << " ";
267  }
268  file << std::endl;
269 
270  file << "StdDev: ";
271  for(unsigned int i=0; i<numInputDimensions; i++){
272  file << stdDev[i] << " ";
273  }
274  file << std::endl;
275 
276  file << "ComponentWeights: ";
277  for(unsigned int i=0; i<numInputDimensions; i++){
278  file << componentWeights[i] << " ";
279  }
280  file << std::endl;
281 
282  file << "Eigenvalues: ";
283  for(unsigned int i=0; i<numInputDimensions; i++){
284  file << eigenvalues[i] << " ";
285  }
286  file << std::endl;
287 
288  file << "SortedEigenvalues: ";
289  for(unsigned int i=0; i<numInputDimensions; i++){
290  file << sortedEigenvalues[i].index << " ";
291  file << sortedEigenvalues[i].value << " ";
292  }
293  file << std::endl;
294 
295  file << "Eigenvectors: ";
296  file << eigenvectors.getNumRows() << " " << eigenvectors.getNumCols() << std::endl;
297  for(unsigned int i=0; i<eigenvectors.getNumRows(); i++){
298  for(unsigned int j=0; j<eigenvectors.getNumCols(); j++){
299  file << eigenvectors[i][j];
300  if( j+1 < eigenvectors.getNumCols() ) file << " ";
301  else file << std::endl;
302  }
303  }
304  file << std::endl;
305  }
306 
307  return true;
308 }
309 
310 bool PrincipalComponentAnalysis::load( std::fstream &file ) {
311 
312  std::string word;
313 
314  //Read the header info
315  file >> word;
316  if( word != "GRT_PCA_MODEL_FILE_V1.0" ){
317  return false;
318  }
319 
320  if( !MLBase::loadBaseSettingsFromFile( file ) ) return false;
321 
322  //Read the num components
323  file >> word;
324  if( word != "NumPrincipalComponents:" ){
325  return false;
326  }
327  file >> numPrincipalComponents;
328 
329  //Read the normData
330  file >> word;
331  if( word != "NormData:" ){
332  return false;
333  }
334  file >> normData;
335 
336  //Read the MaxVariance
337  file >> word;
338  if( word != "MaxVariance:" ){
339  return false;
340  }
341  file >> maxVariance;
342 
343  if( trained ){
344  //Read the mean vector
345  file >> word;
346  if( word != "Mean:" ){
347  trained = false;
348  return false;
349  }
350  mean.resize( numInputDimensions );
351 
352  for(unsigned int i=0; i<numInputDimensions; i++){
353  file >> mean[i];
354  }
355 
356  //Read the stddev vector
357  file >> word;
358  if( word != "StdDev:" ){
359  trained = false;
360  return false;
361  }
362  stdDev.resize( numInputDimensions );
363 
364  for(unsigned int i=0; i<numInputDimensions; i++){
365  file >> stdDev[i];
366  }
367 
368  //Read the ComponentWeights vector
369  file >> word;
370  if( word != "ComponentWeights:" ){
371  trained = false;
372  return false;
373  }
374  componentWeights.resize( numInputDimensions );
375 
376  for(unsigned int i=0; i<numInputDimensions; i++){
377  file >> componentWeights[i];
378  }
379 
380  //Read the Eigenvalues vector
381  file >> word;
382  if( word != "Eigenvalues:" ){
383  trained = false;
384  return false;
385  }
386  eigenvalues.resize( numInputDimensions );
387 
388  for(unsigned int i=0; i<numInputDimensions; i++){
389  file >> eigenvalues[i];
390  }
391 
392  //Read the SortedEigenvalues vector
393  file >> word;
394  if( word != "SortedEigenvalues:" ){
395  trained = false;
396  return false;
397  }
398  sortedEigenvalues.resize( numInputDimensions );
399 
400  for(unsigned int i=0; i<numInputDimensions; i++){
401  file >> sortedEigenvalues[i].index;
402  file >> sortedEigenvalues[i].value;
403  }
404 
405  //Read the Eigenvectors vector
406  file >> word;
407  if( word != "Eigenvectors:" ){
408  trained = false;
409  return false;
410  }
411  UINT numRows;
412  UINT numCols;
413  file >> numRows;
414  file >> numCols;
415  eigenvectors.resize( numRows, numCols );
416 
417  for(unsigned int i=0; i<eigenvectors.getNumRows(); i++){
418  for(unsigned int j=0; j<eigenvectors.getNumCols(); j++){
419  file >> eigenvectors[i][j];
420  }
421  }
422  }
423 
424  return true;
425 }
426 
427 bool PrincipalComponentAnalysis::print( std::string title ) const{
428 
429  if( title != "" ){
430  std::cout << title << std::endl;
431  }
432  if( !trained ){
433  std::cout << "Not Trained!\n";
434  return false;
435  }
436  std::cout << "NumInputDimensions: " << numInputDimensions << " NumPrincipalComponents: " << numPrincipalComponents << std::endl;
437  std::cout << "ComponentWeights: ";
438  for(UINT k=0; k<componentWeights.size(); k++){
439  std::cout << "\t" << componentWeights[k];
440  }
441  std::cout << std::endl;
442  std::cout << "SortedEigenValues: ";
443  for(UINT k=0; k<sortedEigenvalues.size(); k++){
444  std::cout << "\t" << sortedEigenvalues[k].value;
445  }
446  std::cout << std::endl;
447  eigenvectors.print("Eigenvectors:");
448 
449  return true;
450 }
451 
453  return eigenvectors;
454 }
455 
456 bool PrincipalComponentAnalysis::setModel( const VectorFloat &mean, const MatrixFloat &eigenvectors ){
457 
458  if( (UINT)mean.size() != eigenvectors.getNumCols() ){
459  return false;
460  }
461 
462  trained = true;
463  numInputDimensions = eigenvectors.getNumCols();
464  numPrincipalComponents = eigenvectors.getNumRows();
465  this->mean = mean;
466  stdDev.clear();
467  componentWeights.clear();
468  eigenvalues.clear();
469  sortedEigenvalues.clear();
470  this->eigenvectors = eigenvectors;
471 
472  //The eigenvectors are already sorted, so the sorted eigenvalues just holds the default index
473  for(UINT i=0; i<numPrincipalComponents; i++){
474  sortedEigenvalues.push_back( IndexedDouble(i,0.0) );
475  }
476  return true;
477 }
478 
479 GRT_END_NAMESPACE
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: MLBase.cpp:435
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
UINT getSize() const
Definition: Vector.h:201
virtual bool save(std::fstream &file) const
virtual bool load(std::fstream &file)
bool clear()
Definition: Matrix.h:553
bool print(const std::string title="") const
virtual bool print() const
Definition: MLBase.cpp:165
VectorFloat getMean() const
MatrixFloat getCovarianceMatrix() const
unsigned int getNumRows() const
Definition: Matrix.h:574
unsigned int getNumCols() const
Definition: Matrix.h:581
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: MLBase.cpp:458
bool project(const MatrixFloat &data, MatrixFloat &prjData)
virtual bool resize(const unsigned int r, const unsigned int c)
Definition: Matrix.h:245
bool computeFeatureVector(const MatrixFloat &data, Float maxVariance=0.95, bool normData=false)
VectorFloat getStdDev() const
This is the main base class that all GRT machine learning algorithms should inherit from...
Definition: MLBase.h:72