26 #define GRT_DLL_EXPORTS 35 numInputDimensions = 0;
36 numPrincipalComponents = 0;
46 this->maxVariance = maxVariance;
47 this->normData = normData;
48 return computeFeatureVector_(data,MAX_VARIANCE);
53 if( numPrincipalComponents > data.
getNumCols() ){
54 errorLog << __GRT_LOG__ <<
" The number of principal components (";
55 errorLog << numPrincipalComponents <<
") is greater than the number of columns in your data (" << data.
getNumCols() <<
")" << std::endl;
58 this->numPrincipalComponents = numPrincipalComponents;
59 this->normData = normData;
60 return computeFeatureVector_(data,MAX_NUM_PCS);
63 bool PrincipalComponentAnalysis::computeFeatureVector_(
const MatrixFloat &data,
const UINT analysisMode){
68 this->numInputDimensions = N;
78 for(UINT i=0; i<M; i++)
79 for(UINT j=0; j<N; j++)
80 msData[i][j] = (data[i][j]-mean[j]) / stdDev[j];
84 for(UINT i=0; i<M; i++)
85 for(UINT j=0; j<N; j++)
86 msData[i][j] = data[i][j] - mean[j];
95 if( !eig.decompose( cov ) ){
98 componentWeights.clear();
99 sortedEigenvalues.clear();
100 eigenvectors.
clear();
101 errorLog << __GRT_LOG__ <<
" Failed to decompose input matrix!" << std::endl;
110 for(UINT i=0; i<eigenvalues.size(); i++){
111 if( eigenvalues[i] < 0 )
117 UINT componentIndex = 0;
118 sortedEigenvalues.clear();
119 componentWeights.
resize(N,0);
124 for(UINT i=0; i<eigenvalues.size(); i++){
125 if( eigenvalues[i] > maxValue ){
126 maxValue = eigenvalues[i];
130 if( maxValue == 0 || componentIndex >= eigenvalues.size() ){
133 sortedEigenvalues.push_back(
IndexedDouble(index,maxValue) );
134 componentWeights[ componentIndex++ ] = eigenvalues[ index ];
135 sum += eigenvalues[ index ];
136 eigenvalues[ index ] = 0;
139 Float cumulativeVariance = 0;
140 switch( analysisMode ){
143 numPrincipalComponents = 0;
144 for(UINT k=0; k<N; k++){
145 componentWeights[k] /= sum;
146 cumulativeVariance += componentWeights[k];
147 if( cumulativeVariance >= maxVariance && numPrincipalComponents==0 ){
148 numPrincipalComponents = k+1;
155 for(UINT k=0; k<N; k++){
156 componentWeights[k] /= sum;
157 if( k < numPrincipalComponents ){
158 maxVariance += componentWeights[k];
163 errorLog << __GRT_LOG__ <<
" Unknown analysis mode!" << std::endl;
179 warningLog << __GRT_LOG__ <<
" The PrincipalComponentAnalysis module has not been trained!" << std::endl;
183 if( data.
getNumCols() != numInputDimensions ){
184 warningLog << __GRT_LOG__ <<
" The number of columns in the input vector (" << data.
getNumCols() <<
") does not match the number of input dimensions (" << numInputDimensions <<
")!" << std::endl;
194 for(UINT j=0; j<numInputDimensions; j++)
195 msData[i][j] = (msData[i][j]-mean[j])/stdDev[j];
199 for(UINT j=0; j<numInputDimensions; j++)
200 msData[i][j] -= mean[j];
204 for(UINT row=0; row<msData.
getNumRows(); row++){
205 for(UINT i=0; i<numPrincipalComponents; i++){
208 prjData[row][i] += msData[row][j] * eigenvectors[j][sortedEigenvalues[i].index];
217 const unsigned int N = data.
getSize();
220 warningLog << __GRT_LOG__ <<
" The PrincipalComponentAnalysis module has not been trained!" << std::endl;
224 if( N != numInputDimensions ){
225 warningLog << __GRT_LOG__ <<
" The size of the input vector (" << N <<
") does not match the number of input dimensions (" << numInputDimensions <<
")!" << std::endl;
233 for(UINT j=0; j<numInputDimensions; j++)
234 msData[j] = (msData[j]-mean[j])/stdDev[j];
237 for(UINT j=0; j<numInputDimensions; j++)
238 msData[j] -= mean[j];
242 prjData.
resize( numPrincipalComponents );
243 for(UINT i=0; i<numPrincipalComponents; i++){
245 for(UINT j=0; j<N; j++)
246 prjData[i] += msData[j] * eigenvectors[j][sortedEigenvalues[i].index];
255 file <<
"GRT_PCA_MODEL_FILE_V1.0\n";
259 file <<
"NumPrincipalComponents: " << numPrincipalComponents << std::endl;
260 file <<
"NormData: " << normData << std::endl;
261 file <<
"MaxVariance: " << maxVariance << std::endl;
265 for(
unsigned int i=0; i<numInputDimensions; i++){
266 file << mean[i] <<
" ";
271 for(
unsigned int i=0; i<numInputDimensions; i++){
272 file << stdDev[i] <<
" ";
276 file <<
"ComponentWeights: ";
277 for(
unsigned int i=0; i<numInputDimensions; i++){
278 file << componentWeights[i] <<
" ";
282 file <<
"Eigenvalues: ";
283 for(
unsigned int i=0; i<numInputDimensions; i++){
284 file << eigenvalues[i] <<
" ";
288 file <<
"SortedEigenvalues: ";
289 for(
unsigned int i=0; i<numInputDimensions; i++){
290 file << sortedEigenvalues[i].index <<
" ";
291 file << sortedEigenvalues[i].value <<
" ";
295 file <<
"Eigenvectors: ";
297 for(
unsigned int i=0; i<eigenvectors.
getNumRows(); i++){
298 for(
unsigned int j=0; j<eigenvectors.
getNumCols(); j++){
299 file << eigenvectors[i][j];
300 if( j+1 < eigenvectors.getNumCols() ) file <<
" ";
301 else file << std::endl;
316 if( word !=
"GRT_PCA_MODEL_FILE_V1.0" ){
324 if( word !=
"NumPrincipalComponents:" ){
327 file >> numPrincipalComponents;
331 if( word !=
"NormData:" ){
338 if( word !=
"MaxVariance:" ){
346 if( word !=
"Mean:" ){
350 mean.
resize( numInputDimensions );
352 for(
unsigned int i=0; i<numInputDimensions; i++){
358 if( word !=
"StdDev:" ){
362 stdDev.
resize( numInputDimensions );
364 for(
unsigned int i=0; i<numInputDimensions; i++){
370 if( word !=
"ComponentWeights:" ){
374 componentWeights.
resize( numInputDimensions );
376 for(
unsigned int i=0; i<numInputDimensions; i++){
377 file >> componentWeights[i];
382 if( word !=
"Eigenvalues:" ){
386 eigenvalues.
resize( numInputDimensions );
388 for(
unsigned int i=0; i<numInputDimensions; i++){
389 file >> eigenvalues[i];
394 if( word !=
"SortedEigenvalues:" ){
398 sortedEigenvalues.
resize( numInputDimensions );
400 for(
unsigned int i=0; i<numInputDimensions; i++){
401 file >> sortedEigenvalues[i].index;
402 file >> sortedEigenvalues[i].value;
407 if( word !=
"Eigenvectors:" ){
415 eigenvectors.
resize( numRows, numCols );
417 for(
unsigned int i=0; i<eigenvectors.
getNumRows(); i++){
418 for(
unsigned int j=0; j<eigenvectors.
getNumCols(); j++){
419 file >> eigenvectors[i][j];
430 std::cout << title << std::endl;
433 std::cout <<
"Not Trained!\n";
436 std::cout <<
"NumInputDimensions: " << numInputDimensions <<
" NumPrincipalComponents: " << numPrincipalComponents << std::endl;
437 std::cout <<
"ComponentWeights: ";
438 for(UINT k=0; k<componentWeights.size(); k++){
439 std::cout <<
"\t" << componentWeights[k];
441 std::cout << std::endl;
442 std::cout <<
"SortedEigenValues: ";
443 for(UINT k=0; k<sortedEigenvalues.size(); k++){
444 std::cout <<
"\t" << sortedEigenvalues[k].value;
446 std::cout << std::endl;
447 eigenvectors.
print(
"Eigenvectors:");
458 if( (UINT)mean.size() != eigenvectors.
getNumCols() ){
463 numInputDimensions = eigenvectors.
getNumCols();
464 numPrincipalComponents = eigenvectors.
getNumRows();
467 componentWeights.clear();
469 sortedEigenvalues.clear();
470 this->eigenvectors = eigenvectors;
473 for(UINT i=0; i<numPrincipalComponents; i++){
bool saveBaseSettingsToFile(std::fstream &file) const
virtual ~PrincipalComponentAnalysis()
virtual bool resize(const unsigned int size)
PrincipalComponentAnalysis()
virtual bool save(std::fstream &file) const
virtual bool load(std::fstream &file)
MatrixFloat getEigenvectors()
bool print(const std::string title="") const
virtual bool print() const
VectorFloat getMean() const
MatrixFloat getCovarianceMatrix() const
unsigned int getNumRows() const
unsigned int getNumCols() const
bool loadBaseSettingsFromFile(std::fstream &file)
MatrixFloat getEigenVectors() const
VectorFloat getRealEigenvalues()
bool project(const MatrixFloat &data, MatrixFloat &prjData)
virtual bool resize(const unsigned int r, const unsigned int c)
bool computeFeatureVector(const MatrixFloat &data, Float maxVariance=0.95, bool normData=false)
VectorFloat getStdDev() const
This is the main base class that all GRT machine learning algorithms should inherit from...