GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
PrincipalComponentAnalysis Class Reference

This class runs the Principal Component Analysis (PCA) algorithm, a dimensionality reduction algorithm that projects an [M N] matrix (where M==samples and N==dimensions) onto a new K dimensional subspace, where K is normally much less than N. More...

#include <PrincipalComponentAnalysis.h>

Inheritance diagram for PrincipalComponentAnalysis:
MLBase GRTBase Observer< TrainingResult > Observer< TestInstanceResult >

Public Member Functions

 PrincipalComponentAnalysis ()
 
virtual ~PrincipalComponentAnalysis ()
 
bool computeFeatureVector (const MatrixFloat &data, Float maxVariance=0.95, bool normData=false)
 
bool computeFeatureVector (const MatrixFloat &data, UINT numPrincipalComponents, bool normData=false)
 
bool project (const MatrixFloat &data, MatrixFloat &prjData)
 
bool project (const VectorFloat &data, VectorFloat &prjData)
 
virtual bool save (std::fstream &file) const
 
virtual bool load (std::fstream &file)
 
bool getNormData () const
 
UINT getNumInputDimensions () const
 
UINT getNumPrincipalComponents () const
 
Float getMaxVariance () const
 
VectorFloat getMeanVector () const
 
VectorFloat getStdDevVector () const
 
VectorFloat getComponentWeights () const
 
VectorFloat getEigenValues () const
 
virtual bool print (std::string title="") const
 
MatrixFloat getEigenVectors () const
 
bool setModel (const VectorFloat &mean, const MatrixFloat &eigenvectors)
 
- Public Member Functions inherited from MLBase
 MLBase (const std::string &id="", const BaseType type=BASE_TYPE_NOT_SET)
 
virtual ~MLBase (void)
 
bool copyMLBaseVariables (const MLBase *mlBase)
 
virtual bool train (ClassificationData trainingData)
 
virtual bool train_ (ClassificationData &trainingData)
 
virtual bool train (RegressionData trainingData)
 
virtual bool train_ (RegressionData &trainingData)
 
virtual bool train (RegressionData trainingData, RegressionData validationData)
 
virtual bool train_ (RegressionData &trainingData, RegressionData &validationData)
 
virtual bool train (TimeSeriesClassificationData trainingData)
 
virtual bool train_ (TimeSeriesClassificationData &trainingData)
 
virtual bool train (ClassificationDataStream trainingData)
 
virtual bool train_ (ClassificationDataStream &trainingData)
 
virtual bool train (UnlabelledData trainingData)
 
virtual bool train_ (UnlabelledData &trainingData)
 
virtual bool train (MatrixFloat data)
 
virtual bool train_ (MatrixFloat &data)
 
virtual bool predict (VectorFloat inputVector)
 
virtual bool predict_ (VectorFloat &inputVector)
 
virtual bool predict (MatrixFloat inputMatrix)
 
virtual bool predict_ (MatrixFloat &inputMatrix)
 
virtual bool map (VectorFloat inputVector)
 
virtual bool map_ (VectorFloat &inputVector)
 
virtual bool reset ()
 
virtual bool clear ()
 
virtual bool print () const
 
virtual bool save (const std::string &filename) const
 
virtual bool load (const std::string &filename)
 
 GRT_DEPRECATED_MSG ("saveModelToFile(std::string filename) is deprecated, use save(const std::string &filename) instead", virtual bool saveModelToFile(const std::string &filename) const )
 
 GRT_DEPRECATED_MSG ("saveModelToFile(std::fstream &file) is deprecated, use save(std::fstream &file) instead", virtual bool saveModelToFile(std::fstream &file) const )
 
 GRT_DEPRECATED_MSG ("loadModelFromFile(std::string filename) is deprecated, use load(const std::string &filename) instead", virtual bool loadModelFromFile(const std::string &filename))
 
 GRT_DEPRECATED_MSG ("loadModelFromFile(std::fstream &file) is deprecated, use load(std::fstream &file) instead", virtual bool loadModelFromFile(std::fstream &file))
 
virtual bool getModel (std::ostream &stream) const
 
virtual std::string getModelAsString () const
 
DataType getInputType () const
 
DataType getOutputType () const
 
BaseType getType () const
 
UINT getNumInputFeatures () const
 
UINT getNumInputDimensions () const
 
UINT getNumOutputDimensions () const
 
UINT getMinNumEpochs () const
 
UINT getMaxNumEpochs () const
 
UINT getBatchSize () const
 
UINT getNumRestarts () const
 
UINT getValidationSetSize () const
 
UINT getNumTrainingIterationsToConverge () const
 
Float getMinChange () const
 
Float getLearningRate () const
 
Float getRMSTrainingError () const
 
 GRT_DEPRECATED_MSG ("getRootMeanSquaredTrainingError() is deprecated, use getRMSTrainingError() instead", Float getRootMeanSquaredTrainingError() const )
 
Float getTotalSquaredTrainingError () const
 
Float getRMSValidationError () const
 
Float getValidationSetAccuracy () const
 
VectorFloat getValidationSetPrecision () const
 
VectorFloat getValidationSetRecall () const
 
bool getUseValidationSet () const
 
bool getRandomiseTrainingOrder () const
 
bool getTrained () const
 
 GRT_DEPRECATED_MSG ("getModelTrained() is deprecated, use getTrained() instead", bool getModelTrained() const )
 
bool getConverged () const
 
bool getScalingEnabled () const
 
bool getIsBaseTypeClassifier () const
 
bool getIsBaseTypeRegressifier () const
 
bool getIsBaseTypeClusterer () const
 
bool getTrainingLoggingEnabled () const
 
bool getTestingLoggingEnabled () const
 
bool enableScaling (const bool useScaling)
 
bool setMaxNumEpochs (const UINT maxNumEpochs)
 
bool setBatchSize (const UINT batchSize)
 
bool setMinNumEpochs (const UINT minNumEpochs)
 
bool setNumRestarts (const UINT numRestarts)
 
bool setMinChange (const Float minChange)
 
bool setLearningRate (const Float learningRate)
 
bool setUseValidationSet (const bool useValidationSet)
 
bool setValidationSetSize (const UINT validationSetSize)
 
bool setRandomiseTrainingOrder (const bool randomiseTrainingOrder)
 
bool setTrainingLoggingEnabled (const bool loggingEnabled)
 
bool setTestingLoggingEnabled (const bool loggingEnabled)
 
bool registerTrainingResultsObserver (Observer< TrainingResult > &observer)
 
bool registerTestResultsObserver (Observer< TestInstanceResult > &observer)
 
bool removeTrainingResultsObserver (const Observer< TrainingResult > &observer)
 
bool removeTestResultsObserver (const Observer< TestInstanceResult > &observer)
 
bool removeAllTrainingObservers ()
 
bool removeAllTestObservers ()
 
bool notifyTrainingResultsObservers (const TrainingResult &data)
 
bool notifyTestResultsObservers (const TestInstanceResult &data)
 
MLBase * getMLBasePointer ()
 
const MLBase * getMLBasePointer () const
 
Vector< TrainingResult > getTrainingResults () const
 
- Public Member Functions inherited from GRTBase
 GRTBase (const std::string &id="")
 
virtual ~GRTBase (void)
 
bool copyGRTBaseVariables (const GRTBase *GRTBase)
 
 GRT_DEPRECATED_MSG ("getClassType is deprecated, use getId() instead!", std::string getClassType() const )
 
std::string getId () const
 
std::string getLastWarningMessage () const
 
std::string getLastErrorMessage () const
 
std::string getLastInfoMessage () const
 
bool setInfoLoggingEnabled (const bool loggingEnabled)
 
bool setWarningLoggingEnabled (const bool loggingEnabled)
 
bool setErrorLoggingEnabled (const bool loggingEnabled)
 
bool setDebugLoggingEnabled (const bool loggingEnabled)
 
GRTBase * getGRTBasePointer ()
 
const GRTBase * getGRTBasePointer () const
 
Float scale (const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
 
Float SQR (const Float &x) const
 
- Public Member Functions inherited from Observer< TrainingResult >
virtual void notify (const TrainingResult &data)
 
- Public Member Functions inherited from Observer< TestInstanceResult >
virtual void notify (const TestInstanceResult &data)
 

Protected Types

enum  AnalysisMode { MAX_VARIANCE =0, MAX_NUM_PCS }
 

Protected Member Functions

bool computeFeatureVector_ (const MatrixFloat &data, UINT analysisMode)
 
- Protected Member Functions inherited from MLBase
bool saveBaseSettingsToFile (std::fstream &file) const
 
bool loadBaseSettingsFromFile (std::fstream &file)
 

Protected Attributes

bool normData
 
UINT numPrincipalComponents
 
Float maxVariance
 
VectorFloat mean
 
VectorFloat stdDev
 
VectorFloat componentWeights
 
VectorFloat eigenvalues
 
Vector< IndexedDouble > sortedEigenvalues
 
MatrixFloat eigenvectors
 
- Protected Attributes inherited from MLBase
bool trained
 
bool useScaling
 
bool converged
 
DataType inputType
 
DataType outputType
 
BaseType baseType
 
UINT numInputDimensions
 
UINT numOutputDimensions
 
UINT numTrainingIterationsToConverge
 
UINT minNumEpochs
 
UINT maxNumEpochs
 
UINT batchSize
 
UINT validationSetSize
 
UINT numRestarts
 
Float learningRate
 
Float minChange
 
Float rmsTrainingError
 
Float rmsValidationError
 
Float totalSquaredTrainingError
 
Float validationSetAccuracy
 
bool useValidationSet
 
bool randomiseTrainingOrder
 
VectorFloat validationSetPrecision
 
VectorFloat validationSetRecall
 
Random random
 
Vector< TrainingResult > trainingResults
 
TrainingResultsObserverManager trainingResultsObserverManager
 
TestResultsObserverManager testResultsObserverManager
 
TrainingLog trainingLog
 
TestingLog testingLog
 
- Protected Attributes inherited from GRTBase
std::string classId
 Stores the name of the class (e.g., MinDist)
 
DebugLog debugLog
 
ErrorLog errorLog
 
InfoLog infoLog
 
WarningLog warningLog
 

Additional Inherited Members

- Public Types inherited from MLBase
enum  BaseType {
  BASE_TYPE_NOT_SET =0, CLASSIFIER, REGRESSIFIER, CLUSTERER,
  PRE_PROCSSING, POST_PROCESSING, FEATURE_EXTRACTION, CONTEXT
}
 
- Static Public Member Functions inherited from GRTBase
static std::string getGRTVersion (bool returnRevision=true)
 
static std::string getGRTRevison ()
 

Detailed Description

This class runs the Principal Component Analysis (PCA) algorithm, a dimensionality reduction algorithm that projects an [M N] matrix (where M==samples and N==dimensions) onto a new K dimensional subspace, where K is normally much less than N.

GRT MIT License Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

The PCA projection or transformation is defined in such a way that the first principal component has the largest possible variance (that is, accounts for as much of the variability in the data as possible), and each succeeding component has the highest variance possible under the constraint that it be orthogonal to (i.e., uncorrelated with) the preceding components. Principal components are guaranteed to be independent only if the data set is jointly normally distributed. PCA is sensitive to the relative scaling of the original variables.

The PCA algorithm will automatically mean subtract the input data, and also normalize the data if required. To use this algorithm, the user should first run the computeFeatureVector(...) function to build the PCA feature vector and then run the project(...) function to project new data onto the new principal subspace.

Remarks
This implementation is based on Bishop, Christopher M. Pattern recognition and machine learning. Vol. 1. New York: springer, 2006.

Definition at line 52 of file PrincipalComponentAnalysis.h.

Constructor & Destructor Documentation

PrincipalComponentAnalysis::PrincipalComponentAnalysis ( )

Default constructor.

Definition at line 31 of file PrincipalComponentAnalysis.cpp.

PrincipalComponentAnalysis::~PrincipalComponentAnalysis ( )
virtual

Default destructor.

Definition at line 40 of file PrincipalComponentAnalysis.cpp.

Member Function Documentation

bool PrincipalComponentAnalysis::computeFeatureVector ( const MatrixFloat & data,
Float  maxVariance = 0.95,
bool  normData = false 
)

Runs the principal component analysis algorithm on the input data and builds the resulting feature vector so new data can be projected onto the principal subspace (using the project function). The number of principal components is automatically computed by selecting the minimum number of components that reach the maxVariance value. The maxVariance value should be in the range [0 1]; the default value of 0.95 represents 95% of the variance in the original dataset.

Parameters
data: a matrix containing the data from which the principal components will be computed. This should be an [M N] matrix, where M==samples and N==dimensions.
maxVariance: sets the variance that should be represented by the top K principal components. This should be a value between [0 1]. Default value=0.95
normData: sets if the data will be z-normalized before running the PCA algorithm. Default value=false
Returns
returns true if the principal components of the input matrix could be computed, false otherwise
bool PrincipalComponentAnalysis::computeFeatureVector ( const MatrixFloat & data,
UINT  numPrincipalComponents,
bool  normData = false 
)

Runs the principal component analysis algorithm on the input data and builds the resulting feature vector so new data can be projected onto the principal subspace (using the project function). The number of principal components should be set by the user and must be less than or equal to the number of dimensions in the input data.

Parameters
data: a matrix containing the data from which the principal components will be computed. This should be an [M N] matrix, where M==samples and N==dimensions
numPrincipalComponents: sets the number of principal components. This must be a value less than or equal to the number of dimensions in the input data
normData: sets if the data will be z-normalized before running the PCA algorithm. Default value=false
Returns
returns true if the principal components of the input matrix could be computed, false otherwise

Definition at line 51 of file PrincipalComponentAnalysis.cpp.

VectorFloat PrincipalComponentAnalysis::getComponentWeights ( ) const
inline

Returns the weights for each principal component, these weights sum to 1.

Returns
returns a vector of the weights for each principal component, these weights sum to 1

Definition at line 173 of file PrincipalComponentAnalysis.h.

VectorFloat PrincipalComponentAnalysis::getEigenValues ( ) const
inline

Returns the raw eigen values (these are not sorted).

Returns
returns a vector of the raw eigen values

Definition at line 179 of file PrincipalComponentAnalysis.h.

MatrixFloat PrincipalComponentAnalysis::getEigenVectors ( ) const

Returns a matrix containing the eigen vectors.

Returns
returns a matrix containing the raw eigen vectors

Definition at line 452 of file PrincipalComponentAnalysis.cpp.

Float PrincipalComponentAnalysis::getMaxVariance ( ) const
inline

Returns the maxVariance parameter, set by the user when the computeFeatureVector function was called.

Returns
returns the maxVariance parameter, set by the user when the computeFeatureVector function was called

Definition at line 152 of file PrincipalComponentAnalysis.h.

VectorFloat PrincipalComponentAnalysis::getMeanVector ( ) const
inline

Returns the mean shift vector, computed during the computeFeatureVector function. New data will be subtracted by this value before it is projected onto the principal subspace.

Returns
returns the mean shift vector, computed during the computeFeatureVector function

Definition at line 159 of file PrincipalComponentAnalysis.h.

bool PrincipalComponentAnalysis::getNormData ( ) const
inline

Returns true if z-normalization is being applied to new data.

Returns
returns true if the normData is true, false otherwise

Definition at line 134 of file PrincipalComponentAnalysis.h.

UINT PrincipalComponentAnalysis::getNumInputDimensions ( ) const
inline

Returns the number of input dimensions in the original input data.

Returns
returns the numInputDimensions parameter.

Definition at line 140 of file PrincipalComponentAnalysis.h.

UINT PrincipalComponentAnalysis::getNumPrincipalComponents ( ) const
inline

Returns the number of principal components that was required to reach the maxVariance parameter.

Returns
returns the number of principal components that was required to reach the maxVariance parameter

Definition at line 146 of file PrincipalComponentAnalysis.h.

VectorFloat PrincipalComponentAnalysis::getStdDevVector ( ) const
inline

Returns the standard deviation vector that is used to normalize new data, this is computed during the computeFeatureVector function. This is only used if the normData parameter is true. If true, new data will be z-normalized by this value before it is projected onto the principal subspace.

Returns
returns the stdDev vector, computed during the computeFeatureVector function

Definition at line 167 of file PrincipalComponentAnalysis.h.

bool PrincipalComponentAnalysis::load ( std::fstream &  file)
virtual

This loads a trained PCA model from a file.

Parameters
file: a reference to the file the model will be loaded from
Returns
returns true if the model was loaded successfully, false otherwise

Reimplemented from MLBase.

Definition at line 310 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::print ( std::string  title = "") const
virtual

A helper function that prints the PCA info. If the user sets the title string, then this will be written in addition to the PCA data.

Definition at line 427 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::project ( const MatrixFloat & data,
MatrixFloat & prjData 
)

Projects the input data matrix onto the principal subspace. The new projected data will be stored in the prjData matrix. The computeFeatureVector function should have been called at least once before this function is called. The number of the columns in the data matrix must match the numInputDimensions parameter. The function will return true if the projection was successful, false otherwise.

Parameters
data: The data that should be projected onto the principal subspace. This should be an [M N] matrix, where N must equal the numInputDimensions value (there are no restrictions on M).
prjData: A matrix into which the projected data will be stored. This matrix will be resized to [M K], where M is the number of rows in the data matrix and K is the numPrincipalComponents.
Returns
returns true if the projection was successful, false otherwise

Definition at line 176 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::project ( const VectorFloat & data,
VectorFloat & prjData 
)

Projects the input data vector onto the principal subspace. The new projected data will be stored in the prjData vector. The computeFeatureVector function should have been called at least once before this function is called. The size of the data vector must match the numInputDimensions parameter. The function will return true if the projection was successful, false otherwise.

Parameters
data: The data that should be projected onto the principal subspace. This should be an N-dimensional vector, where N must equal the numInputDimensions value.
prjData: A vector into which the projected data will be stored. This vector will be resized to K, where K is the numPrincipalComponents.
Returns
returns true if the projection was successful, false otherwise

Definition at line 215 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::save ( std::fstream &  file) const
virtual

This saves the trained PCA model to a file.

Parameters
file: a reference to the file the model will be saved to
Returns
returns true if the model was saved successfully, false otherwise

Reimplemented from MLBase.

Definition at line 252 of file PrincipalComponentAnalysis.cpp.


The documentation for this class was generated from the following files: