This class runs the Principal Component Analysis (PCA) algorithm, a dimensionality reduction algorithm that projects an [M N] matrix (where M==samples and N==dimensions) onto a new K dimensional subspace, where K is normally much less than N. More...

#include <PrincipalComponentAnalysis.h>

Inheritance diagram for PrincipalComponentAnalysis:

Public Member Functions
	PrincipalComponentAnalysis ()

virtual	~PrincipalComponentAnalysis ()

bool	computeFeatureVector (const MatrixFloat &data, Float maxVariance=0.95, bool normData=false)

bool	computeFeatureVector (const MatrixFloat &data, UINT numPrincipalComponents, bool normData=false)

bool	project (const MatrixFloat &data, MatrixFloat &prjData)

bool	project (const VectorFloat &data, VectorFloat &prjData)

virtual bool	save (std::fstream &file) const

virtual bool	load (std::fstream &file)

bool	getNormData () const

UINT	getNumInputDimensions () const

UINT	getNumPrincipalComponents () const

Float	getMaxVariance () const

VectorFloat	getMeanVector () const

VectorFloat	getStdDevVector () const

VectorFloat	getComponentWeights () const

VectorFloat	getEigenValues () const

virtual bool	print (std::string title="") const

MatrixFloat	getEigenVectors () const

bool	setModel (const VectorFloat &mean, const MatrixFloat &eigenvectors)

Public Member Functions inherited from MLBase
	MLBase (const std::string &id="", const BaseType type=BASE_TYPE_NOT_SET)

virtual	~MLBase (void)

bool	copyMLBaseVariables (const MLBase *mlBase)

virtual bool	train (ClassificationData trainingData)

virtual bool	train_ (ClassificationData &trainingData)

virtual bool	train (RegressionData trainingData)

virtual bool	train_ (RegressionData &trainingData)

virtual bool	train (RegressionData trainingData, RegressionData validationData)

virtual bool	train_ (RegressionData &trainingData, RegressionData &validationData)

virtual bool	train (TimeSeriesClassificationData trainingData)

virtual bool	train_ (TimeSeriesClassificationData &trainingData)

virtual bool	train (ClassificationDataStream trainingData)

virtual bool	train_ (ClassificationDataStream &trainingData)

virtual bool	train (UnlabelledData trainingData)

virtual bool	train_ (UnlabelledData &trainingData)

virtual bool	train (MatrixFloat data)

virtual bool	train_ (MatrixFloat &data)

virtual bool	predict (VectorFloat inputVector)

virtual bool	predict_ (VectorFloat &inputVector)

virtual bool	predict (MatrixFloat inputMatrix)

virtual bool	predict_ (MatrixFloat &inputMatrix)

virtual bool	map (VectorFloat inputVector)

virtual bool	map_ (VectorFloat &inputVector)

virtual bool	reset ()

virtual bool	clear ()

virtual bool	print () const

virtual bool	save (const std::string &filename) const

virtual bool	load (const std::string &filename)

	GRT_DEPRECATED_MSG ("saveModelToFile(std::string filename) is deprecated, use save(const std::string &filename) instead", virtual bool saveModelToFile(const std::string &filename) const )

	GRT_DEPRECATED_MSG ("saveModelToFile(std::fstream &file) is deprecated, use save(std::fstream &file) instead", virtual bool saveModelToFile(std::fstream &file) const )

	GRT_DEPRECATED_MSG ("loadModelFromFile(std::string filename) is deprecated, use load(const std::string &filename) instead", virtual bool loadModelFromFile(const std::string &filename))

	GRT_DEPRECATED_MSG ("loadModelFromFile(std::fstream &file) is deprecated, use load(std::fstream &file) instead", virtual bool loadModelFromFile(std::fstream &file))

virtual bool	getModel (std::ostream &stream) const

virtual std::string	getModelAsString () const

DataType	getInputType () const

DataType	getOutputType () const

BaseType	getType () const

UINT	getNumInputFeatures () const

UINT	getNumInputDimensions () const

UINT	getNumOutputDimensions () const

UINT	getMinNumEpochs () const

UINT	getMaxNumEpochs () const

UINT	getBatchSize () const

UINT	getNumRestarts () const

UINT	getValidationSetSize () const

UINT	getNumTrainingIterationsToConverge () const

Float	getMinChange () const

Float	getLearningRate () const

Float	getRMSTrainingError () const

	GRT_DEPRECATED_MSG ("getRootMeanSquaredTrainingError() is deprecated, use getRMSTrainingError() instead", Float getRootMeanSquaredTrainingError() const )

Float	getTotalSquaredTrainingError () const

Float	getRMSValidationError () const

Float	getValidationSetAccuracy () const

VectorFloat	getValidationSetPrecision () const

VectorFloat	getValidationSetRecall () const

bool	getUseValidationSet () const

bool	getRandomiseTrainingOrder () const

bool	getTrained () const

	GRT_DEPRECATED_MSG ("getModelTrained() is deprecated, use getTrained() instead", bool getModelTrained() const )

bool	getConverged () const

bool	getScalingEnabled () const

bool	getIsBaseTypeClassifier () const

bool	getIsBaseTypeRegressifier () const

bool	getIsBaseTypeClusterer () const

bool	getTrainingLoggingEnabled () const

bool	getTestingLoggingEnabled () const

bool	enableScaling (const bool useScaling)

bool	setMaxNumEpochs (const UINT maxNumEpochs)

bool	setBatchSize (const UINT batchSize)

bool	setMinNumEpochs (const UINT minNumEpochs)

bool	setNumRestarts (const UINT numRestarts)

bool	setMinChange (const Float minChange)

bool	setLearningRate (const Float learningRate)

bool	setUseValidationSet (const bool useValidationSet)

bool	setValidationSetSize (const UINT validationSetSize)

bool	setRandomiseTrainingOrder (const bool randomiseTrainingOrder)

bool	setTrainingLoggingEnabled (const bool loggingEnabled)

bool	setTestingLoggingEnabled (const bool loggingEnabled)

bool	registerTrainingResultsObserver (Observer< TrainingResult > &observer)

bool	registerTestResultsObserver (Observer< TestInstanceResult > &observer)

bool	removeTrainingResultsObserver (const Observer< TrainingResult > &observer)

bool	removeTestResultsObserver (const Observer< TestInstanceResult > &observer)

bool	removeAllTrainingObservers ()

bool	removeAllTestObservers ()

bool	notifyTrainingResultsObservers (const TrainingResult &data)

bool	notifyTestResultsObservers (const TestInstanceResult &data)

MLBase *	getMLBasePointer ()

const MLBase *	getMLBasePointer () const

Vector< TrainingResult >	getTrainingResults () const

Public Member Functions inherited from GRTBase
	GRTBase (const std::string &id="")

virtual	~GRTBase (void)

bool	copyGRTBaseVariables (const GRTBase *GRTBase)

	GRT_DEPRECATED_MSG ("getClassType is deprecated, use getId() instead!", std::string getClassType() const )

std::string	getId () const

std::string	getLastWarningMessage () const

std::string	getLastErrorMessage () const

std::string	getLastInfoMessage () const

bool	setInfoLoggingEnabled (const bool loggingEnabled)

bool	setWarningLoggingEnabled (const bool loggingEnabled)

bool	setErrorLoggingEnabled (const bool loggingEnabled)

bool	setDebugLoggingEnabled (const bool loggingEnabled)

GRTBase *	getGRTBasePointer ()

const GRTBase *	getGRTBasePointer () const

Float	scale (const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)

Float	SQR (const Float &x) const

Public Member Functions inherited from Observer< TrainingResult >
virtual void	notify (const TrainingResult &data)

Public Member Functions inherited from Observer< TestInstanceResult >
virtual void	notify (const TestInstanceResult &data)

Protected Types
enum	AnalysisMode { MAX_VARIANCE =0, MAX_NUM_PCS }

Protected Member Functions
bool	computeFeatureVector_ (const MatrixFloat &data, UINT analysisMode)

Protected Member Functions inherited from MLBase
bool	saveBaseSettingsToFile (std::fstream &file) const

bool	loadBaseSettingsFromFile (std::fstream &file)

Protected Attributes
bool	normData

UINT	numPrincipalComponents

Float	maxVariance

VectorFloat	mean

VectorFloat	stdDev

VectorFloat	componentWeights

VectorFloat	eigenvalues

Vector< IndexedDouble >	sortedEigenvalues

MatrixFloat	eigenvectors

Protected Attributes inherited from MLBase
bool	trained

bool	useScaling

bool	converged

DataType	inputType

DataType	outputType

BaseType	baseType

UINT	numInputDimensions

UINT	numOutputDimensions

UINT	numTrainingIterationsToConverge

UINT	minNumEpochs

UINT	maxNumEpochs

UINT	batchSize

UINT	validationSetSize

UINT	numRestarts

Float	learningRate

Float	minChange

Float	rmsTrainingError

Float	rmsValidationError

Float	totalSquaredTrainingError

Float	validationSetAccuracy

bool	useValidationSet

bool	randomiseTrainingOrder

VectorFloat	validationSetPrecision

VectorFloat	validationSetRecall

Random	random

Vector< TrainingResult >	trainingResults

TrainingResultsObserverManager	trainingResultsObserverManager

TestResultsObserverManager	testResultsObserverManager

TrainingLog	trainingLog

TestingLog	testingLog

Protected Attributes inherited from GRTBase
std::string	classId
	Stores the name of the class (e.g., MinDist)

DebugLog	debugLog

ErrorLog	errorLog

InfoLog	infoLog

WarningLog	warningLog

Additional Inherited Members
Public Types inherited from MLBase
enum	BaseType { BASE_TYPE_NOT_SET =0, CLASSIFIER, REGRESSIFIER, CLUSTERER, PRE_PROCSSING, POST_PROCESSING, FEATURE_EXTRACTION, CONTEXT }

Static Public Member Functions inherited from GRTBase
static std::string	getGRTVersion (bool returnRevision=true)

static std::string	getGRTRevison ()

Detailed Description

This class runs the Principal Component Analysis (PCA) algorithm, a dimensionality reduction algorithm that projects an [M N] matrix (where M==samples and N==dimensions) onto a new K dimensional subspace, where K is normally much less than N.

GRT MIT License Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

The PCA projection (or transformation) is defined in such a way that the first principal component has the largest possible variance (that is, accounts for as much of the variability in the data as possible), and each succeeding component has the highest variance possible under the constraint that it be orthogonal to (i.e., uncorrelated with) the preceding components. Principal components are guaranteed to be independent only if the data set is jointly normally distributed. PCA is sensitive to the relative scaling of the original variables.

The PCA algorithm will automatically mean subtract the input data, and also normalize the data if required. To use this algorithm, the user should first run the computeFeatureVector(...) function to build the PCA feature vector and then run the project(...) function to project new data onto the new principal subspace.

Remarks: This implementation is based on Bishop, Christopher M. Pattern Recognition and Machine Learning. Vol. 1. New York: Springer, 2006.

Definition at line 52 of file PrincipalComponentAnalysis.h.

Constructor & Destructor Documentation

GRT_BEGIN_NAMESPACE PrincipalComponentAnalysis::PrincipalComponentAnalysis ( )

Default constructor.

Definition at line 31 of file PrincipalComponentAnalysis.cpp.

PrincipalComponentAnalysis::~PrincipalComponentAnalysis ( )

virtual

Default destructor.

Definition at line 40 of file PrincipalComponentAnalysis.cpp.

Member Function Documentation

bool PrincipalComponentAnalysis::computeFeatureVector	(	const MatrixFloat &	data,
		Float	maxVariance = `0.95`,
		bool	normData = `false`
	)

Runs the principal component analysis algorithm on the input data and builds the resulting feature vector so new data can be projected onto the principal subspace (using the project function). The number of principal components is automatically computed by selecting the minimum number of components that reach the maxVariance value. The maxVariance value should be in the range [0 1]; the default value of 0.95 represents 95% of the variance in the original dataset.

Parameters

data	a matrix containing the data from which the principal components will be computed. This should be an [M N] matrix, where M==samples and N==dimensions.
maxVariance	sets the variance that should be represented by the top K principal components. This should be a value between [0 1]. Default value=0.95
normData	sets if the data will be z-normalized before running the PCA algorithm. Default value=false

Returns: returns true if the principal components of the input matrix could be computed, false otherwise

bool PrincipalComponentAnalysis::computeFeatureVector	(	const MatrixFloat &	data,
		UINT	numPrincipalComponents,
		bool	normData = `false`
	)

Runs the principal component analysis algorithm on the input data and builds the resulting feature vector so new data can be projected onto the principal subspace (using the project function). The number of principal components should be set by the user and must be less than or equal to the number of dimensions in the input data.

Parameters

data	a matrix containing the data from which the principal components will be computed. This should be an [M N] matrix, where M==samples and N==dimensions
numPrincipalComponents	sets the number of principal components. This must be a value less than or equal to the number of dimensions in the input data
normData	sets if the data will be z-normalized before running the PCA algorithm. Default value=false

Returns: returns true if the principal components of the input matrix could be computed, false otherwise

Definition at line 51 of file PrincipalComponentAnalysis.cpp.

VectorFloat PrincipalComponentAnalysis::getComponentWeights ( ) const

inline

Returns the weights for each principal component, these weights sum to 1.

Returns: returns a vector of the weights for each principal component, these weights sum to 1

Definition at line 173 of file PrincipalComponentAnalysis.h.

VectorFloat PrincipalComponentAnalysis::getEigenValues ( ) const

inline

Returns the raw eigen values (these are not sorted).

Returns: returns a vector of the raw eigen values

Definition at line 179 of file PrincipalComponentAnalysis.h.

MatrixFloat PrincipalComponentAnalysis::getEigenVectors ( ) const

Returns a matrix containing the eigen vectors.

Returns: returns a matrix containing the raw eigen vectors

Definition at line 452 of file PrincipalComponentAnalysis.cpp.

Float PrincipalComponentAnalysis::getMaxVariance ( ) const

inline

Returns the maxVariance parameter, set by the user when the computeFeatureVector function was called.

Returns: returns the maxVariance parameter, set by the user when the computeFeatureVector function was called

Definition at line 152 of file PrincipalComponentAnalysis.h.

VectorFloat PrincipalComponentAnalysis::getMeanVector ( ) const

inline

Returns the mean shift vector, computed during the computeFeatureVector function. This vector will be subtracted from new data before it is projected onto the principal subspace.

Returns: returns the mean shift vector, computed during the computeFeatureVector function

Definition at line 159 of file PrincipalComponentAnalysis.h.

bool PrincipalComponentAnalysis::getNormData ( ) const

inline

Returns true if z-normalization is being applied to new data.

Returns: returns true if the normData is true, false otherwise

Definition at line 134 of file PrincipalComponentAnalysis.h.

UINT PrincipalComponentAnalysis::getNumInputDimensions ( ) const

inline

Returns the number of input dimensions in the original input data.

Returns: returns the numInputDimensions parameter.

Definition at line 140 of file PrincipalComponentAnalysis.h.

UINT PrincipalComponentAnalysis::getNumPrincipalComponents ( ) const

inline

Returns the number of principal components that was required to reach the maxVariance parameter.

Returns: returns the number of principal components that was required to reach the maxVariance parameter

Definition at line 146 of file PrincipalComponentAnalysis.h.

VectorFloat PrincipalComponentAnalysis::getStdDevVector ( ) const

inline

Returns the standard deviation vector that is used to normalize new data; this is computed during the computeFeatureVector function. This is only used if the normData parameter is true. If true, new data will be z-normalized by this value before it is projected onto the principal subspace.

Returns: returns the stdDev vector, computed during the computeFeatureVector function

Definition at line 167 of file PrincipalComponentAnalysis.h.

bool PrincipalComponentAnalysis::load ( std::fstream & file )

virtual

This loads a trained PCA model from a file.

Parameters

file	a reference to the file the model will be loaded from

Returns: returns true if the model was loaded successfully, false otherwise

Reimplemented from MLBase.

Definition at line 310 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::print ( std::string title = "" ) const

virtual

A helper function that prints the PCA info. If the user sets the title string, then this will be written in addition to the PCA data.

Definition at line 427 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::project	(	const MatrixFloat &	data,
		MatrixFloat &	prjData
	)

Projects the input data matrix onto the principal subspace. The new projected data will be stored in the prjData matrix. The computeFeatureVector function should have been called at least once before this function is called. The number of the columns in the data matrix must match the numInputDimensions parameter. The function will return true if the projection was successful, false otherwise.

Parameters

data	The data that should be projected onto the principal subspace. This should be an [M N] matrix, where N must equal the numInputDimensions value (there are no restrictions on M).
prjData	A matrix into which the projected data will be stored. This matrix will be resized to [M K], where M is the number of rows in the data matrix and K is the numPrincipalComponents.

Returns: returns true if the projection was successful, false otherwise

Definition at line 176 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::project	(	const VectorFloat &	data,
		VectorFloat &	prjData
	)

Projects the input data vector onto the principal subspace. The new projected data will be stored in the prjData vector. The computeFeatureVector function should have been called at least once before this function is called. The size of the data vector must match the numInputDimensions parameter. The function will return true if the projection was successful, false otherwise.

Parameters

data	The data that should be projected onto the principal subspace. This should be an N-dimensional vector, where N must equal the numInputDimensions value.
prjData	A vector into which the projected data will be stored. This vector will be resized to K, where K is the numPrincipalComponents.

Returns: returns true if the projection was successful, false otherwise

Definition at line 215 of file PrincipalComponentAnalysis.cpp.

bool PrincipalComponentAnalysis::save ( std::fstream & file ) const

virtual

This saves the trained PCA model to a file.

Parameters

file	a reference to the file the model will be saved to

Returns: returns true if the model was saved successfully, false otherwise

Reimplemented from MLBase.

Definition at line 252 of file PrincipalComponentAnalysis.cpp.

The documentation for this class was generated from the following files:

Public Member Functions

Protected Types

Protected Member Functions

Protected Attributes

Additional Inherited Members

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation