GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
ClassificationData Class Reference
Inheritance diagram for ClassificationData:
GRTBase

Public Member Functions

 ClassificationData (UINT numDimensions=0, std::string datasetName="NOT_SET", std::string infoText="")
 
 ClassificationData (const ClassificationData &rhs)
 
virtual ~ClassificationData ()
 
ClassificationData & operator= (const ClassificationData &rhs)
 
ClassificationSample & operator[] (const UINT &i)
 
const ClassificationSample & operator[] (const UINT &i) const
 
void clear ()
 
bool setNumDimensions (UINT numDimensions)
 
bool setDatasetName (std::string datasetName)
 
bool setInfoText (std::string infoText)
 
bool setClassNameForCorrespondingClassLabel (const std::string className, const UINT classLabel)
 
bool setAllowNullGestureClass (const bool allowNullGestureClass)
 
bool addSample (const UINT classLabel, const VectorFloat &sample)
 
bool removeSample (const UINT index)
 
bool removeLastSample ()
 
bool reserve (const UINT M)
 
bool addClass (const UINT classLabel, const std::string className="NOT_SET")
 
UINT removeClass (const UINT classLabel)
 
UINT eraseAllSamplesWithClassLabel (const UINT classLabel)
 
bool relabelAllSamplesWithClassLabel (const UINT oldClassLabel, const UINT newClassLabel)
 
bool setExternalRanges (const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
 
bool enableExternalRangeScaling (const bool useExternalRanges)
 
bool scale (const Float minTarget, const Float maxTarget)
 
bool scale (const Vector< MinMax > &ranges, const Float minTarget, const Float maxTarget)
 
bool save (const std::string &filename) const
 
bool load (const std::string &filename)
 
bool saveDatasetToFile (const std::string &filename) const
 
bool loadDatasetFromFile (const std::string &filename)
 
bool saveDatasetToCSVFile (const std::string &filename) const
 
bool loadDatasetFromCSVFile (const std::string &filename, const UINT classLabelColumnIndex=0)
 
bool printStats () const
 
bool sortClassLabels ()
 
bool merge (const ClassificationData &data)
 
 GRT_DEPRECATED_MSG ("partition(...) is deprecated, use split(...) instead", ClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false))
 
ClassificationData split (const UINT splitPercentage, const bool useStratifiedSampling=false)
 
bool spiltDataIntoKFolds (const UINT K, const bool useStratifiedSampling=false)
 
ClassificationData getTrainingFoldData (const UINT foldIndex) const
 
ClassificationData getTestFoldData (const UINT foldIndex) const
 
ClassificationData getClassData (const UINT classLabel) const
 
ClassificationData getBootstrappedDataset (const UINT numSamples=0, const bool balanceDataset=false) const
 
RegressionData reformatAsRegressionData () const
 
UnlabelledData reformatAsUnlabelledData () const
 
std::string getDatasetName () const
 
std::string getInfoText () const
 
std::string getStatsAsString () const
 
UINT getNumDimensions () const
 
UINT getNumSamples () const
 
UINT getNumClasses () const
 
UINT getMinimumClassLabel () const
 
UINT getMaximumClassLabel () const
 
UINT getClassLabelIndexValue (const UINT classLabel) const
 
std::string getClassNameForCorrespondingClassLabel (const UINT classLabel) const
 
Vector< MinMax > getRanges () const
 
Vector< UINT > getClassLabels () const
 
Vector< UINT > getNumSamplesPerClass () const
 
Vector< ClassTracker > getClassTracker () const
 
MatrixFloat getClassHistogramData (const UINT classLabel, const UINT numBins) const
 
Vector< MatrixFloat > getHistogramData (const UINT numBins) const
 
Vector< ClassificationSample > getClassificationData () const
 
VectorFloat getClassProbabilities () const
 
VectorFloat getClassProbabilities (const Vector< UINT > &classLabels) const
 
VectorFloat getMean () const
 
VectorFloat getStdDev () const
 
MatrixFloat getClassMean () const
 
MatrixFloat getClassStdDev () const
 
MatrixFloat getCovarianceMatrix () const
 
Vector< UINT > getClassDataIndexes (const UINT classLabel) const
 
MatrixDouble getDataAsMatrixDouble () const
 
MatrixFloat getDataAsMatrixFloat () const
 
- Public Member Functions inherited from GRTBase
 GRTBase (const std::string &id="")
 
virtual ~GRTBase (void)
 
bool copyGRTBaseVariables (const GRTBase *GRTBase)
 
 GRT_DEPRECATED_MSG ("getClassType is deprecated, use getId() instead!", std::string getClassType() const )
 
std::string getId () const
 
std::string getLastWarningMessage () const
 
std::string getLastErrorMessage () const
 
std::string getLastInfoMessage () const
 
bool setInfoLoggingEnabled (const bool loggingEnabled)
 
bool setWarningLoggingEnabled (const bool loggingEnabled)
 
bool setErrorLoggingEnabled (const bool loggingEnabled)
 
bool setDebugLoggingEnabled (const bool loggingEnabled)
 
GRTBase * getGRTBasePointer ()
 
const GRTBase * getGRTBasePointer () const
 
Float scale (const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
 
Float SQR (const Float &x) const
 

Static Public Member Functions

static bool generateGaussDataset (const std::string filename, const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
 
static ClassificationData generateGaussDataset (const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
 
static ClassificationData generateGaussLinearDataset (const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
 
- Static Public Member Functions inherited from GRTBase
static std::string getGRTVersion (bool returnRevision=true)
 
static std::string getGRTRevison ()
 

Additional Inherited Members

- Protected Attributes inherited from GRTBase
std::string classId
 Stores the name of the class (e.g., MinDist)
 
DebugLog debugLog
 
ErrorLog errorLog
 
InfoLog infoLog
 
WarningLog warningLog
 

Detailed Description

Constructor & Destructor Documentation

ClassificationData::ClassificationData ( UINT  numDimensions = 0,
std::string  datasetName = "NOT_SET",
std::string  infoText = "" 
)

Constructor, sets the name of the dataset and the number of dimensions of the training data. The name of the dataset should not contain any spaces.

Parameters
numDimensions: the number of dimensions of the training data, should be an unsigned integer greater than 0
datasetName: the name of the dataset, should not contain any spaces
infoText: some info about the data in this dataset, this can contain spaces

Definition at line 26 of file ClassificationData.cpp.

ClassificationData::ClassificationData ( const ClassificationData & rhs)

Copy Constructor, copies the ClassificationData from the rhs instance to this instance

Parameters
rhs: another instance of the ClassificationData class from which the data will be copied to this instance

Definition at line 41 of file ClassificationData.cpp.

ClassificationData::~ClassificationData ( )
virtual

Default Destructor

Definition at line 45 of file ClassificationData.cpp.

Member Function Documentation

bool ClassificationData::addClass ( const UINT  classLabel,
const std::string  className = "NOT_SET" 
)

This function adds the class with the classLabel to the class tracker. If the class tracker already contains the classLabel then the function will return false.

Parameters
classLabel: the class label you want to add to the classTracker
className: the name associated with the new class
Returns
returns true if the classLabel was added, false otherwise

Definition at line 241 of file ClassificationData.cpp.

bool ClassificationData::addSample ( const UINT  classLabel,
const VectorFloat & sample 
)

Adds a new labelled sample to the dataset. The dimensionality of the sample should match the number of dimensions in the ClassificationData. The class label should be greater than zero (as zero is used as the default null rejection class label).

Parameters
classLabel: the class label of the corresponding sample
sample: the new sample you want to add to the dataset. The dimensionality of this sample should match the number of dimensions in the ClassificationData
Returns
true if the sample was correctly added to the dataset, false otherwise

Definition at line 133 of file ClassificationData.cpp.

void ClassificationData::clear ( )

Clears any previous training data and counters

Definition at line 70 of file ClassificationData.cpp.

bool ClassificationData::enableExternalRangeScaling ( const bool  useExternalRanges)

Sets if the dataset should be scaled using an external range (if useExternalRanges == true) or the ranges of the dataset (if false). The external ranges need to be set FIRST before calling this function, otherwise it will return false.

Parameters
useExternalRanges: sets if these ranges should be used to scale the dataset
Returns
returns true if the useExternalRanges variable was set, false otherwise

Definition at line 352 of file ClassificationData.cpp.

UINT ClassificationData::eraseAllSamplesWithClassLabel ( const UINT  classLabel)
Deprecated:
This function is now deprecated! You should use removeClass(const UINT classLabel) instead.

Deletes from the dataset all the samples with a specific class label.

Parameters
classLabel: the class label of the samples you wish to delete from the dataset
Returns
the number of samples deleted from the dataset

Definition at line 237 of file ClassificationData.cpp.

bool ClassificationData::generateGaussDataset ( const std::string  filename,
const UINT  numSamples = 10000,
const UINT  numClasses = 10,
const UINT  numDimensions = 3,
const Float  range = 10,
const Float  sigma = 1 
)
static

Generates a labeled dataset that can be used for basic training/testing/validation for ClassificationData, saving the dataset to the file specified by filename.

Samples in the dataset will be generated based on K randomly selected models, with Gaussian noise. K is set by the numClasses argument.

The Gaussian clusters are selected at random, therefore the returned dataset may or may not be linearly separable, depending on the random clusters.

The range of each dimension will be [-range range]. Sigma controls the amount of Gaussian noise added.

Parameters
filename: the name of the file the dataset will be saved to
numSamples: the total number of samples in the dataset
numClasses: the number of classes in the dataset
numDimensions: the number of dimensions in the dataset
range: the range the data will be sampled from, range will be [-range range] for each dimension
sigma: the amount of Gaussian noise
Returns
returns true if the dataset was created successfully, false otherwise

Definition at line 1511 of file ClassificationData.cpp.

ClassificationData ClassificationData::generateGaussDataset ( const UINT  numSamples = 10000,
const UINT  numClasses = 10,
const UINT  numDimensions = 3,
const Float  range = 10,
const Float  sigma = 1 
)
static

Generates a labeled dataset that can be used for basic training/testing/validation for ClassificationData and returns it directly.

Samples in the dataset will be generated based on K randomly selected models, with Gaussian noise. K is set by the numClasses argument.

The Gaussian clusters are selected at random, therefore the returned dataset may or may not be linearly separable, depending on the random clusters.

The range of each dimension will be [-range range]. Sigma controls the amount of Gaussian noise added.

Parameters
numSamples: the total number of samples in the dataset
numClasses: the number of classes in the dataset
numDimensions: the number of dimensions in the dataset
range: the range the data will be sampled from, range will be [-range range] for each dimension
sigma: the amount of Gaussian noise
Returns
returns the new dataset

Definition at line 1520 of file ClassificationData.cpp.

ClassificationData ClassificationData::generateGaussLinearDataset ( const UINT  numSamples = 10000,
const UINT  numClasses = 10,
const UINT  numDimensions = 3,
const Float  range = 10,
const Float  sigma = 1 
)
static

Generates a labeled dataset that can be used for basic training/testing/validation for ClassificationData and returns it directly.

Samples in the dataset will be generated based on K randomly selected models, with Gaussian noise. K is set by the numClasses argument.

The range of each dimension will be [-range range]. Sigma controls the amount of Gaussian noise added.

The Gaussian clusters are encouraged to be linearly separable by setting the centroids of each class on a regularly spaced grid. If there are too many classes or the sigma noise of each class is too high then the resulting data may NOT be linearly separable.

Parameters
numSamples: the total number of samples in the dataset
numClasses: the number of classes in the dataset
numDimensions: the number of dimensions in the dataset
range: the range the data will be sampled from, range will be [-range range] for each dimension
sigma: the amount of Gaussian noise
Returns
returns the new dataset

Definition at line 1559 of file ClassificationData.cpp.

ClassificationData ClassificationData::getBootstrappedDataset ( const UINT  numSamples = 0,
const bool  balanceDataset = false 
) const

Gets a bootstrapped dataset from the current dataset. If the numSamples parameter is set to zero, then the size of the bootstrapped dataset will match the size of the current dataset, otherwise the size of the bootstrapped dataset will match the numSamples parameter.

Parameters
numSamples: the size of the bootstrapped dataset
balanceDataset: if true will use stratified sampling to balance the dataset returned, otherwise will use random sampling
Returns
returns a bootstrapped ClassificationData

Definition at line 1048 of file ClassificationData.cpp.

ClassificationData ClassificationData::getClassData ( const UINT  classLabel) const

Returns all the data with the class label set by classLabel. The classLabel should be a valid classLabel, otherwise the dataset returned will be empty.

Parameters
classLabel: the class label of the class you want the data for
Returns
returns a dataset containing all the data with the matching classLabel

Definition at line 1025 of file ClassificationData.cpp.

Vector< UINT > ClassificationData::getClassDataIndexes ( const UINT  classLabel) const

Gets the indexes for all the samples in the current dataset belonging to the classLabel.

Parameters
classLabel: the classLabel of the class you want the indexes for
Returns
a Vector< UINT > containing the indexes for all the samples in the current dataset belonging to the classLabel

Definition at line 1457 of file ClassificationData.cpp.

MatrixFloat ClassificationData::getClassHistogramData ( const UINT  classLabel,
const UINT  numBins 
) const

Computes a histogram for a specific class.

Parameters
classLabel: the class label of the class you want to compute the histogram data for
numBins: the number of bins in the histogram
Returns
a MatrixFloat of histogram data where each row represents a dimension and each column represents a histogram bin

Definition at line 1305 of file ClassificationData.cpp.

Vector< ClassificationSample > ClassificationData::getClassificationData ( ) const
inline

Gets the classification data.

Returns
a Vector of ClassificationSample

Definition at line 553 of file ClassificationData.h.

UINT ClassificationData::getClassLabelIndexValue ( const UINT  classLabel) const

Gets the index of the class label from the class tracker.

Parameters
classLabel: the class label you want to access the index for
Returns
an unsigned int representing the index of the class label in the class tracker

Definition at line 1185 of file ClassificationData.cpp.

Vector< UINT > ClassificationData::getClassLabels ( ) const

Gets the class labels of all the classes in the dataset.

Returns
returns a Vector of UINTs, where each element represents a class label.

Definition at line 1252 of file ClassificationData.cpp.

MatrixFloat ClassificationData::getClassMean ( ) const

Gets the mean values for each class in the dataset. This is returned in an [K N] matrix, where K is the number of classes in the dataset and N is the number of dimensions in the dataset.

Returns
a MatrixFloat with the mean values for each class in the dataset

Definition at line 1351 of file ClassificationData.cpp.

std::string ClassificationData::getClassNameForCorrespondingClassLabel ( const UINT  classLabel) const

Gets the name of the class with a given class label. If the class label does not exist then the string "CLASS_LABEL_NOT_FOUND" will be returned.

Parameters
classLabel: the class label you want to access the name for
Returns
a string containing the name of the given class label or the string "CLASS_LABEL_NOT_FOUND" if the class label does not exist

Definition at line 1195 of file ClassificationData.cpp.

MatrixFloat ClassificationData::getClassStdDev ( ) const

Gets the standard deviation values for each class in the dataset. This is returned in an [K N] matrix, where K is the number of classes in the dataset and N is the number of dimensions in the dataset.

Returns
a MatrixFloat with the standard deviation values for each class in the dataset

Definition at line 1375 of file ClassificationData.cpp.

Vector< ClassTracker > ClassificationData::getClassTracker ( ) const
inline

Gets the class tracker for each class in the dataset.

Returns
a Vector of ClassTracker, one for each class in the dataset

Definition at line 528 of file ClassificationData.h.

MatrixFloat ClassificationData::getCovarianceMatrix ( ) const

Gets the covariance matrix across all the classes in the dataset. This is returned in an [N N] matrix, where N is the number of dimensions in the dataset.

Returns
a MatrixFloat with the covariance values for the dataset

Definition at line 1400 of file ClassificationData.cpp.

MatrixDouble ClassificationData::getDataAsMatrixDouble ( ) const

Gets the data as a MatrixDouble. This returns just the data, not the labels. This will be an M by N MatrixDouble, where M is the number of samples and N is the number of dimensions.

Returns
a MatrixDouble containing the data from the current dataset.

Definition at line 1482 of file ClassificationData.cpp.

MatrixFloat ClassificationData::getDataAsMatrixFloat ( ) const

Gets the data as a MatrixFloat. This returns just the data, not the labels. This will be an M by N MatrixFloat, where M is the number of samples and N is the number of dimensions.

Returns
a MatrixFloat containing the data from the current dataset.

Definition at line 1497 of file ClassificationData.cpp.

std::string ClassificationData::getDatasetName ( ) const
inline

Gets the name of the dataset.

Returns
returns the name of the dataset

Definition at line 435 of file ClassificationData.h.

Vector< MatrixFloat > ClassificationData::getHistogramData ( const UINT  numBins) const

Computes a histogram for each class in the dataset.

Parameters
numBins: the number of bins in the histogram
Returns
a Vector of MatrixFloat, each element represents a class and is a MatrixFloat of histogram data where each row represents a dimension and each column represents a histogram bin

Definition at line 1417 of file ClassificationData.cpp.

std::string ClassificationData::getInfoText ( ) const
inline

Gets the infotext for the dataset

Returns
returns the infotext of the dataset

Definition at line 442 of file ClassificationData.h.

UINT ClassificationData::getMaximumClassLabel ( ) const

Gets the maximum class label in the dataset. If there are no values in the dataset then the value 0 will be returned.

Returns
an unsigned int representing the maximum class label in the dataset

Definition at line 1173 of file ClassificationData.cpp.

VectorFloat ClassificationData::getMean ( ) const

Gets the mean values across all classes in the dataset.

Returns
a Vector containing the mean values across the entire dataset.

Definition at line 1276 of file ClassificationData.cpp.

UINT ClassificationData::getMinimumClassLabel ( ) const

Gets the minimum class label in the dataset. If there are no values in the dataset then the value 99999 will be returned.

Returns
an unsigned int representing the minimum class label in the dataset

Definition at line 1160 of file ClassificationData.cpp.

UINT ClassificationData::getNumClasses ( ) const
inline

Gets the number of classes.

Returns
an unsigned int representing the number of classes

Definition at line 470 of file ClassificationData.h.

UINT ClassificationData::getNumDimensions ( ) const
inline

Gets the number of dimensions of the labelled classification data.

Returns
an unsigned int representing the number of dimensions in the classification data

Definition at line 456 of file ClassificationData.h.

Vector< UINT > ClassificationData::getNumSamplesPerClass ( ) const

Gets the number of samples in each class.

Returns
returns a Vector of UINTs, where each element represents the number of samples in that class.

Definition at line 1264 of file ClassificationData.cpp.

Vector< MinMax > ClassificationData::getRanges ( ) const

Gets the ranges of the classification data.

Returns
a Vector of minimum and maximum values for each dimension of the data

Definition at line 1231 of file ClassificationData.cpp.

std::string ClassificationData::getStatsAsString ( ) const

Gets the stats of the dataset as a string

Returns
returns the stats of this dataset as a string

Definition at line 1206 of file ClassificationData.cpp.

VectorFloat ClassificationData::getStdDev ( ) const

Gets the standard deviation values across all classes in the dataset.

Returns
a Vector containing the standard deviation values across all classes in the dataset.

Definition at line 1290 of file ClassificationData.cpp.

ClassificationData ClassificationData::getTestFoldData ( const UINT  foldIndex) const

Returns the test dataset for the k-th fold for cross validation. The spiltDataIntoKFolds(UINT K) function should have been called once before using this function. The foldIndex should be in the range [0 K-1], where K is the number of folds the data was split into.

Parameters
foldIndex: the index of the fold you want the test data for, this should be in the range [0 K-1], where K is the number of folds the data was split into
Returns
returns a test dataset

Definition at line 994 of file ClassificationData.cpp.

ClassificationData ClassificationData::getTrainingFoldData ( const UINT  foldIndex) const

Returns the training dataset for the k-th fold for cross validation. The spiltDataIntoKFolds(UINT K) function should have been called once before using this function. The foldIndex should be in the range [0 K-1], where K is the number of folds the data was split into.

Parameters
foldIndex: the index of the fold you want the training data for, this should be in the range [0 K-1], where K is the number of folds the data was split into
Returns
returns a training dataset

Definition at line 958 of file ClassificationData.cpp.

ClassificationData::GRT_DEPRECATED_MSG ( "partition(...) is deprecated, use split(...) instead",
ClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
)
Deprecated:
use split(...) instead
Parameters
partitionPercentage: sets the percentage of data which remains in this instance, the remaining percentage of data is then returned as the testing/validation dataset
useStratifiedSampling: sets if the dataset should be broken into homogeneous groups first before randomly being split, default value is false
Returns
a new ClassificationData instance, containing the remaining data not kept by this instance
bool ClassificationData::loadDatasetFromCSVFile ( const std::string &  filename,
const UINT  classLabelColumnIndex = 0 
)

Loads the labelled classification data from a CSV file. This assumes the data is formatted with each row representing a sample. The class label should be the first column followed by the sample data as the following N columns, where N is the number of dimensions in the data. If the class label is not the first column, you should set the 2nd argument (UINT classLabelColumnIndex) to the column index that contains the class label.

Parameters
filename: the name of the file the data will be loaded from
classLabelColumnIndex: the index of the column containing the class label. Default value = 0
Returns
true if the data was loaded successfully, false otherwise

Definition at line 603 of file ClassificationData.cpp.

bool ClassificationData::loadDatasetFromFile ( const std::string &  filename)

Loads the labelled classification data from a custom file format.

Parameters
filename: the name of the file the data will be loaded from
Returns
true if the data was loaded successfully, false otherwise

Definition at line 443 of file ClassificationData.cpp.

bool ClassificationData::merge ( const ClassificationData & data)

Adds the data to the current instance of the ClassificationData. The number of dimensions in both datasets must match. The names of the classes from the data will be added to the current instance.

Parameters
data: the dataset to add to this dataset
Returns
returns true if the datasets were merged, false otherwise

Definition at line 819 of file ClassificationData.cpp.

ClassificationData & ClassificationData::operator= ( const ClassificationData & rhs)

Sets the equals operator, copies the data from the rhs instance to this instance

Parameters
rhs: another instance of the ClassificationData class from which the data will be copied to this instance
Returns
a reference to this instance of ClassificationData

Definition at line 48 of file ClassificationData.cpp.

ClassificationSample& ClassificationData::operator[] ( const UINT &  i)
inline

Array Subscript Operator, returns the ClassificationSample at index i. It is up to the user to ensure that i is within the range of [0 totalNumSamples-1]

Parameters
i: the index of the training sample you want to access. Must be within the range of [0 totalNumSamples-1]
Returns
a reference to the i'th ClassificationSample

Definition at line 82 of file ClassificationData.h.

const ClassificationSample& ClassificationData::operator[] ( const UINT &  i) const
inline

Const Array Subscript Operator, returns the ClassificationSample at index i. It is up to the user to ensure that i is within the range of [0 totalNumSamples-1]

Parameters
i: the index of the training sample you want to access. Must be within the range of [0 totalNumSamples-1]
Returns
a const reference to the i'th ClassificationSample

Definition at line 93 of file ClassificationData.h.

bool ClassificationData::printStats ( ) const

Prints the dataset info (such as its name and infoText) and the stats (such as the number of examples, number of dimensions, number of classes, etc.) to the std out.

Returns
returns true if the dataset info and stats were printed successfully, false otherwise
Examples:
Tutorials/MachineLearning101/MachineLearning101.cpp.

Definition at line 693 of file ClassificationData.cpp.

RegressionData ClassificationData::reformatAsRegressionData ( ) const

Reformats the ClassificationData as RegressionData to enable regression algorithms like the MLP to be used as a classifier. This sets the number of targets in the regression data equal to the number of classes in the classification data. The target output of each regression sample will therefore be all zeros, except for the index matching the class label which will be 1. For this to work, the labelled classification data cannot have any samples with a class label of 0!

Returns
a new RegressionData instance, containing the reformatted classification data

Definition at line 1108 of file ClassificationData.cpp.

UnlabelledData ClassificationData::reformatAsUnlabelledData ( ) const

Reformats the ClassificationData as UnlabelledData so the data can be used to train unsupervised learning algorithms such as K-Means Clustering and Gaussian Mixture Models.

Returns
a new UnlabelledData instance, containing the reformatted classification data

Definition at line 1143 of file ClassificationData.cpp.

bool ClassificationData::relabelAllSamplesWithClassLabel ( const UINT  oldClassLabel,
const UINT  newClassLabel 
)

Relabels all the samples with the class label A with the new class label B.

Parameters
oldClassLabel: the class label of the samples you want to relabel
newClassLabel: the class label the samples will be relabelled with
Returns
returns true if the samples were correctly relabelled, false otherwise

Definition at line 294 of file ClassificationData.cpp.

UINT ClassificationData::removeClass ( const UINT  classLabel)

Deletes from the dataset all the samples with a specific class label.

Parameters
classLabel: the class label of the samples you wish to delete from the dataset
Returns
the number of samples deleted from the dataset

Definition at line 260 of file ClassificationData.cpp.

bool ClassificationData::removeLastSample ( )

Removes the last training sample added to the dataset.

Returns
true if the last sample was removed, false otherwise

Definition at line 218 of file ClassificationData.cpp.

bool ClassificationData::removeSample ( const UINT  index)

Removes the training sample at the specific index from the dataset.

Parameters
index: the index of the training sample that should be removed
Returns
true if the index is valid and the sample was removed, false otherwise

Definition at line 183 of file ClassificationData.cpp.

bool ClassificationData::reserve ( const UINT  M)

Requests that the Vector capacity be at least enough to contain M elements.

If M is greater than the current Vector capacity, the function causes the container to reallocate its storage increasing its capacity to M (or greater).

Parameters
M: the new memory size
Returns
true if the memory was reserved successfully, false otherwise

Definition at line 228 of file ClassificationData.cpp.

bool ClassificationData::save ( const std::string &  filename) const

Saves the classification data to a file. If the file format ends in '.csv' then the data will be saved as comma-separated values, otherwise it will be saved to a custom GRT file (which contains the csv data with an additional header).

Parameters
filename: the name of the file the data will be saved to
Returns
true if the data was saved successfully, false otherwise

Definition at line 378 of file ClassificationData.cpp.

bool ClassificationData::saveDatasetToCSVFile ( const std::string &  filename) const

Saves the labelled classification data to a CSV file. This will save the class label as the first column and the sample data as the following N columns, where N is the number of dimensions in the data. Each row will represent a sample.

Parameters
filename: the name of the file the data will be saved to
Returns
true if the data was saved successfully, false otherwise

Definition at line 580 of file ClassificationData.cpp.

bool ClassificationData::saveDatasetToFile ( const std::string &  filename) const

Saves the labelled classification data to a custom file format.

Parameters
filename: the name of the file the data will be saved to
Returns
true if the data was saved successfully, false otherwise

Definition at line 400 of file ClassificationData.cpp.

bool ClassificationData::scale ( const Float  minTarget,
const Float  maxTarget 
)

Scales the dataset to the new target range.

Parameters
minTarget: the minimum range that the target data should be scaled to
maxTarget: the maximum range that the target data should be scaled to
Returns
true if the data was scaled correctly, false otherwise

Definition at line 360 of file ClassificationData.cpp.

bool ClassificationData::scale ( const Vector< MinMax > &  ranges,
const Float  minTarget,
const Float  maxTarget 
)

Scales the dataset to the new target range, using the Vector of ranges as the min and max source ranges.

Parameters
ranges: a vector containing the new ranges
minTarget: the minimum range that the target data should be scaled to
maxTarget: the maximum range that the target data should be scaled to
Returns
true if the data was scaled correctly, false otherwise

Definition at line 365 of file ClassificationData.cpp.

bool ClassificationData::setAllowNullGestureClass ( const bool  allowNullGestureClass)

Sets if the user can add samples to the dataset with the label matching the GRT_DEFAULT_NULL_CLASS_LABEL. If the allowNullGestureClass is set to true, then the user can add labels matching the default null class label (which is normally 0). If the allowNullGestureClass is set to false, then the user will not be able to add samples that have a class label matching the default null class label.

Parameters
allowNullGestureClass: flag that indicates if the null gesture class should be allowed
Returns
returns true if the allowNullGestureClass was set, false otherwise

Definition at line 128 of file ClassificationData.cpp.

bool ClassificationData::setClassNameForCorrespondingClassLabel ( const std::string  className,
const UINT  classLabel 
)

Sets the name of the class with the given class label. There should not be any spaces in the className. Will return true if the name is set, or false if the class label does not exist.

Parameters
className: the new name for the class (should not contain spaces)
classLabel: the class label of the class whose name should be set
Returns
returns true if the name is set, or false if the class label does not exist

Definition at line 115 of file ClassificationData.cpp.

bool ClassificationData::setDatasetName ( std::string  datasetName)

Sets the name of the dataset. There should not be any spaces in the name. Will return true if the name is set, or false otherwise.

Returns
returns true if the name is set, or false otherwise

Definition at line 98 of file ClassificationData.cpp.

bool ClassificationData::setExternalRanges ( const Vector< MinMax > &  externalRanges,
const bool  useExternalRanges = false 
)

Sets the external ranges of the dataset, also sets if the dataset should be scaled using these values. The dimensionality of the externalRanges Vector should match the number of dimensions of this dataset.

Parameters
externalRanges: an N dimensional Vector containing the min and max values of the expected ranges of the dataset.
useExternalRanges: sets if these ranges should be used to scale the dataset, default value is false.
Returns
returns true if the external ranges were set, false otherwise

Definition at line 342 of file ClassificationData.cpp.

bool ClassificationData::setInfoText ( std::string  infoText)

Sets the info string. This can be any string with information about how the training data was recorded for example.

Parameters
infoText: the infoText
Returns
true if the infoText was correctly updated, false otherwise

Definition at line 110 of file ClassificationData.cpp.

bool ClassificationData::setNumDimensions ( UINT  numDimensions)

Sets the number of dimensions in the training data. This should be an unsigned integer greater than zero. This will clear any previous training data and counters. This function needs to be called before any new samples can be added to the dataset, unless the numDimensions variable was set in the constructor or some data was already loaded from a file.

Parameters
numDimensions: the number of dimensions of the training data. Must be an unsigned integer greater than zero
Returns
true if the number of dimensions was correctly updated, false otherwise

Definition at line 78 of file ClassificationData.cpp.

bool ClassificationData::sortClassLabels ( )

Sorts the class labels (in the class tracker) in ascending order.

Returns
returns true if the labels were sorted successfully, false otherwise

Definition at line 700 of file ClassificationData.cpp.

bool ClassificationData::spiltDataIntoKFolds ( const UINT  K,
const bool  useStratifiedSampling = false 
)

This function prepares the dataset for k-fold cross validation and should be called prior to calling the getTrainingFold(UINT foldIndex) or getTestingFold(UINT foldIndex) functions. It will split the dataset into K folds, as long as K < M, where M is the number of samples in the dataset.

Parameters
K: the number of folds the dataset will be split into, K should be less than the number of samples in the dataset
useStratifiedSampling: sets if the dataset should be broken into homogeneous groups first before randomly being split, default value is false
Returns
returns true if the dataset was split correctly, false otherwise

Definition at line 853 of file ClassificationData.cpp.

ClassificationData ClassificationData::split ( const UINT  splitPercentage,
const bool  useStratifiedSampling = false 
)

Splits the dataset into a training dataset (which is kept by this instance of the ClassificationData) and a testing/validation dataset (which is returned as a new instance of a ClassificationData).

Parameters
splitPercentage: sets the percentage of data which remains in this instance, the remaining percentage of data is then returned as the testing/validation dataset
useStratifiedSampling: sets if the dataset should be broken into homogeneous groups first before randomly being split, default value is false
Returns
a new ClassificationData instance, containing the remaining data not kept by this instance
Examples:
ClassificationModulesExamples/AdaBoostExample/AdaBoostExample.cpp, ClassificationModulesExamples/ANBCExample/ANBCExample.cpp, ClassificationModulesExamples/BAGExample/BAGExample.cpp, ClassificationModulesExamples/DecisionTreeExample/DecisionTreeExample.cpp, ClassificationModulesExamples/GMMExample/GMMExample.cpp, ClassificationModulesExamples/KNNExample/KNNExample.cpp, ClassificationModulesExamples/MinDistExample/MinDistExample.cpp, ClassificationModulesExamples/RandomForestsExample/RandomForestsExample.cpp, ClassificationModulesExamples/SoftmaxExample/SoftmaxExample.cpp, ClassificationModulesExamples/SVMExample/SVMExample.cpp, and Tutorials/MachineLearning101/MachineLearning101.cpp.

Definition at line 711 of file ClassificationData.cpp.


The documentation for this class was generated from the following files: