26 debugLog.setProceedingText(
"[DEBUG TSCD]");
27 errorLog.setProceedingText(
"[ERROR TSCD]");
28 warningLog.setProceedingText(
"[WARNING TSCD]");
37 if( numDimensions > 0 ){
44 debugLog.setProceedingText(
"[DEBUG TSCD]");
45 errorLog.setProceedingText(
"[ERROR TSCD]");
46 warningLog.setProceedingText(
"[WARNING TSCD]");
80 if( numDimensions > 0 ){
93 errorLog <<
"setNumDimensions(UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << std::endl;
100 if( datasetName.find(
" ") == std::string::npos ){
105 errorLog <<
"setDatasetName(std::string datasetName) - The dataset name cannot contain any spaces!" << std::endl;
134 errorLog <<
"addSample(UINT classLabel, MatrixFloat trainingSample) - The dimensionality of the training sample (" << trainingSample.
getNumCols() <<
") does not match that of the dataset (" <<
numDimensions <<
")" << std::endl;
140 errorLog <<
"addSample(UINT classLabel, MatrixFloat sample) - the class label can not be 0!" << std::endl;
145 data.push_back( newSample );
152 bool labelFound =
false;
169 UINT numExamplesRemoved = 0;
170 UINT numExamplesToRemove = 0;
182 if( numExamplesToRemove > 0 ){
184 while( numExamplesRemoved < numExamplesToRemove ){
185 if(
data[i].getClassLabel() == classLabel ){
187 numExamplesRemoved++;
188 }
else if( ++i ==
data.size() )
break;
194 return numExamplesRemoved;
224 bool oldClassLabelFound =
false;
225 bool newClassLabelAllReadyExists =
false;
226 UINT indexOfOldClassLabel = 0;
227 UINT indexOfNewClassLabel = 0;
232 indexOfOldClassLabel = i;
233 oldClassLabelFound =
true;
236 indexOfNewClassLabel = i;
237 newClassLabelAllReadyExists =
true;
242 if( !oldClassLabelFound ){
248 if(
data[i].getClassLabel() == oldClassLabel ){
249 data[i].setTrainingSample(newClassLabel,
data[i].getData());
254 if( newClassLabelAllReadyExists ){
288 return scale(ranges,minTarget,maxTarget);
296 for(UINT x=0; x<
data[i].getLength(); x++){
298 data[i][x][j] =
Util::scale(
data[i][x][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
331 file.open(fileName.c_str(), std::ios::out);
333 if( !file.is_open() ){
334 errorLog <<
"saveDatasetToFile(std::string fileName) - Failed to open file!" << std::endl;
338 file <<
"GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0\n";
339 file <<
"DatasetName: " <<
datasetName << std::endl;
340 file <<
"InfoText: " <<
infoText << std::endl;
343 file <<
"NumberOfClasses: "<<
classTracker.size() << std::endl;
344 file <<
"ClassIDsAndCounters: " << std::endl;
358 file <<
"LabelledTimeSeriesTrainingData:\n";
361 file <<
"************TIME_SERIES************\n";
362 file <<
"ClassID: "<<
data[x].getClassLabel() << std::endl;
363 file <<
"TimeSeriesLength: "<<
data[x].getLength()<< std::endl;
364 file <<
"TimeSeriesData: \n";
365 for(UINT i=0; i<
data[x].getLength(); i++){
367 file <<
data[x][i][j];
368 if( j<numDimensions-1 ) file <<
"\t";
380 file.open(filename.c_str(), std::ios::in);
384 if( !file.is_open() ){
385 errorLog <<
"loadDatasetFromFile(std::string filename) - FILE NOT OPEN!" << std::endl;
393 if(word !=
"GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0"){
396 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find file header!" << std::endl;
402 if(word !=
"DatasetName:"){
403 errorLog <<
"loadDatasetFromFile(std::string filename) - failed to find DatasetName!" << std::endl;
410 if(word !=
"InfoText:"){
411 errorLog <<
"loadDatasetFromFile(std::string filename) - failed to find InfoText!" << std::endl;
419 while( word !=
"NumDimensions:" ){
425 if(word !=
"NumDimensions:"){
428 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find NumDimensions!" << std::endl;
435 if(word !=
"TotalNumTrainingExamples:"){
438 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find TotalNumTrainingExamples!" << std::endl;
445 if(word !=
"NumberOfClasses:"){
448 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find NumberOfClasses!" << std::endl;
458 if(word !=
"ClassIDsAndCounters:"){
461 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find ClassIDsAndCounters!" << std::endl;
472 if(word !=
"UseExternalRanges:"){
475 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find UseExternalRanges!" << std::endl;
481 if( useExternalRanges ){
491 if(word !=
"LabelledTimeSeriesTrainingData:"){
494 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find LabelledTimeSeriesTrainingData!" << std::endl;
504 UINT timeSeriesLength = 0;
507 if( word !=
"************TIME_SERIES************" ){
510 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find TimeSeries Header!" << std::endl;
515 if( word !=
"ClassID:" ){
518 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find ClassID!" << std::endl;
524 if( word !=
"TimeSeriesLength:" ){
527 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find TimeSeriesLength!" << std::endl;
530 file >> timeSeriesLength;
533 if( word !=
"TimeSeriesData:" ){
536 errorLog <<
"loadDatasetFromFile(std::string filename) - Failed to find TimeSeriesData!" << std::endl;
541 MatrixFloat trainingExample(timeSeriesLength,numDimensions);
542 for(UINT i=0; i<timeSeriesLength; i++){
544 file >> trainingExample[i][j];
548 data[x].setTrainingSample(classLabel,trainingExample);
558 file.open(filename.c_str(), std::ios::out );
560 if( !file.is_open() ){
566 for(UINT i=0; i<
data[x].getLength(); i++){
568 file <<
data[x].getClassLabel() <<
",";
570 file <<
data[x][i][j];
571 if( j+1 < numDimensions ){
596 if( !parser.parseCSVFile(filename,
true) ){
597 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename) - Failed to parse CSV file!" << std::endl;
601 if( !parser.getConsistentColumnSize() ){
602 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have a consistent number of columns!" << std::endl;
606 if( parser.getColumnSize() <= 2 ){
607 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << std::endl;
615 data.reserve( parser.getRowSize() );
617 UINT sampleCounter = 0;
618 UINT lastSampleCounter = 0;
624 for(UINT i=0; i<parser.getRowSize(); i++){
626 sampleCounter = grt_from_str< UINT >( parser[i][0] );
629 if( sampleCounter != lastSampleCounter && i != 0 ){
631 if( !
addSample(classLabel, timeseries) ){
632 warningLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i <<
" to the dataset!" << std::endl;
636 lastSampleCounter = sampleCounter;
639 classLabel = grt_from_str< UINT >( parser[i][1] );
645 sample[j++] = grt_from_str< Float >( parser[i][n] );
652 if ( timeseries.
getSize() > 0 )
654 if( !
addSample(classLabel, timeseries) ){
655 warningLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << parser.getRowSize()-1 <<
" to the dataset!" << std::endl;
673 stats +=
"DatasetInfo:\t" +
infoText +
"\n";
677 stats +=
"ClassStats:\n";
682 stats +=
"\tClassName:\t" +
classTracker[k].className +
"\n";
687 stats +=
"Dataset Ranges:\n";
688 for(UINT j=0; j<ranges.size(); j++){
692 stats +=
"Timeseries Lengths:\n";
693 UINT M = (UINT)
data.size();
694 for(UINT j=0; j<M; j++){
720 UINT randomIndex = 0;
722 if( useStratifiedSampling ){
733 UINT numSamples = (UINT)classData[k].size();
734 for(UINT x=0; x<numSamples; x++){
739 SWAP( classData[k][ x ] ,classData[k][ randomIndex ] );
745 UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingSizePercentage) );
748 for(UINT i=0; i<numTrainingExamples; i++){
749 trainingSet.
addSample(
data[ classData[k][i] ].getClassLabel(),
data[ classData[k][i] ].getData() );
751 for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
752 testSet.
addSample(
data[ classData[k][i] ].getClassLabel(),
data[ classData[k][i] ].getData() );
761 const UINT numTrainingExamples = (UINT) floor( Float(
totalNumSamples) / 100.0 * Float(trainingSizePercentage) );
770 SWAP( indexs[ x ] , indexs[ randomIndex ] );
774 for(UINT i=0; i<numTrainingExamples; i++){
775 trainingSet.
addSample(
data[ indexs[i] ].getClassLabel(),
data[ indexs[i] ].getData() );
778 testSet.
addSample(
data[ indexs[i] ].getClassLabel(),
data[ indexs[i] ].getData() );
792 errorLog <<
"merge(TimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.
getNumDimensions() <<
") does not match the number of dimensions of this dataset (" <<
numDimensions <<
")" << std::endl;
802 addSample(labelledData[i].getClassLabel(), labelledData[i].getData());
807 for(UINT i=0; i<classTracker.size(); i++){
821 errorLog <<
"spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl;
827 errorLog <<
"spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << std::endl;
832 if( useStratifiedSampling ){
835 errorLog <<
"spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
853 UINT randomIndex = 0;
855 if( useStratifiedSampling ){
866 UINT numSamples = (UINT)classData[c].size();
867 for(UINT x=0; x<numSamples; x++){
872 SWAP( classData[c][ x ] , classData[c][ randomIndex ] );
879 iter = classData[ c ].begin();
881 while( iter != classData[c].end() ){
897 SWAP( indexs[ x ] , indexs[ randomIndex ] );
907 if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
924 errorLog <<
"getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << std::endl;
928 if( foldIndex >=
kFoldValue )
return trainingData;
935 if( k != foldIndex ){
939 trainingData.
addSample(
data[ index ].getClassLabel(),
data[ index ].getData() );
952 if( foldIndex >=
kFoldValue )
return testData;
970 if(
data[x].getClassLabel() == classLabel ){
982 return unlabelledData;
988 for(UINT x=0; x<
data[i].getLength(); x++){
993 return unlabelledData;
997 UINT minClassLabel = 99999;
1005 return minClassLabel;
1010 UINT maxClassLabel = 0;
1018 return maxClassLabel;
1027 warningLog <<
"getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel <<
" in class tracker!" << std::endl;
1038 return "CLASS_LABEL_NOT_FOUND";
1049 ranges[j].minValue =
data[0][0][0];
1050 ranges[j].maxValue =
data[0][0][0];
1052 for(UINT i=0; i<
data[x].getLength(); i++){
1053 if(
data[x][i][j] < ranges[j].minValue ){ ranges[j].minValue =
data[x][i][j]; }
1054 else if(
data[x][i][j] > ranges[j].maxValue ){ ranges[j].maxValue =
data[x][i][j]; }
1068 M +=
data[x].getLength();
1076 for(UINT i=0; i<
data[x].getLength(); i++){
1078 matrixData[index][j] =
data[x][i][j];
std::string getStatsAsString() const
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
bool loadDatasetFromCSVFile(const std::string &filename)
unsigned int getSize() const
static std::string toString(const int &i)
bool addSample(const VectorFloat &sample)
virtual ~TimeSeriesClassificationData()
UINT numDimensions
The number of dimensions in the dataset.
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
bool setInfoText(const std::string infoText)
Vector< MinMax > externalRanges
A vector containing a set of externalRanges set by the user.
bool merge(const TimeSeriesClassificationData &labelledData)
Vector< TimeSeriesClassificationSample > data
The labelled time series classification data.
UINT kFoldValue
The number of folds the dataset has been split into for cross validation.
bool setNumDimensions(const UINT numDimensions)
virtual bool resize(const unsigned int size)
TimeSeriesClassificationData & operator=(const TimeSeriesClassificationData &rhs)
bool saveDatasetToFile(const std::string filename) const
Vector< MinMax > getRanges() const
bool useExternalRanges
A flag to show if the dataset should be scaled using the externalRanges values.
UINT totalNumSamples
The total number of samples in the dataset.
Vector< ClassTracker > getClassTracker() const
bool setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
The TimeSeriesClassificationData is the main data structure for recording, labeling, managing, saving, and loading training data for supervised temporal learning problems. Unlike the ClassificationData, in which each sample consists of a single N-dimensional datum, a TimeSeriesClassificationData sample consists of an N-dimensional time series of length M. The length of each time series sample (i.e. M) can be different for each datum in the dataset.
MatrixFloat getDataAsMatrixFloat() const
bool setNumDimensions(const UINT numDimensions)
WarningLog warningLog
Default warning log.
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
bool allowNullGestureClass
A flag that enables/disables a user from adding new samples with a class label matching the default n...
bool enableExternalRangeScaling(const bool useExternalRanges)
unsigned int getNumCols() const
std::string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
DebugLog debugLog
Default debugging log.
UINT getClassLabelIndexValue(const UINT classLabel) const
bool crossValidationSetup
A flag to show if the dataset is ready for cross validation.
bool addSample(const UINT classLabel, const MatrixFloat &trainingSample)
UINT getMaximumClassLabel() const
std::string datasetName
The name of the dataset.
bool scale(const Float minTarget, const Float maxTarget)
TimeSeriesClassificationData getClassData(const UINT classLabel) const
bool setClassNameForCorrespondingClassLabel(const std::string className, const UINT classLabel)
UINT getNumDimensions() const
bool saveDatasetToCSVFile(const std::string &filename) const
ErrorLog errorLog
Default error log.
Vector< ClassTracker > classTracker
A vector of ClassTracker, which keeps track of the number of samples of each class.
UnlabelledData reformatAsUnlabelledData() const
int getRandomNumberInt(int minRange, int maxRange)
static bool stringEndsWith(const std::string &str, const std::string &ending)
TimeSeriesClassificationData(UINT numDimensions=0, std::string datasetName="NOT_SET", std::string infoText="")
UINT getMinimumClassLabel() const
std::string infoText
Some infoText about the dataset.
UINT getNumClasses() const
bool setDatasetName(const std::string datasetName)
TimeSeriesClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
UINT getNumSamples() const
Vector< TimeSeriesClassificationSample > getClassificationData() const
bool push_back(const Vector< T > &sample)
Vector< Vector< UINT > > crossValidationIndexs
A vector to hold the indices of the dataset for the cross validation.
bool setAllowNullGestureClass(const bool allowNullGestureClass)
TimeSeriesClassificationData getTrainingFoldData(const UINT foldIndex) const
bool load(const std::string &filename)
bool save(const std::string &filename) const
bool loadDatasetFromFile(const std::string filename)
TimeSeriesClassificationData getTestFoldData(const UINT foldIndex) const