GestureRecognitionToolkit  Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
TimeSeriesClassificationData.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
22 
23 GRT_BEGIN_NAMESPACE
24 
25 TimeSeriesClassificationData::TimeSeriesClassificationData(UINT numDimensions,std::string datasetName,std::string infoText){
26  debugLog.setProceedingText("[DEBUG TSCD]");
27  errorLog.setProceedingText("[ERROR TSCD]");
28  warningLog.setProceedingText("[WARNING TSCD]");
29 
30  this->numDimensions = numDimensions;
31  this->datasetName = datasetName;
32  this->infoText = infoText;
33  totalNumSamples = 0;
34  crossValidationSetup = false;
35  useExternalRanges = false;
36  allowNullGestureClass = true;
37  if( numDimensions > 0 ){
38  setNumDimensions(numDimensions);
39  }
40 }
41 
43 
44  debugLog.setProceedingText("[DEBUG TSCD]");
45  errorLog.setProceedingText("[ERROR TSCD]");
46  warningLog.setProceedingText("[WARNING TSCD]");
47 
48  *this = rhs;
49 }
50 
52 
54  if( this != &rhs){
55  this->datasetName = rhs.datasetName;
56  this->infoText = rhs.infoText;
57  this->numDimensions = rhs.numDimensions;
62  this->totalNumSamples = rhs.totalNumSamples;
63  this->data = rhs.data;
64  this->classTracker = rhs.classTracker;
65  this->externalRanges = rhs.externalRanges;
66  this->debugLog = rhs.debugLog;
67  this->errorLog = rhs.errorLog;
68  this->warningLog = rhs.warningLog;
69  }
70  return *this;
71 }
72 
74  totalNumSamples = 0;
75  data.clear();
76  classTracker.clear();
77 }
78 
79 bool TimeSeriesClassificationData::setNumDimensions(const UINT numDimensions){
80  if( numDimensions > 0 ){
81  //Clear any previous training data
82  clear();
83 
84  //Set the dimensionality of the training data
85  this->numDimensions = numDimensions;
86 
87  useExternalRanges = false;
88  externalRanges.clear();
89 
90  return true;
91  }
92 
93  errorLog << "setNumDimensions(UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << std::endl;
94  return false;
95 }
96 
97 bool TimeSeriesClassificationData::setDatasetName(const std::string datasetName){
98 
99  //Make sure there are no spaces in the std::string
100  if( datasetName.find(" ") == std::string::npos ){
101  this->datasetName = datasetName;
102  return true;
103  }
104 
105  errorLog << "setDatasetName(std::string datasetName) - The dataset name cannot contain any spaces!" << std::endl;
106  return false;
107 }
108 
109 bool TimeSeriesClassificationData::setInfoText(const std::string infoText){
110  this->infoText = infoText;
111  return true;
112 }
113 
114 bool TimeSeriesClassificationData::setClassNameForCorrespondingClassLabel(const std::string className,const UINT classLabel){
115 
116  for(UINT i=0; i<classTracker.size(); i++){
117  if( classTracker[i].classLabel == classLabel ){
118  classTracker[i].className = className;
119  return true;
120  }
121  }
122 
123  return false;
124 }
125 
126 bool TimeSeriesClassificationData::setAllowNullGestureClass(const bool allowNullGestureClass){
127  this->allowNullGestureClass = allowNullGestureClass;
128  return true;
129 }
130 
131 bool TimeSeriesClassificationData::addSample(const UINT classLabel,const MatrixFloat &trainingSample){
132 
133  if( trainingSample.getNumCols() != numDimensions ){
134  errorLog << "addSample(UINT classLabel, MatrixFloat trainingSample) - The dimensionality of the training sample (" << trainingSample.getNumCols() << ") does not match that of the dataset (" << numDimensions << ")" << std::endl;
135  return false;
136  }
137 
138  //The class label must be greater than zero (as zero is used for the null rejection class label
139  if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
140  errorLog << "addSample(UINT classLabel, MatrixFloat sample) - the class label can not be 0!" << std::endl;
141  return false;
142  }
143 
144  TimeSeriesClassificationSample newSample(classLabel,trainingSample);
145  data.push_back( newSample );
146  totalNumSamples++;
147 
148  if( classTracker.size() == 0 ){
149  ClassTracker tracker(classLabel,1);
150  classTracker.push_back(tracker);
151  }else{
152  bool labelFound = false;
153  for(UINT i=0; i<classTracker.size(); i++){
154  if( classLabel == classTracker[i].classLabel ){
155  classTracker[i].counter++;
156  labelFound = true;
157  break;
158  }
159  }
160  if( !labelFound ){
161  ClassTracker tracker(classLabel,1);
162  classTracker.push_back(tracker);
163  }
164  }
165  return true;
166 }
167 
169  UINT numExamplesRemoved = 0;
170  UINT numExamplesToRemove = 0;
171 
172  //Find out how many training examples we need to remove
173  for(UINT i=0; i<classTracker.size(); i++){
174  if( classTracker[i].classLabel == classLabel ){
175  numExamplesToRemove = classTracker[i].counter;
176  classTracker.erase(classTracker.begin()+i);
177  break;
178  }
179  }
180 
181  //Remove the samples with the matching class ID
182  if( numExamplesToRemove > 0 ){
183  UINT i=0;
184  while( numExamplesRemoved < numExamplesToRemove ){
185  if( data[i].getClassLabel() == classLabel ){
186  data.erase(data.begin()+i);
187  numExamplesRemoved++;
188  }else if( ++i == data.size() ) break;
189  }
190  }
191 
192  totalNumSamples = (UINT)data.size();
193 
194  return numExamplesRemoved;
195 }
196 
198 
199  if( totalNumSamples > 0 ){
200 
201  //Find the corresponding class ID for the last training example
202  UINT classLabel = data[ totalNumSamples-1 ].getClassLabel();
203 
204  //Remove the training example from the buffer
205  data.erase(data.end()-1);
206 
207  totalNumSamples = (UINT)data.size();
208 
209  //Remove the value from the counter
210  for(UINT i=0; i<classTracker.size(); i++){
211  if( classTracker[i].classLabel == classLabel ){
212  classTracker[i].counter--;
213  break;
214  }
215  }
216 
217  return true;
218 
219  }else return false;
220 
221 }
222 
223 bool TimeSeriesClassificationData::relabelAllSamplesWithClassLabel(const UINT oldClassLabel,const UINT newClassLabel){
224  bool oldClassLabelFound = false;
225  bool newClassLabelAllReadyExists = false;
226  UINT indexOfOldClassLabel = 0;
227  UINT indexOfNewClassLabel = 0;
228 
229  //Find out how many training examples we need to relabel
230  for(UINT i=0; i<classTracker.size(); i++){
231  if( classTracker[i].classLabel == oldClassLabel ){
232  indexOfOldClassLabel = i;
233  oldClassLabelFound = true;
234  }
235  if( classTracker[i].classLabel == newClassLabel ){
236  indexOfNewClassLabel = i;
237  newClassLabelAllReadyExists = true;
238  }
239  }
240 
241  //If the old class label was not found then we can't do anything
242  if( !oldClassLabelFound ){
243  return false;
244  }
245 
246  //Relabel the old class labels
247  for(UINT i=0; i<totalNumSamples; i++){
248  if( data[i].getClassLabel() == oldClassLabel ){
249  data[i].setTrainingSample(newClassLabel, data[i].getData());
250  }
251  }
252 
253  //Update the class label counters
254  if( newClassLabelAllReadyExists ){
255  //Add the old sample count to the new sample count
256  classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
257 
258  //Erase the old class tracker
259  classTracker.erase( classTracker.begin() + indexOfOldClassLabel );
260  }else{
261  //Create a new class tracker
262  classTracker.push_back( ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
263  }
264 
265  return true;
266 }
267 
268 bool TimeSeriesClassificationData::setExternalRanges(const Vector< MinMax > &externalRanges,const bool useExternalRanges){
269 
270  if( externalRanges.size() != numDimensions ) return false;
271 
272  this->externalRanges = externalRanges;
273  this->useExternalRanges = useExternalRanges;
274 
275  return true;
276 }
277 
279  if( externalRanges.size() == numDimensions ){
280  this->useExternalRanges = useExternalRanges;
281  return true;
282  }
283  return false;
284 }
285 
286 bool TimeSeriesClassificationData::scale(const Float minTarget,const Float maxTarget){
287  Vector< MinMax > ranges = getRanges();
288  return scale(ranges,minTarget,maxTarget);
289 }
290 
291 bool TimeSeriesClassificationData::scale(const Vector<MinMax> &ranges,const Float minTarget,const Float maxTarget){
292  if( ranges.size() != numDimensions ) return false;
293 
294  //Scale the training data
295  for(UINT i=0; i<totalNumSamples; i++){
296  for(UINT x=0; x<data[i].getLength(); x++){
297  for(UINT j=0; j<numDimensions; j++){
298  data[i][x][j] = Util::scale(data[i][x][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
299  }
300  }
301  }
302 
303  return true;
304 }
305 
306 bool TimeSeriesClassificationData::save(const std::string &filename) const{
307 
308  //Check if the file should be saved as a csv file
309  if( Util::stringEndsWith( filename, ".csv" ) ){
310  return saveDatasetToCSVFile( filename );
311  }
312 
313  //Otherwise save it as a custom GRT file
314  return saveDatasetToFile( filename );
315 }
316 
317 bool TimeSeriesClassificationData::load(const std::string &filename){
318 
319  //Check if the file should be loaded as a csv file
320  if( Util::stringEndsWith( filename, ".csv" ) ){
321  return loadDatasetFromCSVFile( filename );
322  }
323 
324  //Otherwise save it as a custom GRT file
325  return loadDatasetFromFile( filename );
326 }
327 
328 bool TimeSeriesClassificationData::saveDatasetToFile(const std::string fileName) const{
329 
330  std::fstream file;
331  file.open(fileName.c_str(), std::ios::out);
332 
333  if( !file.is_open() ){
334  errorLog << "saveDatasetToFile(std::string fileName) - Failed to open file!" << std::endl;
335  return false;
336  }
337 
338  file << "GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0\n";
339  file << "DatasetName: " << datasetName << std::endl;
340  file << "InfoText: " << infoText << std::endl;
341  file << "NumDimensions: "<<numDimensions << std::endl;
342  file << "TotalNumTrainingExamples: "<<totalNumSamples << std::endl;
343  file << "NumberOfClasses: "<<classTracker.size() << std::endl;
344  file << "ClassIDsAndCounters: " << std::endl;
345 
346  for(UINT i=0; i<classTracker.size(); i++){
347  file << classTracker[i].classLabel << "\t" << classTracker[i].counter << std::endl;
348  }
349 
350  file << "UseExternalRanges: " << useExternalRanges << std::endl;
351 
352  if( useExternalRanges ){
353  for(UINT i=0; i<externalRanges.size(); i++){
354  file << externalRanges[i].minValue << "\t" << externalRanges[i].maxValue << std::endl;
355  }
356  }
357 
358  file << "LabelledTimeSeriesTrainingData:\n";
359 
360  for(UINT x=0; x<totalNumSamples; x++){
361  file << "************TIME_SERIES************\n";
362  file << "ClassID: "<<data[x].getClassLabel() << std::endl;
363  file << "TimeSeriesLength: "<<data[x].getLength()<< std::endl;
364  file << "TimeSeriesData: \n";
365  for(UINT i=0; i<data[x].getLength(); i++){
366  for(UINT j=0; j<numDimensions; j++){
367  file << data[x][i][j];
368  if( j<numDimensions-1 ) file << "\t";
369  }file << std::endl;
370  }
371  }
372 
373  file.close();
374  return true;
375 }
376 
377 bool TimeSeriesClassificationData::loadDatasetFromFile(const std::string filename){
378 
379  std::fstream file;
380  file.open(filename.c_str(), std::ios::in);
381  UINT numClasses = 0;
382  clear();
383 
384  if( !file.is_open() ){
385  errorLog << "loadDatasetFromFile(std::string filename) - FILE NOT OPEN!" << std::endl;
386  return false;
387  }
388 
389  std::string word;
390 
391  //Check to make sure this is a file with the Training File Format
392  file >> word;
393  if(word != "GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0"){
394  file.close();
395  clear();
396  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find file header!" << std::endl;
397  return false;
398  }
399 
400  //Get the name of the dataset
401  file >> word;
402  if(word != "DatasetName:"){
403  errorLog << "loadDatasetFromFile(std::string filename) - failed to find DatasetName!" << std::endl;
404  file.close();
405  return false;
406  }
407  file >> datasetName;
408 
409  file >> word;
410  if(word != "InfoText:"){
411  errorLog << "loadDatasetFromFile(std::string filename) - failed to find InfoText!" << std::endl;
412  file.close();
413  return false;
414  }
415 
416  //Load the info text
417  file >> word;
418  infoText = "";
419  while( word != "NumDimensions:" ){
420  infoText += word + " ";
421  file >> word;
422  }
423 
424  //Get the number of dimensions in the training data
425  if(word != "NumDimensions:"){
426  file.close();
427  clear();
428  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find NumDimensions!" << std::endl;
429  return false;
430  }
431  file >> numDimensions;
432 
433  //Get the total number of training examples in the training data
434  file >> word;
435  if(word != "TotalNumTrainingExamples:"){
436  file.close();
437  clear();
438  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find TotalNumTrainingExamples!" << std::endl;
439  return false;
440  }
441  file >> totalNumSamples;
442 
443  //Get the total number of classes in the training data
444  file >> word;
445  if(word != "NumberOfClasses:"){
446  file.close();
447  clear();
448  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find NumberOfClasses!" << std::endl;
449  return false;
450  }
451  file >> numClasses;
452 
453  //Resize the class counter buffer and load the counters
454  classTracker.resize(numClasses);
455 
456  //Get the total number of classes in the training data
457  file >> word;
458  if(word != "ClassIDsAndCounters:"){
459  file.close();
460  clear();
461  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find ClassIDsAndCounters!" << std::endl;
462  return false;
463  }
464 
465  for(UINT i=0; i<classTracker.size(); i++){
466  file >> classTracker[i].classLabel;
467  file >> classTracker[i].counter;
468  }
469 
470  //Get the UseExternalRanges
471  file >> word;
472  if(word != "UseExternalRanges:"){
473  file.close();
474  clear();
475  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find UseExternalRanges!" << std::endl;
476  return false;
477  }
478 
479  file >> useExternalRanges;
480 
481  if( useExternalRanges ){
482  externalRanges.resize(numDimensions);
483  for(UINT i=0; i<externalRanges.size(); i++){
484  file >> externalRanges[i].minValue;
485  file >> externalRanges[i].maxValue;
486  }
487  }
488 
489  //Get the main training data
490  file >> word;
491  if(word != "LabelledTimeSeriesTrainingData:"){
492  file.close();
493  clear();
494  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find LabelledTimeSeriesTrainingData!" << std::endl;
495  return false;
496  }
497 
498  //Reset the memory
499  data.resize( totalNumSamples, TimeSeriesClassificationSample() );
500 
501  //Load each of the time series
502  for(UINT x=0; x<totalNumSamples; x++){
503  UINT classLabel = 0;
504  UINT timeSeriesLength = 0;
505 
506  file >> word;
507  if( word != "************TIME_SERIES************" ){
508  file.close();
509  clear();
510  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find TimeSeries Header!" << std::endl;
511  return false;
512  }
513 
514  file >> word;
515  if( word != "ClassID:" ){
516  file.close();
517  clear();
518  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find ClassID!" << std::endl;
519  return false;
520  }
521  file >> classLabel;
522 
523  file >> word;
524  if( word != "TimeSeriesLength:" ){
525  file.close();
526  clear();
527  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find TimeSeriesLength!" << std::endl;
528  return false;
529  }
530  file >> timeSeriesLength;
531 
532  file >> word;
533  if( word != "TimeSeriesData:" ){
534  file.close();
535  clear();
536  errorLog << "loadDatasetFromFile(std::string filename) - Failed to find TimeSeriesData!" << std::endl;
537  return false;
538  }
539 
540  //Load the time series data
541  MatrixFloat trainingExample(timeSeriesLength,numDimensions);
542  for(UINT i=0; i<timeSeriesLength; i++){
543  for(UINT j=0; j<numDimensions; j++){
544  file >> trainingExample[i][j];
545  }
546  }
547 
548  data[x].setTrainingSample(classLabel,trainingExample);
549  }
550 
551  file.close();
552  return true;
553 }
554 
555 bool TimeSeriesClassificationData::saveDatasetToCSVFile(const std::string &filename) const{
556 
557  std::fstream file;
558  file.open(filename.c_str(), std::ios::out );
559 
560  if( !file.is_open() ){
561  return false;
562  }
563 
564  //Write the data to the CSV file
565  for(UINT x=0; x<totalNumSamples; x++){
566  for(UINT i=0; i<data[x].getLength(); i++){
567  file << x+1 << ",";
568  file << data[x].getClassLabel() << ",";
569  for(UINT j=0; j<numDimensions; j++){
570  file << data[x][i][j];
571  if( j+1 < numDimensions ){
572  file << ",";
573  }
574  }
575  file << std::endl;
576  }
577  }
578 
579  file.close();
580 
581  return true;
582 }
583 
585 
586  numDimensions = 0;
587  datasetName = "NOT_SET";
588  infoText = "";
589 
590  //Clear any previous data
591  clear();
592 
593  //Parse the CSV file
594  FileParser parser;
595 
596  if( !parser.parseCSVFile(filename,true) ){
597  errorLog << "loadDatasetFromCSVFile(const std::string &filename) - Failed to parse CSV file!" << std::endl;
598  return false;
599  }
600 
601  if( !parser.getConsistentColumnSize() ){
602  errorLog << "loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have a consistent number of columns!" << std::endl;
603  return false;
604  }
605 
606  if( parser.getColumnSize() <= 2 ){
607  errorLog << "loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << std::endl;
608  return false;
609  }
610 
611  //Set the number of dimensions
612  numDimensions = parser.getColumnSize()-2;
613 
614  //Reserve the memory for the data
615  data.reserve( parser.getRowSize() );
616 
617  UINT sampleCounter = 0;
618  UINT lastSampleCounter = 0;
619  UINT classLabel = 0;
620  UINT j = 0;
621  UINT n = 0;
622  VectorFloat sample(numDimensions);
623  MatrixFloat timeseries;
624  for(UINT i=0; i<parser.getRowSize(); i++){
625 
626  sampleCounter = grt_from_str< UINT >( parser[i][0] );
627 
628  //Check to see if a new timeseries has started, if so then add the previous time series as a sample and start recording the new time series
629  if( sampleCounter != lastSampleCounter && i != 0 ){
630  //Add the labelled sample to the dataset
631  if( !addSample(classLabel, timeseries) ){
632  warningLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i << " to the dataset!" << std::endl;
633  }
634  timeseries.clear();
635  }
636  lastSampleCounter = sampleCounter;
637 
638  //Get the class label
639  classLabel = grt_from_str< UINT >( parser[i][1] );
640 
641  //Get the sample data
642  j=0;
643  n=2;
644  while( j != numDimensions ){
645  sample[j++] = grt_from_str< Float >( parser[i][n] );
646  n++;
647  }
648 
649  //Add the sample to the timeseries
650  timeseries.push_back( sample );
651  }
652  if ( timeseries.getSize() > 0 )
653  //Add the labelled sample to the dataset
654  if( !addSample(classLabel, timeseries) ){
655  warningLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << parser.getRowSize()-1 << " to the dataset!" << std::endl;
656  }
657 
658  return true;
659 }
660 
662 
663  std::cout << getStatsAsString();
664 
665  return true;
666 }
667 
669 
670  std::string stats;
671 
672  stats += "DatasetName:\t" + datasetName + "\n";
673  stats += "DatasetInfo:\t" + infoText + "\n";
674  stats += "Number of Dimensions:\t" + Util::toString(numDimensions) + "\n";
675  stats += "Number of Samples:\t" + Util::toString(totalNumSamples) + "\n";
676  stats += "Number of Classes:\t" + Util::toString(getNumClasses()) + "\n";
677  stats += "ClassStats:\n";
678 
679  for(UINT k=0; k<getNumClasses(); k++){
680  stats += "ClassLabel:\t" + Util::toString(classTracker[k].classLabel);
681  stats += "\tNumber of Samples:\t" + Util::toString( classTracker[k].counter );
682  stats +="\tClassName:\t" + classTracker[k].className + "\n";
683  }
684 
685  Vector< MinMax > ranges = getRanges();
686 
687  stats += "Dataset Ranges:\n";
688  for(UINT j=0; j<ranges.size(); j++){
689  stats += "[" + Util::toString( j+1 ) + "] Min:\t" + Util::toString( ranges[j].minValue ) + "\tMax: " + Util::toString( ranges[j].maxValue ) + "\n";
690  }
691 
692  stats += "Timeseries Lengths:\n";
693  UINT M = (UINT)data.size();
694  for(UINT j=0; j<M; j++){
695  stats += "ClassLabel: " + Util::toString( data[j].getClassLabel() ) + " Length:\t" + Util::toString( data[j].getLength() ) + "\n";
696  }
697 
698  return stats;
699 }
700 
701 TimeSeriesClassificationData TimeSeriesClassificationData::partition(const UINT trainingSizePercentage,const bool useStratifiedSampling){
702 
703  //Partitions the dataset into a training dataset (which is kept by this instance of the TimeSeriesClassificationData) and
704  //a testing/validation dataset (which is return as a new instance of the TimeSeriesClassificationData). The trainingSizePercentage
705  //therefore sets the size of the data which remains in this instance and the remaining percentage of data is then added to
706  //the testing/validation dataset
707 
708  //The dataset has changed so flag that any previous cross validation setup will now not work
709  crossValidationSetup = false;
710  crossValidationIndexs.clear();
711 
717 
718  //Create the random partion indexs
719  Random random;
720  UINT randomIndex = 0;
721 
722  if( useStratifiedSampling ){
723  //Break the data into seperate classes
724  Vector< Vector< UINT > > classData( getNumClasses() );
725 
726  //Add the indexs to their respective classes
727  for(UINT i=0; i<totalNumSamples; i++){
728  classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
729  }
730 
731  //Randomize the order of the indexs in each of the class index buffers
732  for(UINT k=0; k<getNumClasses(); k++){
733  UINT numSamples = (UINT)classData[k].size();
734  for(UINT x=0; x<numSamples; x++){
735  //Pick a random index
736  randomIndex = random.getRandomNumberInt(0,numSamples);
737 
738  //Swap the indexs
739  SWAP( classData[k][ x ] ,classData[k][ randomIndex ] );
740  }
741  }
742 
743  //Loop over each class and add the data to the trainingSet and testSet
744  for(UINT k=0; k<getNumClasses(); k++){
745  UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingSizePercentage) );
746 
747  //Add the data to the training and test sets
748  for(UINT i=0; i<numTrainingExamples; i++){
749  trainingSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getData() );
750  }
751  for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
752  testSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getData() );
753  }
754  }
755 
756  //Overwrite the training data in this instance with the training data of the trainingSet
757  data = trainingSet.getClassificationData();
758  totalNumSamples = trainingSet.getNumSamples();
759  }else{
760 
761  const UINT numTrainingExamples = (UINT) floor( Float(totalNumSamples) / 100.0 * Float(trainingSizePercentage) );
762  //Create the random partion indexs
763  Random random;
764  for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
765  for(UINT x=0; x<totalNumSamples; x++){
766  //Pick a random index
767  randomIndex = random.getRandomNumberInt(0,totalNumSamples);
768 
769  //Swap the indexs
770  SWAP( indexs[ x ] , indexs[ randomIndex ] );
771  }
772 
773  //Add the data to the training and test sets
774  for(UINT i=0; i<numTrainingExamples; i++){
775  trainingSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getData() );
776  }
777  for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
778  testSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getData() );
779  }
780 
781  //Overwrite the training data in this instance with the training data of the trainingSet
782  data = trainingSet.getClassificationData();
783  totalNumSamples = trainingSet.getNumSamples();
784  }
785 
786  return testSet;
787 }
788 
790 
791  if( labelledData.getNumDimensions() != numDimensions ){
792  errorLog << "merge(TimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << std::endl;
793  return false;
794  }
795 
796  //The dataset has changed so flag that any previous cross validation setup will now not work
797  crossValidationSetup = false;
798  crossValidationIndexs.clear();
799 
800  //Add the data from the labelledData to this instance
801  for(UINT i=0; i<labelledData.getNumSamples(); i++){
802  addSample(labelledData[i].getClassLabel(), labelledData[i].getData());
803  }
804 
805  //Set the class names from the dataset
807  for(UINT i=0; i<classTracker.size(); i++){
808  setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
809  }
810 
811  return true;
812 }
813 
814 bool TimeSeriesClassificationData::spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling){
815 
816  crossValidationSetup = false;
817  crossValidationIndexs.clear();
818 
819  //K can not be zero
820  if( K > totalNumSamples ){
821  errorLog << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl;
822  return false;
823  }
824 
825  //K can not be larger than the number of examples
826  if( K > totalNumSamples ){
827  errorLog << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << std::endl;
828  return false;
829  }
830 
831  //K can not be larger than the number of examples in a specific class if the stratified sampling option is true
832  if( useStratifiedSampling ){
833  for(UINT c=0; c<classTracker.size(); c++){
834  if( K > classTracker[c].counter ){
835  errorLog << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
836  return false;
837  }
838  }
839  }
840 
841  //Setup the dataset for k-fold cross validation
842  kFoldValue = K;
844 
845  //Work out how many samples are in each fold, the last fold might have more samples than the others
846  UINT numSamplesPerFold = (UINT) floor( totalNumSamples/Float(K) );
847 
848  //Resize the cross validation indexs buffer
850 
851  //Create the random partion indexs
852  Random random;
853  UINT randomIndex = 0;
854 
855  if( useStratifiedSampling ){
856  //Break the data into seperate classes
857  Vector< Vector< UINT > > classData( getNumClasses() );
858 
859  //Add the indexs to their respective classes
860  for(UINT i=0; i<totalNumSamples; i++){
861  classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
862  }
863 
864  //Randomize the order of the indexs in each of the class index buffers
865  for(UINT c=0; c<getNumClasses(); c++){
866  UINT numSamples = (UINT)classData[c].size();
867  for(UINT x=0; x<numSamples; x++){
868  //Pick a random index
869  randomIndex = random.getRandomNumberInt(0,numSamples);
870 
871  //Swap the indexs
872  SWAP( classData[c][ x ] , classData[c][ randomIndex ] );
873  }
874  }
875 
876  //Loop over each of the classes and add the data equally to each of the k folds until there is no data left
878  for(UINT c=0; c<getNumClasses(); c++){
879  iter = classData[ c ].begin();
880  UINT k = 0;
881  while( iter != classData[c].end() ){
882  crossValidationIndexs[ k ].push_back( *iter );
883  iter++;
884  k++;
885  k = k % K;
886  }
887  }
888 
889  }else{
890  //Randomize the order of the data
891  for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
892  for(UINT x=0; x<totalNumSamples; x++){
893  //Pick a random index
894  randomIndex = random.getRandomNumberInt(0,totalNumSamples);
895 
896  //Swap the indexs
897  SWAP( indexs[ x ] , indexs[ randomIndex ] );
898  }
899 
900  UINT counter = 0;
901  UINT foldIndex = 0;
902  for(UINT i=0; i<totalNumSamples; i++){
903  //Add the index to the current fold
904  crossValidationIndexs[ foldIndex ].push_back( indexs[i] );
905 
906  //Move to the next fold if ready
907  if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
908  foldIndex++;
909  counter = 0;
910  }
911  }
912  }
913 
914  crossValidationSetup = true;
915  return true;
916 
917 }
918 
920 
921  TimeSeriesClassificationData trainingData;
922 
923  if( !crossValidationSetup ){
924  errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << std::endl;
925  return trainingData;
926  }
927 
928  if( foldIndex >= kFoldValue ) return trainingData;
929 
930  trainingData.setNumDimensions( numDimensions );
931 
932  //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
933  UINT index = 0;
934  for(UINT k=0; k<kFoldValue; k++){
935  if( k != foldIndex ){
936  for(UINT i=0; i<crossValidationIndexs[k].size(); i++){
937 
938  index = crossValidationIndexs[k][i];
939  trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getData() );
940  }
941  }
942  }
943 
944  return trainingData;
945 }
946 
949 
950  if( !crossValidationSetup ) return testData;
951 
952  if( foldIndex >= kFoldValue ) return testData;
953 
954  //Add the data to the training
955  testData.setNumDimensions( numDimensions );
956 
957  UINT index = 0;
958  for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){
959 
960  index = crossValidationIndexs[ foldIndex ][i];
961  testData.addSample( data[ index ].getClassLabel(), data[ index ].getData() );
962  }
963 
964  return testData;
965 }
966 
969  for(UINT x=0; x<totalNumSamples; x++){
970  if( data[x].getClassLabel() == classLabel ){
971  classData.addSample( classLabel, data[x].getData() );
972  }
973  }
974  return classData;
975 }
976 
978 
979  UnlabelledData unlabelledData;
980 
981  if( totalNumSamples == 0 ){
982  return unlabelledData;
983  }
984 
985  unlabelledData.setNumDimensions( numDimensions );
986 
987  for(UINT i=0; i<totalNumSamples; i++){
988  for(UINT x=0; x<data[i].getLength(); x++){
989  unlabelledData.addSample( data[i].getData().getRow( x ) );
990  }
991  }
992 
993  return unlabelledData;
994 }
995 
997  UINT minClassLabel = 99999;
998 
999  for(UINT i=0; i<classTracker.size(); i++){
1000  if( classTracker[i].classLabel < minClassLabel ){
1001  minClassLabel = classTracker[i].classLabel;
1002  }
1003  }
1004 
1005  return minClassLabel;
1006 }
1007 
1008 
1010  UINT maxClassLabel = 0;
1011 
1012  for(UINT i=0; i<classTracker.size(); i++){
1013  if( classTracker[i].classLabel > maxClassLabel ){
1014  maxClassLabel = classTracker[i].classLabel;
1015  }
1016  }
1017 
1018  return maxClassLabel;
1019 }
1020 
1022  for(UINT k=0; k<classTracker.size(); k++){
1023  if( classTracker[k].classLabel == classLabel ){
1024  return k;
1025  }
1026  }
1027  warningLog << "getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel << " in class tracker!" << std::endl;
1028  return 0;
1029 }
1030 
1032 
1033  for(UINT i=0; i<classTracker.size(); i++){
1034  if( classTracker[i].classLabel == classLabel ){
1035  return classTracker[i].className;
1036  }
1037  }
1038  return "CLASS_LABEL_NOT_FOUND";
1039 }
1040 
1042 
1043  if( useExternalRanges ) return externalRanges;
1044 
1045  Vector<MinMax> ranges(numDimensions);
1046 
1047  if( totalNumSamples > 0 ){
1048  for(UINT j=0; j<numDimensions; j++){
1049  ranges[j].minValue = data[0][0][0];
1050  ranges[j].maxValue = data[0][0][0];
1051  for(UINT x=0; x<totalNumSamples; x++){
1052  for(UINT i=0; i<data[x].getLength(); i++){
1053  if( data[x][i][j] < ranges[j].minValue ){ ranges[j].minValue = data[x][i][j]; } //Search for the min value
1054  else if( data[x][i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[x][i][j]; } //Search for the max value
1055  }
1056  }
1057  }
1058  }
1059  return ranges;
1060 }
1061 
1063 
1064  //Count how many samples are in the entire dataset
1065  UINT M = 0;
1066  UINT index = 0;
1067  for(UINT x=0; x<totalNumSamples; x++){
1068  M += data[x].getLength();
1069  }
1070 
1071  if( M == 0 ) MatrixFloat();
1072 
1073  //Get all the data and concatenate it into 1 matrix
1074  MatrixFloat matrixData(M,numDimensions);
1075  for(UINT x=0; x<totalNumSamples; x++){
1076  for(UINT i=0; i<data[x].getLength(); i++){
1077  for(UINT j=0; j<numDimensions; j++){
1078  matrixData[index][j] = data[x][i][j];
1079  }
1080  index++;
1081  }
1082  }
1083  return matrixData;
1084 }
1085 
1086 GRT_END_NAMESPACE
1087 
void clear()
Definition: Matrix.h:522
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
bool loadDatasetFromCSVFile(const std::string &filename)
unsigned int getSize() const
Definition: Matrix.h:564
static std::string toString(const int &i)
Definition: Util.cpp:73
bool addSample(const VectorFloat &sample)
UINT numDimensions
The number of dimensions in the dataset.
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: Util.cpp:52
bool setInfoText(const std::string infoText)
Vector< MinMax > externalRanges
A vector containing a set of externalRanges set by the user.
bool merge(const TimeSeriesClassificationData &labelledData)
Vector< TimeSeriesClassificationSample > data
The labelled time series classification data.
UINT kFoldValue
The number of folds the dataset has been split into for cross validation.
Definition: Random.h:40
bool setNumDimensions(const UINT numDimensions)
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
TimeSeriesClassificationData & operator=(const TimeSeriesClassificationData &rhs)
bool saveDatasetToFile(const std::string filename) const
bool useExternalRanges
A flag to show if the dataset should be scaled using the externalRanges values.
UINT totalNumSamples
The total number of samples in the dataset.
Vector< ClassTracker > getClassTracker() const
bool setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
The TimeSeriesClassificationData is the main data structure for recording, labeling, managing, saving, and loading training data for supervised temporal learning problems. Unlike the ClassificationData, in which each sample consists of one N-dimensional datum, a TimeSeriesClassificationData sample consists of an N-dimensional time series of length M. The length of each time series sample (i.e. M) can be different for each sample in the dataset.
bool setNumDimensions(const UINT numDimensions)
WarningLog warningLog
Default warning log.
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
bool allowNullGestureClass
A flag that enables/disables a user from adding new samples with a class label matching the default n...
bool enableExternalRangeScaling(const bool useExternalRanges)
unsigned int getNumCols() const
Definition: Matrix.h:549
std::string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
DebugLog debugLog
Default debugging log.
UINT getClassLabelIndexValue(const UINT classLabel) const
bool crossValidationSetup
A flag to show if the dataset is ready for cross validation.
bool addSample(const UINT classLabel, const MatrixFloat &trainingSample)
std::string datasetName
The name of the dataset.
bool scale(const Float minTarget, const Float maxTarget)
TimeSeriesClassificationData getClassData(const UINT classLabel) const
bool setClassNameForCorrespondingClassLabel(const std::string className, const UINT classLabel)
bool saveDatasetToCSVFile(const std::string &filename) const
ErrorLog errorLog
Default error log.
Vector< ClassTracker > classTracker
A vector of ClassTracker, which keeps track of the number of samples of each class.
UnlabelledData reformatAsUnlabelledData() const
int getRandomNumberInt(int minRange, int maxRange)
Definition: Random.h:88
static bool stringEndsWith(const std::string &str, const std::string &ending)
Definition: Util.cpp:156
TimeSeriesClassificationData(UINT numDimensions=0, std::string datasetName="NOT_SET", std::string infoText="")
std::string infoText
Some infoText about the dataset.
bool setDatasetName(const std::string datasetName)
TimeSeriesClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
Vector< TimeSeriesClassificationSample > getClassificationData() const
bool push_back(const Vector< T > &sample)
Definition: Matrix.h:401
Vector< Vector< UINT > > crossValidationIndexs
A vector to hold the indices of the dataset for the cross validation.
bool setAllowNullGestureClass(const bool allowNullGestureClass)
TimeSeriesClassificationData getTrainingFoldData(const UINT foldIndex) const
bool load(const std::string &filename)
bool save(const std::string &filename) const
bool loadDatasetFromFile(const std::string filename)
TimeSeriesClassificationData getTestFoldData(const UINT foldIndex) const