GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
ClassificationData.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "ClassificationData.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
26 ClassificationData::ClassificationData(const UINT numDimensions,const std::string datasetName,const std::string infoText){
27  this->datasetName = datasetName;
28  this->numDimensions = numDimensions;
29  this->infoText = infoText;
30  totalNumSamples = 0;
31  crossValidationSetup = false;
32  useExternalRanges = false;
33  allowNullGestureClass = true;
34  if( numDimensions > 0 ) setNumDimensions( numDimensions );
35  infoLog.setKey("[ClassificationData]");
36  debugLog.setKey("[DEBUG ClassificationData]");
37  errorLog.setKey("[ERROR ClassificationData]");
38  warningLog.setKey("[WARNING ClassificationData]");
39 }
40 
42  *this = rhs;
43 }
44 
46 }
47 
49  if( this != &rhs){
50  this->datasetName = rhs.datasetName;
51  this->infoText = rhs.infoText;
52  this->numDimensions = rhs.numDimensions;
53  this->totalNumSamples = rhs.totalNumSamples;
54  this->kFoldValue = rhs.kFoldValue;
55  this->crossValidationSetup = rhs.crossValidationSetup;
56  this->useExternalRanges = rhs.useExternalRanges;
57  this->allowNullGestureClass = rhs.allowNullGestureClass;
58  this->externalRanges = rhs.externalRanges;
59  this->classTracker = rhs.classTracker;
60  this->data = rhs.data;
61  this->crossValidationIndexs = rhs.crossValidationIndexs;
62  this->infoLog = rhs.infoLog;
63  this->debugLog = rhs.debugLog;
64  this->errorLog = rhs.errorLog;
65  this->warningLog = rhs.warningLog;
66  }
67  return *this;
68 }
69 
71  totalNumSamples = 0;
72  data.clear();
73  classTracker.clear();
74  crossValidationSetup = false;
75  crossValidationIndexs.clear();
76 }
77 
78 bool ClassificationData::setNumDimensions(const UINT numDimensions){
79 
80  if( numDimensions > 0 ){
81  //Clear any previous training data
82  clear();
83 
84  //Set the dimensionality of the data
85  this->numDimensions = numDimensions;
86 
87  //Clear the external ranges
88  useExternalRanges = false;
89  externalRanges.clear();
90 
91  return true;
92  }
93 
94  errorLog << "setNumDimensions(const UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << std::endl;
95  return false;
96 }
97 
98 bool ClassificationData::setDatasetName(const std::string datasetName){
99 
100  //Make sure there are no spaces in the std::string
101  if( datasetName.find(" ") == std::string::npos ){
102  this->datasetName = datasetName;
103  return true;
104  }
105 
106  errorLog << "setDatasetName(const std::string datasetName) - The dataset name cannot contain any spaces!" << std::endl;
107  return false;
108 }
109 
110 bool ClassificationData::setInfoText(const std::string infoText){
111  this->infoText = infoText;
112  return true;
113 }
114 
115 bool ClassificationData::setClassNameForCorrespondingClassLabel(const std::string className,const UINT classLabel){
116 
117  for(UINT i=0; i<classTracker.getSize(); i++){
118  if( classTracker[i].classLabel == classLabel ){
119  classTracker[i].className = className;
120  return true;
121  }
122  }
123 
124  errorLog << "setClassNameForCorrespondingClassLabel(const std::string className,const UINT classLabel) - Failed to find class with label: " << classLabel << std::endl;
125  return false;
126 }
127 
128 bool ClassificationData::setAllowNullGestureClass(const bool allowNullGestureClass){
129  this->allowNullGestureClass = allowNullGestureClass;
130  return true;
131 }
132 
133 bool ClassificationData::addSample(const UINT classLabel,const VectorFloat &sample){
134 
135  if( sample.getSize() != numDimensions ){
136  if( totalNumSamples == 0 ){
137  warningLog << "addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.getSize() << ") does not match the number of dimensions of the dataset (" << numDimensions << "), setting dimensionality to: " << numDimensions << std::endl;
138  numDimensions = sample.getSize();
139  }else{
140  errorLog << "addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.getSize() << ") does not match the number of dimensions of the dataset (" << numDimensions << ")" << std::endl;
141  return false;
142  }
143  }
144 
145  //The class label must be greater than zero (as zero is used for the null rejection class label
146  if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
147  errorLog << "addSample(const UINT classLabel, VectorFloat &sample) - the class label can not be 0!" << std::endl;
148  return false;
149  }
150 
151  //The dataset has changed so flag that any previous cross validation setup will now not work
152  crossValidationSetup = false;
153  crossValidationIndexs.clear();
154 
155  ClassificationSample newSample(classLabel,sample);
156  data.push_back( newSample );
157  totalNumSamples++;
158 
159  if( classTracker.getSize() == 0 ){
160  ClassTracker tracker(classLabel,1);
161  classTracker.push_back(tracker);
162  }else{
163  bool labelFound = false;
164  for(UINT i=0; i<classTracker.getSize(); i++){
165  if( classLabel == classTracker[i].classLabel ){
166  classTracker[i].counter++;
167  labelFound = true;
168  break;
169  }
170  }
171  if( !labelFound ){
172  ClassTracker tracker(classLabel,1);
173  classTracker.push_back(tracker);
174  }
175  }
176 
177  //Update the class labels
178  sortClassLabels();
179 
180  return true;
181 }
182 
183 bool ClassificationData::removeSample( const UINT index ){
184 
185  if( totalNumSamples == 0 ){
186  warningLog << "removeSample( const UINT index ) - Failed to remove sample, the training dataset is empty!" << std::endl;
187  return false;
188  }
189 
190  if( index >= totalNumSamples ){
191  warningLog << "removeSample( const UINT index ) - Failed to remove sample, the index is out of bounds! Number of training samples: " << totalNumSamples << " index: " << index << std::endl;
192  return false;
193  }
194 
195  //The dataset has changed so flag that any previous cross validation setup will now not work
196  crossValidationSetup = false;
197  crossValidationIndexs.clear();
198 
199  //Find the corresponding class ID for the last training example
200  UINT classLabel = data[ index ].getClassLabel();
201 
202  //Remove the training example from the buffer
203  data.erase( data.begin()+index );
204 
205  totalNumSamples = data.getSize();
206 
207  //Remove the value from the counter
208  for(size_t i=0; i<classTracker.getSize(); i++){
209  if( classTracker[i].classLabel == classLabel ){
210  classTracker[i].counter--;
211  break;
212  }
213  }
214 
215  return true;
216 }
217 
219 
220  if( totalNumSamples == 0 ){
221  warningLog << "removeLastSample() - Failed to remove sample, the training dataset is empty!" << std::endl;
222  return false;
223  }
224 
225  return removeSample( totalNumSamples-1 );
226 }
227 
228 bool ClassificationData::reserve(const UINT N){
229 
230  data.reserve( N );
231 
232  if( data.capacity() >= N ) return true;
233 
234  return false;
235 }
236 
238  return removeClass( classLabel );
239 }
240 
241 bool ClassificationData::addClass(const UINT classLabel,const std::string className){
242 
243  //Check to make sure the class label does not exist
244  for(size_t i=0; i<classTracker.getSize(); i++){
245  if( classTracker[i].classLabel == classLabel ){
246  warningLog << "addClass(const UINT classLabel,const std::string className) - Failed to add class, it already exists! Class label: " << classLabel << std::endl;
247  return false;
248  }
249  }
250 
251  //Add the class label to the class tracker
252  classTracker.push_back( ClassTracker(classLabel,0,className) );
253 
254  //Sort the class labels
255  sortClassLabels();
256 
257  return true;
258 }
259 
//Removes every sample with the given class label and drops the class from the class tracker.
//Returns the number of samples that were removed (0 if the class label was not found).
UINT ClassificationData::removeClass(const UINT classLabel){

    UINT numExamplesRemoved = 0;
    UINT numExamplesToRemove = 0;

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    //Find out how many training examples we need to remove, then drop the class from the tracker
    for(UINT i=0; i<classTracker.getSize(); i++){
        if( classTracker[i].classLabel == classLabel ){
            numExamplesToRemove = classTracker[i].counter;
            classTracker.erase(classTracker.begin()+i);
            break;
        }
    }

    //Remove the samples with the matching class ID
    if( numExamplesToRemove > 0 ){
        UINT i=0;
        while( numExamplesRemoved < numExamplesToRemove ){
            if( data[i].getClassLabel() == classLabel ){
                //i is deliberately NOT advanced after an erase: the next sample shifts down into slot i
                data.erase(data.begin()+i);
                numExamplesRemoved++;
            }else if( ++i == data.getSize() ) break; //advance past non-matching samples, bail out at the end of the buffer
        }
    }

    //Resync the sample counter with the data buffer
    totalNumSamples = data.getSize();

    return numExamplesRemoved;
}
293 
//Relabels every sample carrying oldClassLabel so it carries newClassLabel instead.
//If the new label already exists in the class tracker the two tracker entries are merged,
//otherwise a new tracker entry is created; the old entry is erased either way.
//Returns false if no sample has oldClassLabel.
//NOTE(review): if oldClassLabel == newClassLabel both flags are set for the same entry, the
//counter is added to itself and the entry is then erased - looks unintended; confirm callers
//never pass equal labels.
bool ClassificationData::relabelAllSamplesWithClassLabel(const UINT oldClassLabel,const UINT newClassLabel){
    bool oldClassLabelFound = false;
    bool newClassLabelAllReadyExists = false;
    UINT indexOfOldClassLabel = 0;
    UINT indexOfNewClassLabel = 0;

    //Locate the tracker entries of both labels (a single pass finds either or both)
    for(UINT i=0; i<classTracker.getSize(); i++){
        if( classTracker[i].classLabel == oldClassLabel ){
            indexOfOldClassLabel = i;
            oldClassLabelFound = true;
        }
        if( classTracker[i].classLabel == newClassLabel ){
            indexOfNewClassLabel = i;
            newClassLabelAllReadyExists = true;
        }
    }

    //If the old class label was not found then we can't do anything
    if( !oldClassLabelFound ){
        return false;
    }

    //Relabel the old class labels
    for(UINT i=0; i<totalNumSamples; i++){
        if( data[i].getClassLabel() == oldClassLabel ){
            data[i].setClassLabel(newClassLabel);
        }
    }

    //Update the class tracker
    if( newClassLabelAllReadyExists ){
        //Add the old sample count to the new sample count
        classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
    }else{
        //Create a new class tracker (push_back only appends, so indexOfOldClassLabel stays valid)
        classTracker.push_back( ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
    }

    //Erase the old class tracker
    classTracker.erase( classTracker.begin() + indexOfOldClassLabel );

    //Sort the class labels
    sortClassLabels();

    return true;
}
341 
342 bool ClassificationData::setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges){
343 
344  if( externalRanges.size() != numDimensions ) return false;
345 
346  this->externalRanges = externalRanges;
347  this->useExternalRanges = useExternalRanges;
348 
349  return true;
350 }
351 
352 bool ClassificationData::enableExternalRangeScaling(const bool useExternalRanges){
353  if( externalRanges.getSize() == numDimensions ){
354  this->useExternalRanges = useExternalRanges;
355  return true;
356  }
357  return false;
358 }
359 
360 bool ClassificationData::scale(const Float minTarget,const Float maxTarget){
361  Vector< MinMax > ranges = getRanges();
362  return scale(ranges,minTarget,maxTarget);
363 }
364 
365 bool ClassificationData::scale(const Vector<MinMax> &ranges,const Float minTarget,const Float maxTarget){
366  if( ranges.getSize() != numDimensions ) return false;
367 
368  //Scale the training data
369  for(UINT i=0; i<totalNumSamples; i++){
370  for(UINT j=0; j<numDimensions; j++){
371  data[i][j] = grt_scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
372  }
373  }
374 
375  return true;
376 }
377 
378 bool ClassificationData::save(const std::string &filename) const{
379 
380  //Check if the file should be saved as a csv file
381  if( Util::stringEndsWith( filename, ".csv" ) ){
382  return saveDatasetToCSVFile( filename );
383  }
384 
385  //Otherwise save it as a custom GRT file
386  return saveDatasetToFile( filename );
387 }
388 
389 bool ClassificationData::load(const std::string &filename){
390 
391  //Check if the file should be loaded as a csv file
392  if( Util::stringEndsWith( filename, ".csv" ) ){
393  return loadDatasetFromCSVFile( filename );
394  }
395 
396  //Otherwise save it as a custom GRT file
397  return loadDatasetFromFile( filename );
398 }
399 
//Saves the dataset in the custom GRT text format: a version header, the dataset meta data,
//the class tracker table, the optional external ranges, then one tab-separated row per sample
//(class label first, followed by the feature values). Returns false if the file can't be opened.
bool ClassificationData::saveDatasetToFile(const std::string &filename) const{

    std::fstream file;
    file.open(filename.c_str(), std::ios::out);

    if( !file.is_open() ){
        return false;
    }

    //Write the file header and the dataset meta data
    file << "GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0\n";
    file << "DatasetName: " << datasetName << std::endl;
    file << "InfoText: " << infoText << std::endl;
    file << "NumDimensions: " << numDimensions << std::endl;
    file << "TotalNumExamples: " << totalNumSamples << std::endl;
    file << "NumberOfClasses: " << classTracker.size() << std::endl;
    file << "ClassIDsAndCounters: " << std::endl;

    //One tracker row per class: label, sample count, class name
    for(UINT i=0; i<classTracker.size(); i++){
        file << classTracker[i].classLabel << "\t" << classTracker[i].counter << "\t" << classTracker[i].className << std::endl;
    }

    file << "UseExternalRanges: " << useExternalRanges << std::endl;

    //The external ranges are only written when they are in use
    if( useExternalRanges ){
        for(UINT i=0; i<externalRanges.size(); i++){
            file << externalRanges[i].minValue << "\t" << externalRanges[i].maxValue << std::endl;
        }
    }

    file << "Data:\n";

    //One row per sample: class label, then the tab-separated feature values
    for(UINT i=0; i<totalNumSamples; i++){
        file << data[i].getClassLabel();
        for(UINT j=0; j<numDimensions; j++){
            file << "\t" << data[i][j];
        }
        file << std::endl;
    }

    file.close();
    return true;
}
442 
//Loads a dataset saved in the custom GRT text format (see saveDatasetToFile). Any existing
//data is cleared first. Returns false (and closes the file) if any expected header token is
//missing or the file can't be opened.
bool ClassificationData::loadDatasetFromFile(const std::string &filename){

    std::fstream file;
    file.open(filename.c_str(), std::ios::in);
    UINT numClasses = 0;
    clear();

    if( !file.is_open() ){
        errorLog << "loadDatasetFromFile(const std::string &filename) - could not open file!" << std::endl;
        return false;
    }

    std::string word;

    //Check to make sure this is a file with the Training File Format
    file >> word;
    if(word != "GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - could not find file header!" << std::endl;
        file.close();
        return false;
    }

    //Get the name of the dataset
    file >> word;
    if(word != "DatasetName:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find DatasetName header!" << std::endl;
        errorLog << word << std::endl;
        file.close();
        return false;
    }
    file >> datasetName;

    file >> word;
    if(word != "InfoText:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find InfoText header!" << std::endl;
        file.close();
        return false;
    }

    //Load the info text: consume whitespace-separated words until the next header token.
    //Note this re-joins the words with single spaces (and leaves a trailing space).
    //NOTE(review): if the file ends before "NumDimensions:" appears this loop never
    //terminates because the failed extraction leaves word unchanged - confirm inputs
    //are always well-formed or add a stream-state check.
    file >> word;
    infoText = "";
    while( word != "NumDimensions:" ){
        infoText += word + " ";
        file >> word;
    }

    //Get the number of dimensions in the training data
    //(this check cannot fail after the loop above; kept as a defensive guard)
    if( word != "NumDimensions:" ){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find NumDimensions header!" << std::endl;
        file.close();
        return false;
    }
    file >> numDimensions;

    //Get the total number of training examples in the training data
    //(both the legacy and the current header spelling are accepted)
    file >> word;
    if( word != "TotalNumTrainingExamples:" && word != "TotalNumExamples:" ){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find TotalNumTrainingExamples header!" << std::endl;
        file.close();
        return false;
    }
    file >> totalNumSamples;

    //Get the total number of classes in the training data
    file >> word;
    if(word != "NumberOfClasses:"){
        errorLog << "loadDatasetFromFile(string filename) - failed to find NumberOfClasses header!" << std::endl;
        file.close();
        return false;
    }
    file >> numClasses;

    //Resize the class counter buffer and load the counters
    classTracker.resize(numClasses);

    //Get the total number of classes in the training data
    file >> word;
    if(word != "ClassIDsAndCounters:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find ClassIDsAndCounters header!" << std::endl;
        file.close();
        return false;
    }

    //One tracker row per class: label, sample count, class name
    for(UINT i=0; i<classTracker.getSize(); i++){
        file >> classTracker[i].classLabel;
        file >> classTracker[i].counter;
        file >> classTracker[i].className;
    }

    //Check if the dataset should be scaled using external ranges
    file >> word;
    if(word != "UseExternalRanges:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find UseExternalRanges header!" << std::endl;
        file.close();
        return false;
    }
    file >> useExternalRanges;

    //If we are using external ranges then load them
    if( useExternalRanges ){
        externalRanges.resize(numDimensions);
        for(UINT i=0; i<externalRanges.getSize(); i++){
            file >> externalRanges[i].minValue;
            file >> externalRanges[i].maxValue;
        }
    }

    //Get the main training data (both the legacy and the current header are accepted)
    file >> word;
    if( word != "LabelledTrainingData:" && word != "Data:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find LabelledTrainingData header!" << std::endl;
        file.close();
        return false;
    }

    ClassificationSample tempSample( numDimensions );
    data.resize( totalNumSamples, tempSample );

    //One row per sample: class label, then the feature values
    for(UINT i=0; i<totalNumSamples; i++){
        UINT classLabel = 0;
        VectorFloat sample(numDimensions,0);
        file >> classLabel;
        for(UINT j=0; j<numDimensions; j++){
            file >> sample[j];
        }
        data[i].set(classLabel, sample);
    }

    file.close();

    //Sort the class labels
    sortClassLabels();

    return true;
}
579 
580 bool ClassificationData::saveDatasetToCSVFile(const std::string &filename) const{
581 
582  std::fstream file;
583  file.open(filename.c_str(), std::ios::out );
584 
585  if( !file.is_open() ){
586  return false;
587  }
588 
589  //Write the data to the CSV file
590  for(UINT i=0; i<totalNumSamples; i++){
591  file << data[i].getClassLabel();
592  for(UINT j=0; j<numDimensions; j++){
593  file << "," << data[i][j];
594  }
595  file << std::endl;
596  }
597 
598  file.close();
599 
600  return true;
601 }
602 
603 bool ClassificationData::loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex){
604 
605  numDimensions = 0;
606  datasetName = "NOT_SET";
607  infoText = "";
608 
609  //Clear any previous data
610  clear();
611 
612  //Parse the CSV file
613  FileParser parser;
614 
615  Timer timer;
616 
617  timer.start();
618 
619  if( !parser.parseCSVFile(filename,true) ){
620  errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl;
621  return false;
622  }
623 
624  if( !parser.getConsistentColumnSize() ){
625  errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << std::endl;
626  return false;
627  }
628 
629  if( parser.getColumnSize() <= 1 ){
630  errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl;
631  return false;
632  }
633 
634  //Set the number of dimensions
635  numDimensions = parser.getColumnSize()-1;
636 
637  timer.start();
638 
639  //Reserve the memory for the data
640  data.resize( parser.getRowSize(), ClassificationSample(numDimensions) );
641 
642  timer.start();
643 
644  //Loop over the samples and add them to the data set
645  UINT classLabel = 0;
646  UINT j = 0;
647  UINT n = 0;
648  totalNumSamples = parser.getRowSize();
649  for(UINT i=0; i<totalNumSamples; i++){
650  //Get the class label
651  classLabel = grt_from_str< UINT >( parser[i][classLabelColumnIndex] );
652 
653  //Set the class label
654  data[i].setClassLabel( classLabel );
655 
656  //Get the sample data
657  j=0;
658  n=0;
659  while( j != numDimensions ){
660  if( n != classLabelColumnIndex ){
661  data[i][j++] = grt_from_str< Float >( parser[i][n] );
662  }
663  n++;
664  }
665 
666  //Update the class tracker
667  if( classTracker.size() == 0 ){
668  ClassTracker tracker(classLabel,1);
669  classTracker.push_back(tracker);
670  }else{
671  bool labelFound = false;
672  const size_t numClasses = classTracker.size();
673  for(size_t i=0; i<numClasses; i++){
674  if( classLabel == classTracker[i].classLabel ){
675  classTracker[i].counter++;
676  labelFound = true;
677  break;
678  }
679  }
680  if( !labelFound ){
681  ClassTracker tracker(classLabel,1);
682  classTracker.push_back(tracker);
683  }
684  }
685  }
686 
687  //Sort the class labels
688  sortClassLabels();
689 
690  return true;
691 }
692 
694 
695  std::cout << getStatsAsString();
696 
697  return true;
698 }
699 
701 
702  sort(classTracker.begin(),classTracker.end(),ClassTracker::sortByClassLabelAscending);
703 
704  return true;
705 }
706 
707 ClassificationData ClassificationData::partition(const UINT trainingSizePercentage,const bool useStratifiedSampling){
708  return split(trainingSizePercentage, useStratifiedSampling);
709 }
710 
//Splits the dataset in two: this instance keeps trainingSizePercentage percent of the samples
//and the remainder is returned as a new test/validation dataset. With stratified sampling the
//percentage is applied per class; otherwise the whole dataset is shuffled and cut once.
ClassificationData ClassificationData::split(const UINT trainingSizePercentage,const bool useStratifiedSampling){

    //Partitions the dataset into a training dataset (which is kept by this instance of the ClassificationData) and
    //a testing/validation dataset (which is return as a new instance of the ClassificationData). The trainingSizePercentage
    //therefore sets the size of the data which remains in this instance and the remaining percentage of data is then added to
    //the testing/validation dataset

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    ClassificationData trainingSet(numDimensions);
    ClassificationData testSet(numDimensions);
    trainingSet.setAllowNullGestureClass( allowNullGestureClass );
    testSet.setAllowNullGestureClass( allowNullGestureClass );

    //Create the random partion indexs
    //NOTE(review): random appears unused here - the shuffles below use std::random_shuffle; confirm before removing
    Random random;
    UINT K = getNumClasses();

    //Make sure both datasets get all the class labels, even if they have no samples in each
    trainingSet.classTracker.resize( K );
    testSet.classTracker.resize( K );
    for(UINT k=0; k<K; k++){
        trainingSet.classTracker[k].classLabel = classTracker[k].classLabel;
        testSet.classTracker[k].classLabel = classTracker[k].classLabel;
        trainingSet.classTracker[k].counter = 0;
        testSet.classTracker[k].counter = 0;
    }

    if( useStratifiedSampling ){
        //Break the data into seperate classes
        Vector< Vector< UINT > > classData( K );

        //Add the indexs to their respective classes
        for(UINT i=0; i<totalNumSamples; i++){
            classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Randomize the order of the indexs in each of the class index buffers
        //NOTE(review): std::random_shuffle was removed in C++17 - consider std::shuffle with an explicit engine
        for(UINT k=0; k<K; k++){
            std::random_shuffle(classData[k].begin(), classData[k].end());
        }

        //Reserve the memory
        UINT numTrainingSamples = 0;
        UINT numTestSamples = 0;

        //Work out the per-class training/test counts so both sets can be reserved in one go
        for(UINT k=0; k<K; k++){
            UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingSizePercentage) );
            UINT numTestExamples = ((UINT)classData[k].size())-numTrainingExamples;
            numTrainingSamples += numTrainingExamples;
            numTestSamples += numTestExamples;
        }

        trainingSet.reserve( numTrainingSamples );
        testSet.reserve( numTestSamples );

        //Loop over each class and add the data to the trainingSet and testSet
        for(UINT k=0; k<K; k++){
            UINT numTrainingExamples = (UINT) floor( Float(classData[k].getSize()) / 100.0 * Float(trainingSizePercentage) );

            //Add the data to the training and test sets
            for(UINT i=0; i<numTrainingExamples; i++){
                trainingSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
            }
            for(UINT i=numTrainingExamples; i<classData[k].getSize(); i++){
                testSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
            }
        }
    }else{

        const UINT numTrainingExamples = (UINT) floor( Float(totalNumSamples) / 100.0 * Float(trainingSizePercentage) );

        //Create the random partion indexs
        Vector< UINT > indexs( totalNumSamples );
        for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
        std::random_shuffle(indexs.begin(), indexs.end());

        //Reserve the memory
        trainingSet.reserve( numTrainingExamples );
        testSet.reserve( totalNumSamples-numTrainingExamples );

        //Add the data to the training and test sets
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
        }
        for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
            testSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
        }
    }

    //The training and test datasets MUST have the same number of classes as the original data
    grt_assert( trainingSet.getNumClasses() == K );
    grt_assert( testSet.getNumClasses() == K );

    //Overwrite the training data in this instance with the training data of the trainingSet
    *this = trainingSet;

    //Sort the class labels in this dataset
    sortClassLabels();

    //Sort the class labels of the test dataset
    testSet.sortClassLabels();

    return testSet;
}
818 
820 
821  if( otherData.getNumDimensions() != numDimensions ){
822  errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << otherData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << std::endl;
823  return false;
824  }
825 
826  //The dataset has changed so flag that any previous cross validation setup will now not work
827  crossValidationSetup = false;
828  crossValidationIndexs.clear();
829 
830  const UINT M = otherData.getNumSamples();
831 
832  //Reserve the memory
833  reserve( getNumSamples() + M );
834 
835  //Add the data from the labelledData to this instance
836 
837  for(UINT i=0; i<M; i++){
838  addSample(otherData[i].getClassLabel(), otherData[i].getSample());
839  }
840 
841  //Set the class names from the dataset
842  Vector< ClassTracker > classTracker = otherData.getClassTracker();
843  for(UINT i=0; i<classTracker.getSize(); i++){
844  setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
845  }
846 
847  //Sort the class labels
848  sortClassLabels();
849 
850  return true;
851 }
852 
853 bool ClassificationData::spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling){
854 
855  crossValidationSetup = false;
856  crossValidationIndexs.clear();
857 
858  //K can not be zero
859  if( K == 0 ){
860  errorLog << "spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be zero!" << std::endl;
861  return false;
862  }
863 
864  //K can not be larger than the number of examples
865  if( K > totalNumSamples ){
866  errorLog << "spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << std::endl;
867  return false;
868  }
869 
870  //K can not be larger than the number of examples in a specific class if the stratified sampling option is true
871  if( useStratifiedSampling ){
872  for(UINT c=0; c<classTracker.getSize(); c++){
873  if( K > classTracker[c].counter ){
874  errorLog << "spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
875  return false;
876  }
877  }
878  }
879 
880  //Setup the dataset for k-fold cross validation
881  kFoldValue = K;
882  Vector< UINT > indexs( totalNumSamples );
883 
884  //Work out how many samples are in each fold, the last fold might have more samples than the others
885  UINT numSamplesPerFold = (UINT) floor( totalNumSamples/Float(K) );
886 
887  //Add the random indexs to each fold
888  crossValidationIndexs.resize(K);
889 
890  //Create the random partion indexs
891  Random random;
892  UINT randomIndex = 0;
893 
894  if( useStratifiedSampling ){
895  //Break the data into seperate classes
896  Vector< Vector< UINT > > classData( getNumClasses() );
897 
898  //Add the indexs to their respective classes
899  for(UINT i=0; i<totalNumSamples; i++){
900  classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
901  }
902 
903  //Randomize the order of the indexs in each of the class index buffers
904  for(UINT c=0; c<getNumClasses(); c++){
905  UINT numSamples = (UINT)classData[c].size();
906  for(UINT x=0; x<numSamples; x++){
907  //Pick a random indexs
908  randomIndex = random.getRandomNumberInt(0,numSamples);
909 
910  //Swap the indexs
911  SWAP(classData[c][ x ] , classData[c][ randomIndex ]);
912  }
913  }
914 
915  //Loop over each of the k folds, at each fold add a sample from each class
917  for(UINT c=0; c<getNumClasses(); c++){
918  iter = classData[ c ].begin();
919  UINT k = 0;
920  while( iter != classData[c].end() ){
921  crossValidationIndexs[ k ].push_back( *iter );
922  iter++;
923  k++;
924  k = k % K;
925  }
926  }
927 
928  }else{
929  //Randomize the order of the data
930  for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
931  for(UINT x=0; x<totalNumSamples; x++){
932  //Pick a random index
933  randomIndex = random.getRandomNumberInt(0,totalNumSamples);
934 
935  //Swap the indexs
936  SWAP(indexs[ x ] , indexs[ randomIndex ]);
937  }
938 
939  UINT counter = 0;
940  UINT foldIndex = 0;
941  for(UINT i=0; i<totalNumSamples; i++){
942  //Add the index to the current fold
943  crossValidationIndexs[ foldIndex ].push_back( indexs[i] );
944 
945  //Move to the next fold if ready
946  if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
947  foldIndex++;
948  counter = 0;
949  }
950  }
951  }
952 
953  crossValidationSetup = true;
954  return true;
955 
956 }
957 
959 
//Returns a new dataset containing every sample that is NOT in fold foldIndex.
//NOTE(review): the signature line was lost in extraction; per the declaration index this is:
//ClassificationData ClassificationData::getTrainingFoldData(const UINT foldIndex) const
ClassificationData trainingData;
trainingData.setNumDimensions( numDimensions );
trainingData.setAllowNullGestureClass( allowNullGestureClass );

//Cross validation must have been set up first via spiltDataIntoKFolds(...)
if( !crossValidationSetup ){
    errorLog << "getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << std::endl;
    return trainingData;
}

//An out-of-range fold index silently returns an empty dataset
if( foldIndex >= kFoldValue ) return trainingData;

//Add the class labels to make sure they all exist
for(UINT k=0; k<getNumClasses(); k++){
    trainingData.addClass( classTracker[k].classLabel, classTracker[k].className );
}

//Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
UINT index = 0;
for(UINT k=0; k<kFoldValue; k++){
    if( k != foldIndex ){
        for(UINT i=0; i<crossValidationIndexs[k].getSize(); i++){

            index = crossValidationIndexs[k][i];
            trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
        }
    }
}

//Sort the class labels
trainingData.sortClassLabels();

return trainingData;
}
993 
995 
//Returns the samples of fold foldIndex as a new dataset.
//NOTE(review): the signature line was lost in extraction; per the declaration index this is:
//ClassificationData ClassificationData::getTestFoldData(const UINT foldIndex) const
ClassificationData testData;
testData.setNumDimensions( numDimensions );
testData.setAllowNullGestureClass( allowNullGestureClass );

//Unlike getTrainingFoldData, a missing cross-validation setup fails silently here (no errorLog)
if( !crossValidationSetup ) return testData;

//An out-of-range fold index silently returns an empty dataset
if( foldIndex >= kFoldValue ) return testData;

//Add the class labels to make sure they all exist
for(UINT k=0; k<getNumClasses(); k++){
    testData.addClass( classTracker[k].classLabel, classTracker[k].className );
}

//Pre-allocate to avoid reallocations while adding the fold samples
testData.reserve( crossValidationIndexs[ foldIndex ].getSize() );

//Add the data to the test fold
UINT index = 0;
for(UINT i=0; i<crossValidationIndexs[ foldIndex ].getSize(); i++){

    index = crossValidationIndexs[ foldIndex ][i];
    testData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
}

//Sort the class labels
testData.sortClassLabels();

return testData;
}
1024 
1026 
//Returns a new dataset containing only the samples labelled classLabel.
//NOTE(review): the signature line was lost in extraction; per the declaration index this is:
//ClassificationData ClassificationData::getClassData(const UINT classLabel) const
ClassificationData classData;
classData.setNumDimensions( this->numDimensions );
classData.setAllowNullGestureClass( allowNullGestureClass );

//Reserve the memory for the class data using the tracked per-class sample count
for(UINT i=0; i<classTracker.getSize(); i++){
    if( classTracker[i].classLabel == classLabel ){
        classData.reserve( classTracker[i].counter );
        break;
    }
}

//Copy every matching sample; an unknown classLabel simply yields an empty dataset
for(UINT i=0; i<totalNumSamples; i++){
    if( data[i].getClassLabel() == classLabel ){
        classData.addSample(classLabel, data[i].getSample());
    }
}

return classData;
}
1047 
1048 ClassificationData ClassificationData::getBootstrappedDataset(const UINT numSamples_,const bool balanceDataset) const{
1049 
1050  Random rand;
1051  ClassificationData newDataset;
1052  newDataset.setNumDimensions( getNumDimensions() );
1053  newDataset.setAllowNullGestureClass( allowNullGestureClass );
1054  newDataset.setExternalRanges( externalRanges, useExternalRanges );
1055 
1056  const UINT numBootstrapSamples = numSamples_ > 0 ? numSamples_ : totalNumSamples;
1057 
1058  grt_assert( numBootstrapSamples > 0 );
1059 
1060  newDataset.reserve( numBootstrapSamples );
1061 
1062  const UINT K = getNumClasses();
1063 
1064  //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
1065  for(UINT k=0; k<K; k++){
1066  newDataset.addClass( classTracker[k].classLabel );
1067  }
1068 
1069  if( balanceDataset ){
1070  //Group the class indexs
1071  Vector< Vector< UINT > > classIndexs( K );
1072  for(UINT i=0; i<totalNumSamples; i++){
1073  classIndexs[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
1074  }
1075 
1076  //Get the class with the minimum number of examples
1077  UINT numSamplesPerClass = (UINT)floor( numBootstrapSamples / Float(K) );
1078 
1079  //Randomly select the training samples from each class
1080  UINT classIndex = 0;
1081  UINT classCounter = 0;
1082  UINT randomIndex = 0;
1083  for(UINT i=0; i<numBootstrapSamples; i++){
1084  randomIndex = rand.getRandomNumberInt(0, (UINT)classIndexs[ classIndex ].size() );
1085  randomIndex = classIndexs[ classIndex ][ randomIndex ];
1086  newDataset.addSample(data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample());
1087  if( classCounter++ >= numSamplesPerClass && classIndex+1 < K ){
1088  classCounter = 0;
1089  classIndex++;
1090  }
1091  }
1092 
1093  }else{
1094  //Randomly select the training samples to add to the new data set
1095  UINT randomIndex;
1096  for(UINT i=0; i<numBootstrapSamples; i++){
1097  randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
1098  newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
1099  }
1100  }
1101 
1102  //Sort the class labels so they are in order
1103  newDataset.sortClassLabels();
1104 
1105  return newDataset;
1106 }
1107 
1109 
//Turns the classification data into regression data to enable regression algorithms like the MLP to be used as a classifier
//NOTE(review): the signature line was lost in extraction; per the declaration index this is:
//RegressionData ClassificationData::reformatAsRegressionData() const
//This sets the number of targets in the regression data equal to the number of classes in the classification data
//The output of each regression training sample will then be all 0's, except for the index matching the classLabel, which will be 1
//For this to work, the labelled classification data cannot have any samples with a classLabel of 0!
RegressionData regressionData;

//An empty dataset reformats to an empty regression dataset
if( totalNumSamples == 0 ){
    return regressionData;
}

const UINT numInputDimensions = numDimensions;
const UINT numTargetDimensions = getNumClasses();
regressionData.setInputAndTargetDimensions(numInputDimensions, numTargetDimensions);

for(UINT i=0; i<totalNumSamples; i++){
    VectorFloat targetVector(numTargetDimensions,0);

    //Set the class index in the target Vector to 1 and all other values in the target Vector to 0
    UINT classLabel = data[i].getClassLabel();

    if( classLabel > 0 ){
        //NOTE(review): assumes class labels are contiguous 1..numClasses; a label larger
        //than numClasses would index past the end of targetVector - confirm upstream
        targetVector[ classLabel-1 ] = 1;
    }else{
        //A null (0) class label cannot be one-hot encoded, so abort and return an empty dataset
        regressionData.clear();
        return regressionData;
    }

    regressionData.addSample(data[i].getSample(),targetVector);
}

return regressionData;
}
1142 
1144 
//Copies every sample into an UnlabelledData container, discarding the class labels.
//NOTE(review): the signature line was lost in extraction; per the declaration index this is:
//UnlabelledData ClassificationData::reformatAsUnlabelledData() const
UnlabelledData unlabelledData;

//An empty dataset reformats to an empty unlabelled dataset
if( totalNumSamples == 0 ){
    return unlabelledData;
}

unlabelledData.setNumDimensions( numDimensions );

for(UINT i=0; i<totalNumSamples; i++){
    unlabelledData.addSample( data[i].getSample() );
}

return unlabelledData;
}
1159 
//Returns the smallest class label currently in the class tracker.
//NOTE(review): signature stripped in extraction: UINT ClassificationData::getMinimumClassLabel() const
//If the tracker is empty the UINT max sentinel is returned unchanged.
UINT minClassLabel = grt_numeric_limits< UINT >::max();

for(UINT i=0; i<classTracker.getSize(); i++){
    if( classTracker[i].classLabel < minClassLabel ){
        minClassLabel = classTracker[i].classLabel;
    }
}

return minClassLabel;
}
1171 
1172 
//Returns the largest class label currently in the class tracker.
//NOTE(review): signature stripped in extraction: UINT ClassificationData::getMaximumClassLabel() const
//If the tracker is empty, 0 is returned.
UINT maxClassLabel = 0;

for(UINT i=0; i<classTracker.getSize(); i++){
    if( classTracker[i].classLabel > maxClassLabel ){
        maxClassLabel = classTracker[i].classLabel;
    }
}

return maxClassLabel;
}
1184 
1185 UINT ClassificationData::getClassLabelIndexValue(const UINT classLabel) const{
1186  for(UINT k=0; k<classTracker.getSize(); k++){
1187  if( classTracker[k].classLabel == classLabel ){
1188  return k;
1189  }
1190  }
1191  warningLog << "getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel << " in class tracker!" << std::endl;
1192  return 0;
1193 }
1194 
1195 std::string ClassificationData::getClassNameForCorrespondingClassLabel(const UINT classLabel) const{
1196 
1197  for(UINT i=0; i<classTracker.getSize(); i++){
1198  if( classTracker[i].classLabel == classLabel ){
1199  return classTracker[i].className;
1200  }
1201  }
1202 
1203  return "CLASS_LABEL_NOT_FOUND";
1204 }
1205 
//Builds a human-readable, tab-separated summary of the dataset: name, info text,
//dimension/sample/class counts, per-class stats and the per-dimension value ranges.
//NOTE(review): signature stripped in extraction: std::string ClassificationData::getStatsAsString() const
std::string statsText;
statsText += "DatasetName:\t" + datasetName + "\n";
statsText += "DatasetInfo:\t" + infoText + "\n";
statsText += "Number of Dimensions:\t" + Util::toString( numDimensions ) + "\n";
statsText += "Number of Samples:\t" + Util::toString( totalNumSamples ) + "\n";
statsText += "Number of Classes:\t" + Util::toString( getNumClasses() ) + "\n";
statsText += "ClassStats:\n";

//One line per tracked class: label, sample count and name
for(UINT k=0; k<getNumClasses(); k++){
    statsText += "ClassLabel:\t" + Util::toString( classTracker[k].classLabel );
    statsText += "\tNumber of Samples:\t" + Util::toString(classTracker[k].counter);
    statsText += "\tClassName:\t" + classTracker[k].className + "\n";
}

Vector< MinMax > ranges = getRanges();

//One line per dimension with its observed (or external) min/max
statsText += "Dataset Ranges:\n";
for(UINT j=0; j<ranges.size(); j++){
    statsText += "[" + Util::toString( j+1 ) + "] Min:\t" + Util::toString( ranges[j].minValue ) + "\tMax: " + Util::toString( ranges[j].maxValue ) + "\n";
}

return statsText;
}
1230 
1232 
//Returns the per-dimension min/max values of the dataset.
//NOTE(review): signature stripped in extraction: Vector< MinMax > ClassificationData::getRanges() const

//If the dataset should be scaled using the external ranges then return the external ranges
if( useExternalRanges ) return externalRanges;

Vector< MinMax > ranges(numDimensions);

//Otherwise return the min and max values for each column in the dataset
//(an empty dataset returns default-constructed MinMax entries)
if( totalNumSamples > 0 ){
    for(UINT j=0; j<numDimensions; j++){
        ranges[j].minValue = data[0][j];
        ranges[j].maxValue = data[0][j];
        for(UINT i=0; i<totalNumSamples; i++){
            if( data[i][j] < ranges[j].minValue ){ ranges[j].minValue = data[i][j]; } //Search for the min value
            else if( data[i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[i][j]; } //Search for the max value
        }
    }
}
return ranges;
}
1251 
//Returns the class label of every tracked class, in tracker order.
//NOTE(review): signature stripped in extraction: Vector< UINT > ClassificationData::getClassLabels() const
Vector< UINT > classLabels( getNumClasses(), 0 );

if( getNumClasses() == 0 ) return classLabels;

for(UINT i=0; i<getNumClasses(); i++){
    classLabels[i] = classTracker[i].classLabel;
}

return classLabels;
}
1263 
//Returns the number of samples in each tracked class, in tracker order.
//NOTE(review): signature stripped in extraction: Vector< UINT > ClassificationData::getNumSamplesPerClass() const
Vector< UINT > classSampleCounts( getNumClasses(), 0 );

if( getNumSamples() == 0 ) return classSampleCounts;

for(UINT i=0; i<getNumClasses(); i++){
    classSampleCounts[i] = classTracker[i].counter;
}

return classSampleCounts;
}
1275 
1277 
//Returns the per-dimension mean over all samples.
//NOTE(review): signature stripped in extraction: VectorFloat ClassificationData::getMean() const
//NOTE(review): if totalNumSamples is 0 this divides by zero - confirm callers guard this
VectorFloat mean(numDimensions,0);

for(UINT j=0; j<numDimensions; j++){
    for(UINT i=0; i<totalNumSamples; i++){
        mean[j] += data[i][j];
    }
    mean[j] /= Float(totalNumSamples);
}

return mean;
}
1289 
1291 
//Returns the per-dimension sample standard deviation (divides by N-1, Bessel's correction).
//NOTE(review): signature stripped in extraction: VectorFloat ClassificationData::getStdDev() const
//NOTE(review): totalNumSamples <= 1 makes the divisor zero or wraps it (UINT arithmetic) - confirm callers guard this
VectorFloat mean = getMean();
VectorFloat stdDev(numDimensions,0);

for(UINT j=0; j<numDimensions; j++){
    for(UINT i=0; i<totalNumSamples; i++){
        stdDev[j] += SQR(data[i][j]-mean[j]);
    }
    stdDev[j] = sqrt( stdDev[j] / Float(totalNumSamples-1) );
}

return stdDev;
}
1304 
1305 MatrixFloat ClassificationData::getClassHistogramData(const UINT classLabel,const UINT numBins) const{
1306 
1307  const UINT M = getNumSamples();
1308  const UINT N = getNumDimensions();
1309 
1310  Vector< MinMax > ranges = getRanges();
1311  VectorFloat binRange(N);
1312  for(UINT i=0; i<ranges.size(); i++){
1313  binRange[i] = (ranges[i].maxValue-ranges[i].minValue)/Float(numBins);
1314  }
1315 
1316  MatrixFloat histData(N,numBins);
1317  histData.setAllValues(0);
1318 
1319  Float norm = 0;
1320  for(UINT i=0; i<M; i++){
1321  if( data[i].getClassLabel() == classLabel ){
1322  for(UINT j=0; j<N; j++){
1323  UINT binIndex = 0;
1324  bool binFound = false;
1325  for(UINT k=0; k<numBins-1; k++){
1326  if( data[i][j] >= ranges[i].minValue + (binRange[j]*k) && data[i][j] >= ranges[i].minValue + (binRange[j]*(k+1)) ){
1327  binIndex = k;
1328  binFound = true;
1329  break;
1330  }
1331  }
1332  if( !binFound ) binIndex = numBins-1;
1333  histData[j][binIndex]++;
1334  }
1335  norm++;
1336  }
1337  }
1338 
1339  if( norm == 0 ) return histData;
1340 
1341  //Is this the best way to normalize a multidimensional histogram???
1342  for(UINT i=0; i<histData.getNumRows(); i++){
1343  for(UINT j=0; j<histData.getNumCols(); j++){
1344  histData[i][j] /= norm;
1345  }
1346  }
1347 
1348  return histData;
1349 }
1350 
1352 
//Returns a (numClasses x numDimensions) matrix holding the per-class, per-dimension mean.
//NOTE(review): signature stripped in extraction: MatrixFloat ClassificationData::getClassMean() const
MatrixFloat mean(getNumClasses(),numDimensions);
VectorFloat counter(getNumClasses(),0);

//Matrix storage is not zero-initialized, so clear it before accumulating
mean.setAllValues( 0 );

//Accumulate the per-class sums and sample counts in a single pass
for(UINT i=0; i<totalNumSamples; i++){
    UINT classIndex = getClassLabelIndexValue( data[i].getClassLabel() );
    for(UINT j=0; j<numDimensions; j++){
        mean[classIndex][j] += data[i][j];
    }
    counter[ classIndex ]++;
}

//Divide by the per-class counts; classes with no samples get a mean of 0
for(UINT k=0; k<getNumClasses(); k++){
    for(UINT j=0; j<numDimensions; j++){
        mean[k][j] = counter[k] > 0 ? mean[k][j]/counter[k] : 0;
    }
}

return mean;
}
1374 
1376 
//Returns a (numClasses x numDimensions) matrix of per-class sample standard deviations (N-1 divisor).
//NOTE(review): signature stripped in extraction: MatrixFloat ClassificationData::getClassStdDev() const
//NOTE(review): a class with 0 or 1 samples makes counter[k]-1 zero or negative - confirm callers guard this
MatrixFloat mean = getClassMean();
MatrixFloat stdDev(getNumClasses(),numDimensions);
VectorFloat counter(getNumClasses(),0);

//Matrix storage is not zero-initialized, so clear it before accumulating
stdDev.setAllValues( 0 );

//Accumulate the per-class squared deviations and sample counts in a single pass
for(UINT i=0; i<totalNumSamples; i++){
    UINT classIndex = getClassLabelIndexValue( data[i].getClassLabel() );
    for(UINT j=0; j<numDimensions; j++){
        stdDev[classIndex][j] += SQR(data[i][j]-mean[classIndex][j]);
    }
    counter[ classIndex ]++;
}

for(UINT k=0; k<getNumClasses(); k++){
    for(UINT j=0; j<numDimensions; j++){
        stdDev[k][j] = sqrt( stdDev[k][j] / Float(counter[k]-1) );
    }
}

return stdDev;
}
1399 
1401 
1402  VectorFloat mean = getMean();
1403  MatrixFloat covariance(numDimensions,numDimensions);
1404 
1405  for(UINT j=0; j<numDimensions; j++){
1406  for(UINT k=0; k<numDimensions; k++){
1407  for(UINT i=0; i<totalNumSamples; i++){
1408  covariance[j][k] += (data[i][j]-mean[j]) * (data[i][k]-mean[k]) ;
1409  }
1410  covariance[j][k] /= Float(totalNumSamples-1);
1411  }
1412  }
1413 
1414  return covariance;
1415 }
1416 
//Returns one per-dimension histogram matrix per tracked class (see getClassHistogramData).
//NOTE(review): signature stripped in extraction: Vector< MatrixFloat > ClassificationData::getHistogramData(const UINT numBins) const
const UINT K = getNumClasses();
Vector< MatrixFloat > histData(K);

for(UINT k=0; k<K; k++){
    histData[k] = getClassHistogramData( classTracker[k].classLabel, numBins );
}

return histData;
}
1427 
1428 VectorFloat ClassificationData::getClassProbabilities() const {
1429  return getClassProbabilities( getClassLabels() );
1430 }
1431 
1432 VectorFloat ClassificationData::getClassProbabilities( const Vector< UINT > &classLabels ) const {
1433  const UINT K = (UINT)classLabels.size();
1434  const UINT N = getNumClasses();
1435  Float sum = 0;
1436  VectorFloat x(K,0);
1437  for(UINT k=0; k<K; k++){
1438  for(UINT n=0; n<N; n++){
1439  if( classLabels[k] == classTracker[n].classLabel ){
1440  x[k] = classTracker[n].counter;
1441  sum += classTracker[n].counter;
1442  break;
1443  }
1444  }
1445  }
1446 
1447  //Normalize the class probabilities
1448  if( sum > 0 ){
1449  for(UINT k=0; k<K; k++){
1450  x[k] /= sum;
1451  }
1452  }
1453 
1454  return x;
1455 }
1456 
1458 
//Returns the indexes of every sample labelled classLabel.
//NOTE(review): signature stripped in extraction: Vector< UINT > ClassificationData::getClassDataIndexes(const UINT classLabel) const
const UINT M = getNumSamples();
const UINT K = getNumClasses();
UINT N = 0;

//Get the number of samples in the class from the tracker so the result can be sized exactly
for(UINT k=0; k<K; k++){
    if( classTracker[k].classLabel == classLabel){
        N = classTracker[k].counter;
        break;
    }
}

//Collect the matching sample indexes; an unknown classLabel yields an empty vector
UINT index = 0;
Vector< UINT > classIndexes(N);
for(UINT i=0; i<M; i++){
    if( data[i].getClassLabel() == classLabel ){
        classIndexes[index++] = i;
    }
}

return classIndexes;
}
1481 
1483 
//Copies the raw sample values into an (M x N) MatrixDouble, discarding the class labels.
//NOTE(review): signature stripped in extraction: MatrixDouble ClassificationData::getDataAsMatrixDouble() const
const UINT M = getNumSamples();
const UINT N = getNumDimensions();
MatrixDouble d(M,N);

for(UINT i=0; i<M; i++){
    for(UINT j=0; j<N; j++){
        d[i][j] = data[i][j];
    }
}

return d;
}
1496 
//Copies the raw sample values into an (M x N) MatrixFloat, discarding the class labels.
//NOTE(review): signature stripped in extraction: MatrixFloat ClassificationData::getDataAsMatrixFloat() const
const UINT M = getNumSamples();
const UINT N = getNumDimensions();
MatrixFloat d(M,N);

for(UINT i=0; i<M; i++){
    for(UINT j=0; j<N; j++){
        d[i][j] = data[i][j];
    }
}

return d;
}
1510 
1511 bool ClassificationData::generateGaussDataset( const std::string filename, const UINT numSamples, const UINT numClasses, const UINT numDimensions, const Float range, const Float sigma ){
1512 
1513  //Generate the dataset
1514  ClassificationData data = generateGaussDataset( numSamples, numClasses, numDimensions, range, sigma );
1515 
1516  //Save the dataset to a CSV file
1517  return data.save( filename );
1518 }
1519 
1520 ClassificationData ClassificationData::generateGaussDataset( const UINT numSamples, const UINT numClasses, const UINT numDimensions, const Float range, const Float sigma ){
1521 
1522  Random random;
1523 
1524  //Generate a simple model that will be used to generate the main dataset
1525  MatrixFloat model(numClasses,numDimensions);
1526  for(UINT k=0; k<numClasses; k++){
1527  for(UINT j=0; j<numDimensions; j++){
1528  model[k][j] = random.getRandomNumberUniform(-range,range);
1529  }
1530  }
1531 
1532  //Use the model above to generate the main dataset
1533  ClassificationData data;
1534  data.setNumDimensions( numDimensions );
1535  data.reserve( numSamples );
1536 
1537  for(UINT i=0; i<numSamples; i++){
1538 
1539  //Randomly select which class this sample belongs to
1540  UINT k = random.getRandomNumberInt( 0, numClasses );
1541 
1542  //Generate a sample using the model (+ some Gaussian noise)
1543  VectorFloat sample( numDimensions );
1544  for(UINT j=0; j<numDimensions; j++){
1545  sample[j] = model[k][j] + random.getRandomNumberGauss(0,sigma);
1546  }
1547 
1548  //By default in the GRT, the class label should not be 0, so add 1
1549  UINT classLabel = k + 1;
1550 
1551  //Add the labeled sample to the dataset
1552  data.addSample( classLabel, sample );
1553  }
1554 
1555  //Return the datset
1556  return data;
1557 }
1558 
1559 ClassificationData ClassificationData::generateGaussLinearDataset( const UINT numSamples, const UINT numClasses, const UINT numDimensions, const Float range, const Float sigma){
1560 
1561  Random random;
1562 
1563  //Generate a simple model that will be used to generate the main dataset
1564  //Enforce the gaussian clusters to be linearly separable by setting each model centroid on a regular spaced grid
1565  MatrixFloat model(numClasses,numDimensions);
1566  for(UINT k=0; k<numClasses; k++){
1567  for(UINT j=0; j<numDimensions; j++){
1568  model[k][j] = Util::scale(k,0,numClasses-1,-range,range,true);
1569  }
1570  }
1571 
1572  //Use the model above to generate the main dataset
1573  ClassificationData data;
1574  data.setNumDimensions( numDimensions );
1575  data.reserve( numSamples );
1576 
1577  for(UINT i=0; i<numSamples; i++){
1578 
1579  //Randomly select which class this sample belongs to
1580  UINT k = random.getRandomNumberInt( 0, numClasses );
1581 
1582  //Generate a sample using the model (+ some Gaussian noise)
1583  VectorFloat sample( numDimensions );
1584  for(UINT j=0; j<numDimensions; j++){
1585  sample[j] = model[k][j] + random.getRandomNumberGauss(0,sigma);
1586  }
1587 
1588  //By default in the GRT, the class label should not be 0, so add 1
1589  UINT classLabel = k + 1;
1590 
1591  //Add the labeled sample to the dataset
1592  data.addSample( classLabel, sample );
1593  }
1594 
1595  return data;
1596 }
1597 
1598 GRT_END_NAMESPACE
1599 
bool saveDatasetToFile(const std::string &filename) const
bool setDatasetName(std::string datasetName)
bool loadDatasetFromFile(const std::string &filename)
static std::string toString(const int &i)
Definition: Util.cpp:81
RegressionData reformatAsRegressionData() const
ClassificationData & operator=(const ClassificationData &rhs)
Definition: Timer.h:43
static bool generateGaussDataset(const std::string filename, const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
bool addSample(const VectorFloat &sample)
bool setAllowNullGestureClass(const bool allowNullGestureClass)
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
ClassificationData getTestFoldData(const UINT foldIndex) const
bool addClass(const UINT classLabel, const std::string className="NOT_SET")
bool addSample(const UINT classLabel, const VectorFloat &sample)
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: Util.cpp:55
Vector< ClassTracker > getClassTracker() const
This file contains the Random class, a useful wrapper for generating cross platform random functions...
Definition: Random.h:46
ClassificationData getClassData(const UINT classLabel) const
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
bool setNumDimensions(UINT numDimensions)
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
MatrixDouble getDataAsMatrixDouble() const
virtual bool setKey(const std::string &key)
sets the key that gets written at the start of each message, this will be written in the format &#39;key ...
Definition: Log.h:166
MatrixFloat getClassMean() const
UINT getSize() const
Definition: Vector.h:201
Float getRandomNumberGauss(Float mu=0.0, Float sigma=1.0)
Definition: Random.cpp:142
std::string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
Vector< UINT > getClassLabels() const
bool loadDatasetFromCSVFile(const std::string &filename, const UINT classLabelColumnIndex=0)
UINT getMinimumClassLabel() const
Vector< MatrixFloat > getHistogramData(const UINT numBins) const
UINT removeClass(const UINT classLabel)
ClassificationData(UINT numDimensions=0, std::string datasetName="NOT_SET", std::string infoText="")
bool setAllValues(const T &value)
Definition: Matrix.h:366
bool setInputAndTargetDimensions(const UINT numInputDimensions, const UINT numTargetDimensions)
bool setInfoText(std::string infoText)
Vector< UINT > getNumSamplesPerClass() const
MatrixFloat getCovarianceMatrix() const
UnlabelledData reformatAsUnlabelledData() const
bool removeSample(const UINT index)
UINT getNumSamples() const
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
bool save(const std::string &filename) const
bool setNumDimensions(const UINT numDimensions)
bool enableExternalRangeScaling(const bool useExternalRanges)
bool setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
bool saveDatasetToCSVFile(const std::string &filename) const
static ClassificationData generateGaussLinearDataset(const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
unsigned int getNumRows() const
Definition: Matrix.h:574
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
Definition: Matrix.h:581
bool start()
Definition: Timer.h:64
Vector< MinMax > getRanges() const
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
Definition: Random.cpp:129
bool merge(const ClassificationData &data)
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
VectorFloat getStdDev() const
Vector< UINT > getClassDataIndexes(const UINT classLabel) const
int getRandomNumberInt(int minRange, int maxRange)
Definition: Random.cpp:59
bool reserve(const UINT M)
MatrixFloat getDataAsMatrixFloat() const
static bool stringEndsWith(const std::string &str, const std::string &ending)
Definition: Util.cpp:164
bool setClassNameForCorrespondingClassLabel(const std::string className, const UINT classLabel)
ClassificationData getBootstrappedDataset(const UINT numSamples=0, const bool balanceDataset=false) const
This class stores the class label and raw data for a single labelled classification sample...
UINT getClassLabelIndexValue(const UINT classLabel) const
MatrixFloat getClassHistogramData(const UINT classLabel, const UINT numBins) const
ClassificationData getTrainingFoldData(const UINT foldIndex) const
UINT getMaximumClassLabel() const
bool scale(const Float minTarget, const Float maxTarget)
bool load(const std::string &filename)
MatrixFloat getClassStdDev() const
bool addSample(const VectorFloat &inputVector, const VectorFloat &targetVector)
std::string getStatsAsString() const
VectorFloat getMean() const