GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
BAG.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "BAG.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
26 //Define the string that will be used to identify the object
27 const std::string BAG::id = "BAG";
28 std::string BAG::getId() { return BAG::id; }
29 
30 //Register the BAG module with the Classifier base class
31 RegisterClassifierModule< BAG > BAG::registerModule( BAG::getId() );
32 
33 BAG::BAG(bool useScaling) : Classifier( BAG::getId() )
34 {
35  this->useScaling = useScaling;
36  useNullRejection = false;
37  classifierMode = STANDARD_CLASSIFIER_MODE;
38 }
39 
40 BAG::BAG(const BAG &rhs):Classifier( BAG::getId() ){
41  classifierMode = STANDARD_CLASSIFIER_MODE;
42  *this = rhs;
43 }
44 
45 BAG::~BAG(void)
46 {
47  clearEnsemble();
48 }
49 
50 BAG& BAG::operator=(const BAG &rhs){
51  if( this != &rhs ){
52  //Clear any previous ensemble
53  clearEnsemble();
54 
55  //Copy the weights
56  this->weights = rhs.weights;
57 
58  //Deep copy each classifier in the ensemble
59  for(UINT i=0; i<rhs.getEnsembleSize(); i++){
60  addClassifierToEnsemble( *(rhs.ensemble[i]) );
61  }
62  //Copy the base classifier variables
63  copyBaseVariables( (Classifier*)&rhs );
64  }
65  return *this;
66 }
67 
68 bool BAG::deepCopyFrom(const Classifier *classifier){
69 
70  if( classifier == NULL ) return false;
71 
72  if( this->getClassifierType() == classifier->getClassifierType() ){
73  BAG *ptr = (BAG*)classifier;
74 
75  //Clear any previous ensemble
76  clearEnsemble();
77 
78  //Copy the weights
79  this->weights = ptr->weights;
80 
81  //Deep copy each classifier in the ensemble
82  for(UINT i=0; i<ptr->getEnsembleSize(); i++){
83  addClassifierToEnsemble( *(ptr->ensemble[i]) );
84  }
85  //Copy the base classifier variables
86  return copyBaseVariables( classifier );
87  }
88  return false;
89 }
90 
91 bool BAG::train_(ClassificationData &trainingData){
92 
93  //Clear any previous models
94  clear();
95 
96  const unsigned int M = trainingData.getNumSamples();
97  const unsigned int N = trainingData.getNumDimensions();
98  const unsigned int K = trainingData.getNumClasses();
99 
100  if( M == 0 ){
101  errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
102  return false;
103  }
104 
105  numInputDimensions = N;
106  numOutputDimensions = K;
107  numClasses = K;
108  classLabels.resize(K);
109  ranges = trainingData.getRanges();
110  ClassificationData validationData;
111 
112  //Scale the training data if needed
113  if( useScaling ){
114  //Scale the training data between 0 and 1
115  trainingData.scale(0, 1);
116  }
117 
118  if( useValidationSet ){
119  validationData = trainingData.split( 100-validationSetSize );
120  }
121 
122  UINT ensembleSize = ensemble.getSize();
123 
124  if( ensembleSize == 0 ){
125  errorLog << __GRT_LOG__ << " The ensemble size is zero! You need to add some classifiers to the ensemble first." << std::endl;
126  return false;
127  }
128 
129  for(UINT i=0; i<ensembleSize; i++){
130  if( ensemble[i] == NULL ){
131  errorLog << __GRT_LOG__ << " The classifier at ensemble index " << i << " has not been set!" << std::endl;
132  return false;
133  }
134  }
135 
136  //Train the ensemble
137  for(UINT i=0; i<ensembleSize; i++){
138  ClassificationData boostedDataset = trainingData.getBootstrappedDataset();
139 
140  //Propagate the training logging to the ensemble
141  ensemble[i]->setTrainingLoggingEnabled( this->getTrainingLoggingEnabled() );
142 
143  trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getId() << ". Num Training Samples: " << boostedDataset.getNumSamples() << std::endl;
144 
145  //Train the classifier with the bootstrapped dataset
146  if( !ensemble[i]->train_( boostedDataset ) ){
147  errorLog << __GRT_LOG__ << " The classifier at ensemble index " << i << " failed training!" << std::endl;
148  return false;
149  }
150  }
151 
152  //Set the class labels
153  classLabels = trainingData.getClassLabels();
154 
155  //Flag that the model has been trained
156  trained = true;
157  converged = true;
158 
159  //Compute the final training stats
160  trainingSetAccuracy = 0;
161  validationSetAccuracy = 0;
162 
163  //If scaling was on, then the data will already be scaled, so turn it off temporially so we can test the model accuracy
164  bool scalingState = useScaling;
165  useScaling = false;
166  if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
167  trained = false;
168  errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
169  return false;
170  }
171 
172  if( useValidationSet ){
173  if( !computeAccuracy( validationData, validationSetAccuracy ) ){
174  trained = false;
175  errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
176  return false;
177  }
178 
179  }
180 
181  trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;
182 
183  if( useValidationSet ){
184  trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
185  }
186 
187  //Reset the scaling state for future prediction
188  useScaling = scalingState;
189 
190  return trained;
191 }
192 
193 bool BAG::predict_(VectorFloat &inputVector){
194 
195  if( !trained ){
196  errorLog << __GRT_LOG__ << " Model Not Trained!" << std::endl;
197  return false;
198  }
199 
200  predictedClassLabel = 0;
201  maxLikelihood = -10000;
202 
203  if( !trained ) return false;
204 
205  if( inputVector.getSize() != numInputDimensions ){
206  errorLog << __GRT_LOG__ << " The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
207  return false;
208  }
209 
210  if( useScaling ){
211  for(UINT n=0; n<numInputDimensions; n++){
212  inputVector[n] = scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
213  }
214  }
215 
216  if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses);
217  if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses);
218 
219  //Reset the likelihoods and distances
220  for(UINT k=0; k<numClasses; k++){
221  classLikelihoods[k] = 0;
222  classDistances[k] = 0;
223  }
224 
225  //Run the prediction for each classifier
226  Float sum = 0;
227  UINT ensembleSize = ensemble.getSize();
228  for(UINT i=0; i<ensembleSize; i++){
229 
230  if( !ensemble[i]->predict(inputVector) ){
231  errorLog << __GRT_LOG__ << " The " << i << " classifier in the ensemble failed prediction!" << std::endl;
232  return false;
233  }
234 
235  classLikelihoods[ getClassLabelIndexValue( ensemble[i]->getPredictedClassLabel() ) ] += weights[i];
236  classDistances[ getClassLabelIndexValue( ensemble[i]->getPredictedClassLabel() ) ] += ensemble[i]->getMaximumLikelihood() * weights[i];
237 
238  sum += weights[i];
239  }
240 
241  //Set the predicted class label as the most common class
242  Float maxCount = 0;
243  UINT maxIndex = 0;
244  for(UINT i=0; i<numClasses; i++){
245  if( classLikelihoods[i] > maxCount ){
246  maxIndex = i;
247  maxCount = classLikelihoods[i];
248  }
249  classLikelihoods[i] /= sum;
250  classDistances[i] /= Float(ensembleSize);
251  }
252 
253  predictedClassLabel = classLabels[ maxIndex ];
254  maxLikelihood = classLikelihoods[ maxIndex ];
255 
256  return true;
257 }
258 
259 bool BAG::reset(){
260 
261  //Reset all the classifiers
262  for(UINT i=0; i<ensemble.getSize(); i++){
263  if( ensemble[i] != NULL ){
264  ensemble[i]->reset();
265  }
266  }
267 
268  return true;
269 }
270 
271 bool BAG::clear(){
272 
273  //Clear the Classifier variables
275 
276  //Clear all the classifiers, but do not remove the ensemble
277  for(UINT i=0; i<ensemble.getSize(); i++){
278  if( ensemble[i] != NULL ){
279  ensemble[i]->clear();
280  }
281  }
282 
283  return true;
284 }
285 
286 bool BAG::save( std::fstream &file ) const{
287 
288  if(!file.is_open())
289  {
290  errorLog << __GRT_LOG__ << " The file is not open!" << std::endl;
291  return false;
292  }
293 
294  const UINT ensembleSize = getEnsembleSize();
295 
296  //Write the header info
297  file << "GRT_BAG_MODEL_FILE_V2.0\n";
298 
299  //Write the classifier settings to the file
301  errorLog << __GRT_LOG__ << " Failed to save classifier base settings to file!" << std::endl;
302  return false;
303  }
304 
305  if( trained ){
306 
307  file << "EnsembleSize: " << ensembleSize << std::endl;
308 
309  if( getEnsembleSize() > 0 ){
310 
311  //Save the weights
312  file << "Weights: ";
313  for(UINT i=0; i<getEnsembleSize(); i++){
314  file << weights[i];
315  if( i < ensembleSize-1 ) file << "\t";
316  else file << "\n";
317  }
318 
319  //Save the classifier types
320  file << "ClassifierTypes: ";
321  for(UINT i=0; i<getEnsembleSize(); i++){
322  file << ensemble[i]->getId() << std::endl;
323  }
324 
325  //Save the ensemble
326  file << "Ensemble:" << std::endl;
327  for(UINT i=0; i<getEnsembleSize(); i++){
328  if( !ensemble[i]->save( file ) ){
329  errorLog << __GRT_LOG__ << " Failed to save classifier " << i << " to file!" << std::endl;
330  return false;
331  }
332  }
333  }
334 
335  }
336 
337  //NOTE: We do not need to close the file
338 
339  return true;
340 }
341 
342 bool BAG::load( std::fstream &file ){
343 
344  clear();
345  UINT ensembleSize = 0;
346 
347  if(!file.is_open())
348  {
349  errorLog << __GRT_LOG__ << " Could not open file to load model" << std::endl;
350  return false;
351  }
352 
353  std::string word;
354  file >> word;
355 
356  //Check to see if we should load a legacy file
357  if( word == "GRT_BAG_MODEL_FILE_V1.0" ){
358  return loadLegacyModelFromFile( file );
359  }
360 
361  //Find the file type header
362  if(word != "GRT_BAG_MODEL_FILE_V2.0"){
363  errorLog << __GRT_LOG__ << " Could not find Model File Header" << std::endl;
364  return false;
365  }
366 
367  //Load the base settings from the file
369  errorLog << __GRT_LOG__ << " Failed to load base settings from file!" << std::endl;
370  clear();
371  return false;
372  }
373 
374  if( trained ){
375 
376  //Load the ensemble size
377  file >> word;
378  if(word != "EnsembleSize:"){
379  errorLog << __GRT_LOG__ << " Could not find the EnsembleSize!" << std::endl;
380  clear();
381  return false;
382  }
383  file >> ensembleSize;
384 
385  //Load the weights
386  weights.resize( ensembleSize );
387 
388  file >> word;
389  if(word != "Weights:"){
390  errorLog << __GRT_LOG__ << " Could not find the Weights!" << std::endl;
391  clear();
392  return false;
393  }
394  for(UINT i=0; i<ensembleSize; i++){
395  file >> weights[i];
396  }
397 
398  //Load the classifier types
399  Vector< std::string > classifierTypes( ensembleSize );
400 
401  file >> word;
402  if(word != "ClassifierTypes:"){
403  errorLog << __GRT_LOG__ << " Could not find the ClassifierTypes!" << std::endl;
404  clear();
405  return false;
406  }
407  for(UINT i=0; i<ensembleSize; i++){
408  file >> classifierTypes[i];
409  }
410 
411  //Load the ensemble
412  file >> word;
413  if(word != "Ensemble:"){
414  errorLog << __GRT_LOG__ << " Could not find the Ensemble! Found: " << word << std::endl;
415  clear();
416  return false;
417  }
418  ensemble.resize(ensembleSize,NULL);
419  for(UINT i=0; i<ensembleSize; i++){
420  ensemble[i] = create( classifierTypes[i] );
421 
422  if( ensemble[i] == NULL ){
423  errorLog << __GRT_LOG__ << " Could not create a new classifier instance from the classifierType: " << classifierTypes[i] << std::endl;
424  clear();
425  clearEnsemble();
426  return false;
427  }
428 
429  if( !ensemble[i]->load( file ) ){
430  errorLog << __GRT_LOG__ << " Failed to load ensemble classifier: " << i << std::endl;
431  clear();
432  clearEnsemble();
433  return false;
434  }
435  }
436 
437  //Recompute the null rejection thresholds
439 
440  //Resize the prediction results to make sure it is setup for realtime prediction
441  maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
442  bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
443  classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
444  classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
445  }
446 
447  return true;
448 }
449 
450 UINT BAG::getEnsembleSize() const{
451  return ensemble.getSize();
452 }
453 
455  return weights;
456 }
457 
459  return ensemble;
460 }
461 
462 bool BAG::addClassifierToEnsemble(const Classifier &classifier,Float weight){
463 
464  trained = false;
465 
466  Classifier *newClassifier = classifier.create( classifier.getId() );
467 
468  if( newClassifier == NULL ){
469  return false;
470  }
471 
472  if( !newClassifier->deepCopyFrom( &classifier ) ){
473  return false;
474  }
475 
476  weights.push_back( weight );
477  ensemble.push_back( newClassifier );
478 
479  return true;
480 }
481 
483 
484  trained = false;
485  for(UINT i=0; i<ensemble.getSize(); i++){
486  if( ensemble[i] != NULL ){
487  delete ensemble[i];
488  ensemble[i] = NULL;
489  }
490  }
491  weights.clear();
492  ensemble.clear();
493 
494  return true;
495 }
496 
497 bool BAG::setWeights(const VectorFloat &weights){
498 
499  if( this->weights.getSize() != weights.getSize() ){
500  return false;
501  }
502  this->weights = weights;
503  return true;
504 }
505 
506 bool BAG::loadLegacyModelFromFile( std::fstream &file ){
507 
508  std::string word;
509 
510  return true;
511 }
512 
513 GRT_END_NAMESPACE
Definition: BAG.h:44
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:274
std::string getId() const
Definition: GRTBase.cpp:85
virtual bool predict(VectorFloat inputVector)
Definition: MLBase.cpp:135
const Vector< Classifier * > getEnsemble() const
Definition: BAG.cpp:458
virtual bool recomputeNullRejectionThresholds()
Definition: Classifier.h:255
#define DEFAULT_NULL_LIKELIHOOD_VALUE
Definition: Classifier.h:33
VectorFloat getEnsembleWeights() const
Definition: BAG.cpp:454
virtual bool clear()
Definition: BAG.cpp:271
UINT getEnsembleSize() const
Definition: BAG.cpp:450
static std::string getId()
Definition: BAG.cpp:28
bool getTrainingLoggingEnabled() const
Definition: MLBase.cpp:312
std::string getClassifierType() const
Definition: Classifier.cpp:175
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool reset()
Definition: BAG.cpp:259
BAG(bool useScaling=false)
Definition: BAG.cpp:33
virtual bool save(std::fstream &file) const
Definition: BAG.cpp:286
UINT getSize() const
Definition: Vector.h:201
Vector< UINT > getClassLabels() const
virtual ~BAG(void)
Definition: BAG.cpp:45
virtual bool computeAccuracy(const ClassificationData &data, Float &accuracy)
Definition: Classifier.cpp:171
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: Classifier.h:64
UINT getNumSamples() const
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: BAG.cpp:68
UINT getPredictedClassLabel() const
Definition: Classifier.cpp:221
virtual bool load(std::fstream &file)
Definition: BAG.cpp:342
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:101
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:321
bool addClassifierToEnsemble(const Classifier &classifier, Float weight=1)
Definition: BAG.cpp:462
UINT getNumDimensions() const
UINT getNumClasses() const
UINT getClassLabelIndexValue(const UINT classLabel) const
Definition: Classifier.cpp:213
virtual bool train_(ClassificationData &trainingData)
Definition: BAG.cpp:91
Vector< MinMax > getRanges() const
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
ClassificationData getBootstrappedDataset(const UINT numSamples=0, const bool balanceDataset=false) const
Definition: Vector.h:41
BAG & operator=(const BAG &rhs)
Definition: BAG.cpp:50
bool clearEnsemble()
Definition: BAG.cpp:482
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: Classifier.cpp:151
Classifier * create() const
Definition: Classifier.cpp:45
This is the main base class that all GRT Classification algorithms should inherit from...
Definition: Classifier.h:41
virtual bool predict_(VectorFloat &inputVector)
Definition: BAG.cpp:193
bool setWeights(const VectorFloat &weights)
Definition: BAG.cpp:497
static Classifier * create(const std::string &id)
Definition: Classifier.cpp:32
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: GRTBase.h:184