GestureRecognitionToolkit  Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source C++ machine learning library for real-time gesture recognition.
BAG.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #include "BAG.h"
22 
23 GRT_BEGIN_NAMESPACE
24 
25 //Register the BAG module with the Classifier base class
26 RegisterClassifierModule< BAG > BAG::registerModule("BAG");
27 
28 BAG::BAG(bool useScaling)
29 {
30  this->useScaling = useScaling;
31  useNullRejection = false;
32  classType = "BAG";
33  classifierType = classType;
34  classifierMode = STANDARD_CLASSIFIER_MODE;
35  debugLog.setProceedingText("[DEBUG BAG]");
36  errorLog.setProceedingText("[ERROR BAG]");
37  trainingLog.setProceedingText("[TRAINING BAG]");
38  warningLog.setProceedingText("[WARNING BAG]");
39 }
40 
41 BAG::BAG(const BAG &rhs){
42  classType = "BAG";
43  classifierType = classType;
44  classifierMode = STANDARD_CLASSIFIER_MODE;
45  debugLog.setProceedingText("[DEBUG BAG]");
46  errorLog.setProceedingText("[ERROR BAG]");
47  trainingLog.setProceedingText("[TRAINING BAG]");
48  warningLog.setProceedingText("[WARNING BAG]");
49  *this = rhs;
50 }
51 
52 BAG::~BAG(void)
53 {
54  clearEnsemble();
55 }
56 
57 BAG& BAG::operator=(const BAG &rhs){
58  if( this != &rhs ){
59  //Clear any previous ensemble
60  clearEnsemble();
61 
62  //Copy the weights
63  this->weights = rhs.weights;
64 
65  //Deep copy each classifier in the ensemble
66  for(UINT i=0; i<rhs.getEnsembleSize(); i++){
67  addClassifierToEnsemble( *(rhs.ensemble[i]) );
68  }
69  //Copy the base classifier variables
70  copyBaseVariables( (Classifier*)&rhs );
71  }
72  return *this;
73 }
74 
75 bool BAG::deepCopyFrom(const Classifier *classifier){
76 
77  if( classifier == NULL ) return false;
78 
79  if( this->getClassifierType() == classifier->getClassifierType() ){
80  BAG *ptr = (BAG*)classifier;
81 
82  //Clear any previous ensemble
83  clearEnsemble();
84 
85  //Copy the weights
86  this->weights = ptr->weights;
87 
88  //Deep copy each classifier in the ensemble
89  for(UINT i=0; i<ptr->getEnsembleSize(); i++){
90  addClassifierToEnsemble( *(ptr->ensemble[i]) );
91  }
92  //Copy the base classifier variables
93  return copyBaseVariables( classifier );
94  }
95  return false;
96 }
97 
98 bool BAG::train_(ClassificationData &trainingData){
99 
100  //Clear any previous models
101  clear();
102 
103  const unsigned int M = trainingData.getNumSamples();
104  const unsigned int N = trainingData.getNumDimensions();
105  const unsigned int K = trainingData.getNumClasses();
106 
107  if( M == 0 ){
108  errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
109  return false;
110  }
111 
112  numInputDimensions = N;
113  numClasses = K;
114  classLabels.resize(K);
115  ranges = trainingData.getRanges();
116 
117  //Scale the training data if needed
118  if( useScaling ){
119  //Scale the training data between 0 and 1
120  trainingData.scale(0, 1);
121  }
122 
123  UINT ensembleSize = ensemble.getSize();
124 
125  if( ensembleSize == 0 ){
126  errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << std::endl;
127  return false;
128  }
129 
130  for(UINT i=0; i<ensembleSize; i++){
131  if( ensemble[i] == NULL ){
132  errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " has not been set!" << std::endl;
133  return false;
134  }
135  }
136 
137  //Train the ensemble
138  for(UINT i=0; i<ensembleSize; i++){
139  ClassificationData boostedDataset = trainingData.getBootstrappedDataset();
140 
141  trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getClassType() << std::endl;
142 
143  //Train the classifier with the bootstrapped dataset
144  if( !ensemble[i]->train( boostedDataset ) ){
145  errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " failed training!" << std::endl;
146  return false;
147  }
148  }
149 
150  //Set the class labels
151  classLabels = trainingData.getClassLabels();
152 
153  //Flag that the model has been trained
154  trained = true;
155 
156  return trained;
157 }
158 
159 bool BAG::predict_(VectorFloat &inputVector){
160 
161  if( !trained ){
162  errorLog << "predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
163  return false;
164  }
165 
166  predictedClassLabel = 0;
167  maxLikelihood = -10000;
168 
169  if( !trained ) return false;
170 
171  if( inputVector.getSize() != numInputDimensions ){
172  errorLog << "predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
173  return false;
174  }
175 
176  if( useScaling ){
177  for(UINT n=0; n<numInputDimensions; n++){
178  inputVector[n] = scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
179  }
180  }
181 
182  if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses);
183  if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses);
184 
185  //Reset the likelihoods and distances
186  for(UINT k=0; k<numClasses; k++){
187  classLikelihoods[k] = 0;
188  classDistances[k] = 0;
189  }
190 
191  //Run the prediction for each classifier
192  Float sum = 0;
193  UINT ensembleSize = ensemble.getSize();
194  for(UINT i=0; i<ensembleSize; i++){
195 
196  if( !ensemble[i]->predict(inputVector) ){
197  errorLog << "predict_(VectorFloat &inputVector) - The " << i << " classifier in the ensemble failed prediction!" << std::endl;
198  return false;
199  }
200 
201  classLikelihoods[ getClassLabelIndexValue( ensemble[i]->getPredictedClassLabel() ) ] += weights[i];
202  classDistances[ getClassLabelIndexValue( ensemble[i]->getPredictedClassLabel() ) ] += ensemble[i]->getMaximumLikelihood() * weights[i];
203 
204  sum += weights[i];
205  }
206 
207  //Set the predicted class label as the most common class
208  Float maxCount = 0;
209  UINT maxIndex = 0;
210  for(UINT i=0; i<numClasses; i++){
211  if( classLikelihoods[i] > maxCount ){
212  maxIndex = i;
213  maxCount = classLikelihoods[i];
214  }
215  classLikelihoods[i] /= sum;
216  classDistances[i] /= Float(ensembleSize);
217  }
218 
219  predictedClassLabel = classLabels[ maxIndex ];
220  maxLikelihood = classLikelihoods[ maxIndex ];
221 
222  return true;
223 }
224 
225 bool BAG::reset(){
226 
227  //Reset all the classifiers
228  for(UINT i=0; i<ensemble.getSize(); i++){
229  if( ensemble[i] != NULL ){
230  ensemble[i]->reset();
231  }
232  }
233 
234  return true;
235 }
236 
237 bool BAG::clear(){
238 
239  //Clear the Classifier variables
241 
242  //Clear all the classifiers, but do not remove the ensemble
243  for(UINT i=0; i<ensemble.getSize(); i++){
244  if( ensemble[i] != NULL ){
245  ensemble[i]->clear();
246  }
247  }
248 
249  return true;
250 }
251 
252 bool BAG::saveModelToFile( std::fstream &file ) const{
253 
254  if(!file.is_open())
255  {
256  errorLog <<"saveModelToFile(fstream &file) - The file is not open!" << std::endl;
257  return false;
258  }
259 
260  const UINT ensembleSize = getEnsembleSize();
261 
262  //Write the header info
263  file << "GRT_BAG_MODEL_FILE_V2.0\n";
264 
265  //Write the classifier settings to the file
267  errorLog <<"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
268  return false;
269  }
270 
271  if( trained ){
272 
273  file << "EnsembleSize: " << ensembleSize << std::endl;
274 
275  if( getEnsembleSize() > 0 ){
276 
277  //Save the weights
278  file << "Weights: ";
279  for(UINT i=0; i<getEnsembleSize(); i++){
280  file << weights[i];
281  if( i < ensembleSize-1 ) file << "\t";
282  else file << "\n";
283  }
284 
285  //Save the classifier types
286  file << "ClassifierTypes: ";
287  for(UINT i=0; i<getEnsembleSize(); i++){
288  file << ensemble[i]->getClassifierType() << std::endl;
289  }
290 
291  //Save the ensemble
292  file << "Ensemble: \n";
293  for(UINT i=0; i<getEnsembleSize(); i++){
294  if( !ensemble[i]->saveModelToFile( file ) ){
295  errorLog <<"saveModelToFile(fstream &file) - Failed to save classifier " << i << " to file!" << std::endl;
296  return false;
297  }
298  }
299  }
300 
301  }
302 
303  //NOTE: We do not need to close the file
304 
305  return true;
306 }
307 
308 bool BAG::loadModelFromFile( std::fstream &file ){
309 
310  clear();
311  UINT ensembleSize = 0;
312 
313  if(!file.is_open())
314  {
315  errorLog << "loadModelFromFile(string filename) - Could not open file to load model" << std::endl;
316  return false;
317  }
318 
319  std::string word;
320  file >> word;
321 
322  //Check to see if we should load a legacy file
323  if( word == "GRT_BAG_MODEL_FILE_V1.0" ){
324  return loadLegacyModelFromFile( file );
325  }
326 
327  //Find the file type header
328  if(word != "GRT_BAG_MODEL_FILE_V2.0"){
329  errorLog << "loadModelFromFile(string filename) - Could not find Model File Header" << std::endl;
330  return false;
331  }
332 
333  //Load the base settings from the file
335  errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;
336  return false;
337  }
338 
339  if( trained ){
340 
341  //Load the ensemble size
342  file >> word;
343  if(word != "EnsembleSize:"){
344  errorLog << "loadModelFromFile(string filename) - Could not find the EnsembleSize!" << std::endl;
345  return false;
346  }
347  file >> ensembleSize;
348 
349  //Load the weights
350  weights.resize( ensembleSize );
351 
352  file >> word;
353  if(word != "Weights:"){
354  errorLog << "loadModelFromFile(string filename) - Could not find the Weights!" << std::endl;
355  return false;
356  }
357  for(UINT i=0; i<ensembleSize; i++){
358  file >> weights[i];
359  }
360 
361  //Load the classifier types
362  Vector< std::string > classifierTypes( ensembleSize );
363 
364  file >> word;
365  if(word != "ClassifierTypes:"){
366  errorLog << "loadModelFromFile(string filename) - Could not find the ClassifierTypes!" << std::endl;
367  return false;
368  }
369  for(UINT i=0; i<ensembleSize; i++){
370  file >> classifierTypes[i];
371  }
372 
373  //Load the ensemble
374  file >> word;
375  if(word != "Ensemble:"){
376  errorLog << "loadModelFromFile(string filename) - Could not find the Ensemble!" << std::endl;
377  return false;
378  }
379  ensemble.resize(ensembleSize,NULL);
380  for(UINT i=0; i<ensembleSize; i++){
381  ensemble[i] = createInstanceFromString( classifierTypes[i] );
382 
383  if( ensemble[i] == NULL ){
384  errorLog << "loadModelFromFile(string filename) - Could not create a new classifier instance from the classifierType: " << classifierTypes[i] << std::endl;
385  clearEnsemble();
386  return false;
387  }
388 
389  if( !ensemble[i]->loadModelFromFile( file ) ){
390  errorLog << "loadModelFromFile(string filename) - Failed to load ensemble classifier: " << i << std::endl;
391  clearEnsemble();
392  return false;
393  }
394  }
395 
396  //Recompute the null rejection thresholds
398 
399  //Resize the prediction results to make sure it is setup for realtime prediction
400  maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
401  bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
402  classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
403  classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
404  }
405 
406  return true;
407 }
408 
409 UINT BAG::getEnsembleSize() const{
410  return ensemble.getSize();
411 }
412 
414  return weights;
415 }
416 
418  return ensemble;
419 }
420 
421 bool BAG::addClassifierToEnsemble(const Classifier &classifier,Float weight){
422 
423  trained = false;
424 
425  Classifier *newClassifier = classifier.createNewInstance();
426 
427  if( newClassifier == NULL ){
428  return false;
429  }
430 
431  if( !newClassifier->deepCopyFrom( &classifier ) ){
432  return false;
433  }
434 
435  weights.push_back( weight );
436  ensemble.push_back( newClassifier );
437 
438  return false;
439 }
440 
442 
443  trained = false;
444  for(UINT i=0; i<ensemble.size(); i++){
445  if( ensemble[i] != NULL ){
446  delete ensemble[i];
447  ensemble[i] = NULL;
448  }
449  }
450  weights.clear();
451  ensemble.clear();
452 
453  return true;
454 }
455 
456 bool BAG::setWeights(const VectorFloat &weights){
457 
458  if( this->weights.size() != weights.size() ){
459  return false;
460  }
461  this->weights = weights;
462  return true;
463 }
464 
465 bool BAG::loadLegacyModelFromFile( std::fstream &file ){
466 
467  std::string word;
468 
469  return true;
470 }
471 
472 GRT_END_NAMESPACE
473 
Definition: BAG.h:44
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:255
virtual bool predict(VectorFloat inputVector)
Definition: MLBase.cpp:112
const Vector< Classifier * > getEnsemble() const
Definition: BAG.cpp:417
virtual bool recomputeNullRejectionThresholds()
Definition: Classifier.h:235
#define DEFAULT_NULL_LIKELIHOOD_VALUE
Definition: Classifier.h:38
VectorFloat getEnsembleWeights() const
Definition: BAG.cpp:413
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: MLBase.h:339
virtual bool clear()
Definition: BAG.cpp:237
UINT getEnsembleSize() const
Definition: BAG.cpp:409
std::string getClassifierType() const
Definition: Classifier.cpp:160
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool reset()
Definition: BAG.cpp:225
virtual bool train(ClassificationData trainingData)
Definition: MLBase.cpp:88
BAG(bool useScaling=false)
Definition: BAG.cpp:28
UINT getClassLabelIndexValue(UINT classLabel) const
Definition: Classifier.cpp:194
Vector< UINT > getClassLabels() const
virtual ~BAG(void)
Definition: BAG.cpp:52
unsigned int getSize() const
Definition: Vector.h:193
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: Classifier.h:61
UINT getNumSamples() const
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: BAG.cpp:75
virtual bool saveModelToFile(std::fstream &file) const
Definition: BAG.cpp:252
UINT getPredictedClassLabel() const
Definition: Classifier.cpp:202
static Classifier * createInstanceFromString(std::string const &classifierType)
Definition: Classifier.cpp:28
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:92
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:302
bool addClassifierToEnsemble(const Classifier &classifier, Float weight=1)
Definition: BAG.cpp:421
This class implements the bootstrap aggregator classifier. Bootstrap aggregating (bagging) is a machi...
UINT getNumDimensions() const
UINT getNumClasses() const
virtual bool train_(ClassificationData &trainingData)
Definition: BAG.cpp:98
Vector< MinMax > getRanges() const
Definition: Vector.h:41
BAG & operator=(const BAG &rhs)
Definition: BAG.cpp:57
ClassificationData getBootstrappedDataset(UINT numSamples=0, bool balanceDataset=false) const
bool clearEnsemble()
Definition: BAG.cpp:441
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: Classifier.cpp:141
virtual bool predict_(VectorFloat &inputVector)
Definition: BAG.cpp:159
Classifier * createNewInstance() const
Definition: Classifier.cpp:36
virtual bool loadModelFromFile(std::fstream &file)
Definition: BAG.cpp:308
bool setWeights(const VectorFloat &weights)
Definition: BAG.cpp:456