GestureRecognitionToolkit  Version: 0.2.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
BAG.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "BAG.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
26 //Register the BAG module with the Classifier base class under the string key "BAG"
//NOTE(review): presumably this registration is what allows Classifier::createInstanceFromString("BAG") to build a BAG instance - confirm against Classifier.h
27 RegisterClassifierModule< BAG > BAG::registerModule("BAG");
28 
29 BAG::BAG(bool useScaling)
30 {
31  this->useScaling = useScaling;
32  useNullRejection = false;
33  classType = "BAG";
34  classifierType = classType;
35  classifierMode = STANDARD_CLASSIFIER_MODE;
36  debugLog.setProceedingText("[DEBUG BAG]");
37  errorLog.setProceedingText("[ERROR BAG]");
38  trainingLog.setProceedingText("[TRAINING BAG]");
39  warningLog.setProceedingText("[WARNING BAG]");
40 }
41 
42 BAG::BAG(const BAG &rhs){
43  classType = "BAG";
44  classifierType = classType;
45  classifierMode = STANDARD_CLASSIFIER_MODE;
46  debugLog.setProceedingText("[DEBUG BAG]");
47  errorLog.setProceedingText("[ERROR BAG]");
48  trainingLog.setProceedingText("[TRAINING BAG]");
49  warningLog.setProceedingText("[WARNING BAG]");
50  *this = rhs;
51 }
52 
53 BAG::~BAG(void)
54 {
55  clearEnsemble();
56 }
57 
58 BAG& BAG::operator=(const BAG &rhs){
59  if( this != &rhs ){
60  //Clear any previous ensemble
61  clearEnsemble();
62 
63  //Copy the weights
64  this->weights = rhs.weights;
65 
66  //Deep copy each classifier in the ensemble
67  for(UINT i=0; i<rhs.getEnsembleSize(); i++){
68  addClassifierToEnsemble( *(rhs.ensemble[i]) );
69  }
70  //Copy the base classifier variables
71  copyBaseVariables( (Classifier*)&rhs );
72  }
73  return *this;
74 }
75 
76 bool BAG::deepCopyFrom(const Classifier *classifier){
77 
78  if( classifier == NULL ) return false;
79 
80  if( this->getClassifierType() == classifier->getClassifierType() ){
81  BAG *ptr = (BAG*)classifier;
82 
83  //Clear any previous ensemble
84  clearEnsemble();
85 
86  //Copy the weights
87  this->weights = ptr->weights;
88 
89  //Deep copy each classifier in the ensemble
90  for(UINT i=0; i<ptr->getEnsembleSize(); i++){
91  addClassifierToEnsemble( *(ptr->ensemble[i]) );
92  }
93  //Copy the base classifier variables
94  return copyBaseVariables( classifier );
95  }
96  return false;
97 }
98 
99 bool BAG::train_(ClassificationData &trainingData){
100 
101  //Clear any previous models
102  clear();
103 
104  const unsigned int M = trainingData.getNumSamples();
105  const unsigned int N = trainingData.getNumDimensions();
106  const unsigned int K = trainingData.getNumClasses();
107 
108  if( M == 0 ){
109  errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
110  return false;
111  }
112 
113  numInputDimensions = N;
114  numClasses = K;
115  classLabels.resize(K);
116  ranges = trainingData.getRanges();
117 
118  //Scale the training data if needed
119  if( useScaling ){
120  //Scale the training data between 0 and 1
121  trainingData.scale(0, 1);
122  }
123 
124  UINT ensembleSize = ensemble.getSize();
125 
126  if( ensembleSize == 0 ){
127  errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << std::endl;
128  return false;
129  }
130 
131  for(UINT i=0; i<ensembleSize; i++){
132  if( ensemble[i] == NULL ){
133  errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " has not been set!" << std::endl;
134  return false;
135  }
136  }
137 
138  //Train the ensemble
139  for(UINT i=0; i<ensembleSize; i++){
140  ClassificationData boostedDataset = trainingData.getBootstrappedDataset();
141 
142  trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getClassType() << std::endl;
143 
144  //Train the classifier with the bootstrapped dataset
145  if( !ensemble[i]->train( boostedDataset ) ){
146  errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " failed training!" << std::endl;
147  return false;
148  }
149  }
150 
151  //Set the class labels
152  classLabels = trainingData.getClassLabels();
153 
154  //Flag that the model has been trained
155  trained = true;
156 
157  return trained;
158 }
159 
160 bool BAG::predict_(VectorFloat &inputVector){
161 
162  if( !trained ){
163  errorLog << "predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
164  return false;
165  }
166 
167  predictedClassLabel = 0;
168  maxLikelihood = -10000;
169 
170  if( !trained ) return false;
171 
172  if( inputVector.getSize() != numInputDimensions ){
173  errorLog << "predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
174  return false;
175  }
176 
177  if( useScaling ){
178  for(UINT n=0; n<numInputDimensions; n++){
179  inputVector[n] = scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
180  }
181  }
182 
183  if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses);
184  if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses);
185 
186  //Reset the likelihoods and distances
187  for(UINT k=0; k<numClasses; k++){
188  classLikelihoods[k] = 0;
189  classDistances[k] = 0;
190  }
191 
192  //Run the prediction for each classifier
193  Float sum = 0;
194  UINT ensembleSize = ensemble.getSize();
195  for(UINT i=0; i<ensembleSize; i++){
196 
197  if( !ensemble[i]->predict(inputVector) ){
198  errorLog << "predict_(VectorFloat &inputVector) - The " << i << " classifier in the ensemble failed prediction!" << std::endl;
199  return false;
200  }
201 
202  classLikelihoods[ getClassLabelIndexValue( ensemble[i]->getPredictedClassLabel() ) ] += weights[i];
203  classDistances[ getClassLabelIndexValue( ensemble[i]->getPredictedClassLabel() ) ] += ensemble[i]->getMaximumLikelihood() * weights[i];
204 
205  sum += weights[i];
206  }
207 
208  //Set the predicted class label as the most common class
209  Float maxCount = 0;
210  UINT maxIndex = 0;
211  for(UINT i=0; i<numClasses; i++){
212  if( classLikelihoods[i] > maxCount ){
213  maxIndex = i;
214  maxCount = classLikelihoods[i];
215  }
216  classLikelihoods[i] /= sum;
217  classDistances[i] /= Float(ensembleSize);
218  }
219 
220  predictedClassLabel = classLabels[ maxIndex ];
221  maxLikelihood = classLikelihoods[ maxIndex ];
222 
223  return true;
224 }
225 
226 bool BAG::reset(){
227 
228  //Reset all the classifiers
229  for(UINT i=0; i<ensemble.getSize(); i++){
230  if( ensemble[i] != NULL ){
231  ensemble[i]->reset();
232  }
233  }
234 
235  return true;
236 }
237 
238 bool BAG::clear(){
239 
240  //Clear the Classifier variables
242 
243  //Clear all the classifiers, but do not remove the ensemble
244  for(UINT i=0; i<ensemble.getSize(); i++){
245  if( ensemble[i] != NULL ){
246  ensemble[i]->clear();
247  }
248  }
249 
250  return true;
251 }
252 
253 bool BAG::save( std::fstream &file ) const{
254 
255  if(!file.is_open())
256  {
257  errorLog <<"save(fstream &file) - The file is not open!" << std::endl;
258  return false;
259  }
260 
261  const UINT ensembleSize = getEnsembleSize();
262 
263  //Write the header info
264  file << "GRT_BAG_MODEL_FILE_V2.0\n";
265 
266  //Write the classifier settings to the file
268  errorLog <<"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
269  return false;
270  }
271 
272  if( trained ){
273 
274  file << "EnsembleSize: " << ensembleSize << std::endl;
275 
276  if( getEnsembleSize() > 0 ){
277 
278  //Save the weights
279  file << "Weights: ";
280  for(UINT i=0; i<getEnsembleSize(); i++){
281  file << weights[i];
282  if( i < ensembleSize-1 ) file << "\t";
283  else file << "\n";
284  }
285 
286  //Save the classifier types
287  file << "ClassifierTypes: ";
288  for(UINT i=0; i<getEnsembleSize(); i++){
289  file << ensemble[i]->getClassifierType() << std::endl;
290  }
291 
292  //Save the ensemble
293  file << "Ensemble: \n";
294  for(UINT i=0; i<getEnsembleSize(); i++){
295  if( !ensemble[i]->save( file ) ){
296  errorLog <<"save(fstream &file) - Failed to save classifier " << i << " to file!" << std::endl;
297  return false;
298  }
299  }
300  }
301 
302  }
303 
304  //NOTE: We do not need to close the file
305 
306  return true;
307 }
308 
309 bool BAG::load( std::fstream &file ){
310 
311  clear();
312  UINT ensembleSize = 0;
313 
314  if(!file.is_open())
315  {
316  errorLog << "load(string filename) - Could not open file to load model" << std::endl;
317  return false;
318  }
319 
320  std::string word;
321  file >> word;
322 
323  //Check to see if we should load a legacy file
324  if( word == "GRT_BAG_MODEL_FILE_V1.0" ){
325  return loadLegacyModelFromFile( file );
326  }
327 
328  //Find the file type header
329  if(word != "GRT_BAG_MODEL_FILE_V2.0"){
330  errorLog << "load(string filename) - Could not find Model File Header" << std::endl;
331  return false;
332  }
333 
334  //Load the base settings from the file
336  errorLog << "load(string filename) - Failed to load base settings from file!" << std::endl;
337  return false;
338  }
339 
340  if( trained ){
341 
342  //Load the ensemble size
343  file >> word;
344  if(word != "EnsembleSize:"){
345  errorLog << "load(string filename) - Could not find the EnsembleSize!" << std::endl;
346  return false;
347  }
348  file >> ensembleSize;
349 
350  //Load the weights
351  weights.resize( ensembleSize );
352 
353  file >> word;
354  if(word != "Weights:"){
355  errorLog << "load(string filename) - Could not find the Weights!" << std::endl;
356  return false;
357  }
358  for(UINT i=0; i<ensembleSize; i++){
359  file >> weights[i];
360  }
361 
362  //Load the classifier types
363  Vector< std::string > classifierTypes( ensembleSize );
364 
365  file >> word;
366  if(word != "ClassifierTypes:"){
367  errorLog << "load(string filename) - Could not find the ClassifierTypes!" << std::endl;
368  return false;
369  }
370  for(UINT i=0; i<ensembleSize; i++){
371  file >> classifierTypes[i];
372  }
373 
374  //Load the ensemble
375  file >> word;
376  if(word != "Ensemble:"){
377  errorLog << "load(string filename) - Could not find the Ensemble!" << std::endl;
378  return false;
379  }
380  ensemble.resize(ensembleSize,NULL);
381  for(UINT i=0; i<ensembleSize; i++){
382  ensemble[i] = createInstanceFromString( classifierTypes[i] );
383 
384  if( ensemble[i] == NULL ){
385  errorLog << "load(string filename) - Could not create a new classifier instance from the classifierType: " << classifierTypes[i] << std::endl;
386  clearEnsemble();
387  return false;
388  }
389 
390  if( !ensemble[i]->load( file ) ){
391  errorLog << "load(string filename) - Failed to load ensemble classifier: " << i << std::endl;
392  clearEnsemble();
393  return false;
394  }
395  }
396 
397  //Recompute the null rejection thresholds
399 
400  //Resize the prediction results to make sure it is setup for realtime prediction
401  maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
402  bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
403  classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
404  classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
405  }
406 
407  return true;
408 }
409 
410 UINT BAG::getEnsembleSize() const{
411  return ensemble.getSize();
412 }
413 
415  return weights;
416 }
417 
419  return ensemble;
420 }
421 
422 bool BAG::addClassifierToEnsemble(const Classifier &classifier,Float weight){
423 
424  trained = false;
425 
426  Classifier *newClassifier = classifier.createNewInstance();
427 
428  if( newClassifier == NULL ){
429  return false;
430  }
431 
432  if( !newClassifier->deepCopyFrom( &classifier ) ){
433  return false;
434  }
435 
436  weights.push_back( weight );
437  ensemble.push_back( newClassifier );
438 
439  return false;
440 }
441 
443 
444  trained = false;
445  for(UINT i=0; i<ensemble.size(); i++){
446  if( ensemble[i] != NULL ){
447  delete ensemble[i];
448  ensemble[i] = NULL;
449  }
450  }
451  weights.clear();
452  ensemble.clear();
453 
454  return true;
455 }
456 
457 bool BAG::setWeights(const VectorFloat &weights){
458 
459  if( this->weights.size() != weights.size() ){
460  return false;
461  }
462  this->weights = weights;
463  return true;
464 }
465 
466 bool BAG::loadLegacyModelFromFile( std::fstream &file ){
467 
468  std::string word;
469 
470  return true;
471 }
472 
473 GRT_END_NAMESPACE
Definition: BAG.h:44
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:256
virtual bool predict(VectorFloat inputVector)
Definition: MLBase.cpp:113
const Vector< Classifier * > getEnsemble() const
Definition: BAG.cpp:418
virtual bool recomputeNullRejectionThresholds()
Definition: Classifier.h:237
#define DEFAULT_NULL_LIKELIHOOD_VALUE
Definition: Classifier.h:38
VectorFloat getEnsembleWeights() const
Definition: BAG.cpp:414
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: MLBase.h:353
virtual bool clear()
Definition: BAG.cpp:238
UINT getEnsembleSize() const
Definition: BAG.cpp:410
std::string getClassifierType() const
Definition: Classifier.cpp:161
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool reset()
Definition: BAG.cpp:226
virtual bool train(ClassificationData trainingData)
Definition: MLBase.cpp:89
BAG(bool useScaling=false)
Definition: BAG.cpp:29
virtual bool save(std::fstream &file) const
Definition: BAG.cpp:253
UINT getSize() const
Definition: Vector.h:191
UINT getClassLabelIndexValue(UINT classLabel) const
Definition: Classifier.cpp:195
Vector< UINT > getClassLabels() const
virtual ~BAG(void)
Definition: BAG.cpp:53
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: Classifier.h:63
UINT getNumSamples() const
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: BAG.cpp:76
UINT getPredictedClassLabel() const
Definition: Classifier.cpp:203
virtual bool load(std::fstream &file)
Definition: BAG.cpp:309
static Classifier * createInstanceFromString(std::string const &classifierType)
Definition: Classifier.cpp:29
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:93
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:303
bool addClassifierToEnsemble(const Classifier &classifier, Float weight=1)
Definition: BAG.cpp:422
This class implements the bootstrap aggregator classifier. Bootstrap aggregating (bagging) is a machi...
UINT getNumDimensions() const
UINT getNumClasses() const
virtual bool train_(ClassificationData &trainingData)
Definition: BAG.cpp:99
Vector< MinMax > getRanges() const
Definition: Vector.h:41
BAG & operator=(const BAG &rhs)
Definition: BAG.cpp:58
ClassificationData getBootstrappedDataset(UINT numSamples=0, bool balanceDataset=false) const
bool clearEnsemble()
Definition: BAG.cpp:442
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: Classifier.cpp:142
virtual bool predict_(VectorFloat &inputVector)
Definition: BAG.cpp:160
Classifier * createNewInstance() const
Definition: Classifier.cpp:37
bool setWeights(const VectorFloat &weights)
Definition: BAG.cpp:457