GestureRecognitionToolkit  Version: 0.2.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
RandomForests.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "RandomForests.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
//Define the string that will be used to identify the object
std::string RandomForests::id = "RandomForests";
std::string RandomForests::getId() { return RandomForests::id; }
29 
//Register the RandomForests module with the Classifier base class so it can be
//created dynamically from its string id (e.g. when loading pipelines from file)
RegisterClassifierModule< RandomForests > RandomForests::registerModule( RandomForests::getId() );
32 
//Constructor: stores the forest hyperparameters and initializes the base Classifier state.
//@param decisionTreeNode: template node deep-copied here; every tree in the forest is built from this node type
//@param forestSize: number of trees to train
//@param numRandomSplits: number of candidate splits evaluated at each node
//@param minNumSamplesPerNode: stop splitting a node once it holds fewer samples than this
//@param maxDepth: maximum depth of each tree
//@param trainingMode: split-search strategy (a DecisionTree training mode)
//@param removeFeaturesAtEachSpilt: if true, a feature is removed from the candidate set after being used ('Spilt' spelling matches the public GRT API)
//@param useScaling: if true, inputs are scaled to [0 1] before training and prediction
//@param bootstrappedDatasetWeight: fraction of the training data bootstrapped for each tree, (0.0, 1.0]
RandomForests::RandomForests(const DecisionTreeNode &decisionTreeNode,const UINT forestSize,const UINT numRandomSplits,const UINT minNumSamplesPerNode,const UINT maxDepth,const UINT trainingMode,const bool removeFeaturesAtEachSpilt,const bool useScaling,const Float bootstrappedDatasetWeight)
{
    //Deep copy the template node; this instance owns the copy (freed in the destructor)
    this->decisionTreeNode = decisionTreeNode.deepCopy();
    this->forestSize = forestSize;
    this->numRandomSplits = numRandomSplits;
    this->minNumSamplesPerNode = minNumSamplesPerNode;
    this->maxDepth = maxDepth;
    this->trainingMode = trainingMode;
    this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
    this->useScaling = useScaling;
    this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
    classType = RandomForests::getId();
    classifierType = classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    //Null rejection is explicitly unsupported by this classifier
    useNullRejection = false;
    supportsNullRejection = false;
    //A validation set is enabled by default; 20 is the held-out size passed to the
    //base class (presumably a percentage — confirm against MLBase::setValidationSetSize)
    useValidationSet = true;
    validationSetSize = 20;
    debugLog.setProceedingText("[DEBUG " + RandomForests::getId() + "]");
    errorLog.setProceedingText("[ERROR " + RandomForests::getId() + "]");
    trainingLog.setProceedingText("[TRAINING " + RandomForests::getId() + "]");
    warningLog.setProceedingText("[WARNING " + RandomForests::getId() + "]");
}
56 
    //NOTE(review): the copy-constructor signature line is not visible in this view
    //Initialize the node pointer to NULL so operator= does not delete a wild pointer
    this->decisionTreeNode = NULL;
    classType = RandomForests::getId();
    classifierType = classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    debugLog.setProceedingText("[DEBUG " + RandomForests::getId() + "]");
    errorLog.setProceedingText("[ERROR " + RandomForests::getId() + "]");
    trainingLog.setProceedingText("[TRAINING " + RandomForests::getId() + "]");
    warningLog.setProceedingText("[WARNING " + RandomForests::getId() + "]");
    //Delegate the deep copy to the assignment operator
    *this = rhs;
}
68 
{
    //Destructor: clear() frees the trained forest, then the template node is freed
    clear();

    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }
}
78 
    //Assignment: deep copies the template node, the trained forest, and the
    //hyperparameters from rhs. Self-assignment safe.
    if( this != &rhs ){
        //Clear this tree
        clear();

        //Copy the base classifier variables
        if( copyBaseVariables( (Classifier*)&rhs ) ){

            //Deep copy the main node, freeing any node we currently own
            if( this->decisionTreeNode != NULL ){
                delete decisionTreeNode;
                decisionTreeNode = NULL;
            }
            this->decisionTreeNode = rhs.deepCopyDecisionTreeNode();

            if( rhs.getTrained() ){
                //Deep copy the forest
                //NOTE(review): unlike deepCopyFrom, this loop does not reserve capacity first
                for(UINT i=0; i<rhs.forest.size(); i++){
                    this->forest.push_back( rhs.forest[i]->deepCopy() );
                }
            }

            //Copy the forest hyperparameters
            this->forestSize = rhs.forestSize;
            this->numRandomSplits = rhs.numRandomSplits;
            this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
            this->maxDepth = rhs.maxDepth;
            this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
            this->bootstrappedDatasetWeight = rhs.bootstrappedDatasetWeight;
            this->trainingMode = rhs.trainingMode;

        //NOTE(review): this error message says "deepCopyFrom" but is emitted from operator=
        }else errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
    }
    return *this;
}
113 
114 bool RandomForests::deepCopyFrom(const Classifier *classifier){
115 
116  if( classifier == NULL ) return false;
117 
118  if( this->getClassifierType() == classifier->getClassifierType() ){
119 
120  RandomForests *ptr = (RandomForests*)classifier;
121 
122  //Clear this tree
123  this->clear();
124 
125  if( copyBaseVariables( classifier ) ){
126 
127  //Deep copy the main node
128  if( this->decisionTreeNode != NULL ){
129  delete decisionTreeNode;
130  decisionTreeNode = NULL;
131  }
132  this->decisionTreeNode = ptr->deepCopyDecisionTreeNode();
133 
134  if( ptr->getTrained() ){
135  //Deep copy the forest
136  this->forest.reserve( ptr->forest.getSize() );
137  for(UINT i=0; i<ptr->forest.getSize(); i++){
138  this->forest.push_back( ptr->forest[i]->deepCopy() );
139  }
140  }
141 
142  this->forestSize = ptr->forestSize;
143  this->numRandomSplits = ptr->numRandomSplits;
144  this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
145  this->maxDepth = ptr->maxDepth;
146  this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
147  this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight;
148  this->trainingMode = ptr->trainingMode;
149 
150  return true;
151  }
152 
153  errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
154  }
155  return false;
156 }
157 
159 
    //NOTE(review): the train_(ClassificationData&) signature line is not visible in this view
    //Clear any previous model
    clear();

    //M = number of samples, N = number of input dimensions, K = number of classes
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();

    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
        errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = K;
    classLabels = trainingData.getClassLabels();
    //Store the ranges so predict_ can apply the same scaling later
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //Reset the validation metrics; one extra slot is used when null rejection is enabled
    if( useValidationSet ){
        validationSetAccuracy = 0;
        validationSetPrecision.resize( useNullRejection ? K+1 : K, 0 );
        validationSetRecall.resize( useNullRejection ? K+1 : K, 0 );
    }

    //Flag that the main algorithm has been trained in case we need to trigger any callbacks
    trained = true;

    //Train the random forest
    forest.reserve( forestSize );

    for(UINT i=0; i<forestSize; i++){

        //Get a balanced bootstrapped dataset
        UINT datasetSize = (UINT)floor(trainingData.getNumSamples() * bootstrappedDatasetWeight);
        ClassificationData data = trainingData.getBootstrappedDataset( datasetSize, true );

        //Time how long this tree takes to train (reported via trainingLog below)
        Timer timer;
        timer.start();

        //Configure this tree with the forest's hyperparameters
        DecisionTree tree;
        tree.setDecisionTreeNode( *decisionTreeNode );
        tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
        tree.setUseValidationSet( useValidationSet );
        tree.setValidationSetSize( validationSetSize );
        tree.setTrainingMode( trainingMode );
        tree.setNumSplittingSteps( numRandomSplits );
        tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
        tree.setMaxDepth( maxDepth );
        tree.enableNullRejection( useNullRejection );
        tree.setRemoveFeaturesAtEachSpilt( removeFeaturesAtEachSpilt );

        trainingLog << "Training decision tree " << i+1 << "/" << forestSize << "..." << std::endl;

        //Train this tree; one bad tree aborts (and clears) the whole forest
        if( !tree.train_( data ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train tree at forest index: " << i << std::endl;
            clear();
            return false;
        }

        Float computeTime = timer.getMilliSeconds();
        trainingLog << "Decision tree trained in " << (computeTime*0.001)/60.0 << " minutes" << std::endl;

        //Accumulate the per-tree validation metrics, averaged over the forest
        if( useValidationSet ){
            Float forestNorm = 1.0 / forestSize;
            //Accuracy is summed here and divided by forestSize after the loop
            validationSetAccuracy += tree.getValidationSetAccuracy();
            VectorFloat precision = tree.getValidationSetPrecision();
            VectorFloat recall = tree.getValidationSetRecall();

            grt_assert( precision.getSize() == validationSetPrecision.getSize() );
            grt_assert( recall.getSize() == validationSetRecall.getSize() );

            //NOTE(review): these inner loops shadow the outer tree index 'i'
            for(UINT i=0; i<validationSetPrecision.getSize(); i++){
                validationSetPrecision[i] += precision[i] * forestNorm;
            }

            for(UINT i=0; i<validationSetRecall.getSize(); i++){
                validationSetRecall[i] += recall[i] * forestNorm;
            }
        }

        //Deep copy the tree into the forest (the forest owns the copies; freed in clear())
        forest.push_back( tree.deepCopyTree() );
    }

    //Report the averaged validation metrics
    if( useValidationSet ){
        validationSetAccuracy /= forestSize;
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;

        trainingLog << "Validation set precision: ";
        for(UINT i=0; i<validationSetPrecision.getSize(); i++){
            trainingLog << validationSetPrecision[i] << " ";
        }
        trainingLog << std::endl;

        trainingLog << "Validation set recall: ";
        for(UINT i=0; i<validationSetRecall.getSize(); i++){
            trainingLog << validationSetRecall[i] << " ";
        }
        trainingLog << std::endl;
    }

    return true;
}
274 
276 
    //NOTE(review): the predict_(VectorDouble&) signature line is not visible in this view
    //Reset the prediction state before anything else
    predictedClassLabel = 0;
    maxLikelihood = 0;

    if( !trained ){
        errorLog << "predict_(VectorDouble &inputVector) - Model Not Trained!" << std::endl;
        return false;
    }

    if( inputVector.getSize() != numInputDimensions ){
        errorLog << "predict_(VectorDouble &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
        return false;
    }

    //Scale the input with the ranges stored at training time (modifies the input in place)
    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses,0);

    std::fill(classDistances.begin(),classDistances.end(),0);

    //Run the prediction for each tree in the forest, summing the per-class outputs
    VectorDouble y;
    for(UINT i=0; i<forestSize; i++){
        if( !forest[i]->predict(inputVector, y) ){
            errorLog << "predict_(VectorDouble &inputVector) - Tree " << i << " failed prediction!" << std::endl;
            return false;
        }

        for(UINT j=0; j<numClasses; j++){
            classDistances[j] += y[j];
        }
    }

    //Use the class distances to estimate the class likelihoods
    //(each likelihood is the average of the summed tree outputs)
    bestDistance = 0;
    UINT bestIndex = 0;
    Float classNorm = 1.0 / Float(forestSize);
    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] = classDistances[k] * classNorm;

        if( classLikelihoods[k] > maxLikelihood ){
            maxLikelihood = classLikelihoods[k];
            bestDistance = classDistances[k];
            bestIndex = k;
        }
    }

    //Map the winning index back to the user's class label
    predictedClassLabel = classLabels[ bestIndex ];

    return true;
}
332 
334 
    //NOTE(review): the clear() signature line and the base-class clear call are not visible in this view
    //Call the classifiers clear function

    //Delete the forest: each tree is owned by this object (deep copies made in train_/load)
    for(UINT i=0; i<forest.getSize(); i++){
        if( forest[i] != NULL ){
            forest[i]->clear();
            delete forest[i];
            forest[i] = NULL;
        }
    }
    forest.clear();

    return true;
}
350 
351 bool RandomForests::print() const{
352 
353  std::cout << "RandomForest\n";
354  std::cout << "ForestSize: " << forestSize << std::endl;
355  std::cout << "NumSplittingSteps: " << numRandomSplits << std::endl;
356  std::cout << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
357  std::cout << "MaxDepth: " << maxDepth << std::endl;
358  std::cout << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
359  std::cout << "TrainingMode: " << trainingMode << std::endl;
360  std::cout << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;
361 
362  if( trained ){
363  std::cout << "Forest:\n";
364  for(UINT i=0; i<forestSize; i++){
365  std::cout << "Tree: " << i+1 << std::endl;
366  forest[i]->print();
367  }
368  }
369 
370  return true;
371 }
372 
//Saves the model to an open fstream in the GRT_RANDOM_FOREST_MODEL_FILE_V1.0 format.
//@param file: an open output file stream
//@return true if the model was saved, false otherwise
bool RandomForests::save( std::fstream &file ) const{

    if(!file.is_open())
    {
        errorLog <<"save(fstream &file) - The file is not open!" << std::endl;
        return false;
    }

    //Write the header info
    file << "GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";

    //Write the classifier settings to the file
    //NOTE(review): the guard line for this branch (presumably the Classifier::saveBaseSettingsToFile check) is not visible in this view
    errorLog <<"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
    return false;
    }

    //Save the template decision tree node type and its settings (or NULL if none is set)
    if( decisionTreeNode != NULL ){
        file << "DecisionTreeNodeType: " << decisionTreeNode->getNodeType() << std::endl;
        if( !decisionTreeNode->save( file ) ){
            Classifier::errorLog <<"save(fstream &file) - Failed to save decisionTreeNode settings to file!" << std::endl;
            return false;
        }
    }else{
        file << "DecisionTreeNodeType: " << "NULL" << std::endl;
    }

    //Save the forest hyperparameters
    file << "ForestSize: " << forestSize << std::endl;
    file << "NumSplittingSteps: " << numRandomSplits << std::endl;
    file << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
    file << "MaxDepth: " << maxDepth << std::endl;
    file << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
    file << "TrainingMode: " << trainingMode << std::endl;
    //ForestBuilt doubles as the trained flag when the model is re-loaded
    file << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;

    //Save each tree, tagged with its 1-based index and node type
    if( trained ){
        file << "Forest:\n";
        for(UINT i=0; i<forestSize; i++){
            file << "Tree: " << i+1 << std::endl;
            file << "TreeNodeType: " << forest[i]->getNodeType() << std::endl;
            if( !forest[i]->save( file ) ){
                errorLog << "save(fstream &file) - Failed to save tree " << i << " to file!" << std::endl;
                return false;
            }
        }
    }

    return true;
}
422 
//Loads a model previously written by save() from an open fstream.
//Any existing model is cleared first.
//@param file: an open input file stream positioned at the model header
//@return true if the model was loaded, false otherwise
bool RandomForests::load( std::fstream &file ){

    clear();

    if(!file.is_open())
    {
        errorLog << "load(string filename) - Could not open file to load model" << std::endl;
        return false;
    }

    std::string word;
    std::string treeNodeType;

    file >> word;

    //Find the file type header
    if(word != "GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
        errorLog << "load(string filename) - Could not find Model File Header" << std::endl;
        return false;
    }

    //Load the base settings from the file
    //NOTE(review): the guard line for this branch (presumably the Classifier::loadBaseSettingsFromFile check) is not visible in this view
    errorLog << "load(string filename) - Failed to load base settings from file!" << std::endl;
    return false;
    }

    //Read the template node type; it is created dynamically from its string name
    file >> word;
    if(word != "DecisionTreeNodeType:"){
        Classifier::errorLog << "load(string filename) - Could not find the DecisionTreeNodeType!" << std::endl;
        return false;
    }
    file >> treeNodeType;

    if( treeNodeType != "NULL" ){

        decisionTreeNode = dynamic_cast< DecisionTreeNode* >( DecisionTreeNode::createInstanceFromString( treeNodeType ) );

        if( decisionTreeNode == NULL ){
            Classifier::errorLog << "load(string filename) - Could not create new DecisionTreeNode from type: " << treeNodeType << std::endl;
            return false;
        }

        if( !decisionTreeNode->load( file ) ){
            Classifier::errorLog <<"load(fstream &file) - Failed to load decisionTreeNode settings from file!" << std::endl;
            return false;
        }
    }else{
        //A NULL template node is treated as a corrupt/unsupported model file
        Classifier::errorLog <<"load(fstream &file) - Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;
        return false;
    }

    //Read each hyperparameter as a "Key: value" pair, in the order save() wrote them
    file >> word;
    if(word != "ForestSize:"){
        errorLog << "load(string filename) - Could not find the ForestSize!" << std::endl;
        return false;
    }
    file >> forestSize;

    file >> word;
    if(word != "NumSplittingSteps:"){
        errorLog << "load(string filename) - Could not find the NumSplittingSteps!" << std::endl;
        return false;
    }
    file >> numRandomSplits;

    file >> word;
    if(word != "MinNumSamplesPerNode:"){
        errorLog << "load(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
        return false;
    }
    file >> minNumSamplesPerNode;

    file >> word;
    if(word != "MaxDepth:"){
        errorLog << "load(string filename) - Could not find the MaxDepth!" << std::endl;
        return false;
    }
    file >> maxDepth;

    file >> word;
    if(word != "RemoveFeaturesAtEachSpilt:"){
        errorLog << "load(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
        return false;
    }
    file >> removeFeaturesAtEachSpilt;

    file >> word;
    if(word != "TrainingMode:"){
        errorLog << "load(string filename) - Could not find the TrainingMode!" << std::endl;
        return false;
    }
    file >> trainingMode;

    file >> word;
    if(word != "ForestBuilt:"){
        errorLog << "load(string filename) - Could not find the ForestBuilt!" << std::endl;
        return false;
    }
    //ForestBuilt doubles as the trained flag
    file >> trained;

    if( trained ){
        //Find the forest header
        file >> word;
        if(word != "Forest:"){
            errorLog << "load(string filename) - Could not find the Forest!" << std::endl;
            return false;
        }

        //Load each tree
        UINT treeIndex;
        forest.reserve( forestSize );
        for(UINT i=0; i<forestSize; i++){

            file >> word;
            if(word != "Tree:"){
                errorLog << "load(string filename) - Could not find the Tree Header!" << std::endl;
                //NOTE(review): debug output to std::cout left in the failure path
                std::cout << "WORD: " << word << std::endl;
                std::cout << "Tree i: " << i << std::endl;
                return false;
            }
            file >> treeIndex;

            //Tree indices are 1-based and must be sequential
            if( treeIndex != i+1 ){
                errorLog << "load(string filename) - Incorrect tree index: " << treeIndex << std::endl;
                return false;
            }

            file >> word;
            if(word != "TreeNodeType:"){
                errorLog << "load(string filename) - Could not find the TreeNodeType!" << std::endl;
                //NOTE(review): debug output to std::cout left in the failure path
                std::cout << "WORD: " << word << std::endl;
                std::cout << "i: " << i << std::endl;
                return false;
            }
            file >> treeNodeType;

            //Create a new DTree
            DecisionTreeNode *tree = dynamic_cast< DecisionTreeNode* >( DecisionTreeNode::createInstanceFromString( treeNodeType ) );

            if( tree == NULL ){
                errorLog << "load(fstream &file) - Failed to create new Tree!" << std::endl;
                return false;
            }

            //Load the tree from the file
            tree->setParent( NULL );
            if( !tree->load( file ) ){
                errorLog << "load(fstream &file) - Failed to load tree from file!" << std::endl;
                //NOTE(review): 'tree' is not deleted here — potential memory leak on this failure path
                return false;
            }

            //Add the tree to the forest (the forest takes ownership; freed in clear())
            forest.push_back( tree );
        }
    }

    return true;
}
582 
584 
    //NOTE(review): the combineModels(const RandomForests&) signature line is not visible in this view
    //Both this instance and the external forest must already be trained
    if( !getTrained() ){
        errorLog << "combineModels( const RandomForests &forest ) - This instance has not been trained!" << std::endl;
        return false;
    }

    if( !forest.getTrained() ){
        errorLog << "combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << std::endl;
        return false;
    }

    //The two models must operate on the same feature space
    if( this->getNumInputDimensions() != forest.getNumInputDimensions() ) {
        errorLog << "combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest (";
        errorLog << forest.getNumInputDimensions() << ") does not match the number of input dimensions of this instance (";
        errorLog << this->getNumInputDimensions() << ")!" << std::endl;
        return false;
    }

    //Add the trees in the other forest to this model
    DecisionTreeNode *node;
    for(UINT i=0; i<forest.getForestSize(); i++){
        node = forest.getTree(i);
        if( node ){
            //Deep copy so each forest keeps ownership of its own trees
            this->forest.push_back( node->deepCopy() );
            //Keep forestSize in sync with the number of stored trees
            forestSize++;
        }
    }

    return true;
}
614 
    //NOTE(review): the getter signatures below are on lines not visible in this view;
    //the names are inferred from the members being returned.
    return forestSize; //getForestSize()
}

    return numRandomSplits; //getNumRandomSplits()
}

    return minNumSamplesPerNode; //getMinNumSamplesPerNode()
}

    return maxDepth; //getMaxDepth()
}

    return trainingMode; //getTrainingMode()
}

    return removeFeaturesAtEachSpilt; //getRemoveFeaturesAtEachSpilt()
}

    return bootstrappedDatasetWeight; //getBootstrappedDatasetWeight()
}

    return forest; //getForest(): the forest retains ownership of the trees
}

    //deepCopyDecisionTreeNode(): returns NULL when no template node is set,
    //otherwise a deep copy the caller owns
    if( decisionTreeNode == NULL ){
        return NULL;
    }

    return decisionTreeNode->deepCopy();
}
655 
656 DecisionTreeNode* RandomForests::getTree( const UINT index ) const{
657 
658  if( !trained || index >= forestSize ) return NULL;
659 
660  return forest[ index ];
661 }
662 
663 VectorDouble RandomForests::getFeatureWeights( const bool normWeights ) const{
664 
665  if( !trained ) return VectorDouble();
666 
667  VectorDouble weights( numInputDimensions, 0 );
668 
669  for(UINT i=0; i<forestSize; i++){
670  if( !forest[i]->computeFeatureWeights( weights ) ){
671  warningLog << "getFeatureWeights( const bool normWeights ) - Failed to compute weights for tree: " << i << std::endl;
672  }
673  }
674 
675  //Normalize the weights
676  if( normWeights ){
677  Float sum = Util::sum( weights );
678  if( sum > 0.0 ){
679  const Float norm = 1.0 / sum;
680  for(UINT j=0; j<numInputDimensions; j++){
681  weights[j] *= norm;
682  }
683  }
684  }
685 
686  return weights;
687 }
688 
690 
    //NOTE(review): the getLeafNodeFeatureWeights(const bool) signature line is not visible in this view
    //An untrained model has no leaf nodes; return an empty matrix
    if( !trained ) return MatrixDouble();

    //One row per class, one column per input feature
    MatrixDouble weights( getNumClasses(), numInputDimensions );
    weights.setAllValues(0.0);

    //Let each tree add its leaf-node contribution; a failing tree only triggers a warning
    for(UINT i=0; i<forestSize; i++){
        if( !forest[i]->computeLeafNodeWeights( weights ) ){
            warningLog << "computeLeafNodeWeights( const bool normWeights ) - Failed to compute leaf node weights for tree: " << i << std::endl;
        }
    }

    //Normalize the weights so each feature column sums to 1 (columns summing to 0 are skipped)
    if( normWeights ){
        for(UINT j=0; j<weights.getNumCols(); j++){
            Float sum = 0.0;
            for(UINT i=0; i<weights.getNumRows(); i++){
                sum += weights[i][j];
            }
            if( sum != 0.0 ){
                const Float norm = 1.0 / sum;
                for(UINT i=0; i<weights.getNumRows(); i++){
                    weights[i][j] *= norm;
                }
            }
        }
    }

    return weights;
}
720 
721 bool RandomForests::setForestSize(const UINT forestSize){
722  if( forestSize > 0 ){
723  clear();
724  this->forestSize = forestSize;
725  return true;
726  }
727  return false;
728 }
729 
730 bool RandomForests::setNumRandomSplits(const UINT numRandomSplits){
731  if( numRandomSplits > 0 ){
732  this->numRandomSplits = numRandomSplits;
733  return true;
734  }
735  return false;
736 }
737 
738 bool RandomForests::setMinNumSamplesPerNode(const UINT minNumSamplesPerNode){
739  if( minNumSamplesPerNode > 0 ){
740  this->minNumSamplesPerNode = minNumSamplesPerNode;
741  return true;
742  }
743  return false;
744 }
745 
746 bool RandomForests::setMaxDepth(const UINT maxDepth){
747  if( maxDepth > 0 ){
748  this->maxDepth = maxDepth;
749  return true;
750  }
751  return false;
752 }
753 
//Sets whether a feature is removed from the candidate set once it has been used
//for a split ('Spilt' spelling matches the public GRT API). Always succeeds.
bool RandomForests::setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt){
    this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
    return true;
}
758 
759 bool RandomForests::setTrainingMode(const UINT trainingMode){
760 
761  if( trainingMode == DecisionTree::BEST_ITERATIVE_SPILT || trainingMode == DecisionTree::BEST_RANDOM_SPLIT ){
762  this->trainingMode = trainingMode;
763  return true;
764  }
765 
766  warningLog << "setTrainingMode(const UINT mode) - Unknown training mode!" << std::endl;
767  return false;
768 }
769 
771 
    //NOTE(review): the setDecisionTreeNode(const DecisionTreeNode&) signature line is not visible in this view
    //Replace the template node used to build each tree, freeing any node we currently own
    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }
    //Store a deep copy so the caller keeps ownership of 'node'
    this->decisionTreeNode = node.deepCopy();

    return true;
}
780 
781 bool RandomForests::setBootstrappedDatasetWeight( const Float bootstrappedDatasetWeight ){
782 
783  if( bootstrappedDatasetWeight > 0.0 && bootstrappedDatasetWeight <= 1.0 ){
784  this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
785  return true;
786  }
787 
788  warningLog << "setBootstrappedDatasetWeight(...) - Bad parameter, the weight must be > 0.0 and <= 1.0. Weight: " << bootstrappedDatasetWeight << std::endl;
789  return false;
790 }
791 
792 GRT_END_NAMESPACE
DecisionTreeNode * getTree(const UINT index) const
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:256
virtual bool predict(VectorFloat inputVector)
Definition: MLBase.cpp:113
Definition: Timer.h:43
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
Definition: Tree.cpp:157
UINT getForestSize() const
RandomForests(const DecisionTreeNode &decisionTreeNode=DecisionTreeClusterNode(), const UINT forestSize=10, const UINT numRandomSplits=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const UINT trainingMode=DecisionTree::BEST_RANDOM_SPLIT, const bool removeFeaturesAtEachSpilt=true, const bool useScaling=false, const Float bootstrappedDatasetWeight=0.8)
virtual ~RandomForests(void)
bool setBootstrappedDatasetWeight(const Float bootstrappedDatasetWeight)
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
Definition: Tree.cpp:139
virtual bool save(std::fstream &file) const
std::string getClassifierType() const
Definition: Classifier.cpp:161
UINT getMaxDepth() const
virtual UINT getNumClasses() const
Definition: Classifier.cpp:191
bool enableScaling(const bool useScaling)
Definition: MLBase.cpp:271
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool train_(ClassificationData &trainingData)
std::string getNodeType() const
Definition: Node.cpp:304
bool getTrained() const
Definition: MLBase.cpp:259
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
UINT getSize() const
Definition: Vector.h:191
bool setNumSplittingSteps(const UINT numSplittingSteps)
Definition: Tree.cpp:130
Vector< UINT > getClassLabels() const
bool combineModels(const RandomForests &forest)
signed long getMilliSeconds()
Definition: Timer.h:117
RandomForests & operator=(const RandomForests &rhs)
static std::string getId()
bool setNumRandomSplits(const UINT numSplittingSteps)
bool setAllValues(const T &value)
Definition: Matrix.h:336
virtual bool train_(ClassificationData &trainingData)
MatrixDouble getLeafNodeFeatureWeights(const bool normWeights=true) const
DecisionTreeNode * deepCopyTree() const
virtual bool save(std::fstream &file) const
Definition: Node.cpp:140
UINT getNumSamples() const
bool setForestSize(const UINT forestSize)
bool setValidationSetSize(const UINT validationSetSize)
Definition: MLBase.cpp:304
bool setMaxDepth(const UINT maxDepth)
UINT getNumRandomSplits() const
virtual bool deepCopyFrom(const Classifier *classifier)
Float getBootstrappedDatasetWeight() const
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:93
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:303
Float getValidationSetAccuracy() const
Definition: MLBase.cpp:247
bool setMaxDepth(const UINT maxDepth)
Definition: Tree.cpp:148
unsigned int getNumRows() const
Definition: Matrix.h:542
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
Definition: Matrix.h:549
bool setTrainingMode(const UINT trainingMode)
UINT getMinNumSamplesPerNode() const
bool setDecisionTreeNode(const DecisionTreeNode &node)
bool start()
Definition: Timer.h:64
virtual bool predict_(VectorDouble &inputVector)
DecisionTreeNode * deepCopy() const
Vector< MinMax > getRanges() const
VectorDouble getFeatureWeights(const bool normWeights=true) const
const Vector< DecisionTreeNode * > & getForest() const
VectorFloat getValidationSetPrecision() const
Definition: MLBase.cpp:251
bool enableNullRejection(bool useNullRejection)
Definition: Classifier.cpp:231
virtual bool load(std::fstream &file)
bool getRemoveFeaturesAtEachSpilt() const
static Node * createInstanceFromString(std::string const &nodeType)
Definition: Node.cpp:29
bool setTrainingMode(const UINT trainingMode)
Definition: Tree.cpp:121
bool setDecisionTreeNode(const DecisionTreeNode &node)
UINT getNumInputDimensions() const
Definition: MLBase.cpp:212
virtual bool clear()
ClassificationData getBootstrappedDataset(UINT numSamples=0, bool balanceDataset=false) const
bool setUseValidationSet(const bool useValidationSet)
Definition: MLBase.cpp:316
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: Classifier.cpp:142
virtual bool load(std::fstream &file)
Definition: Node.cpp:182
virtual bool print() const
VectorFloat getValidationSetRecall() const
Definition: MLBase.cpp:255
static Float sum(const VectorFloat &x)
Definition: Util.cpp:171
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
UINT getTrainingMode() const
DecisionTreeNode * deepCopyDecisionTreeNode() const