GestureRecognitionToolkit  Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
RandomForests.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #include "RandomForests.h"
22 
23 GRT_BEGIN_NAMESPACE
24 
25 //Register the RandomForests module with the Classifier base class
26 RegisterClassifierModule< RandomForests > RandomForests::registerModule("RandomForests");
27 
28 RandomForests::RandomForests(const DecisionTreeNode &decisionTreeNode,const UINT forestSize,const UINT numRandomSplits,const UINT minNumSamplesPerNode,const UINT maxDepth,const UINT trainingMode,const bool removeFeaturesAtEachSpilt,const bool useScaling,const Float bootstrappedDatasetWeight)
29 {
30  this->decisionTreeNode = decisionTreeNode.deepCopy();
31  this->forestSize = forestSize;
32  this->numRandomSplits = numRandomSplits;
33  this->minNumSamplesPerNode = minNumSamplesPerNode;
34  this->maxDepth = maxDepth;
35  this->trainingMode = trainingMode;
36  this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
37  this->useScaling = useScaling;
38  this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
39  classType = "RandomForests";
40  classifierType = classType;
41  classifierMode = STANDARD_CLASSIFIER_MODE;
42  useNullRejection = false;
43  supportsNullRejection = false;
44  useValidationSet = true;
45  validationSetSize = 20;
46  debugLog.setProceedingText("[DEBUG RandomForests]");
47  errorLog.setProceedingText("[ERROR RandomForests]");
48  trainingLog.setProceedingText("[TRAINING RandomForests]");
49  warningLog.setProceedingText("[WARNING RandomForests]");
50 }
51 
53  this->decisionTreeNode = NULL;
54  classType = "RandomForests";
55  classifierType = classType;
56  classifierMode = STANDARD_CLASSIFIER_MODE;
57  debugLog.setProceedingText("[DEBUG RandomForests]");
58  errorLog.setProceedingText("[ERROR RandomForests]");
59  trainingLog.setProceedingText("[TRAINING RandomForests]");
60  warningLog.setProceedingText("[WARNING RandomForests]");
61  *this = rhs;
62 }
63 
65 {
66  clear();
67 
68  if( decisionTreeNode != NULL ){
69  delete decisionTreeNode;
70  decisionTreeNode = NULL;
71  }
72 }
73 
75  if( this != &rhs ){
76  //Clear this tree
77  clear();
78 
79  //Copy the base classifier variables
80  if( copyBaseVariables( (Classifier*)&rhs ) ){
81 
82  //Deep copy the main node
83  if( this->decisionTreeNode != NULL ){
84  delete decisionTreeNode;
85  decisionTreeNode = NULL;
86  }
87  this->decisionTreeNode = rhs.deepCopyDecisionTreeNode();
88 
89  if( rhs.getTrained() ){
90  //Deep copy the forest
91  for(UINT i=0; i<rhs.forest.size(); i++){
92  this->forest.push_back( rhs.forest[i]->deepCopy() );
93  }
94  }
95 
96  this->forestSize = rhs.forestSize;
97  this->numRandomSplits = rhs.numRandomSplits;
98  this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
99  this->maxDepth = rhs.maxDepth;
100  this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
101  this->bootstrappedDatasetWeight = rhs.bootstrappedDatasetWeight;
102  this->trainingMode = rhs.trainingMode;
103 
104  }else errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
105  }
106  return *this;
107 }
108 
109 bool RandomForests::deepCopyFrom(const Classifier *classifier){
110 
111  if( classifier == NULL ) return false;
112 
113  if( this->getClassifierType() == classifier->getClassifierType() ){
114 
115  RandomForests *ptr = (RandomForests*)classifier;
116 
117  //Clear this tree
118  this->clear();
119 
120  if( copyBaseVariables( classifier ) ){
121 
122  //Deep copy the main node
123  if( this->decisionTreeNode != NULL ){
124  delete decisionTreeNode;
125  decisionTreeNode = NULL;
126  }
127  this->decisionTreeNode = ptr->deepCopyDecisionTreeNode();
128 
129  if( ptr->getTrained() ){
130  //Deep copy the forest
131  this->forest.reserve( ptr->forest.size() );
132  for(size_t i=0; i<ptr->forest.size(); i++){
133  this->forest.push_back( ptr->forest[i]->deepCopy() );
134  }
135  }
136 
137  this->forestSize = ptr->forestSize;
138  this->numRandomSplits = ptr->numRandomSplits;
139  this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
140  this->maxDepth = ptr->maxDepth;
141  this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
142  this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight;
143  this->trainingMode = ptr->trainingMode;
144 
145  return true;
146  }
147 
148  errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
149  }
150  return false;
151 }
152 
154 
155  //Clear any previous model
156  clear();
157 
158  const unsigned int M = trainingData.getNumSamples();
159  const unsigned int N = trainingData.getNumDimensions();
160  const unsigned int K = trainingData.getNumClasses();
161 
162  if( M == 0 ){
163  errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
164  return false;
165  }
166 
167  if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
168  errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
169  return false;
170  }
171 
172  numInputDimensions = N;
173  numClasses = K;
174  classLabels = trainingData.getClassLabels();
175  ranges = trainingData.getRanges();
176 
177  //Scale the training data if needed
178  if( useScaling ){
179  //Scale the training data between 0 and 1
180  trainingData.scale(0, 1);
181  }
182 
183  if( useValidationSet ){
184  validationSetAccuracy = 0;
185  validationSetPrecision.resize( useNullRejection ? K+1 : K, 0 );
186  validationSetRecall.resize( useNullRejection ? K+1 : K, 0 );
187  }
188 
189  //Flag that the main algorithm has been trained encase we need to trigger any callbacks
190  trained = true;
191 
192  //Train the random forest
193  forest.reserve( forestSize );
194 
195  for(UINT i=0; i<forestSize; i++){
196 
197  //Get a balanced bootstrapped dataset
198  UINT datasetSize = (UINT)(trainingData.getNumSamples() * bootstrappedDatasetWeight);
199  ClassificationData data = trainingData.getBootstrappedDataset( datasetSize, true );
200 
201  Timer timer;
202  timer.start();
203 
204  DecisionTree tree;
205  tree.setDecisionTreeNode( *decisionTreeNode );
206  tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
207  tree.setUseValidationSet( useValidationSet );
208  tree.setValidationSetSize( validationSetSize );
209  tree.setTrainingMode( trainingMode );
210  tree.setNumSplittingSteps( numRandomSplits );
211  tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
212  tree.setMaxDepth( maxDepth );
213  tree.enableNullRejection( useNullRejection );
214  tree.setRemoveFeaturesAtEachSpilt( removeFeaturesAtEachSpilt );
215 
216  trainingLog << "Training decision tree " << i+1 << "/" << forestSize << "..." << std::endl;
217 
218  //Train this tree
219  if( !tree.train_( data ) ){
220  errorLog << "train_(ClassificationData &trainingData) - Failed to train tree at forest index: " << i << std::endl;
221  clear();
222  return false;
223  }
224 
225  Float computeTime = timer.getMilliSeconds();
226  trainingLog << "Decision tree trained in " << (computeTime*0.001)/60.0 << " minutes" << std::endl;
227 
228  if( useValidationSet ){
229  Float forestNorm = 1.0 / forestSize;
230  validationSetAccuracy += tree.getValidationSetAccuracy();
231  VectorFloat precision = tree.getValidationSetPrecision();
232  VectorFloat recall = tree.getValidationSetRecall();
233 
234  grt_assert( precision.getSize() == validationSetPrecision.getSize() );
235  grt_assert( recall.getSize() == validationSetRecall.getSize() );
236 
237  for(UINT i=0; i<validationSetPrecision.getSize(); i++){
238  validationSetPrecision[i] += precision[i] * forestNorm;
239  }
240 
241  for(UINT i=0; i<validationSetRecall.getSize(); i++){
242  validationSetRecall[i] += recall[i] * forestNorm;
243  }
244 
245  }
246 
247  //Deep copy the tree into the forest
248  forest.push_back( tree.deepCopyTree() );
249  }
250 
251  if( useValidationSet ){
252  validationSetAccuracy /= forestSize;
253  trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
254 
255  trainingLog << "Validation set precision: ";
256  for(UINT i=0; i<validationSetPrecision.getSize(); i++){
257  trainingLog << validationSetPrecision[i] << " ";
258  }
259  trainingLog << std::endl;
260 
261  trainingLog << "Validation set recall: ";
262  for(UINT i=0; i<validationSetRecall.getSize(); i++){
263  trainingLog << validationSetRecall[i] << " ";
264  }
265  trainingLog << std::endl;
266  }
267 
268  return true;
269 }
270 
272 
273  predictedClassLabel = 0;
274  maxLikelihood = 0;
275 
276  if( !trained ){
277  errorLog << "predict_(VectorDouble &inputVector) - Model Not Trained!" << std::endl;
278  return false;
279  }
280 
281  if( inputVector.getSize() != numInputDimensions ){
282  errorLog << "predict_(VectorDouble &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
283  return false;
284  }
285 
286  if( useScaling ){
287  for(UINT n=0; n<numInputDimensions; n++){
288  inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
289  }
290  }
291 
292  if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses,0);
293  if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses,0);
294 
295  std::fill(classDistances.begin(),classDistances.end(),0);
296 
297  //Run the prediction for each tree in the forest
298  VectorDouble y;
299  for(UINT i=0; i<forestSize; i++){
300  if( !forest[i]->predict(inputVector, y) ){
301  errorLog << "predict_(VectorDouble &inputVector) - Tree " << i << " failed prediction!" << std::endl;
302  return false;
303  }
304 
305  for(UINT j=0; j<numClasses; j++){
306  classDistances[j] += y[j];
307  }
308  }
309 
310  //Use the class distances to estimate the class likelihoods
311  bestDistance = 0;
312  UINT bestIndex = 0;
313  Float classNorm = 1.0 / Float(forestSize);
314  for(UINT k=0; k<numClasses; k++){
315  classLikelihoods[k] = classDistances[k] * classNorm;
316 
317  if( classLikelihoods[k] > maxLikelihood ){
318  maxLikelihood = classLikelihoods[k];
319  bestDistance = classDistances[k];
320  bestIndex = k;
321  }
322  }
323 
324  predictedClassLabel = classLabels[ bestIndex ];
325 
326  return true;
327 }
328 
330 
331  //Call the classifiers clear function
333 
334  //Delete the forest
335  for(UINT i=0; i<forest.getSize(); i++){
336  if( forest[i] != NULL ){
337  forest[i]->clear();
338  delete forest[i];
339  forest[i] = NULL;
340  }
341  }
342  forest.clear();
343 
344  return true;
345 }
346 
347 bool RandomForests::print() const{
348 
349  std::cout << "RandomForest\n";
350  std::cout << "ForestSize: " << forestSize << std::endl;
351  std::cout << "NumSplittingSteps: " << numRandomSplits << std::endl;
352  std::cout << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
353  std::cout << "MaxDepth: " << maxDepth << std::endl;
354  std::cout << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
355  std::cout << "TrainingMode: " << trainingMode << std::endl;
356  std::cout << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;
357 
358  if( trained ){
359  std::cout << "Forest:\n";
360  for(UINT i=0; i<forestSize; i++){
361  std::cout << "Tree: " << i+1 << std::endl;
362  forest[i]->print();
363  }
364  }
365 
366  return true;
367 }
368 
369 bool RandomForests::saveModelToFile( std::fstream &file ) const{
370 
371  if(!file.is_open())
372  {
373  errorLog <<"saveModelToFile(fstream &file) - The file is not open!" << std::endl;
374  return false;
375  }
376 
377  //Write the header info
378  file << "GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";
379 
380  //Write the classifier settings to the file
382  errorLog <<"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
383  return false;
384  }
385 
386  if( decisionTreeNode != NULL ){
387  file << "DecisionTreeNodeType: " << decisionTreeNode->getNodeType() << std::endl;
388  if( !decisionTreeNode->saveToFile( file ) ){
389  Classifier::errorLog <<"saveModelToFile(fstream &file) - Failed to save decisionTreeNode settings to file!" << std::endl;
390  return false;
391  }
392  }else{
393  file << "DecisionTreeNodeType: " << "NULL" << std::endl;
394  }
395 
396  file << "ForestSize: " << forestSize << std::endl;
397  file << "NumSplittingSteps: " << numRandomSplits << std::endl;
398  file << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
399  file << "MaxDepth: " << maxDepth << std::endl;
400  file << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
401  file << "TrainingMode: " << trainingMode << std::endl;
402  file << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;
403 
404  if( trained ){
405  file << "Forest:\n";
406  for(UINT i=0; i<forestSize; i++){
407  file << "Tree: " << i+1 << std::endl;
408  file << "TreeNodeType: " << forest[i]->getNodeType() << std::endl;
409  if( !forest[i]->saveToFile( file ) ){
410  errorLog << "saveModelToFile(fstream &file) - Failed to save tree " << i << " to file!" << std::endl;
411  return false;
412  }
413  }
414  }
415 
416  return true;
417 }
418 
419 bool RandomForests::loadModelFromFile( std::fstream &file ){
420 
421  clear();
422 
423  if(!file.is_open())
424  {
425  errorLog << "loadModelFromFile(string filename) - Could not open file to load model" << std::endl;
426  return false;
427  }
428 
429  std::string word;
430  std::string treeNodeType;
431 
432  file >> word;
433 
434  //Find the file type header
435  if(word != "GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
436  errorLog << "loadModelFromFile(string filename) - Could not find Model File Header" << std::endl;
437  return false;
438  }
439 
440  //Load the base settings from the file
442  errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;
443  return false;
444  }
445 
446  file >> word;
447  if(word != "DecisionTreeNodeType:"){
448  Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the DecisionTreeNodeType!" << std::endl;
449  return false;
450  }
451  file >> treeNodeType;
452 
453  if( treeNodeType != "NULL" ){
454 
455  decisionTreeNode = dynamic_cast< DecisionTreeNode* >( DecisionTreeNode::createInstanceFromString( treeNodeType ) );
456 
457  if( decisionTreeNode == NULL ){
458  Classifier::errorLog << "loadModelFromFile(string filename) - Could not create new DecisionTreeNode from type: " << treeNodeType << std::endl;
459  return false;
460  }
461 
462  if( !decisionTreeNode->loadFromFile( file ) ){
463  Classifier::errorLog <<"loadModelFromFile(fstream &file) - Failed to load decisionTreeNode settings from file!" << std::endl;
464  return false;
465  }
466  }else{
467  Classifier::errorLog <<"loadModelFromFile(fstream &file) - Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;
468  return false;
469  }
470 
471  file >> word;
472  if(word != "ForestSize:"){
473  errorLog << "loadModelFromFile(string filename) - Could not find the ForestSize!" << std::endl;
474  return false;
475  }
476  file >> forestSize;
477 
478  file >> word;
479  if(word != "NumSplittingSteps:"){
480  errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;
481  return false;
482  }
483  file >> numRandomSplits;
484 
485  file >> word;
486  if(word != "MinNumSamplesPerNode:"){
487  errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
488  return false;
489  }
490  file >> minNumSamplesPerNode;
491 
492  file >> word;
493  if(word != "MaxDepth:"){
494  errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;
495  return false;
496  }
497  file >> maxDepth;
498 
499  file >> word;
500  if(word != "RemoveFeaturesAtEachSpilt:"){
501  errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
502  return false;
503  }
504  file >> removeFeaturesAtEachSpilt;
505 
506  file >> word;
507  if(word != "TrainingMode:"){
508  errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;
509  return false;
510  }
511  file >> trainingMode;
512 
513  file >> word;
514  if(word != "ForestBuilt:"){
515  errorLog << "loadModelFromFile(string filename) - Could not find the ForestBuilt!" << std::endl;
516  return false;
517  }
518  file >> trained;
519 
520  if( trained ){
521  //Find the forest header
522  file >> word;
523  if(word != "Forest:"){
524  errorLog << "loadModelFromFile(string filename) - Could not find the Forest!" << std::endl;
525  return false;
526  }
527 
528  //Load each tree
529  UINT treeIndex;
530  forest.reserve( forestSize );
531  for(UINT i=0; i<forestSize; i++){
532 
533  file >> word;
534  if(word != "Tree:"){
535  errorLog << "loadModelFromFile(string filename) - Could not find the Tree Header!" << std::endl;
536  std::cout << "WORD: " << word << std::endl;
537  std::cout << "Tree i: " << i << std::endl;
538  return false;
539  }
540  file >> treeIndex;
541 
542  if( treeIndex != i+1 ){
543  errorLog << "loadModelFromFile(string filename) - Incorrect tree index: " << treeIndex << std::endl;
544  return false;
545  }
546 
547  file >> word;
548  if(word != "TreeNodeType:"){
549  errorLog << "loadModelFromFile(string filename) - Could not find the TreeNodeType!" << std::endl;
550  std::cout << "WORD: " << word << std::endl;
551  std::cout << "i: " << i << std::endl;
552  return false;
553  }
554  file >> treeNodeType;
555 
556  //Create a new DTree
557  DecisionTreeNode *tree = dynamic_cast< DecisionTreeNode* >( DecisionTreeNode::createInstanceFromString( treeNodeType ) );
558 
559  if( tree == NULL ){
560  errorLog << "loadModelFromFile(fstream &file) - Failed to create new Tree!" << std::endl;
561  return false;
562  }
563 
564  //Load the tree from the file
565  tree->setParent( NULL );
566  if( !tree->loadFromFile( file ) ){
567  errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;
568  return false;
569  }
570 
571  //Add the tree to the forest
572  forest.push_back( tree );
573  }
574  }
575 
576  return true;
577 }
578 
580 
581  if( !getTrained() ){
582  errorLog << "combineModels( const RandomForests &forest ) - This instance has not been trained!" << std::endl;
583  return false;
584  }
585 
586  if( !forest.getTrained() ){
587  errorLog << "combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << std::endl;
588  return false;
589  }
590 
591  if( this->getNumInputDimensions() != forest.getNumInputDimensions() ) {
592  errorLog << "combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest (";
593  errorLog << forest.getNumInputDimensions() << ") does not match the number of input dimensions of this instance (";
594  errorLog << this->getNumInputDimensions() << ")!" << std::endl;
595  return false;
596  }
597 
598  //Add the trees in the other forest to this model
599  DecisionTreeNode *node;
600  for(UINT i=0; i<forest.getForestSize(); i++){
601  node = forest.getTree(i);
602  if( node ){
603  this->forest.push_back( node->deepCopy() );
604  forestSize++;
605  }
606  }
607 
608  return true;
609 }
610 
612  return forestSize;
613 }
614 
616  return numRandomSplits;
617 }
618 
620  return minNumSamplesPerNode;
621 }
622 
624  return maxDepth;
625 }
626 
628  return trainingMode;
629 }
630 
632  return removeFeaturesAtEachSpilt;
633 }
634 
636  return bootstrappedDatasetWeight;
637 }
638 
640  return forest;
641 }
642 
644 
645  if( decisionTreeNode == NULL ){
646  return NULL;
647  }
648 
649  return decisionTreeNode->deepCopy();
650 }
651 
652 DecisionTreeNode* RandomForests::getTree( const UINT index ) const{
653 
654  if( !trained || index >= forestSize ) return NULL;
655 
656  return forest[ index ];
657 }
658 
659 VectorDouble RandomForests::getFeatureWeights( const bool normWeights ) const{
660 
661  if( !trained ) return VectorDouble();
662 
663  VectorDouble weights( numInputDimensions, 0 );
664 
665  for(UINT i=0; i<forestSize; i++){
666  if( !forest[i]->computeFeatureWeights( weights ) ){
667  warningLog << "getFeatureWeights( const bool normWeights ) - Failed to compute weights for tree: " << i << std::endl;
668  }
669  }
670 
671  //Normalize the weights
672  if( normWeights ){
673  Float sum = Util::sum( weights );
674  if( sum > 0.0 ){
675  const Float norm = 1.0 / sum;
676  for(UINT j=0; j<numInputDimensions; j++){
677  weights[j] *= norm;
678  }
679  }
680  }
681 
682  return weights;
683 }
684 
686 
687  if( !trained ) return MatrixDouble();
688 
689  MatrixDouble weights( getNumClasses(), numInputDimensions );
690  weights.setAllValues(0.0);
691 
692  for(UINT i=0; i<forestSize; i++){
693  if( !forest[i]->computeLeafNodeWeights( weights ) ){
694  warningLog << "computeLeafNodeWeights( const bool normWeights ) - Failed to compute leaf node weights for tree: " << i << std::endl;
695  }
696  }
697 
698  //Normalize the weights
699  if( normWeights ){
700  for(UINT j=0; j<weights.getNumCols(); j++){
701  Float sum = 0.0;
702  for(UINT i=0; i<weights.getNumRows(); i++){
703  sum += weights[i][j];
704  }
705  if( sum != 0.0 ){
706  const Float norm = 1.0 / sum;
707  for(UINT i=0; i<weights.getNumRows(); i++){
708  weights[i][j] *= norm;
709  }
710  }
711  }
712  }
713 
714  return weights;
715 }
716 
717 bool RandomForests::setForestSize(const UINT forestSize){
718  if( forestSize > 0 ){
719  clear();
720  this->forestSize = forestSize;
721  return true;
722  }
723  return false;
724 }
725 
726 bool RandomForests::setNumRandomSplits(const UINT numRandomSplits){
727  if( numRandomSplits > 0 ){
728  this->numRandomSplits = numRandomSplits;
729  return true;
730  }
731  return false;
732 }
733 
734 bool RandomForests::setMinNumSamplesPerNode(const UINT minNumSamplesPerNode){
735  if( minNumSamplesPerNode > 0 ){
736  this->minNumSamplesPerNode = minNumSamplesPerNode;
737  return true;
738  }
739  return false;
740 }
741 
742 bool RandomForests::setMaxDepth(const UINT maxDepth){
743  if( maxDepth > 0 ){
744  this->maxDepth = maxDepth;
745  return true;
746  }
747  return false;
748 }
749 
750 bool RandomForests::setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt){
751  this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
752  return true;
753 }
754 
755 bool RandomForests::setTrainingMode(const UINT trainingMode){
756 
757  if( trainingMode == DecisionTree::BEST_ITERATIVE_SPILT || trainingMode == DecisionTree::BEST_RANDOM_SPLIT ){
758  this->trainingMode = trainingMode;
759  return true;
760  }
761 
762  warningLog << "setTrainingMode(const UINT mode) - Unknown training mode!" << std::endl;
763  return false;
764 }
765 
767 
768  if( decisionTreeNode != NULL ){
769  delete decisionTreeNode;
770  decisionTreeNode = NULL;
771  }
772  this->decisionTreeNode = node.deepCopy();
773 
774  return true;
775 }
776 
777 bool RandomForests::setBootstrappedDatasetWeight( const Float bootstrappedDatasetWeight ){
778 
779  if( bootstrappedDatasetWeight > 0.0 && bootstrappedDatasetWeight <= 1.0 ){
780  this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
781  return true;
782  }
783 
784  warningLog << "setBootstrappedDatasetWeight(...) - Bad parameter, the weight must be > 0.0 and <= 1.0. Weight: " << bootstrappedDatasetWeight << std::endl;
785  return false;
786 }
787 
788 GRT_END_NAMESPACE
789 
DecisionTreeNode * getTree(const UINT index) const
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:255
virtual bool predict(VectorFloat inputVector)
Definition: MLBase.cpp:112
virtual bool loadModelFromFile(std::fstream &file)
Definition: Timer.h:43
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
Definition: Tree.cpp:156
UINT getForestSize() const
RandomForests(const DecisionTreeNode &decisionTreeNode=DecisionTreeClusterNode(), const UINT forestSize=10, const UINT numRandomSplits=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const UINT trainingMode=DecisionTree::BEST_RANDOM_SPLIT, const bool removeFeaturesAtEachSpilt=true, const bool useScaling=false, const Float bootstrappedDatasetWeight=0.8)
virtual ~RandomForests(void)
virtual bool saveModelToFile(std::fstream &file) const
bool setBootstrappedDatasetWeight(const Float bootstrappedDatasetWeight)
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
Definition: Tree.cpp:138
std::string getClassifierType() const
Definition: Classifier.cpp:160
UINT getMaxDepth() const
virtual UINT getNumClasses() const
Definition: Classifier.cpp:190
bool enableScaling(const bool useScaling)
Definition: MLBase.cpp:266
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool train_(ClassificationData &trainingData)
std::string getNodeType() const
Definition: Node.cpp:303
bool getTrained() const
Definition: MLBase.cpp:254
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
bool setNumSplittingSteps(const UINT numSplittingSteps)
Definition: Tree.cpp:129
Vector< UINT > getClassLabels() const
bool combineModels(const RandomForests &forest)
signed long getMilliSeconds()
Definition: Timer.h:117
unsigned int getSize() const
Definition: Vector.h:193
RandomForests & operator=(const RandomForests &rhs)
bool setNumRandomSplits(const UINT numSplittingSteps)
bool setAllValues(const T &value)
Definition: Matrix.h:336
virtual bool train_(ClassificationData &trainingData)
MatrixDouble getLeafNodeFeatureWeights(const bool normWeights=true) const
DecisionTreeNode * deepCopyTree() const
UINT getNumSamples() const
bool setForestSize(const UINT forestSize)
bool setValidationSetSize(const UINT validationSetSize)
Definition: MLBase.cpp:299
virtual bool saveToFile(std::fstream &file) const
Definition: Node.cpp:139
bool setMaxDepth(const UINT maxDepth)
UINT getNumRandomSplits() const
virtual bool deepCopyFrom(const Classifier *classifier)
Float getBootstrappedDatasetWeight() const
virtual bool loadFromFile(std::fstream &file)
Definition: Node.cpp:181
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:92
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:302
Float getValidationSetAccuracy() const
Definition: MLBase.cpp:242
bool setMaxDepth(const UINT maxDepth)
Definition: Tree.cpp:147
unsigned int getNumRows() const
Definition: Matrix.h:542
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
Definition: Matrix.h:549
bool setTrainingMode(const UINT trainingMode)
UINT getMinNumSamplesPerNode() const
bool setDecisionTreeNode(const DecisionTreeNode &node)
bool start()
Definition: Timer.h:64
virtual bool predict_(VectorDouble &inputVector)
DecisionTreeNode * deepCopy() const
Vector< MinMax > getRanges() const
VectorDouble getFeatureWeights(const bool normWeights=true) const
const Vector< DecisionTreeNode * > & getForest() const
VectorFloat getValidationSetPrecision() const
Definition: MLBase.cpp:246
bool enableNullRejection(bool useNullRejection)
Definition: Classifier.cpp:230
bool getRemoveFeaturesAtEachSpilt() const
static Node * createInstanceFromString(std::string const &nodeType)
Definition: Node.cpp:28
bool setTrainingMode(const UINT trainingMode)
Definition: Tree.cpp:120
bool setDecisionTreeNode(const DecisionTreeNode &node)
UINT getNumInputDimensions() const
Definition: MLBase.cpp:207
virtual bool clear()
ClassificationData getBootstrappedDataset(UINT numSamples=0, bool balanceDataset=false) const
bool setUseValidationSet(const bool useValidationSet)
Definition: MLBase.cpp:311
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: Classifier.cpp:141
virtual bool print() const
VectorFloat getValidationSetRecall() const
Definition: MLBase.cpp:250
static Float sum(const VectorFloat &x)
Definition: Util.cpp:170
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
UINT getTrainingMode() const
DecisionTreeNode * deepCopyDecisionTreeNode() const