GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
RandomForests.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "RandomForests.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
//Define the string that will be used to identify the object
const std::string RandomForests::id = "RandomForests";
//Return the static class identifier used by the module registry
std::string RandomForests::getId() { return RandomForests::id; }

//Register the RandomForests module with the Classifier base class so it can be created by name at runtime
RegisterClassifierModule< RandomForests > RandomForests::registerModule( RandomForests::getId() );
32 
//Main constructor: stores a deep copy of the template node used to grow each tree, plus the forest hyperparameters.
//Defaults (see header): DecisionTreeClusterNode template, 10 trees, 100 random splits, minNumSamplesPerNode=5, maxDepth=10.
RandomForests::RandomForests(const DecisionTreeNode &decisionTreeNode,const UINT forestSize,const UINT numRandomSplits,const UINT minNumSamplesPerNode,const UINT maxDepth,const Tree::TrainingMode trainingMode,const bool removeFeaturesAtEachSplit,const bool useScaling,const Float bootstrappedDatasetWeight) : Classifier( RandomForests::getId() )
{
    //Deep copy the template node; this instance owns the copy for its whole lifetime
    this->decisionTreeNode = dynamic_cast<DecisionTreeNode*>(decisionTreeNode.deepCopy());
    this->forestSize = forestSize;
    this->numRandomSplits = numRandomSplits;
    this->minNumSamplesPerNode = minNumSamplesPerNode;
    this->maxDepth = maxDepth;
    this->trainingMode = trainingMode;
    this->removeFeaturesAtEachSplit = removeFeaturesAtEachSplit;
    this->useScaling = useScaling;
    this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    //Null rejection is not supported by this classifier
    useNullRejection = false;
    supportsNullRejection = false;
    //A validation set (20% of the training data) is used by default when training each tree
    useValidationSet = true;
    validationSetSize = 20;
}
50 
52 {
53  this->decisionTreeNode = NULL;
54  classifierMode = STANDARD_CLASSIFIER_MODE;
55  *this = rhs;
56 }
57 
59 {
60  clear();
61 
62  if( decisionTreeNode != NULL ){
63  this->decisionTreeNode->clear();
64  delete decisionTreeNode;
65  decisionTreeNode = NULL;
66  }
67 }
68 
70  if( this != &rhs ){
71  //Clear this tree
72  clear();
73 
74  //Copy the base classifier variables
75  if( copyBaseVariables( (Classifier*)&rhs ) ){
76 
77  //Deep copy the main node
78  if( this->decisionTreeNode != NULL ){
79  this->decisionTreeNode->clear();
80  delete decisionTreeNode;
81  decisionTreeNode = NULL;
82  }
83  this->decisionTreeNode = rhs.deepCopyDecisionTreeNode();
84 
85  if( rhs.getTrained() ){
86  //Deep copy the forest
87  for(UINT i=0; i<rhs.forest.size(); i++){
88  this->forest.push_back( dynamic_cast<DecisionTreeNode*>(rhs.forest[i]->deepCopy()) );
89  }
90  }
91 
92  this->forestSize = rhs.forestSize;
93  this->numRandomSplits = rhs.numRandomSplits;
94  this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
95  this->maxDepth = rhs.maxDepth;
96  this->removeFeaturesAtEachSplit = rhs.removeFeaturesAtEachSplit;
97  this->bootstrappedDatasetWeight = rhs.bootstrappedDatasetWeight;
98  this->trainingMode = rhs.trainingMode;
99 
100  }else errorLog << __GRT_LOG__ << " Failed to copy base variables!" << std::endl;
101  }
102  return *this;
103 }
104 
105 bool RandomForests::deepCopyFrom(const Classifier *classifier){
106 
107  if( classifier == NULL ) return false;
108 
109  if( this->getId() == classifier->getId() ){
110 
111  const RandomForests *ptr = dynamic_cast<const RandomForests*>(classifier);
112 
113  //Clear this tree
114  this->clear();
115 
116  if( copyBaseVariables( classifier ) ){
117 
118  //Deep copy the main node
119  if( this->decisionTreeNode != NULL ){
120  delete decisionTreeNode;
121  decisionTreeNode = NULL;
122  }
123  this->decisionTreeNode = ptr->deepCopyDecisionTreeNode();
124 
125  if( ptr->getTrained() ){
126  //Deep copy the forest
127  this->forest.reserve( ptr->forest.getSize() );
128  for(UINT i=0; i<ptr->forest.getSize(); i++){
129  this->forest.push_back( dynamic_cast<DecisionTreeNode*>(ptr->forest[i]->deepCopy()) );
130  }
131  }
132 
133  this->forestSize = ptr->forestSize;
134  this->numRandomSplits = ptr->numRandomSplits;
135  this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
136  this->maxDepth = ptr->maxDepth;
137  this->removeFeaturesAtEachSplit = ptr->removeFeaturesAtEachSplit;
138  this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight;
139  this->trainingMode = ptr->trainingMode;
140 
141  return true;
142  }
143 
144  errorLog << __GRT_LOG__ << " Failed to copy base variables!" << std::endl;
145  }
146  return false;
147 }
148 
150 
151  //Clear any previous model
152  clear();
153 
154  const unsigned int M = trainingData.getNumSamples();
155  const unsigned int N = trainingData.getNumDimensions();
156  const unsigned int K = trainingData.getNumClasses();
157 
158  if( M == 0 ){
159  errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
160  return false;
161  }
162 
163  if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
164  errorLog << __GRT_LOG__ << " Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
165  return false;
166  }
167 
168  numInputDimensions = N;
169  numOutputDimensions = K;
170  numClasses = K;
171  classLabels = trainingData.getClassLabels();
172  ranges = trainingData.getRanges();
173 
174  //Scale the training data if needed
175  if( useScaling ){
176  //Scale the training data between 0 and 1
177  trainingData.scale(0, 1);
178  }
179 
180  if( useValidationSet ){
181  validationSetAccuracy = 0;
182  validationSetPrecision.resize( useNullRejection ? K+1 : K, 0 );
183  validationSetRecall.resize( useNullRejection ? K+1 : K, 0 );
184  }
185 
186  //Flag that the main algorithm has been trained encase we need to trigger any callbacks
187  trained = true;
188 
189  //Train the random forest
190  forest.reserve( forestSize );
191 
192  for(UINT i=0; i<forestSize; i++){
193 
194  //Get a balanced bootstrapped dataset
195  UINT datasetSize = (UINT)floor(trainingData.getNumSamples() * bootstrappedDatasetWeight);
196  ClassificationData data = trainingData.getBootstrappedDataset( datasetSize, true );
197 
198  Timer timer;
199  timer.start();
200 
201  DecisionTree tree;
202  tree.setDecisionTreeNode( *decisionTreeNode );
203  tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
204  tree.setUseValidationSet( useValidationSet );
205  tree.setValidationSetSize( validationSetSize );
206  tree.setTrainingMode( trainingMode );
207  tree.setNumSplittingSteps( numRandomSplits );
208  tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
209  tree.setMaxDepth( maxDepth );
210  tree.enableNullRejection( useNullRejection );
211  tree.setRemoveFeaturesAtEachSplit( removeFeaturesAtEachSplit );
212 
213  trainingLog << "Training decision tree " << i+1 << "/" << forestSize << "..." << std::endl;
214 
215  //Train this tree
216  if( !tree.train_( data ) ){
217  errorLog << __GRT_LOG__ << " Failed to train tree at forest index: " << i << std::endl;
218  clear();
219  return false;
220  }
221 
222  Float computeTime = timer.getMilliSeconds();
223  trainingLog << "Decision tree trained in " << (computeTime*0.001)/60.0 << " minutes" << std::endl;
224 
225  if( useValidationSet ){
226  Float forestNorm = 1.0 / forestSize;
227  validationSetAccuracy += tree.getValidationSetAccuracy();
228  VectorFloat precision = tree.getValidationSetPrecision();
229  VectorFloat recall = tree.getValidationSetRecall();
230 
231  grt_assert( precision.getSize() == validationSetPrecision.getSize() );
232  grt_assert( recall.getSize() == validationSetRecall.getSize() );
233 
234  for(UINT i=0; i<validationSetPrecision.getSize(); i++){
235  validationSetPrecision[i] += precision[i] * forestNorm;
236  }
237 
238  for(UINT i=0; i<validationSetRecall.getSize(); i++){
239  validationSetRecall[i] += recall[i] * forestNorm;
240  }
241  }
242 
243  //Deep copy the tree into the forest
244  forest.push_back( tree.deepCopyTree() );
245  }
246 
247  //Flag that the models have been trained
248  trained = true;
249  converged = true;
250 
251  //Compute the final training stats
252  trainingSetAccuracy = 0;
253 
254  //If scaling was on, then the data will already be scaled, so turn it off temporially so we can test the model accuracy
255  bool scalingState = useScaling;
256  useScaling = false;
257  if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
258  trained = false;
259  converged = true;
260  errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
261  return false;
262  }
263 
264  trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;
265 
266  //Reset the scaling state for future prediction
267  useScaling = scalingState;
268 
269  if( useValidationSet ){
270  validationSetAccuracy /= forestSize;
271  trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
272 
273  trainingLog << "Validation set precision: ";
274  for(UINT i=0; i<validationSetPrecision.getSize(); i++){
275  trainingLog << validationSetPrecision[i] << " ";
276  }
277  trainingLog << std::endl;
278 
279  trainingLog << "Validation set recall: ";
280  for(UINT i=0; i<validationSetRecall.getSize(); i++){
281  trainingLog << validationSetRecall[i] << " ";
282  }
283  trainingLog << std::endl;
284  }
285 
286  return true;
287 }
288 
290 
291  predictedClassLabel = 0;
292  maxLikelihood = 0;
293 
294  if( !trained ){
295  errorLog << __GRT_LOG__ << " Model Not Trained!" << std::endl;
296  return false;
297  }
298 
299  if( inputVector.getSize() != numInputDimensions ){
300  errorLog << __GRT_LOG__ << " The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
301  return false;
302  }
303 
304  if( useScaling ){
305  for(UINT n=0; n<numInputDimensions; n++){
306  inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
307  }
308  }
309 
310  if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses,0);
311  if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses,0);
312 
313  std::fill(classDistances.begin(),classDistances.end(),0);
314 
315  //Run the prediction for each tree in the forest
316  VectorDouble y;
317  for(UINT i=0; i<forestSize; i++){
318  if( !forest[i]->predict_(inputVector, y) ){
319  errorLog << __GRT_LOG__ << " Tree " << i << " failed prediction!" << std::endl;
320  return false;
321  }
322 
323  for(UINT j=0; j<numClasses; j++){
324  classDistances[j] += y[j];
325  }
326  }
327 
328  //Use the class distances to estimate the class likelihoods
329  bestDistance = 0;
330  UINT bestIndex = 0;
331  Float classNorm = 1.0 / Float(forestSize);
332  for(UINT k=0; k<numClasses; k++){
333  classLikelihoods[k] = classDistances[k] * classNorm;
334 
335  if( classLikelihoods[k] > maxLikelihood ){
336  maxLikelihood = classLikelihoods[k];
337  bestDistance = classDistances[k];
338  bestIndex = k;
339  }
340  }
341 
342  predictedClassLabel = classLabels[ bestIndex ];
343 
344  return true;
345 }
346 
348 
349  //Call the classifiers clear function
351 
352  //Delete the forest
353  for(UINT i=0; i<forest.getSize(); i++){
354  if( forest[i] != NULL ){
355  forest[i]->clear();
356  delete forest[i];
357  forest[i] = NULL;
358  }
359  }
360  forest.clear();
361 
362  if( this->decisionTreeNode ){
363  this->decisionTreeNode->clear();
364  //Note, we do not free the decisionTreeNode here, as we need to keep this for the lifecycle of the RF instance
365  }
366 
367  return true;
368 }
369 
370 bool RandomForests::print() const{
371 
372  std::cout << "RandomForest\n";
373  std::cout << "ForestSize: " << forestSize << std::endl;
374  std::cout << "NumSplittingSteps: " << numRandomSplits << std::endl;
375  std::cout << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
376  std::cout << "MaxDepth: " << maxDepth << std::endl;
377  std::cout << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSplit << std::endl;
378  std::cout << "TrainingMode: " << trainingMode << std::endl;
379  std::cout << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;
380 
381  if( trained ){
382  std::cout << "Forest:\n";
383  for(UINT i=0; i<forestSize; i++){
384  std::cout << "Tree: " << i+1 << std::endl;
385  forest[i]->print();
386  }
387  }
388 
389  return true;
390 }
391 
392 bool RandomForests::save( std::fstream &file ) const{
393 
394  if(!file.is_open())
395  {
396  errorLog << __GRT_LOG__ << " The file is not open!" << std::endl;
397  return false;
398  }
399 
400  //Write the header info
401  file << "GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";
402 
403  //Write the classifier settings to the file
405  errorLog << __GRT_LOG__ << " Failed to save classifier base settings to file!" << std::endl;
406  return false;
407  }
408 
409  if( decisionTreeNode != NULL ){
410  file << "DecisionTreeNodeType: " << decisionTreeNode->getNodeType() << std::endl;
411  if( !decisionTreeNode->save( file ) ){
412  errorLog << __GRT_LOG__ << " Failed to save decisionTreeNode settings to file!" << std::endl;
413  return false;
414  }
415  }else{
416  file << "DecisionTreeNodeType: " << "NULL" << std::endl;
417  }
418 
419  file << "ForestSize: " << forestSize << std::endl;
420  file << "NumSplittingSteps: " << numRandomSplits << std::endl;
421  file << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
422  file << "MaxDepth: " << maxDepth << std::endl;
423  file << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSplit << std::endl;
424  file << "TrainingMode: " << trainingMode << std::endl;
425  file << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;
426 
427  if( trained ){
428  file << "Forest:\n";
429  for(UINT i=0; i<forestSize; i++){
430  file << "Tree: " << i+1 << std::endl;
431  file << "TreeNodeType: " << forest[i]->getNodeType() << std::endl;
432  if( !forest[i]->save( file ) ){
433  errorLog << __GRT_LOG__ << " Failed to save tree " << i << " to file!" << std::endl;
434  return false;
435  }
436  }
437  }
438 
439  return true;
440 }
441 
442 bool RandomForests::load( std::fstream &file ){
443 
444  clear();
445 
446  if(!file.is_open())
447  {
448  errorLog << __GRT_LOG__ << " Could not open file to load model" << std::endl;
449  return false;
450  }
451 
452  std::string word;
453  std::string treeNodeType;
454 
455  file >> word;
456 
457  //Find the file type header
458  if(word != "GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
459  errorLog << __GRT_LOG__ << " Could not find Model File Header" << std::endl;
460  return false;
461  }
462 
463  //Load the base settings from the file
465  errorLog << __GRT_LOG__ << " Failed to load base settings from file!" << std::endl;
466  return false;
467  }
468 
469  file >> word;
470  if(word != "DecisionTreeNodeType:"){
471  errorLog << __GRT_LOG__ << " Could not find the DecisionTreeNodeType!" << std::endl;
472  return false;
473  }
474  file >> treeNodeType;
475 
476  if( treeNodeType != "NULL" ){
477 
478  decisionTreeNode = dynamic_cast< DecisionTreeNode* >( DecisionTreeNode::createInstanceFromString( treeNodeType ) );
479 
480  if( decisionTreeNode == NULL ){
481  errorLog << __GRT_LOG__ << " Could not create new DecisionTreeNode from type: " << treeNodeType << std::endl;
482  return false;
483  }
484 
485  if( !decisionTreeNode->load( file ) ){
486  errorLog << __GRT_LOG__ << " Failed to load decisionTreeNode settings from file!" << std::endl;
487  return false;
488  }
489  }else{
490  errorLog << __GRT_LOG__ << " Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;
491  return false;
492  }
493 
494  file >> word;
495  if(word != "ForestSize:"){
496  errorLog << __GRT_LOG__ << " Could not find the ForestSize!" << std::endl;
497  return false;
498  }
499  file >> forestSize;
500 
501  file >> word;
502  if(word != "NumSplittingSteps:"){
503  errorLog << __GRT_LOG__ << " Could not find the NumSplittingSteps!" << std::endl;
504  return false;
505  }
506  file >> numRandomSplits;
507 
508  file >> word;
509  if(word != "MinNumSamplesPerNode:"){
510  errorLog << __GRT_LOG__ << " Could not find the MinNumSamplesPerNode!" << std::endl;
511  return false;
512  }
513  file >> minNumSamplesPerNode;
514 
515  file >> word;
516  if(word != "MaxDepth:"){
517  errorLog << __GRT_LOG__ << " Could not find the MaxDepth!" << std::endl;
518  return false;
519  }
520  file >> maxDepth;
521 
522  file >> word;
523  if(word != "RemoveFeaturesAtEachSpilt:"){
524  errorLog << __GRT_LOG__ << " Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
525  return false;
526  }
527  file >> removeFeaturesAtEachSplit;
528 
529  file >> word;
530  if(word != "TrainingMode:"){
531  errorLog << __GRT_LOG__ << " Could not find the TrainingMode!" << std::endl;
532  return false;
533  }
534  UINT trainingModeTmp;
535  file >> trainingModeTmp;
536  trainingMode = (Tree::TrainingMode)trainingModeTmp;
537 
538  file >> word;
539  if(word != "ForestBuilt:"){
540  errorLog << __GRT_LOG__ << " Could not find the ForestBuilt!" << std::endl;
541  return false;
542  }
543  file >> trained;
544 
545  if( trained ){
546  //Find the forest header
547  file >> word;
548  if(word != "Forest:"){
549  errorLog << __GRT_LOG__ << " Could not find the Forest!" << std::endl;
550  return false;
551  }
552 
553  //Load each tree
554  UINT treeIndex;
555  forest.reserve( forestSize );
556  for(UINT i=0; i<forestSize; i++){
557 
558  file >> word;
559  if(word != "Tree:"){
560  errorLog << __GRT_LOG__ << " Could not find the Tree Header!" << std::endl;
561  std::cout << "WORD: " << word << std::endl;
562  std::cout << "Tree i: " << i << std::endl;
563  return false;
564  }
565  file >> treeIndex;
566 
567  if( treeIndex != i+1 ){
568  errorLog << __GRT_LOG__ << " Incorrect tree index: " << treeIndex << std::endl;
569  return false;
570  }
571 
572  file >> word;
573  if(word != "TreeNodeType:"){
574  errorLog << "load(string filename) - Could not find the TreeNodeType!" << std::endl;
575  std::cout << "WORD: " << word << std::endl;
576  std::cout << "i: " << i << std::endl;
577  return false;
578  }
579  file >> treeNodeType;
580 
581  //Create a new DTree
582  DecisionTreeNode *tree = dynamic_cast< DecisionTreeNode* >( DecisionTreeNode::createInstanceFromString( treeNodeType ) );
583 
584  if( tree == NULL ){
585  errorLog << "load(fstream &file) - Failed to create new Tree!" << std::endl;
586  return false;
587  }
588 
589  //Load the tree from the file
590  tree->setParent( NULL );
591  if( !tree->load( file ) ){
592  errorLog << "load(fstream &file) - Failed to load tree from file!" << std::endl;
593  return false;
594  }
595 
596  //Add the tree to the forest
597  forest.push_back( tree );
598  }
599  }
600 
601  return true;
602 }
603 
605 
606  if( !getTrained() ){
607  errorLog << "combineModels( const RandomForests &forest ) - This instance has not been trained!" << std::endl;
608  return false;
609  }
610 
611  if( !forest.getTrained() ){
612  errorLog << "combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << std::endl;
613  return false;
614  }
615 
616  if( this->getNumInputDimensions() != forest.getNumInputDimensions() ) {
617  errorLog << "combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest (";
618  errorLog << forest.getNumInputDimensions() << ") does not match the number of input dimensions of this instance (";
619  errorLog << this->getNumInputDimensions() << ")!" << std::endl;
620  return false;
621  }
622 
623  //Add the trees in the other forest to this model
624  DecisionTreeNode *node;
625  for(UINT i=0; i<forest.getForestSize(); i++){
626  node = forest.getTree(i);
627  if( node ){
628  this->forest.push_back( dynamic_cast<DecisionTreeNode*>(node->deepCopy()) );
629  forestSize++;
630  }
631  }
632 
633  return true;
634 }
635 
637  return forestSize;
638 }
639 
641  return numRandomSplits;
642 }
643 
645  return minNumSamplesPerNode;
646 }
647 
649  return maxDepth;
650 }
651 
653  return trainingMode;
654 }
655 
657  return removeFeaturesAtEachSplit;
658 }
659 
661  return bootstrappedDatasetWeight;
662 }
663 
665  return forest;
666 }
667 
669 
670  if( decisionTreeNode == NULL ){
671  return NULL;
672  }
673 
674  return dynamic_cast<DecisionTreeNode*>(decisionTreeNode->deepCopy());
675 }
676 
677 DecisionTreeNode* RandomForests::getTree( const UINT index ) const{
678 
679  if( !trained || index >= forestSize ) return NULL;
680 
681  return forest[ index ];
682 }
683 
684 VectorDouble RandomForests::getFeatureWeights( const bool normWeights ) const{
685 
686  if( !trained ) return VectorDouble();
687 
688  VectorDouble weights( numInputDimensions, 0 );
689 
690  for(UINT i=0; i<forestSize; i++){
691  if( !forest[i]->computeFeatureWeights( weights ) ){
692  warningLog << __GRT_LOG__ << " Failed to compute weights for tree: " << i << std::endl;
693  }
694  }
695 
696  //Normalize the weights
697  if( normWeights ){
698  Float sum = Util::sum( weights );
699  if( sum > 0.0 ){
700  const Float norm = 1.0 / sum;
701  for(UINT j=0; j<numInputDimensions; j++){
702  weights[j] *= norm;
703  }
704  }
705  }
706 
707  return weights;
708 }
709 
711 
712  if( !trained ) return MatrixDouble();
713 
714  MatrixDouble weights( getNumClasses(), numInputDimensions );
715  weights.setAllValues(0.0);
716 
717  for(UINT i=0; i<forestSize; i++){
718  if( !forest[i]->computeLeafNodeWeights( weights ) ){
719  warningLog << __GRT_LOG__ << " Failed to compute leaf node weights for tree: " << i << std::endl;
720  }
721  }
722 
723  //Normalize the weights
724  if( normWeights ){
725  for(UINT j=0; j<weights.getNumCols(); j++){
726  Float sum = 0.0;
727  for(UINT i=0; i<weights.getNumRows(); i++){
728  sum += weights[i][j];
729  }
730  if( sum != 0.0 ){
731  const Float norm = 1.0 / sum;
732  for(UINT i=0; i<weights.getNumRows(); i++){
733  weights[i][j] *= norm;
734  }
735  }
736  }
737  }
738 
739  return weights;
740 }
741 
742 bool RandomForests::setForestSize(const UINT forestSize){
743  if( forestSize > 0 ){
744  clear();
745  this->forestSize = forestSize;
746  return true;
747  }
748  return false;
749 }
750 
751 bool RandomForests::setNumRandomSplits(const UINT numRandomSplits){
752  if( numRandomSplits > 0 ){
753  this->numRandomSplits = numRandomSplits;
754  return true;
755  }
756  return false;
757 }
758 
759 bool RandomForests::setMinNumSamplesPerNode(const UINT minNumSamplesPerNode){
760  if( minNumSamplesPerNode > 0 ){
761  this->minNumSamplesPerNode = minNumSamplesPerNode;
762  return true;
763  }
764  return false;
765 }
766 
767 bool RandomForests::setMaxDepth(const UINT maxDepth){
768  if( maxDepth > 0 ){
769  this->maxDepth = maxDepth;
770  return true;
771  }
772  return false;
773 }
774 
//Sets whether a feature is removed from the candidate set after it has been used for a split. Always succeeds.
bool RandomForests::setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit){
    this->removeFeaturesAtEachSplit = removeFeaturesAtEachSplit;
    return true;
}
779 
//Deprecated misspelled alias ("Spilt") kept for backward compatibility; forwards to setRemoveFeaturesAtEachSplit.
bool RandomForests::setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt){
    return setRemoveFeaturesAtEachSplit(removeFeaturesAtEachSpilt);
}
783 
//Sets the tree training mode; only the two supported modes are accepted, anything else is
//rejected with a warning. Note: BEST_ITERATIVE_SPILT is the enum's actual (misspelled) name in Tree.
bool RandomForests::setTrainingMode(const Tree::TrainingMode trainingMode){
    if( trainingMode == Tree::BEST_ITERATIVE_SPILT || trainingMode == Tree::BEST_RANDOM_SPLIT ){
        this->trainingMode = trainingMode;
        return true;
    }
    warningLog << __GRT_LOG__ << " Unknown training mode!" << std::endl;
    return false;
}
792 
794 
795  if( decisionTreeNode != NULL ){
796  delete decisionTreeNode;
797  decisionTreeNode = NULL;
798  }
799  this->decisionTreeNode = dynamic_cast<DecisionTreeNode*>(node.deepCopy());
800 
801  return true;
802 }
803 
804 bool RandomForests::setBootstrappedDatasetWeight( const Float bootstrappedDatasetWeight ){
805 
806  if( bootstrappedDatasetWeight > 0.0 && bootstrappedDatasetWeight <= 1.0 ){
807  this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
808  return true;
809  }
810 
811  warningLog << __GRT_LOG__ << " Bad parameter, the weight must be > 0.0 and <= 1.0. Weight: " << bootstrappedDatasetWeight << std::endl;
812  return false;
813 }
814 
815 GRT_END_NAMESPACE
DecisionTreeNode * getTree(const UINT index) const
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:274
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
std::string getId() const
Definition: GRTBase.cpp:85
Definition: Timer.h:43
virtual bool clear() override
UINT getForestSize() const
virtual ~RandomForests(void)
bool setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit)
bool setBootstrappedDatasetWeight(const Float bootstrappedDatasetWeight)
virtual bool save(std::fstream &file) const
UINT getMaxDepth() const
virtual UINT getNumClasses() const
Definition: Classifier.cpp:209
bool enableScaling(const bool useScaling)
Definition: MLBase.cpp:308
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
std::string getNodeType() const
Definition: Node.cpp:300
bool setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit)
bool setTrainingMode(const Tree::TrainingMode trainingMode)
bool getTrained() const
Definition: MLBase.cpp:294
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
UINT getSize() const
Definition: Vector.h:201
Vector< UINT > getClassLabels() const
virtual Node * deepCopy() const override
bool combineModels(const RandomForests &forest)
signed long getMilliSeconds()
Definition: Timer.h:117
RandomForests & operator=(const RandomForests &rhs)
static std::string getId()
bool setNumRandomSplits(const UINT numSplittingSteps)
bool setAllValues(const T &value)
Definition: Matrix.h:366
virtual bool train_(ClassificationData &trainingData)
virtual bool computeAccuracy(const ClassificationData &data, Float &accuracy)
Definition: Classifier.cpp:171
MatrixDouble getLeafNodeFeatureWeights(const bool normWeights=true) const
DecisionTreeNode * deepCopyTree() const
UINT getNumSamples() const
RandomForests(const DecisionTreeNode &decisionTreeNode=DecisionTreeClusterNode(), const UINT forestSize=10, const UINT numRandomSplits=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const Tree::TrainingMode trainingMode=Tree::BEST_RANDOM_SPLIT, const bool removeFeaturesAtEachSplit=true, const bool useScaling=false, const Float bootstrappedDatasetWeight=0.8)
bool setForestSize(const UINT forestSize)
bool setValidationSetSize(const UINT validationSetSize)
Definition: MLBase.cpp:361
bool setMaxDepth(const UINT maxDepth)
UINT getNumRandomSplits() const
virtual bool deepCopyFrom(const Classifier *classifier)
Float getBootstrappedDatasetWeight() const
bool setMaxDepth(const UINT maxDepth)
bool setNumSplittingSteps(const UINT numSplittingSteps)
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:101
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:321
virtual bool save(std::fstream &file) const override
Definition: Node.cpp:136
Float getValidationSetAccuracy() const
Definition: MLBase.cpp:282
unsigned int getNumRows() const
Definition: Matrix.h:574
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
Definition: Matrix.h:581
UINT getMinNumSamplesPerNode() const
bool setDecisionTreeNode(const DecisionTreeNode &node)
bool start()
Definition: Timer.h:64
virtual bool predict_(VectorDouble &inputVector)
Vector< MinMax > getRanges() const
VectorDouble getFeatureWeights(const bool normWeights=true) const
const Vector< DecisionTreeNode * > & getForest() const
VectorFloat getValidationSetPrecision() const
Definition: MLBase.cpp:286
bool getRemoveFeaturesAtEachSplit() const
virtual bool load(std::fstream &file)
virtual bool train_(ClassificationData &trainingData) override
static Node * createInstanceFromString(std::string const &nodeType)
Definition: Node.cpp:29
virtual bool load(std::fstream &file) override
Definition: Node.cpp:178
ClassificationData getBootstrappedDataset(const UINT numSamples=0, const bool balanceDataset=false) const
bool setDecisionTreeNode(const DecisionTreeNode &node)
UINT getNumInputDimensions() const
Definition: MLBase.cpp:231
virtual bool clear()
bool setUseValidationSet(const bool useValidationSet)
Definition: MLBase.cpp:373
bool enableNullRejection(const bool useNullRejection)
Definition: Classifier.cpp:249
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: Classifier.cpp:151
virtual bool print() const
VectorFloat getValidationSetRecall() const
Definition: MLBase.cpp:290
This is the main base class that all GRT Classification algorithms should inherit from...
Definition: Classifier.h:41
static Float sum(const VectorFloat &x)
Definition: Util.cpp:178
UINT getTrainingMode() const
DecisionTreeNode * deepCopyDecisionTreeNode() const
bool setTrainingMode(const Tree::TrainingMode trainingMode)