#include "RandomForests.h"
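// RandomForests trains an ensemble of GRT DecisionTree classifiers, each on a bootstrapped
// subset of the training data, and averages their votes at prediction time.
//
// A minimal usage sketch (an illustration only; it assumes a labelled GRT::ClassificationData
// set named trainingData and a test sample named testSample, neither of which is defined here):
//
//   GRT::RandomForests rf( GRT::DecisionTreeClusterNode(), 10 );  //10 trees with the default node type
//   if( rf.train( trainingData ) ){
//       rf.predict( testSample );
//       GRT::UINT label = rf.getPredictedClassLabel();
//   }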
RandomForests::RandomForests(const DecisionTreeNode &decisionTreeNode,
                             const UINT forestSize,
                             const UINT numRandomSplits,
                             const UINT minNumSamplesPerNode,
                             const UINT maxDepth,
                             const UINT trainingMode,
                             const bool removeFeaturesAtEachSpilt,
                             const bool useScaling,
                             const Float bootstrappedDatasetWeight)
{
    this->decisionTreeNode = decisionTreeNode.deepCopy();
    this->forestSize = forestSize;
    this->numRandomSplits = numRandomSplits;
    this->minNumSamplesPerNode = minNumSamplesPerNode;
    this->maxDepth = maxDepth;
    this->trainingMode = trainingMode;
    this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
    this->useScaling = useScaling;
    this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
    classType = "RandomForests";
    classifierType = classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    useNullRejection = false;
    supportsNullRejection = false;
    useValidationSet = true;
    validationSetSize = 20;
    debugLog.setProceedingText("[DEBUG RandomForests]");
    errorLog.setProceedingText("[ERROR RandomForests]");
    trainingLog.setProceedingText("[TRAINING RandomForests]");
    warningLog.setProceedingText("[WARNING RandomForests]");
}
    this->decisionTreeNode = NULL;
    classType = "RandomForests";
    classifierType = classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    debugLog.setProceedingText("[DEBUG RandomForests]");
    errorLog.setProceedingText("[ERROR RandomForests]");
    trainingLog.setProceedingText("[TRAINING RandomForests]");
    warningLog.setProceedingText("[WARNING RandomForests]");
    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }
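//Copy the forest and the random forest settings from another instance (rhs)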
    if( this->decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }

    //Deep copy the forest from rhs
    for(UINT i=0; i<rhs.forest.size(); i++){
        this->forest.push_back( rhs.forest[i]->deepCopy() );
    }

    if( copyBaseVariables( &rhs ) ){
        this->forestSize = rhs.forestSize;
        this->numRandomSplits = rhs.numRandomSplits;
        this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
        this->maxDepth = rhs.maxDepth;
        this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
        this->bootstrappedDatasetWeight = rhs.bootstrappedDatasetWeight;
        this->trainingMode = rhs.trainingMode;
    }else errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
bool RandomForests::deepCopyFrom(const Classifier *classifier){

    if( classifier == NULL ) return false;
    if( this->getClassifierType() != classifier->getClassifierType() ) return false;

    RandomForests *ptr = (RandomForests*)classifier;

    //Clear any previous model
    clear();

    if( this->decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }

    //Deep copy the decision tree node and the forest
    this->decisionTreeNode = ptr->deepCopyDecisionTreeNode();
    this->forest.reserve( ptr->forest.size() );
    for(size_t i=0; i<ptr->forest.size(); i++){
        this->forest.push_back( ptr->forest[i]->deepCopy() );
    }

    if( copyBaseVariables( classifier ) ){
        this->forestSize = ptr->forestSize;
        this->numRandomSplits = ptr->numRandomSplits;
        this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
        this->maxDepth = ptr->maxDepth;
        this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
        this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight;
        this->trainingMode = ptr->trainingMode;
        return true;
    }

    errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
    return false;
}
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
        errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
        return false;
    }

    numInputDimensions = N;    //N is the number of dimensions in the training data

    if( useScaling ){
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationSetAccuracy = 0;
        validationSetPrecision.resize( useNullRejection ? K+1 : K, 0 );    //K is the number of classes
        validationSetRecall.resize( useNullRejection ? K+1 : K, 0 );
    }

    forest.reserve( forestSize );

    for(UINT i=0; i<forestSize; i++){

        //Build a bootstrapped dataset (data) for this tree and configure a DecisionTree (tree) with the forest settings
        UINT datasetSize = (UINT)(trainingData.getNumSamples() * bootstrappedDatasetWeight);

        trainingLog << "Training decision tree " << i+1 << "/" << forestSize << "..." << std::endl;

        if( !tree.train_( data ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train tree at forest index: " << i << std::endl;
            return false;
        }

        trainingLog << "Decision tree trained in " << (computeTime*0.001)/60.0 << " minutes" << std::endl;    //computeTime is in milliseconds

        if( useValidationSet ){
            //precision and recall hold this tree's validation results; average them over the forest
            Float forestNorm = 1.0 / forestSize;

            grt_assert( precision.getSize() == validationSetPrecision.getSize() );

            for(UINT i=0; i<validationSetPrecision.getSize(); i++){
                validationSetPrecision[i] += precision[i] * forestNorm;
            }

            for(UINT i=0; i<validationSetRecall.getSize(); i++){
                validationSetRecall[i] += recall[i] * forestNorm;
            }
        }
    }

    if( useValidationSet ){
        validationSetAccuracy /= forestSize;
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;

        trainingLog << "Validation set precision: ";
        for(UINT i=0; i<validationSetPrecision.getSize(); i++){
            trainingLog << validationSetPrecision[i] << " ";
        }
        trainingLog << std::endl;

        trainingLog << "Validation set recall: ";
        for(UINT i=0; i<validationSetRecall.getSize(); i++){
            trainingLog << validationSetRecall[i] << " ";
        }
        trainingLog << std::endl;
    }
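//predict_: run the input through every tree, sum the per-class votes, and normalise them into class likelihoods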
    predictedClassLabel = 0;

    if( !trained ){
        errorLog << "predict_(VectorDouble &inputVector) - Model Not Trained!" << std::endl;
        return false;
    }

    if( inputVector.getSize() != numInputDimensions ){
        errorLog << "predict_(VectorDouble &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;
        return false;
    }

    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses,0);

    std::fill(classDistances.begin(),classDistances.end(),0);

    //Sum the per-class votes (y) from every tree in the forest
    for(UINT i=0; i<forestSize; i++){
        if( !forest[i]->predict(inputVector, y) ){
            errorLog << "predict_(VectorDouble &inputVector) - Tree " << i << " failed prediction!" << std::endl;
            return false;
        }

        for(UINT j=0; j<numClasses; j++){
            classDistances[j] += y[j];
        }
    }

    //Normalise the summed votes into class likelihoods and pick the most likely class
    UINT bestIndex = 0;
    maxLikelihood = 0;
    Float classNorm = 1.0 / Float(forestSize);
    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] = classDistances[k] * classNorm;

        if( classLikelihoods[k] > maxLikelihood ){
            maxLikelihood = classLikelihoods[k];
            bestDistance = classDistances[k];
            bestIndex = k;
        }
    }

    predictedClassLabel = classLabels[ bestIndex ];
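//Delete every tree in the forest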
    for(UINT i=0; i<forest.getSize(); i++){
        if( forest[i] != NULL ){
            delete forest[i];
            forest[i] = NULL;
        }
    }
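//Print the forest settings and each tree to std::cout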
    std::cout << "RandomForest\n";
    std::cout << "ForestSize: " << forestSize << std::endl;
    std::cout << "NumSplittingSteps: " << numRandomSplits << std::endl;
    std::cout << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
    std::cout << "MaxDepth: " << maxDepth << std::endl;
    std::cout << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
    std::cout << "TrainingMode: " << trainingMode << std::endl;
    std::cout << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;

    std::cout << "Forest:\n";
    for(UINT i=0; i<forestSize; i++){
        std::cout << "Tree: " << i+1 << std::endl;
        forest[i]->print();
    }
    if( !file.is_open() ){
        errorLog << "saveModelToFile(fstream &file) - The file is not open!" << std::endl;
        return false;
    }

    //Write the file header
    file << "GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";

    //Write the base classifier settings
    if( !Classifier::saveBaseSettingsToFile(file) ){
        errorLog << "saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
        return false;
    }

    if( decisionTreeNode != NULL ){
        file << "DecisionTreeNodeType: " << decisionTreeNode->getNodeType() << std::endl;
        if( !decisionTreeNode->saveToFile( file ) ){
            Classifier::errorLog << "saveModelToFile(fstream &file) - Failed to save decisionTreeNode settings to file!" << std::endl;
            return false;
        }
    }else{
        file << "DecisionTreeNodeType: " << "NULL" << std::endl;
    }

    file << "ForestSize: " << forestSize << std::endl;
    file << "NumSplittingSteps: " << numRandomSplits << std::endl;
    file << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
    file << "MaxDepth: " << maxDepth << std::endl;
    file << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
    file << "TrainingMode: " << trainingMode << std::endl;
    file << "ForestBuilt: " << (trained ? 1 : 0) << std::endl;

    for(UINT i=0; i<forestSize; i++){
        file << "Tree: " << i+1 << std::endl;
        file << "TreeNodeType: " << forest[i]->getNodeType() << std::endl;
        if( !forest[i]->saveToFile( file ) ){
            errorLog << "saveModelToFile(fstream &file) - Failed to save tree " << i << " to file!" << std::endl;
            return false;
        }
    }
    if( !file.is_open() ){
        errorLog << "loadModelFromFile(string filename) - Could not open file to load model" << std::endl;
        return false;
    }

    std::string word;
    std::string treeNodeType;

    //Check the file header
    file >> word;
    if(word != "GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
        errorLog << "loadModelFromFile(string filename) - Could not find Model File Header" << std::endl;
        return false;
    }

    //Load the base classifier settings
    if( !Classifier::loadBaseSettingsFromFile(file) ){
        errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;
        return false;
    }

    file >> word;
    if(word != "DecisionTreeNodeType:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the DecisionTreeNodeType!" << std::endl;
        return false;
    }
    file >> treeNodeType;

    if( treeNodeType != "NULL" ){
        //Create a new DecisionTreeNode of the stored type and load its settings
        decisionTreeNode = dynamic_cast< DecisionTreeNode* >( Node::createInstanceFromString( treeNodeType ) );

        if( decisionTreeNode == NULL ){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not create new DecisionTreeNode from type: " << treeNodeType << std::endl;
            return false;
        }

        if( !decisionTreeNode->loadFromFile( file ) ){
            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load decisionTreeNode settings from file!" << std::endl;
            return false;
        }
    }else{
        Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;
        return false;
    }

    file >> word;
    if(word != "ForestSize:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the ForestSize!" << std::endl;
        return false;
    }
    file >> forestSize;

    file >> word;
    if(word != "NumSplittingSteps:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;
        return false;
    }
    file >> numRandomSplits;

    file >> word;
    if(word != "MinNumSamplesPerNode:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
        return false;
    }
    file >> minNumSamplesPerNode;

    file >> word;
    if(word != "MaxDepth:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;
        return false;
    }
    file >> maxDepth;

    file >> word;
    if(word != "RemoveFeaturesAtEachSpilt:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
        return false;
    }
    file >> removeFeaturesAtEachSpilt;

    file >> word;
    if(word != "TrainingMode:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;
        return false;
    }
    file >> trainingMode;

    file >> word;
    if(word != "ForestBuilt:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the ForestBuilt!" << std::endl;
        return false;
    }
    file >> trained;

    if( trained ){
        file >> word;
        if(word != "Forest:"){
            errorLog << "loadModelFromFile(string filename) - Could not find the Forest!" << std::endl;
            return false;
        }

        //Load each tree in the forest
        forest.reserve( forestSize );
        for(UINT i=0; i<forestSize; i++){

            UINT treeIndex;
            file >> word;
            if(word != "Tree:"){
                errorLog << "loadModelFromFile(string filename) - Could not find the Tree Header!" << std::endl;
                std::cout << "WORD: " << word << std::endl;
                std::cout << "Tree i: " << i << std::endl;
                return false;
            }
            file >> treeIndex;

            if( treeIndex != i+1 ){
                errorLog << "loadModelFromFile(string filename) - Incorrect tree index: " << treeIndex << std::endl;
                return false;
            }

            file >> word;
            if(word != "TreeNodeType:"){
                errorLog << "loadModelFromFile(string filename) - Could not find the TreeNodeType!" << std::endl;
                std::cout << "WORD: " << word << std::endl;
                std::cout << "i: " << i << std::endl;
                return false;
            }
            file >> treeNodeType;

            //Create the tree node from its type string, then load it from the file
            DecisionTreeNode *tree = dynamic_cast< DecisionTreeNode* >( Node::createInstanceFromString( treeNodeType ) );
            if( tree == NULL ){
                errorLog << "loadModelFromFile(fstream &file) - Failed to create new Tree!" << std::endl;
                return false;
            }

            tree->setParent( NULL );
            if( !tree->loadFromFile( file ) ){
                errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;
                return false;
            }

            forest.push_back( tree );
        }
    }
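//combineModels: append deep copies of the trees from another trained forest to this forest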
bool RandomForests::combineModels( const RandomForests &forest ){

    if( !getTrained() ){
        errorLog << "combineModels( const RandomForests &forest ) - This instance has not been trained!" << std::endl;
        return false;
    }

    if( !forest.getTrained() ){
        errorLog << "combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << std::endl;
        return false;
    }

    if( this->getNumInputDimensions() != forest.getNumInputDimensions() ){
        errorLog << "combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest (";
        errorLog << forest.getNumInputDimensions() << ") does not match the number of input dimensions of this instance (";
        errorLog << this->getNumInputDimensions() << ")" << std::endl;
        return false;
    }

    //Add a deep copy of each tree in the external forest to this forest
    for(UINT i=0; i<forest.getForestSize(); i++){
        DecisionTreeNode *node = forest.getTree(i);
        if( node != NULL ) this->forest.push_back( node->deepCopy() );
    }

    return true;
}
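//Getters for the forest settings, a deep copy of the stored decision tree node, and individual trees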
UINT RandomForests::getNumRandomSplits() const{ return numRandomSplits; }

UINT RandomForests::getMinNumSamplesPerNode() const{ return minNumSamplesPerNode; }

bool RandomForests::getRemoveFeaturesAtEachSpilt() const{ return removeFeaturesAtEachSpilt; }

Float RandomForests::getBootstrappedDatasetWeight() const{ return bootstrappedDatasetWeight; }

DecisionTreeNode* RandomForests::deepCopyDecisionTreeNode() const{
    if( decisionTreeNode == NULL ) return NULL;
    return decisionTreeNode->deepCopy();
}

DecisionTreeNode* RandomForests::getTree(const UINT index) const{
    if( !trained || index >= forestSize ) return NULL;
    return forest[ index ];
}
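//Feature and leaf node weights are accumulated across all trees in the forest and can optionally be normalised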
    //getFeatureWeights: accumulate the feature weights from every tree in the forest
    for(UINT i=0; i<forestSize; i++){
        if( !forest[i]->computeFeatureWeights( weights ) ){
            warningLog << "getFeatureWeights( const bool normWeights ) - Failed to compute weights for tree: " << i << std::endl;
        }
    }

    //Normalise the accumulated weights, where sum is the total weight over all input dimensions
    const Float norm = 1.0 / sum;
    for(UINT j=0; j<numInputDimensions; j++){
        weights[j] *= norm;
    }

    //getLeafNodeFeatureWeights: accumulate the leaf node weights from every tree in the forest
    for(UINT i=0; i<forestSize; i++){
        if( !forest[i]->computeLeafNodeWeights( weights ) ){
            warningLog << "computeLeafNodeWeights( const bool normWeights ) - Failed to compute leaf node weights for tree: " << i << std::endl;
        }
    }

    //Normalisation (fragment): sum the leaf node weights, then scale by 1/sum
    sum += weights[i][j];

    const Float norm = 1.0 / sum;
    weights[i][j] *= norm;
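//Setters validate the new value where a valid range exists; invalid values are rejected with a warning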
    //setForestSize
    if( forestSize > 0 ){
        this->forestSize = forestSize;
    }

    //setNumRandomSplits
    if( numRandomSplits > 0 ){
        this->numRandomSplits = numRandomSplits;
    }

    //setMinNumSamplesPerNode
    if( minNumSamplesPerNode > 0 ){
        this->minNumSamplesPerNode = minNumSamplesPerNode;
    }

    //setMaxDepth
    this->maxDepth = maxDepth;

    //setRemoveFeaturesAtEachSpilt
    this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;

    //setTrainingMode: only the two DecisionTree split modes are accepted
    if( trainingMode == DecisionTree::BEST_ITERATIVE_SPILT || trainingMode == DecisionTree::BEST_RANDOM_SPLIT ){
        this->trainingMode = trainingMode;
    }else{
        warningLog << "setTrainingMode(const UINT mode) - Unknown training mode!" << std::endl;
    }

    //setDecisionTreeNode: replace the stored node with a deep copy of the new one
    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }
    this->decisionTreeNode = node.deepCopy();

    //setBootstrappedDatasetWeight: the weight must be greater than 0 and no more than 1
    if( bootstrappedDatasetWeight > 0.0 && bootstrappedDatasetWeight <= 1.0 ){
        this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
    }else{
        warningLog << "setBootstrappedDatasetWeight(...) - Bad parameter, the weight must be > 0.0 and <= 1.0. Weight: " << bootstrappedDatasetWeight << std::endl;
    }