21 #define GRT_DLL_EXPORTS
22 #include "RandomForests.h"
27 std::string RandomForests::id =
"RandomForests";
33 RandomForests::RandomForests(
const DecisionTreeNode &decisionTreeNode,
const UINT forestSize,
const UINT numRandomSplits,
const UINT minNumSamplesPerNode,
const UINT maxDepth,
const UINT trainingMode,
const bool removeFeaturesAtEachSpilt,
const bool useScaling,
const Float bootstrappedDatasetWeight)
35 this->decisionTreeNode = decisionTreeNode.
deepCopy();
36 this->forestSize = forestSize;
37 this->numRandomSplits = numRandomSplits;
38 this->minNumSamplesPerNode = minNumSamplesPerNode;
39 this->maxDepth = maxDepth;
40 this->trainingMode = trainingMode;
41 this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
42 this->useScaling = useScaling;
43 this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
45 classifierType = classType;
46 classifierMode = STANDARD_CLASSIFIER_MODE;
47 useNullRejection =
false;
48 supportsNullRejection =
false;
49 useValidationSet =
true;
50 validationSetSize = 20;
58 this->decisionTreeNode = NULL;
60 classifierType = classType;
61 classifierMode = STANDARD_CLASSIFIER_MODE;
73 if( decisionTreeNode != NULL ){
74 delete decisionTreeNode;
75 decisionTreeNode = NULL;
88 if( this->decisionTreeNode != NULL ){
89 delete decisionTreeNode;
90 decisionTreeNode = NULL;
96 for(UINT i=0; i<rhs.forest.size(); i++){
97 this->forest.push_back( rhs.forest[i]->deepCopy() );
101 this->forestSize = rhs.forestSize;
102 this->numRandomSplits = rhs.numRandomSplits;
103 this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
104 this->maxDepth = rhs.maxDepth;
105 this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
106 this->bootstrappedDatasetWeight = rhs.bootstrappedDatasetWeight;
107 this->trainingMode = rhs.trainingMode;
109 }
else errorLog <<
"deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
116 if( classifier == NULL )
return false;
128 if( this->decisionTreeNode != NULL ){
129 delete decisionTreeNode;
130 decisionTreeNode = NULL;
136 this->forest.reserve( ptr->forest.
getSize() );
137 for(UINT i=0; i<ptr->forest.
getSize(); i++){
138 this->forest.push_back( ptr->forest[i]->deepCopy() );
142 this->forestSize = ptr->forestSize;
143 this->numRandomSplits = ptr->numRandomSplits;
144 this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
145 this->maxDepth = ptr->maxDepth;
146 this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
147 this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight;
148 this->trainingMode = ptr->trainingMode;
153 errorLog <<
"deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << std::endl;
168 errorLog <<
"train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
172 if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
173 errorLog <<
"train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
177 numInputDimensions = N;
185 trainingData.
scale(0, 1);
188 if( useValidationSet ){
189 validationSetAccuracy = 0;
190 validationSetPrecision.
resize( useNullRejection ? K+1 : K, 0 );
191 validationSetRecall.
resize( useNullRejection ? K+1 : K, 0 );
198 forest.reserve( forestSize );
200 for(UINT i=0; i<forestSize; i++){
203 UINT datasetSize = (UINT)floor(trainingData.
getNumSamples() * bootstrappedDatasetWeight);
221 trainingLog <<
"Training decision tree " << i+1 <<
"/" << forestSize <<
"..." << std::endl;
224 if( !tree.
train_( data ) ){
225 errorLog <<
"train_(ClassificationData &trainingData) - Failed to train tree at forest index: " << i << std::endl;
231 trainingLog <<
"Decision tree trained in " << (computeTime*0.001)/60.0 <<
" minutes" << std::endl;
233 if( useValidationSet ){
234 Float forestNorm = 1.0 / forestSize;
239 grt_assert( precision.
getSize() == validationSetPrecision.
getSize() );
242 for(UINT i=0; i<validationSetPrecision.
getSize(); i++){
243 validationSetPrecision[i] += precision[i] * forestNorm;
246 for(UINT i=0; i<validationSetRecall.
getSize(); i++){
247 validationSetRecall[i] += recall[i] * forestNorm;
255 if( useValidationSet ){
256 validationSetAccuracy /= forestSize;
257 trainingLog <<
"Validation set accuracy: " << validationSetAccuracy << std::endl;
259 trainingLog <<
"Validation set precision: ";
260 for(UINT i=0; i<validationSetPrecision.
getSize(); i++){
261 trainingLog << validationSetPrecision[i] <<
" ";
263 trainingLog << std::endl;
265 trainingLog <<
"Validation set recall: ";
266 for(UINT i=0; i<validationSetRecall.
getSize(); i++){
267 trainingLog << validationSetRecall[i] <<
" ";
269 trainingLog << std::endl;
277 predictedClassLabel = 0;
281 errorLog <<
"predict_(VectorDouble &inputVector) - Model Not Trained!" << std::endl;
285 if( inputVector.
getSize() != numInputDimensions ){
286 errorLog <<
"predict_(VectorDouble &inputVector) - The size of the input Vector (" << inputVector.
getSize() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
291 for(UINT n=0; n<numInputDimensions; n++){
292 inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
296 if( classLikelihoods.
getSize() != numClasses ) classLikelihoods.
resize(numClasses,0);
297 if( classDistances.
getSize() != numClasses ) classDistances.
resize(numClasses,0);
299 std::fill(classDistances.begin(),classDistances.end(),0);
303 for(UINT i=0; i<forestSize; i++){
304 if( !forest[i]->
predict(inputVector, y) ){
305 errorLog <<
"predict_(VectorDouble &inputVector) - Tree " << i <<
" failed prediction!" << std::endl;
309 for(UINT j=0; j<numClasses; j++){
310 classDistances[j] += y[j];
317 Float classNorm = 1.0 / Float(forestSize);
318 for(UINT k=0; k<numClasses; k++){
319 classLikelihoods[k] = classDistances[k] * classNorm;
321 if( classLikelihoods[k] > maxLikelihood ){
322 maxLikelihood = classLikelihoods[k];
323 bestDistance = classDistances[k];
328 predictedClassLabel = classLabels[ bestIndex ];
339 for(UINT i=0; i<forest.
getSize(); i++){
340 if( forest[i] != NULL ){
353 std::cout <<
"RandomForest\n";
354 std::cout <<
"ForestSize: " << forestSize << std::endl;
355 std::cout <<
"NumSplittingSteps: " << numRandomSplits << std::endl;
356 std::cout <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
357 std::cout <<
"MaxDepth: " << maxDepth << std::endl;
358 std::cout <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
359 std::cout <<
"TrainingMode: " << trainingMode << std::endl;
360 std::cout <<
"ForestBuilt: " << (trained ? 1 : 0) << std::endl;
363 std::cout <<
"Forest:\n";
364 for(UINT i=0; i<forestSize; i++){
365 std::cout <<
"Tree: " << i+1 << std::endl;
377 errorLog <<
"save(fstream &file) - The file is not open!" << std::endl;
382 file <<
"GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";
386 errorLog <<
"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
390 if( decisionTreeNode != NULL ){
391 file <<
"DecisionTreeNodeType: " << decisionTreeNode->
getNodeType() << std::endl;
392 if( !decisionTreeNode->
save( file ) ){
393 Classifier::errorLog <<
"save(fstream &file) - Failed to save decisionTreeNode settings to file!" << std::endl;
397 file <<
"DecisionTreeNodeType: " <<
"NULL" << std::endl;
400 file <<
"ForestSize: " << forestSize << std::endl;
401 file <<
"NumSplittingSteps: " << numRandomSplits << std::endl;
402 file <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
403 file <<
"MaxDepth: " << maxDepth << std::endl;
404 file <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
405 file <<
"TrainingMode: " << trainingMode << std::endl;
406 file <<
"ForestBuilt: " << (trained ? 1 : 0) << std::endl;
410 for(UINT i=0; i<forestSize; i++){
411 file <<
"Tree: " << i+1 << std::endl;
412 file <<
"TreeNodeType: " << forest[i]->getNodeType() << std::endl;
413 if( !forest[i]->
save( file ) ){
414 errorLog <<
"save(fstream &file) - Failed to save tree " << i <<
" to file!" << std::endl;
429 errorLog <<
"load(string filename) - Could not open file to load model" << std::endl;
434 std::string treeNodeType;
439 if(word !=
"GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
440 errorLog <<
"load(string filename) - Could not find Model File Header" << std::endl;
446 errorLog <<
"load(string filename) - Failed to load base settings from file!" << std::endl;
451 if(word !=
"DecisionTreeNodeType:"){
452 Classifier::errorLog <<
"load(string filename) - Could not find the DecisionTreeNodeType!" << std::endl;
455 file >> treeNodeType;
457 if( treeNodeType !=
"NULL" ){
461 if( decisionTreeNode == NULL ){
462 Classifier::errorLog <<
"load(string filename) - Could not create new DecisionTreeNode from type: " << treeNodeType << std::endl;
466 if( !decisionTreeNode->
load( file ) ){
467 Classifier::errorLog <<
"load(fstream &file) - Failed to load decisionTreeNode settings from file!" << std::endl;
471 Classifier::errorLog <<
"load(fstream &file) - Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;
476 if(word !=
"ForestSize:"){
477 errorLog <<
"load(string filename) - Could not find the ForestSize!" << std::endl;
483 if(word !=
"NumSplittingSteps:"){
484 errorLog <<
"load(string filename) - Could not find the NumSplittingSteps!" << std::endl;
487 file >> numRandomSplits;
490 if(word !=
"MinNumSamplesPerNode:"){
491 errorLog <<
"load(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
494 file >> minNumSamplesPerNode;
497 if(word !=
"MaxDepth:"){
498 errorLog <<
"load(string filename) - Could not find the MaxDepth!" << std::endl;
504 if(word !=
"RemoveFeaturesAtEachSpilt:"){
505 errorLog <<
"load(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
508 file >> removeFeaturesAtEachSpilt;
511 if(word !=
"TrainingMode:"){
512 errorLog <<
"load(string filename) - Could not find the TrainingMode!" << std::endl;
515 file >> trainingMode;
518 if(word !=
"ForestBuilt:"){
519 errorLog <<
"load(string filename) - Could not find the ForestBuilt!" << std::endl;
527 if(word !=
"Forest:"){
528 errorLog <<
"load(string filename) - Could not find the Forest!" << std::endl;
534 forest.reserve( forestSize );
535 for(UINT i=0; i<forestSize; i++){
539 errorLog <<
"load(string filename) - Could not find the Tree Header!" << std::endl;
540 std::cout <<
"WORD: " << word << std::endl;
541 std::cout <<
"Tree i: " << i << std::endl;
546 if( treeIndex != i+1 ){
547 errorLog <<
"load(string filename) - Incorrect tree index: " << treeIndex << std::endl;
552 if(word !=
"TreeNodeType:"){
553 errorLog <<
"load(string filename) - Could not find the TreeNodeType!" << std::endl;
554 std::cout <<
"WORD: " << word << std::endl;
555 std::cout <<
"i: " << i << std::endl;
558 file >> treeNodeType;
564 errorLog <<
"load(fstream &file) - Failed to create new Tree!" << std::endl;
569 tree->setParent( NULL );
570 if( !tree->
load( file ) ){
571 errorLog <<
"load(fstream &file) - Failed to load tree from file!" << std::endl;
576 forest.push_back( tree );
586 errorLog <<
"combineModels( const RandomForests &forest ) - This instance has not been trained!" << std::endl;
591 errorLog <<
"combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << std::endl;
596 errorLog <<
"combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest (";
597 errorLog << forest.
getNumInputDimensions() <<
") does not match the number of input dimensions of this instance (";
607 this->forest.push_back( node->
deepCopy() );
620 return numRandomSplits;
624 return minNumSamplesPerNode;
636 return removeFeaturesAtEachSpilt;
640 return bootstrappedDatasetWeight;
649 if( decisionTreeNode == NULL ){
653 return decisionTreeNode->
deepCopy();
658 if( !trained || index >= forestSize )
return NULL;
660 return forest[ index ];
669 for(UINT i=0; i<forestSize; i++){
670 if( !forest[i]->computeFeatureWeights( weights ) ){
671 warningLog <<
"getFeatureWeights( const bool normWeights ) - Failed to compute weights for tree: " << i << std::endl;
679 const Float norm = 1.0 / sum;
680 for(UINT j=0; j<numInputDimensions; j++){
696 for(UINT i=0; i<forestSize; i++){
697 if( !forest[i]->computeLeafNodeWeights( weights ) ){
698 warningLog <<
"computeLeafNodeWeights( const bool normWeights ) - Failed to compute leaf node weights for tree: " << i << std::endl;
707 sum += weights[i][j];
710 const Float norm = 1.0 / sum;
712 weights[i][j] *= norm;
722 if( forestSize > 0 ){
724 this->forestSize = forestSize;
731 if( numRandomSplits > 0 ){
732 this->numRandomSplits = numRandomSplits;
739 if( minNumSamplesPerNode > 0 ){
740 this->minNumSamplesPerNode = minNumSamplesPerNode;
748 this->maxDepth = maxDepth;
755 this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
761 if( trainingMode == DecisionTree::BEST_ITERATIVE_SPILT || trainingMode == DecisionTree::BEST_RANDOM_SPLIT ){
762 this->trainingMode = trainingMode;
766 warningLog <<
"setTrainingMode(const UINT mode) - Unknown training mode!" << std::endl;
772 if( decisionTreeNode != NULL ){
773 delete decisionTreeNode;
774 decisionTreeNode = NULL;
776 this->decisionTreeNode = node.
deepCopy();
783 if( bootstrappedDatasetWeight > 0.0 && bootstrappedDatasetWeight <= 1.0 ){
784 this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
788 warningLog <<
"setBootstrappedDatasetWeight(...) - Bad parameter, the weight must be > 0.0 and <= 1.0. Weight: " << bootstrappedDatasetWeight << std::endl;
DecisionTreeNode * getTree(const UINT index) const
bool saveBaseSettingsToFile(std::fstream &file) const
virtual bool predict(VectorFloat inputVector)
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
UINT getForestSize() const
RandomForests(const DecisionTreeNode &decisionTreeNode=DecisionTreeClusterNode(), const UINT forestSize=10, const UINT numRandomSplits=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const UINT trainingMode=DecisionTree::BEST_RANDOM_SPLIT, const bool removeFeaturesAtEachSpilt=true, const bool useScaling=false, const Float bootstrappedDatasetWeight=0.8)
virtual ~RandomForests(void)
bool setBootstrappedDatasetWeight(const Float bootstrappedDatasetWeight)
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
virtual bool save(std::fstream &file) const
std::string getClassifierType() const
virtual UINT getNumClasses() const
bool enableScaling(const bool useScaling)
virtual bool resize(const unsigned int size)
virtual bool train_(ClassificationData &trainingData)
std::string getNodeType() const
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
bool setNumSplittingSteps(const UINT numSplittingSteps)
Vector< UINT > getClassLabels() const
bool combineModels(const RandomForests &forest)
signed long getMilliSeconds()
RandomForests & operator=(const RandomForests &rhs)
static std::string getId()
bool setNumRandomSplits(const UINT numSplittingSteps)
bool setAllValues(const T &value)
virtual bool train_(ClassificationData &trainingData)
MatrixDouble getLeafNodeFeatureWeights(const bool normWeights=true) const
DecisionTreeNode * deepCopyTree() const
virtual bool save(std::fstream &file) const
UINT getNumSamples() const
bool setForestSize(const UINT forestSize)
bool setValidationSetSize(const UINT validationSetSize)
bool setMaxDepth(const UINT maxDepth)
UINT getNumRandomSplits() const
virtual bool deepCopyFrom(const Classifier *classifier)
Float getBootstrappedDatasetWeight() const
bool copyBaseVariables(const Classifier *classifier)
bool loadBaseSettingsFromFile(std::fstream &file)
Float getValidationSetAccuracy() const
bool setMaxDepth(const UINT maxDepth)
unsigned int getNumRows() const
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
bool setTrainingMode(const UINT trainingMode)
UINT getMinNumSamplesPerNode() const
bool setDecisionTreeNode(const DecisionTreeNode &node)
virtual bool predict_(VectorDouble &inputVector)
DecisionTreeNode * deepCopy() const
Vector< MinMax > getRanges() const
VectorDouble getFeatureWeights(const bool normWeights=true) const
const Vector< DecisionTreeNode * > & getForest() const
VectorFloat getValidationSetPrecision() const
bool enableNullRejection(bool useNullRejection)
virtual bool load(std::fstream &file)
bool getRemoveFeaturesAtEachSpilt() const
static Node * createInstanceFromString(std::string const &nodeType)
bool setTrainingMode(const UINT trainingMode)
bool setDecisionTreeNode(const DecisionTreeNode &node)
UINT getNumInputDimensions() const
ClassificationData getBootstrappedDataset(UINT numSamples=0, bool balanceDataset=false) const
bool setUseValidationSet(const bool useValidationSet)
bool scale(const Float minTarget, const Float maxTarget)
virtual bool load(std::fstream &file)
virtual bool print() const
VectorFloat getValidationSetRecall() const
static Float sum(const VectorFloat &x)
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
UINT getTrainingMode() const
DecisionTreeNode * deepCopyDecisionTreeNode() const