21 #define GRT_DLL_EXPORTS 27 const std::string RandomForests::id =
"RandomForests";
36 this->forestSize = forestSize;
37 this->numRandomSplits = numRandomSplits;
38 this->minNumSamplesPerNode = minNumSamplesPerNode;
39 this->maxDepth = maxDepth;
40 this->trainingMode = trainingMode;
41 this->removeFeaturesAtEachSplit = removeFeaturesAtEachSplit;
42 this->useScaling = useScaling;
43 this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
44 classifierMode = STANDARD_CLASSIFIER_MODE;
45 useNullRejection =
false;
46 supportsNullRejection =
false;
47 useValidationSet =
true;
48 validationSetSize = 20;
53 this->decisionTreeNode = NULL;
54 classifierMode = STANDARD_CLASSIFIER_MODE;
62 if( decisionTreeNode != NULL ){
63 this->decisionTreeNode->
clear();
64 delete decisionTreeNode;
65 decisionTreeNode = NULL;
78 if( this->decisionTreeNode != NULL ){
79 this->decisionTreeNode->
clear();
80 delete decisionTreeNode;
81 decisionTreeNode = NULL;
87 for(UINT i=0; i<rhs.forest.size(); i++){
88 this->forest.push_back( dynamic_cast<DecisionTreeNode*>(rhs.forest[i]->deepCopy()) );
92 this->forestSize = rhs.forestSize;
93 this->numRandomSplits = rhs.numRandomSplits;
94 this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
95 this->maxDepth = rhs.maxDepth;
96 this->removeFeaturesAtEachSplit = rhs.removeFeaturesAtEachSplit;
97 this->bootstrappedDatasetWeight = rhs.bootstrappedDatasetWeight;
98 this->trainingMode = rhs.trainingMode;
100 }
else errorLog << __GRT_LOG__ <<
" Failed to copy base variables!" << std::endl;
107 if( classifier == NULL )
return false;
119 if( this->decisionTreeNode != NULL ){
120 delete decisionTreeNode;
121 decisionTreeNode = NULL;
127 this->forest.reserve( ptr->forest.
getSize() );
128 for(UINT i=0; i<ptr->forest.
getSize(); i++){
129 this->forest.push_back( dynamic_cast<DecisionTreeNode*>(ptr->forest[i]->deepCopy()) );
133 this->forestSize = ptr->forestSize;
134 this->numRandomSplits = ptr->numRandomSplits;
135 this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
136 this->maxDepth = ptr->maxDepth;
137 this->removeFeaturesAtEachSplit = ptr->removeFeaturesAtEachSplit;
138 this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight;
139 this->trainingMode = ptr->trainingMode;
144 errorLog << __GRT_LOG__ <<
" Failed to copy base variables!" << std::endl;
159 errorLog << __GRT_LOG__ <<
" Training data has zero samples!" << std::endl;
163 if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
164 errorLog << __GRT_LOG__ <<
" Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
168 numInputDimensions = N;
169 numOutputDimensions = K;
177 trainingData.
scale(0, 1);
180 if( useValidationSet ){
181 validationSetAccuracy = 0;
182 validationSetPrecision.
resize( useNullRejection ? K+1 : K, 0 );
183 validationSetRecall.
resize( useNullRejection ? K+1 : K, 0 );
190 forest.reserve( forestSize );
192 for(UINT i=0; i<forestSize; i++){
195 UINT datasetSize = (UINT)floor(trainingData.
getNumSamples() * bootstrappedDatasetWeight);
213 trainingLog <<
"Training decision tree " << i+1 <<
"/" << forestSize <<
"..." << std::endl;
216 if( !tree.
train_( data ) ){
217 errorLog << __GRT_LOG__ <<
" Failed to train tree at forest index: " << i << std::endl;
223 trainingLog <<
"Decision tree trained in " << (computeTime*0.001)/60.0 <<
" minutes" << std::endl;
225 if( useValidationSet ){
226 Float forestNorm = 1.0 / forestSize;
231 grt_assert( precision.
getSize() == validationSetPrecision.
getSize() );
234 for(UINT i=0; i<validationSetPrecision.
getSize(); i++){
235 validationSetPrecision[i] += precision[i] * forestNorm;
238 for(UINT i=0; i<validationSetRecall.
getSize(); i++){
239 validationSetRecall[i] += recall[i] * forestNorm;
252 trainingSetAccuracy = 0;
255 bool scalingState = useScaling;
260 errorLog << __GRT_LOG__ <<
" Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
264 trainingLog <<
"Training set accuracy: " << trainingSetAccuracy << std::endl;
267 useScaling = scalingState;
269 if( useValidationSet ){
270 validationSetAccuracy /= forestSize;
271 trainingLog <<
"Validation set accuracy: " << validationSetAccuracy << std::endl;
273 trainingLog <<
"Validation set precision: ";
274 for(UINT i=0; i<validationSetPrecision.
getSize(); i++){
275 trainingLog << validationSetPrecision[i] <<
" ";
277 trainingLog << std::endl;
279 trainingLog <<
"Validation set recall: ";
280 for(UINT i=0; i<validationSetRecall.
getSize(); i++){
281 trainingLog << validationSetRecall[i] <<
" ";
283 trainingLog << std::endl;
291 predictedClassLabel = 0;
295 errorLog << __GRT_LOG__ <<
" Model Not Trained!" << std::endl;
299 if( inputVector.
getSize() != numInputDimensions ){
300 errorLog << __GRT_LOG__ <<
" The size of the input Vector (" << inputVector.
getSize() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
305 for(UINT n=0; n<numInputDimensions; n++){
306 inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
310 if( classLikelihoods.
getSize() != numClasses ) classLikelihoods.
resize(numClasses,0);
311 if( classDistances.
getSize() != numClasses ) classDistances.
resize(numClasses,0);
313 std::fill(classDistances.begin(),classDistances.end(),0);
317 for(UINT i=0; i<forestSize; i++){
318 if( !forest[i]->
predict_(inputVector, y) ){
319 errorLog << __GRT_LOG__ <<
" Tree " << i <<
" failed prediction!" << std::endl;
323 for(UINT j=0; j<numClasses; j++){
324 classDistances[j] += y[j];
331 Float classNorm = 1.0 / Float(forestSize);
332 for(UINT k=0; k<numClasses; k++){
333 classLikelihoods[k] = classDistances[k] * classNorm;
335 if( classLikelihoods[k] > maxLikelihood ){
336 maxLikelihood = classLikelihoods[k];
337 bestDistance = classDistances[k];
342 predictedClassLabel = classLabels[ bestIndex ];
353 for(UINT i=0; i<forest.
getSize(); i++){
354 if( forest[i] != NULL ){
362 if( this->decisionTreeNode ){
363 this->decisionTreeNode->
clear();
372 std::cout <<
"RandomForest\n";
373 std::cout <<
"ForestSize: " << forestSize << std::endl;
374 std::cout <<
"NumSplittingSteps: " << numRandomSplits << std::endl;
375 std::cout <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
376 std::cout <<
"MaxDepth: " << maxDepth << std::endl;
377 std::cout <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSplit << std::endl;
378 std::cout <<
"TrainingMode: " << trainingMode << std::endl;
379 std::cout <<
"ForestBuilt: " << (trained ? 1 : 0) << std::endl;
382 std::cout <<
"Forest:\n";
383 for(UINT i=0; i<forestSize; i++){
384 std::cout <<
"Tree: " << i+1 << std::endl;
396 errorLog << __GRT_LOG__ <<
" The file is not open!" << std::endl;
401 file <<
"GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";
405 errorLog << __GRT_LOG__ <<
" Failed to save classifier base settings to file!" << std::endl;
409 if( decisionTreeNode != NULL ){
410 file <<
"DecisionTreeNodeType: " << decisionTreeNode->
getNodeType() << std::endl;
411 if( !decisionTreeNode->
save( file ) ){
412 errorLog << __GRT_LOG__ <<
" Failed to save decisionTreeNode settings to file!" << std::endl;
416 file <<
"DecisionTreeNodeType: " <<
"NULL" << std::endl;
419 file <<
"ForestSize: " << forestSize << std::endl;
420 file <<
"NumSplittingSteps: " << numRandomSplits << std::endl;
421 file <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
422 file <<
"MaxDepth: " << maxDepth << std::endl;
423 file <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSplit << std::endl;
424 file <<
"TrainingMode: " << trainingMode << std::endl;
425 file <<
"ForestBuilt: " << (trained ? 1 : 0) << std::endl;
429 for(UINT i=0; i<forestSize; i++){
430 file <<
"Tree: " << i+1 << std::endl;
431 file <<
"TreeNodeType: " << forest[i]->getNodeType() << std::endl;
432 if( !forest[i]->
save( file ) ){
433 errorLog << __GRT_LOG__ <<
" Failed to save tree " << i <<
" to file!" << std::endl;
448 errorLog << __GRT_LOG__ <<
" Could not open file to load model" << std::endl;
453 std::string treeNodeType;
458 if(word !=
"GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
459 errorLog << __GRT_LOG__ <<
" Could not find Model File Header" << std::endl;
465 errorLog << __GRT_LOG__ <<
" Failed to load base settings from file!" << std::endl;
470 if(word !=
"DecisionTreeNodeType:"){
471 errorLog << __GRT_LOG__ <<
" Could not find the DecisionTreeNodeType!" << std::endl;
474 file >> treeNodeType;
476 if( treeNodeType !=
"NULL" ){
480 if( decisionTreeNode == NULL ){
481 errorLog << __GRT_LOG__ <<
" Could not create new DecisionTreeNode from type: " << treeNodeType << std::endl;
485 if( !decisionTreeNode->
load( file ) ){
486 errorLog << __GRT_LOG__ <<
" Failed to load decisionTreeNode settings from file!" << std::endl;
490 errorLog << __GRT_LOG__ <<
" Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;
495 if(word !=
"ForestSize:"){
496 errorLog << __GRT_LOG__ <<
" Could not find the ForestSize!" << std::endl;
502 if(word !=
"NumSplittingSteps:"){
503 errorLog << __GRT_LOG__ <<
" Could not find the NumSplittingSteps!" << std::endl;
506 file >> numRandomSplits;
509 if(word !=
"MinNumSamplesPerNode:"){
510 errorLog << __GRT_LOG__ <<
" Could not find the MinNumSamplesPerNode!" << std::endl;
513 file >> minNumSamplesPerNode;
516 if(word !=
"MaxDepth:"){
517 errorLog << __GRT_LOG__ <<
" Could not find the MaxDepth!" << std::endl;
523 if(word !=
"RemoveFeaturesAtEachSpilt:"){
524 errorLog << __GRT_LOG__ <<
" Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
527 file >> removeFeaturesAtEachSplit;
530 if(word !=
"TrainingMode:"){
531 errorLog << __GRT_LOG__ <<
" Could not find the TrainingMode!" << std::endl;
534 UINT trainingModeTmp;
535 file >> trainingModeTmp;
536 trainingMode = (Tree::TrainingMode)trainingModeTmp;
539 if(word !=
"ForestBuilt:"){
540 errorLog << __GRT_LOG__ <<
" Could not find the ForestBuilt!" << std::endl;
548 if(word !=
"Forest:"){
549 errorLog << __GRT_LOG__ <<
" Could not find the Forest!" << std::endl;
555 forest.reserve( forestSize );
556 for(UINT i=0; i<forestSize; i++){
560 errorLog << __GRT_LOG__ <<
" Could not find the Tree Header!" << std::endl;
561 std::cout <<
"WORD: " << word << std::endl;
562 std::cout <<
"Tree i: " << i << std::endl;
567 if( treeIndex != i+1 ){
568 errorLog << __GRT_LOG__ <<
" Incorrect tree index: " << treeIndex << std::endl;
573 if(word !=
"TreeNodeType:"){
574 errorLog <<
"load(string filename) - Could not find the TreeNodeType!" << std::endl;
575 std::cout <<
"WORD: " << word << std::endl;
576 std::cout <<
"i: " << i << std::endl;
579 file >> treeNodeType;
585 errorLog <<
"load(fstream &file) - Failed to create new Tree!" << std::endl;
590 tree->setParent( NULL );
591 if( !tree->
load( file ) ){
592 errorLog <<
"load(fstream &file) - Failed to load tree from file!" << std::endl;
597 forest.push_back( tree );
607 errorLog <<
"combineModels( const RandomForests &forest ) - This instance has not been trained!" << std::endl;
612 errorLog <<
"combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << std::endl;
617 errorLog <<
"combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest (";
618 errorLog << forest.
getNumInputDimensions() <<
") does not match the number of input dimensions of this instance (";
628 this->forest.push_back( dynamic_cast<DecisionTreeNode*>(node->
deepCopy()) );
641 return numRandomSplits;
645 return minNumSamplesPerNode;
657 return removeFeaturesAtEachSplit;
661 return bootstrappedDatasetWeight;
670 if( decisionTreeNode == NULL ){
679 if( !trained || index >= forestSize )
return NULL;
681 return forest[ index ];
690 for(UINT i=0; i<forestSize; i++){
691 if( !forest[i]->computeFeatureWeights( weights ) ){
692 warningLog << __GRT_LOG__ <<
" Failed to compute weights for tree: " << i << std::endl;
700 const Float norm = 1.0 / sum;
701 for(UINT j=0; j<numInputDimensions; j++){
717 for(UINT i=0; i<forestSize; i++){
718 if( !forest[i]->computeLeafNodeWeights( weights ) ){
719 warningLog << __GRT_LOG__ <<
" Failed to compute leaf node weights for tree: " << i << std::endl;
728 sum += weights[i][j];
731 const Float norm = 1.0 / sum;
733 weights[i][j] *= norm;
743 if( forestSize > 0 ){
745 this->forestSize = forestSize;
752 if( numRandomSplits > 0 ){
753 this->numRandomSplits = numRandomSplits;
760 if( minNumSamplesPerNode > 0 ){
761 this->minNumSamplesPerNode = minNumSamplesPerNode;
769 this->maxDepth = maxDepth;
776 this->removeFeaturesAtEachSplit = removeFeaturesAtEachSplit;
780 bool RandomForests::setRemoveFeaturesAtEachSpilt(
const bool removeFeaturesAtEachSpilt){
785 if( trainingMode == Tree::BEST_ITERATIVE_SPILT || trainingMode == Tree::BEST_RANDOM_SPLIT ){
786 this->trainingMode = trainingMode;
789 warningLog << __GRT_LOG__ <<
" Unknown training mode!" << std::endl;
795 if( decisionTreeNode != NULL ){
796 delete decisionTreeNode;
797 decisionTreeNode = NULL;
806 if( bootstrappedDatasetWeight > 0.0 && bootstrappedDatasetWeight <= 1.0 ){
807 this->bootstrappedDatasetWeight = bootstrappedDatasetWeight;
811 warningLog << __GRT_LOG__ <<
" Bad parameter, the weight must be > 0.0 and <= 1.0. Weight: " << bootstrappedDatasetWeight << std::endl;
DecisionTreeNode * getTree(const UINT index) const
bool saveBaseSettingsToFile(std::fstream &file) const
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
std::string getId() const
virtual bool clear() override
UINT getForestSize() const
virtual ~RandomForests(void)
bool setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit)
bool setBootstrappedDatasetWeight(const Float bootstrappedDatasetWeight)
virtual bool save(std::fstream &file) const
virtual UINT getNumClasses() const
bool enableScaling(const bool useScaling)
virtual bool resize(const unsigned int size)
std::string getNodeType() const
bool setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit)
bool setTrainingMode(const Tree::TrainingMode trainingMode)
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
Vector< UINT > getClassLabels() const
virtual Node * deepCopy() const override
bool combineModels(const RandomForests &forest)
signed long getMilliSeconds()
RandomForests & operator=(const RandomForests &rhs)
static std::string getId()
bool setNumRandomSplits(const UINT numSplittingSteps)
bool setAllValues(const T &value)
virtual bool train_(ClassificationData &trainingData)
virtual bool computeAccuracy(const ClassificationData &data, Float &accuracy)
MatrixDouble getLeafNodeFeatureWeights(const bool normWeights=true) const
DecisionTreeNode * deepCopyTree() const
UINT getNumSamples() const
RandomForests(const DecisionTreeNode &decisionTreeNode=DecisionTreeClusterNode(), const UINT forestSize=10, const UINT numRandomSplits=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const Tree::TrainingMode trainingMode=Tree::BEST_RANDOM_SPLIT, const bool removeFeaturesAtEachSplit=true, const bool useScaling=false, const Float bootstrappedDatasetWeight=0.8)
bool setForestSize(const UINT forestSize)
bool setValidationSetSize(const UINT validationSetSize)
bool setMaxDepth(const UINT maxDepth)
UINT getNumRandomSplits() const
virtual bool deepCopyFrom(const Classifier *classifier)
Float getBootstrappedDatasetWeight() const
bool setMaxDepth(const UINT maxDepth)
bool setNumSplittingSteps(const UINT numSplittingSteps)
bool copyBaseVariables(const Classifier *classifier)
bool loadBaseSettingsFromFile(std::fstream &file)
virtual bool save(std::fstream &file) const override
Float getValidationSetAccuracy() const
unsigned int getNumRows() const
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
UINT getMinNumSamplesPerNode() const
bool setDecisionTreeNode(const DecisionTreeNode &node)
virtual bool predict_(VectorDouble &inputVector)
Vector< MinMax > getRanges() const
VectorDouble getFeatureWeights(const bool normWeights=true) const
const Vector< DecisionTreeNode * > & getForest() const
VectorFloat getValidationSetPrecision() const
bool getRemoveFeaturesAtEachSplit() const
virtual bool load(std::fstream &file)
virtual bool train_(ClassificationData &trainingData) override
static Node * createInstanceFromString(std::string const &nodeType)
virtual bool load(std::fstream &file) override
ClassificationData getBootstrappedDataset(const UINT numSamples=0, const bool balanceDataset=false) const
bool setDecisionTreeNode(const DecisionTreeNode &node)
UINT getNumInputDimensions() const
bool setUseValidationSet(const bool useValidationSet)
bool enableNullRejection(const bool useNullRejection)
bool scale(const Float minTarget, const Float maxTarget)
virtual bool print() const
VectorFloat getValidationSetRecall() const
This is the main base class that all GRT Classification algorithms should inherit from...
static Float sum(const VectorFloat &x)
UINT getTrainingMode() const
DecisionTreeNode * deepCopyDecisionTreeNode() const
bool setTrainingMode(const Tree::TrainingMode trainingMode)