31 RegressionTree::RegressionTree(
const UINT numSplittingSteps,
const UINT minNumSamplesPerNode,
const UINT maxDepth,
const bool removeFeaturesAtEachSpilt,
const UINT trainingMode,
const bool useScaling,
const Float minRMSErrorPerNode)
34 this->numSplittingSteps = numSplittingSteps;
35 this->minNumSamplesPerNode = minNumSamplesPerNode;
36 this->maxDepth = maxDepth;
37 this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
38 this->trainingMode = trainingMode;
39 this->useScaling = useScaling;
41 Regressifier::classType =
"RegressionTree";
42 regressifierType = Regressifier::classType;
43 Regressifier::debugLog.setProceedingText(
"[DEBUG RegressionTree]");
44 Regressifier::errorLog.setProceedingText(
"[ERROR RegressionTree]");
45 Regressifier::trainingLog.setProceedingText(
"[TRAINING RegressionTree]");
46 Regressifier::warningLog.setProceedingText(
"[WARNING RegressionTree]");
52 Regressifier::classType =
"RegressionTree";
53 regressifierType = Regressifier::classType;
54 Regressifier::debugLog.setProceedingText(
"[DEBUG RegressionTree]");
55 Regressifier::errorLog.setProceedingText(
"[ERROR RegressionTree]");
56 Regressifier::trainingLog.setProceedingText(
"[TRAINING RegressionTree]");
57 Regressifier::warningLog.setProceedingText(
"[WARNING RegressionTree]");
76 this->numSplittingSteps = rhs.numSplittingSteps;
77 this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
78 this->maxDepth = rhs.maxDepth;
79 this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
80 this->trainingMode = rhs.trainingMode;
91 if( regressifier == NULL )
return false;
105 this->numSplittingSteps = ptr->numSplittingSteps;
106 this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
107 this->maxDepth = ptr->maxDepth;
108 this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
109 this->trainingMode = ptr->trainingMode;
128 Regressifier::errorLog <<
"train_(RegressionData &trainingData) - Training data has zero samples!" << std::endl;
132 numInputDimensions = N;
133 numOutputDimensions = T;
140 trainingData.
scale(0, 1);
145 for(UINT i=0; i<N; i++){
151 tree = buildTree( trainingData, NULL, features, nodeID );
155 Regressifier::errorLog <<
"train_(RegressionData &trainingData) - Failed to build tree!" << std::endl;
168 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
173 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - Tree pointer is null!" << std::endl;
177 if( inputVector.size() != numInputDimensions ){
178 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
183 for(UINT n=0; n<numInputDimensions; n++){
184 inputVector[n] =
scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0, 1);
188 if( !tree->
predict( inputVector, regressionData ) ){
189 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - Failed to predict!" << std::endl;
212 return tree->
print();
220 Regressifier::errorLog <<
"saveModelToFile(fstream &file) - The file is not open!" << std::endl;
225 file <<
"GRT_REGRESSION_TREE_MODEL_FILE_V1.0\n";
229 Regressifier::errorLog <<
"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
233 file <<
"NumSplittingSteps: " << numSplittingSteps << std::endl;
234 file <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
235 file <<
"MaxDepth: " << maxDepth << std::endl;
236 file <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
237 file <<
"TrainingMode: " << trainingMode << std::endl;
238 file <<
"TreeBuilt: " << (tree != NULL ? 1 : 0) << std::endl;
243 Regressifier::errorLog <<
"saveModelToFile(fstream &file) - Failed to save tree to file!" << std::endl;
257 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not open file to load model" << std::endl;
265 if(word !=
"GRT_REGRESSION_TREE_MODEL_FILE_V1.0"){
266 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find Model File Header" << std::endl;
272 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;
277 if(word !=
"NumSplittingSteps:"){
278 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;
281 file >> numSplittingSteps;
284 if(word !=
"MinNumSamplesPerNode:"){
285 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
288 file >> minNumSamplesPerNode;
291 if(word !=
"MaxDepth:"){
292 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;
298 if(word !=
"RemoveFeaturesAtEachSpilt:"){
299 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
302 file >> removeFeaturesAtEachSpilt;
305 if(word !=
"TrainingMode:"){
306 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;
309 file >> trainingMode;
312 if(word !=
"TreeBuilt:"){
313 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the TreeBuilt!" << std::endl;
321 Regressifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the Tree!" << std::endl;
330 Regressifier::errorLog <<
"loadModelFromFile(fstream &file) - Failed to create new RegressionTreeNode!" << std::endl;
334 tree->setParent( NULL );
337 Regressifier::errorLog <<
"loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;
393 node->initNode( parent, depth, nodeID );
396 if( features.size() == 0 || M < minNumSamplesPerNode || depth >= maxDepth ){
399 node->setIsLeafNode(
true );
402 computeNodeRegressionData( trainingData, regressionData );
407 Regressifier::trainingLog <<
"Reached leaf node. Depth: " << depth <<
" NumSamples: " << trainingData.
getNumSamples() << std::endl;
413 UINT featureIndex = 0;
416 if( !computeBestSpilt( trainingData, features, featureIndex, threshold, minError ) ){
421 Regressifier::trainingLog <<
"Depth: " << depth <<
" FeatureIndex: " << featureIndex <<
" Threshold: " << threshold <<
" MinError: " << minError << std::endl;
426 computeNodeRegressionData( trainingData, regressionData );
429 node->
set( trainingData.
getNumSamples(), featureIndex, threshold, regressionData );
431 Regressifier::trainingLog <<
"Reached leaf node. Depth: " << depth <<
" NumSamples: " << M << std::endl;
437 node->
set( trainingData.
getNumSamples(), featureIndex, threshold, regressionData );
440 if( removeFeaturesAtEachSpilt ){
441 for(UINT i=0; i<features.
getSize(); i++){
442 if( features[i] == featureIndex ){
443 features.erase( features.begin()+i );
453 for(UINT i=0; i<M; i++){
454 if( node->
predict( trainingData[i].getInputVector() ) ){
455 rhs.addSample(trainingData[i].getInputVector(), trainingData[i].getTargetVector());
456 }
else lhs.addSample(trainingData[i].getInputVector(), trainingData[i].getTargetVector());
460 node->setLeftChild( buildTree( lhs, node, features, nodeID ) );
461 node->setRightChild( buildTree( rhs, node, features, nodeID ) );
466 bool RegressionTree::computeBestSpilt(
const RegressionData &trainingData,
const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
468 switch( trainingMode ){
469 case BEST_ITERATIVE_SPILT:
470 return computeBestSpiltBestIterativeSpilt( trainingData, features, featureIndex, threshold, minError );
472 case BEST_RANDOM_SPLIT:
476 Regressifier::errorLog <<
"Uknown trainingMode!" << std::endl;
484 bool RegressionTree::computeBestSpiltBestIterativeSpilt(
const RegressionData &trainingData,
const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
487 const UINT N = (UINT)features.size();
489 if( N == 0 )
return false;
492 UINT bestFeatureIndex = 0;
494 Float bestThreshold = 0;
506 for(UINT n=0; n<N; n++){
507 minRange = ranges[n].minValue;
508 maxRange = ranges[n].maxValue;
509 step = (maxRange-minRange)/Float(numSplittingSteps);
510 threshold = minRange;
511 featureIndex = features[n];
512 while( threshold <= maxRange ){
515 for(UINT i=0; i<M; i++){
516 groupID = trainingData[i].getInputVector()[featureIndex] >= threshold ? 1 : 0;
517 groupIndex[i] = groupID;
518 groupMean[ groupID ] += trainingData[i].getInputVector()[featureIndex];
519 groupCounter[ groupID ]++;
521 groupMean[0] /= groupCounter[0] > 0 ? groupCounter[0] : 1;
522 groupMean[1] /= groupCounter[1] > 0 ? groupCounter[1] : 1;
525 for(UINT i=0; i<M; i++){
526 groupMSE[ groupIndex[i] ] += grt_sqr( groupMean[ groupIndex[i] ] - trainingData[ i ].getInputVector()[features[n]] );
528 groupMSE[0] /= groupCounter[0] > 0 ? groupCounter[0] : 1;
529 groupMSE[1] /= groupCounter[1] > 0 ? groupCounter[1] : 1;
531 error = sqrt( groupMSE[0] + groupMSE[1] );
534 if( error < minError ){
536 bestThreshold = threshold;
537 bestFeatureIndex = featureIndex;
546 featureIndex = bestFeatureIndex;
547 threshold = bestThreshold;
632 Regressifier::errorLog <<
"computeNodeRegressionData(...) - Failed to compute regression data, there are zero training samples!" << std::endl;
637 regressionData.clear();
638 regressionData.
resize( T, 0 );
641 for(
unsigned int j=0; j<N; j++){
642 for(
unsigned int i=0; i<M; i++){
643 regressionData[j] += trainingData[i].getTargetVector()[j];
645 regressionData[j] /= M;
virtual bool loadModelFromFile(std::fstream &file)
Float getMinRMSErrorPerNode() const
Float minRMSErrorPerNode
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
This class implements a basic Regression Tree.
virtual bool train_(RegressionData &trainingData)
Vector< MinMax > getInputRanges() const
virtual bool print() const
const RegressionTreeNode * getTree() const
virtual bool resize(const unsigned int size)
bool copyBaseVariables(const Regressifier *regressifier)
UINT getNumInputDimensions() const
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const VectorFloat &regressionData)
unsigned int getSize() const
Vector< MinMax > getTargetRanges() const
bool saveBaseSettingsToFile(std::fstream &file) const
bool scale(const Float minTarget, const Float maxTarget)
UINT getNumTargetDimensions() const
virtual bool predict(const VectorFloat &x)
std::string getRegressifierType() const
virtual Node * deepCopyNode() const
bool setMinRMSErrorPerNode(const Float minRMSErrorPerNode)
virtual bool saveModelToFile(std::fstream &file) const
virtual bool saveToFile(std::fstream &file) const
virtual bool deepCopyFrom(const Regressifier *regressifier)
virtual bool loadFromFile(std::fstream &file)
RegressionTree & operator=(const RegressionTree &rhs)
bool loadBaseSettingsFromFile(std::fstream &file)
RegressionTreeNode * deepCopyTree() const
virtual bool predict_(VectorFloat &inputVector)
RegressionTree(const UINT numSplittingSteps=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const bool removeFeaturesAtEachSpilt=false, const UINT trainingMode=BEST_ITERATIVE_SPILT, const bool useScaling=false, const Float minRMSErrorPerNode=0.01)
virtual ~RegressionTree(void)
virtual bool print() const
virtual bool predict(const VectorFloat &x)
UINT getNumSamples() const