21 #define GRT_DLL_EXPORTS
32 RegressionTree::RegressionTree(
const UINT numSplittingSteps,
const UINT minNumSamplesPerNode,
const UINT maxDepth,
const bool removeFeaturesAtEachSpilt,
const UINT trainingMode,
const bool useScaling,
const Float minRMSErrorPerNode)
35 this->numSplittingSteps = numSplittingSteps;
36 this->minNumSamplesPerNode = minNumSamplesPerNode;
37 this->maxDepth = maxDepth;
38 this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
39 this->trainingMode = trainingMode;
40 this->useScaling = useScaling;
42 Regressifier::classType =
"RegressionTree";
43 regressifierType = Regressifier::classType;
44 Regressifier::debugLog.setProceedingText(
"[DEBUG RegressionTree]");
45 Regressifier::errorLog.setProceedingText(
"[ERROR RegressionTree]");
46 Regressifier::trainingLog.setProceedingText(
"[TRAINING RegressionTree]");
47 Regressifier::warningLog.setProceedingText(
"[WARNING RegressionTree]");
53 Regressifier::classType =
"RegressionTree";
54 regressifierType = Regressifier::classType;
55 Regressifier::debugLog.setProceedingText(
"[DEBUG RegressionTree]");
56 Regressifier::errorLog.setProceedingText(
"[ERROR RegressionTree]");
57 Regressifier::trainingLog.setProceedingText(
"[TRAINING RegressionTree]");
58 Regressifier::warningLog.setProceedingText(
"[WARNING RegressionTree]");
77 this->numSplittingSteps = rhs.numSplittingSteps;
78 this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
79 this->maxDepth = rhs.maxDepth;
80 this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
81 this->trainingMode = rhs.trainingMode;
92 if( regressifier == NULL )
return false;
106 this->numSplittingSteps = ptr->numSplittingSteps;
107 this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
108 this->maxDepth = ptr->maxDepth;
109 this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
110 this->trainingMode = ptr->trainingMode;
129 Regressifier::errorLog <<
"train_(RegressionData &trainingData) - Training data has zero samples!" << std::endl;
133 numInputDimensions = N;
134 numOutputDimensions = T;
141 trainingData.
scale(0, 1);
146 for(UINT i=0; i<N; i++){
152 tree = buildTree( trainingData, NULL, features, nodeID );
156 Regressifier::errorLog <<
"train_(RegressionData &trainingData) - Failed to build tree!" << std::endl;
169 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
174 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - Tree pointer is null!" << std::endl;
178 if( inputVector.size() != numInputDimensions ){
179 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
184 for(UINT n=0; n<numInputDimensions; n++){
185 inputVector[n] =
scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0, 1);
189 if( !tree->
predict( inputVector, regressionData ) ){
190 Regressifier::errorLog <<
"predict_(VectorFloat &inputVector) - Failed to predict!" << std::endl;
213 return tree->
print();
221 Regressifier::errorLog <<
"save(fstream &file) - The file is not open!" << std::endl;
226 file <<
"GRT_REGRESSION_TREE_MODEL_FILE_V1.0\n";
230 Regressifier::errorLog <<
"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
234 file <<
"NumSplittingSteps: " << numSplittingSteps << std::endl;
235 file <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
236 file <<
"MaxDepth: " << maxDepth << std::endl;
237 file <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
238 file <<
"TrainingMode: " << trainingMode << std::endl;
239 file <<
"TreeBuilt: " << (tree != NULL ? 1 : 0) << std::endl;
243 if( !tree->
save( file ) ){
244 Regressifier::errorLog <<
"save(fstream &file) - Failed to save tree to file!" << std::endl;
258 Regressifier::errorLog <<
"load(string filename) - Could not open file to load model" << std::endl;
266 if(word !=
"GRT_REGRESSION_TREE_MODEL_FILE_V1.0"){
267 Regressifier::errorLog <<
"load(string filename) - Could not find Model File Header" << std::endl;
273 Regressifier::errorLog <<
"load(string filename) - Failed to load base settings from file!" << std::endl;
278 if(word !=
"NumSplittingSteps:"){
279 Regressifier::errorLog <<
"load(string filename) - Could not find the NumSplittingSteps!" << std::endl;
282 file >> numSplittingSteps;
285 if(word !=
"MinNumSamplesPerNode:"){
286 Regressifier::errorLog <<
"load(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
289 file >> minNumSamplesPerNode;
292 if(word !=
"MaxDepth:"){
293 Regressifier::errorLog <<
"load(string filename) - Could not find the MaxDepth!" << std::endl;
299 if(word !=
"RemoveFeaturesAtEachSpilt:"){
300 Regressifier::errorLog <<
"load(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
303 file >> removeFeaturesAtEachSpilt;
306 if(word !=
"TrainingMode:"){
307 Regressifier::errorLog <<
"load(string filename) - Could not find the TrainingMode!" << std::endl;
310 file >> trainingMode;
313 if(word !=
"TreeBuilt:"){
314 Regressifier::errorLog <<
"load(string filename) - Could not find the TreeBuilt!" << std::endl;
322 Regressifier::errorLog <<
"load(string filename) - Could not find the Tree!" << std::endl;
331 Regressifier::errorLog <<
"load(fstream &file) - Failed to create new RegressionTreeNode!" << std::endl;
335 tree->setParent( NULL );
336 if( !tree->
load( file ) ){
338 Regressifier::errorLog <<
"load(fstream &file) - Failed to load tree from file!" << std::endl;
394 node->initNode( parent, depth, nodeID );
397 if( features.size() == 0 || M < minNumSamplesPerNode || depth >= maxDepth ){
400 node->setIsLeafNode(
true );
403 computeNodeRegressionData( trainingData, regressionData );
408 Regressifier::trainingLog <<
"Reached leaf node. Depth: " << depth <<
" NumSamples: " << trainingData.
getNumSamples() << std::endl;
414 UINT featureIndex = 0;
417 if( !computeBestSpilt( trainingData, features, featureIndex, threshold, minError ) ){
422 Regressifier::trainingLog <<
"Depth: " << depth <<
" FeatureIndex: " << featureIndex <<
" Threshold: " << threshold <<
" MinError: " << minError << std::endl;
427 computeNodeRegressionData( trainingData, regressionData );
430 node->
set( trainingData.
getNumSamples(), featureIndex, threshold, regressionData );
432 Regressifier::trainingLog <<
"Reached leaf node. Depth: " << depth <<
" NumSamples: " << M << std::endl;
438 node->
set( trainingData.
getNumSamples(), featureIndex, threshold, regressionData );
441 if( removeFeaturesAtEachSpilt ){
442 for(UINT i=0; i<features.
getSize(); i++){
443 if( features[i] == featureIndex ){
444 features.erase( features.begin()+i );
454 for(UINT i=0; i<M; i++){
455 if( node->
predict( trainingData[i].getInputVector() ) ){
456 rhs.addSample(trainingData[i].getInputVector(), trainingData[i].getTargetVector());
457 }
else lhs.addSample(trainingData[i].getInputVector(), trainingData[i].getTargetVector());
461 node->setLeftChild( buildTree( lhs, node, features, nodeID ) );
462 node->setRightChild( buildTree( rhs, node, features, nodeID ) );
467 bool RegressionTree::computeBestSpilt(
const RegressionData &trainingData,
const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
469 switch( trainingMode ){
470 case BEST_ITERATIVE_SPILT:
471 return computeBestSpiltBestIterativeSpilt( trainingData, features, featureIndex, threshold, minError );
473 case BEST_RANDOM_SPLIT:
477 Regressifier::errorLog <<
"Uknown trainingMode!" << std::endl;
485 bool RegressionTree::computeBestSpiltBestIterativeSpilt(
const RegressionData &trainingData,
const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
488 const UINT N = (UINT)features.size();
490 if( N == 0 )
return false;
493 UINT bestFeatureIndex = 0;
495 Float bestThreshold = 0;
507 for(UINT n=0; n<N; n++){
508 minRange = ranges[n].minValue;
509 maxRange = ranges[n].maxValue;
510 step = (maxRange-minRange)/Float(numSplittingSteps);
511 threshold = minRange;
512 featureIndex = features[n];
513 while( threshold <= maxRange ){
516 for(UINT i=0; i<M; i++){
517 groupID = trainingData[i].getInputVector()[featureIndex] >= threshold ? 1 : 0;
518 groupIndex[i] = groupID;
519 groupMean[ groupID ] += trainingData[i].getInputVector()[featureIndex];
520 groupCounter[ groupID ]++;
522 groupMean[0] /= groupCounter[0] > 0 ? groupCounter[0] : 1;
523 groupMean[1] /= groupCounter[1] > 0 ? groupCounter[1] : 1;
526 for(UINT i=0; i<M; i++){
527 groupMSE[ groupIndex[i] ] += grt_sqr( groupMean[ groupIndex[i] ] - trainingData[ i ].getInputVector()[features[n]] );
529 groupMSE[0] /= groupCounter[0] > 0 ? groupCounter[0] : 1;
530 groupMSE[1] /= groupCounter[1] > 0 ? groupCounter[1] : 1;
532 error = sqrt( groupMSE[0] + groupMSE[1] );
535 if( error < minError ){
537 bestThreshold = threshold;
538 bestFeatureIndex = featureIndex;
547 featureIndex = bestFeatureIndex;
548 threshold = bestThreshold;
633 Regressifier::errorLog <<
"computeNodeRegressionData(...) - Failed to compute regression data, there are zero training samples!" << std::endl;
638 regressionData.clear();
639 regressionData.
resize( T, 0 );
642 for(
unsigned int j=0; j<N; j++){
643 for(
unsigned int i=0; i<M; i++){
644 regressionData[j] += trainingData[i].getTargetVector()[j];
646 regressionData[j] /= M;
Float getMinRMSErrorPerNode() const
Float minRMSErrorPerNode
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
This class implements a basic Regression Tree.
virtual bool train_(RegressionData &trainingData)
Vector< MinMax > getInputRanges() const
virtual bool print() const
const RegressionTreeNode * getTree() const
virtual bool resize(const unsigned int size)
virtual bool save(std::fstream &file) const
bool copyBaseVariables(const Regressifier *regressifier)
UINT getNumInputDimensions() const
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const VectorFloat &regressionData)
virtual bool load(std::fstream &file)
Vector< MinMax > getTargetRanges() const
bool saveBaseSettingsToFile(std::fstream &file) const
bool scale(const Float minTarget, const Float maxTarget)
virtual bool save(std::fstream &file) const
UINT getNumTargetDimensions() const
virtual bool predict(const VectorFloat &x)
std::string getRegressifierType() const
virtual Node * deepCopyNode() const
bool setMinRMSErrorPerNode(const Float minRMSErrorPerNode)
virtual bool deepCopyFrom(const Regressifier *regressifier)
RegressionTree & operator=(const RegressionTree &rhs)
bool loadBaseSettingsFromFile(std::fstream &file)
RegressionTreeNode * deepCopyTree() const
virtual bool predict_(VectorFloat &inputVector)
RegressionTree(const UINT numSplittingSteps=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const bool removeFeaturesAtEachSpilt=false, const UINT trainingMode=BEST_ITERATIVE_SPILT, const bool useScaling=false, const Float minRMSErrorPerNode=0.01)
virtual ~RegressionTree(void)
virtual bool load(std::fstream &file)
virtual bool print() const
virtual bool predict(const VectorFloat &x)
UINT getNumSamples() const