DecisionTree::DecisionTree( const DecisionTreeNode &decisionTreeNode, const UINT minNumSamplesPerNode, const UINT maxDepth, const bool removeFeaturesAtEachSpilt, const UINT trainingMode, const UINT numSplittingSteps, const bool useScaling )
{
    this->decisionTreeNode = NULL;
    this->minNumSamplesPerNode = minNumSamplesPerNode;
    this->maxDepth = maxDepth;
    this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
    this->trainingMode = trainingMode;
    this->numSplittingSteps = numSplittingSteps;
    this->useScaling = useScaling;
    this->supportsNullRejection = true;
    Classifier::classType = "DecisionTree";
    classifierType = Classifier::classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    Classifier::debugLog.setProceedingText("[DEBUG DecisionTree]");
    Classifier::errorLog.setProceedingText("[ERROR DecisionTree]");
    Classifier::trainingLog.setProceedingText("[TRAINING DecisionTree]");
    Classifier::warningLog.setProceedingText("[WARNING DecisionTree]");

    this->decisionTreeNode = decisionTreeNode.deepCopy();
}
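A minimal usage sketch, not part of DecisionTree.cpp: constructing the classifier with the defaults documented in the header (DecisionTreeClusterNode splits, minNumSamplesPerNode = 5, maxDepth = 10, removeFeaturesAtEachSpilt = false, numSplittingSteps = 100, useScaling = false), or with a couple of parameters overridden. The header path and namespace follow the usual GRT conventions and may differ in your build.

#include <GRT/GRT.h>
using namespace GRT;

// Default configuration.
DecisionTree dtree;

// Override minNumSamplesPerNode and maxDepth; the node type, training mode,
// splitting steps, and scaling keep their defaults.
DecisionTree deepTree( DecisionTreeClusterNode(), 10, 20 );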
    decisionTreeNode = NULL;
    Classifier::classType = "DecisionTree";
    classifierType = Classifier::classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    Classifier::debugLog.setProceedingText("[DEBUG DecisionTree]");
    Classifier::errorLog.setProceedingText("[ERROR DecisionTree]");
    Classifier::trainingLog.setProceedingText("[TRAINING DecisionTree]");
    Classifier::warningLog.setProceedingText("[WARNING DecisionTree]");
    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    if( this->decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;

    this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
    this->maxDepth = rhs.maxDepth;
    this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
    this->trainingMode = rhs.trainingMode;
    this->numSplittingSteps = rhs.numSplittingSteps;
    this->nodeClusters = rhs.nodeClusters;
    if( classifier == NULL ) return false;

    if( this->decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;

    this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
    this->maxDepth = ptr->maxDepth;
    this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
    this->trainingMode = ptr->trainingMode;
    this->numSplittingSteps = ptr->numSplittingSteps;
    this->nodeClusters = ptr->nodeClusters;
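An illustrative sketch of the deep-copy path above, assuming the GRT headers from the earlier sketch: deepCopyFrom takes a base-class Classifier pointer, validates it, and copies the tree-specific members into this instance.

DecisionTree original;
// ... configure and train `original` ...

DecisionTree duplicate;
const bool copied = duplicate.deepCopyFrom( &original ); // false if the pointer or cast fails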
    if( decisionTreeNode == NULL ){
        Classifier::errorLog << "train_(ClassificationData &trainingData) - The decision tree node has not been set! You must set this first before training a model." << std::endl;

        Classifier::errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;

    numInputDimensions = N;

    if( useValidationSet ){
        validationData = trainingData.partition( validationSetSize );
        validationSetAccuracy = 0;
        validationSetPrecision.resize( useNullRejection ? K+1 : K, 0 );
        validationSetRecall.resize( useNullRejection ? K+1 : K, 0 );

    trainingData.scale(0, 1);

    if( useNullRejection ){
        trainingDataCopy = trainingData;

    for(UINT i=0; i<N; i++){

    tree = buildTree( trainingData, NULL, features, classLabels, nodeID );

        Classifier::errorLog << "train_(ClassificationData &trainingData) - Failed to build tree!" << std::endl;

    if( useNullRejection ){

        for(UINT i=0; i<M; i++){
            if( !tree->predict( trainingDataCopy[i].getSample(), classLikelihoods ) ){
                Classifier::errorLog << "predict_(VectorFloat &inputVector) - Failed to predict!" << std::endl;

            distances[i] = getNodeDistance( trainingDataCopy[i].getSample(), tree->getPredictedNodeID() );

            classCounter[ predictions[i] ]++;

        classClusterMean.clear();
        classClusterStdDev.clear();
        classClusterMean.resize( numClasses, 0 );
        classClusterStdDev.resize( numClasses, 0.01 );

        for(UINT i=0; i<M; i++){
            classClusterMean[ predictions[i] ] += distances[ i ];

        for(UINT k=0; k<numClasses; k++){
            classClusterMean[k] /= MAX( classCounter[k], 1 );

        for(UINT i=0; i<M; i++){
            classClusterStdDev[ predictions[i] ] += MLBase::SQR( distances[ i ] - classClusterMean[ predictions[i] ] );

        for(UINT k=0; k<numClasses; k++){
            classClusterStdDev[k] = sqrt( classClusterStdDev[k] / MAX( classCounter[k], 1 ) );

    if( useValidationSet ){
        double numCorrect = 0;
        VectorDouble validationSetPrecisionCounter( validationSetPrecision.size(), 0.0 );
        VectorDouble validationSetRecallCounter( validationSetRecall.size(), 0.0 );
        Classifier::trainingLog << "Testing model with validation set..." << std::endl;
        for(UINT i=0; i<numTestSamples; i++){
            testLabel = validationData[i].getClassLabel();
            testSample = validationData[i].getSample();

            if( predictedClassLabel == testLabel ){

        validationSetAccuracy = (numCorrect / numTestSamples) * 100.0;
        for(size_t i=0; i<validationSetPrecision.size(); i++){
            validationSetPrecision[i] /= validationSetPrecisionCounter[i] > 0 ? validationSetPrecisionCounter[i] : 1;

        for(size_t i=0; i<validationSetRecall.size(); i++){
            validationSetRecall[i] /= validationSetRecallCounter[i] > 0 ? validationSetRecallCounter[i] : 1;

        Classifier::trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;

        Classifier::trainingLog << "Validation set precision: ";
        for(size_t i=0; i<validationSetPrecision.size(); i++){
            Classifier::trainingLog << validationSetPrecision[i] << " ";

        Classifier::trainingLog << std::endl;

        Classifier::trainingLog << "Validation set recall: ";
        for(size_t i=0; i<validationSetRecall.size(); i++){
            Classifier::trainingLog << validationSetRecall[i] << " ";

        Classifier::trainingLog << std::endl;
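A hedged end-to-end training sketch, not from the source file: it builds a small two-class ClassificationData set, holds out a validation split so the validation block above runs, and calls train(), which forwards to train_. Random, enableScaling, setUseValidationSet, and setValidationSetSize are standard GRT/MLBase calls, but verify them against your GRT version.

#include <GRT/GRT.h>
using namespace GRT;

int main(){
    ClassificationData trainingData;
    trainingData.setNumDimensions( 2 );

    // Two toy classes; a real project would record or load a dataset instead.
    Random random;
    VectorFloat sample( 2 );
    for(UINT i=0; i<100; i++){
        sample[0] = random.getRandomNumberUniform( 0.0, 0.4 );
        sample[1] = random.getRandomNumberUniform( 0.0, 0.4 );
        trainingData.addSample( 1, sample );
        sample[0] = random.getRandomNumberUniform( 0.6, 1.0 );
        sample[1] = random.getRandomNumberUniform( 0.6, 1.0 );
        trainingData.addSample( 2, sample );
    }

    DecisionTree dtree;
    dtree.enableScaling( true );        // exercises the useScaling branch above
    dtree.setUseValidationSet( true );  // exercises the validation-set block above
    dtree.setValidationSetSize( 20 );   // hold out 20% of the samples

    if( !dtree.train( trainingData ) ){
        return 1; // training failed
    }
    return 0;
}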
    predictedClassLabel = 0;

        Classifier::errorLog << "predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;

        Classifier::errorLog << "predict_(VectorFloat &inputVector) - DecisionTree pointer is null!" << std::endl;

    if( inputVector.getSize() != numInputDimensions ){
        Classifier::errorLog << "predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;

        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);

    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);

    if( !tree->predict( inputVector, classLikelihoods ) ){
        Classifier::errorLog << "predict_(VectorFloat &inputVector) - Failed to predict!" << std::endl;

    for(UINT k=0; k<numClasses; k++){
        if( classLikelihoods[k] > maxLikelihood ){
            maxLikelihood = classLikelihoods[k];

    if( useNullRejection ){

        if( grt_isnan(leafDistance) ){
            Classifier::errorLog << "predict_(VectorFloat &inputVector) - Failed to match leaf node ID to compute node distance!" << std::endl;

        std::fill(classDistances.begin(),classDistances.end(),0);
        classDistances[ maxIndex ] = leafDistance;

        if( leafDistance <= nullRejectionThresholds[ maxIndex ] ){
            predictedClassLabel = classLabels[ maxIndex ];
        }else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;

        predictedClassLabel = classLabels[ maxIndex ];
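A short prediction sketch, reusing the dtree instance from the training sketch above (illustrative, not from the file): predict() routes into predict_, after which the predicted label, likelihoods, and distances can be read back through the Classifier base class.

VectorFloat inputVector( 2 );
inputVector[0] = 0.15;
inputVector[1] = 0.25;

if( dtree.predict( inputVector ) ){
    const UINT predictedClassLabel = dtree.getPredictedClassLabel();
    const Float maximumLikelihood = dtree.getMaximumLikelihood();
    VectorFloat classLikelihoods = dtree.getClassLikelihoods();
    VectorFloat classDistances = dtree.getClassDistances();
    // predictedClassLabel is GRT_DEFAULT_NULL_CLASS_LABEL when null rejection rejects the sample.
}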
    nodeClusters.clear();

        Classifier::warningLog << "recomputeNullRejectionThresholds() - Failed to recompute null rejection thresholds, the model has not been trained!" << std::endl;

    if( !useNullRejection ){
        Classifier::warningLog << "recomputeNullRejectionThresholds() - Failed to recompute null rejection thresholds, null rejection is not enabled!" << std::endl;

    nullRejectionThresholds.resize( numClasses );

    for(UINT k=0; k<numClasses; k++){
        nullRejectionThresholds[k] = classClusterMean[k] + (classClusterStdDev[k]*nullRejectionCoeff);
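The thresholds above are the per-class cluster mean plus nullRejectionCoeff standard deviations of the leaf-node distances gathered during training. A hedged sketch of enabling null rejection and adjusting the coefficient afterwards (the value 3.0 is illustrative):

DecisionTree dtree;
dtree.enableNullRejection( true );        // must be on before training so the cluster statistics are built
// ... train the model ...
dtree.setNullRejectionCoeff( 3.0 );       // accept samples within mean + 3*stddev of the class cluster
dtree.recomputeNullRejectionThresholds(); // rebuild the thresholds used by predict_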
        Classifier::errorLog << "saveModelToFile(fstream &file) - The file is not open!" << std::endl;

    file << "GRT_DECISION_TREE_MODEL_FILE_V4.0\n";

        Classifier::errorLog << "saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << std::endl;

    if( decisionTreeNode != NULL ){
        file << "DecisionTreeNodeType: " << decisionTreeNode->getNodeType() << std::endl;

            Classifier::errorLog << "saveModelToFile(fstream &file) - Failed to save decisionTreeNode settings to file!" << std::endl;

        file << "DecisionTreeNodeType: " << "NULL" << std::endl;

    file << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
    file << "MaxDepth: " << maxDepth << std::endl;
    file << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << std::endl;
    file << "TrainingMode: " << trainingMode << std::endl;
    file << "NumSplittingSteps: " << numSplittingSteps << std::endl;
    file << "TreeBuilt: " << (tree != NULL ? 1 : 0) << std::endl;

            Classifier::errorLog << "saveModelToFile(fstream &file) - Failed to save tree to file!" << std::endl;

    if( useNullRejection ){

        file << "ClassClusterMean:";
        for(UINT k=0; k<numClasses; k++){
            file << " " << classClusterMean[k];

        file << "ClassClusterStdDev:";
        for(UINT k=0; k<numClasses; k++){
            file << " " << classClusterStdDev[k];

        file << "NumNodes: " << nodeClusters.size() << std::endl;
        file << "NodeClusters:\n";

        std::map< UINT, VectorFloat >::const_iterator iter = nodeClusters.begin();

        while( iter != nodeClusters.end() ){

            for(UINT j=0; j<numInputDimensions; j++){
                file << " " << iter->second[j];
    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;

    if( !file.is_open() )
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not open file to load model" << std::endl;

    if( word == "GRT_DECISION_TREE_MODEL_FILE_V1.0" ){

    if( word == "GRT_DECISION_TREE_MODEL_FILE_V2.0" ){
        return loadLegacyModelFromFile_v2( file );

    if( word == "GRT_DECISION_TREE_MODEL_FILE_V3.0" ){
        return loadLegacyModelFromFile_v3( file );

    if( word != "GRT_DECISION_TREE_MODEL_FILE_V4.0" ){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find Model File Header" << std::endl;

        Classifier::errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;

    if(word != "DecisionTreeNodeType:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the DecisionTreeNodeType!" << std::endl;

    if( word != "NULL" ){

        if( decisionTreeNode == NULL ){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not create new DecisionTreeNode from type: " << word << std::endl;

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load decisionTreeNode settings from file!" << std::endl;

        Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << std::endl;

    if(word != "MinNumSamplesPerNode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;

    file >> minNumSamplesPerNode;

    if(word != "MaxDepth:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;

    if(word != "RemoveFeaturesAtEachSpilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;

    file >> removeFeaturesAtEachSpilt;

    if(word != "TrainingMode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;

    file >> trainingMode;

    if(word != "NumSplittingSteps:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;

    file >> numSplittingSteps;

    if(word != "TreeBuilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TreeBuilt!" << std::endl;

        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the Tree!" << std::endl;

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to create new DecisionTreeNode!" << std::endl;

        tree->setParent( NULL );

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;

    if( useNullRejection ){

        classClusterMean.resize( numClasses );
        classClusterStdDev.resize( numClasses );

        if(word != "ClassClusterMean:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the ClassClusterMean header!" << std::endl;

        for(UINT k=0; k<numClasses; k++){
            file >> classClusterMean[k];

        if(word != "ClassClusterStdDev:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the ClassClusterStdDev header!" << std::endl;

        for(UINT k=0; k<numClasses; k++){
            file >> classClusterStdDev[k];

        if(word != "NumNodes:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NumNodes header!" << std::endl;

        if(word != "NodeClusters:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NodeClusters header!" << std::endl;

        for(UINT i=0; i<numNodes; i++){

            for(UINT j=0; j<numInputDimensions; j++){

            nodeClusters[ nodeID ] = cluster;

    bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
    classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
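For orientation, the V4.0 header that saveModelToFile writes and this loader parses looks roughly like the sketch below; the values are illustrative, the elided parts are the classifier base settings and the serialized tree, and the null-rejection section only appears when null rejection was enabled at training time.

GRT_DECISION_TREE_MODEL_FILE_V4.0
...
DecisionTreeNodeType: DecisionTreeClusterNode
MinNumSamplesPerNode: 5
MaxDepth: 10
RemoveFeaturesAtEachSpilt: 0
TrainingMode: 0
NumSplittingSteps: 100
TreeBuilt: 1
Tree:
...
ClassClusterMean: 0.12 0.34
ClassClusterStdDev: 0.01 0.02
NumNodes: 7
NodeClusters:
...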
    if( decisionTreeNode == NULL ){
        return NULL;
    }

    return decisionTreeNode->deepCopy();

    if( decisionTreeNode != NULL ){
        delete decisionTreeNode;
        decisionTreeNode = NULL;
    }

    this->decisionTreeNode = node.deepCopy();
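An illustrative use of setDecisionTreeNode above: swapping in a different split-node type before training so every node in the learned tree uses it. DecisionTreeThresholdNode is one of the node types shipped with GRT; the call deep-copies the node, as the excerpt shows.

DecisionTree dtree;
dtree.setDecisionTreeNode( DecisionTreeThresholdNode() ); // threshold splits instead of cluster splits
// ... add training data and call dtree.train( trainingData ) as usual ...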
    VectorFloat classProbs = trainingData.getClassProbabilities( classLabels );

    node->initNode( parent, depth, nodeID );

    if( trainingData.getNumClasses() == 1 || features.size() == 0 || M < minNumSamplesPerNode || depth >= maxDepth ){

        if( useNullRejection ){
            nodeClusters[ nodeID ] = trainingData.getMean();

        std::string info = "Reached leaf node.";
        if( trainingData.getNumClasses() == 1 ) info = "Reached pure leaf node.";
        else if( features.size() == 0 ) info = "Reached leaf node, no remaining features.";
        else if( M < minNumSamplesPerNode ) info = "Reached leaf node, hit min-samples-per-node limit.";
        else if( depth >= maxDepth ) info = "Reached leaf node, max depth reached.";

        Classifier::trainingLog << info << " Depth: " << depth << " NumSamples: " << trainingData.getNumSamples();

        Classifier::trainingLog << " Class Probabilities: ";
        for(UINT k=0; k<classProbs.getSize(); k++){
            Classifier::trainingLog << classProbs[k] << " ";

        Classifier::trainingLog << std::endl;

    UINT featureIndex = 0;

    if( !node->computeBestSpilt( trainingMode, numSplittingSteps, trainingData, features, classLabels, featureIndex, minError ) ){

    Classifier::trainingLog << "Depth: " << depth << " FeatureIndex: " << featureIndex << " MinError: " << minError;
    Classifier::trainingLog << " Class Probabilities: ";
    for(size_t k=0; k<classProbs.size(); k++){
        Classifier::trainingLog << classProbs[k] << " ";

    Classifier::trainingLog << std::endl;

    if( removeFeaturesAtEachSpilt ){
        for(size_t i=0; i<features.size(); i++){
            if( features[i] == featureIndex ){
                features.erase( features.begin()+i );

    for(UINT i=0; i<M; i++){
        if( node->predict( trainingData[i].getSample() ) ){
            rhs.addSample(trainingData[i].getClassLabel(), trainingData[i].getSample());
        }else lhs.addSample(trainingData[i].getClassLabel(), trainingData[i].getSample());

    trainingData.clear();

    UINT leftNodeID = ++nodeID;
    UINT rightNodeID = ++nodeID;

    node->setLeftChild( buildTree( lhs, node, features, classLabels, leftNodeID ) );
    node->setRightChild( buildTree( rhs, node, features, classLabels, rightNodeID ) );

    if( useNullRejection ){
        nodeClusters[ leftNodeID ] = lhs.getMean();
        nodeClusters[ rightNodeID ] = rhs.getMean();
Float DecisionTree::getNodeDistance( const VectorFloat &x, const UINT nodeID ){

    std::map< UINT,VectorFloat >::iterator iter = nodeClusters.find( nodeID );

    if( iter == nodeClusters.end() ) return NAN;

    return getNodeDistance( x, iter->second );
}

Float DecisionTree::getNodeDistance( const VectorFloat &x, const VectorFloat &y ){

    Float distance = 0;
    const size_t N = x.size();

    for(size_t i=0; i<N; i++){
        distance += MLBase::SQR( x[i] - y[i] );
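getNodeDistance compares the input against the stored cluster mean of the predicted leaf node, and recomputeNullRejectionThresholds turns the per-class distance statistics into acceptance thresholds. A standalone sketch of that rule, with hypothetical names, mirroring the loops above:

#include <vector>

// Squared Euclidean distance between an input and a node's cluster mean,
// mirroring the accumulation loop in DecisionTree::getNodeDistance.
double squaredDistance( const std::vector<double> &x, const std::vector<double> &mean ){
    double d = 0;
    for( size_t i=0; i<x.size(); i++ ){
        const double diff = x[i] - mean[i];
        d += diff * diff;
    }
    return d;
}

// Null-rejection check: accept the predicted class only if the leaf distance falls
// inside mean + coeff*stddev of that class's training-time distances.
bool acceptPrediction( double leafDistance, double classMean, double classStdDev, double coeff ){
    return leafDistance <= classMean + classStdDev * coeff;
}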
    if(word != "NumFeatures:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find NumFeatures!" << std::endl;

    file >> numInputDimensions;

    if(word != "NumClasses:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find NumClasses!" << std::endl;

    if(word != "UseScaling:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find UseScaling!" << std::endl;

    if(word != "UseNullRejection:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find UseNullRejection!" << std::endl;

    file >> useNullRejection;

    ranges.resize( numInputDimensions );

    if(word != "Ranges:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the Ranges!" << std::endl;

    for(UINT n=0; n<ranges.size(); n++){
        file >> ranges[n].minValue;
        file >> ranges[n].maxValue;

    if(word != "NumSplittingSteps:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;

    file >> numSplittingSteps;

    if(word != "MinNumSamplesPerNode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;

    file >> minNumSamplesPerNode;

    if(word != "MaxDepth:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;

    if(word != "RemoveFeaturesAtEachSpilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;

    file >> removeFeaturesAtEachSpilt;

    if(word != "TrainingMode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;

    file >> trainingMode;

    if(word != "TreeBuilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TreeBuilt!" << std::endl;

        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the Tree!" << std::endl;

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to create new DecisionTreeNode!" << std::endl;

        tree->setParent( NULL );

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;
bool DecisionTree::loadLegacyModelFromFile_v2( std::fstream &file ){

        Classifier::errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;

    if(word != "NumSplittingSteps:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;

    file >> numSplittingSteps;

    if(word != "MinNumSamplesPerNode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;

    file >> minNumSamplesPerNode;

    if(word != "MaxDepth:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;

    if(word != "RemoveFeaturesAtEachSpilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;

    file >> removeFeaturesAtEachSpilt;

    if(word != "TrainingMode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;

    file >> trainingMode;

    if(word != "TreeBuilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TreeBuilt!" << std::endl;

    if(word != "Tree:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the Tree!" << std::endl;

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to create new DecisionTreeNode!" << std::endl;

        tree->setParent( NULL );

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;

    bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
    classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
bool DecisionTree::loadLegacyModelFromFile_v3( std::fstream &file ){

        Classifier::errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;

    if(word != "NumSplittingSteps:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;

    file >> numSplittingSteps;

    if(word != "MinNumSamplesPerNode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;

    file >> minNumSamplesPerNode;

    if(word != "MaxDepth:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;

    if(word != "RemoveFeaturesAtEachSpilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;

    file >> removeFeaturesAtEachSpilt;

    if(word != "TrainingMode:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;

    file >> trainingMode;

    if(word != "TreeBuilt:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the TreeBuilt!" << std::endl;

    if(word != "Tree:"){
        Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the Tree!" << std::endl;

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to create new DecisionTreeNode!" << std::endl;

        tree->setParent( NULL );

            Classifier::errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;

    if( useNullRejection ){

        classClusterMean.resize( numClasses );
        classClusterStdDev.resize( numClasses );

        if(word != "ClassClusterMean:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the ClassClusterMean header!" << std::endl;

        for(UINT k=0; k<numClasses; k++){
            file >> classClusterMean[k];

        if(word != "ClassClusterStdDev:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the ClassClusterStdDev header!" << std::endl;

        for(UINT k=0; k<numClasses; k++){
            file >> classClusterStdDev[k];

        if(word != "NumNodes:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NumNodes header!" << std::endl;

        if(word != "NodeClusters:"){
            Classifier::errorLog << "loadModelFromFile(string filename) - Could not find the NodeClusters header!" << std::endl;

        for(UINT i=0; i<numNodes; i++){

            for(UINT j=0; j<numInputDimensions; j++){

            nodeClusters[ nodeID ] = cluster;

    bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
    classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);