// DecisionTreeClusterNode constructor: set the node type string
nodeType = "DecisionTreeClusterNode";
// predict( const VectorFloat &x ): route the sample to the right branch when the
// selected feature meets or exceeds the threshold
if( x[ featureIndex ] >= threshold ) return true;
// print() const: build the model description in a string stream, then write it to std::cout
std::ostringstream stream;
// ...
std::cout << stream.str();
// computeFeatureWeights( VectorFloat &weights ) const: count how often this feature index is used as a split
if( featureIndex >= ((UINT)weights.size()) ){
    warningLog << "computeFeatureWeights( VectorFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
    return false;
}
weights[ featureIndex ]++;
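A hypothetical way to call this from a trained tree (a sketch only; rootNode and numInputDimensions are assumed names, and the weights Vector must be sized to the number of input dimensions before the call):

// Accumulate, over the whole tree, how many times each input dimension is used as a split feature
VectorFloat weights( numInputDimensions, 0 );   // one counter per feature
if( !rootNode->computeFeatureWeights( weights ) ){
    // a warning is logged if a node's featureIndex is out of range
}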
// computeLeafNodeWeights( MatrixFloat &weights ) const: for nodes directly above a leaf,
// add this node's class probabilities into the weights matrix at the split feature's column
warningLog << "computeLeafNodeWeights( MatrixFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
// ...
// If the left child is a leaf node, accumulate this node's class probabilities
warningLog << "computeLeafNodeWeights( MatrixFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
// ...
for(UINT i=0; i<classProbabilities.getSize(); i++){
    weights[ i ][ featureIndex ] += classProbabilities[ i ];
}
// ...
// Same check and update for the right child branch
warningLog << "computeLeafNodeWeights( MatrixFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
// ...
for(UINT i=0; i<classProbabilities.getSize(); i++){
    weights[ i ][ featureIndex ] += classProbabilities[ i ];
}
// getModel( std::ostream &stream ) const: write an indented description of this node and its children
std::string tab = "";
for(UINT i=0; i<depth; i++) tab += "\t";

stream << tab << "depth: " << depth;
stream << " nodeSize: " << nodeSize;
stream << " featureIndex: " << featureIndex;
stream << " threshold: " << threshold;
stream << " isLeafNode: " << isLeafNode << std::endl;

stream << tab << "ClassProbabilities: ";
for(UINT i=0; i<classProbabilities.getSize(); i++){
    stream << classProbabilities[i] << "\t";
}

if( leftChild != NULL ){
    stream << tab << "LeftChild: " << std::endl;
    // ...
}

if( rightChild != NULL ){
    stream << tab << "RightChild: " << std::endl;
    // ...
}
// deepCopyNode() const: copy this node's parameters into the new node
node->isLeafNode = isLeafNode;
node->nodeID = nodeID;
node->predictedNodeID = predictedNodeID;
node->nodeSize = nodeSize;
node->featureIndex = featureIndex;
node->threshold = threshold;
node->classProbabilities = classProbabilities;
// ...
// Any children are also copied, and their parent pointers are updated to the new node
node->leftChild->setParent( node );
// ...
node->rightChild->setParent( node );
// set( nodeSize, featureIndex, threshold, classProbabilities ): store the split parameters for this node
this->nodeSize = nodeSize;
this->featureIndex = featureIndex;
this->threshold = threshold;
this->classProbabilities = classProbabilities;
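A hypothetical call matching the set() signature above (node is an existing DecisionTreeClusterNode and the values are purely illustrative):

VectorFloat classProbabilities( 2, 0 );
classProbabilities[0] = 0.25;   // probability of the first class at this node
classProbabilities[1] = 0.75;   // probability of the second class at this node
node.set( 100, 3, 0.5, classProbabilities );   // nodeSize, featureIndex, threshold, class probabilities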
// Both split strategies delegate to computeBestSpilt()
bool DecisionTreeClusterNode::computeBestSpiltBestIterativeSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
    return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError );
}

bool DecisionTreeClusterNode::computeBestSpiltBestRandomSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
    return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError );
}
// computeBestSpilt(): search a random subset of the candidate features for the split with the lowest error
const UINT N = (UINT)features.size();
const UINT K = (UINT)classLabels.size();

if( N == 0 ) return false;

UINT bestFeatureIndex = 0;
Float bestThreshold = 0;
// ...
UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
// ...
for(UINT n=0; n<numRandomFeatures; n++){
    featureIndex = features[ randomFeatures[n] ];

    // Copy the candidate feature column into the data matrix used for clustering
    for(UINT i=0; i<M; i++){
        data[i][0] = trainingData[i][featureIndex];
    }

    if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
        // Keep the feature and threshold that give the smallest error so far
        if( error < minError ){
            minError = error;
            bestThreshold = threshold;
            bestFeatureIndex = featureIndex;
        }
    }
}
// ...
featureIndex = bestFeatureIndex;
// Store the node size, best feature index, best threshold and class probabilities for this node
set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );
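computeError(), excerpted next, proposes each candidate threshold by clustering the feature values with the GRT KMeans class and taking the mean of the cluster centres. A minimal standalone sketch of that idea, using a plain two-centre 1-D k-means loop instead of the GRT API (all names here are illustrative):

#include <vector>
#include <cmath>
#include <algorithm>

// Propose a split threshold for one feature by clustering its values into two
// groups and returning the mean of the two cluster centres.
double proposeThreshold( const std::vector<double> &feature ){
    if( feature.empty() ) return 0.0;
    // Start the two centres at the smallest and largest feature value
    double c0 = *std::min_element( feature.begin(), feature.end() );
    double c1 = *std::max_element( feature.begin(), feature.end() );
    for( int iter = 0; iter < 100; iter++ ){
        double sum0 = 0, sum1 = 0;
        int n0 = 0, n1 = 0;
        // Assign every value to its nearest centre
        for( double x : feature ){
            if( std::fabs( x - c0 ) <= std::fabs( x - c1 ) ){ sum0 += x; n0++; }
            else { sum1 += x; n1++; }
        }
        // Move each centre to the mean of its assigned values
        const double new0 = n0 > 0 ? sum0 / n0 : c0;
        const double new1 = n1 > 0 ? sum1 / n1 : c1;
        const bool converged = std::fabs( new0 - c0 ) + std::fabs( new1 - c1 ) < 1e-9;
        c0 = new0;
        c1 = new1;
        if( converged ) break;
    }
    return ( c0 + c1 ) / 2.0;   // mean of the two cluster centres
}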
// computeError(): cluster the candidate feature with KMeans, derive a threshold from the
// cluster centres, then score the resulting split with a weighted Gini index
const UINT K = (UINT)classLabels.size();

Float giniIndexL = 0;
Float giniIndexR = 0;
// ...
kmeans.setComputeTheta( true );
// ...
if( !kmeans.train_( data ) ){
    errorLog << "computeError() - Failed to train KMeans model for feature: " << featureIndex << std::endl;
    return false;
}
// ...
// Use the mean of the cluster centres as the split threshold
const MatrixFloat &clusters = kmeans.getClusters();
// ...
threshold += clusters[i][0];
// ...
// Assign each sample to the left (0) or right (1) group and count the class memberships
groupCounter[0] = groupCounter[1] = 0;
classProbabilities.setAllValues(0);
for(UINT i=0; i<M; i++){
    groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
    groupCounter[ groupIndex[i] ]++;
    classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
}

// Normalize the counts into per-group class probabilities
for(UINT k=0; k<K; k++){
    classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
    classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
}

// Compute the Gini index of each group and weight it by the group size
giniIndexL = giniIndexR = 0;
for(UINT k=0; k<K; k++){
    giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
    giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
}
weightL = groupCounter[0]/M;
weightR = groupCounter[1]/M;
error = (giniIndexL*weightL) + (giniIndexR*weightR);
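The error measure above is the Gini impurity of each side of the split, weighted by the fraction of samples that fall on that side. A small self-contained sketch of the same computation, using plain std::vector instead of the GRT containers (names are illustrative):

#include <vector>

// Weighted Gini impurity of a two-way split: each side's impurity
// sum_k p_k * (1 - p_k) is weighted by the fraction of samples on that side.
double weightedGiniError( const std::vector<double> &probsLeft,
                          const std::vector<double> &probsRight,
                          double countLeft, double countRight ){
    double giniL = 0, giniR = 0;
    for( size_t k = 0; k < probsLeft.size(); k++ )  giniL += probsLeft[k]  * ( 1.0 - probsLeft[k] );
    for( size_t k = 0; k < probsRight.size(); k++ ) giniR += probsRight[k] * ( 1.0 - probsRight[k] );
    const double M = countLeft + countRight;
    if( M <= 0 ) return 0.0;
    return giniL * ( countLeft / M ) + giniR * ( countRight / M );
}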
// saveParametersToFile( std::fstream &file ) const
if( !file.is_open() ){
    errorLog << "saveParametersToFile(fstream &file) - File is not open!" << std::endl;
    return false;
}
// ...
errorLog << "saveParametersToFile(fstream &file) - Failed to save DecisionTreeNode parameters to file!" << std::endl;
// ...
// Save the DecisionTreeClusterNode-specific parameters
file << "FeatureIndex: " << featureIndex << std::endl;
file << "Threshold: " << threshold << std::endl;
// loadParametersFromFile( std::fstream &file )
errorLog << "loadParametersFromFile(fstream &file) - File is not open!" << std::endl;
// ...
errorLog << "loadParametersFromFile(fstream &file) - Failed to load DecisionTreeNode parameters from file!" << std::endl;
// ...
if( word != "FeatureIndex:" ){
    errorLog << "loadParametersFromFile(fstream &file) - Failed to find FeatureIndex header!" << std::endl;
    return false;
}
file >> featureIndex;
// ...
if( word != "Threshold:" ){
    errorLog << "loadParametersFromFile(fstream &file) - Failed to find Threshold header!" << std::endl;
    return false;
}
This file implements a DecisionTreeClusterNode, which is a specific type of node used for a DecisionTree.