2 #define GRT_DLL_EXPORTS
// NOTE(review): fragment of the DecisionTreeClusterNode constructor -- records
// this subclass's type string so generic Node code can identify the node type.
11 nodeType =
"DecisionTreeClusterNode";
// NOTE(review): fragment of predict( const VectorFloat &x ) -- a sample whose
// value for this node's split feature is at or above the learned threshold
// takes the "true" branch (presumably routed to the right child -- confirm
// against the caller's convention).
24 if( x[ featureIndex ] >= threshold )
return true;
// NOTE(review): fragment of print() -- the model description is assembled in
// an in-memory string stream first, then emitted to stdout in a single write.
42 std::ostringstream stream;
45 std::cout << stream.str();
// NOTE(review): fragment of computeFeatureWeights( VectorFloat &weights ) --
// guards against an out-of-range split-feature index before crediting this
// node's feature with one vote in the caller-supplied weights vector.
58 if( featureIndex >= ((UINT)weights.size()) ){
59 warningLog <<
"computeFeatureWeights( VectorFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
62 weights[ featureIndex ]++;
// NOTE(review): fragment of computeLeafNodeWeights( MatrixFloat &weights ).
// FIX: the warning messages below were copy-pasted from
// computeFeatureWeights( VectorFloat &weights ) and reported the wrong
// function and argument type (the surrounding code indexes a two-dimensional
// weights matrix, and the message text itself says "weights matrix").
82 warningLog <<
"computeLeafNodeWeights( MatrixFloat &weights ) - Feature index is greater than weights Matrix size!" << std::endl;
89 warningLog <<
"computeLeafNodeWeights( MatrixFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
// Accumulate this node's per-class probabilities into the column of the
// weights matrix that corresponds to this node's split feature.
92 for(UINT i=0; i<classProbabilities.
getSize(); i++){
93 weights[ i ][ featureIndex ] += classProbabilities[ i ];
101 warningLog <<
"computeLeafNodeWeights( MatrixFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
104 for(UINT i=0; i<classProbabilities.
getSize(); i++){
105 weights[ i ][ featureIndex ] += classProbabilities[ i ];
// NOTE(review): fragment of getModel(stream) -- pretty-prints this node with a
// depth-proportional tab indent, then its split parameters, then the per-class
// probabilities, then recurses into any non-null children.
// FIX: the "threshold" label was missing the ':' that every sibling label
// ("depth:", "nodeSize:", "featureIndex:", "isLeafNode:") carries.
115 std::string tab =
"";
116 for(UINT i=0; i<depth; i++) tab +=
"\t";
118 stream << tab <<
"depth: " << depth;
119 stream <<
" nodeSize: " << nodeSize;
120 stream <<
" featureIndex: " << featureIndex;
121 stream <<
" threshold: " << threshold;
122 stream <<
" isLeafNode: " << isLeafNode << std::endl;
124 stream << tab <<
"ClassProbabilities: ";
// Tab-separated probability per known class.
125 for(UINT i=0; i<classProbabilities.
getSize(); i++){
126 stream << classProbabilities[i] <<
"\t";
// Children print themselves (recursion happens on lines not shown here).
130 if( leftChild != NULL ){
131 stream << tab <<
"LeftChild: " << std::endl;
135 if( rightChild != NULL ){
136 stream << tab <<
"RightChild: " << std::endl;
// NOTE(review): fragment of deepCopyNode() -- field-by-field copy of this
// node's state onto a freshly allocated node; the deep-copied children (made
// on lines not shown here) are re-parented so the clone is a fully
// independent tree.
153 node->isLeafNode = isLeafNode;
154 node->nodeID = nodeID;
155 node->predictedNodeID = predictedNodeID;
156 node->nodeSize = nodeSize;
157 node->featureIndex = featureIndex;
158 node->threshold = threshold;
159 node->classProbabilities = classProbabilities;
// Each copied child must point back at the clone, not at the original node.
164 node->leftChild->setParent( node );
170 node->rightChild->setParent( node );
// NOTE(review): fragment of set(nodeSize, featureIndex, threshold,
// classProbabilities) -- plain setter that stores the learned split
// parameters on this node.
189 this->nodeSize = nodeSize;
190 this->featureIndex = featureIndex;
191 this->threshold = threshold;
192 this->classProbabilities = classProbabilities;
196 bool DecisionTreeClusterNode::computeBestSpiltBestIterativeSpilt(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const Vector< UINT > &features,
const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
198 return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
201 bool DecisionTreeClusterNode::computeBestSpiltBestRandomSpilt(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const Vector< UINT > &features,
const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
203 return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
// NOTE(review): fragment of computeBestSpilt(...) -- evaluates a random
// subset of candidate features, keeps the split with the lowest error, and
// stores the winner on this node via set().
209 const UINT N = (UINT)features.size();
210 const UINT K = (UINT)classLabels.size();
// No candidate features means no split is possible.
212 if( N == 0 )
return false;
216 UINT bestFeatureIndex = 0;
217 Float bestThreshold = 0;
// Never evaluate more candidates than there are features.
224 UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
228 for(UINT n=0; n<numRandomFeatures; n++){
230 featureIndex = features[ randomFeatures[n] ];
// Copy the candidate feature's values into a single-column matrix
// (presumably the clustering input for computeError -- confirm).
233 for(UINT i=0; i<M; i++){
234 data[i][0] = trainingData[i][featureIndex];
237 if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
// Track the lowest-error candidate seen so far (minError is updated on a
// line not shown here).
239 if( error < minError ){
241 bestThreshold = threshold;
242 bestFeatureIndex = featureIndex;
248 featureIndex = bestFeatureIndex;
251 set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );
// NOTE(review): fragment of computeError(...) -- clusters the candidate
// feature's values with KMeans, derives a threshold from the cluster centers,
// then scores the resulting two-way split with a weighted Gini index.
262 const UINT K = (UINT)classLabels.size();
264 Float giniIndexL = 0;
265 Float giniIndexR = 0;
274 kmeans.setComputeTheta(
true );
282 if( !kmeans.
train_( data ) ){
// NOTE(review): this message names computeSplitError(), which does not match
// the surrounding function -- looks like a stale copy-paste; confirm and fix.
283 errorLog <<
"computeSplitError() - Failed to train KMeans model for feature: " << featureIndex << std::endl;
// Threshold is accumulated from the cluster centers; presumably averaged by
// dividing by the cluster count on a line not shown here -- confirm.
288 const MatrixFloat &clusters = kmeans.getClusters();
291 threshold += clusters[i][0];
// Partition the samples around the threshold and histogram class labels
// per group (group 0 = below threshold, group 1 = at/above threshold).
296 groupCounter[0] = groupCounter[1] = 0;
297 classProbabilities.setAllValues(0);
298 for(UINT i=0; i<M; i++){
299 groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
300 groupCounter[ groupIndex[i] ]++;
301 classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
// Normalize the counts into per-group class probabilities, guarding against
// empty groups to avoid division by zero.
305 for(UINT k=0; k<K; k++){
306 classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
307 classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
// Gini impurity of each side, weighted by the fraction of samples it holds.
311 giniIndexL = giniIndexR = 0;
312 for(UINT k=0; k<K; k++){
313 giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
314 giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
// NOTE(review): if groupCounter holds integers these divisions by the UINT M
// would truncate to 0/1 -- confirm groupCounter is a floating-point vector.
316 weightL = groupCounter[0]/M;
317 weightR = groupCounter[1]/M;
318 error = (giniIndexL*weightL) + (giniIndexR*weightR);
// NOTE(review): fragment of saveParametersToFile(file) -- validates the
// stream, saves the base DecisionTreeNode parameters (call not shown), then
// appends this node's own labelled FeatureIndex/Threshold fields.
325 if( !file.is_open() )
327 errorLog <<
"saveParametersToFile(fstream &file) - File is not open!" << std::endl;
333 errorLog <<
"saveParametersToFile(fstream &file) - Failed to save DecisionTreeNode parameters to file!" << std::endl;
338 file <<
"FeatureIndex: " << featureIndex << std::endl;
339 file <<
"Threshold: " << threshold << std::endl;
// NOTE(review): fragment of loadParametersFromFile(file) -- mirror of the
// save routine: loads base-class parameters first, then parses the labelled
// FeatureIndex/Threshold fields, failing loudly on any header mismatch so a
// corrupt or out-of-date model file is detected early.
348 errorLog <<
"loadParametersFromFile(fstream &file) - File is not open!" << std::endl;
354 errorLog <<
"loadParametersFromFile(fstream &file) - Failed to load DecisionTreeNode parameters from file!" << std::endl;
361 if( word !=
"FeatureIndex:" ){
362 errorLog <<
"loadParametersFromFile(fstream &file) - Failed to find FeatureIndex header!" << std::endl;
365 file >> featureIndex;
368 if( word !=
"Threshold:" ){
369 errorLog <<
"loadParametersFromFile(fstream &file) - Failed to find Threshold header!" << std::endl;
DecisionTreeClusterNode()
virtual bool saveParametersToFile(std::fstream &file) const
virtual bool loadParametersFromFile(std::fstream &file)
bool setTrainingLoggingEnabled(const bool loggingEnabled)
virtual bool predict(const VectorFloat &x)
virtual bool train_(MatrixFloat &data)
virtual bool getModel(std::ostream &stream) const
bool setMinChange(const Float minChange)
DecisionTreeClusterNode * deepCopy() const
This file implements a DecisionTreeClusterNode, which is a specific type of node used for a DecisionTree.
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
Vector< unsigned int > getRandomSubset(const unsigned int startRange, const unsigned int endRange, const unsigned int subsetSize)
UINT getNumSamples() const
virtual bool computeFeatureWeights(VectorFloat &weights) const
virtual Node * deepCopyNode() const
UINT getFeatureIndex() const
virtual Node * deepCopyNode() const
bool getIsLeafNode() const
unsigned int getNumRows() const
virtual bool computeFeatureWeights(VectorFloat &weights) const
virtual bool getModel(std::ostream &stream) const
unsigned int getNumCols() const
virtual bool saveParametersToFile(std::fstream &file) const
bool setMinNumEpochs(const UINT minNumEpochs)
virtual ~DecisionTreeClusterNode()
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const VectorFloat &classProbabilities)
Vector< MinMax > getRanges() const
Float getThreshold() const
virtual bool print() const
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
bool setMaxNumEpochs(const UINT maxNumEpochs)
virtual bool loadParametersFromFile(std::fstream &file)
bool setNumClusters(const UINT numClusters)