2 #define GRT_DLL_EXPORTS 20 if( x[ featureIndex ] >= threshold )
return true;
38 std::ostringstream stream;
41 std::cout << stream.str();
54 if( featureIndex >= weights.
getSize() ){
55 warningLog << __GRT_LOG__ <<
" Feature index is greater than weights Vector size!" << std::endl;
58 weights[ featureIndex ]++;
78 warningLog << __GRT_LOG__ <<
" Feature index is greater than weights Vector size!" << std::endl;
85 warningLog << __GRT_LOG__ <<
" The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
88 for(UINT i=0; i<classProbabilities.
getSize(); i++){
89 weights[ i ][ featureIndex ] += classProbabilities[ i ];
97 warningLog << __GRT_LOG__ <<
" The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
100 for(UINT i=0; i<classProbabilities.
getSize(); i++){
101 weights[ i ][ featureIndex ] += classProbabilities[ i ];
111 std::string tab =
"";
112 for(UINT i=0; i<depth; i++) tab +=
"\t";
114 stream << tab <<
"depth: " << depth;
115 stream <<
" nodeSize: " << nodeSize;
116 stream <<
" featureIndex: " << featureIndex;
117 stream <<
" threshold " << threshold;
118 stream <<
" isLeafNode: " << isLeafNode << std::endl;
120 stream << tab <<
"ClassProbabilities: ";
121 for(UINT i=0; i<classProbabilities.
getSize(); i++){
122 stream << classProbabilities[i] <<
"\t";
126 if( leftChild != NULL ){
127 stream << tab <<
"LeftChild: " << std::endl;
131 if( rightChild != NULL ){
132 stream << tab <<
"RightChild: " << std::endl;
149 node->isLeafNode = isLeafNode;
150 node->nodeID = nodeID;
151 node->predictedNodeID = predictedNodeID;
152 node->nodeSize = nodeSize;
153 node->featureIndex = featureIndex;
154 node->threshold = threshold;
155 node->classProbabilities = classProbabilities;
159 node->leftChild = leftChild->
deepCopy();
160 node->leftChild->setParent( node );
165 node->rightChild = rightChild->
deepCopy();
166 node->rightChild->setParent( node );
169 return dynamic_cast< Node*
>( node );
181 this->nodeSize = nodeSize;
182 this->featureIndex = featureIndex;
183 this->threshold = threshold;
184 this->classProbabilities = classProbabilities;
// Computes the best split for this node using the "best iterative split" strategy.
// NOTE(review): this currently delegates to the exact same computeSplit(...) call as
// computeBestSplitBestRandomSplit below — the two strategies are presently identical;
// confirm whether a distinct iterative search was intended here.
// @param numSplittingSteps number of candidate splits to evaluate per feature
// @param trainingData the labelled samples reaching this node
// @param features the candidate feature indices to consider
// @param classLabels the class labels present in the training data
// @param featureIndex [out] index of the feature chosen for the split
// @param minError [out] the error of the best split found
// @return true if a split was computed successfully
188 bool DecisionTreeClusterNode::computeBestSplitBestIterativeSplit(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const Vector< UINT > &features,
const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
189 return computeSplit( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
// Computes the best split for this node using the "best random split" strategy.
// NOTE(review): delegates to the same computeSplit(...) call as the iterative-split
// variant above, so both strategies currently share one implementation.
// @param numSplittingSteps number of candidate splits to evaluate per feature
// @param trainingData the labelled samples reaching this node
// @param features the candidate feature indices to consider
// @param classLabels the class labels present in the training data
// @param featureIndex [out] index of the feature chosen for the split
// @param minError [out] the error of the best split found
// @return true if a split was computed successfully
192 bool DecisionTreeClusterNode::computeBestSplitBestRandomSplit(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const Vector< UINT > &features,
const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
193 return computeSplit( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
199 const UINT N = features.
getSize();
200 const UINT K = classLabels.
getSize();
202 if( N == 0 )
return false;
203 if( K == 0 )
return false;
207 UINT bestFeatureIndex = 0;
208 Float bestThreshold = 0;
215 UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
219 for(UINT n=0; n<numRandomFeatures; n++){
221 featureIndex = features[ randomFeatures[n] ];
224 for(UINT i=0; i<M; i++){
225 data[i][0] = trainingData[i][featureIndex];
228 if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
230 if( error < minError ){
232 bestThreshold = threshold;
233 bestFeatureIndex = featureIndex;
239 featureIndex = bestFeatureIndex;
242 set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );
253 const UINT K = (UINT)classLabels.size();
255 Float giniIndexL = 0;
256 Float giniIndexR = 0;
265 kmeans.setComputeTheta(
true );
273 if( !kmeans.
train_( data ) ){
274 errorLog << __GRT_LOG__ <<
" Failed to train KMeans model for feature: " << featureIndex << std::endl;
279 const MatrixFloat &clusters = kmeans.getClusters();
282 threshold += clusters[i][0];
287 groupCounter[0] = groupCounter[1] = 0;
289 for(UINT i=0; i<M; i++){
290 groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
291 groupCounter[ groupIndex[i] ]++;
292 classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
296 for(UINT k=0; k<K; k++){
297 classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
298 classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
302 giniIndexL = giniIndexR = 0;
303 for(UINT k=0; k<K; k++){
304 giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
305 giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
307 weightL = groupCounter[0]/M;
308 weightR = groupCounter[1]/M;
309 error = (giniIndexL*weightL) + (giniIndexR*weightR);
316 if( !file.is_open() )
318 errorLog << __GRT_LOG__ <<
" File is not open!" << std::endl;
324 errorLog << __GRT_LOG__ <<
" Failed to save DecisionTreeNode parameters to file!" << std::endl;
329 file <<
"FeatureIndex: " << featureIndex << std::endl;
330 file <<
"Threshold: " << threshold << std::endl;
339 errorLog << __GRT_LOG__ <<
" File is not open!" << std::endl;
345 errorLog << __GRT_LOG__ <<
" Failed to load DecisionTreeNode parameters from file!" << std::endl;
352 if( word !=
"FeatureIndex:" ){
353 errorLog << __GRT_LOG__ <<
" Failed to find FeatureIndex header!" << std::endl;
356 file >> featureIndex;
359 if( word !=
"Threshold:" ){
360 errorLog << __GRT_LOG__ <<
" Failed to find Threshold header!" << std::endl;
virtual bool loadParametersFromFile(std::fstream &file) override
DecisionTreeClusterNode()
virtual Node * deepCopy() const override
virtual bool clear() override
virtual bool getModel(std::ostream &stream) const override
bool setTrainingLoggingEnabled(const bool loggingEnabled)
This file contains the Random class, a useful wrapper for generating cross-platform random functions.
virtual bool train_(MatrixFloat &data)
virtual bool predict_(VectorFloat &x) override
virtual bool loadParametersFromFile(std::fstream &file) override
bool setMinChange(const Float minChange)
virtual bool saveParametersToFile(std::fstream &file) const override
bool setAllValues(const T &value)
Vector< unsigned int > getRandomSubset(const unsigned int startRange, const unsigned int endRange, const unsigned int subsetSize)
UINT getNumSamples() const
UINT getFeatureIndex() const
bool getIsLeafNode() const
virtual bool print() const override
unsigned int getNumRows() const
virtual bool computeFeatureWeights(VectorFloat &weights) const
unsigned int getNumCols() const
bool setMinNumEpochs(const UINT minNumEpochs)
virtual ~DecisionTreeClusterNode()
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const VectorFloat &classProbabilities)
virtual bool getModel(std::ostream &stream) const override
Vector< MinMax > getRanges() const
Float getThreshold() const
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const override
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
virtual bool clear() override
bool setMaxNumEpochs(const UINT maxNumEpochs)
virtual bool computeFeatureWeights(VectorFloat &weights) const override
bool setNumClusters(const UINT numClusters)
virtual bool saveParametersToFile(std::fstream &file) const override
virtual Node * deepCopy() const