GestureRecognitionToolkit Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
DecisionTreeClusterNode.cpp
#define GRT_DLL_EXPORTS
#include "DecisionTreeClusterNode.h"

GRT_BEGIN_NAMESPACE

//Register the DecisionTreeClusterNode module with the Node base class
RegisterNode< DecisionTreeClusterNode > DecisionTreeClusterNode::registerModule("DecisionTreeClusterNode");
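
/*
 A minimal sketch of what the registration above enables, assuming the
 createInstanceFromString factory method declared in the Node base class;
 it lets a node be constructed from its string type at runtime:

     Node *node = Node::createInstanceFromString( "DecisionTreeClusterNode" );
     if( node != NULL ){
         //node now points to a freshly constructed DecisionTreeClusterNode
         delete node;
     }
*/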

DecisionTreeClusterNode::DecisionTreeClusterNode(){
    clear();
}

DecisionTreeClusterNode::~DecisionTreeClusterNode(){
    clear();
}

bool DecisionTreeClusterNode::predict_(VectorFloat &x){

    //Route the sample to the right branch if the splitting feature is at or above the threshold
    if( x[ featureIndex ] >= threshold ) return true;

    return false;
}
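
/*
 Worked example of the predicate above: with featureIndex = 1 and
 threshold = 0.5, the sample x = [0.2, 0.7] satisfies x[1] >= 0.5, so
 predict_ returns true and the sample is routed down the right branch;
 x = [0.2, 0.3] would return false and go down the left branch.
*/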

bool DecisionTreeClusterNode::clear(){

    //Call the base class clear function
    DecisionTreeNode::clear();

    featureIndex = 0;
    threshold = 0;

    return true;
}

bool DecisionTreeClusterNode::print() const{

    std::ostringstream stream;

    if( getModel( stream ) ){
        std::cout << stream.str();
        return true;
    }

    return false;
}

bool DecisionTreeClusterNode::computeFeatureWeights( VectorFloat &weights ) const{

    if( isLeafNode ){ //If we reach a leaf node, no weight update needed
        return true;
    }

    if( featureIndex >= weights.getSize() ){ //Feature index is out of bounds
        warningLog << __GRT_LOG__ << " Feature index is greater than weights Vector size!" << std::endl;
        return false;
    }else{
        weights[ featureIndex ]++;
    }

    if( leftChild ){ //Recursively compute the weights for the left child
        leftChild->computeFeatureWeights( weights );
    }
    if( rightChild ){ //Recursively compute the weights for the right child
        rightChild->computeFeatureWeights( weights );
    }

    return true;
}
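
/*
 A short usage sketch for the recursion above: weights needs one entry per
 input dimension and should be zeroed by the caller, after which weights[i]
 counts how many internal nodes split on feature i. The numInputDimensions
 and root names below are placeholders:

     VectorFloat weights( numInputDimensions, 0 );
     root->computeFeatureWeights( weights );
*/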

bool DecisionTreeClusterNode::computeLeafNodeWeights( MatrixFloat &weights ) const{

    if( isLeafNode ){ //If we reach a leaf node, there is nothing to do
        return true;
    }

    if( featureIndex >= weights.getNumCols() ){ //Feature index is out of bounds
        warningLog << __GRT_LOG__ << " Feature index is greater than weights Vector size!" << std::endl;
        return false;
    }

    if( leftChild ){ //Recursively compute the weights for the left child until we reach the node above a leaf node
        if( leftChild->getIsLeafNode() ){
            if( classProbabilities.getSize() != weights.getNumRows() ){
                warningLog << __GRT_LOG__ << " The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
                return false;
            }
            for(UINT i=0; i<classProbabilities.getSize(); i++){
                weights[ i ][ featureIndex ] += classProbabilities[ i ];
            }
        }
        leftChild->computeLeafNodeWeights( weights );
    }
    if( rightChild ){ //Recursively compute the weights for the right child until we reach the node above a leaf node
        if( rightChild->getIsLeafNode() ){
            if( classProbabilities.getSize() != weights.getNumRows() ){
                warningLog << __GRT_LOG__ << " The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
                return false;
            }
            for(UINT i=0; i<classProbabilities.getSize(); i++){
                weights[ i ][ featureIndex ] += classProbabilities[ i ];
            }
        }
        rightChild->computeLeafNodeWeights( weights );
    }

    return true;
}
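
/*
 Unlike computeFeatureWeights, the weights argument here is a matrix with one
 row per class and one column per feature: each node directly above a leaf
 adds its class probabilities to the column of its splitting feature, giving
 a per-class measure of how informative each feature is.
*/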

bool DecisionTreeClusterNode::getModel( std::ostream &stream ) const{

    std::string tab = "";
    for(UINT i=0; i<depth; i++) tab += "\t";

    stream << tab << "depth: " << depth;
    stream << " nodeSize: " << nodeSize;
    stream << " featureIndex: " << featureIndex;
    stream << " threshold: " << threshold;
    stream << " isLeafNode: " << isLeafNode << std::endl;

    stream << tab << "ClassProbabilities: ";
    for(UINT i=0; i<classProbabilities.getSize(); i++){
        stream << classProbabilities[i] << "\t";
    }
    stream << std::endl;

    if( leftChild != NULL ){
        stream << tab << "LeftChild: " << std::endl;
        leftChild->getModel( stream );
    }

    if( rightChild != NULL ){
        stream << tab << "RightChild: " << std::endl;
        rightChild->getModel( stream );
    }

    return true;
}
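
/*
 Example of the format written above, for a hypothetical root node at depth 0
 with two classes (the values are illustrative only):

     depth: 0 nodeSize: 100 featureIndex: 2 threshold: 0.5 isLeafNode: 0
     ClassProbabilities: 0.4    0.6
*/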

Node* DecisionTreeClusterNode::deepCopy() const{

    DecisionTreeClusterNode *node = new DecisionTreeClusterNode;

    if( node == NULL ){
        return NULL;
    }

    //Copy this node's parameters into the new node
    node->depth = depth;
    node->isLeafNode = isLeafNode;
    node->nodeID = nodeID;
    node->predictedNodeID = predictedNodeID;
    node->nodeSize = nodeSize;
    node->featureIndex = featureIndex;
    node->threshold = threshold;
    node->classProbabilities = classProbabilities;

    //Recursively deep copy the left child
    if( leftChild ){
        node->leftChild = leftChild->deepCopy();
        node->leftChild->setParent( node );
    }

    //Recursively deep copy the right child
    if( rightChild ){
        node->rightChild = rightChild->deepCopy();
        node->rightChild->setParent( node );
    }

    return dynamic_cast< Node* >( node );
}

UINT DecisionTreeClusterNode::getFeatureIndex() const{
    return featureIndex;
}

Float DecisionTreeClusterNode::getThreshold() const{
    return threshold;
}

bool DecisionTreeClusterNode::set(const UINT nodeSize,const UINT featureIndex,const Float threshold,const VectorFloat &classProbabilities){
    this->nodeSize = nodeSize;
    this->featureIndex = featureIndex;
    this->threshold = threshold;
    this->classProbabilities = classProbabilities;
    return true;
}

bool DecisionTreeClusterNode::computeBestSplitBestIterativeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
    return computeSplit( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
}

bool DecisionTreeClusterNode::computeBestSplitBestRandomSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
    return computeSplit( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
}
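
//Note that both training modes above delegate to the same computeSplit function:
//for this node type the split threshold always comes from a two-cluster KMeans
//fit of the candidate feature (see computeError below), so the iterative and
//random split strategies behave identically here.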

bool DecisionTreeClusterNode::computeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){

    const UINT M = trainingData.getNumSamples();
    const UINT N = features.getSize();
    const UINT K = classLabels.getSize();

    if( N == 0 ) return false;
    if( K == 0 ) return false;

    minError = grt_numeric_limits< Float >::max(); //Reset the minimum error before searching for the best split
    Random random;
    UINT bestFeatureIndex = 0;
    Float bestThreshold = 0;
    Float error = 0;
    Vector< UINT > groupIndex(M);
    Vector< MinMax > ranges = trainingData.getRanges();
    MatrixFloat data(M,1); //This will store our temporary data for each dimension

    //Randomly select which features we want to use
    UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
    Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );

    //Loop over each random feature and try and find the best split point
    for(UINT n=0; n<numRandomFeatures; n++){

        featureIndex = features[ randomFeatures[n] ];

        //Use the data in this feature dimension to create a sub-dataset
        for(UINT i=0; i<M; i++){
            data[i][0] = trainingData[i][featureIndex];
        }

        if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
            //Store the best threshold and feature index
            if( error < minError ){
                minError = error;
                bestThreshold = threshold;
                bestFeatureIndex = featureIndex;
            }
        }
    }

    //Set the best feature index that will be returned to the DecisionTree that called this function
    featureIndex = bestFeatureIndex;

    //Store the node size, feature index, best threshold and class probabilities for this node
    set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );

    return true;
}
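
/*
 Worked sketch of the threshold selection used above: computeError fits a
 two-cluster KMeans model to the feature values, and the threshold is the
 mean of the two cluster centers. For example, if the centers found for a
 feature are 0.2 and 0.8:

     threshold = (0.2 + 0.8) / 2 = 0.5
*/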

bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans model with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    classProbabilities.setAllValues(0);
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
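
/*
 Worked example of the Gini error above, assuming M = 10 samples, K = 2
 classes, and a threshold that sends 4 samples left and 6 samples right:

     lhs probabilities {0.75, 0.25}: giniL = 0.75*0.25 + 0.25*0.75 = 0.375
     rhs probabilities {1/6, 5/6}:   giniR = (1/6)*(5/6) + (5/6)*(1/6) = 0.278
     error = 0.375*(4/10) + 0.278*(6/10) = 0.317

 A perfectly pure split on both sides would give an error of 0.
*/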

bool DecisionTreeClusterNode::saveParametersToFile( std::fstream &file ) const{

    if( !file.is_open() )
    {
        errorLog << __GRT_LOG__ << " File is not open!" << std::endl;
        return false;
    }

    //Save the DecisionTreeNode parameters
    if( !DecisionTreeNode::saveParametersToFile( file ) ){
        errorLog << __GRT_LOG__ << " Failed to save DecisionTreeNode parameters to file!" << std::endl;
        return false;
    }

    //Save the custom DecisionTreeClusterNode parameters
    file << "FeatureIndex: " << featureIndex << std::endl;
    file << "Threshold: " << threshold << std::endl;

    return true;
}
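
/*
 The custom parameters above are appended to the file in a simple key-value
 format; for example, a node with featureIndex 2 and threshold 0.5 writes:

     FeatureIndex: 2
     Threshold: 0.5
*/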

bool DecisionTreeClusterNode::loadParametersFromFile( std::fstream &file ){

    if( !file.is_open() )
    {
        errorLog << __GRT_LOG__ << " File is not open!" << std::endl;
        return false;
    }

    //Load the DecisionTreeNode parameters
    if( !DecisionTreeNode::loadParametersFromFile( file ) ){
        errorLog << __GRT_LOG__ << " Failed to load DecisionTreeNode parameters from file!" << std::endl;
        return false;
    }

    std::string word;
    //Load the custom DecisionTreeClusterNode parameters
    file >> word;
    if( word != "FeatureIndex:" ){
        errorLog << __GRT_LOG__ << " Failed to find FeatureIndex header!" << std::endl;
        return false;
    }
    file >> featureIndex;

    file >> word;
    if( word != "Threshold:" ){
        errorLog << __GRT_LOG__ << " Failed to find Threshold header!" << std::endl;
        return false;
    }
    file >> threshold;

    return true;
}

GRT_END_NAMESPACE
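
/*
 A minimal end-to-end sketch of using this node type, assuming the DecisionTree
 classifier from this library and a labelled dataset on disk; the constructor
 arguments shown (node type, min samples per node, max depth) reflect the
 DecisionTree API at the time of writing and may differ between versions:

     #include <GRT/GRT.h>
     using namespace GRT;

     ClassificationData data;
     data.load( "data.grt" );

     //Use a DecisionTreeClusterNode to compute the split at each node
     DecisionTree dtree( DecisionTreeClusterNode(), 5, 10 );
     if( dtree.train( data ) ){
         VectorFloat sample = data[0].getSample();
         dtree.predict( sample );
     }
*/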