GestureRecognitionToolkit  Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
DecisionTreeClusterNode.cpp
1 
3 
4 GRT_BEGIN_NAMESPACE
5 
6 //Register the DecisionTreeClusterNode module with the Node base class
7 RegisterNode< DecisionTreeClusterNode > DecisionTreeClusterNode::registerModule("DecisionTreeClusterNode");
8 
10  nodeType = "DecisionTreeClusterNode";
11  parent = NULL;
12  leftChild = NULL;
13  rightChild = NULL;
14  clear();
15 }
16 
18  clear();
19 }
20 
22 
23  if( x[ featureIndex ] >= threshold ) return true;
24 
25  return false;
26 }
27 
29 
30  //Call the base class clear function
32 
33  featureIndex = 0;
34  threshold = 0;
35 
36  return true;
37 }
38 
40 
41  std::ostringstream stream;
42 
43  if( getModel( stream ) ){
44  std::cout << stream.str();
45  return true;
46  }
47 
48  return false;
49 }
50 
52 
53  if( isLeafNode ){ //If we reach a leaf node, no weight update needed
54  return true;
55  }
56 
57  if( featureIndex >= ((UINT)weights.size()) ){ //Feature index is out of bounds
58  warningLog << "computeFeatureWeights( VectorFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
59  return false;
60  }else{
61  weights[ featureIndex ]++;
62  }
63 
64  if( leftChild ){ //Recursively compute the weights for the left child
65  leftChild->computeFeatureWeights( weights );
66  }
67  if( rightChild ){ //Recursively compute the weights for the right child
68  rightChild->computeFeatureWeights( weights );
69  }
70 
71  return true;
72 }
73 
75 
76  if( isLeafNode ){ //If we reach a leaf node, there is nothing to do
77  return true;
78  }
79 
80  if( featureIndex >= weights.getNumCols() ){ //Feature index is out of bounds
81  warningLog << "computeFeatureWeights( VectorFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
82  return false;
83  }
84 
85  if( leftChild ){ //Recursively compute the weights for the left child until we reach the node above a leaf node
86  if( leftChild->getIsLeafNode() ){
87  if( classProbabilities.getSize() != weights.getNumRows() ){
88  warningLog << "computeFeatureWeights( VectorFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
89  return false;
90  }
91  for(UINT i=0; i<classProbabilities.getSize(); i++){
92  weights[ i ][ featureIndex ] += classProbabilities[ i ];
93  }
94 
95  } leftChild->computeLeafNodeWeights( weights );
96  }
97  if( rightChild ){ //Recursively compute the weights for the right child until we reach the node above a leaf node
98  if( rightChild->getIsLeafNode() ){
99  if( classProbabilities.getSize() != weights.getNumRows() ){
100  warningLog << "computeFeatureWeights( VectorFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
101  return false;
102  }
103  for(UINT i=0; i<classProbabilities.getSize(); i++){
104  weights[ i ][ featureIndex ] += classProbabilities[ i ];
105  }
106  } rightChild->computeLeafNodeWeights( weights );
107  }
108 
109  return true;
110 }
111 
112 bool DecisionTreeClusterNode::getModel( std::ostream &stream ) const{
113 
114  std::string tab = "";
115  for(UINT i=0; i<depth; i++) tab += "\t";
116 
117  stream << tab << "depth: " << depth;
118  stream << " nodeSize: " << nodeSize;
119  stream << " featureIndex: " << featureIndex;
120  stream << " threshold " << threshold;
121  stream << " isLeafNode: " << isLeafNode << std::endl;
122 
123  stream << tab << "ClassProbabilities: ";
124  for(UINT i=0; i<classProbabilities.getSize(); i++){
125  stream << classProbabilities[i] << "\t";
126  }
127  stream << std::endl;
128 
129  if( leftChild != NULL ){
130  stream << tab << "LeftChild: " << std::endl;
131  leftChild->getModel( stream );
132  }
133 
134  if( rightChild != NULL ){
135  stream << tab << "RightChild: " << std::endl;
136  rightChild->getModel( stream );
137  }
138 
139  return true;
140 }
141 
143 
145 
146  if( node == NULL ){
147  return NULL;
148  }
149 
150  //Copy this node into the node
151  node->depth = depth;
152  node->isLeafNode = isLeafNode;
153  node->nodeID = nodeID;
154  node->predictedNodeID = predictedNodeID;
155  node->nodeSize = nodeSize;
156  node->featureIndex = featureIndex;
157  node->threshold = threshold;
158  node->classProbabilities = classProbabilities;
159 
160  //Recursively deep copy the left child
161  if( leftChild ){
162  node->leftChild = leftChild->deepCopyNode();
163  node->leftChild->setParent( node );
164  }
165 
166  //Recursively deep copy the right child
167  if( rightChild ){
168  node->rightChild = rightChild->deepCopyNode();
169  node->rightChild->setParent( node );
170  }
171 
172  return dynamic_cast< DecisionTreeClusterNode* >( node );
173 }
174 
176  return dynamic_cast< DecisionTreeClusterNode* >( deepCopyNode() );
177 }
178 
180  return featureIndex;
181 }
182 
184  return threshold;
185 }
186 
187 bool DecisionTreeClusterNode::set(const UINT nodeSize,const UINT featureIndex,const Float threshold,const VectorFloat &classProbabilities){
188  this->nodeSize = nodeSize;
189  this->featureIndex = featureIndex;
190  this->threshold = threshold;
191  this->classProbabilities = classProbabilities;
192  return true;
193 }
194 
195 bool DecisionTreeClusterNode::computeBestSpiltBestIterativeSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
196 
197  return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
198 }
199 
200 bool DecisionTreeClusterNode::computeBestSpiltBestRandomSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
201 
202  return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
203 }
204 
205 bool DecisionTreeClusterNode::computeBestSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
206 
207  const UINT M = trainingData.getNumSamples();
208  const UINT N = (UINT)features.size();
209  const UINT K = (UINT)classLabels.size();
210 
211  if( N == 0 ) return false;
212 
214  Random random;
215  UINT bestFeatureIndex = 0;
216  Float bestThreshold = 0;
217  Float error = 0;
218  Vector< UINT > groupIndex(M);
219  Vector< MinMax > ranges = trainingData.getRanges();
220  MatrixDouble data(M,1); //This will store our temporary data for each dimension
221 
222  //Randomly select which features we want to use
223  UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
224  Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );
225 
226  //Loop over each random feature and try and find the best split point
227  for(UINT n=0; n<numRandomFeatures; n++){
228 
229  featureIndex = features[ randomFeatures[n] ];
230 
231  //Use the data in this feature dimension to create a sum dataset
232  for(UINT i=0; i<M; i++){
233  data[i][0] = trainingData[i][featureIndex];
234  }
235 
236  if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
237  //Store the best threshold and feature index
238  if( error < minError ){
239  minError = error;
240  bestThreshold = threshold;
241  bestFeatureIndex = featureIndex;
242  }
243  }
244 
245 
246 /*
247  //Use the data in this feature dimension to create a sum dataset
248  for(UINT i=0; i<M; i++){
249  data[i][0] = trainingData[i][featureIndex];
250  }
251 
252  //Use this data to train a KMeans cluster with 2 clusters
253  KMeans kmeans;
254  kmeans.setNumClusters( 2 );
255  kmeans.setComputeTheta( true );
256  kmeans.setMinChange( 1.0e-5 );
257  kmeans.setMinNumEpochs( 1 );
258  kmeans.setMaxNumEpochs( 100 );
259 
260  //Disable the logging to clean things up
261  kmeans.setTrainingLoggingEnabled( false );
262 
263 <<<<<<< HEAD
264  if( !kmeans.train( data ) ){
265  errorLog << "computeBestSpilt() - Failed to train KMeans model for feature: " << featureIndex << std::endl;
266 =======
267  if( !kmeans.train_( data ) ){
268  errorLog << "computeBestSpilt() - Failed to train KMeans model for feature: " << featureIndex << endl;
269 >>>>>>> master
270  return false;
271  }
272 
273  //Set the split threshold as the mid point between the two clusters
274  MatrixFloat clusters = kmeans.getClusters();
275  threshold = 0;
276  for(UINT i=0; i<clusters.getNumRows(); i++){
277  threshold += clusters[i][0];
278  }
279  threshold /= clusters.getNumRows();
280 
281  //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
282  groupCounter[0] = groupCounter[1] = 0;
283  classProbabilities.setAllValues(0);
284  for(UINT i=0; i<M; i++){
285  groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
286  groupCounter[ groupIndex[i] ]++;
287  classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
288  }
289 
290  //Compute the class probabilities for the lhs group and rhs group
291  for(UINT k=0; k<K; k++){
292  classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
293  classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
294  }
295 
296  //Compute the Gini index for the lhs and rhs groups
297  giniIndexL = giniIndexR = 0;
298  for(UINT k=0; k<K; k++){
299  giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
300  giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
301  }
302  weightL = groupCounter[0]/M;
303  weightR = groupCounter[1]/M;
304  error = (giniIndexL*weightL) + (giniIndexR*weightR);
305 
306  //Store the best threshold and feature index
307  if( error < minError ){
308  minError = error;
309  bestThreshold = threshold;
310  bestFeatureIndex = featureIndex;
311  }
312 */
313  }
314 
315  //Set the best feature index that will be returned to the DecisionTree that called this function
316  featureIndex = bestFeatureIndex;
317 
318  //Store the node size, feature index, best threshold and class probabilities for this node
319  set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );
320 
321  return true;
322 }
323 
324 bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){
325 
326  error = 0;
327  threshold = 0;
328 
329  const UINT M = trainingData.getNumSamples();
330  const UINT K = (UINT)classLabels.size();
331 
332  Float giniIndexL = 0;
333  Float giniIndexR = 0;
334  Float weightL = 0;
335  Float weightR = 0;
336  VectorFloat groupCounter(2,0);
337  MatrixFloat classProbabilities(K,2);
338 
339  //Use this data to train a KMeans cluster with 2 clusters
340  KMeans kmeans;
341  kmeans.setNumClusters( 2 );
342  kmeans.setComputeTheta( true );
343  kmeans.setMinChange( 1.0e-5 );
344  kmeans.setMinNumEpochs( 1 );
345  kmeans.setMaxNumEpochs( 100 );
346 
347  //Disable the logging to clean things up
348  kmeans.setTrainingLoggingEnabled( false );
349 
350  if( !kmeans.train_( data ) ){
351  errorLog << "computeSplitError() - Failed to train KMeans model for feature: " << featureIndex << std::endl;
352  return false;
353  }
354 
355  //Set the split threshold as the mid point between the two clusters
356  const MatrixFloat &clusters = kmeans.getClusters();
357  threshold = 0;
358  for(UINT i=0; i<clusters.getNumRows(); i++){
359  threshold += clusters[i][0];
360  }
361  threshold /= clusters.getNumRows();
362 
363  //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
364  groupCounter[0] = groupCounter[1] = 0;
365  classProbabilities.setAllValues(0);
366  for(UINT i=0; i<M; i++){
367  groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
368  groupCounter[ groupIndex[i] ]++;
369  classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
370  }
371 
372  //Compute the class probabilities for the lhs group and rhs group
373  for(UINT k=0; k<K; k++){
374  classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
375  classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
376  }
377 
378  //Compute the Gini index for the lhs and rhs groups
379  giniIndexL = giniIndexR = 0;
380  for(UINT k=0; k<K; k++){
381  giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
382  giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
383  }
384  weightL = groupCounter[0]/M;
385  weightR = groupCounter[1]/M;
386  error = (giniIndexL*weightL) + (giniIndexR*weightR);
387 
388  return true;
389 }
390 
391 bool DecisionTreeClusterNode::saveParametersToFile( std::fstream &file ) const{
392 
393  if( !file.is_open() )
394  {
395  errorLog << "saveParametersToFile(fstream &file) - File is not open!" << std::endl;
396  return false;
397  }
398 
399  //Save the DecisionTreeNode parameters
401  errorLog << "saveParametersToFile(fstream &file) - Failed to save DecisionTreeNode parameters to file!" << std::endl;
402  return false;
403  }
404 
405  //Save the custom DecisionTreeClusterNode parameters
406  file << "FeatureIndex: " << featureIndex << std::endl;
407  file << "Threshold: " << threshold << std::endl;
408 
409  return true;
410 }
411 
413 
414  if(!file.is_open())
415  {
416  errorLog << "loadParametersFromFile(fstream &file) - File is not open!" << std::endl;
417  return false;
418  }
419 
420  //Load the DecisionTreeNode parameters
422  errorLog << "loadParametersFromFile(fstream &file) - Failed to load DecisionTreeNode parameters from file!" << std::endl;
423  return false;
424  }
425 
426  std::string word;
427  //Load the custom DecisionTreeClusterNode parameters
428  file >> word;
429  if( word != "FeatureIndex:" ){
430  errorLog << "loadParametersFromFile(fstream &file) - Failed to find FeatureIndex header!" << std::endl;
431  return false;
432  }
433  file >> featureIndex;
434 
435  file >> word;
436  if( word != "Threshold:" ){
437  errorLog << "loadParametersFromFile(fstream &file) - Failed to find Threshold header!" << std::endl;
438  return false;
439  }
440  file >> threshold;
441 
442  return true;
443 }
444 
445 GRT_END_NAMESPACE
446 
virtual bool clear()
Definition: Node.h:37
virtual bool saveParametersToFile(std::fstream &file) const
virtual bool loadParametersFromFile(std::fstream &file)
bool setTrainingLoggingEnabled(const bool loggingEnabled)
Definition: MLBase.cpp:321
Definition: Random.h:40
virtual bool predict(const VectorFloat &x)
virtual bool train_(MatrixFloat &data)
Definition: KMeans.cpp:162
virtual bool getModel(std::ostream &stream) const
Definition: Node.cpp:119
bool setMinChange(const Float minChange)
Definition: MLBase.cpp:282
DecisionTreeClusterNode * deepCopy() const
This file implements a DecisionTreeClusterNode, which is a specific type of node used for a DecisionT...
unsigned int getSize() const
Definition: Vector.h:193
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
Vector< unsigned int > getRandomSubset(const unsigned int startRange, const unsigned int endRange, const unsigned int subsetSize)
Definition: Random.h:268
UINT getNumSamples() const
virtual bool computeFeatureWeights(VectorFloat &weights) const
virtual Node * deepCopyNode() const
Definition: Node.cpp:275
virtual Node * deepCopyNode() const
bool getIsLeafNode() const
Definition: Node.cpp:342
unsigned int getNumRows() const
Definition: Matrix.h:542
virtual bool computeFeatureWeights(VectorFloat &weights) const
Definition: Node.cpp:100
virtual bool getModel(std::ostream &stream) const
unsigned int getNumCols() const
Definition: Matrix.h:549
virtual bool saveParametersToFile(std::fstream &file) const
bool setMinNumEpochs(const UINT minNumEpochs)
Definition: MLBase.cpp:277
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const VectorFloat &classProbabilities)
Vector< MinMax > getRanges() const
Definition: KMeans.h:41
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
Definition: Node.cpp:104
bool setMaxNumEpochs(const UINT maxNumEpochs)
Definition: MLBase.cpp:268
virtual bool loadParametersFromFile(std::fstream &file)
bool setNumClusters(const UINT numClusters)
Definition: Clusterer.cpp:265