GestureRecognitionToolkit  Version: 0.2.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source C++ machine learning library for real-time gesture recognition.
DecisionTreeClusterNode.cpp
#define GRT_DLL_EXPORTS
#include "DecisionTreeClusterNode.h"

#include <limits>
5 GRT_BEGIN_NAMESPACE
6 
7 //Register the DecisionTreeClusterNode module with the Node base class
8 RegisterNode< DecisionTreeClusterNode > DecisionTreeClusterNode::registerModule("DecisionTreeClusterNode");
9 
11  nodeType = "DecisionTreeClusterNode";
12  parent = NULL;
13  leftChild = NULL;
14  rightChild = NULL;
15  clear();
16 }
17 
19  clear();
20 }
21 
23 
24  if( x[ featureIndex ] >= threshold ) return true;
25 
26  return false;
27 }
28 
30 
31  //Call the base class clear function
33 
34  featureIndex = 0;
35  threshold = 0;
36 
37  return true;
38 }
39 
41 
42  std::ostringstream stream;
43 
44  if( getModel( stream ) ){
45  std::cout << stream.str();
46  return true;
47  }
48 
49  return false;
50 }
51 
53 
54  if( isLeafNode ){ //If we reach a leaf node, no weight update needed
55  return true;
56  }
57 
58  if( featureIndex >= ((UINT)weights.size()) ){ //Feature index is out of bounds
59  warningLog << "computeFeatureWeights( VectorFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
60  return false;
61  }else{
62  weights[ featureIndex ]++;
63  }
64 
65  if( leftChild ){ //Recursively compute the weights for the left child
66  leftChild->computeFeatureWeights( weights );
67  }
68  if( rightChild ){ //Recursively compute the weights for the right child
69  rightChild->computeFeatureWeights( weights );
70  }
71 
72  return true;
73 }
74 
76 
77  if( isLeafNode ){ //If we reach a leaf node, there is nothing to do
78  return true;
79  }
80 
81  if( featureIndex >= weights.getNumCols() ){ //Feature index is out of bounds
82  warningLog << "computeFeatureWeights( VectorFloat &weights ) - Feature index is greater than weights Vector size!" << std::endl;
83  return false;
84  }
85 
86  if( leftChild ){ //Recursively compute the weights for the left child until we reach the node above a leaf node
87  if( leftChild->getIsLeafNode() ){
88  if( classProbabilities.getSize() != weights.getNumRows() ){
89  warningLog << "computeFeatureWeights( VectorFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
90  return false;
91  }
92  for(UINT i=0; i<classProbabilities.getSize(); i++){
93  weights[ i ][ featureIndex ] += classProbabilities[ i ];
94  }
95 
96  } leftChild->computeLeafNodeWeights( weights );
97  }
98  if( rightChild ){ //Recursively compute the weights for the right child until we reach the node above a leaf node
99  if( rightChild->getIsLeafNode() ){
100  if( classProbabilities.getSize() != weights.getNumRows() ){
101  warningLog << "computeFeatureWeights( VectorFloat &weights ) - The number of rows in the weights matrix does not match the class probabilities Vector size!" << std::endl;
102  return false;
103  }
104  for(UINT i=0; i<classProbabilities.getSize(); i++){
105  weights[ i ][ featureIndex ] += classProbabilities[ i ];
106  }
107  } rightChild->computeLeafNodeWeights( weights );
108  }
109 
110  return true;
111 }
112 
113 bool DecisionTreeClusterNode::getModel( std::ostream &stream ) const{
114 
115  std::string tab = "";
116  for(UINT i=0; i<depth; i++) tab += "\t";
117 
118  stream << tab << "depth: " << depth;
119  stream << " nodeSize: " << nodeSize;
120  stream << " featureIndex: " << featureIndex;
121  stream << " threshold " << threshold;
122  stream << " isLeafNode: " << isLeafNode << std::endl;
123 
124  stream << tab << "ClassProbabilities: ";
125  for(UINT i=0; i<classProbabilities.getSize(); i++){
126  stream << classProbabilities[i] << "\t";
127  }
128  stream << std::endl;
129 
130  if( leftChild != NULL ){
131  stream << tab << "LeftChild: " << std::endl;
132  leftChild->getModel( stream );
133  }
134 
135  if( rightChild != NULL ){
136  stream << tab << "RightChild: " << std::endl;
137  rightChild->getModel( stream );
138  }
139 
140  return true;
141 }
142 
144 
146 
147  if( node == NULL ){
148  return NULL;
149  }
150 
151  //Copy this node into the node
152  node->depth = depth;
153  node->isLeafNode = isLeafNode;
154  node->nodeID = nodeID;
155  node->predictedNodeID = predictedNodeID;
156  node->nodeSize = nodeSize;
157  node->featureIndex = featureIndex;
158  node->threshold = threshold;
159  node->classProbabilities = classProbabilities;
160 
161  //Recursively deep copy the left child
162  if( leftChild ){
163  node->leftChild = leftChild->deepCopyNode();
164  node->leftChild->setParent( node );
165  }
166 
167  //Recursively deep copy the right child
168  if( rightChild ){
169  node->rightChild = rightChild->deepCopyNode();
170  node->rightChild->setParent( node );
171  }
172 
173  return dynamic_cast< DecisionTreeClusterNode* >( node );
174 }
175 
177  return dynamic_cast< DecisionTreeClusterNode* >( deepCopyNode() );
178 }
179 
181  return featureIndex;
182 }
183 
185  return threshold;
186 }
187 
188 bool DecisionTreeClusterNode::set(const UINT nodeSize,const UINT featureIndex,const Float threshold,const VectorFloat &classProbabilities){
189  this->nodeSize = nodeSize;
190  this->featureIndex = featureIndex;
191  this->threshold = threshold;
192  this->classProbabilities = classProbabilities;
193  return true;
194 }
195 
196 bool DecisionTreeClusterNode::computeBestSpiltBestIterativeSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
197 
198  return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
199 }
200 
201 bool DecisionTreeClusterNode::computeBestSpiltBestRandomSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
202 
203  return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
204 }
205 
206 bool DecisionTreeClusterNode::computeBestSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
207 
208  const UINT M = trainingData.getNumSamples();
209  const UINT N = (UINT)features.size();
210  const UINT K = (UINT)classLabels.size();
211 
212  if( N == 0 ) return false;
213 
215  Random random;
216  UINT bestFeatureIndex = 0;
217  Float bestThreshold = 0;
218  Float error = 0;
219  Vector< UINT > groupIndex(M);
220  Vector< MinMax > ranges = trainingData.getRanges();
221  MatrixDouble data(M,1); //This will store our temporary data for each dimension
222 
223  //Randomly select which features we want to use
224  UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
225  Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );
226 
227  //Loop over each random feature and try and find the best split point
228  for(UINT n=0; n<numRandomFeatures; n++){
229 
230  featureIndex = features[ randomFeatures[n] ];
231 
232  //Use the data in this feature dimension to create a sum dataset
233  for(UINT i=0; i<M; i++){
234  data[i][0] = trainingData[i][featureIndex];
235  }
236 
237  if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
238  //Store the best threshold and feature index
239  if( error < minError ){
240  minError = error;
241  bestThreshold = threshold;
242  bestFeatureIndex = featureIndex;
243  }
244  }
245  }
246 
247  //Set the best feature index that will be returned to the DecisionTree that called this function
248  featureIndex = bestFeatureIndex;
249 
250  //Store the node size, feature index, best threshold and class probabilities for this node
251  set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );
252 
253  return true;
254 }
255 
256 bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){
257 
258  error = 0;
259  threshold = 0;
260 
261  const UINT M = trainingData.getNumSamples();
262  const UINT K = (UINT)classLabels.size();
263 
264  Float giniIndexL = 0;
265  Float giniIndexR = 0;
266  Float weightL = 0;
267  Float weightR = 0;
268  VectorFloat groupCounter(2,0);
269  MatrixFloat classProbabilities(K,2);
270 
271  //Use this data to train a KMeans cluster with 2 clusters
272  KMeans kmeans;
273  kmeans.setNumClusters( 2 );
274  kmeans.setComputeTheta( true );
275  kmeans.setMinChange( 1.0e-5 );
276  kmeans.setMinNumEpochs( 1 );
277  kmeans.setMaxNumEpochs( 100 );
278 
279  //Disable the logging to clean things up
280  kmeans.setTrainingLoggingEnabled( false );
281 
282  if( !kmeans.train_( data ) ){
283  errorLog << "computeSplitError() - Failed to train KMeans model for feature: " << featureIndex << std::endl;
284  return false;
285  }
286 
287  //Set the split threshold as the mid point between the two clusters
288  const MatrixFloat &clusters = kmeans.getClusters();
289  threshold = 0;
290  for(UINT i=0; i<clusters.getNumRows(); i++){
291  threshold += clusters[i][0];
292  }
293  threshold /= clusters.getNumRows();
294 
295  //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
296  groupCounter[0] = groupCounter[1] = 0;
297  classProbabilities.setAllValues(0);
298  for(UINT i=0; i<M; i++){
299  groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
300  groupCounter[ groupIndex[i] ]++;
301  classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
302  }
303 
304  //Compute the class probabilities for the lhs group and rhs group
305  for(UINT k=0; k<K; k++){
306  classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
307  classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
308  }
309 
310  //Compute the Gini index for the lhs and rhs groups
311  giniIndexL = giniIndexR = 0;
312  for(UINT k=0; k<K; k++){
313  giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
314  giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
315  }
316  weightL = groupCounter[0]/M;
317  weightR = groupCounter[1]/M;
318  error = (giniIndexL*weightL) + (giniIndexR*weightR);
319 
320  return true;
321 }
322 
323 bool DecisionTreeClusterNode::saveParametersToFile( std::fstream &file ) const{
324 
325  if( !file.is_open() )
326  {
327  errorLog << "saveParametersToFile(fstream &file) - File is not open!" << std::endl;
328  return false;
329  }
330 
331  //Save the DecisionTreeNode parameters
333  errorLog << "saveParametersToFile(fstream &file) - Failed to save DecisionTreeNode parameters to file!" << std::endl;
334  return false;
335  }
336 
337  //Save the custom DecisionTreeThresholdNode parameters
338  file << "FeatureIndex: " << featureIndex << std::endl;
339  file << "Threshold: " << threshold << std::endl;
340 
341  return true;
342 }
343 
345 
346  if(!file.is_open())
347  {
348  errorLog << "loadParametersFromFile(fstream &file) - File is not open!" << std::endl;
349  return false;
350  }
351 
352  //Load the DecisionTreeNode parameters
354  errorLog << "loadParametersFromFile(fstream &file) - Failed to load DecisionTreeNode parameters from file!" << std::endl;
355  return false;
356  }
357 
358  std::string word;
359  //Load the custom DecisionTreeThresholdNode Parameters
360  file >> word;
361  if( word != "FeatureIndex:" ){
362  errorLog << "loadParametersFromFile(fstream &file) - Failed to find FeatureIndex header!" << std::endl;
363  return false;
364  }
365  file >> featureIndex;
366 
367  file >> word;
368  if( word != "Threshold:" ){
369  errorLog << "loadParametersFromFile(fstream &file) - Failed to find Threshold header!" << std::endl;
370  return false;
371  }
372  file >> threshold;
373 
374  return true;
375 }
376 
377 GRT_END_NAMESPACE
378 
virtual bool clear()
Definition: Node.h:37
virtual bool saveParametersToFile(std::fstream &file) const
virtual bool loadParametersFromFile(std::fstream &file)
bool setTrainingLoggingEnabled(const bool loggingEnabled)
Definition: MLBase.cpp:326
Definition: Random.h:40
virtual bool predict(const VectorFloat &x)
virtual bool train_(MatrixFloat &data)
Definition: KMeans.cpp:163
virtual bool getModel(std::ostream &stream) const
Definition: Node.cpp:120
UINT getSize() const
Definition: Vector.h:191
bool setMinChange(const Float minChange)
Definition: MLBase.cpp:287
DecisionTreeClusterNode * deepCopy() const
This file implements a DecisionTreeClusterNode, which is a specific type of node used for a DecisionT...
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
Vector< unsigned int > getRandomSubset(const unsigned int startRange, const unsigned int endRange, const unsigned int subsetSize)
Definition: Random.h:268
UINT getNumSamples() const
virtual bool computeFeatureWeights(VectorFloat &weights) const
virtual Node * deepCopyNode() const
Definition: Node.cpp:276
virtual Node * deepCopyNode() const
bool getIsLeafNode() const
Definition: Node.cpp:343
unsigned int getNumRows() const
Definition: Matrix.h:542
virtual bool computeFeatureWeights(VectorFloat &weights) const
Definition: Node.cpp:101
virtual bool getModel(std::ostream &stream) const
unsigned int getNumCols() const
Definition: Matrix.h:549
virtual bool saveParametersToFile(std::fstream &file) const
bool setMinNumEpochs(const UINT minNumEpochs)
Definition: MLBase.cpp:282
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const VectorFloat &classProbabilities)
Vector< MinMax > getRanges() const
Definition: KMeans.h:41
virtual bool computeLeafNodeWeights(MatrixFloat &weights) const
Definition: Node.cpp:105
bool setMaxNumEpochs(const UINT maxNumEpochs)
Definition: MLBase.cpp:273
virtual bool loadParametersFromFile(std::fstream &file)
bool setNumClusters(const UINT numClusters)
Definition: Clusterer.cpp:266