GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
ClusterTree.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "ClusterTree.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
//Define the string that will be used to identify the object.
//This id is the key under which the class is registered with the factory below.
const std::string ClusterTree::id = "ClusterTree";
std::string ClusterTree::getId() { return ClusterTree::id; }

//Register the ClusterTreeNode with the Node base class factory
//(runs at static-initialization time, before main)
RegisterNode< ClusterTreeNode > ClusterTreeNode::registerModule("ClusterTreeNode");

//Register the ClusterTree module with the Clusterer base class factory
RegisterClustererModule< ClusterTree > ClusterTree::registerModule( ClusterTree::getId() );
35 
36 ClusterTree::ClusterTree(const UINT numSplittingSteps,const UINT minNumSamplesPerNode,const UINT maxDepth,const bool removeFeaturesAtEachSplit,const Tree::TrainingMode trainingMode,const bool useScaling,const Float minRMSErrorPerNode) : Clusterer( ClusterTree::getId() )
37 {
38  tree = NULL;
40  this->numSplittingSteps = numSplittingSteps;
41  this->minNumSamplesPerNode = minNumSamplesPerNode;
42  this->maxDepth = maxDepth;
43  this->removeFeaturesAtEachSplit = removeFeaturesAtEachSplit;
44  this->trainingMode = trainingMode;
45  this->minRMSErrorPerNode = minRMSErrorPerNode;
46 }
47 
49 {
50  tree = NULL;
52  *this = rhs;
53 }
54 
56 {
57  clear();
58 }
59 
61  if( this != &rhs ){
62  //Clear this tree
63  this->clear();
64 
65  if( rhs.getTrained() ){
66  //Deep copy the tree
67  this->tree = dynamic_cast<ClusterTreeNode*>( rhs.deepCopyTree() );
68  }
69 
70  this->numSplittingSteps = rhs.numSplittingSteps;
71  this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
72  this->maxDepth = rhs.maxDepth;
73  this->removeFeaturesAtEachSplit = rhs.removeFeaturesAtEachSplit;
74  this->trainingMode = rhs.trainingMode;
75  this->minRMSErrorPerNode = rhs.minRMSErrorPerNode;
77 
78  //Copy the base variables
79  copyBaseVariables( (Clusterer*)&rhs );
80  }
81  return *this;
82 }
83 
84 bool ClusterTree::deepCopyFrom(const Clusterer *clusterer){
85 
86  if( clusterer == NULL ) return false;
87 
88  if( this->getId() == clusterer->getId() ){
89 
90  const ClusterTree *ptr = dynamic_cast<const ClusterTree*>(clusterer);
91 
92  //Clear this tree
93  this->clear();
94 
95  if( ptr->getTrained() ){
96  //Deep copy the tree
97  this->tree = dynamic_cast<ClusterTreeNode*>( ptr->deepCopyTree() );
98  }
99 
100  this->numSplittingSteps = ptr->numSplittingSteps;
101  this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
102  this->maxDepth = ptr->maxDepth;
103  this->removeFeaturesAtEachSplit = ptr->removeFeaturesAtEachSplit;
104  this->trainingMode = ptr->trainingMode;
105  this->minRMSErrorPerNode = ptr->minRMSErrorPerNode;
107 
108  //Copy the base classifier variables
109  return copyBaseVariables( clusterer );
110  }
111  return false;
112 }
113 
114 bool ClusterTree::train_(MatrixFloat &trainingData){
115 
116  //Clear any previous model
117  clear();
118 
119  const unsigned int M = trainingData.getNumRows();
120  const unsigned int N = trainingData.getNumCols();
121 
122  if( M == 0 ){
123  errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
124  return false;
125  }
126 
127  numInputDimensions = N;
128  numOutputDimensions = 1;
129  ranges = trainingData.getRanges();
130 
131  //Scale the training data if needed
132  if( useScaling ){
133  //Scale the training data between 0 and 1
134  trainingData.scale(0, 1);
135  }
136 
137  //Setup the valid features - at this point all features can be used
138  Vector< UINT > features(N);
139  for(UINT i=0; i<N; i++){
140  features[i] = i;
141  }
142 
143  //Build the tree
144  UINT clusterLabel = 0;
145  UINT nodeID = 0;
146  tree = buildTree( trainingData, NULL, features, clusterLabel, nodeID );
147  numClusters = clusterLabel;
148 
149  if( tree == NULL ){
150  clear();
151  errorLog << __GRT_LOG__ << " Failed to build tree!" << std::endl;
152  return false;
153  }
154 
155  //Flag that the algorithm has been trained
156  trained = true;
157 
158  //Setup the cluster labels
159  clusterLabels.resize(numClusters);
160  for(UINT i=0; i<numClusters; i++){
161  clusterLabels[i] = i+1;
162  }
163  clusterLikelihoods.resize(numClusters,0);
164  clusterDistances.resize(numClusters,0);
165 
166  return true;
167 }
168 
170 
171  if( !trained ){
172  errorLog << __GRT_LOG__ << " Model Not Trained!" << std::endl;
173  return false;
174  }
175 
176  if( tree == NULL ){
177  errorLog << __GRT_LOG__ << " DecisionTree pointer is null!" << std::endl;
178  return false;
179  }
180 
181  if( inputVector.getSize() != numInputDimensions ){
182  errorLog << __GRT_LOG__ << " The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
183  return false;
184  }
185 
186  if( useScaling ){
187  for(UINT n=0; n<numInputDimensions; n++){
188  inputVector[n] = scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
189  }
190  }
191 
192  VectorFloat clusterLabel(1);
193  if( !tree->predict_( inputVector, clusterLabel ) ){
194  errorLog << __GRT_LOG__ << " Failed to predict!" << std::endl;
195  return false;
196  }
197  predictedClusterLabel = (UINT)clusterLabel[0];
198 
199  return true;
200 }
201 
203 
204  //Clear the base variables
206 
207  if( tree != NULL ){
208  tree->clear();
209  delete tree;
210  tree = NULL;
211  }
212 
213  return true;
214 }
215 
216 bool ClusterTree::print() const{
217  if( tree != NULL )
218  return tree->print();
219  return false;
220 }
221 
222 bool ClusterTree::saveModelToFile( std::fstream &file ) const{
223 
224  if( !file.is_open() )
225  {
226  errorLog <<"saveModelToFile(fstream &file) - The file is not open!" << std::endl;
227  return false;
228  }
229 
230  //Write the header info
231  file << "GRT_CLUSTER_TREE_MODEL_FILE_V1.0" << std::endl;
232 
233  //Write the clusterer settings to the file
234  if( !saveClustererSettingsToFile(file) ){
235  errorLog <<"saveModelToFile(fstream &file) - Failed to save clusterer settings to file!" << std::endl;
236  return false;
237  }
238 
239  file << "NumSplittingSteps: " << numSplittingSteps << std::endl;
240  file << "MinNumSamplesPerNode: " << minNumSamplesPerNode << std::endl;
241  file << "MaxDepth: " << maxDepth << std::endl;
242  file << "RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSplit << std::endl;
243  file << "TrainingMode: " << trainingMode << std::endl;
244  file << "MinRMSErrorPerNode: " << minRMSErrorPerNode << std::endl;
245  file << "TreeBuilt: " << (tree != NULL ? 1 : 0) << std::endl;
246 
247  if( tree != NULL ){
248  file << "Tree:\n";
249  if( !tree->save( file ) ){
250  errorLog << "saveModelToFile(fstream &file) - Failed to save tree to file!" << std::endl;
251  return false;
252  }
253  }
254 
255  return true;
256 }
257 
/**
 Loads a model previously written by saveModelToFile. The parse is strictly
 ordered: header, base clusterer settings, then each "Key: value" pair in
 the exact order (and spelling) the saver emits, then the tree itself.
 Any mismatch aborts the load and returns false.
 @param file an open input stream positioned at the start of the model
 @return true on success, false otherwise
 */
bool ClusterTree::loadModelFromFile( std::fstream &file ){

    //Drop any existing model before loading
    clear();

    if(!file.is_open())
    {
        errorLog << "loadModelFromFile(string filename) - Could not open file to load model" << std::endl;
        return false;
    }

    std::string word;

    //Find the file type header
    file >> word;
    if(word != "GRT_CLUSTER_TREE_MODEL_FILE_V1.0"){
        errorLog << "loadModelFromFile(string filename) - Could not find Model File Header" << std::endl;
        return false;
    }

    //Load the base settings from the file (this restores the Clusterer state,
    //presumably including numClusters used below - verify against Clusterer.cpp)
    if( !loadClustererSettingsFromFile(file) ){
        errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;
        return false;
    }

    file >> word;
    if(word != "NumSplittingSteps:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << std::endl;
        return false;
    }
    file >> numSplittingSteps;

    file >> word;
    if(word != "MinNumSamplesPerNode:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << std::endl;
        return false;
    }
    file >> minNumSamplesPerNode;

    file >> word;
    if(word != "MaxDepth:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the MaxDepth!" << std::endl;
        return false;
    }
    file >> maxDepth;

    //NOTE: "Spilt" is a historical misspelling baked into the file format;
    //it must match the key written by saveModelToFile
    file >> word;
    if(word != "RemoveFeaturesAtEachSpilt:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << std::endl;
        return false;
    }
    file >> removeFeaturesAtEachSplit;

    file >> word;
    if(word != "TrainingMode:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the TrainingMode!" << std::endl;
        return false;
    }
    //The enum is serialized as an integer, so read into a temp and cast
    UINT tempTrainingMode = 0;
    file >> tempTrainingMode;
    trainingMode = static_cast<Tree::TrainingMode>(tempTrainingMode);

    file >> word;
    if(word != "MinRMSErrorPerNode:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the MinRMSErrorPerNode!" << std::endl;
        return false;
    }
    file >> minRMSErrorPerNode;

    file >> word;
    if(word != "TreeBuilt:"){
        errorLog << "loadModelFromFile(string filename) - Could not find the TreeBuilt!" << std::endl;
        return false;
    }
    //TreeBuilt was written as 0/1; reading it directly sets the trained flag
    file >> trained;

    if( trained ){
        file >> word;
        if(word != "Tree:"){
            errorLog << "loadModelFromFile(string filename) - Could not find the Tree!" << std::endl;
            return false;
        }

        //Create a new tree
        tree = new ClusterTreeNode;

        if( tree == NULL ){
            clear();
            errorLog << "loadModelFromFile(fstream &file) - Failed to create new RegressionTreeNode!" << std::endl;
            return false;
        }

        tree->setParent( NULL );
        if( !tree->load( file ) ){
            clear();
            errorLog << "loadModelFromFile(fstream &file) - Failed to load tree from file!" << std::endl;
            return false;
        }

        //Setup the cluster labels (1-based, matching train_)
        clusterLabels.resize(numClusters);
        for(UINT i=0; i<numClusters; i++){
            clusterLabels[i] = i+1;
        }
        clusterLikelihoods.resize(numClusters,0);
        clusterDistances.resize(numClusters,0);
    }

    return true;
}
368 
370 
371  if( tree == NULL ){
372  return NULL;
373  }
374 
375  return dynamic_cast<ClusterTreeNode*>(tree->deepCopy());
376 }
377 
379  return (ClusterTreeNode*)tree;
380 }
381 
383  return predictedClusterLabel;
384 }
385 
387  return minRMSErrorPerNode;
388 }
389 
390 Tree::TrainingMode ClusterTree::getTrainingMode() const{
391  return trainingMode;
392 }
393 
395  return numSplittingSteps;
396 }
397 
399  return minNumSamplesPerNode;
400 }
401 
403  return maxDepth;
404 }
405 
407 
408  if( tree == NULL ){
409  return 0;
410  }
411 
412  return tree->getPredictedNodeID();
413 }
414 
416  return removeFeaturesAtEachSplit;
417 }
418 
419 bool ClusterTree::setTrainingMode(const Tree::TrainingMode trainingMode){
420  if( trainingMode >= Tree::BEST_ITERATIVE_SPILT && trainingMode < Tree::NUM_TRAINING_MODES ){
421  this->trainingMode = trainingMode;
422  return true;
423  }
424  warningLog << "Unknown trainingMode: " << trainingMode << std::endl;
425  return false;
426 }
427 
428 bool ClusterTree::setNumSplittingSteps(const UINT numSplittingSteps){
429  if( numSplittingSteps > 0 ){
430  this->numSplittingSteps = numSplittingSteps;
431  return true;
432  }
433  warningLog << "setNumSplittingSteps(const UINT numSplittingSteps) - The number of splitting steps must be greater than zero!" << std::endl;
434  return false;
435 }
436 
437 bool ClusterTree::setMinNumSamplesPerNode(const UINT minNumSamplesPerNode){
438  if( minNumSamplesPerNode > 0 ){
439  this->minNumSamplesPerNode = minNumSamplesPerNode;
440  return true;
441  }
442  warningLog << "setMinNumSamplesPerNode(const UINT minNumSamplesPerNode) - The minimum number of samples per node must be greater than zero!" << std::endl;
443  return false;
444 }
445 
446 bool ClusterTree::setMaxDepth(const UINT maxDepth){
447  if( maxDepth > 0 ){
448  this->maxDepth = maxDepth;
449  return true;
450  }
451  warningLog << "setMaxDepth(const UINT maxDepth) - The maximum depth must be greater than zero!" << std::endl;
452  return false;
453 }
454 
455 bool ClusterTree::setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit){
456  this->removeFeaturesAtEachSplit = removeFeaturesAtEachSplit;
457  return true;
458 }
459 
460 bool ClusterTree::setMinRMSErrorPerNode(const Float minRMSErrorPerNode){
461  this->minRMSErrorPerNode = minRMSErrorPerNode;
462  return true;
463 }
464 
/**
 Recursively builds the cluster tree. A node becomes a leaf (and consumes
 the next cluster label) when no features remain, the node holds fewer than
 minNumSamplesPerNode samples, the maximum depth is reached, or the best
 split's error is already <= minRMSErrorPerNode. Otherwise the samples are
 partitioned by the best split and the two halves are built recursively.
 @param trainingData the samples reaching this node (one row per sample)
 @param parent the parent node, or NULL for the root
 @param features the column indices still available for splitting (by value:
        each branch of the recursion gets its own copy)
 @param clusterLabel in/out counter, incremented once per leaf created
 @param nodeID the id assigned to this node
 @return the new subtree (caller owns it), or NULL on failure / no data
 */
ClusterTreeNode* ClusterTree::buildTree(const MatrixFloat &trainingData,ClusterTreeNode *parent,Vector< UINT > features,UINT &clusterLabel,UINT nodeID){

    const UINT M = trainingData.getNumRows();

    //Update the nodeID
    //NOTE(review): despite the comment above, nodeID is never changed here,
    //so every node in the tree is initialised with the caller's value (0 from
    //train_). Looks like a dropped "nodeID++" - verify against upstream GRT.

    //Get the depth
    UINT depth = 0;

    if( parent != NULL )
        depth = parent->getDepth() + 1;

    //If there are no training data then return NULL
    if( M == 0 )
        return NULL;

    //Create the new node
    ClusterTreeNode *node = new ClusterTreeNode;

    if( node == NULL )
        return NULL;

    //Set the parent
    node->initNode( parent, depth, nodeID );

    //If there are no features left then create a leaf node and return
    if( features.getSize() == 0 || M < minNumSamplesPerNode || depth >= maxDepth ){

        //Update the clusterLabel
        clusterLabel++;

        //Flag that this is a leaf node
        node->setIsLeafNode( true );

        //Set the node - the feature and threshold are both 0
        node->set( M, 0, 0, clusterLabel );

        Clusterer::trainingLog << "Reached leaf node. Depth: " << depth << " NumSamples: " << M << std::endl;

        return node;
    }

    //Compute the best spilt point; featureIndex/threshold/minError are
    //filled in by computeBestSplit
    UINT featureIndex = 0;
    Float threshold = 0;
    Float minError = 0;
    if( !computeBestSplit( trainingData, features, featureIndex, threshold, minError ) ){
        delete node;
        return NULL;
    }

    Clusterer::trainingLog << "Depth: " << depth << " FeatureIndex: " << featureIndex << " Threshold: " << threshold << " MinError: " << minError << std::endl;

    //If the minError is below the minRMSError then create a leaf node and return
    if( minError <= minRMSErrorPerNode ){
        //Update the clusterLabel
        clusterLabel++;

        //Flag that this is a leaf node
        node->setIsLeafNode( true );

        //Set the node
        node->set( M, featureIndex, threshold, clusterLabel );

        Clusterer::trainingLog << "Reached leaf node. Depth: " << depth << " NumSamples: " << M << std::endl;

        return node;
    }

    //Set the node (any node that is not a leaf node has a cluster label of 0
    node->set( M, featureIndex, threshold, 0 );

    //Remove the selected feature so we will not use it again
    if( removeFeaturesAtEachSplit ){
        for(UINT i=0; i<features.getSize(); i++){
            if( features[i] == featureIndex ){
                features.erase( features.begin()+i );
                break;
            }
        }
    }

    //Split the data: samples on/above the threshold go right, the rest left
    MatrixFloat lhs;
    MatrixFloat rhs;

    for(UINT i=0; i<M; i++){
        if( node->predict( trainingData.getRow(i) ) ){
            rhs.push_back(trainingData.getRow(i));
        }else lhs.push_back(trainingData.getRow(i));
    }

    //Run the recursive tree building on the children
    node->setLeftChild( buildTree( lhs, node, features, clusterLabel, nodeID ) );
    node->setRightChild( buildTree( rhs, node, features, clusterLabel, nodeID ) );

    return node;
}
563 
564 bool ClusterTree::computeBestSplit( const MatrixFloat &trainingData, const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
565 
566  switch( trainingMode ){
567  case Tree::BEST_ITERATIVE_SPILT:
568  return computeBestSplitBestIterativeSplit( trainingData, features, featureIndex, threshold, minError );
569  break;
570  case Tree::BEST_RANDOM_SPLIT:
571  return computeBestSplitBestRandomSplit( trainingData, features, featureIndex, threshold, minError );
572  break;
573  default:
574  errorLog << "Uknown trainingMode!" << std::endl;
575  return false;
576  break;
577  }
578 
579  return true;
580 }
581 
582 bool ClusterTree::computeBestSplitBestIterativeSplit( const MatrixFloat &trainingData, const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
583 
584  const UINT M = trainingData.getNumRows();
585  const UINT N = (UINT)features.size();
586 
587  //Clusterer::debugLog << "computeBestSpiltBestIterativeSpilt() M: " << M << std::endl;
588 
589  if( N == 0 ) return false;
590 
592  UINT bestFeatureIndex = 0;
593  UINT groupID = 0;
594  Float bestThreshold = 0;
595  Float error = 0;
596  Float minRange = 0;
597  Float maxRange = 0;
598  Float step = 0;
599  Vector< UINT > groupIndex(M);
600  Vector< Float > groupCounter(2);
601  Vector< Float > groupMean(2);
602  Vector< Float > groupMSE(2);
603  Vector< MinMax > ranges = trainingData.getRanges();
604 
605  //Loop over each feature and try and find the best split point
606  for(UINT n=0; n<N; n++){
607  minRange = ranges[n].minValue;
608  maxRange = ranges[n].maxValue;
609  step = (maxRange-minRange)/Float(numSplittingSteps);
610  threshold = minRange;
611  featureIndex = features[n];
612 
613  while( threshold <= maxRange ){
614 
615  //Reset the counters to zero
616  groupCounter[0] = groupCounter[1] = 0;
617  groupMean[0] = groupMean[1] = 0;
618  groupMSE[0] = groupMSE[1] = 0;
619 
620  //Iterate over each sample and work out what group it falls into
621  for(UINT i=0; i<M; i++){
622  groupID = trainingData[i][featureIndex] >= threshold ? 1 : 0;
623  groupIndex[i] = groupID;
624 
625  //Update the group mean and group counters
626  groupMean[ groupID ] += trainingData[i][featureIndex];
627  groupCounter[ groupID ]++;
628  }
629 
630  //Compute the group mean
631  groupMean[0] /= (groupCounter[0] > 0 ? groupCounter[0] : 1);
632  groupMean[1] /= (groupCounter[1] > 0 ? groupCounter[1] : 1);
633 
634  //Compute the MSE for each group
635  for(UINT i=0; i<M; i++){
636  groupMSE[ groupIndex[i] ] += grt_sqr( groupMean[ groupIndex[i] ] - trainingData[i][featureIndex] );
637  }
638  groupMSE[0] /= (groupCounter[0] > 0 ? groupCounter[0] : 1);
639  groupMSE[1] /= (groupCounter[1] > 0 ? groupCounter[1] : 1);
640 
641  error = grt_sqrt( groupMSE[0] + groupMSE[1] );
642 
643  //Store the best threshold and feature index
644  if( error < minError ){
645  minError = error;
646  bestThreshold = threshold;
647  bestFeatureIndex = featureIndex;
648  }
649 
650  //Update the threshold
651  threshold += step;
652  }
653  }
654 
655  //Set the best feature index and threshold
656  featureIndex = bestFeatureIndex;
657  threshold = bestThreshold;
658 
659  return true;
660 }
661 
662 bool ClusterTree::computeBestSplitBestRandomSplit( const MatrixFloat &trainingData, const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){
663 
664  const UINT M = trainingData.getNumRows();
665  const UINT N = (UINT)features.size();
666 
667  debugLog << "computeBestSpiltBestRandomSpilt() M: " << M << std::endl;
668 
669  if( N == 0 ) return false;
670 
672  UINT bestFeatureIndex = 0;
673  UINT groupID = 0;
674  Float bestThreshold = 0;
675  Float error = 0;
676  Vector< UINT > groupIndex(M);
677  Vector< Float > groupCounter(2);
678  Vector< Float > groupMean(2);
679  Vector< Float > groupMSE(2);
680  Vector< MinMax > ranges = trainingData.getRanges();
681 
682  //Loop over each feature and try and find the best split point
683  for(UINT n=0; n<N; n++){
684  featureIndex = features[n];
685 
686  for(UINT m=0; m<numSplittingSteps; m++){
687  //Randomly select a threshold value
688  threshold = random.getRandomNumberUniform(ranges[n].minValue,ranges[n].maxValue);
689 
690  //Reset the counters to zero
691  groupCounter[0] = groupCounter[1] = 0;
692  groupMean[0] = groupMean[1] = 0;
693  groupMSE[0] = groupMSE[1] = 0;
694 
695  //Iterate over each sample and work out what group it falls into
696  for(UINT i=0; i<M; i++){
697  groupID = trainingData[i][featureIndex] >= threshold ? 1 : 0;
698  groupIndex[i] = groupID;
699 
700  //Update the group mean and group counters
701  groupMean[ groupID ] += trainingData[i][featureIndex];
702  groupCounter[ groupID ]++;
703  }
704 
705  //Compute the group mean
706  groupMean[0] /= (groupCounter[0] > 0 ? groupCounter[0] : 1);
707  groupMean[1] /= (groupCounter[1] > 0 ? groupCounter[1] : 1);
708 
709  //Compute the MSE for each group
710  for(UINT i=0; i<M; i++){
711  groupMSE[ groupIndex[i] ] += MLBase::SQR( groupMean[ groupIndex[i] ] - trainingData[i][featureIndex] );
712  }
713  groupMSE[0] /= (groupCounter[0] > 0 ? groupCounter[0] : 1);
714  groupMSE[1] /= (groupCounter[1] > 0 ? groupCounter[1] : 1);
715 
716  error = sqrt( groupMSE[0] + groupMSE[1] );
717 
718  //Store the best threshold and feature index
719  if( error < minError ){
720  minError = error;
721  bestThreshold = threshold;
722  bestFeatureIndex = featureIndex;
723  }
724  }
725  }
726 
727  //Set the best feature index and threshold
728  featureIndex = bestFeatureIndex;
729  threshold = bestThreshold;
730 
731  return true;
732 }
733 
734 GRT_END_NAMESPACE
735 
std::string getId() const
Definition: GRTBase.cpp:85
const ClusterTreeNode * getTree() const
virtual bool predict(VectorFloat inputVector)
Definition: MLBase.cpp:135
bool setMinRMSErrorPerNode(const Float minRMSErrorPerNode)
virtual ~ClusterTree(void)
Definition: ClusterTree.cpp:55
UINT getNumSplittingSteps() const
virtual bool predict_(VectorFloat &x) override
Definition: Node.cpp:56
This class implements a Cluster Tree. This can be used to automatically build a cluster model (where ...
bool setTrainingMode(const Tree::TrainingMode trainingMode)
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear() override
Definition: Clusterer.cpp:144
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool train_(MatrixFloat &trainingData) override
virtual bool saveModelToFile(std::fstream &file) const override
bool getTrained() const
Definition: MLBase.cpp:294
UINT getDepth() const
Definition: Node.cpp:304
UINT getSize() const
Definition: Vector.h:201
virtual bool clear() override
virtual bool deepCopyFrom(const Clusterer *cluster) override
Definition: ClusterTree.cpp:84
bool copyBaseVariables(const Clusterer *clusterer)
Definition: Clusterer.cpp:90
bool loadClustererSettingsFromFile(std::fstream &file)
Definition: Clusterer.cpp:181
bool setNumSplittingSteps(const UINT numSplittingSteps)
UINT getPredictedNodeID() const
bool set(const UINT nodeSize, const UINT featureIndex, const Float threshold, const UINT clusterLabel)
ClusterTreeNode * deepCopyTree() const
bool setMaxDepth(const UINT maxDepth)
UINT predictedClusterLabel
Stores the predicted cluster label from the most recent predict( )
Definition: Clusterer.h:240
ClusterTree(const UINT numSplittingSteps=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const bool removeFeaturesAtEachSplit=false, const Tree::TrainingMode trainingMode=Tree::BEST_ITERATIVE_SPILT, const bool useScaling=false, const Float minRMSErrorPerNode=0.01)
Definition: ClusterTree.cpp:36
bool saveClustererSettingsToFile(std::fstream &file) const
Definition: Clusterer.cpp:159
ClusterTree & operator=(const ClusterTree &rhs)
Definition: ClusterTree.cpp:60
bool getRemoveFeaturesAtEachSplit() const
virtual bool save(std::fstream &file) const override
Definition: Node.cpp:136
UINT numClusters
Number of clusters in the model.
Definition: Clusterer.h:239
unsigned int getNumRows() const
Definition: Matrix.h:574
unsigned int getNumCols() const
Definition: Matrix.h:581
UINT getPredictedNodeID() const
Definition: Node.cpp:312
virtual bool print() const override
virtual bool print() const override
Definition: Node.cpp:105
VectorFloat getRow(const unsigned int r) const
Definition: MatrixFloat.h:107
Vector< MinMax > getRanges() const
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
Definition: Random.cpp:129
virtual bool load(std::fstream &file) override
Definition: Node.cpp:178
static std::string getId()
Definition: ClusterTree.cpp:28
virtual bool loadModelFromFile(std::fstream &file) override
UINT getPredictedClusterLabel() const
bool push_back(const Vector< T > &sample)
Definition: Matrix.h:431
virtual bool predict_(VectorFloat &inputVector) override
virtual bool clear() override
Definition: Node.cpp:66
UINT getMaxDepth() const
Tree::TrainingMode getTrainingMode() const
bool setRemoveFeaturesAtEachSplit(const bool removeFeaturesAtEachSplit)
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
UINT getMinNumSamplesPerNode() const
virtual Node * deepCopy() const
Definition: Node.cpp:272
Float getMinRMSErrorPerNode() const
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: GRTBase.h:184