GestureRecognitionToolkit  Version: 0.2.5
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source C++ machine learning library for real-time gesture recognition.
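As a quick orientation before the listing, the minimal sketch below shows one way the KNN class in this file might be driven end to end. It is a sketch, assuming GRT is installed and that the umbrella header is available as GRT/GRT.h (include paths vary by setup); the calls used are the ones that appear in this file, plus getPredictedClassLabel(), which comes from the Classifier base class. The sample values are made up.

// Minimal usage sketch; assumes GRT is installed and reachable as <GRT/GRT.h>.
#include <GRT/GRT.h>
#include <iostream>
using namespace GRT;

int main(){
    // Build a tiny two-class training set with 2-dimensional samples
    ClassificationData trainingData;
    trainingData.setNumDimensions( 2 );

    VectorFloat sample( 2 );
    sample[0] = 0.1; sample[1] = 0.2; trainingData.addSample( 1, sample );
    sample[0] = 0.2; sample[1] = 0.1; trainingData.addSample( 1, sample );
    sample[0] = 0.8; sample[1] = 0.9; trainingData.addSample( 2, sample );
    sample[0] = 0.9; sample[1] = 0.8; trainingData.addSample( 2, sample );

    // K=3 neighbours, Euclidean distance by default
    KNN knn( 3 );

    // Train the model (train_ is the training entry point in the listing below)
    if( !knn.train_( trainingData ) ){
        std::cout << "Failed to train KNN model!" << std::endl;
        return 1;
    }

    // Classify a new sample
    sample[0] = 0.15; sample[1] = 0.15;
    if( knn.predict_( sample ) ){
        std::cout << "Predicted class: " << knn.getPredictedClassLabel() << std::endl;
    }
    return 0;
}

To let the classifier pick K automatically, enableBestKValueSearch(true) together with setMinKSearchValue() and setMaxKSearchValue() could be called before training, as the setters near the end of the listing show.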
KNN.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "KNN.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
26 //Define the string that will be used to identify the object
27 const std::string KNN::id = "KNN";
28 std::string KNN::getId() { return KNN::id; }
29 
30 //Register the KNN module with the Classifier base class
31 RegisterClassifierModule< KNN > KNN::registerModule( KNN::getId() );
32 
33 KNN::KNN(unsigned int K,bool useScaling,bool useNullRejection,Float nullRejectionCoeff,bool searchForBestKValue,UINT minKSearchValue,UINT maxKSearchValue) : Classifier( KNN::getId() )
34 {
35  this->K = K;
36  this->distanceMethod = EUCLIDEAN_DISTANCE;
37  this->useScaling = useScaling;
38  this->useNullRejection = useNullRejection;
39  this->nullRejectionCoeff = nullRejectionCoeff;
40  this->searchForBestKValue = searchForBestKValue;
41  this->minKSearchValue = minKSearchValue;
42  this->maxKSearchValue = maxKSearchValue;
43  supportsNullRejection = true;
44  classifierMode = STANDARD_CLASSIFIER_MODE;
45  distanceMethod = EUCLIDEAN_DISTANCE;
46 }
47 
48 KNN::KNN(const KNN &rhs) : Classifier( KNN::getId() )
49 {
50  classifierMode = STANDARD_CLASSIFIER_MODE;
51  *this = rhs;
52 }
53 
54 KNN::~KNN(void)
55 {
56 }
57 
58 KNN& KNN::operator=(const KNN &rhs){
59  if( this != &rhs ){
60  //KNN variables
61  this->K = rhs.K;
62  this->distanceMethod = rhs.distanceMethod;
63  this->searchForBestKValue = rhs.searchForBestKValue;
64  this->minKSearchValue = rhs.minKSearchValue;
65  this->maxKSearchValue = rhs.maxKSearchValue;
66  this->trainingData = rhs.trainingData;
67  this->trainingMu = rhs.trainingMu;
68  this->trainingSigma = rhs.trainingSigma;
69 
70  //Classifier variables
71  copyBaseVariables( (Classifier*)&rhs );
72  }
73  return *this;
74 }
75 
76 bool KNN::deepCopyFrom(const Classifier *classifier){
77 
78  if( classifier == NULL ) return false;
79 
80  if( this->getId() == classifier->getId() ){
81  //Get a pointer to the KNN copy instance
82  const KNN *ptr = dynamic_cast<const KNN*>(classifier);
83 
84  this->K = ptr->K;
85  this->distanceMethod = ptr->distanceMethod;
86  this->searchForBestKValue = ptr->searchForBestKValue;
87  this->minKSearchValue = ptr->minKSearchValue;
88  this->maxKSearchValue = ptr->maxKSearchValue;
89  this->trainingData = ptr->trainingData;
90  this->trainingMu = ptr->trainingMu;
91  this->trainingSigma = ptr->trainingSigma;
92 
93  //Classifier variables
94  return copyBaseVariables( classifier );
95  }
96  return false;
97 }
98 
99 bool KNN::train_(ClassificationData &trainingData){
100 
101  //Clear any previous models
102  clear();
103 
104  if( trainingData.getNumSamples() == 0 ){
105  errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
106  return false;
107  }
108 
109  //Store the number of features, classes and the training data
110  this->numInputDimensions = trainingData.getNumDimensions();
111  this->numOutputDimensions = trainingData.getNumClasses();
112  this->numClasses = trainingData.getNumClasses();
113  this->ranges = trainingData.getRanges();
114 
115  if( useScaling ){
116  //Scale the training data between 0 and 1
117  trainingData.scale(0, 1);
118  }
119 
120  //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
121  this->trainingData = trainingData;
122 
123  ClassificationData validationData;
124  if( useValidationSet ){
125  validationData = trainingData.split( 100-validationSetSize );
126  }
127 
128  //Set the class labels
129  classLabels.resize( numClasses );
130  for(UINT k=0; k<numClasses; k++){
131  classLabels[k] = trainingData.getClassTracker()[k].classLabel;
132  }
133 
134  //If we do not need to search for the best K value, then call the sub training function with the default value of K
135  if( !searchForBestKValue ){
136  if( !train_(trainingData,K) ){
137  return false;
138  }
139  }else{
140  //If we have got this far then we are going to search for the best K value
141  UINT index = 0;
142  Float bestAccuracy = 0;
143  Vector< IndexedDouble > trainingAccuracyLog;
144 
145  for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
146  //Randomly split the data and use 80% to train the algorithm and 20% to test it
147  ClassificationData testSet = useValidationSet ? validationData : trainingData.split(80,true);
148 
149  if( !train_(trainingData, k) ){
150  errorLog << __GRT_LOG__ << " Failed to train model for a k value of " << k << std::endl;
151  }else{
152 
153  //Compute the classification error
154  Float accuracy = 0;
155  for(UINT i=0; i<testSet.getNumSamples(); i++){
156 
157  VectorFloat sample = testSet[i].getSample();
158 
159  if( !predict( sample , k) ){
160  errorLog << __GRT_LOG__ << " Failed to predict label for test sample with a k value of " << k << std::endl;
161  return false;
162  }
163 
164  if( testSet[i].getClassLabel() == predictedClassLabel ){
165  accuracy++;
166  }
167  }
168 
169  accuracy = accuracy / Float( testSet.getNumSamples() ) * 100.0;
170  trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
171 
172  trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << std::endl;
173 
174  if( accuracy > bestAccuracy ){
175  bestAccuracy = accuracy;
176  }
177 
178  index++;
179  }
180 
181  }
182 
183  if( bestAccuracy > 0 ){
184  //Sort the training log by value
185  std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
186 
187  //Copy the top matching values into a temporary buffer
188  Vector< IndexedDouble > tempLog;
189 
190  //Add the first value
191  tempLog.push_back( trainingAccuracyLog[0] );
192 
193  //Keep adding values until the value changes
194  for(UINT i=1; i<trainingAccuracyLog.size(); i++){
195  if( trainingAccuracyLog[i].value == tempLog[0].value ){
196  tempLog.push_back( trainingAccuracyLog[i] );
197  }else break;
198  }
199 
200  //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
201  std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);
202 
203  trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << std::endl;
204 
205  //Use the minimum index, this should give us the best accuracy with the minimum K value
206  //We now need to train the model again to make sure all the training metrics are computed correctly
207  if( !train_(trainingData,tempLog[0].index) ){
208  return false;
209  }
210  }
211  }
212 
213  //If we get this far, then a model has been trained
214 
215  //Compute the final training stats
216  trainingSetAccuracy = 0;
217  validationSetAccuracy = 0;
218 
219  //If scaling was on, then the data will already be scaled, so turn it off temporarily
220  bool scalingState = useScaling;
221  useScaling = false;
222  for(UINT i=0; i<trainingData.getNumSamples(); i++){
223  if( !predict_( trainingData[i].getSample() ) ){
224  trained = false;
225  errorLog << __GRT_LOG__ << " Failed to run prediction for training sample: " << i << "! Failed to fully train model!" << std::endl;
226  return false;
227  }
228 
229  if( predictedClassLabel == trainingData[i].getClassLabel() ){
230  trainingSetAccuracy++;
231  }
232  }
233 
234  if( useValidationSet ){
235  for(UINT i=0; i<validationData.getNumSamples(); i++){
236  if( !predict_( validationData[i].getSample() ) ){
237  trained = false;
238  errorLog << __GRT_LOG__ << " Failed to run prediction for validation sample: " << i << "! Failed to fully train model!" << std::endl;
239  return false;
240  }
241 
242  if( predictedClassLabel == validationData[i].getClassLabel() ){
243  validationSetAccuracy++;
244  }
245  }
246  }
247 
248  trainingSetAccuracy = trainingSetAccuracy / trainingData.getNumSamples() * 100.0;
249 
250  trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;
251 
252  if( useValidationSet ){
253  validationSetAccuracy = validationSetAccuracy / validationData.getNumSamples() * 100.0;
254  trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
255  }
256 
257  //Reset the scaling state for future prediction
258  useScaling = scalingState;
259 
260  converged = true;
261 
262  return true;
263 }
264 
265 bool KNN::train_(const ClassificationData &trainingData,const UINT K){
266 
267  //Store the number of neighbours to use
268  this->K = K;
269 
270  //Flag that the algorithm has been trained so we can compute the rejection thresholds
271  trained = true;
272 
273  //If null rejection is enabled then compute the null rejection thresholds
274  if( useNullRejection ){
275 
276  //Set the null rejection to false so we can compute the values for it (this will be set back to its current value later)
277  useNullRejection = false;
278  nullRejectionThresholds.clear();
279 
280  //Compute the rejection thresholds for each of the K classes
281  VectorFloat counter(numClasses,0);
282  trainingMu.resize( numClasses, 0 );
283  trainingSigma.resize( numClasses, 0 );
284  nullRejectionThresholds.resize( numClasses, 0 );
285 
286  //Compute Mu for each of the classes
287  const unsigned int numTrainingExamples = trainingData.getNumSamples();
288  Vector< IndexedDouble > predictionResults( numTrainingExamples );
289  for(UINT i=0; i<numTrainingExamples; i++){
290  predict( trainingData[i].getSample(), K);
291 
292  UINT classLabelIndex = 0;
293  for(UINT k=0; k<numClasses; k++){
294  if( predictedClassLabel == classLabels[k] ){
295  classLabelIndex = k;
296  break;
297  }
298  }
299 
300  predictionResults[ i ].index = classLabelIndex;
301  predictionResults[ i ].value = classDistances[ classLabelIndex ];
302 
303  trainingMu[ classLabelIndex ] += predictionResults[ i ].value;
304  counter[ classLabelIndex ]++;
305  }
306 
307  for(UINT j=0; j<numClasses; j++){
308  trainingMu[j] /= counter[j];
309  }
310 
311  //Compute Sigma for each of the classes
312  for(UINT i=0; i<numTrainingExamples; i++){
313  trainingSigma[predictionResults[i].index] += SQR(predictionResults[i].value - trainingMu[predictionResults[i].index]);
314  }
315 
316  for(UINT j=0; j<numClasses; j++){
317  Float count = counter[j];
318  if( count > 1 ){
319  trainingSigma[ j ] = sqrt( trainingSigma[j] / (count-1) );
320  }else{
321  trainingSigma[ j ] = 1.0;
322  }
323  }
324 
325  //Check to see if any of the mu or sigma values are zero or NaN
326  bool errorFound = false;
327  for(UINT j=0; j<numClasses; j++){
328  if( trainingMu[j] == 0 ){
329  warningLog << __GRT_LOG__ << " TrainingMu[ " << j << " ] is zero for a K value of " << K << std::endl;
330  }
331  if( trainingSigma[j] == 0 ){
332  warningLog << __GRT_LOG__ << " TrainingSigma[ " << j << " ] is zero for a K value of " << K << std::endl;
333  }
334  if( grt_isnan( trainingMu[j] ) ){
335  errorLog << __GRT_LOG__ << " TrainingMu[ " << j << " ] is NAN for a K value of " << K << std::endl;
336  errorFound = true;
337  }
338  if( grt_isnan( trainingSigma[j] ) ){
339  errorLog << __GRT_LOG__ << " TrainingSigma[ " << j << " ] is NAN for a K value of " << K << std::endl;
340  errorFound = true;
341  }
342  }
343 
344  if( errorFound ){
345  trained = false;
346  return false;
347  }
348 
349  //Compute the rejection thresholds
350  for(unsigned int j=0; j<numClasses; j++){
351  nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
352  }
353 
354  //Restore the actual state of the null rejection
355  useNullRejection = true;
356 
357  }else{
358  //Resize the rejection thresholds but set the values to 0
359  nullRejectionThresholds.clear();
360  nullRejectionThresholds.resize( numClasses, 0 );
361  }
362 
363  return true;
364 }
365 
366 bool KNN::predict_(VectorFloat &inputVector){
367 
368  if( !trained ){
369  errorLog << __GRT_LOG__ << " KNN model has not been trained" << std::endl;
370  return false;
371  }
372 
373  if( inputVector.getSize() != numInputDimensions ){
374  errorLog << __GRT_LOG__ << " The size of the input vector " << inputVector.getSize() << " does not match the number of features " << numInputDimensions << std::endl;
375  return false;
376  }
377 
378  //Scale the input vector if needed
379  if( useScaling ){
380  for(UINT i=0; i<numInputDimensions; i++){
381  inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
382  }
383  }
384 
385  //Run the prediction
386  return predict(inputVector,K);
387 }
388 
389 bool KNN::predict(const VectorFloat &inputVector,const UINT K){
390 
391  if( !trained ){
392  errorLog << __GRT_LOG__ << " KNN model has not been trained" << std::endl;
393  return false;
394  }
395 
396  if( inputVector.getSize() != numInputDimensions ){
397  errorLog << __GRT_LOG__ << " The size of the input vector " << inputVector.getSize() << " does not match the number of features " << numInputDimensions << std::endl;
398  return false;
399  }
400 
401  if( K > trainingData.getNumSamples() ){
402  errorLog << __GRT_LOG__ << " K is greater than the number of training samples!" << std::endl;
403  return false;
404  }
405 
406  //TODO - need to build a kdtree of the training data to allow better realtime prediction
407  const UINT M = trainingData.getNumSamples();
408  Vector< IndexedDouble > neighbours;
409 
410  for(UINT i=0; i<M; i++){
411  Float dist = 0;
412  UINT classLabel = trainingData[i].getClassLabel();
413  VectorFloat trainingSample = trainingData[i].getSample();
414 
415  switch( distanceMethod ){
416  case EUCLIDEAN_DISTANCE:
417  dist = computeEuclideanDistance(inputVector,trainingSample);
418  break;
419  case COSINE_DISTANCE:
420  dist = computeCosineDistance(inputVector,trainingSample);
421  break;
422  case MANHATTAN_DISTANCE:
423  dist = computeManhattanDistance(inputVector, trainingSample);
424  break;
425  default:
426  errorLog << __GRT_LOG__ << " unknown distance measure!" << std::endl;
427  return false;
428  break;
429  }
430 
431  if( neighbours.size() < K ){
432  neighbours.push_back( IndexedDouble(classLabel,dist) );
433  }else{
434  //Find the maximum value in the neighbours buffer
435  Float maxValue = neighbours[0].value;
436  UINT maxIndex = 0;
437  for(UINT n=1; n<neighbours.size(); n++){
438  if( neighbours[n].value > maxValue ){
439  maxValue = neighbours[n].value;
440  maxIndex = n;
441  }
442  }
443 
444  //If the dist is less than the maximum value in the buffer, then replace that value with the new dist
445  if( dist < maxValue ){
446  neighbours[ maxIndex ] = IndexedDouble(classLabel,dist);
447  }
448  }
449  }
450 
451  //Predict the class ID using the labels of the K nearest neighbours
452  if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses);
453  if( classDistances.size() != numClasses ) classDistances.resize(numClasses);
454 
455  std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
456  std::fill(classDistances.begin(),classDistances.end(),0);
457 
458  //Count the classes
459  for(UINT k=0; k<neighbours.size(); k++){
460  UINT classLabel = neighbours[k].index;
461  if( classLabel == 0 ){
462  errorLog << __GRT_LOG__ << " Class label of training example can not be zero!" << std::endl;
463  return false;
464  }
465 
466  //Find the index of the classLabel
467  UINT classLabelIndex = 0;
468  for(UINT j=0; j<numClasses; j++){
469  if( classLabel == classLabels[j] ){
470  classLabelIndex = j;
471  break;
472  }
473  }
474  classLikelihoods[ classLabelIndex ] += 1;
475  classDistances[ classLabelIndex ] += neighbours[k].value;
476  }
477 
478  //Get the max count
479  Float maxCount = classLikelihoods[0];
480  UINT maxIndex = 0;
481  for(UINT i=1; i<classLikelihoods.size(); i++){
482  if( classLikelihoods[i] > maxCount ){
483  maxCount = classLikelihoods[i];
484  maxIndex = i;
485  }
486  }
487 
488  //Compute the average distances per class
489  for(UINT i=0; i<numClasses; i++){
490  if( classLikelihoods[i] > 0 ) classDistances[i] /= classLikelihoods[i];
491  else classDistances[i] = BIG_DISTANCE;
492  }
493 
494  //Normalize the likelihoods
495  for(UINT i=0; i<numClasses; i++){
496  classLikelihoods[i] /= Float( neighbours.size() );
497  }
498 
499  //Set the maximum likelihood value
500  maxLikelihood = classLikelihoods[ maxIndex ];
501 
502  if( useNullRejection ){
503  if( classDistances[ maxIndex ] <= nullRejectionThresholds[ maxIndex ] ){
504  predictedClassLabel = classLabels[maxIndex];
505  }else{
506  predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; //Set the gesture label as the null label
507  }
508  }else{
509  predictedClassLabel = classLabels[maxIndex];
510  }
511 
512  return true;
513 }
514 
515 bool KNN::clear(){
516 
517  //Clear the Classifier variables
518  Classifier::clear();
519 
520  //Clear the KNN model
521  trainingData.clear();
522  trainingMu.clear();
523  trainingSigma.clear();
524 
525  return true;
526 }
527 
528 bool KNN::save( std::fstream &file ) const{
529 
530  if(!file.is_open())
531  {
532  errorLog << __GRT_LOG__ << " Could not open file to save model!" << std::endl;
533  return false;
534  }
535 
536  //Write the header info
537  file << "GRT_KNN_MODEL_FILE_V2.0\n";
538 
539  //Write the classifier settings to the file
540  if( !Classifier::saveBaseSettingsToFile(file) ){
541  errorLog << __GRT_LOG__ << " Failed to save classifier base settings to file!" << std::endl;
542  return false;
543  }
544 
545  file << "K: " << K << std::endl;
546  file << "DistanceMethod: " << distanceMethod << std::endl;
547  file << "SearchForBestKValue: " << searchForBestKValue << std::endl;
548  file << "MinKSearchValue: " << minKSearchValue << std::endl;
549  file << "MaxKSearchValue: " << maxKSearchValue << std::endl;
550 
551  if( trained ){
552  if( useNullRejection ){
553  file << "TrainingMu: ";
554  for(UINT j=0; j<trainingMu.size(); j++){
555  file << trainingMu[j] << "\t";
556  }file << std::endl;
557 
558  file << "TrainingSigma: ";
559  for(UINT j=0; j<trainingSigma.size(); j++){
560  file << trainingSigma[j] << "\t";
561  }file << std::endl;
562  }
563 
564  file << "NumTrainingSamples: " << trainingData.getNumSamples() << std::endl;
565  file << "TrainingData: \n";
566 
567  //Write each of the training samples
568  for(UINT i=0; i<trainingData.getNumSamples(); i++){
569  file << trainingData[i].getClassLabel() << "\t";
570 
571  for(UINT j=0; j<numInputDimensions; j++){
572  file << trainingData[i][j] << "\t";
573  }
574  file << std::endl;
575  }
576  }
577 
578  return true;
579 }
580 
581 bool KNN::load( std::fstream &file ){
582 
583  if(!file.is_open())
584  {
585  errorLog << __GRT_LOG__ << " Could not open file to load model!" << std::endl;
586  return false;
587  }
588 
589  std::string word;
590 
591  file >> word;
592 
593  //Check to see if we should load a legacy file
594  if( word == "GRT_KNN_MODEL_FILE_V1.0" ){
595  return loadLegacyModelFromFile( file );
596  }
597 
598  //Find the file type header
599  if(word != "GRT_KNN_MODEL_FILE_V2.0"){
600  errorLog << __GRT_LOG__ << " Could not find Model File Header!" << std::endl;
601  return false;
602  }
603 
604  //Load the base settings from the file
605  if( !Classifier::loadBaseSettingsFromFile(file) ){
606  errorLog << __GRT_LOG__ << " Failed to load base settings from file!" << std::endl;
607  return false;
608  }
609 
610  file >> word;
611  if(word != "K:"){
612  errorLog << __GRT_LOG__ << " Could not find K!" << std::endl;
613  return false;
614  }
615  file >> K;
616 
617  file >> word;
618  if(word != "DistanceMethod:"){
619  errorLog << __GRT_LOG__ << " Could not find DistanceMethod!" << std::endl;
620  return false;
621  }
622  file >> distanceMethod;
623 
624  file >> word;
625  if(word != "SearchForBestKValue:"){
626  errorLog << __GRT_LOG__ << " Could not find SearchForBestKValue!" << std::endl;
627  return false;
628  }
629  file >> searchForBestKValue;
630 
631  file >> word;
632  if(word != "MinKSearchValue:"){
633  errorLog << __GRT_LOG__ << " Could not find MinKSearchValue!" << std::endl;
634  return false;
635  }
636  file >> minKSearchValue;
637 
638  file >> word;
639  if(word != "MaxKSearchValue:"){
640  errorLog << __GRT_LOG__ << " Could not find MaxKSearchValue!" << std::endl;
641  return false;
642  }
643  file >> maxKSearchValue;
644 
645  if( trained ){
646 
647  //Resize the buffers
648  trainingMu.resize(numClasses,0);
649  trainingSigma.resize(numClasses,0);
650 
651  if( useNullRejection ){
652  file >> word;
653  if(word != "TrainingMu:"){
654  errorLog << __GRT_LOG__ << " Could not find TrainingMu!" << std::endl;
655  return false;
656  }
657 
658  //Load the trainingMu data
659  for(UINT j=0; j<numClasses; j++){
660  file >> trainingMu[j];
661  }
662 
663  file >> word;
664  if(word != "TrainingSigma:"){
665  errorLog << __GRT_LOG__ << " Could not find TrainingSigma!" << std::endl;
666  return false;
667  }
668 
669  //Load the trainingSigma data
670  for(UINT j=0; j<numClasses; j++){
671  file >> trainingSigma[j];
672  }
673  }
674 
675  file >> word;
676  if(word != "NumTrainingSamples:"){
677  errorLog << __GRT_LOG__ << " Could not find NumTrainingSamples!" << std::endl;
678  return false;
679  }
680  unsigned int numTrainingSamples = 0;
681  file >> numTrainingSamples;
682 
683  file >> word;
684  if(word != "TrainingData:"){
685  errorLog << __GRT_LOG__ << " Could not find TrainingData!" << std::endl;
686  return false;
687  }
688 
689  //Load the training data
690  trainingData.setNumDimensions(numInputDimensions);
691  unsigned int classLabel = 0;
692  VectorFloat sample(numInputDimensions,0);
693  for(UINT i=0; i<numTrainingSamples; i++){
694  //Read the class label
695  file >> classLabel;
696 
697  //Read the feature vector
698  for(UINT j=0; j<numInputDimensions; j++){
699  file >> sample[j];
700  }
701 
702  //Add it to the training data
703  trainingData.addSample(classLabel, sample);
704  }
705 
706  maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
707  bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
708  classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
709  classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
710  }
711 
712  return true;
713 }
714 
715 bool KNN::recomputeNullRejectionThresholds(){
716 
717  if( !trained ){
718  return false;
719  }
720 
721  nullRejectionThresholds.resize(numClasses,0);
722 
723  if( trainingMu.size() != numClasses || trainingSigma.size() != numClasses ){
724  return false;
725  }
726 
727  for(unsigned int j=0; j<numClasses; j++){
728  nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
729  }
730 
731  return true;
732 }
733 
734 bool KNN::setK(UINT K){
735  if( K > 0 ){
736  this->K = K;
737  return true;
738  }
739  return false;
740 }
741 
742 bool KNN::setMinKSearchValue(UINT minKSearchValue){
743  this->minKSearchValue = minKSearchValue;
744  return true;
745 }
746 
747 bool KNN::setMaxKSearchValue(UINT maxKSearchValue){
748  this->maxKSearchValue = maxKSearchValue;
749  return true;
750 }
751 
752 bool KNN::enableBestKValueSearch(bool searchForBestKValue){
753  this->searchForBestKValue = searchForBestKValue;
754  return true;
755 }
756 
757 bool KNN::setNullRejectionCoeff(Float nullRejectionCoeff){
758  if( nullRejectionCoeff > 0 ){
759  this->nullRejectionCoeff = nullRejectionCoeff;
760  recomputeNullRejectionThresholds();
761  return true;
762  }
763  return false;
764 }
765 
766 bool KNN::setDistanceMethod(UINT distanceMethod){
767  if( distanceMethod == EUCLIDEAN_DISTANCE || distanceMethod == COSINE_DISTANCE || distanceMethod == MANHATTAN_DISTANCE ){
768  this->distanceMethod = distanceMethod;
769  return true;
770  }
771  return false;
772 }
773 
774 Float KNN::computeEuclideanDistance(const VectorFloat &a,const VectorFloat &b){
775  Float dist = 0;
776  for(UINT j=0; j<numInputDimensions; j++){
777  dist += SQR( a[j] - b[j] );
778  }
779  return sqrt( dist );
780 }
781 
782 Float KNN::computeCosineDistance(const VectorFloat &a,const VectorFloat &b){
783  Float dist = 0;
784 
785  Float dotAB = 0;
786  Float magA = 0;
787  Float magB = 0;
788 
789  for(UINT j=0; j<numInputDimensions; j++){
790  dotAB += a[j] * b[j];
791  magA += SQR(a[j]);
792  magB += SQR(b[j]);
793  }
794 
795  dist = dotAB / (sqrt(magA) * sqrt(magB));
796 
797  return dist;
798 }
799 
800 Float KNN::computeManhattanDistance(const VectorFloat &a,const VectorFloat &b){
801  Float dist = 0;
802 
803  for(UINT j=0; j<numInputDimensions; j++){
804  dist += fabs( a[j] - b[j] );
805  }
806 
807  return dist;
808 }
809 
810 bool KNN::loadLegacyModelFromFile( std::fstream &file ){
811 
812  std::string word;
813 
814  //Find the file type header
815  file >> word;
816  if(word != "NumFeatures:"){
817  errorLog << __GRT_LOG__ << " Could not find NumFeatures!" << std::endl;
818  return false;
819  }
820  file >> numInputDimensions;
821 
822  file >> word;
823  if(word != "NumClasses:"){
824  errorLog << __GRT_LOG__ << " Could not find NumClasses!" << std::endl;
825  return false;
826  }
827  file >> numClasses;
828 
829  file >> word;
830  if(word != "K:"){
831  errorLog << __GRT_LOG__ << " Could not find K!" << std::endl;
832  return false;
833  }
834  file >> K;
835 
836  file >> word;
837  if(word != "DistanceMethod:"){
838  errorLog << __GRT_LOG__ << " Could not find DistanceMethod!" << std::endl;
839  return false;
840  }
841  file >> distanceMethod;
842 
843  file >> word;
844  if(word != "SearchForBestKValue:"){
845  errorLog << __GRT_LOG__ << " Could not find SearchForBestKValue!" << std::endl;
846  return false;
847  }
848  file >> searchForBestKValue;
849 
850  file >> word;
851  if(word != "MinKSearchValue:"){
852  errorLog << __GRT_LOG__ << " Could not find MinKSearchValue!" << std::endl;
853  return false;
854  }
855  file >> minKSearchValue;
856 
857  file >> word;
858  if(word != "MaxKSearchValue:"){
859  errorLog << __GRT_LOG__ << " Could not find MaxKSearchValue!" << std::endl;
860  return false;
861  }
862  file >> maxKSearchValue;
863 
864  file >> word;
865  if(word != "UseScaling:"){
866  errorLog << __GRT_LOG__ << " Could not find UseScaling!" << std::endl;
867  return false;
868  }
869  file >> useScaling;
870 
871  file >> word;
872  if(word != "UseNullRejection:"){
873  errorLog << __GRT_LOG__ << " Could not find UseNullRejection!" << std::endl;
874  return false;
875  }
876  file >> useNullRejection;
877 
878  file >> word;
879  if(word != "NullRejectionCoeff:"){
880  errorLog << __GRT_LOG__ << " Could not find NullRejectionCoeff!" << std::endl;
881  return false;
882  }
883  file >> nullRejectionCoeff;
884 
886  if( useScaling ){
887  //Resize the ranges buffer
888  ranges.resize( numInputDimensions );
889 
890  file >> word;
891  if(word != "Ranges:"){
892  errorLog << __GRT_LOG__ << " Could not find Ranges!" << std::endl;
893  return false;
894  }
895  for(UINT n=0; n<ranges.getSize(); n++){
896  file >> ranges[n].minValue;
897  file >> ranges[n].maxValue;
898  }
899  }
900 
901  //Resize the buffers
902  trainingMu.resize(numClasses,0);
903  trainingSigma.resize(numClasses,0);
904 
905  file >> word;
906  if(word != "TrainingMu:"){
907  errorLog << __GRT_LOG__ << " Could not find TrainingMu!" << std::endl;
908  return false;
909  }
910 
911  //Load the trainingMu data
912  for(UINT j=0; j<numClasses; j++){
913  file >> trainingMu[j];
914  }
915 
916  file >> word;
917  if(word != "TrainingSigma:"){
918  errorLog << __GRT_LOG__ << " Could not find TrainingSigma!" << std::endl;
919  return false;
920  }
921 
922  //Load the trainingSigma data
923  for(UINT j=0; j<numClasses; j++){
924  file >> trainingSigma[j];
925  }
926 
927  file >> word;
928  if(word != "NumTrainingSamples:"){
929  errorLog << __GRT_LOG__ << " Could not find NumTrainingSamples!" << std::endl;
930  return false;
931  }
932  unsigned int numTrainingSamples = 0;
933  file >> numTrainingSamples;
934 
935  file >> word;
936  if(word != "TrainingData:"){
937  errorLog << __GRT_LOG__ << " Could not find TrainingData!" << std::endl;
938  return false;
939  }
940 
941  //Load the training data
942  trainingData.setNumDimensions(numInputDimensions);
943  unsigned int classLabel = 0;
944  VectorFloat sample(numInputDimensions,0);
945  for(UINT i=0; i<numTrainingSamples; i++){
946  //Read the class label
947  file >> classLabel;
948 
949  //Read the feature vector
950  for(UINT j=0; j<numInputDimensions; j++){
951  file >> sample[j];
952  }
953 
954  //Add it to the training data
955  trainingData.addSample(classLabel, sample);
956  }
957 
958  //Flag that the model has been trained
959  trained = true;
960 
961  //Compute the null rejection thresholds
962  recomputeNullRejectionThresholds();
963 
964  return true;
965 }
966 
967 GRT_END_NAMESPACE
968 
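A note on the null-rejection logic in the listing above: train_ estimates, for each class, the mean (trainingMu) and standard deviation (trainingSigma) of the average distance between a training sample and its K nearest neighbours, then sets each threshold to trainingMu[j] + trainingSigma[j]*nullRejectionCoeff. At prediction time the winning class is kept only when its average neighbour distance is at or below that threshold; otherwise the null class label is returned. The self-contained sketch below mirrors that test; the numeric values are hypothetical and only the formula comes from the code above.

#include <cstdio>

// Standalone illustration of the KNN null-rejection test; all values are hypothetical.
int main(){
    const double trainingMu         = 0.42; // mean winning-class neighbour distance (from training)
    const double trainingSigma      = 0.10; // standard deviation of that distance (from training)
    const double nullRejectionCoeff = 2.0;  // user-set coefficient

    // Same formula as KNN::train_ and KNN::recomputeNullRejectionThresholds
    const double threshold = trainingMu + trainingSigma * nullRejectionCoeff; // 0.62

    // classDistance is the average distance to the K nearest neighbours of the winning class
    const double classDistance = 0.55;

    if( classDistance <= threshold ){
        std::printf( "accept: %.2f <= %.2f, keep the predicted class label\n", classDistance, threshold );
    }else{
        std::printf( "reject: %.2f > %.2f, return the null class label\n", classDistance, threshold );
    }
    return 0;
}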
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:274
VectorFloat trainingSigma
Holds the stddev of the max-class distance of the training data for each of classes
Definition: KNN.h:247
std::string getId() const
Definition: GRTBase.cpp:85
#define DEFAULT_NULL_LIKELIHOOD_VALUE
Definition: Classifier.h:33
virtual bool save(std::fstream &file) const
Definition: KNN.cpp:528
virtual bool load(std::fstream &file)
Definition: KNN.cpp:581
bool addSample(const UINT classLabel, const VectorFloat &sample)
bool searchForBestKValue
Sets if the best K value should be searched for or if the model should be trained with K ...
Definition: KNN.h:242
Classifier(const std::string &classifierId="")
Definition: Classifier.cpp:77
Vector< ClassTracker > getClassTracker() const
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
bool setNumDimensions(UINT numDimensions)
static std::string getId()
Definition: KNN.cpp:28
virtual bool train_(ClassificationData &trainingData)
Definition: KNN.cpp:99
UINT distanceMethod
The distance method used to compute the distance between each data point
Definition: KNN.h:241
UINT getSize() const
Definition: Vector.h:201
bool setK(UINT K)
Definition: KNN.cpp:734
KNN(UINT K=10, bool useScaling=false, bool useNullRejection=false, Float nullRejectionCoeff=10.0, bool searchForBestKValue=false, UINT minKSearchValue=1, UINT maxKSearchValue=10)
ClassificationData trainingData
Holds the trainingData to perform the predictions
Definition: KNN.h:245
UINT maxKSearchValue
The maximum K value to end the search at
Definition: KNN.h:244
bool setDistanceMethod(UINT distanceMethod)
Definition: KNN.cpp:766
virtual bool recomputeNullRejectionThresholds()
Definition: KNN.cpp:715
KNN & operator=(const KNN &rhs)
Definition: KNN.cpp:58
bool setMaxKSearchValue(UINT maxKSearchValue)
Definition: KNN.cpp:747
bool enableBestKValueSearch(bool searchForBestKValue)
Definition: KNN.cpp:752
UINT getNumSamples() const
virtual bool predict_(VectorFloat &inputVector)
Definition: KNN.cpp:366
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: KNN.cpp:76
UINT minKSearchValue
The minimum K value to start the search from
Definition: KNN.h:243
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:101
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:321
VectorFloat trainingMu
Holds the average max-class distance of the training data for each of classes
Definition: KNN.h:246
UINT getNumDimensions() const
UINT getNumClasses() const
#define BIG_DISTANCE
Definition: KNN.h:33
Vector< MinMax > getRanges() const
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
bool setNullRejectionCoeff(Float nullRejectionCoeff)
Definition: KNN.cpp:757
bool setMinKSearchValue(UINT minKSearchValue)
Definition: KNN.cpp:742
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: KNN.cpp:515
virtual ~KNN(void)
Definition: KNN.cpp:54
virtual bool clear()
Definition: Classifier.cpp:151
Definition: KNN.h:51
This is the main base class that all GRT Classification algorithms should inherit from...
Definition: Classifier.h:41
bool loadLegacyModelFromFile(std::fstream &file)
Definition: KNN.cpp:810
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: GRTBase.h:184