GestureRecognitionToolkit Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source C++ machine learning library designed for real-time gesture recognition.
KNN.cpp
This file implements the K-Nearest Neighbor (KNN) classification algorithm (http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm). KNN is a simple but powerful classifier, based on finding the closest K training examples in the feature space for a new input vector. An object is classified by a majority vote of its neighbors, with the object assigned to the class most common amongst its K nearest neighbors (K is a positive integer, typically small). If K = 1, the object is simply assigned to the class of its nearest neighbor.
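A minimal usage sketch of this classifier (an illustrative example, not taken from the GRT documentation): build a small two-class dataset, train the model, and classify a new sample. The GRT.h umbrella header and the sample values are placeholder assumptions, and error handling is elided.

#include <iostream>
#include "GRT.h"
using namespace GRT;

int main(){
    //Build a tiny two-class training set with 2 input dimensions
    ClassificationData trainingData;
    trainingData.setNumDimensions( 2 );

    VectorFloat sample(2);
    sample[0] = 0.1; sample[1] = 0.2;
    trainingData.addSample( 1, sample );    //Class label 1
    sample[0] = 0.9; sample[1] = 0.8;
    trainingData.addSample( 2, sample );    //Class label 2

    //Train a KNN classifier with K = 1
    KNN knn( 1 );
    if( !knn.train( trainingData ) ) return 1;

    //Classify a new sample; with K = 1 this should predict class label 1
    sample[0] = 0.15; sample[1] = 0.25;
    if( knn.predict( sample ) ){
        std::cout << "Predicted class label: " << knn.getPredictedClassLabel() << std::endl;
    }
    return 0;
}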
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #include "KNN.h"
22 
23 GRT_BEGIN_NAMESPACE
24 
25 //Register the KNN module with the Classifier base class
26 RegisterClassifierModule< KNN > KNN::registerModule("KNN");
27 
28 KNN::KNN(unsigned int K,bool useScaling,bool useNullRejection,Float nullRejectionCoeff,bool searchForBestKValue,UINT minKSearchValue,UINT maxKSearchValue){
29  this->K = K;
30  this->distanceMethod = EUCLIDEAN_DISTANCE;
31  this->useScaling = useScaling;
32  this->useNullRejection = useNullRejection;
33  this->nullRejectionCoeff = nullRejectionCoeff;
34  this->searchForBestKValue = searchForBestKValue;
35  this->minKSearchValue = minKSearchValue;
36  this->maxKSearchValue = maxKSearchValue;
37  supportsNullRejection = true;
38  classType = "KNN";
39  classifierType = classType;
40  classifierMode = STANDARD_CLASSIFIER_MODE;
41  distanceMethod = EUCLIDEAN_DISTANCE;
42  debugLog.setProceedingText("[DEBUG KNN]");
43  errorLog.setProceedingText("[ERROR KNN]");
44  trainingLog.setProceedingText("[TRAINING KNN]");
45  warningLog.setProceedingText("[WARNING KNN]");
46 }
47 
48 KNN::KNN(const KNN &rhs){
49  classType = "KNN";
50  classifierType = classType;
51  classifierMode = STANDARD_CLASSIFIER_MODE;
52  debugLog.setProceedingText("[DEBUG KNN]");
53  errorLog.setProceedingText("[ERROR KNN]");
54  trainingLog.setProceedingText("[TRAINING KNN]");
55  warningLog.setProceedingText("[WARNING KNN]");
56  *this = rhs;
57 }
58 
59 KNN::~KNN(void)
60 {
61 }
62 
63 KNN& KNN::operator=(const KNN &rhs){
64  if( this != &rhs ){
65  //KNN variables
66  this->K = rhs.K;
67  this->distanceMethod = rhs.distanceMethod;
68  this->searchForBestKValue = rhs.searchForBestKValue;
69  this->minKSearchValue = rhs.minKSearchValue;
70  this->maxKSearchValue = rhs.maxKSearchValue;
71  this->trainingData = rhs.trainingData;
72  this->trainingMu = rhs.trainingMu;
73  this->trainingSigma = rhs.trainingSigma;
74 
75  //Classifier variables
76  copyBaseVariables( (Classifier*)&rhs );
77  }
78  return *this;
79 }
80 
81 bool KNN::deepCopyFrom(const Classifier *classifier){
82 
83  if( classifier == NULL ) return false;
84 
85  if( this->getClassifierType() == classifier->getClassifierType() ){
86  //Get a pointer the KNN copy instance
87  KNN *ptr = (KNN*)classifier;
88 
89  this->K = ptr->K;
90  this->distanceMethod = ptr->distanceMethod;
91  this->searchForBestKValue = ptr->searchForBestKValue;
92  this->minKSearchValue = ptr->minKSearchValue;
93  this->maxKSearchValue = ptr->maxKSearchValue;
94  this->trainingData = ptr->trainingData;
95  this->trainingMu = ptr->trainingMu;
96  this->trainingSigma = ptr->trainingSigma;
97 
98  //Classifier variables
99  return copyBaseVariables( classifier );
100  }
101  return false;
102 }
103 
104 bool KNN::train_(ClassificationData &trainingData){
105 
106  //Clear any previous models
107  clear();
108 
109  if( trainingData.getNumSamples() == 0 ){
110  errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
111  return false;
112  }
113 
114  //Get the ranges of the data
115  ranges = trainingData.getRanges();
116  if( useScaling ){
117  //Scale the training data between 0 and 1
118  trainingData.scale(0, 1);
119  }
120 
121  //Store the number of features, classes and the training data
122  this->numInputDimensions = trainingData.getNumDimensions();
123  this->numClasses = trainingData.getNumClasses();
124 
125  //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
126  this->trainingData = trainingData;
127 
128  //Set the class labels
129  classLabels.resize( numClasses );
130  for(UINT k=0; k<numClasses; k++){
131  classLabels[k] = trainingData.getClassTracker()[k].classLabel;
132  }
133 
134  //If we do not need to search for the best K value, then call the sub training function and return the result
135  if( !searchForBestKValue ){
136  return train_(trainingData,K);
137  }
138 
139  //If we have got this far then we are going to search for the best K value
140  UINT index = 0;
141  Float bestAccuracy = 0;
142  Vector< IndexedDouble > trainingAccuracyLog;
143 
144  for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
145  //Randomly split the data and use 80% to train the algorithm and 20% to test it
146  ClassificationData trainingSet(trainingData);
147  ClassificationData testSet = trainingSet.partition(80,true);
148 
149  if( !train_(trainingSet, k) ){
150  errorLog << "Failed to train model for a k value of " << k << std::endl;
151  }else{
152 
153  //Compute the classification error
154  Float accuracy = 0;
155  for(UINT i=0; i<testSet.getNumSamples(); i++){
156 
157  VectorFloat sample = testSet[i].getSample();
158 
159  if( !predict( sample , k) ){
160  errorLog << "Failed to predict label for test sample with a k value of " << k << std::endl;
161  return false;
162  }
163 
164  if( testSet[i].getClassLabel() == predictedClassLabel ){
165  accuracy++;
166  }
167  }
168 
169  accuracy = accuracy /Float( testSet.getNumSamples() ) * 100.0;
170  trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
171 
172  trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << std::endl;
173 
174  if( accuracy > bestAccuracy ){
175  bestAccuracy = accuracy;
176  }
177 
178  index++;
179  }
180 
181  }
182 
183  if( bestAccuracy > 0 ){
184  //Sort the training log by value
185  std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
186 
187  //Copy the top matching values into a temporary buffer
188  Vector< IndexedDouble > tempLog;
189 
190  //Add the first value
191  tempLog.push_back( trainingAccuracyLog[0] );
192 
193  //Keep adding values until the value changes
194  for(UINT i=1; i<trainingAccuracyLog.size(); i++){
195  if( trainingAccuracyLog[i].value == tempLog[0].value ){
196  tempLog.push_back( trainingAccuracyLog[i] );
197  }else break;
198  }
199 
200  //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
201  std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);
202 
203  trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << std::endl;
204 
205  //Use the minimum index, this should give us the best accuracy with the minimum K value
206  //We now need to train the model again to make sure all the training metrics are computed correctly
207  return train_(trainingData,tempLog[0].index);
208  }
209 
210  return false;
211 }
212 
213 bool KNN::train_(const ClassificationData &trainingData,const UINT K){
214 
215  //Store the K value
216  this->K = K;
217 
218  //Flag that the algorithm has been trained so we can compute the rejection thresholds
219  trained = true;
220 
221  //If null rejection is enabled then compute the null rejection thresholds
222  if( useNullRejection ){
223 
224  //Temporarily disable null rejection while the rejection statistics are computed (it is re-enabled below)
225  useNullRejection = false;
226  nullRejectionThresholds.clear();
227 
228  //Compute the rejection thresholds for each class
229  VectorFloat counter(numClasses,0);
230  trainingMu.resize( numClasses, 0 );
231  trainingSigma.resize( numClasses, 0 );
232  nullRejectionThresholds.resize( numClasses, 0 );
233 
234  //Compute Mu for each of the classes
235  const unsigned int numTrainingExamples = trainingData.getNumSamples();
236  Vector< IndexedDouble > predictionResults( numTrainingExamples );
237  for(UINT i=0; i<numTrainingExamples; i++){
238  predict( trainingData[i].getSample(), K);
239 
240  UINT classLabelIndex = 0;
241  for(UINT k=0; k<numClasses; k++){
242  if( predictedClassLabel == classLabels[k] ){
243  classLabelIndex = k;
244  break;
245  }
246  }
247 
248  predictionResults[ i ].index = classLabelIndex;
249  predictionResults[ i ].value = classDistances[ classLabelIndex ];
250 
251  trainingMu[ classLabelIndex ] += predictionResults[ i ].value;
252  counter[ classLabelIndex ]++;
253  }
254 
255  for(UINT j=0; j<numClasses; j++){
256  trainingMu[j] /= counter[j];
257  }
258 
259  //Compute Sigma for each of the classes
260  for(UINT i=0; i<numTrainingExamples; i++){
261  trainingSigma[predictionResults[i].index] += SQR(predictionResults[i].value - trainingMu[predictionResults[i].index]);
262  }
263 
264  for(UINT j=0; j<numClasses; j++){
265  Float count = counter[j];
266  if( count > 1 ){
267  trainingSigma[ j ] = sqrt( trainingSigma[j] / (count-1) );
268  }else{
269  trainingSigma[ j ] = 1.0;
270  }
271  }
272 
273  //Check to see if any of the mu or sigma values are zero or NaN
274  bool errorFound = false;
275  for(UINT j=0; j<numClasses; j++){
276  if( trainingMu[j] == 0 ){
277  warningLog << "TrainingMu[ " << j << " ] is zero for a K value of " << K << std::endl;
278  }
279  if( trainingSigma[j] == 0 ){
280  warningLog << "TrainingSigma[ " << j << " ] is zero for a K value of " << K << std::endl;
281  }
282  if( grt_isnan( trainingMu[j] ) ){
283  errorLog << "TrainingMu[ " << j << " ] is NAN for a K value of " << K << std::endl;
284  errorFound = true;
285  }
286  if( grt_isnan( trainingSigma[j] ) ){
287  errorLog << "TrainingSigma[ " << j << " ] is NAN for a K value of " << K << std::endl;
288  errorFound = true;
289  }
290  }
291 
292  if( errorFound ){
293  trained = false;
294  return false;
295  }
296 
297  //Compute the rejection thresholds
298  for(unsigned int j=0; j<numClasses; j++){
299  nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
300  }
301 
302  //Restore the actual state of the null rejection
303  useNullRejection = true;
304 
305  }else{
306  //Resize the rejection thresholds but set the values to 0
307  nullRejectionThresholds.clear();
308  nullRejectionThresholds.resize( numClasses, 0 );
309  }
310 
311  return true;
312 }
313 
314 bool KNN::predict_(VectorFloat &inputVector){
315 
316  if( !trained ){
317  errorLog << "predict_(VectorFloat &inputVector) - KNN model has not been trained" << std::endl;
318  return false;
319  }
320 
321  if( inputVector.size() != numInputDimensions ){
322  errorLog << "predict_(VectorFloat &inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numInputDimensions << std::endl;
323  return false;
324  }
325 
326  //Scale the input vector if needed
327  if( useScaling ){
328  for(UINT i=0; i<numInputDimensions; i++){
329  inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
330  }
331  }
332 
333  //Run the prediction
334  return predict(inputVector,K);
335 }
336 
337 bool KNN::predict(const VectorFloat &inputVector,const UINT K){
338 
339  if( !trained ){
340  errorLog << "predict(VectorFloat inputVector,UINT K) - KNN model has not been trained" << std::endl;
341  return false;
342  }
343 
344  if( inputVector.size() != numInputDimensions ){
345  errorLog << "predict(VectorFloat inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numInputDimensions << std::endl;
346  return false;
347  }
348 
349  if( K > trainingData.getNumSamples() ){
350  errorLog << "predict(VectorFloat inputVector,UINT K) - K Is Greater Than The Number Of Training Samples" << std::endl;
351  return false;
352  }
353 
354  //TODO - need to build a kdtree of the training data to allow better realtime prediction
355  const UINT M = trainingData.getNumSamples();
356  Vector< IndexedDouble > neighbours;
357 
358  for(UINT i=0; i<M; i++){
359  Float dist = 0;
360  UINT classLabel = trainingData[i].getClassLabel();
361  VectorFloat trainingSample = trainingData[i].getSample();
362 
363  switch( distanceMethod ){
364  case EUCLIDEAN_DISTANCE:
365  dist = computeEuclideanDistance(inputVector,trainingSample);
366  break;
367  case COSINE_DISTANCE:
368  dist = computeCosineDistance(inputVector,trainingSample);
369  break;
370  case MANHATTAN_DISTANCE:
371  dist = computeManhattanDistance(inputVector, trainingSample);
372  break;
373  default:
374  errorLog << "predict(vector< Float > inputVector) - unknown distance measure!" << std::endl;
375  return false;
376  break;
377  }
378 
379  if( neighbours.size() < K ){
380  neighbours.push_back( IndexedDouble(classLabel,dist) );
381  }else{
382  //Find the maximum value in the neighbours buffer
383  Float maxValue = neighbours[0].value;
384  UINT maxIndex = 0;
385  for(UINT n=1; n<neighbours.size(); n++){
386  if( neighbours[n].value > maxValue ){
387  maxValue = neighbours[n].value;
388  maxIndex = n;
389  }
390  }
391 
392  //If the dist is less than the maximum value in the buffer, then replace that value with the new dist
393  if( dist < maxValue ){
394  neighbours[ maxIndex ] = IndexedDouble(classLabel,dist);
395  }
396  }
397  }
398 
399  //Predict the class ID using the labels of the K nearest neighbours
400  if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses);
401  if( classDistances.size() != numClasses ) classDistances.resize(numClasses);
402 
403  std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
404  std::fill(classDistances.begin(),classDistances.end(),0);
405 
406  //Count the classes
407  for(UINT k=0; k<neighbours.size(); k++){
408  UINT classLabel = neighbours[k].index;
409  if( classLabel == 0 ){
410  errorLog << "predict(VectorFloat inputVector) - Class label of training example can not be zero!" << std::endl;
411  return false;
412  }
413 
414  //Find the index of the classLabel
415  UINT classLabelIndex = 0;
416  for(UINT j=0; j<numClasses; j++){
417  if( classLabel == classLabels[j] ){
418  classLabelIndex = j;
419  break;
420  }
421  }
422  classLikelihoods[ classLabelIndex ] += 1;
423  classDistances[ classLabelIndex ] += neighbours[k].value;
424  }
425 
426  //Get the max count
427  Float maxCount = classLikelihoods[0];
428  UINT maxIndex = 0;
429  for(UINT i=1; i<classLikelihoods.size(); i++){
430  if( classLikelihoods[i] > maxCount ){
431  maxCount = classLikelihoods[i];
432  maxIndex = i;
433  }
434  }
435 
436  //Compute the average distances per class
437  for(UINT i=0; i<numClasses; i++){
438  if( classLikelihoods[i] > 0 ) classDistances[i] /= classLikelihoods[i];
439  else classDistances[i] = BIG_DISTANCE;
440  }
441 
442  //Normalize the likelihoods
443  for(UINT i=0; i<numClasses; i++){
444  classLikelihoods[i] /= Float( neighbours.size() );
445  }
446 
447  //Set the maximum likelihood value
448  maxLikelihood = classLikelihoods[ maxIndex ];
449 
450  if( useNullRejection ){
451  if( classDistances[ maxIndex ] <= nullRejectionThresholds[ maxIndex ] ){
452  predictedClassLabel = classLabels[maxIndex];
453  }else{
454  predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; //Set the gesture label as the null label
455  }
456  }else{
457  predictedClassLabel = classLabels[maxIndex];
458  }
459 
460  return true;
461 }
462 
463 bool KNN::clear(){
464 
465  //Clear the Classifier variables
466  Classifier::clear();
467 
468  //Clear the KNN model
469  trainingData.clear();
470  trainingMu.clear();
471  trainingSigma.clear();
472 
473  return true;
474 }
475 
476 bool KNN::saveModelToFile( std::fstream &file ) const{
477 
478  if(!file.is_open())
479  {
480  errorLog << "saveModelToFile(fstream &file) - Could not open file to save model!" << std::endl;
481  return false;
482  }
483 
484  //Write the header info
485  file << "GRT_KNN_MODEL_FILE_V2.0\n";
486 
487  //Write the classifier settings to the file
488  if( !Classifier::saveBaseSettingsToFile(file) ){
489  errorLog <<"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
490  return false;
491  }
492 
493  file << "K: " << K << std::endl;
494  file << "DistanceMethod: " << distanceMethod << std::endl;
495  file << "SearchForBestKValue: " << searchForBestKValue << std::endl;
496  file << "MinKSearchValue: " << minKSearchValue << std::endl;
497  file << "MaxKSearchValue: " << maxKSearchValue << std::endl;
498 
499  if( trained ){
500  if( useNullRejection ){
501  file << "TrainingMu: ";
502  for(UINT j=0; j<trainingMu.size(); j++){
503  file << trainingMu[j] << "\t";
504  }file << std::endl;
505 
506  file << "TrainingSigma: ";
507  for(UINT j=0; j<trainingSigma.size(); j++){
508  file << trainingSigma[j] << "\t";
509  }file << std::endl;
510  }
511 
512  file << "NumTrainingSamples: " << trainingData.getNumSamples() << std::endl;
513  file << "TrainingData: \n";
514 
515  //Write each of the training samples
516  for(UINT i=0; i<trainingData.getNumSamples(); i++){
517  file<< trainingData[i].getClassLabel() << "\t";
518 
519  for(UINT j=0; j<numInputDimensions; j++){
520  file << trainingData[i][j] << "\t";
521  }
522  file << std::endl;
523  }
524  }
525 
526  return true;
527 }
528 
529 bool KNN::loadModelFromFile( std::fstream &file ){
530 
531  if(!file.is_open())
532  {
533  errorLog << "loadModelFromFile(fstream &file) - Could not open file to load model!" << std::endl;
534  return false;
535  }
536 
537  std::string word;
538 
539  file >> word;
540 
541  //Check to see if we should load a legacy file
542  if( word == "GRT_KNN_MODEL_FILE_V1.0" ){
543  return loadLegacyModelFromFile( file );
544  }
545 
546  //Find the file type header
547  if(word != "GRT_KNN_MODEL_FILE_V2.0"){
548  errorLog << "loadModelFromFile(fstream &file) - Could not find Model File Header!" << std::endl;
549  return false;
550  }
551 
552  //Load the base settings from the file
553  if( !Classifier::loadBaseSettingsFromFile(file) ){
554  errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << std::endl;
555  return false;
556  }
557 
558  file >> word;
559  if(word != "K:"){
560  errorLog << "loadModelFromFile(fstream &file) - Could not find K!" << std::endl;
561  return false;
562  }
563  file >> K;
564 
565  file >> word;
566  if(word != "DistanceMethod:"){
567  errorLog << "loadModelFromFile(fstream &file) - Could not find DistanceMethod!" << std::endl;
568  return false;
569  }
570  file >> distanceMethod;
571 
572  file >> word;
573  if(word != "SearchForBestKValue:"){
574  errorLog << "loadModelFromFile(fstream &file) - Could not find SearchForBestKValue!" << std::endl;
575  return false;
576  }
577  file >> searchForBestKValue;
578 
579  file >> word;
580  if(word != "MinKSearchValue:"){
581  errorLog << "loadModelFromFile(fstream &file) - Could not find MinKSearchValue!" << std::endl;
582  return false;
583  }
584  file >> minKSearchValue;
585 
586  file >> word;
587  if(word != "MaxKSearchValue:"){
588  errorLog << "loadModelFromFile(fstream &file) - Could not find MaxKSearchValue!" << std::endl;
589  return false;
590  }
591  file >> maxKSearchValue;
592 
593  if( trained ){
594 
595  //Resize the buffers
596  trainingMu.resize(numClasses,0);
597  trainingSigma.resize(numClasses,0);
598 
599  if( useNullRejection ){
600  file >> word;
601  if(word != "TrainingMu:"){
602  errorLog << "loadModelFromFile(fstream &file) - Could not find TrainingMu!" << std::endl;
603  return false;
604  }
605 
606  //Load the trainingMu data
607  for(UINT j=0; j<numClasses; j++){
608  file >> trainingMu[j];
609  }
610 
611  file >> word;
612  if(word != "TrainingSigma:"){
613  errorLog << "loadModelFromFile(fstream &file) - Could not find TrainingSigma!" << std::endl;
614  return false;
615  }
616 
617  //Load the trainingSigma data
618  for(UINT j=0; j<numClasses; j++){
619  file >> trainingSigma[j];
620  }
621  }
622 
623  file >> word;
624  if(word != "NumTrainingSamples:"){
625  errorLog << "loadModelFromFile(fstream &file) - Could not find NumTrainingSamples!" << std::endl;
626  return false;
627  }
628  unsigned int numTrainingSamples = 0;
629  file >> numTrainingSamples;
630 
631  file >> word;
632  if(word != "TrainingData:"){
633  errorLog << "loadModelFromFile(fstream &file) - Could not find TrainingData!" << std::endl;
634  return false;
635  }
636 
637  //Load the training data
638  trainingData.setNumDimensions(numInputDimensions);
639  unsigned int classLabel = 0;
640  VectorFloat sample(numInputDimensions,0);
641  for(UINT i=0; i<numTrainingSamples; i++){
642  //Read the class label
643  file >> classLabel;
644 
645  //Read the feature vector
646  for(UINT j=0; j<numInputDimensions; j++){
647  file >> sample[j];
648  }
649 
650  //Add it to the training data
651  trainingData.addSample(classLabel, sample);
652  }
653 
654  maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
655  bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
656  classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
657  classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
658  }
659 
660  return true;
661 }
662 
663 bool KNN::recomputeNullRejectionThresholds(){
664 
665  if( !trained ){
666  return false;
667  }
668 
669  nullRejectionThresholds.resize(numClasses,0);
670 
671  if( trainingMu.size() != numClasses || trainingSigma.size() != numClasses ){
672  return false;
673  }
674 
675  for(unsigned int j=0; j<numClasses; j++){
676  nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
677  }
678 
679  return true;
680 }
681 
682 bool KNN::setK(UINT K){
683  if( K > 0 ){
684  this->K = K;
685  return true;
686  }
687  return false;
688 }
689 
690 bool KNN::setMinKSearchValue(UINT minKSearchValue){
691  this->minKSearchValue = minKSearchValue;
692  return true;
693 }
694 
695 bool KNN::setMaxKSearchValue(UINT maxKSearchValue){
696  this->maxKSearchValue = maxKSearchValue;
697  return true;
698 }
699 
700 bool KNN::enableBestKValueSearch(bool searchForBestKValue){
701  this->searchForBestKValue = searchForBestKValue;
702  return true;
703 }
704 
705 bool KNN::setNullRejectionCoeff(Float nullRejectionCoeff){
706  if( nullRejectionCoeff > 0 ){
707  this->nullRejectionCoeff = nullRejectionCoeff;
708  recomputeNullRejectionThresholds();
709  return true;
710  }
711  return false;
712 }
713 
714 bool KNN::setDistanceMethod(UINT distanceMethod){
715  if( distanceMethod == EUCLIDEAN_DISTANCE || distanceMethod == COSINE_DISTANCE || distanceMethod == MANHATTAN_DISTANCE ){
716  this->distanceMethod = distanceMethod;
717  return true;
718  }
719  return false;
720 }
721 
722 Float KNN::computeEuclideanDistance(const VectorFloat &a,const VectorFloat &b){
723  Float dist = 0;
724  for(UINT j=0; j<numInputDimensions; j++){
725  dist += SQR( a[j] - b[j] );
726  }
727  return sqrt( dist );
728 }
729 
730 Float KNN::computeCosineDistance(const VectorFloat &a,const VectorFloat &b){
731  Float dist = 0;
732 
733  Float dotAB = 0;
734  Float magA = 0;
735  Float magB = 0;
736 
737  for(UINT j=0; j<numInputDimensions; j++){
738  dotAB += a[j] * b[j];
739  magA += SQR(a[j]);
740  magB += SQR(b[j]);
741  }
742 
743  dist = dotAB / (sqrt(magA) * sqrt(magB));
744 
745  return dist;
746 }
747 
748 Float KNN::computeManhattanDistance(const VectorFloat &a,const VectorFloat &b){
749  Float dist = 0;
750 
751  for(UINT j=0; j<numInputDimensions; j++){
752  dist += fabs( a[j] - b[j] );
753  }
754 
755  return dist;
756 }
757 
758 bool KNN::loadLegacyModelFromFile( std::fstream &file ){
759 
760  std::string word;
761 
762  //Find the file type header
763  file >> word;
764  if(word != "NumFeatures:"){
765  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumFeatures!" << std::endl;
766  return false;
767  }
768  file >> numInputDimensions;
769 
770  file >> word;
771  if(word != "NumClasses:"){
772  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumClasses!" << std::endl;
773  return false;
774  }
775  file >> numClasses;
776 
777  file >> word;
778  if(word != "K:"){
779  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find K!" << std::endl;
780  return false;
781  }
782  file >> K;
783 
784  file >> word;
785  if(word != "DistanceMethod:"){
786  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find DistanceMethod!" << std::endl;
787  return false;
788  }
789  file >> distanceMethod;
790 
791  file >> word;
792  if(word != "SearchForBestKValue:"){
793  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find SearchForBestKValue!" << std::endl;
794  return false;
795  }
796  file >> searchForBestKValue;
797 
798  file >> word;
799  if(word != "MinKSearchValue:"){
800  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find MinKSearchValue!" << std::endl;
801  return false;
802  }
803  file >> minKSearchValue;
804 
805  file >> word;
806  if(word != "MaxKSearchValue:"){
807  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find MaxKSearchValue!" << std::endl;
808  return false;
809  }
810  file >> maxKSearchValue;
811 
812  file >> word;
813  if(word != "UseScaling:"){
814  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find UseScaling!" << std::endl;
815  return false;
816  }
817  file >> useScaling;
818 
819  file >> word;
820  if(word != "UseNullRejection:"){
821  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find UseNullRejection!" << std::endl;
822  return false;
823  }
824  file >> useNullRejection;
825 
826  file >> word;
827  if(word != "NullRejectionCoeff:"){
828  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NullRejectionCoeff!" << std::endl;
829  return false;
830  }
831  file >> nullRejectionCoeff;
832 
834  if( useScaling ){
835  //Resize the ranges buffer
836  ranges.resize( numInputDimensions );
837 
838  file >> word;
839  if(word != "Ranges:"){
840  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find Ranges!" << std::endl;
841  std::cout << "Word: " << word << std::endl;
842  return false;
843  }
844  for(UINT n=0; n<ranges.size(); n++){
845  file >> ranges[n].minValue;
846  file >> ranges[n].maxValue;
847  }
848  }
849 
850  //Resize the buffers
851  trainingMu.resize(numClasses,0);
852  trainingSigma.resize(numClasses,0);
853 
854  file >> word;
855  if(word != "TrainingMu:"){
856  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingMu!" << std::endl;
857  return false;
858  }
859 
860  //Load the trainingMu data
861  for(UINT j=0; j<numClasses; j++){
862  file >> trainingMu[j];
863  }
864 
865  file >> word;
866  if(word != "TrainingSigma:"){
867  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingSigma!" << std::endl;
868  return false;
869  }
870 
871  //Load the trainingSigma data
872  for(UINT j=0; j<numClasses; j++){
873  file >> trainingSigma[j];
874  }
875 
876  file >> word;
877  if(word != "NumTrainingSamples:"){
878  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumTrainingSamples!" << std::endl;
879  return false;
880  }
881  unsigned int numTrainingSamples = 0;
882  file >> numTrainingSamples;
883 
884  file >> word;
885  if(word != "TrainingData:"){
886  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingData!" << std::endl;
887  return false;
888  }
889 
890  //Load the training data
891  trainingData.setNumDimensions(numInputDimensions);
892  unsigned int classLabel = 0;
893  VectorFloat sample(numInputDimensions,0);
894  for(UINT i=0; i<numTrainingSamples; i++){
895  //Read the class label
896  file >> classLabel;
897 
898  //Read the feature vector
899  for(UINT j=0; j<numInputDimensions; j++){
900  file >> sample[j];
901  }
902 
903  //Add it to the training data
904  trainingData.addSample(classLabel, sample);
905  }
906 
907  //Flag that the model has been trained
908  trained = true;
909 
910  //Compute the null rejection thresholds
911  recomputeNullRejectionThresholds();
912 
913  return true;
914 }
915 
916 GRT_END_NAMESPACE
917 
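A configuration sketch for the tuning methods defined above. Enabling the best-K search makes train_ evaluate every K in [minKSearchValue, maxKSearchValue] on a random 80/20 split and retrain with the smallest K that gives the highest accuracy; null rejection assigns the null class whenever the predicted class's average distance exceeds trainingMu + trainingSigma * nullRejectionCoeff. The enum value KNN::MANHATTAN_DISTANCE and the enableNullRejection helper are assumed from the KNN and Classifier headers, which are not shown in this file.

KNN knn;
knn.enableBestKValueSearch( true );                //Search for the best K instead of using a fixed value
knn.setMinKSearchValue( 1 );                       //Lower bound of the K search
knn.setMaxKSearchValue( 20 );                      //Upper bound of the K search
knn.setDistanceMethod( KNN::MANHATTAN_DISTANCE );  //Assumed enum name from KNN.h
knn.enableNullRejection( true );                   //Assumed Classifier helper; rejects uncertain predictions
knn.setNullRejectionCoeff( 5.0 );                  //Per-class threshold = mu + 5.0 * sigma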