GestureRecognitionToolkit  Version: 0.2.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
KNN.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
21 #define GRT_DLL_EXPORTS
22 #include "KNN.h"
23 
24 GRT_BEGIN_NAMESPACE
25 
26 //Register the DTW module with the Classifier base class
28 
29 KNN::KNN(unsigned int K,bool useScaling,bool useNullRejection,Float nullRejectionCoeff,bool searchForBestKValue,UINT minKSearchValue,UINT maxKSearchValue){
30  this->K = K;
31  this->distanceMethod = EUCLIDEAN_DISTANCE;
32  this->useScaling = useScaling;
33  this->useNullRejection = useNullRejection;
34  this->nullRejectionCoeff = nullRejectionCoeff;
35  this->searchForBestKValue = searchForBestKValue;
36  this->minKSearchValue = minKSearchValue;
37  this->maxKSearchValue = maxKSearchValue;
38  supportsNullRejection = true;
39  classType = "KNN";
40  classifierType = classType;
41  classifierMode = STANDARD_CLASSIFIER_MODE;
42  distanceMethod = EUCLIDEAN_DISTANCE;
43  debugLog.setProceedingText("[DEBUG KNN]");
44  errorLog.setProceedingText("[ERROR KNN]");
45  trainingLog.setProceedingText("[TRAINING KNN]");
46  warningLog.setProceedingText("[WARNING KNN]");
47 }
48 
49 KNN::KNN(const KNN &rhs){
50  classType = "KNN";
51  classifierType = classType;
52  classifierMode = STANDARD_CLASSIFIER_MODE;
53  debugLog.setProceedingText("[DEBUG KNN]");
54  errorLog.setProceedingText("[ERROR KNN]");
55  trainingLog.setProceedingText("[TRAINING KNN]");
56  warningLog.setProceedingText("[WARNING KNN]");
57  *this = rhs;
58 }
59 
60 KNN::~KNN(void)
61 {
62 }
63 
64 KNN& KNN::operator=(const KNN &rhs){
65  if( this != &rhs ){
66  //KNN variables
67  this->K = rhs.K;
68  this->distanceMethod = rhs.distanceMethod;
69  this->searchForBestKValue = rhs.searchForBestKValue;
70  this->minKSearchValue = rhs.minKSearchValue;
71  this->maxKSearchValue = rhs.maxKSearchValue;
72  this->trainingData = rhs.trainingData;
73  this->trainingMu = rhs.trainingMu;
74  this->trainingSigma = rhs.trainingSigma;
75 
76  //Classifier variables
77  copyBaseVariables( (Classifier*)&rhs );
78  }
79  return *this;
80 }
81 
82 bool KNN::deepCopyFrom(const Classifier *classifier){
83 
84  if( classifier == NULL ) return false;
85 
86  if( this->getClassifierType() == classifier->getClassifierType() ){
87  //Get a pointer the KNN copy instance
88  KNN *ptr = (KNN*)classifier;
89 
90  this->K = ptr->K;
91  this->distanceMethod = ptr->distanceMethod;
92  this->searchForBestKValue = ptr->searchForBestKValue;
93  this->minKSearchValue = ptr->minKSearchValue;
94  this->maxKSearchValue = ptr->maxKSearchValue;
95  this->trainingData = ptr->trainingData;
96  this->trainingMu = ptr->trainingMu;
97  this->trainingSigma = ptr->trainingSigma;
98 
99  //Classifier variables
100  return copyBaseVariables( classifier );
101  }
102  return false;
103 }
104 
105 bool KNN::train_(ClassificationData &trainingData){
106 
107  //Clear any previous models
108  clear();
109 
110  if( trainingData.getNumSamples() == 0 ){
111  errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
112  return false;
113  }
114 
115  //Get the ranges of the data
116  ranges = trainingData.getRanges();
117  if( useScaling ){
118  //Scale the training data between 0 and 1
119  trainingData.scale(0, 1);
120  }
121 
122  //Store the number of features, classes and the training data
123  this->numInputDimensions = trainingData.getNumDimensions();
124  this->numClasses = trainingData.getNumClasses();
125 
126  //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
127  this->trainingData = trainingData;
128 
129  //Set the class labels
130  classLabels.resize( numClasses );
131  for(UINT k=0; k<numClasses; k++){
132  classLabels[k] = trainingData.getClassTracker()[k].classLabel;
133  }
134 
135  //If we do not need to search for the best K value, then call the sub training function and return the result
136  if( !searchForBestKValue ){
137  return train_(trainingData,K);
138  }
139 
140  //If we have got this far then we are going to search for the best K value
141  UINT index = 0;
142  Float bestAccuracy = 0;
143  Vector< IndexedDouble > trainingAccuracyLog;
144 
145  for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
146  //Randomly spilt the data and use 80% to train the algorithm and 20% to test it
147  ClassificationData trainingSet(trainingData);
148  ClassificationData testSet = trainingSet.split(80,true);
149 
150  if( !train_(trainingSet, k) ){
151  errorLog << "Failed to train model for a k value of " << k << std::endl;
152  }else{
153 
154  //Compute the classification error
155  Float accuracy = 0;
156  for(UINT i=0; i<testSet.getNumSamples(); i++){
157 
158  VectorFloat sample = testSet[i].getSample();
159 
160  if( !predict( sample , k) ){
161  errorLog << "Failed to predict label for test sample with a k value of " << k << std::endl;
162  return false;
163  }
164 
165  if( testSet[i].getClassLabel() == predictedClassLabel ){
166  accuracy++;
167  }
168  }
169 
170  accuracy = accuracy /Float( testSet.getNumSamples() ) * 100.0;
171  trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
172 
173  trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << std::endl;
174 
175  if( accuracy > bestAccuracy ){
176  bestAccuracy = accuracy;
177  }
178 
179  index++;
180  }
181 
182  }
183 
184  if( bestAccuracy > 0 ){
185  //Sort the training log by value
186  std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
187 
188  //Copy the top matching values into a temporary buffer
189  Vector< IndexedDouble > tempLog;
190 
191  //Add the first value
192  tempLog.push_back( trainingAccuracyLog[0] );
193 
194  //Keep adding values until the value changes
195  for(UINT i=1; i<trainingAccuracyLog.size(); i++){
196  if( trainingAccuracyLog[i].value == tempLog[0].value ){
197  tempLog.push_back( trainingAccuracyLog[i] );
198  }else break;
199  }
200 
201  //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
202  std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);
203 
204  trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << std::endl;
205 
206  //Use the minimum index, this should give us the best accuracy with the minimum K value
207  //We now need to train the model again to make sure all the training metrics are computed correctly
208  return train_(trainingData,tempLog[0].index);
209  }
210 
211  return false;
212 }
213 
/**
 Trains the model for a specific K value. The training data itself was already
 stored by the public train_ overload; this function stores K, flags the model
 as trained, and (if null rejection is enabled) computes per-class rejection
 thresholds from the distribution of class distances over the training set.
 @param trainingData the labelled training data used to estimate the thresholds
 @param K the number of neighbours to use
 @return true if training succeeded, false if a NaN was found in the threshold statistics
*/
bool KNN::train_(const ClassificationData &trainingData,const UINT K){

    //Set the dimensionality of the input data
    this->K = K;

    //Flag that the algorithm has been trained so we can compute the rejection thresholds
    trained = true;

    //If null rejection is enabled then compute the null rejection thresholds
    if( useNullRejection ){

        //Set the null rejection to false so we can compute the values for it (this will be set back to its current value later)
        //This is required because predict() below must not reject samples while we gather the distance statistics
        useNullRejection = false;
        nullRejectionThresholds.clear();

        //Compute the rejection thresholds for each of the K classes
        VectorFloat counter(numClasses,0);
        trainingMu.resize( numClasses, 0 );
        trainingSigma.resize( numClasses, 0 );
        nullRejectionThresholds.resize( numClasses, 0 );

        //Compute Mu for each of the classes: run every training sample back through predict()
        //and accumulate the average class distance of the predicted class
        const unsigned int numTrainingExamples = trainingData.getNumSamples();
        Vector< IndexedDouble > predictionResults( numTrainingExamples );
        for(UINT i=0; i<numTrainingExamples; i++){
            predict( trainingData[i].getSample(), K);

            //Map the predicted class label back to its index in classLabels
            UINT classLabelIndex = 0;
            for(UINT k=0; k<numClasses; k++){
                if( predictedClassLabel == classLabels[k] ){
                    classLabelIndex = k;
                    break;
                }
            }

            //Remember which class this sample was predicted as and its class distance,
            //so sigma can be computed in a second pass below
            predictionResults[ i ].index = classLabelIndex;
            predictionResults[ i ].value = classDistances[ classLabelIndex ];

            trainingMu[ classLabelIndex ] += predictionResults[ i ].value;
            counter[ classLabelIndex ]++;
        }

        //NOTE(review): if no training sample is ever predicted as class j, counter[j] is 0 and
        //this division yields NaN — the grt_isnan check further down catches that case and fails
        for(UINT j=0; j<numClasses; j++){
            trainingMu[j] /= counter[j];
        }

        //Compute Sigma for each of the classes (sum of squared deviations from the class mean)
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSigma[predictionResults[i].index] += SQR(predictionResults[i].value - trainingMu[predictionResults[i].index]);
        }

        //Finish the (sample) standard deviation; fall back to 1.0 when a class has <= 1 sample
        for(UINT j=0; j<numClasses; j++){
            Float count = counter[j];
            if( count > 1 ){
                trainingSigma[ j ] = sqrt( trainingSigma[j] / (count-1) );
            }else{
                trainingSigma[ j ] = 1.0;
            }
        }

        //Check to see if any of the mu or sigma values are zero or NaN
        bool errorFound = false;
        for(UINT j=0; j<numClasses; j++){
            if( trainingMu[j] == 0 ){
                warningLog << "TrainingMu[ " << j << " ] is zero for a K value of " << K << std::endl;
            }
            if( trainingSigma[j] == 0 ){
                warningLog << "TrainingSigma[ " << j << " ] is zero for a K value of " << K << std::endl;
            }
            if( grt_isnan( trainingMu[j] ) ){
                errorLog << "TrainingMu[ " << j << " ] is NAN for a K value of " << K << std::endl;
                errorFound = true;
            }
            if( grt_isnan( trainingSigma[j] ) ){
                errorLog << "TrainingSigma[ " << j << " ] is NAN for a K value of " << K << std::endl;
                errorFound = true;
            }
        }

        if( errorFound ){
            trained = false;
            return false;
        }

        //Compute the rejection thresholds: mu + sigma * coefficient, per class
        for(unsigned int j=0; j<numClasses; j++){
            nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
        }

        //Restore the actual state of the null rejection
        useNullRejection = true;

    }else{
        //Resize the rejection thresholds but set the values to 0
        nullRejectionThresholds.clear();
        nullRejectionThresholds.resize( numClasses, 0 );
    }

    return true;
}
314 
315 bool KNN::predict_(VectorFloat &inputVector){
316 
317  if( !trained ){
318  errorLog << "predict_(VectorFloat &inputVector) - KNN model has not been trained" << std::endl;
319  return false;
320  }
321 
322  if( inputVector.size() != numInputDimensions ){
323  errorLog << "predict_(VectorFloat &inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numInputDimensions << std::endl;
324  return false;
325  }
326 
327  //Scale the input vector if needed
328  if( useScaling ){
329  for(UINT i=0; i<numInputDimensions; i++){
330  inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
331  }
332  }
333 
334  //Run the prediction
335  return predict(inputVector,K);
336 }
337 
/**
 Predicts the class label of the input vector using a specific K value.
 Performs a brute-force scan over all training samples, keeps the K nearest,
 and assigns the label by majority vote. Fills classLikelihoods (vote
 fractions) and classDistances (average distance per class), and applies null
 rejection to predictedClassLabel when enabled.
 @param inputVector the feature vector to classify (assumed already scaled if useScaling is set)
 @param K the number of neighbours to use for the vote
 @return true if the prediction succeeded, false otherwise
*/
bool KNN::predict(const VectorFloat &inputVector,const UINT K){

    if( !trained ){
        errorLog << "predict(VectorFloat inputVector,UINT K) - KNN model has not been trained" << std::endl;
        return false;
    }

    if( inputVector.size() != numInputDimensions ){
        errorLog << "predict(VectorFloat inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numInputDimensions << std::endl;
        return false;
    }

    //Cannot select K neighbours out of fewer than K samples
    if( K > trainingData.getNumSamples() ){
        errorLog << "predict(VectorFloat inputVector,UINT K) - K Is Greater Than The Number Of Training Samples" << std::endl;
        return false;
    }

    //TODO - need to build a kdtree of the training data to allow better realtime prediction
    const UINT M = trainingData.getNumSamples();
    Vector< IndexedDouble > neighbours;

    //Brute-force scan: maintain a buffer of the K smallest distances seen so far
    for(UINT i=0; i<M; i++){
        Float dist = 0;
        UINT classLabel = trainingData[i].getClassLabel();
        VectorFloat trainingSample = trainingData[i].getSample();

        switch( distanceMethod ){
            case EUCLIDEAN_DISTANCE:
                dist = computeEuclideanDistance(inputVector,trainingSample);
                break;
            case COSINE_DISTANCE:
                dist = computeCosineDistance(inputVector,trainingSample);
                break;
            case MANHATTAN_DISTANCE:
                dist = computeManhattanDistance(inputVector, trainingSample);
                break;
            default:
                errorLog << "predict(vector< Float > inputVector) - unkown distance measure!" << std::endl;
                return false;
                break;
        }

        if( neighbours.size() < K ){
            //Buffer not full yet: accept unconditionally
            neighbours.push_back( IndexedDouble(classLabel,dist) );
        }else{
            //Find the maximum value in the neighbours buffer
            Float maxValue = neighbours[0].value;
            UINT maxIndex = 0;
            for(UINT n=1; n<neighbours.size(); n++){
                if( neighbours[n].value > maxValue ){
                    maxValue = neighbours[n].value;
                    maxIndex = n;
                }
            }

            //If the dist is less than the maximum value in the buffer, then replace that value with the new dist
            if( dist < maxValue ){
                neighbours[ maxIndex ] = IndexedDouble(classLabel,dist);
            }
        }
    }

    //Predict the class ID using the labels of the K nearest neighbours
    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses);

    std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
    std::fill(classDistances.begin(),classDistances.end(),0);

    //Count the votes per class and accumulate distances
    for(UINT k=0; k<neighbours.size(); k++){
        UINT classLabel = neighbours[k].index;
        if( classLabel == 0 ){
            errorLog << "predict(VectorFloat inputVector) - Class label of training example can not be zero!" << std::endl;
            return false;
        }

        //Find the index of the classLabel
        UINT classLabelIndex = 0;
        for(UINT j=0; j<numClasses; j++){
            if( classLabel == classLabels[j] ){
                classLabelIndex = j;
                break;
            }
        }
        classLikelihoods[ classLabelIndex ] += 1;
        classDistances[ classLabelIndex ] += neighbours[k].value;
    }

    //Get the max count (the winning class by majority vote)
    Float maxCount = classLikelihoods[0];
    UINT maxIndex = 0;
    for(UINT i=1; i<classLikelihoods.size(); i++){
        if( classLikelihoods[i] > maxCount ){
            maxCount = classLikelihoods[i];
            maxIndex = i;
        }
    }

    //Compute the average distance per class; classes with no votes get BIG_DISTANCE
    for(UINT i=0; i<numClasses; i++){
        if( classLikelihoods[i] > 0 ) classDistances[i] /= classLikelihoods[i];
        else classDistances[i] = BIG_DISTANCE;
    }

    //Normalize the likelihoods so they sum to 1 across the classes
    for(UINT i=0; i<numClasses; i++){
        classLikelihoods[i] /= Float( neighbours.size() );
    }

    //Set the maximum likelihood value
    maxLikelihood = classLikelihoods[ maxIndex ];

    if( useNullRejection ){
        //Accept the prediction only if the winning class's average distance is within its threshold
        if( classDistances[ maxIndex ] <= nullRejectionThresholds[ maxIndex ] ){
            predictedClassLabel = classLabels[maxIndex];
        }else{
            predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; //Set the gesture label as the null label
        }
    }else{
        predictedClassLabel = classLabels[maxIndex];
    }

    return true;
}
463 
464 bool KNN::clear(){
465 
466  //Clear the Classifier variables
468 
469  //Clear the KNN model
470  trainingData.clear();
471  trainingMu.clear();
472  trainingSigma.clear();
473 
474  return true;
475 }
476 
477 bool KNN::save( std::fstream &file ) const{
478 
479  if(!file.is_open())
480  {
481  errorLog << "save(fstream &file) - Could not open file to save model!" << std::endl;
482  return false;
483  }
484 
485  //Write the header info
486  file << "GRT_KNN_MODEL_FILE_V2.0\n";
487 
488  //Write the classifier settings to the file
490  errorLog <<"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
491  return false;
492  }
493 
494  file << "K: " << K << std::endl;
495  file << "DistanceMethod: " << distanceMethod << std::endl;
496  file << "SearchForBestKValue: " << searchForBestKValue << std::endl;
497  file << "MinKSearchValue: " << minKSearchValue << std::endl;
498  file << "MaxKSearchValue: " << maxKSearchValue << std::endl;
499 
500  if( trained ){
501  if( useNullRejection ){
502  file << "TrainingMu: ";
503  for(UINT j=0; j<trainingMu.size(); j++){
504  file << trainingMu[j] << "\t";
505  }file << std::endl;
506 
507  file << "TrainingSigma: ";
508  for(UINT j=0; j<trainingSigma.size(); j++){
509  file << trainingSigma[j] << "\t";
510  }file << std::endl;
511  }
512 
513  file << "NumTrainingSamples: " << trainingData.getNumSamples() << std::endl;
514  file << "TrainingData: \n";
515 
516  //Right each of the models
517  for(UINT i=0; i<trainingData.getNumSamples(); i++){
518  file<< trainingData[i].getClassLabel() << "\t";
519 
520  for(UINT j=0; j<numInputDimensions; j++){
521  file << trainingData[i][j] << "\t";
522  }
523  file << std::endl;
524  }
525  }
526 
527  return true;
528 }
529 
530 bool KNN::load( std::fstream &file ){
531 
532  if(!file.is_open())
533  {
534  errorLog << "load(fstream &file) - Could not open file to load model!" << std::endl;
535  return false;
536  }
537 
538  std::string word;
539 
540  file >> word;
541 
542  //Check to see if we should load a legacy file
543  if( word == "GRT_KNN_MODEL_FILE_V1.0" ){
544  return loadLegacyModelFromFile( file );
545  }
546 
547  //Find the file type header
548  if(word != "GRT_KNN_MODEL_FILE_V2.0"){
549  errorLog << "load(fstream &file) - Could not find Model File Header!" << std::endl;
550  return false;
551  }
552 
553  //Load the base settings from the file
555  errorLog << "load(string filename) - Failed to load base settings from file!" << std::endl;
556  return false;
557  }
558 
559  file >> word;
560  if(word != "K:"){
561  errorLog << "load(fstream &file) - Could not find K!" << std::endl;
562  return false;
563  }
564  file >> K;
565 
566  file >> word;
567  if(word != "DistanceMethod:"){
568  errorLog << "load(fstream &file) - Could not find DistanceMethod!" << std::endl;
569  return false;
570  }
571  file >> distanceMethod;
572 
573  file >> word;
574  if(word != "SearchForBestKValue:"){
575  errorLog << "load(fstream &file) - Could not find SearchForBestKValue!" << std::endl;
576  return false;
577  }
578  file >> searchForBestKValue;
579 
580  file >> word;
581  if(word != "MinKSearchValue:"){
582  errorLog << "load(fstream &file) - Could not find MinKSearchValue!" << std::endl;
583  return false;
584  }
585  file >> minKSearchValue;
586 
587  file >> word;
588  if(word != "MaxKSearchValue:"){
589  errorLog << "load(fstream &file) - Could not find MaxKSearchValue!" << std::endl;
590  return false;
591  }
592  file >> maxKSearchValue;
593 
594  if( trained ){
595 
596  //Resize the buffers
597  trainingMu.resize(numClasses,0);
598  trainingSigma.resize(numClasses,0);
599 
600  if( useNullRejection ){
601  file >> word;
602  if(word != "TrainingMu:"){
603  errorLog << "load(fstream &file) - Could not find TrainingMu!" << std::endl;
604  return false;
605  }
606 
607  //Load the trainingMu data
608  for(UINT j=0; j<numClasses; j++){
609  file >> trainingMu[j];
610  }
611 
612  file >> word;
613  if(word != "TrainingSigma:"){
614  errorLog << "load(fstream &file) - Could not find TrainingSigma!" << std::endl;
615  return false;
616  }
617 
618  //Load the trainingSigma data
619  for(UINT j=0; j<numClasses; j++){
620  file >> trainingSigma[j];
621  }
622  }
623 
624  file >> word;
625  if(word != "NumTrainingSamples:"){
626  errorLog << "load(fstream &file) - Could not find NumTrainingSamples!" << std::endl;
627  return false;
628  }
629  unsigned int numTrainingSamples = 0;
630  file >> numTrainingSamples;
631 
632  file >> word;
633  if(word != "TrainingData:"){
634  errorLog << "load(fstream &file) - Could not find TrainingData!" << std::endl;
635  return false;
636  }
637 
638  //Load the training data
639  trainingData.setNumDimensions(numInputDimensions);
640  unsigned int classLabel = 0;
641  VectorFloat sample(numInputDimensions,0);
642  for(UINT i=0; i<numTrainingSamples; i++){
643  //Read the class label
644  file >> classLabel;
645 
646  //Read the feature vector
647  for(UINT j=0; j<numInputDimensions; j++){
648  file >> sample[j];
649  }
650 
651  //Add it to the training data
652  trainingData.addSample(classLabel, sample);
653  }
654 
655  maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
656  bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
657  classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
658  classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
659  }
660 
661  return true;
662 }
663 
665 
666  if( !trained ){
667  return false;
668  }
669 
670  nullRejectionThresholds.resize(numClasses,0);
671 
672  if( trainingMu.size() != numClasses || trainingSigma.size() != numClasses ){
673  return false;
674  }
675 
676  for(unsigned int j=0; j<numClasses; j++){
677  nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
678  }
679 
680  return true;
681 }
682 
683 bool KNN::setK(UINT K){
684  if( K > 0 ){
685  this->K = K;
686  return true;
687  }
688  return false;
689 }
690 
691 bool KNN::setMinKSearchValue(UINT minKSearchValue){
692  this->minKSearchValue = minKSearchValue;
693  return true;
694 }
695 
696 bool KNN::setMaxKSearchValue(UINT maxKSearchValue){
697  this->maxKSearchValue = maxKSearchValue;
698  return true;
699 }
700 
701 bool KNN::enableBestKValueSearch(bool searchForBestKValue){
702  this->searchForBestKValue = searchForBestKValue;
703  return true;
704 }
705 
706 bool KNN::setNullRejectionCoeff(Float nullRejectionCoeff){
707  if( nullRejectionCoeff > 0 ){
708  this->nullRejectionCoeff = nullRejectionCoeff;
710  return true;
711  }
712  return false;
713 }
714 
715 bool KNN::setDistanceMethod(UINT distanceMethod){
716  if( distanceMethod == EUCLIDEAN_DISTANCE || distanceMethod == COSINE_DISTANCE || distanceMethod == MANHATTAN_DISTANCE ){
717  this->distanceMethod = distanceMethod;
718  return true;
719  }
720  return false;
721 }
722 
723 Float KNN::computeEuclideanDistance(const VectorFloat &a,const VectorFloat &b){
724  Float dist = 0;
725  for(UINT j=0; j<numInputDimensions; j++){
726  dist += SQR( a[j] - b[j] );
727  }
728  return sqrt( dist );
729 }
730 
731 Float KNN::computeCosineDistance(const VectorFloat &a,const VectorFloat &b){
732  Float dist = 0;
733 
734  Float dotAB = 0;
735  Float magA = 0;
736  Float magB = 0;
737 
738  for(UINT j=0; j<numInputDimensions; j++){
739  dotAB += a[j] * b[j];
740  magA += SQR(a[j]);
741  magB += SQR(b[j]);
742  }
743 
744  dist = dotAB / (sqrt(magA) * sqrt(magB));
745 
746  return dist;
747 }
748 
749 Float KNN::computeManhattanDistance(const VectorFloat &a,const VectorFloat &b){
750  Float dist = 0;
751 
752  for(UINT j=0; j<numInputDimensions; j++){
753  dist += fabs( a[j] - b[j] );
754  }
755 
756  return dist;
757 }
758 
759 bool KNN::loadLegacyModelFromFile( std::fstream &file ){
760 
761  std::string word;
762 
763  //Find the file type header
764  file >> word;
765  if(word != "NumFeatures:"){
766  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumFeatures!" << std::endl;
767  return false;
768  }
769  file >> numInputDimensions;
770 
771  file >> word;
772  if(word != "NumClasses:"){
773  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumClasses!" << std::endl;
774  return false;
775  }
776  file >> numClasses;
777 
778  file >> word;
779  if(word != "K:"){
780  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find K!" << std::endl;
781  return false;
782  }
783  file >> K;
784 
785  file >> word;
786  if(word != "DistanceMethod:"){
787  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find DistanceMethod!" << std::endl;
788  return false;
789  }
790  file >> distanceMethod;
791 
792  file >> word;
793  if(word != "SearchForBestKValue:"){
794  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find SearchForBestKValue!" << std::endl;
795  return false;
796  }
797  file >> searchForBestKValue;
798 
799  file >> word;
800  if(word != "MinKSearchValue:"){
801  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find MinKSearchValue!" << std::endl;
802  return false;
803  }
804  file >> minKSearchValue;
805 
806  file >> word;
807  if(word != "MaxKSearchValue:"){
808  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find MaxKSearchValue!" << std::endl;
809  return false;
810  }
811  file >> maxKSearchValue;
812 
813  file >> word;
814  if(word != "UseScaling:"){
815  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find UseScaling!" << std::endl;
816  return false;
817  }
818  file >> useScaling;
819 
820  file >> word;
821  if(word != "UseNullRejection:"){
822  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find UseNullRejection!" << std::endl;
823  return false;
824  }
825  file >> useNullRejection;
826 
827  file >> word;
828  if(word != "NullRejectionCoeff:"){
829  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NullRejectionCoeff!" << std::endl;
830  return false;
831  }
832  file >> nullRejectionCoeff;
833 
835  if( useScaling ){
836  //Resize the ranges buffer
837  ranges.resize( numInputDimensions );
838 
839  file >> word;
840  if(word != "Ranges:"){
841  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find Ranges!" << std::endl;
842  std::cout << "Word: " << word << std::endl;
843  return false;
844  }
845  for(UINT n=0; n<ranges.size(); n++){
846  file >> ranges[n].minValue;
847  file >> ranges[n].maxValue;
848  }
849  }
850 
851  //Resize the buffers
852  trainingMu.resize(numClasses,0);
853  trainingSigma.resize(numClasses,0);
854 
855  file >> word;
856  if(word != "TrainingMu:"){
857  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingMu!" << std::endl;
858  return false;
859  }
860 
861  //Load the trainingMu data
862  for(UINT j=0; j<numClasses; j++){
863  file >> trainingMu[j];
864  }
865 
866  file >> word;
867  if(word != "TrainingSigma:"){
868  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingSigma!" << std::endl;
869  return false;
870  }
871 
872  //Load the trainingSigma data
873  for(UINT j=0; j<numClasses; j++){
874  file >> trainingSigma[j];
875  }
876 
877  file >> word;
878  if(word != "NumTrainingSamples:"){
879  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumTrainingSamples!" << std::endl;
880  return false;
881  }
882  unsigned int numTrainingSamples = 0;
883  file >> numTrainingSamples;
884 
885  file >> word;
886  if(word != "TrainingData:"){
887  errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingData!" << std::endl;
888  return false;
889  }
890 
891  //Load the training data
892  trainingData.setNumDimensions(numInputDimensions);
893  unsigned int classLabel = 0;
894  VectorFloat sample(numInputDimensions,0);
895  for(UINT i=0; i<numTrainingSamples; i++){
896  //Read the class label
897  file >> classLabel;
898 
899  //Read the feature vector
900  for(UINT j=0; j<numInputDimensions; j++){
901  file >> sample[j];
902  }
903 
904  //Add it to the training data
905  trainingData.addSample(classLabel, sample);
906  }
907 
908  //Flag that the model has been trained
909  trained = true;
910 
911  //Compute the null rejection thresholds
913 
914  return true;
915 }
916 
917 GRT_END_NAMESPACE
918 
bool saveBaseSettingsToFile(std::fstream &file) const
Definition: Classifier.cpp:256
VectorFloat trainingSigma
Holds the average max-class distance of the training data for each of classes
Definition: KNN.h:238
#define DEFAULT_NULL_LIKELIHOOD_VALUE
Definition: Classifier.h:38
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: MLBase.h:353
bool addSample(UINT classLabel, const VectorFloat &sample)
virtual bool save(std::fstream &file) const
Definition: KNN.cpp:477
virtual bool load(std::fstream &file)
Definition: KNN.cpp:530
bool searchForBestKValue
The distance method used to compute the distance between each data point
Definition: KNN.h:233
std::string getClassifierType() const
Definition: Classifier.cpp:161
Vector< ClassTracker > getClassTracker() const
This class implements the K-Nearest Neighbor classification algorithm (http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm). KNN is a simple but powerful classifier, based on finding the closest K training examples in the feature space for the new input vector. The KNN algorithm is amongst the simplest of all machine learning algorithms: an object is classified by a majority vote of its neighbors, with the object being assigned to the class most common amongst its k nearest neighbors (k is a positive integer, typically small). If k = 1, then the object is simply assigned to the class of its nearest neighbor.
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
bool setNumDimensions(UINT numDimensions)
virtual bool train_(ClassificationData &trainingData)
Definition: KNN.cpp:105
UINT distanceMethod
The number of neighbours to search for
Definition: KNN.h:232
bool setK(UINT K)
Definition: KNN.cpp:683
KNN(UINT K=10, bool useScaling=false, bool useNullRejection=false, Float nullRejectionCoeff=10.0, bool searchForBestKValue=false, UINT minKSearchValue=1, UINT maxKSearchValue=10)
ClassificationData trainingData
The maximum K value to end the search at
Definition: KNN.h:236
UINT maxKSearchValue
The minimum K value to start the search from
Definition: KNN.h:235
bool setDistanceMethod(UINT distanceMethod)
Definition: KNN.cpp:715
virtual bool recomputeNullRejectionThresholds()
Definition: KNN.cpp:664
KNN & operator=(const KNN &rhs)
Definition: KNN.cpp:64
bool setMaxKSearchValue(UINT maxKSearchValue)
Definition: KNN.cpp:696
bool enableBestKValueSearch(bool searchForBestKValue)
Definition: KNN.cpp:701
UINT getNumSamples() const
virtual bool predict_(VectorFloat &inputVector)
Definition: KNN.cpp:315
virtual bool deepCopyFrom(const Classifier *classifier)
Definition: KNN.cpp:82
UINT minKSearchValue
Sets if the best K value should be searched for or if the model should be trained with K ...
Definition: KNN.h:234
static RegisterClassifierModule< KNN > registerModule
Holds the stddev of the max-class distance of the training data for each of classes ...
Definition: KNN.h:240
bool copyBaseVariables(const Classifier *classifier)
Definition: Classifier.cpp:93
bool loadBaseSettingsFromFile(std::fstream &file)
Definition: Classifier.cpp:303
VectorFloat trainingMu
Holds the trainingData to perform the predictions
Definition: KNN.h:237
UINT getNumDimensions() const
UINT getNumClasses() const
#define BIG_DISTANCE
Definition: KNN.h:49
Vector< MinMax > getRanges() const
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
bool setNullRejectionCoeff(Float nullRejectionCoeff)
Definition: KNN.cpp:706
bool setMinKSearchValue(UINT minKSearchValue)
Definition: KNN.cpp:691
bool scale(const Float minTarget, const Float maxTarget)
virtual bool clear()
Definition: KNN.cpp:464
virtual ~KNN(void)
Definition: KNN.cpp:60
virtual bool clear()
Definition: Classifier.cpp:142
Definition: KNN.h:51
bool loadLegacyModelFromFile(std::fstream &file)
Definition: KNN.cpp:759