21 #define GRT_DLL_EXPORTS 27 const std::string MinDist::id =
"MinDist";
35 this->useScaling = useScaling;
36 this->useNullRejection = useNullRejection;
37 this->nullRejectionCoeff = nullRejectionCoeff;
38 this->numClusters = numClusters;
39 supportsNullRejection =
true;
40 classifierMode = STANDARD_CLASSIFIER_MODE;
45 classifierMode = STANDARD_CLASSIFIER_MODE;
56 this->numClusters = rhs.numClusters;
57 this->models = rhs.models;
67 if( classifier == NULL )
return false;
73 this->numClusters = ptr->numClusters;
74 this->models = ptr->models;
93 errorLog << __GRT_LOG__ <<
" Training data has zero samples!" << std::endl;
97 if( M <= numClusters ){
98 errorLog << __GRT_LOG__ <<
" There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << std::endl;
102 numInputDimensions = N;
103 numOutputDimensions = K;
107 nullRejectionThresholds.
resize(K);
114 trainingData.
scale(0, 1);
117 if( useValidationSet ){
118 validationData = trainingData.
split( 100-validationSetSize );
122 for(UINT k=0; k<numClasses; k++){
124 trainingLog <<
"Training model for class: " << trainingData.
getClassTracker()[k].classLabel << std::endl;
133 classLabels[k] = classLabel;
140 for(UINT i=0; i<data.getNumRows(); i++){
141 for(UINT j=0; j<data.getNumCols(); j++){
142 data[i][j] = classData[i][j];
147 models[k].setGamma( nullRejectionCoeff );
148 if( !models[k].
train(classLabel,data,numClusters,minChange,maxNumEpochs) ){
149 errorLog << __GRT_LOG__ <<
" Failed to train model for class: " << classLabel;
150 errorLog <<
". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << std::endl;
156 nullRejectionThresholds[k] = models[k].getRejectionThreshold();
164 trainingSetAccuracy = 0;
165 validationSetAccuracy = 0;
168 bool scalingState = useScaling;
173 errorLog << __GRT_LOG__ <<
" Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
177 if( useValidationSet ){
181 errorLog << __GRT_LOG__ <<
" Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
187 trainingLog <<
"Training set accuracy: " << trainingSetAccuracy << std::endl;
189 if( useValidationSet ){
190 trainingLog <<
"Validation set accuracy: " << validationSetAccuracy << std::endl;
194 useScaling = scalingState;
201 predictedClassLabel = 0;
205 errorLog <<
"predict_(VectorFloat &inputVector) - MinDist Model Not Trained!" << std::endl;
209 if( inputVector.size() != numInputDimensions ){
210 errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
215 for(UINT n=0; n<numInputDimensions; n++){
216 inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
220 if( classLikelihoods.size() != numClasses ) classLikelihoods.
resize(numClasses,0);
221 if( classDistances.size() != numClasses ) classDistances.
resize(numClasses,0);
225 for(UINT k=0; k<numClasses; k++){
227 classDistances[k] = models[k].predict( inputVector );
230 if( classDistances[k] < minDist ){
231 minDist = classDistances[k];
232 predictedClassLabel = k;
236 classLikelihoods[k] = 1.0 / (classDistances[k] + 0.0001);
237 sum += classLikelihoods[k];
242 for(UINT k=0; k<numClasses; k++){
243 classLikelihoods[k] /= sum;
245 maxLikelihood = classLikelihoods[predictedClassLabel];
246 }
else maxLikelihood = classLikelihoods[predictedClassLabel];
248 if( useNullRejection ){
250 if( minDist <= models[predictedClassLabel].getRejectionThreshold() ) predictedClassLabel = models[predictedClassLabel].getClassLabel();
251 else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
252 }
else predictedClassLabel = models[predictedClassLabel].getClassLabel();
271 for(UINT k=0; k<numClasses; k++) {
272 models[k].setGamma( nullRejectionCoeff );
273 models[k].recomputeThresholdValue();
282 if( nullRejectionCoeff > 0 ){
283 this->nullRejectionCoeff = nullRejectionCoeff;
302 errorLog <<
"save(fstream &file) - The file is not open!" << std::endl;
307 file<<
"GRT_MINDIST_MODEL_FILE_V2.0\n";
311 errorLog <<
"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
318 for(UINT k=0; k<numClasses; k++){
319 file <<
"ClassLabel: " << models[k].getClassLabel() << std::endl;
320 file <<
"NumClusters: " << models[k].getNumClusters() << std::endl;
321 file <<
"RejectionThreshold: " << models[k].getRejectionThreshold() << std::endl;
322 file <<
"Gamma: " << models[k].getGamma() << std::endl;
323 file <<
"TrainingMu: " << models[k].getTrainingMu() << std::endl;
324 file <<
"TrainingSigma: " << models[k].getTrainingSigma() << std::endl;
325 file <<
"ClusterData:" << std::endl;
327 for(UINT i=0; i<models[k].getNumClusters(); i++){
328 for(UINT j=0; j<models[k].getNumFeatures(); j++){
329 file << clusters[i][j] <<
"\t";
346 errorLog <<
"load(string filename) - Could not open file to load model" << std::endl;
356 if( word ==
"GRT_MINDIST_MODEL_FILE_V1.0" ){
361 if(word !=
"GRT_MINDIST_MODEL_FILE_V2.0"){
362 errorLog <<
"load(string filename) - Could not find Model File Header" << std::endl;
368 errorLog <<
"load(string filename) - Failed to load base settings from file!" << std::endl;
375 models.
resize(numClasses);
376 classLabels.
resize(numClasses);
379 for(UINT k=0; k<numClasses; k++){
380 Float rejectionThreshold;
386 if( word !=
"ClassLabel:" ){
387 errorLog <<
"load(string filename) - Could not load the class label for class " << k << std::endl;
390 file >> classLabels[k];
393 if( word !=
"NumClusters:" ){
394 errorLog <<
"load(string filename) - Could not load the NumClusters for class " << k << std::endl;
400 if( word !=
"RejectionThreshold:" ){
401 errorLog <<
"load(string filename) - Could not load the RejectionThreshold for class " << k << std::endl;
404 file >> rejectionThreshold;
407 if( word !=
"Gamma:" ){
408 errorLog <<
"load(string filename) - Could not load the Gamma for class " << k << std::endl;
414 if( word !=
"TrainingMu:" ){
415 errorLog <<
"load(string filename) - Could not load the TrainingMu for class " << k << std::endl;
421 if( word !=
"TrainingSigma:" ){
422 errorLog <<
"load(string filename) - Could not load the TrainingSigma for class " << k << std::endl;
425 file >> trainingSigma;
428 if( word !=
"ClusterData:" ){
429 errorLog <<
"load(string filename) - Could not load the ClusterData for class " << k << std::endl;
434 MatrixFloat clusters(numClusters,numInputDimensions);
435 for(UINT i=0; i<numClusters; i++){
436 for(UINT j=0; j<numInputDimensions; j++){
437 file >> clusters[i][j];
441 models[k].setClassLabel( classLabels[k] );
442 models[k].setClusters( clusters );
443 models[k].setGamma( gamma );
444 models[k].setRejectionThreshold( rejectionThreshold );
445 models[k].setTrainingSigma( trainingSigma );
446 models[k].setTrainingMu( trainingMu );
454 bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
456 classDistances.
resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
463 if( numClusters > 0 ){
464 this->numClusters = numClusters;
475 if(word !=
"NumFeatures:"){
476 errorLog <<
"load(string filename) - Could not find NumFeatures " << std::endl;
479 file >> numInputDimensions;
482 if(word !=
"NumClasses:"){
483 errorLog <<
"load(string filename) - Could not find NumClasses" << std::endl;
489 if(word !=
"UseScaling:"){
490 errorLog <<
"load(string filename) - Could not find UseScaling" << std::endl;
496 if(word !=
"UseNullRejection:"){
497 errorLog <<
"load(string filename) - Could not find UseNullRejection" << std::endl;
500 file >> useNullRejection;
505 ranges.
resize(numInputDimensions);
508 if(word !=
"Ranges:"){
509 errorLog <<
"load(string filename) - Could not find the Ranges" << std::endl;
512 for(UINT n=0; n<ranges.size(); n++){
513 file >> ranges[n].minValue;
514 file >> ranges[n].maxValue;
519 models.
resize(numClasses);
520 classLabels.
resize(numClasses);
523 for(UINT k=0; k<numClasses; k++){
524 Float rejectionThreshold;
530 if( word !=
"ClassLabel:" ){
531 errorLog <<
"load(string filename) - Could not load the class label for class " << k << std::endl;
534 file >> classLabels[k];
537 if( word !=
"NumClusters:" ){
538 errorLog <<
"load(string filename) - Could not load the NumClusters for class " << k << std::endl;
544 if( word !=
"RejectionThreshold:" ){
545 errorLog <<
"load(string filename) - Could not load the RejectionThreshold for class " << k << std::endl;
548 file >> rejectionThreshold;
551 if( word !=
"Gamma:" ){
552 errorLog <<
"load(string filename) - Could not load the Gamma for class " << k << std::endl;
558 if( word !=
"TrainingMu:" ){
559 errorLog <<
"load(string filename) - Could not load the TrainingMu for class " << k << std::endl;
565 if( word !=
"TrainingSigma:" ){
566 errorLog <<
"load(string filename) - Could not load the TrainingSigma for class " << k << std::endl;
569 file >> trainingSigma;
572 if( word !=
"ClusterData:" ){
573 errorLog <<
"load(string filename) - Could not load the ClusterData for class " << k << std::endl;
578 MatrixFloat clusters(numClusters,numInputDimensions);
579 for(UINT i=0; i<numClusters; i++){
580 for(UINT j=0; j<numInputDimensions; j++){
581 file >> clusters[i][j];
585 models[k].setClassLabel( classLabels[k] );
586 models[k].setClusters( clusters );
587 models[k].setGamma( gamma );
588 models[k].setRejectionThreshold( rejectionThreshold );
589 models[k].setTrainingSigma( trainingSigma );
590 models[k].setTrainingMu( trainingMu );
598 bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
600 classDistances.
resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
bool saveBaseSettingsToFile(std::fstream &file) const
virtual bool deepCopyFrom(const Classifier *classifier)
std::string getId() const
#define DEFAULT_NULL_LIKELIHOOD_VALUE
Vector< MinDistModel > getModels() const
bool getTrainingLoggingEnabled() const
MinDist & operator=(const MinDist &rhs)
Vector< ClassTracker > getClassTracker() const
ClassificationData getClassData(const UINT classLabel) const
virtual bool resize(const unsigned int size)
virtual bool train(ClassificationData trainingData)
virtual bool predict_(VectorFloat &inputVector)
virtual bool save(std::fstream &file) const
virtual bool computeAccuracy(const ClassificationData &data, Float &accuracy)
UINT getNumSamples() const
virtual bool load(std::fstream &file)
static std::string getId()
bool copyBaseVariables(const Classifier *classifier)
bool loadBaseSettingsFromFile(std::fstream &file)
UINT getNumDimensions() const
UINT getNumClasses() const
virtual bool recomputeNullRejectionThresholds()
bool loadLegacyModelFromFile(std::fstream &file)
Vector< MinMax > getRanges() const
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
virtual bool train_(ClassificationData &trainingData)
bool scale(const Float minTarget, const Float maxTarget)
MinDist(bool useScaling=false, bool useNullRejection=false, Float nullRejectionCoeff=10.0, UINT numClusters=10)
virtual bool setNullRejectionCoeff(Float nullRejectionCoeff)
This is the main base class that all GRT Classification algorithms should inherit from...
UINT getNumClusters() const
bool setNumClusters(UINT numClusters)