21 #define GRT_DLL_EXPORTS 27 const std::string BAG::id =
"BAG";
35 this->useScaling = useScaling;
36 useNullRejection =
false;
37 classifierMode = STANDARD_CLASSIFIER_MODE;
41 classifierMode = STANDARD_CLASSIFIER_MODE;
56 this->weights = rhs.weights;
70 if( classifier == NULL )
return false;
73 BAG *ptr = (
BAG*)classifier;
79 this->weights = ptr->weights;
101 errorLog << __GRT_LOG__ <<
" Training data has zero samples!" << std::endl;
105 numInputDimensions = N;
106 numOutputDimensions = K;
115 trainingData.
scale(0, 1);
118 if( useValidationSet ){
119 validationData = trainingData.
split( 100-validationSetSize );
122 UINT ensembleSize = ensemble.
getSize();
124 if( ensembleSize == 0 ){
125 errorLog << __GRT_LOG__ <<
" The ensemble size is zero! You need to add some classifiers to the ensemble first." << std::endl;
129 for(UINT i=0; i<ensembleSize; i++){
130 if( ensemble[i] == NULL ){
131 errorLog << __GRT_LOG__ <<
" The classifier at ensemble index " << i <<
" has not been set!" << std::endl;
137 for(UINT i=0; i<ensembleSize; i++){
143 trainingLog <<
"Training ensemble " << i+1 <<
". Ensemble type: " << ensemble[i]->getId() <<
". Num Training Samples: " << boostedDataset.
getNumSamples() << std::endl;
146 if( !ensemble[i]->
train_( boostedDataset ) ){
147 errorLog << __GRT_LOG__ <<
" The classifier at ensemble index " << i <<
" failed training!" << std::endl;
160 trainingSetAccuracy = 0;
161 validationSetAccuracy = 0;
164 bool scalingState = useScaling;
168 errorLog << __GRT_LOG__ <<
" Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
172 if( useValidationSet ){
175 errorLog << __GRT_LOG__ <<
" Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
181 trainingLog <<
"Training set accuracy: " << trainingSetAccuracy << std::endl;
183 if( useValidationSet ){
184 trainingLog <<
"Validation set accuracy: " << validationSetAccuracy << std::endl;
188 useScaling = scalingState;
196 errorLog << __GRT_LOG__ <<
" Model Not Trained!" << std::endl;
200 predictedClassLabel = 0;
201 maxLikelihood = -10000;
203 if( !trained )
return false;
205 if( inputVector.
getSize() != numInputDimensions ){
206 errorLog << __GRT_LOG__ <<
" The size of the input Vector (" << inputVector.
getSize() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
211 for(UINT n=0; n<numInputDimensions; n++){
212 inputVector[n] =
scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
216 if( classLikelihoods.
getSize() != numClasses ) classLikelihoods.
resize(numClasses);
217 if( classDistances.
getSize() != numClasses ) classDistances.
resize(numClasses);
220 for(UINT k=0; k<numClasses; k++){
221 classLikelihoods[k] = 0;
222 classDistances[k] = 0;
227 UINT ensembleSize = ensemble.
getSize();
228 for(UINT i=0; i<ensembleSize; i++){
230 if( !ensemble[i]->
predict(inputVector) ){
231 errorLog << __GRT_LOG__ <<
" The " << i <<
" classifier in the ensemble failed prediction!" << std::endl;
244 for(UINT i=0; i<numClasses; i++){
245 if( classLikelihoods[i] > maxCount ){
247 maxCount = classLikelihoods[i];
249 classLikelihoods[i] /= sum;
250 classDistances[i] /= Float(ensembleSize);
253 predictedClassLabel = classLabels[ maxIndex ];
254 maxLikelihood = classLikelihoods[ maxIndex ];
262 for(UINT i=0; i<ensemble.
getSize(); i++){
263 if( ensemble[i] != NULL ){
264 ensemble[i]->reset();
277 for(UINT i=0; i<ensemble.
getSize(); i++){
278 if( ensemble[i] != NULL ){
279 ensemble[i]->clear();
290 errorLog << __GRT_LOG__ <<
" The file is not open!" << std::endl;
297 file <<
"GRT_BAG_MODEL_FILE_V2.0\n";
301 errorLog << __GRT_LOG__ <<
" Failed to save classifier base settings to file!" << std::endl;
307 file <<
"EnsembleSize: " << ensembleSize << std::endl;
315 if( i < ensembleSize-1 ) file <<
"\t";
320 file <<
"ClassifierTypes: ";
322 file << ensemble[i]->getId() << std::endl;
326 file <<
"Ensemble:" << std::endl;
328 if( !ensemble[i]->
save( file ) ){
329 errorLog << __GRT_LOG__ <<
" Failed to save classifier " << i <<
" to file!" << std::endl;
345 UINT ensembleSize = 0;
349 errorLog << __GRT_LOG__ <<
" Could not open file to load model" << std::endl;
357 if( word ==
"GRT_BAG_MODEL_FILE_V1.0" ){
358 return loadLegacyModelFromFile( file );
362 if(word !=
"GRT_BAG_MODEL_FILE_V2.0"){
363 errorLog << __GRT_LOG__ <<
" Could not find Model File Header" << std::endl;
369 errorLog << __GRT_LOG__ <<
" Failed to load base settings from file!" << std::endl;
378 if(word !=
"EnsembleSize:"){
379 errorLog << __GRT_LOG__ <<
" Could not find the EnsembleSize!" << std::endl;
383 file >> ensembleSize;
386 weights.
resize( ensembleSize );
389 if(word !=
"Weights:"){
390 errorLog << __GRT_LOG__ <<
" Could not find the Weights!" << std::endl;
394 for(UINT i=0; i<ensembleSize; i++){
402 if(word !=
"ClassifierTypes:"){
403 errorLog << __GRT_LOG__ <<
" Could not find the ClassifierTypes!" << std::endl;
407 for(UINT i=0; i<ensembleSize; i++){
408 file >> classifierTypes[i];
413 if(word !=
"Ensemble:"){
414 errorLog << __GRT_LOG__ <<
" Could not find the Ensemble! Found: " << word << std::endl;
418 ensemble.
resize(ensembleSize,NULL);
419 for(UINT i=0; i<ensembleSize; i++){
420 ensemble[i] =
create( classifierTypes[i] );
422 if( ensemble[i] == NULL ){
423 errorLog << __GRT_LOG__ <<
" Could not create a new classifier instance from the classifierType: " << classifierTypes[i] << std::endl;
429 if( !ensemble[i]->
load( file ) ){
430 errorLog << __GRT_LOG__ <<
" Failed to load ensemble classifier: " << i << std::endl;
442 bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
444 classDistances.
resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
468 if( newClassifier == NULL ){
476 weights.push_back( weight );
477 ensemble.push_back( newClassifier );
485 for(UINT i=0; i<ensemble.
getSize(); i++){
486 if( ensemble[i] != NULL ){
502 this->weights = weights;
506 bool BAG::loadLegacyModelFromFile( std::fstream &file ){
bool saveBaseSettingsToFile(std::fstream &file) const
std::string getId() const
virtual bool predict(VectorFloat inputVector)
const Vector< Classifier * > getEnsemble() const
virtual bool recomputeNullRejectionThresholds()
#define DEFAULT_NULL_LIKELIHOOD_VALUE
VectorFloat getEnsembleWeights() const
UINT getEnsembleSize() const
static std::string getId()
bool getTrainingLoggingEnabled() const
std::string getClassifierType() const
virtual bool resize(const unsigned int size)
BAG(bool useScaling=false)
virtual bool save(std::fstream &file) const
Vector< UINT > getClassLabels() const
virtual bool computeAccuracy(const ClassificationData &data, Float &accuracy)
virtual bool deepCopyFrom(const Classifier *classifier)
UINT getNumSamples() const
virtual bool deepCopyFrom(const Classifier *classifier)
UINT getPredictedClassLabel() const
virtual bool load(std::fstream &file)
bool copyBaseVariables(const Classifier *classifier)
bool loadBaseSettingsFromFile(std::fstream &file)
bool addClassifierToEnsemble(const Classifier &classifier, Float weight=1)
UINT getNumDimensions() const
UINT getNumClasses() const
UINT getClassLabelIndexValue(const UINT classLabel) const
virtual bool train_(ClassificationData &trainingData)
Vector< MinMax > getRanges() const
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
ClassificationData getBootstrappedDataset(const UINT numSamples=0, const bool balanceDataset=false) const
BAG & operator=(const BAG &rhs)
bool scale(const Float minTarget, const Float maxTarget)
Classifier * create() const
This is the main base class that all GRT Classification algorithms should inherit from...
virtual bool predict_(VectorFloat &inputVector)
bool setWeights(const VectorFloat &weights)
static Classifier * create(const std::string &id)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)