// NOTE(review): this file is a fragmentary extraction of GRT's LogisticRegression.cpp;
// the bare numbers embedded below are the original source line numbers from the scrape.
21 #define GRT_DLL_EXPORTS 27 const std::string LogisticRegression::id =
"LogisticRegression";
// Constructor fragment: copies the caller-supplied gradient-descent
// hyperparameters onto the base-class members used by train_() below.
35 this->useScaling = useScaling;
36 this->learningRate = learningRate;
37 this->minChange = minChange;
38 this->batchSize = batchSize;
39 this->minNumEpochs = minNumEpochs;
40 this->maxNumEpochs = maxNumEpochs;
// deepCopyFrom fragment: refuse to copy from a null source pointer.
65 if( regressifier == NULL )
return false;
// train_ fragment: mini-batch gradient descent for 1-D logistic regression.
// Many intermediate source lines are missing from this extract; the comments
// below describe only what the visible lines demonstrate.
83 if( useValidationSet ){
// Carve the validation split off the training data (validationSetSize is a percentage).
84 validationData = trainingData.
split( 100 - validationSetSize );
91 trainingResults.clear();
// Reject degenerate input: no samples, or more than one target dimension.
94 errorLog <<
"train_(RegressionData trainingData) - Training data has zero samples!" << std::endl;
99 errorLog <<
"train_(RegressionData trainingData) - The number of target dimensions is not 1!" << std::endl;
103 numInputDimensions = N;
104 numOutputDimensions = 1;
105 inputVectorRanges.clear();
106 targetVectorRanges.clear();
// Scale the training (and optional validation) data into [0,1].
117 trainingData.
scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
120 if( useValidationSet ){
121 validationData.
scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
129 for(UINT j=0; j<N; j++){
// A batch size of 0 (or larger than the dataset) means full-batch training.
134 if( batchSize == 0 || batchSize > M ){
141 Float batchError = 0;
145 UINT batchStartIndex = 0;
146 UINT batchEndIndex = 0;
147 UINT numSamplesInBatch = 0;
148 const UINT numValidationSamples = validationData.
getNumSamples();
149 bool keepTraining =
true;
152 trainingResults.reserve(M);
// Build a random visiting order over the M training samples.
160 for(UINT i=0; i<M; i++){
161 randomTrainingOrder[i] = i;
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17 — this should migrate to std::shuffle with an explicit URBG
// (e.g. std::mt19937); the file's include block is not visible in this extract.
163 std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
// Outer loop: epochs; inner loop: mini-batches within the current epoch.
166 while( keepTraining ){
171 while( batchStartIndex < M && keepTraining ){
173 rmsTrainingError = 0.0;
174 rmsValidationError = 0.0;
// The final batch may be short; clamp its end index to M.
177 batchEndIndex = batchStartIndex + batchSize;
178 if( batchEndIndex > M ) batchEndIndex = M;
179 numSamplesInBatch = batchEndIndex-batchStartIndex;
182 meanInputData.
fill(0.0);
// Copy the batch samples and accumulate the per-feature input sum (averaged below).
185 for(UINT n=0; n<numSamplesInBatch; n++){
186 const VectorFloat &x = trainingData[randomTrainingOrder[batchStartIndex+n]].getInputVector();
187 const VectorFloat &y = trainingData[randomTrainingOrder[batchStartIndex+n]].getTargetVector();
189 for(UINT j=0; j<N; j++){
190 batchInputData[n][j] = x[j];
191 meanInputData[j] += x[j];
194 batchTargetData[n] = y[0];
198 for(UINT j=0; j<N; j++){
199 meanInputData[j] /=
static_cast<Float
>(numSamplesInBatch);
// Forward pass over the batch: accumulate h = w.x (bias term presumably added
// on a line missing from this extract — TODO confirm), then the squared error.
204 for(UINT n=0; n<numSamplesInBatch; n++){
206 for(UINT j=0; j<N; j++){
207 h += batchInputData[n][j] *
w[j];
209 error = batchTargetData[n] - sigmoid( h );
211 rmsTrainingError += SQR(error);
// Average the accumulated batch error (the accumulation line is not visible here).
214 batchError /=
static_cast<Float
>(numSamplesInBatch);
// Gradient step: update each weight from the mean error and mean input, then the bias.
217 for(UINT j=0; j<N; j++){
218 w[j] += learningRate * batchError * meanInputData[j];
220 w0 += learningRate * batchError;
// Optionally compute the RMS error over the held-out validation set.
223 if( useValidationSet ){
224 for(UINT i=0; i<numValidationSamples; i++){
226 const VectorFloat &x = validationData[i].getInputVector();
227 const VectorFloat &y = validationData[i].getTargetVector();
229 for(UINT j=0; j<N; j++){
232 error = y[0] - sigmoid( h );
233 rmsValidationError += SQR(error);
235 rmsValidationError = sqrt( rmsValidationError / static_cast<Float>(numValidationSamples) );
239 rmsTrainingError = sqrt( rmsTrainingError / static_cast<Float>(numSamplesInBatch) );
// Convergence test: stop once the change in RMS training error drops below
// minChange, but only after the minimum number of epochs has elapsed.
240 delta = iter > 0 ? fabs( rmsTrainingError-lastError ) : rmsTrainingError;
241 lastError = rmsTrainingError;
244 if( delta <= minChange && epoch >= minNumEpochs ){
245 keepTraining =
false;
// Divergence test: a NaN/inf error aborts training with a scaling hint.
248 if( grt_isinf( rmsTrainingError ) || grt_isnan( rmsTrainingError ) ){
249 errorLog << __GRT_LOG__ <<
" Training failed! RMS error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << std::endl;
255 batchStartIndex = batchEndIndex;
// Record and broadcast this iteration's result, then log progress.
259 trainingResults.push_back( result );
262 trainingResultsObserverManager.notifyObservers( result );
264 trainingLog <<
"Epoch: " << epoch <<
" | Iter: " << iter <<
" | RMS Training Error: " << rmsTrainingError <<
" | Delta: " << delta;
265 if( useValidationSet ){
266 trainingLog <<
" | RMS Validation Error: " << rmsValidationError;
268 trainingLog << std::endl;
// Hard stop once the maximum epoch count is reached.
271 if( ++epoch >= maxNumEpochs ){
272 keepTraining =
false;
// Size the 1-D regression output vector for subsequent predict_ calls.
277 regressionData.
resize(1,0);
// predict_ fragment: validates the model and input, optionally scales the
// input, computes sigmoid(w0 + w.x), then rescales the output to the target range.
285 errorLog << __GRT_LOG__ <<
" Model Not Trained!" << std::endl;
289 if( !trained )
return false;
// The input vector must match the trained feature dimensionality.
291 if( inputVector.
getSize() != numInputDimensions ){
292 errorLog << __GRT_LOG__ <<
" The size of the input Vector (" << inputVector.
getSize() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
// Scale each input feature into [0,1] using the ranges captured at training time.
297 for(UINT n=0; n<numInputDimensions; n++){
298 inputVector[n] = grt_scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0.0, 1.0);
// Linear combination: bias plus weighted inputs, squashed through the sigmoid.
302 regressionData[0] =
w0;
303 for(UINT j=0; j<numInputDimensions; j++){
304 regressionData[0] += inputVector[j] *
w[j];
306 regressionData[0] = sigmoid( regressionData[0] );
// Map the [0,1] output back to the original target range.
308 for(UINT n=0; n<numOutputDimensions; n++){
309 regressionData[n] = grt_scale(regressionData[n], 0.0, 1.0, targetVectorRanges[n].minValue, targetVectorRanges[n].maxValue);
// save fragment: writes the v2 model-file header and the regressifier base
// settings; the per-dimension weight output follows on lines missing here.
320 errorLog << __GRT_LOG__ <<
" The file is not open!" << std::endl;
325 file<<
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V2.0\n";
329 errorLog << __GRT_LOG__ <<
" Failed to save Regressifier base settings to file!" << std::endl;
336 for(UINT j=0; j<numInputDimensions; j++){
// load fragment: clears any previous model, verifies the file is open,
// then checks the header word — legacy v1 files are delegated to
// loadLegacyModelFromFile, anything other than v2 is rejected.
348 numInputDimensions = 0;
354 errorLog << __GRT_LOG__ <<
" Could not open file to load model" << std::endl;
364 if( word ==
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V1.0" ){
368 if( word !=
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V2.0" ){
369 errorLog << __GRT_LOG__ <<
" Could not find Model File Header" << std::endl;
// Bug fix: this error fires in the LOAD path (see the "Could not open file to
// load model" / header checks in this function), but the message said
// " Failed to save Regressifier base settings to file!" — a copy-paste from
// save(). Report the actual failure: loading the base settings.
375 errorLog << __GRT_LOG__ <<
" Failed to load Regressifier base settings from file!" << std::endl;
// Read the "Weights:" marker before the weight values; abort if absent.
386 if(word !=
"Weights:"){
387 errorLog << __GRT_LOG__ <<
" Could not find the Weights!" << std::endl;
392 for(UINT j=0; j<numInputDimensions; j++){
// Logistic squashing function: maps any real x into (0,1).
// (The integer literal 1 promotes to Float before the addition/division,
// so the mixed-type expression is well-defined.)
409 Float LogisticRegression::sigmoid(
const Float x)
const{
410 return 1.0 / (1 + exp(-x));
// loadLegacyModelFromFile fragment: parses the v1 model format —
// NumFeatures, NumOutputDimensions, UseScaling, the input/output ranges,
// and finally the weights; sizes regressionData for prediction at the end.
418 if(word !=
"NumFeatures:"){
419 errorLog << __GRT_LOG__ <<
" Could not find NumFeatures!" << std::endl;
422 file >> numInputDimensions;
425 if(word !=
"NumOutputDimensions:"){
426 errorLog << __GRT_LOG__ <<
" Could not find NumOutputDimensions!" << std::endl;
429 file >> numOutputDimensions;
432 if(word !=
"UseScaling:"){
433 errorLog << __GRT_LOG__ <<
" Could not find UseScaling!" << std::endl;
// Size the range vectors before reading the per-dimension min/max pairs.
441 inputVectorRanges.
resize(numInputDimensions);
442 targetVectorRanges.
resize(numOutputDimensions);
446 if(word !=
"InputVectorRanges:"){
448 errorLog << __GRT_LOG__ <<
" Failed to find InputVectorRanges!" << std::endl;
// Each input range is stored as a (minValue, maxValue) pair.
451 for(UINT j=0; j<inputVectorRanges.
getSize(); j++){
452 file >> inputVectorRanges[j].minValue;
453 file >> inputVectorRanges[j].maxValue;
457 if(word !=
"OutputVectorRanges:"){
459 errorLog << __GRT_LOG__ <<
" Failed to find OutputVectorRanges!" << std::endl;
462 for(UINT j=0; j<targetVectorRanges.
getSize(); j++){
463 file >> targetVectorRanges[j].minValue;
464 file >> targetVectorRanges[j].maxValue;
// Read the weights marker, then one weight per input dimension.
473 if(word !=
"Weights:"){
474 errorLog << __GRT_LOG__ <<
" Could not find the Weights!" << std::endl;
479 for(UINT j=0; j<numInputDimensions; j++){
// Size the 1-D regression output vector now that the model is loaded.
485 regressionData.
resize(1,0);
UINT getMaxNumIterations() const
std::string getId() const
bool setRegressionResult(unsigned int trainingIteration, Float totalSquaredTrainingError, Float rootMeanSquaredTrainingError, MLBase *trainer)
Vector< MinMax > getInputRanges() const
This file contains the Random class, a useful cross-platform wrapper for generating random numbers...
virtual bool predict_(VectorFloat &inputVector)
virtual bool resize(const unsigned int size)
LogisticRegression & operator=(const LogisticRegression &rhs)
virtual bool load(std::fstream &file)
bool copyBaseVariables(const Regressifier *regressifier)
UINT getNumInputDimensions() const
virtual bool train_(RegressionData &trainingData)
virtual ~LogisticRegression(void)
Vector< MinMax > getTargetRanges() const
bool saveBaseSettingsToFile(std::fstream &file) const
bool scale(const Float minTarget, const Float maxTarget)
UINT getMaxNumEpochs() const
UINT getNumTargetDimensions() const
LogisticRegression(const bool useScaling=true, const Float learningRate=0.01, const Float minChange=1.0e-5, const UINT batchSize=1, const UINT maxNumEpochs=500, const UINT minNumEpochs=1)
bool fill(const T &value)
bool loadLegacyModelFromFile(std::fstream &file)
bool loadBaseSettingsFromFile(std::fstream &file)
VectorFloat w
The weights vector.
bool setMaxNumIterations(UINT maxNumIterations)
RegressionData split(const UINT trainingSizePercentage)
virtual bool deepCopyFrom(const Regressifier *regressifier)
bool setMaxNumEpochs(const UINT maxNumEpochs)
virtual bool save(std::fstream &file) const
static std::string getId()
UINT getNumSamples() const