21 #define GRT_DLL_EXPORTS
31 this->useScaling = useScaling;
35 classType =
"LogisticRegression";
36 regressifierType = classType;
37 debugLog.setProceedingText(
"[DEBUG LogisticRegression]");
38 errorLog.setProceedingText(
"[ERROR LogisticRegression]");
39 trainingLog.setProceedingText(
"[TRAINING LogisticRegression]");
40 warningLog.setProceedingText(
"[WARNING LogisticRegression]");
60 if( regressifier == NULL )
return false;
80 trainingResults.clear();
83 errorLog <<
"train_(RegressionData trainingData) - Training data has zero samples!" << std::endl;
88 errorLog <<
"train_(RegressionData trainingData) - The number of target dimensions is not 1!" << std::endl;
92 numInputDimensions = N;
93 numOutputDimensions = 1;
94 inputVectorRanges.clear();
95 targetVectorRanges.clear();
106 trainingData.
scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
113 for(UINT j=0; j<N; j++){
118 Float lastSquaredError = 0;
121 bool keepTraining =
true;
124 TrainingResult result;
125 trainingResults.reserve(M);
130 for(UINT i=0; i<M; i++){
131 randomTrainingOrder[i] = i;
133 std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
136 while( keepTraining ){
139 totalSquaredTrainingError = 0;
140 for(UINT m=0; m<M; m++){
143 UINT i = randomTrainingOrder[m];
149 for(UINT j=0; j<N; j++){
152 error = y[0] - sigmoid( h );
153 totalSquaredTrainingError += SQR(error);
156 for(UINT j=0; j<N; j++){
157 w[j] += learningRate * error * x[j];
159 w0 += learningRate * error;
163 delta = fabs( totalSquaredTrainingError-lastSquaredError );
164 lastSquaredError = totalSquaredTrainingError;
167 if( delta <= minChange ){
168 keepTraining =
false;
171 if( ++iter >= maxNumEpochs ){
172 keepTraining =
false;
175 if( grt_isinf( totalSquaredTrainingError ) || grt_isnan( totalSquaredTrainingError ) ){
176 errorLog <<
"train_(RegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << std::endl;
181 rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError / Float(M) );
182 result.setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError,
this);
183 trainingResults.push_back( result );
186 trainingResultsObserverManager.notifyObservers( result );
188 trainingLog <<
"Epoch: " << iter <<
" SSE: " << totalSquaredTrainingError <<
" Delta: " << delta << std::endl;
192 regressionData.
resize(1,0);
200 errorLog <<
"predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
204 if( !trained )
return false;
206 if( inputVector.
getSize() != numInputDimensions ){
207 errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.
getSize() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
212 for(UINT n=0; n<numInputDimensions; n++){
213 inputVector[n] = grt_scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0.0, 1.0);
217 regressionData[0] =
w0;
218 for(UINT j=0; j<numInputDimensions; j++){
219 regressionData[0] += inputVector[j] *
w[j];
221 Float sum = regressionData[0];
222 regressionData[0] = sigmoid( regressionData[0] );
223 std::cout <<
"reg sum: " << sum <<
" sig: " << regressionData[0] << std::endl;
225 for(UINT n=0; n<numOutputDimensions; n++){
226 regressionData[n] = grt_scale(regressionData[n], 0.0, 1.0, targetVectorRanges[n].minValue, targetVectorRanges[n].maxValue);
237 errorLog <<
"save(fstream &file) - The file is not open!" << std::endl;
242 file<<
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V2.0\n";
246 errorLog <<
"save(fstream &file) - Failed to save Regressifier base settings to file!" << std::endl;
253 for(UINT j=0; j<numInputDimensions; j++){
265 numInputDimensions = 0;
271 errorLog <<
"load(string filename) - Could not open file to load model" << std::endl;
281 if( word ==
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V1.0" ){
285 if( word !=
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V2.0" ){
286 errorLog <<
"load( fstream &file ) - Could not find Model File Header" << std::endl;
292 errorLog <<
"load( fstream &file ) - Failed to save Regressifier base settings to file!" << std::endl;
303 if(word !=
"Weights:"){
304 errorLog <<
"load( fstream &file ) - Could not find the Weights!" << std::endl;
309 for(UINT j=0; j<numInputDimensions; j++){
326 Float LogisticRegression::sigmoid(
const Float x)
const{
327 return 1.0 / (1 + exp(-x));
335 if(word !=
"NumFeatures:"){
336 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find NumFeatures!" << std::endl;
339 file >> numInputDimensions;
342 if(word !=
"NumOutputDimensions:"){
343 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find NumOutputDimensions!" << std::endl;
346 file >> numOutputDimensions;
349 if(word !=
"UseScaling:"){
350 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find UseScaling!" << std::endl;
358 inputVectorRanges.
resize(numInputDimensions);
359 targetVectorRanges.
resize(numOutputDimensions);
363 if(word !=
"InputVectorRanges:"){
365 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Failed to find InputVectorRanges!" << std::endl;
368 for(UINT j=0; j<inputVectorRanges.
getSize(); j++){
369 file >> inputVectorRanges[j].minValue;
370 file >> inputVectorRanges[j].maxValue;
374 if(word !=
"OutputVectorRanges:"){
376 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Failed to find OutputVectorRanges!" << std::endl;
379 for(UINT j=0; j<targetVectorRanges.
getSize(); j++){
380 file >> targetVectorRanges[j].minValue;
381 file >> targetVectorRanges[j].maxValue;
390 if(word !=
"Weights:"){
391 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find the Weights!" << std::endl;
396 for(UINT j=0; j<numInputDimensions; j++){
402 regressionData.
resize(1,0);
UINT getMaxNumIterations() const
LogisticRegression(const bool useScaling=true)
Vector< MinMax > getInputRanges() const
virtual bool predict_(VectorFloat &inputVector)
virtual bool resize(const unsigned int size)
LogisticRegression & operator=(const LogisticRegression &rhs)
virtual bool load(std::fstream &file)
bool copyBaseVariables(const Regressifier *regressifier)
This class implements the Logistic Regression algorithm. Logistic Regression is a simple but effective ...
UINT getNumInputDimensions() const
virtual bool train_(RegressionData &trainingData)
virtual ~LogisticRegression(void)
Vector< MinMax > getTargetRanges() const
bool saveBaseSettingsToFile(std::fstream &file) const
bool scale(const Float minTarget, const Float maxTarget)
UINT getMaxNumEpochs() const
UINT getNumTargetDimensions() const
std::string getRegressifierType() const
bool loadLegacyModelFromFile(std::fstream &file)
bool loadBaseSettingsFromFile(std::fstream &file)
VectorFloat w
The weights vector.
bool setMaxNumIterations(UINT maxNumIterations)
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
virtual bool deepCopyFrom(const Regressifier *regressifier)
bool setMaxNumEpochs(const UINT maxNumEpochs)
virtual bool save(std::fstream &file) const
UINT getNumSamples() const