21 #define GRT_DLL_EXPORTS 27 const std::string LinearRegression::id =
"LinearRegression";
35 this->useScaling = useScaling;
63 if( regressifier == NULL )
return false;
84 trainingResults.clear();
87 errorLog <<
"train_(RegressionData &trainingData) - Training data has zero samples!" << std::endl;
92 errorLog <<
"train_(RegressionData &trainingData) - The number of target dimensions is not 1!" << std::endl;
96 numInputDimensions = N;
97 numOutputDimensions = 1;
98 inputVectorRanges.clear();
99 targetVectorRanges.clear();
110 trainingData.
scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
117 for(UINT j=0; j<N; j++){
125 bool keepTraining =
true;
128 trainingResults.reserve(M);
133 for(UINT i=0; i<M; i++){
134 randomTrainingOrder[i] = i;
136 std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
139 while( keepTraining ){
142 totalSquaredTrainingError = 0;
143 for(UINT m=0; m<M; m++){
146 UINT i = randomTrainingOrder[m];
152 for(UINT j=0; j<N; j++){
156 totalSquaredTrainingError += SQR( error );
159 for(UINT j=0; j<N; j++){
160 w[j] += learningRate * error * x[j];
162 w0 += learningRate * error;
166 delta = fabs( totalSquaredTrainingError-lastError );
167 lastError = totalSquaredTrainingError;
170 if( delta <= minChange ){
171 keepTraining =
false;
174 if( grt_isinf( totalSquaredTrainingError ) || grt_isnan( totalSquaredTrainingError ) ){
175 errorLog <<
"train_(RegressionData &trainingData) - Training failed! Total squared training error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << std::endl;
179 if( ++iter >= maxNumEpochs ){
180 keepTraining =
false;
184 rmsTrainingError = sqrt( totalSquaredTrainingError / Float(M) );
186 trainingResults.push_back( result );
189 trainingResultsObserverManager.notifyObservers( result );
191 trainingLog <<
"Epoch: " << iter <<
" SSE: " << totalSquaredTrainingError <<
" Delta: " << delta << std::endl;
195 regressionData.
resize(1,0);
203 errorLog <<
"predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
207 if( !trained )
return false;
209 if( inputVector.size() != numInputDimensions ){
210 errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input Vector (" << int( inputVector.size() ) <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
215 for(UINT n=0; n<numInputDimensions; n++){
216 inputVector[n] =
scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0, 1);
220 regressionData[0] = w0;
221 for(UINT j=0; j<numInputDimensions; j++){
222 regressionData[0] += inputVector[j] * w[j];
226 for(UINT n=0; n<numOutputDimensions; n++){
227 regressionData[n] =
scale(regressionData[n], 0, 1, targetVectorRanges[n].minValue, targetVectorRanges[n].maxValue);
238 errorLog <<
"save(fstream &file) - The file is not open!" << std::endl;
243 file<<
"GRT_LINEAR_REGRESSION_MODEL_FILE_V2.0\n";
247 errorLog <<
"save(fstream &file) - Failed to save Regressifier base settings to file!" << std::endl;
254 for(UINT j=0; j<numInputDimensions; j++){
269 errorLog <<
"load( fstream &file ) - Could not open file to load model" << std::endl;
279 if( word ==
"GRT_LINEAR_REGRESSION_MODEL_FILE_V1.0" ){
283 if( word !=
"GRT_LINEAR_REGRESSION_MODEL_FILE_V2.0" ){
284 errorLog <<
"load( fstream &file ) - Could not find Model File Header" << std::endl;
290 errorLog <<
"load( fstream &file ) - Failed to save Regressifier base settings to file!" << std::endl;
297 w.
resize(numInputDimensions);
301 if(word !=
"Weights:"){
302 errorLog <<
"load( fstream &file ) - Could not find the Weights!" << std::endl;
307 for(UINT j=0; j<numInputDimensions; j++){
329 if(word !=
"NumFeatures:"){
330 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find NumFeatures!" << std::endl;
333 file >> numInputDimensions;
336 if(word !=
"NumOutputDimensions:"){
337 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find NumOutputDimensions!" << std::endl;
340 file >> numOutputDimensions;
343 if(word !=
"UseScaling:"){
344 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find UseScaling!" << std::endl;
352 inputVectorRanges.
resize(numInputDimensions);
353 targetVectorRanges.
resize(numOutputDimensions);
357 if(word !=
"InputVectorRanges:"){
359 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Failed to find InputVectorRanges!" << std::endl;
362 for(UINT j=0; j<inputVectorRanges.size(); j++){
363 file >> inputVectorRanges[j].minValue;
364 file >> inputVectorRanges[j].maxValue;
368 if(word !=
"OutputVectorRanges:"){
370 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Failed to find OutputVectorRanges!" << std::endl;
373 for(UINT j=0; j<targetVectorRanges.size(); j++){
374 file >> targetVectorRanges[j].minValue;
375 file >> targetVectorRanges[j].maxValue;
380 w.
resize(numInputDimensions);
384 if(word !=
"Weights:"){
385 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find the Weights!" << std::endl;
390 for(UINT j=0; j<numInputDimensions; j++){
396 regressionData.
resize(1,0);
std::string getId() const
bool setRegressionResult(unsigned int trainingIteration, Float totalSquaredTrainingError, Float rootMeanSquaredTrainingError, MLBase *trainer)
virtual bool predict_(VectorFloat &inputVector)
virtual bool clear() override
LinearRegression(bool useScaling=false)
Vector< MinMax > getInputRanges() const
This file contains the Random class, a useful wrapper for generating cross platform random functions...
static std::string getId()
virtual bool resize(const unsigned int size)
virtual ~LinearRegression(void)
bool copyBaseVariables(const Regressifier *regressifier)
UINT getNumInputDimensions() const
Vector< MinMax > getTargetRanges() const
bool saveBaseSettingsToFile(std::fstream &file) const
bool scale(const Float minTarget, const Float maxTarget)
UINT getMaxNumEpochs() const
UINT getNumTargetDimensions() const
virtual bool save(std::fstream &file) const
bool loadBaseSettingsFromFile(std::fstream &file)
LinearRegression & operator=(const LinearRegression &rhs)
bool setMaxNumIterations(const UINT maxNumIterations)
virtual bool load(std::fstream &file)
virtual bool train_(RegressionData &trainingData)
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
virtual bool deepCopyFrom(const Regressifier *regressifier)
bool loadLegacyModelFromFile(std::fstream &file)
bool setMaxNumEpochs(const UINT maxNumEpochs)
UINT getMaxNumIterations() const
UINT getNumSamples() const
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)