2 #define GRT_DLL_EXPORTS 8 const std::string GaussianMixtureModels::id =
"GaussianMixtureModels";
18 this->minNumEpochs = minNumEpochs;
19 this->maxNumEpochs = maxNumEpochs;
20 this->minChange = minChange;
38 this->sigma = rhs.sigma;
39 this->invSigma = rhs.invSigma;
61 this->sigma = rhs.sigma;
62 this->invSigma = rhs.invSigma;
73 if( clusterer == NULL )
return false;
87 this->sigma = ptr->sigma;
88 this->invSigma = ptr->invSigma;
130 numTrainingIterationsToConverge = 0;
133 errorLog <<
"train_(MatrixFloat &data) - Training Failed! Training data is empty!" << std::endl;
148 sigma[k].
resize(numInputDimensions,numInputDimensions);
159 for(UINT j=0; j<numInputDimensions; j++){
160 data[i][j] =
scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
166 UINT trainingIter = 0;
173 if( !computeInvAndDet() ){
176 errorLog <<
"train_(MatrixFloat &data) - Failed to compute inverse and determinat!" << std::endl;
184 clusterLabels.
resize(numClusters);
186 clusterLabels[i] = i+1;
188 clusterLikelihoods.
resize(numClusters,0);
189 clusterDistances.
resize(numClusters,0);
201 SWAP(randomIndexs[ i ],randomIndexs[ random.
getRandomNumberInt(0,numTrainingSamples) ]);
204 for(UINT n=0; n<numInputDimensions; n++){
205 mu[k][n] = data[ randomIndexs[k] ][n];
211 frac[k] = 1.0/Float(numClusters);
212 for(UINT i=0; i<numInputDimensions; i++){
213 for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
214 sigma[k][i][i] = 1.0e-2;
219 bool keepGoing =
true;
220 Float change = 99.9e99;
221 UINT numIterationsNoChange = 0;
228 if( estep( data, u, v, change ) ){
234 if( fabs( change ) < minChange ){
235 if( ++numIterationsNoChange >= minNumEpochs ){
238 }
else numIterationsNoChange = 0;
239 if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing =
false;
242 warningLog <<
"train_(const UINT numTrainingSamples, MatrixFloat &data) - Estep failed at iteration " << numTrainingIterationsToConverge << std::endl;
266 if( x.
getSize() != numInputDimensions ){
271 for(UINT n=0; n<numInputDimensions; n++){
272 x[n] = grt_scale(x[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
289 dist = gauss(x,i,det,
mu,invSigma);
291 clusterDistances[i] = dist;
292 clusterLikelihoods[i] = dist;
294 sum += clusterLikelihoods[i];
296 if( dist > bestDistance ){
304 clusterLikelihoods[i] /= sum;
308 maxLikelihood = clusterLikelihoods[ minIndex ];
315 if( !file.is_open() ){
316 errorLog <<
"saveModelToFile(string filename) - Failed to open file!" << std::endl;
320 file <<
"GRT_GAUSSIAN_MIXTURE_MODELS_FILE_V1.0\n";
323 errorLog <<
"saveModelToFile(fstream &file) - Failed to save cluster settings to file!" << std::endl;
330 for(UINT n=0; n<numInputDimensions; n++){
331 file <<
mu[k][n] <<
"\t";
338 for(UINT i=0; i<numInputDimensions; i++){
339 for(UINT j=0; j<numInputDimensions; j++){
340 file << sigma[k][i][j] <<
"\t";
346 file <<
"InvSigma:\n";
348 for(UINT i=0; i<numInputDimensions; i++){
349 for(UINT j=0; j<numInputDimensions; j++){
350 file << invSigma[k][i][j] <<
"\t";
358 file << det[k] << std::endl;
373 if( word !=
"GRT_GAUSSIAN_MIXTURE_MODELS_FILE_V1.0" ){
378 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load cluster settings from file!" << std::endl;
395 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Mu!" << std::endl;
399 for(UINT n=0; n<numInputDimensions; n++){
406 if( word !=
"Sigma:" ){
408 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Sigma!" << std::endl;
412 sigma[k].
resize(numInputDimensions, numInputDimensions);
413 for(UINT i=0; i<numInputDimensions; i++){
414 for(UINT j=0; j<numInputDimensions; j++){
415 file >> sigma[k][i][j];
422 if( word !=
"InvSigma:" ){
424 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load InvSigma!" << std::endl;
428 invSigma[k].
resize(numInputDimensions, numInputDimensions);
429 for(UINT i=0; i<numInputDimensions; i++){
430 for(UINT j=0; j<numInputDimensions; j++){
431 file >> invSigma[k][i][j];
438 if( word !=
"Det:" ){
440 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Det!" << std::endl;
448 clusterLabels.
resize(numClusters);
450 clusterLabels[i] = i+1;
452 clusterLikelihoods.
resize(numClusters,0);
453 clusterDistances.
resize(numClusters,0);
462 Float tmp,sum,max,oldloglike;
463 for(UINT j=0; j<numInputDimensions; j++) u[j] = v[j] = 0;
469 if( !cholesky.getSuccess() ){
return false; }
470 lndets[k] = cholesky.logdet();
473 for(UINT j=0; j<numInputDimensions; j++) u[j] = data[i][j] -
mu[k][j];
474 if( !cholesky.elsolve(u,v) ){
return false; }
476 for(UINT j=0; j<numInputDimensions; j++) sum += SQR(v[j]);
486 for(UINT k=0; k<numClusters; k++) if( resp[i][k] > max ) max =
resp[i][k];
488 tmp = max + log( sum );
493 change = (
loglike - oldloglike);
498 bool GaussianMixtureModels::mstep(
const MatrixFloat &data ){
504 frac[k] = wgt/Float(numTrainingSamples);
505 for(UINT n=0; n<numInputDimensions; n++){
509 for(UINT j=0; j<numInputDimensions; j++){
512 sum +=
resp[m][k] * (data[m][n]-
mu[k][n]) * (data[m][j]-
mu[k][j]);
514 sigma[k][n][j] = sum/wgt;
523 inline void GaussianMixtureModels::SWAP(UINT &a,UINT &b){
529 bool GaussianMixtureModels::computeInvAndDet(){
536 if( !lu.inverse( invSigma[k] ) ){
537 errorLog <<
"computeInvAndDet() - Matrix inversion failed for cluster " << k+1 << std::endl;
std::string getId() const
virtual bool reset() override
virtual bool train_(MatrixFloat &trainingData)
virtual bool predict_(VectorDouble &inputVector)
This file contains the Random class, a useful wrapper for generating cross platform random functions...
VectorDouble lndets
A vector holding the log determinants of SIGMA'k.
Float loglike
The current loglikelihood value of the models given the data.
MatrixFloat getDataAsMatrixFloat() const
virtual bool clear() override
virtual bool resize(const unsigned int size)
This class implements a Gaussian Mixture Model clustering algorithm. The code is based on the GMM code...
bool copyBaseVariables(const Clusterer *clusterer)
virtual bool loadModelFromFile(std::fstream &file)
static std::string getId()
bool loadClustererSettingsFromFile(std::fstream &file)
UINT predictedClusterLabel
Stores the predicted cluster label from the most recent predict( )
virtual bool saveModelToFile(std::fstream &file) const
bool saveClustererSettingsToFile(std::fstream &file) const
MatrixFloat mu
A matrix holding the estimated mean values of each Gaussian.
GaussianMixtureModels(const UINT numClusters=10, const UINT minNumEpochs=5, const UINT maxNumEpochs=1000, const Float minChange=1.0e-5, const UINT numRestarts=5)
UINT numClusters
Number of clusters in the model.
unsigned int getNumRows() const
Vector< MatrixFloat > getSigma() const
unsigned int getNumCols() const
GaussianMixtureModels & operator=(const GaussianMixtureModels &rhs)
VectorDouble frac
A vector holding the P(k)'s.
Vector< MinMax > getRanges() const
UINT numRestarts
The number of times the learning algorithm can reattempt to train a model.
int getRandomNumberInt(int minRange, int maxRange)
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int r, const unsigned int c)
virtual bool deepCopyFrom(const Clusterer *clusterer)
virtual ~GaussianMixtureModels()
MatrixFloat resp
The responsibility matrix.
UINT numTrainingSamples
The number of samples in the training data.
bool setNumRestarts(const UINT numRestarts)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)