13 this->minNumEpochs = minNumEpochs;
14 this->maxNumEpochs = maxNumEpochs;
15 this->minChange = minChange;
18 numTrainingIterationsToConverge = 0;
21 classType =
"GaussianMixtureModels";
22 clustererType = classType;
23 debugLog.setProceedingText(
"[DEBUG GaussianMixtureModels]");
24 errorLog.setProceedingText(
"[ERROR GaussianMixtureModels]");
25 trainingLog.setProceedingText(
"[TRAINING GaussianMixtureModels]");
26 warningLog.setProceedingText(
"[WARNING GaussianMixtureModels]");
31 classType =
"GaussianMixtureModels";
32 clustererType = classType;
33 debugLog.setProceedingText(
"[DEBUG GaussianMixtureModels]");
34 errorLog.setProceedingText(
"[ERROR GaussianMixtureModels]");
35 trainingLog.setProceedingText(
"[TRAINING GaussianMixtureModels]");
36 warningLog.setProceedingText(
"[WARNING GaussianMixtureModels]");
47 this->sigma = rhs.sigma;
48 this->invSigma = rhs.invSigma;
70 this->sigma = rhs.sigma;
71 this->invSigma = rhs.invSigma;
82 if( clusterer == NULL )
return false;
95 this->sigma = ptr->sigma;
96 this->invSigma = ptr->invSigma;
138 numTrainingIterationsToConverge = 0;
141 errorLog <<
"train_(MatrixFloat &data) - Training Failed! Training data is empty!" << std::endl;
156 sigma[k].
resize(numInputDimensions,numInputDimensions);
167 for(UINT j=0; j<numInputDimensions; j++){
168 data[i][j] =
scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
178 SWAP(randomIndexs[ i ],randomIndexs[ random.
getRandomNumberInt(0,numTrainingSamples) ]);
181 for(UINT n=0; n<numInputDimensions; n++){
182 mu[k][n] = data[ randomIndexs[k] ][n];
188 frac[k] = 1.0/Float(numClusters);
189 for(UINT i=0; i<numInputDimensions; i++){
190 for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
191 sigma[k][i][i] = 1.0e-2;
196 bool keepGoing =
true;
197 Float change = 99.9e99;
198 UINT numIterationsNoChange = 0;
205 if( estep( data, u, v, change ) ){
211 if( fabs( change ) < minChange ){
212 if( ++numIterationsNoChange >= minNumEpochs ){
215 }
else numIterationsNoChange = 0;
216 if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing =
false;
219 errorLog <<
"train_(MatrixFloat &data) - Estep failed at iteration " << numTrainingIterationsToConverge << std::endl;
225 if( !computeInvAndDet() ){
228 errorLog <<
"train_(MatrixFloat &data) - Failed to compute inverse and determinat!" << std::endl;
236 clusterLabels.
resize(numClusters);
238 clusterLabels[i] = i+1;
240 clusterLikelihoods.
resize(numClusters,0);
241 clusterDistances.
resize(numClusters,0);
262 if( x.
getSize() != numInputDimensions ){
267 for(UINT n=0; n<numInputDimensions; n++){
268 x[n] = grt_scale(x[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
285 dist = gauss(x,i,det,
mu,invSigma);
287 clusterDistances[i] = dist;
288 clusterLikelihoods[i] = dist;
290 sum += clusterLikelihoods[i];
292 if( dist > bestDistance ){
300 clusterLikelihoods[i] /= sum;
304 maxLikelihood = clusterLikelihoods[ minIndex ];
311 if( !file.is_open() ){
312 errorLog <<
"saveModelToFile(string filename) - Failed to open file!" << std::endl;
316 file <<
"GRT_GAUSSIAN_MIXTURE_MODELS_FILE_V1.0\n";
319 errorLog <<
"saveModelToFile(fstream &file) - Failed to save cluster settings to file!" << std::endl;
326 for(UINT n=0; n<numInputDimensions; n++){
327 file <<
mu[k][n] <<
"\t";
334 for(UINT i=0; i<numInputDimensions; i++){
335 for(UINT j=0; j<numInputDimensions; j++){
336 file << sigma[k][i][j] <<
"\t";
342 file <<
"InvSigma:\n";
344 for(UINT i=0; i<numInputDimensions; i++){
345 for(UINT j=0; j<numInputDimensions; j++){
346 file << invSigma[k][i][j] <<
"\t";
354 file << det[k] << std::endl;
369 if( word !=
"GRT_GAUSSIAN_MIXTURE_MODELS_FILE_V1.0" ){
374 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load cluster settings from file!" << std::endl;
391 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Mu!" << std::endl;
395 for(UINT n=0; n<numInputDimensions; n++){
402 if( word !=
"Sigma:" ){
404 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Sigma!" << std::endl;
408 sigma[k].
resize(numInputDimensions, numInputDimensions);
409 for(UINT i=0; i<numInputDimensions; i++){
410 for(UINT j=0; j<numInputDimensions; j++){
411 file >> sigma[k][i][j];
418 if( word !=
"InvSigma:" ){
420 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load InvSigma!" << std::endl;
424 invSigma[k].
resize(numInputDimensions, numInputDimensions);
425 for(UINT i=0; i<numInputDimensions; i++){
426 for(UINT j=0; j<numInputDimensions; j++){
427 file >> invSigma[k][i][j];
434 if( word !=
"Det:" ){
436 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Det!" << std::endl;
444 clusterLabels.
resize(numClusters);
446 clusterLabels[i] = i+1;
448 clusterLikelihoods.
resize(numClusters,0);
449 clusterDistances.
resize(numClusters,0);
458 Float tmp,sum,max,oldloglike;
459 for(UINT j=0; j<numInputDimensions; j++) u[j] = v[j] = 0;
465 if( !cholesky.getSuccess() ){
return false; }
466 lndets[k] = cholesky.logdet();
469 for(UINT j=0; j<numInputDimensions; j++) u[j] = data[i][j] -
mu[k][j];
470 if( !cholesky.elsolve(u,v) ){
return false; }
472 for(UINT j=0; j<numInputDimensions; j++) sum += SQR(v[j]);
482 for(UINT k=0; k<numClusters; k++) if( resp[i][k] > max ) max =
resp[i][k];
484 tmp = max + log( sum );
489 change = (
loglike - oldloglike);
494 bool GaussianMixtureModels::mstep(
const MatrixFloat &data ){
500 frac[k] = wgt/Float(numTrainingSamples);
501 for(UINT n=0; n<numInputDimensions; n++){
505 for(UINT j=0; j<numInputDimensions; j++){
508 sum +=
resp[m][k] * (data[m][n]-
mu[k][n]) * (data[m][j]-
mu[k][j]);
510 sigma[k][n][j] = sum/wgt;
519 inline void GaussianMixtureModels::SWAP(UINT &a,UINT &b){
525 bool GaussianMixtureModels::computeInvAndDet(){
528 invSigma.
resize(numClusters);
532 if( !lu.inverse( invSigma[k] ) ){
533 errorLog <<
"computeInvAndDet() - Matrix inversion failed for cluster " << k+1 << std::endl;
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
virtual bool train_(MatrixFloat &trainingData)
virtual bool predict_(VectorDouble &inputVector)
VectorDouble lndets
A vector holding the log determinants of SIGMA'k.
Float loglike
The current loglikelihood value of the models given the data.
std::string getClustererType() const
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int size)
This class implements a Gaussian Mixture Model clustering algorithm. The code is based on the GMM code...
bool copyBaseVariables(const Clusterer *clusterer)
virtual bool loadModelFromFile(std::fstream &file)
bool loadClustererSettingsFromFile(std::fstream &file)
unsigned int getSize() const
UINT predictedClusterLabel
Stores the predicted cluster label from the most recent predict() call.
virtual bool saveModelToFile(std::fstream &file) const
bool saveClustererSettingsToFile(std::fstream &file) const
MatrixFloat mu
A matrix holding the estimated mean values of each Gaussian.
UINT numClusters
Number of clusters in the model.
unsigned int getNumRows() const
unsigned int getNumCols() const
GaussianMixtureModels & operator=(const GaussianMixtureModels &rhs)
VectorDouble frac
A vector holding the P(k)'s.
Vector< MinMax > getRanges() const
int getRandomNumberInt(int minRange, int maxRange)
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int r, const unsigned int c)
virtual bool deepCopyFrom(const Clusterer *clusterer)
GaussianMixtureModels(const UINT numClusters=10, const UINT minNumEpochs=5, const UINT maxNumEpochs=1000, const Float minChange=1.0e-5)
virtual ~GaussianMixtureModels()
MatrixFloat resp
The responsibility matrix.
UINT numTrainingSamples
The number of samples in the training data.