2 #define GRT_DLL_EXPORTS
14 this->minNumEpochs = minNumEpochs;
15 this->maxNumEpochs = maxNumEpochs;
16 this->minChange = minChange;
19 numTrainingIterationsToConverge = 0;
22 classType =
"GaussianMixtureModels";
23 clustererType = classType;
24 debugLog.setProceedingText(
"[DEBUG GaussianMixtureModels]");
25 errorLog.setProceedingText(
"[ERROR GaussianMixtureModels]");
26 trainingLog.setProceedingText(
"[TRAINING GaussianMixtureModels]");
27 warningLog.setProceedingText(
"[WARNING GaussianMixtureModels]");
32 classType =
"GaussianMixtureModels";
33 clustererType = classType;
34 debugLog.setProceedingText(
"[DEBUG GaussianMixtureModels]");
35 errorLog.setProceedingText(
"[ERROR GaussianMixtureModels]");
36 trainingLog.setProceedingText(
"[TRAINING GaussianMixtureModels]");
37 warningLog.setProceedingText(
"[WARNING GaussianMixtureModels]");
48 this->sigma = rhs.sigma;
49 this->invSigma = rhs.invSigma;
71 this->sigma = rhs.sigma;
72 this->invSigma = rhs.invSigma;
83 if( clusterer == NULL )
return false;
96 this->sigma = ptr->sigma;
97 this->invSigma = ptr->invSigma;
139 numTrainingIterationsToConverge = 0;
142 errorLog <<
"train_(MatrixFloat &data) - Training Failed! Training data is empty!" << std::endl;
157 sigma[k].
resize(numInputDimensions,numInputDimensions);
168 for(UINT j=0; j<numInputDimensions; j++){
169 data[i][j] =
scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
179 SWAP(randomIndexs[ i ],randomIndexs[ random.
getRandomNumberInt(0,numTrainingSamples) ]);
182 for(UINT n=0; n<numInputDimensions; n++){
183 mu[k][n] = data[ randomIndexs[k] ][n];
189 frac[k] = 1.0/Float(numClusters);
190 for(UINT i=0; i<numInputDimensions; i++){
191 for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
192 sigma[k][i][i] = 1.0e-2;
197 bool keepGoing =
true;
198 Float change = 99.9e99;
199 UINT numIterationsNoChange = 0;
206 if( estep( data, u, v, change ) ){
212 if( fabs( change ) < minChange ){
213 if( ++numIterationsNoChange >= minNumEpochs ){
216 }
else numIterationsNoChange = 0;
217 if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing =
false;
220 errorLog <<
"train_(MatrixFloat &data) - Estep failed at iteration " << numTrainingIterationsToConverge << std::endl;
226 if( !computeInvAndDet() ){
229 errorLog <<
"train_(MatrixFloat &data) - Failed to compute inverse and determinat!" << std::endl;
237 clusterLabels.
resize(numClusters);
239 clusterLabels[i] = i+1;
241 clusterLikelihoods.
resize(numClusters,0);
242 clusterDistances.
resize(numClusters,0);
263 if( x.
getSize() != numInputDimensions ){
268 for(UINT n=0; n<numInputDimensions; n++){
269 x[n] = grt_scale(x[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
286 dist = gauss(x,i,det,
mu,invSigma);
288 clusterDistances[i] = dist;
289 clusterLikelihoods[i] = dist;
291 sum += clusterLikelihoods[i];
293 if( dist > bestDistance ){
301 clusterLikelihoods[i] /= sum;
305 maxLikelihood = clusterLikelihoods[ minIndex ];
312 if( !file.is_open() ){
313 errorLog <<
"saveModelToFile(string filename) - Failed to open file!" << std::endl;
317 file <<
"GRT_GAUSSIAN_MIXTURE_MODELS_FILE_V1.0\n";
320 errorLog <<
"saveModelToFile(fstream &file) - Failed to save cluster settings to file!" << std::endl;
327 for(UINT n=0; n<numInputDimensions; n++){
328 file <<
mu[k][n] <<
"\t";
335 for(UINT i=0; i<numInputDimensions; i++){
336 for(UINT j=0; j<numInputDimensions; j++){
337 file << sigma[k][i][j] <<
"\t";
343 file <<
"InvSigma:\n";
345 for(UINT i=0; i<numInputDimensions; i++){
346 for(UINT j=0; j<numInputDimensions; j++){
347 file << invSigma[k][i][j] <<
"\t";
355 file << det[k] << std::endl;
370 if( word !=
"GRT_GAUSSIAN_MIXTURE_MODELS_FILE_V1.0" ){
375 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load cluster settings from file!" << std::endl;
392 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Mu!" << std::endl;
396 for(UINT n=0; n<numInputDimensions; n++){
403 if( word !=
"Sigma:" ){
405 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Sigma!" << std::endl;
409 sigma[k].
resize(numInputDimensions, numInputDimensions);
410 for(UINT i=0; i<numInputDimensions; i++){
411 for(UINT j=0; j<numInputDimensions; j++){
412 file >> sigma[k][i][j];
419 if( word !=
"InvSigma:" ){
421 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load InvSigma!" << std::endl;
425 invSigma[k].
resize(numInputDimensions, numInputDimensions);
426 for(UINT i=0; i<numInputDimensions; i++){
427 for(UINT j=0; j<numInputDimensions; j++){
428 file >> invSigma[k][i][j];
435 if( word !=
"Det:" ){
437 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load Det!" << std::endl;
445 clusterLabels.
resize(numClusters);
447 clusterLabels[i] = i+1;
449 clusterLikelihoods.
resize(numClusters,0);
450 clusterDistances.
resize(numClusters,0);
459 Float tmp,sum,max,oldloglike;
460 for(UINT j=0; j<numInputDimensions; j++) u[j] = v[j] = 0;
466 if( !cholesky.getSuccess() ){
return false; }
467 lndets[k] = cholesky.logdet();
470 for(UINT j=0; j<numInputDimensions; j++) u[j] = data[i][j] -
mu[k][j];
471 if( !cholesky.elsolve(u,v) ){
return false; }
473 for(UINT j=0; j<numInputDimensions; j++) sum += SQR(v[j]);
483 for(UINT k=0; k<numClusters; k++) if( resp[i][k] > max ) max =
resp[i][k];
485 tmp = max + log( sum );
490 change = (
loglike - oldloglike);
495 bool GaussianMixtureModels::mstep(
const MatrixFloat &data ){
501 frac[k] = wgt/Float(numTrainingSamples);
502 for(UINT n=0; n<numInputDimensions; n++){
506 for(UINT j=0; j<numInputDimensions; j++){
509 sum +=
resp[m][k] * (data[m][n]-
mu[k][n]) * (data[m][j]-
mu[k][j]);
511 sigma[k][n][j] = sum/wgt;
520 inline void GaussianMixtureModels::SWAP(UINT &a,UINT &b){
526 bool GaussianMixtureModels::computeInvAndDet(){
529 invSigma.
resize(numClusters);
533 if( !lu.inverse( invSigma[k] ) ){
534 errorLog <<
"computeInvAndDet() - Matrix inversion failed for cluster " << k+1 << std::endl;
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
virtual bool train_(MatrixFloat &trainingData)
virtual bool predict_(VectorDouble &inputVector)
VectorDouble lndets
A vector holding the log determinants of SIGMA'k.
Float loglike
The current loglikelihood value of the models given the data.
std::string getClustererType() const
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int size)
This class implements a Gaussian Mixture Model clustering algorithm. The code is based on the GMM code...
bool copyBaseVariables(const Clusterer *clusterer)
virtual bool loadModelFromFile(std::fstream &file)
bool loadClustererSettingsFromFile(std::fstream &file)
UINT predictedClusterLabel
Stores the predicted cluster label from the most recent predict( )
virtual bool saveModelToFile(std::fstream &file) const
bool saveClustererSettingsToFile(std::fstream &file) const
MatrixFloat mu
A matrix holding the estimated mean values of each Gaussian.
UINT numClusters
Number of clusters in the model.
unsigned int getNumRows() const
unsigned int getNumCols() const
GaussianMixtureModels & operator=(const GaussianMixtureModels &rhs)
VectorDouble frac
A vector holding the P(k)'s.
Vector< MinMax > getRanges() const
int getRandomNumberInt(int minRange, int maxRange)
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int r, const unsigned int c)
virtual bool deepCopyFrom(const Clusterer *clusterer)
GaussianMixtureModels(const UINT numClusters=10, const UINT minNumEpochs=5, const UINT maxNumEpochs=1000, const Float minChange=1.0e-5)
virtual ~GaussianMixtureModels()
MatrixFloat resp
The responsibility matrix.
UINT numTrainingSamples
The number of samples in the training data.