#define GRT_DLL_EXPORTS
#include "DiscreteHiddenMarkovModel.h"

GRT_BEGIN_NAMESPACE

DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel() : MLBase( "DiscreteHiddenMarkovModel" )
{
    numRandomTrainingIterations = 5;
    modelType = HMM_LEFTRIGHT;
}
DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const UINT numStates,const UINT numSymbols,const UINT modelType,const UINT delta) : MLBase( "DiscreteHiddenMarkovModel" )
{
    this->numStates = numStates;
    this->numSymbols = numSymbols;
    this->modelType = modelType;
    this->delta = delta;
    numRandomTrainingIterations = 5;
    randomizeMatrices(numStates,numSymbols);
}
DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const MatrixFloat &a,const MatrixFloat &b,const VectorFloat &pi,const UINT modelType,const UINT delta) : MLBase( "DiscreteHiddenMarkovModel" )
{
    numRandomTrainingIterations = 5;

    //The sizes must agree: a is (numStates x numStates), b is (numStates x numSymbols) and pi has numStates elements
    if( a.getNumRows() == b.getNumRows() && a.getNumRows() == pi.size() ){
        this->a = a;
        this->b = b;
        this->pi = pi;
        this->modelType = modelType;
        this->delta = delta;
        numStates = b.getNumRows();
        numSymbols = b.getNumCols();
    }else{
        errorLog << "DiscreteHiddenMarkovModel(...) - The a,b,pi sizes are invalid!" << std::endl;
    }
}
DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const DiscreteHiddenMarkovModel &rhs) : MLBase( "DiscreteHiddenMarkovModel" )
{
    this->numStates = rhs.numStates;
    this->numSymbols = rhs.numSymbols;
    this->delta = rhs.delta;
    this->numRandomTrainingIterations = rhs.numRandomTrainingIterations;
    this->cThreshold = rhs.cThreshold;
    this->modelType = rhs.modelType;
    this->logLikelihood = rhs.logLikelihood;
    this->a = rhs.a;
    this->b = rhs.b;
    this->pi = rhs.pi;
    this->trainingLog = rhs.trainingLog;
}
DiscreteHiddenMarkovModel::~DiscreteHiddenMarkovModel(){
}
bool DiscreteHiddenMarkovModel::resetModel(const UINT numStates,const UINT numSymbols,const UINT modelType,const UINT delta){
    this->numStates = numStates;
    this->numSymbols = numSymbols;
    this->modelType = modelType;
    this->delta = delta;
    return randomizeMatrices(numStates,numSymbols);
}
bool DiscreteHiddenMarkovModel::randomizeMatrices(const UINT numStates,const UINT numSymbols){

    this->numStates = numStates;
    this->numSymbols = numSymbols;
    a.resize(numStates,numStates);
    b.resize(numStates,numSymbols);
    pi.resize(numStates);

    //Fill a, b and pi with random values, then apply the model-type constraints
    Random random;
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numStates; j++) a[i][j] = random.getRandomNumberUniform(0.9,1);
        for(UINT j=0; j<numSymbols; j++) b[i][j] = random.getRandomNumberUniform(0.9,1);
        pi[i] = random.getRandomNumberUniform(0.9,1);
    }

    switch( modelType ){
        case HMM_ERGODIC:
            //No constraints: every state may transition to every other state
            break;
        case HMM_LEFTRIGHT:
            //Zero any transition that moves backwards, or jumps forward by more than delta states
            for(UINT i=0; i<numStates; i++)
                for(UINT j=0; j<numStates; j++)
                    if((j<i) || (j>i+delta)) a[i][j] = 0.0;

            //A left-right model must start in the first state
            for(UINT i=0; i<numStates; i++){
                pi[i] = i==0 ? 1 : 0;
            }
            break;
        default:
            throw("HMM_ERROR: Unknown model type!");
            break;
    }

    //Normalize a, b and pi so each row is a valid probability distribution
    Float sum = 0.0;
    for (UINT i=0; i<numStates; i++) {
        sum = 0.0;
        for (UINT j=0; j<numStates; j++) sum += a[i][j];
        for (UINT j=0; j<numStates; j++) a[i][j] /= sum;
    }
    for (UINT i=0; i<numStates; i++) {
        sum = 0.0;
        for (UINT k=0; k<numSymbols; k++) sum += b[i][k];
        for (UINT k=0; k<numSymbols; k++) b[i][k] /= sum;
    }
    sum = 0.0;
    for (UINT i=0; i<numStates; i++) sum += pi[i];
    for (UINT i=0; i<numStates; i++) pi[i] /= sum;

    return true;
}
Float DiscreteHiddenMarkovModel::predict(const UINT newSample){
    //Add the new symbol to the observation buffer, then score the buffered sequence with the forward algorithm
    observationSequence.push_back( newSample );
    return predict( observationSequence.getData() );
}
Float DiscreteHiddenMarkovModel::predict(const Vector<UINT> &obs){

    const int N = (int)numStates;
    const int T = (int)obs.size();
    int t,i,j = 0;
    MatrixFloat alpha(T,numStates);
    VectorFloat c(T);

    //Forward algorithm, step 1: initialization at t=0, with scaling
    t = 0;
    c[t] = 0.0;
    for(i=0; i<N; i++){
        alpha[t][i] = pi[i]*b[i][ obs[t] ];
        c[t] += alpha[t][i];
    }
    c[t] = 1.0/c[t];
    for(i=0; i<N; i++) alpha[t][i] *= c[t];

    //Step 2: induction for t=1..T-1, scaling each column of alpha
    for(t=1; t<T; t++){
        c[t] = 0.0;
        for(j=0; j<N; j++){
            alpha[t][j] = 0.0;
            for(i=0; i<N; i++) alpha[t][j] += alpha[t-1][i] * a[i][j];
            alpha[t][j] *= b[j][obs[t]];
            c[t] += alpha[t][j];
        }
        c[t] = 1.0/c[t];
        for(j=0; j<N; j++) alpha[t][j] *= c[t];
    }

    //Track the most likely state at each time step
    if( int(estimatedStates.size()) != T ) estimatedStates.resize(T);
    for(t=0; t<T; t++){
        Float maxValue = 0;
        for(i=0; i<N; i++){
            if( alpha[t][i] > maxValue ){
                maxValue = alpha[t][i];
                estimatedStates[t] = i;
            }
        }
    }

    //Termination: recover the log likelihood from the scaling coefficients
    Float loglikelihood = 0.0;
    for(t=0; t<T; t++) loglikelihood += log( c[t] );
    return -loglikelihood;
}
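/*
 The recursion above is the standard scaled forward algorithm (Rabiner-style).
 With scaling coefficients c_t = 1 / sum_i alpha_t(i), the scaled variables stay
 in numerical range for long sequences, and the log likelihood is recovered as

     log P(obs | model) = -sum_{t=0}^{T-1} log c_t

 which is why the function returns -loglikelihood after summing log(c[t]).
*/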
Float DiscreteHiddenMarkovModel::predictLogLikelihood(const Vector<UINT> &obs){

    const UINT T = (unsigned int)obs.size();
    UINT t,i,j,minState = 0;
    MatrixFloat alpha(T,numStates);
    Float minWeight = 0;
    Float weight = 0;

    //Base case: the negative log likelihood of starting in state i and emitting obs[0]
    t = 0;
    for(i=0; i<numStates; i++){
        alpha[t][i] = (-1.0 * log(pi[i])) - log(b[i][ obs[t] ]);
    }

    //Induction: keep the minimum-weight (most likely) path into each state
    for (t=1; t<T; t++){
        for (j=0; j<numStates; j++){
            minWeight = alpha[t-1][0] - log(a[0][j]);
            for (i=1; i<numStates; i++){
                weight = alpha[t-1][i] - log(a[i][j]);
                if (weight < minWeight){
                    minWeight = weight;
                }
            }
            alpha[t][j] = minWeight - log(b[j][ obs[t] ]);
        }
    }

    //Termination: find the minimum weight at the last time step
    minWeight = alpha[T-1][0];
    for(i=1; i<numStates; i++){
        if (alpha[T-1][i] < minWeight){
            minState = i;
            minWeight = alpha[T-1][i];
        }
    }

    return exp(-minWeight);
}
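/*
 Unlike predict(...), which sums over all state paths, this is a Viterbi-style pass:
 working with weights of the form -log(p), the min over incoming states picks the
 single most likely path, and exp(-minWeight) converts the best path's accumulated
 negative log probability back to a likelihood.
*/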
bool DiscreteHiddenMarkovModel::forwardBackward(HMMTrainingObject &hmm,const Vector<UINT> &obs){

    const int N = (int)numStates;
    const int T = (int)obs.size();
    int t,i,j = 0;

    //Forward pass: the same scaled recursion as predict(...), stored in the training object
    t = 0;
    hmm.c[t] = 0.0;
    for(i=0; i<N; i++){
        hmm.alpha[t][i] = pi[i]*b[i][ obs[t] ];
        hmm.c[t] += hmm.alpha[t][i];
    }
    hmm.c[t] = 1.0/hmm.c[t];
    for(i=0; i<N; i++) hmm.alpha[t][i] *= hmm.c[t];

    for(t=1; t<T; t++){
        hmm.c[t] = 0.0;
        for(j=0; j<N; j++){
            hmm.alpha[t][j] = 0.0;
            for(i=0; i<N; i++) hmm.alpha[t][j] += hmm.alpha[t-1][i] * a[i][j];
            hmm.alpha[t][j] *= b[j][obs[t]];
            hmm.c[t] += hmm.alpha[t][j];
        }
        hmm.c[t] = 1.0/hmm.c[t];
        for(j=0; j<N; j++) hmm.alpha[t][j] *= hmm.c[t];
    }

    //Termination: accumulate the log scaling coefficients
    hmm.pk = 0.0;
    for(t=0; t<T; t++) hmm.pk += log( hmm.c[t] );

    if( grt_isinf(hmm.pk) ){
        return false;
    }

    //Backward pass: init beta at t=T-1, scaled with the same coefficients as alpha
    t = T-1;
    for(i=0; i<N; i++) hmm.beta[t][i] = 1.0;
    for(i=0; i<N; i++) hmm.beta[t][i] *= hmm.c[t];

    for(t=T-2; t>=0; t--){
        for(i=0; i<N; i++){
            hmm.beta[t][i] = 0.0;
            for(j=0; j<N; j++) hmm.beta[t][i] += a[i][j] * b[j][ obs[t+1] ] * hmm.beta[t+1][j];
            hmm.beta[t][i] *= hmm.c[t];
        }
    }

    return true;
}
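/*
 Scaling beta with the same c_t coefficients as alpha keeps the products
 alpha_t(i) * beta_t(i) consistently scaled, so the gamma/epsilon ratios computed
 during re-estimation (in train_ below) cancel the scaling factors exactly.
*/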
bool DiscreteHiddenMarkovModel::train(const Vector< Vector<UINT> > &trainingData){

    observationSequence.clear();
    estimatedStates.clear();
    trainingIterationLog.clear();

    UINT n,currentIter, bestIndex = 0;
    Float newLoglikelihood, bestLogValue = 0;

    if( numRandomTrainingIterations > 1 ){

        //Buffers to track the a/b matrices and log likelihood of each random restart
        Vector< MatrixFloat > aTracker( numRandomTrainingIterations );
        Vector< MatrixFloat > bTracker( numRandomTrainingIterations );
        VectorFloat loglikelihoodTracker( numRandomTrainingIterations );

        UINT maxNumTestIter = maxNumEpochs > 10 ? 10 : maxNumEpochs;

        //Run a short burst of training from each random starting point
        for(n=0; n<numRandomTrainingIterations; n++){
            randomizeMatrices(numStates,numSymbols);

            if( !train_(trainingData,maxNumTestIter,currentIter,newLoglikelihood) ){
                return false;
            }
            aTracker[n] = a;
            bTracker[n] = b;
            loglikelihoodTracker[n] = newLoglikelihood;
        }

        //Pick the restart that reached the best log likelihood
        bestLogValue = loglikelihoodTracker[0];
        for(n=1; n<numRandomTrainingIterations; n++){
            if(bestLogValue < loglikelihoodTracker[n]){
                bestLogValue = loglikelihoodTracker[n];
                bestIndex = n;
            }
        }

        //Use the best a and b as the starting values for the full training run
        a = aTracker[bestIndex];
        b = bTracker[bestIndex];

    }else{
        randomizeMatrices(numStates,numSymbols);
    }

    //Perform the actual training
    if( !train_(trainingData,maxNumEpochs,currentIter,newLoglikelihood) ){
        return false;
    }

    //Size the observation and state buffers to the average training sequence length
    const UINT numObs = (unsigned int)trainingData.size();
    UINT k;
    UINT averageObsLength = 0;
    for(k=0; k<numObs; k++){
        const UINT T = (unsigned int)trainingData[k].size();
        averageObsLength += T;
    }
    averageObsLength = (UINT)floor( averageObsLength/Float(numObs) );
    observationSequence.resize( averageObsLength );
    estimatedStates.resize( averageObsLength );

    return true;
}
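/*
 Illustrative sketch (not part of the original source): training from a set of
 discrete observation sequences. The sequences below are hypothetical.

     using namespace GRT;
     DiscreteHiddenMarkovModel hmm( 4, 8, HMM_LEFTRIGHT, 1 );

     Vector< Vector<UINT> > trainingData(2);
     for(UINT t=0; t<5; t++) trainingData[0].push_back( t % 4 );     //hypothetical sequence
     for(UINT t=0; t<6; t++) trainingData[1].push_back( (t/2) % 4 ); //hypothetical sequence

     if( hmm.train( trainingData ) ){
         Float logLikelihood = hmm.predict( trainingData[0] );
     }
*/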
bool DiscreteHiddenMarkovModel::train_(const Vector< Vector<UINT> > &obs,const UINT maxIter,UINT &currentIter,Float &newLoglikelihood){

    const UINT numObs = (unsigned int)obs.size();
    UINT i,j,k,t = 0;
    Float num,denom,oldLoglikelihood = 0;
    bool keepTraining = true;
    trainingIterationLog.clear();

    //Per-sequence training objects, plus the epsilon and gamma re-estimation variables
    Vector< HMMTrainingObject > hmms( numObs );
    Vector< Vector< MatrixFloat > > epsilon( numObs );
    Vector< MatrixFloat > gamma( numObs );

    for(k=0; k<numObs; k++){
        const UINT T = (UINT)obs[k].size();
        gamma[k].resize(T,numStates);
        epsilon[k].resize(T);
        for(t=0; t<T; t++) epsilon[k][t].resize(numStates,numStates);

        hmms[k].alpha.resize(T,numStates);
        hmms[k].beta.resize(T,numStates);
        hmms[k].c.resize(T);
    }

    currentIter = 0;
    oldLoglikelihood = 0;
    newLoglikelihood = 0;

    do{
        newLoglikelihood = 0.0;

        //Run the forward-backward algorithm for each training example
        for(k=0; k<numObs; k++){
            if( !forwardBackward(hmms[k],obs[k]) ){
                return false;
            }
            newLoglikelihood += hmms[k].pk;
        }

        //Set the new log likelihood as the average over the observations
        newLoglikelihood /= numObs;

        trainingIterationLog.push_back( newLoglikelihood );

        if( ++currentIter >= maxIter ){ keepTraining = false; trainingLog << "Max Iter Reached! Stopping Training" << std::endl; }
        if( fabs(newLoglikelihood-oldLoglikelihood) < minChange && currentIter > 1 ){ keepTraining = false; trainingLog << "Min Improvement Reached! Stopping Training" << std::endl; }

        trainingLog << "Iter: " << currentIter << " logLikelihood: " << newLoglikelihood << " change: " << oldLoglikelihood - newLoglikelihood << std::endl;

        oldLoglikelihood = newLoglikelihood;

        //Only re-estimate A, B and Pi if training should continue
        if( keepTraining ){

            //Re-estimate A
            for(i=0; i<numStates; i++){

                //The denominator of A is independent of j
                denom = 0;
                for(k=0; k<numObs; k++){
                    for(t=0; t<obs[k].size()-1; t++){
                        denom += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                    }
                }

                if( denom > 0 ){
                    for(j=0; j<numStates; j++){
                        num = 0;
                        for(k=0; k<numObs; k++){
                            for(t=0; t<obs[k].size()-1; t++){
                                num += hmms[k].alpha[t][i] * a[i][j] * b[j][ obs[k][t+1] ] * hmms[k].beta[t+1][j];
                            }
                        }
                        a[i][j] = num/denom;
                    }
                }else{
                    errorLog << "Denom is zero for A!" << std::endl;
                    return false;
                }
            }

            //Re-estimate B
            bool renormB = false;
            for(i=0; i<numStates; i++){
                for(j=0; j<numSymbols; j++){
                    num = 0.0;
                    denom = 0.0;
                    for(k=0; k<numObs; k++){
                        const UINT T = (unsigned int)obs[k].size();
                        for(t=0; t<T; t++){
                            if( obs[k][t] == j ){
                                num += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                            }
                            denom += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                        }
                    }

                    if( denom == 0 ){
                        errorLog << "Denominator is zero for B!" << std::endl;
                        return false;
                    }

                    //If symbol j was never emitted from state i the estimate is zero; flag B for renormalization
                    if( num > 0 ) b[i][j] = denom > 0 ? num/denom : 1.0e-5;
                    else{ b[i][j] = 0; renormB = true; }
                }
            }

            if( renormB ){
                Float sum;
                for (UINT i=0; i<numStates; i++) {
                    sum = 0.0;
                    for (UINT k=0; k<numSymbols; k++){
                        b[i][k] += 1.0/numSymbols; //Add a small value so no emission probability is exactly zero
                        sum += b[i][k];
                    }
                    for (UINT k=0; k<numSymbols; k++) b[i][k] /= sum;
                }
            }

            //Re-estimate Pi, but only for ergodic models; a left-right model always starts in state 0
            if (modelType==HMM_ERGODIC ){
                for(k=0; k<numObs; k++){
                    const UINT T = (unsigned int)obs[k].size();

                    //Compute epsilon
                    for(t=0; t<T-1; t++){
                        denom = 0.0;
                        for(i=0; i<numStates; i++){
                            for(j=0; j<numStates; j++){
                                epsilon[k][t][i][j] = hmms[k].alpha[t][i] * a[i][j] * b[j][ obs[k][t+1] ] * hmms[k].beta[t+1][j];
                                denom += epsilon[k][t][i][j];
                            }
                        }
                        //Normalize epsilon
                        for(i=0; i<numStates; i++){
                            for(j=0; j<numStates; j++){
                                if(denom!=0) epsilon[k][t][i][j] /= denom;
                                else{ epsilon[k][t][i][j] = 0; }
                            }
                        }
                    }

                    //Compute gamma by summing epsilon over the destination states
                    for(t=0; t<T-1; t++){
                        for(i=0; i<numStates; i++){
                            gamma[k][t][i] = 0.0;
                            for(j=0; j<numStates; j++)
                                gamma[k][t][i] += epsilon[k][t][i][j];
                        }
                    }
                }

                Float sum;
                for(i=0; i<numStates; i++){
                    sum = 0.0;
                    for(k=0; k<numObs; k++){
                        sum += gamma[k][0][i];
                    }
                    pi[i] = sum / numObs;
                }
            }
        }

    }while(keepTraining);

    return true;
}
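/*
 The update rules above are the standard Baum-Welch (EM) re-estimation equations:

     a[i][j]  <-  sum_t epsilon_t(i,j) / sum_t gamma_t(i)
     b[i][j]  <-  sum_{t : obs[t]==j} gamma_t(i) / sum_t gamma_t(i)
     pi[i]    <-  average of gamma_0(i) over the training sequences

 where gamma_t(i) is the probability of being in state i at time t, and
 epsilon_t(i,j) is the probability of transitioning from state i to state j at
 time t, both computed from the scaled alpha/beta variables of the
 forward-backward pass.
*/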
bool DiscreteHiddenMarkovModel::reset(){
    //Reset the observation buffer by filling it with zeros
    for(UINT i=0; i<observationSequence.getSize(); i++){
        observationSequence.push_back( 0 );
    }
    return true;
}
bool DiscreteHiddenMarkovModel::save( std::fstream &file ) const{

    if( !file.is_open() ){
        errorLog << "save( fstream &file ) - File is not open!" << std::endl;
        return false;
    }

    //Write the file header
    file << "DISCRETE_HMM_MODEL_FILE_V1.0\n";

    //Write the base settings to the file
    if( !MLBase::saveBaseSettingsToFile(file) ){
        errorLog << "save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
        return false;
    }

    file << "NumStates: " << numStates << std::endl;
    file << "NumSymbols: " << numSymbols << std::endl;
    file << "ModelType: " << modelType << std::endl;
    file << "Delta: " << delta << std::endl;
    file << "Threshold: " << cThreshold << std::endl;
    file << "NumRandomTrainingIterations: " << numRandomTrainingIterations << std::endl;

    file << "A:\n";
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numStates; j++){
            file << a[i][j];
            if( j+1 < numStates ) file << "\t";
        }
        file << std::endl;
    }

    file << "B:\n";
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numSymbols; j++){
            file << b[i][j];
            if( j+1 < numSymbols ) file << "\t";
        }
        file << std::endl;
    }

    file << "Pi:\n";
    for(UINT i=0; i<numStates; i++){
        file << pi[i];
        if( i+1 < numStates ) file << "\t";
    }
    file << std::endl;

    return true;
}
bool DiscreteHiddenMarkovModel::load( std::fstream &file ){

    if( !file.is_open() ){
        errorLog << "load( fstream &file ) - File is not open!" << std::endl;
        return false;
    }

    std::string word;
    file >> word;

    //Find the file type header
    if(word != "DISCRETE_HMM_MODEL_FILE_V1.0"){
        errorLog << "load( fstream &file ) - Could not find Model File Header!" << std::endl;
        return false;
    }

    //Load the base settings from the file
    if( !MLBase::loadBaseSettingsFromFile(file) ){
        errorLog << "load(string filename) - Failed to load base settings from file!" << std::endl;
        return false;
    }

    file >> word;
    if(word != "NumStates:"){
        errorLog << "load( fstream &file ) - Could not find the NumStates header." << std::endl;
        return false;
    }
    file >> numStates;

    file >> word;
    if(word != "NumSymbols:"){
        errorLog << "load( fstream &file ) - Could not find the NumSymbols header." << std::endl;
        return false;
    }
    file >> numSymbols;

    file >> word;
    if(word != "ModelType:"){
        errorLog << "load( fstream &file ) - Could not find the modelType for the header." << std::endl;
        return false;
    }
    file >> modelType;

    file >> word;
    if(word != "Delta:"){
        errorLog << "load( fstream &file ) - Could not find the Delta for the header." << std::endl;
        return false;
    }
    file >> delta;

    file >> word;
    if(word != "Threshold:"){
        errorLog << "load( fstream &file ) - Could not find the Threshold for the header." << std::endl;
        return false;
    }
    file >> cThreshold;

    file >> word;
    if(word != "NumRandomTrainingIterations:"){
        errorLog << "load( fstream &file ) - Could not find the numRandomTrainingIterations header." << std::endl;
        return false;
    }
    file >> numRandomTrainingIterations;

    a.resize(numStates,numStates);
    b.resize(numStates,numSymbols);
    pi.resize(numStates);

    //Load the A matrix
    file >> word;
    if(word != "A:"){
        errorLog << "load( fstream &file ) - Could not find the A matrix header." << std::endl;
        return false;
    }
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numStates; j++){
            file >> a[i][j];
        }
    }

    //Load the B matrix
    file >> word;
    if(word != "B:"){
        errorLog << "load( fstream &file ) - Could not find the B matrix header." << std::endl;
        return false;
    }
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numSymbols; j++){
            file >> b[i][j];
        }
    }

    //Load Pi
    file >> word;
    if(word != "Pi:"){
        errorLog << "load( fstream &file ) - Could not find the Pi matrix header." << std::endl;
        return false;
    }
    for(UINT i=0; i<numStates; i++){
        file >> pi[i];
    }

    return true;
}
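/*
 Illustrative sketch (not part of the original source): a save/load round trip
 using a hypothetical file name.

     std::fstream out( "hmm_model.grt", std::ios::out );
     hmm.save( out );
     out.close();

     DiscreteHiddenMarkovModel loadedModel;
     std::fstream in( "hmm_model.grt", std::ios::in );
     loadedModel.load( in );
     in.close();
*/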
bool DiscreteHiddenMarkovModel::print() const{

    trainingLog << "A: " << std::endl;
    for(UINT i=0; i<a.getNumRows(); i++){
        for(UINT j=0; j<a.getNumCols(); j++){
            trainingLog << a[i][j] << "\t";
        }
        trainingLog << std::endl;
    }

    trainingLog << "B: " << std::endl;
    for(UINT i=0; i<b.getNumRows(); i++){
        for(UINT j=0; j<b.getNumCols(); j++){
            trainingLog << b[i][j] << "\t";
        }
        trainingLog << std::endl;
    }

    trainingLog << "Pi: ";
    for(UINT i=0; i<pi.size(); i++){
        trainingLog << pi[i] << "\t";
    }
    trainingLog << std::endl;

    //Sanity check: warn if any row of A or B does not sum to (approximately) 1
    Float sum = 0.0;
    for(UINT i=0; i<a.getNumRows(); i++){
        sum = 0.0;
        for(UINT j=0; j<a.getNumCols(); j++) sum += a[i][j];
        if( sum <= 0.99 || sum >= 1.01 ) warningLog << "WARNING: A Row " << i << " Sum: " << sum << std::endl;
    }
    for(UINT i=0; i<b.getNumRows(); i++){
        sum = 0.0;
        for(UINT j=0; j<b.getNumCols(); j++) sum += b[i][j];
        if( sum <= 0.99 || sum >= 1.01 ) warningLog << "WARNING: B Row " << i << " Sum: " << sum << std::endl;
    }

    return true;
}
VectorFloat DiscreteHiddenMarkovModel::getTrainingIterationLog() const{
    return trainingIterationLog;
}

GRT_END_NAMESPACE