21 #define GRT_DLL_EXPORTS 28 ContinuousHiddenMarkovModel::ContinuousHiddenMarkovModel(
const UINT downsampleFactor,
const UINT delta,
const bool autoEstimateSigma,
const Float sigma) :
MLBase(
"ContinuousHiddenMarkovModel" )
32 this->downsampleFactor = downsampleFactor;
34 this->autoEstimateSigma = autoEstimateSigma;
36 modelType = HMMModelTypes::HMM_LEFTRIGHT;
43 this->downsampleFactor = rhs.downsampleFactor;
47 this->sigma = rhs.sigma;
48 this->autoEstimateSigma = rhs.autoEstimateSigma;
53 this->alpha = rhs.alpha;
56 this->obsSequence = rhs.obsSequence;
59 this->delta = rhs.
delta;
65 ContinuousHiddenMarkovModel::~ContinuousHiddenMarkovModel(){
72 this->downsampleFactor = rhs.downsampleFactor;
76 this->sigma = rhs.sigma;
77 this->autoEstimateSigma = rhs.autoEstimateSigma;
82 this->alpha = rhs.alpha;
85 this->obsSequence = rhs.obsSequence;
88 this->delta = rhs.
delta;
92 const MLBase *basePointer = &rhs;
102 errorLog <<
"predict_(VectorFloat &x) - The model is not trained!" << std::endl;
106 if( x.
getSize() != numInputDimensions ){
107 errorLog <<
"predict_(VectorFloat &x) - The input vector size (" << x.
getSize() <<
") does not match the number of input dimensions (" << numInputDimensions <<
")" << std::endl;
112 observationSequence.push_back( x );
115 for(
unsigned int i=0; i<observationSequence.getSize(); i++){
116 for(
unsigned int j=0; j<numInputDimensions; j++){
117 obsSequence[i][j] = observationSequence[i][j];
127 errorLog <<
"predict_( MatrixFloat &timeseries ) - The model is not trained!" << std::endl;
131 if( timeseries.
getNumCols() != numInputDimensions ){
132 errorLog <<
"predict_( MatrixFloat &timeseries ) - The matrix column size (" << timeseries.
getNumCols() <<
") does not match the number of input dimensions (" << numInputDimensions <<
")" << std::endl;
136 unsigned int t,i,j,k,index = 0;
141 const unsigned int timeseriesLength = (
unsigned int)timeseries.
getNumRows();
142 const unsigned int T = downsampleFactor < timeseriesLength ? (
unsigned int)floor( timeseriesLength / Float(downsampleFactor) ) : timeseriesLength;
143 const unsigned int K = downsampleFactor < timeseriesLength ? downsampleFactor : 1;
145 for(j=0; j<numInputDimensions; j++){
151 if( index < timeseriesLength ){
152 obs[i][j] += timeseries[index++][j];
162 if( alpha.getNumRows() != T || alpha.getNumCols() != numStates ) alpha.
resize(T,numStates);
163 if( (
unsigned int)c.size() != T ) c.resize(T);
164 if( (
unsigned int)estimatedStates.size() != T ) estimatedStates.resize(T);
171 for(i=0; i<numStates; i++){
172 alpha[t][i] = pi[i]*gauss(b,obs,sigmaStates,i,t,numInputDimensions);
176 if( alpha[t][i] > maxAlpha ){
177 maxAlpha = alpha[t][i];
178 estimatedStates[t] = i;
186 for(i=0; i<numStates; i++) alpha[t][i] *= c[t];
192 for(j=0; j<numStates; j++){
194 for(i=0; i<numStates; i++){
195 alpha[t][j] += alpha[t-1][i] * a[i][j];
197 alpha[t][j] *= gauss(b,obs,sigmaStates,j,t,numInputDimensions);
201 if( alpha[t][j] > maxAlpha ){
202 maxAlpha = alpha[t][j];
203 estimatedStates[t] = j;
211 for(j=0; j<numStates; j++) alpha[t][j] *= c[t];
216 for(t=0; t<T; t++) loglikelihood += log( c[t] );
217 loglikelihood = -loglikelihood;
220 phase = (estimatedStates[T-1]+1.0)/Float(numStates);
231 timeseriesLength = trainingData.getLength();
232 numStates = (
unsigned int)floor((
double)(timeseriesLength/downsampleFactor));
233 numInputDimensions = trainingData.getNumDimensions();
234 classLabel = trainingData.getClassLabel();
237 a.resize(numStates, numStates);
238 for(
unsigned int i=0; i<numStates; i++){
239 for(
unsigned int j=0; j<numStates; j++){
240 a[i][j] = 1.0/numStates;
245 b.resize(numStates, numInputDimensions);
247 unsigned int index = 0;
249 for(
unsigned int j=0; j<numInputDimensions; j++){
251 for(
unsigned int i=0; i<numStates; i++){
254 for(
unsigned int k=0; k<downsampleFactor; k++){
255 if( index < trainingData.getLength() ){
256 b[i][j] += trainingData[index++][j];
266 pi.resize(numStates);
270 for(UINT i=0; i<numStates; i++){
271 pi[i] = 1.0/numStates;
276 for(UINT i=0; i<numStates; i++){
278 for(UINT j=0; j<numStates; j++){
279 if((j<i) || (j>i+delta)) a[i][j] = 0.0;
283 for(UINT j=0; j<numStates; j++){
290 for(UINT i=0; i<numStates; i++){
291 pi[i] = i==0 ? 1 : 0;
295 throw(
"HMM_ERROR: Unkown model type!");
301 sigmaStates.resize( numStates, numInputDimensions );
303 if( autoEstimateSigma ){
306 MatrixFloat meanResults( numStates, numInputDimensions );
307 for(
unsigned int j=0; j<numInputDimensions; j++){
311 for(
unsigned int i=0; i<numStates; i++){
313 meanResults[i][j] = 0;
314 for(
unsigned int k=0; k<downsampleFactor; k++){
315 if( index < trainingData.getLength() ){
316 meanResults[i][j] += trainingData[index++][j];
321 meanResults[i][j] /= norm;
327 for(
unsigned int i=0; i<numStates; i++){
329 sigmaStates[i][j] = 0;
330 for(
unsigned int k=0; k<downsampleFactor; k++){
331 if( index < trainingData.getLength() ){
332 sigmaStates[i][j] += SQR( trainingData[index++][j]-meanResults[i][j] );
337 sigmaStates[i][j] = sqrt( 1.0/norm * sigmaStates[i][j] );
340 if( sigmaStates[i][j] < sigma ){
341 sigmaStates[i][j] = sigma;
347 sigmaStates.setAllValues(sigma);
351 observationSequence.resize( timeseriesLength,
VectorFloat(numInputDimensions,0) );
352 obsSequence.resize(timeseriesLength,numInputDimensions);
353 estimatedStates.resize( numStates );
367 for(
unsigned int i=0; i<observationSequence.getSize(); i++){
368 observationSequence.push_back(
VectorFloat(numInputDimensions,0) );
382 timeseriesLength = 0;
388 observationSequence.clear();
390 estimatedStates.clear();
399 trainingLog <<
"A: " << std::endl;
400 for(UINT i=0; i<a.getNumRows(); i++){
401 for(UINT j=0; j<a.getNumCols(); j++){
402 trainingLog << a[i][j] <<
"\t";
404 trainingLog << std::endl;
407 trainingLog <<
"B: " << std::endl;
408 for(UINT i=0; i<b.getNumRows(); i++){
409 for(UINT j=0; j<b.getNumCols(); j++){
410 trainingLog << b[i][j] <<
"\t";
412 trainingLog << std::endl;
415 trainingLog <<
"Pi: ";
416 for(
size_t i=0; i<pi.size(); i++){
417 trainingLog << pi[i] <<
"\t";
419 trainingLog << std::endl;
421 trainingLog <<
"SigmaStates: ";
422 for(UINT i=0; i<sigmaStates.getNumRows(); i++){
423 for(UINT j=0; j<sigmaStates.getNumCols(); j++){
424 trainingLog << sigmaStates[i][j] <<
"\t";
426 trainingLog << std::endl;
428 trainingLog << std::endl;
433 for(UINT i=0; i<a.getNumRows(); i++){
435 for(UINT j=0; j<a.getNumCols(); j++) sum += a[i][j];
436 if( sum <= 0.99 || sum >= 1.01 ) warningLog <<
"WARNING: A Row " << i <<
" Sum: "<< sum << std::endl;
// Sets the downsample factor used by this model.
// @param downsampleFactor the new factor; only values greater than zero are stored
// @return bool; the return statements are not visible in this excerpt
// NOTE(review): the listing skips some lines of this function (e.g. 447, 449-450),
// so any additional work done on success cannot be confirmed from here.
445 bool ContinuousHiddenMarkovModel::setDownsampleFactor(
const UINT downsampleFactor){
446 if( downsampleFactor > 0 ){
// Accepted: store the new factor in the member of the same name.
448 this->downsampleFactor = downsampleFactor;
// A warning is logged for a rejected value (presumably the zero case, since the
// parameter is unsigned) -- the branch structure between lines 448 and 451 is not shown.
451 warningLog <<
"setDownsampleFactor(const UINT downsampleFactor) - Failed to set downsample factor, it must be greater than zero!" << std::endl;
456 if( modelType == HMM_ERGODIC || modelType == HMM_LEFTRIGHT ){
458 this->modelType = modelType;
461 warningLog <<
"setModelType(const UINT modelType) - Failed to set model type, unknown type!" << std::endl;
471 warningLog <<
"setDelta(const UINT delta) - Failed to set delta, it must be greater than zero!" << std::endl;
// Sets the sigma value used by this model.
// @param sigma the new sigma value
// @return bool; the return statements are not visible in this excerpt
// NOTE(review): the validity check on sigma (lines 476-478 of the listing) is not
// visible here; the warning text below indicates the value must be greater than zero.
475 bool ContinuousHiddenMarkovModel::setSigma(
const Float sigma){
// If sigma is not being auto-estimated and the model is already trained,
// overwrite every entry of the per-state sigma matrix with the new value.
479 if( !autoEstimateSigma && trained ){
480 sigmaStates.setAllValues(sigma);
// Warning emitted when the value is rejected.
484 warningLog <<
"setSigma(const Float sigma) - Failed to set sigma, it must be greater than zero!" << std::endl;
// Stores the auto-estimate-sigma flag on this model.
// @param autoEstimateSigma the new flag value
// @return bool; the return statement is not visible in this excerpt
// NOTE(review): lines 489-491 of the listing are not shown, so any extra work
// performed before the assignment cannot be confirmed from here.
488 bool ContinuousHiddenMarkovModel::setAutoEstimateSigma(
const bool autoEstimateSigma){
492 this->autoEstimateSigma = autoEstimateSigma;
// Accumulates a product of N one-dimensional Gaussian density terms.
// For each dimension n in [0, N) the term is
//   1/(sigma[i][n]*SQRT_TWO_PI) * exp( -(x[i][n]-y[j][n])^2 / (2*sigma[i][n]^2) ).
// @param x     matrix indexed by row i (the prediction code in this file passes b)
// @param y     matrix indexed by row j (the prediction code in this file passes obs)
// @param sigma matrix of per-dimension sigma values, indexed by row i
// @param i     row used for x and sigma
// @param j     row used for y
// @param N     number of columns (dimensions) to multiply over
// @return Float; presumably the accumulated product z -- the declaration of z
//         (line 498 of the listing) and the return statement are not visible here.
// NOTE(review): no guard against sigma[i][n] == 0 is visible in this excerpt.
497 Float ContinuousHiddenMarkovModel::gauss(
const MatrixFloat &x,
const MatrixFloat &y,
const MatrixFloat &sigma,
const unsigned int i,
const unsigned int j,
const unsigned int N ){
499 for(
unsigned int n=0; n<N; n++){
// Multiply in the density term for dimension n.
500 z *= (1.0/( sigma[i][n] * SQRT_TWO_PI )) * exp( - SQR(x[i][n]-y[j][n])/(2.0*SQR(sigma[i][n])) );
509 errorLog <<
"save( fstream &file ) - File is not open!" << std::endl;
514 file <<
"CONTINUOUS_HMM_MODEL_FILE_V1.0\n";
518 errorLog <<
"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
522 file <<
"DownsampleFactor: " << downsampleFactor << std::endl;
523 file <<
"NumStates: " << numStates << std::endl;
524 file <<
"ClassLabel: " << classLabel << std::endl;
525 file <<
"TimeseriesLength: " << timeseriesLength << std::endl;
526 file <<
"Sigma: " << sigma << std::endl;
527 file <<
"AutoEstimateSigma: " << autoEstimateSigma << std::endl;
528 file <<
"ModelType: " << modelType << std::endl;
529 file <<
"Delta: " << delta << std::endl;
530 file <<
"Threshold: " << cThreshold << std::endl;
534 for(UINT i=0; i<numStates; i++){
535 for(UINT j=0; j<numStates; j++){
537 if( j+1 < numStates ) file <<
"\t";
542 for(UINT i=0; i<numStates; i++){
543 for(UINT j=0; j<numInputDimensions; j++){
545 if( j+1 < numInputDimensions ) file <<
"\t";
550 for(UINT i=0; i<numStates; i++){
552 if( i+1 < numStates ) file <<
"\t";
556 file <<
"SigmaStates: ";
557 for(UINT i=0; i<numStates; i++){
558 for(UINT j=0; j<numInputDimensions; j++){
559 file << sigmaStates[i][j];
560 if( j+1 < numInputDimensions ) file <<
"\t";
576 errorLog <<
"load( fstream &file ) - File is not open!" << std::endl;
585 if(word !=
"CONTINUOUS_HMM_MODEL_FILE_V1.0"){
586 errorLog <<
"load( fstream &file ) - Could not find Model File Header!" << std::endl;
592 errorLog <<
"load(string filename) - Failed to load base settings from file!" << std::endl;
597 if(word !=
"DownsampleFactor:"){
598 errorLog <<
"load( fstream &file ) - Could not find the DownsampleFactor header." << std::endl;
601 file >> downsampleFactor;
604 if(word !=
"NumStates:"){
605 errorLog <<
"load( fstream &file ) - Could not find the NumStates header." << std::endl;
611 if(word !=
"ClassLabel:"){
612 errorLog <<
"load( fstream &file ) - Could not find the ClassLabel header." << std::endl;
618 if(word !=
"TimeseriesLength:"){
619 errorLog <<
"load( fstream &file ) - Could not find the TimeseriesLength header." << std::endl;
622 file >> timeseriesLength;
625 if(word !=
"Sigma:"){
626 errorLog <<
"load( fstream &file ) - Could not find the Sigma for the header." << std::endl;
632 if(word !=
"AutoEstimateSigma:"){
633 errorLog <<
"load( fstream &file ) - Could not find the AutoEstimateSigma for the header." << std::endl;
636 file >> autoEstimateSigma;
639 if(word !=
"ModelType:"){
640 errorLog <<
"load( fstream &file ) - Could not find the ModelType for the header." << std::endl;
646 if(word !=
"Delta:"){
647 errorLog <<
"load( fstream &file ) - Could not find the Delta for the header." << std::endl;
653 if(word !=
"Threshold:"){
654 errorLog <<
"load( fstream &file ) - Could not find the Threshold for the header." << std::endl;
660 a.resize(numStates,numStates);
661 b.resize(numStates,numInputDimensions);
662 pi.resize(numStates);
663 sigmaStates.resize(numStates,numInputDimensions);
668 errorLog <<
"load( fstream &file ) - Could not find the A matrix header." << std::endl;
673 for(UINT i=0; i<numStates; i++){
674 for(UINT j=0; j<numStates; j++){
681 errorLog <<
"load( fstream &file ) - Could not find the B matrix header." << std::endl;
686 for(UINT i=0; i<numStates; i++){
687 for(UINT j=0; j<numInputDimensions; j++){
694 errorLog <<
"load( fstream &file ) - Could not find the Pi header." << std::endl;
699 for(UINT i=0; i<numStates; i++){
704 if(word !=
"SigmaStates:"){
705 errorLog <<
"load( fstream &file ) - Could not find the SigmaStates header." << std::endl;
710 for(UINT i=0; i<numStates; i++){
711 for(UINT j=0; j<numInputDimensions; j++){
712 file >> sigmaStates[i][j];
717 observationSequence.resize( timeseriesLength,
VectorFloat(numInputDimensions,0) );
718 obsSequence.resize(timeseriesLength,numInputDimensions);
719 estimatedStates.resize( numStates );
bool saveBaseSettingsToFile(std::fstream &file) const
virtual bool load(std::fstream &file) override
virtual bool save(std::fstream &file) const override
virtual bool predict_(VectorFloat &inputVector)
Vector< UINT > estimatedStates
The estimated states for prediction.
Float cThreshold
The classification threshold for this model.
This class acts as the main interface for using a Hidden Markov Model.
MatrixFloat a
The transition probability matrix.
Float loglikelihood
The log likelihood of an observation sequence given the model, calculated by the forward method...
VectorFloat pi
The state start probability vector.
UINT modelType
The model type (LEFTRIGHT or ERGODIC).
virtual bool predict_(VectorFloat &x) override
bool setDelta(const UINT delta)
UINT delta
The number of states a model can move to in a LEFTRIGHT model.
virtual bool clear() override
CircularBuffer< VectorFloat > observationSequence
A buffer to store data for realtime prediction.
bool copyMLBaseVariables(const MLBase *mlBase)
This class implements a continuous Hidden Markov Model.
UINT classLabel
The class label associated with this model.
unsigned int getNumRows() const
unsigned int getNumCols() const
MatrixFloat sigmaStates
The sigma value for each state.
virtual bool print() const override
bool loadBaseSettingsFromFile(std::fstream &file)
virtual bool reset() override
virtual bool resize(const unsigned int r, const unsigned int c)
MatrixFloat b
The emissions probability matrix.
UINT timeseriesLength
The length of the training timeseries.
This is the main base class that all GRT machine learning algorithms should inherit from...
bool setModelType(const UINT modelType)
UINT numStates
The number of states for this model.