//GRT HMM.cpp (selected fragments)
#define GRT_DLL_EXPORTS

const std::string HMM::id = "HMM";

//HMM constructor (fragment): store the user-supplied settings and the defaults
this->modelType = modelType;
this->useScaling = useScaling;
this->useNullRejection = useNullRejection;

autoEstimateSigma = true;

supportsNullRejection = false;
classifierMode = TIMESERIES_CLASSIFIER_MODE;

//HMM copy constructor (fragment)
classifierMode = TIMESERIES_CLASSIFIER_MODE;
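//------------------------------------------------------------------------------------------
//Illustrative usage sketch (not part of HMM.cpp): how an HMM instance might be configured
//before training. The include path and every parameter value below are assumptions chosen
//for the example; the constructor and setters themselves appear elsewhere in this listing.
#include <GRT/GRT.h> //include path may differ depending on how GRT is installed
using namespace GRT;

int main(){
    HMM chmm( HMM_CONTINUOUS, HMM_LEFTRIGHT ); //continuous HMM with a left-right model structure
    chmm.setDownsampleFactor( 5 );             //downsample each training timeseries by a factor of 5
    chmm.setCommitteeSize( 10 );               //combine the 10 most likely models at prediction time
    chmm.setSigma( 20.0 );                     //sigma used by the continuous models

    HMM dhmm( HMM_DISCRETE, HMM_LEFTRIGHT );   //discrete HMM, expects quantized 1-dimensional data
    dhmm.setNumStates( 10 );                   //number of hidden states in each class model
    dhmm.setNumSymbols( 20 );                  //size of the discrete symbol alphabet
    return 0;
}
//------------------------------------------------------------------------------------------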
//HMM::operator=( const HMM &rhs ) (fragment): copy the HMM settings and trained models
this->modelType = rhs.modelType;
this->delta = rhs.delta;
this->numStates = rhs.numStates;
this->numSymbols = rhs.numSymbols;
this->downsampleFactor = rhs.downsampleFactor;
this->committeeSize = rhs.committeeSize;
this->sigma = rhs.sigma;
this->autoEstimateSigma = rhs.autoEstimateSigma;
this->discreteModels = rhs.discreteModels;
this->continuousModels = rhs.continuousModels;
//HMM::deepCopyFrom( const Classifier *classifier ) (fragment)
if( classifier == NULL ) return false;

const HMM *ptr = dynamic_cast<const HMM*>(classifier);

this->modelType = ptr->modelType;
this->delta = ptr->delta;
this->numStates = ptr->numStates;
this->numSymbols = ptr->numSymbols;
this->downsampleFactor = ptr->downsampleFactor;
this->committeeSize = ptr->committeeSize;
this->sigma = ptr->sigma;
this->autoEstimateSigma = ptr->autoEstimateSigma;
this->discreteModels = ptr->discreteModels;
this->continuousModels = ptr->continuousModels;
//HMM::train( ClassificationData trainingData ) (fragment): the HMM only supports timeseries data
errorLog << __GRT_LOG__ << " The HMM classifier should be trained using the train(TimeSeriesClassificationData &trainingData) method" << std::endl;

//HMM::train_( TimeSeriesClassificationData &trainingData ) (fragment): dispatch on the HMM type
return train_discrete( trainingData );

return train_continuous( trainingData );

errorLog << __GRT_LOG__ << " Failed to train model, unknown HMM type!" << std::endl;
//HMM::train_discrete( TimeSeriesClassificationData &trainingData ) (fragments)
errorLog << __GRT_LOG__ << " There are no training samples to train the HMM classifier!" << std::endl;

errorLog << __GRT_LOG__ << " The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << std::endl;

discreteModels.resize( numClasses );
classLabels.resize( numClasses );

for(UINT k=0; k<numClasses; k++){
    discreteModels[k].resetModel(numStates,numSymbols,modelType,delta);
    discreteModels[k].setMaxNumEpochs( maxNumEpochs );
    discreteModels[k].setMinChange( minChange );

//Train one discrete model per class
for(UINT k=0; k<numClasses; k++){
    classLabels[k] = classID;

    if( !convertDataToObservationSequence( classData, observationSequences ) ){

    if( !discreteModels[k].train( observationSequences ) ){
        errorLog << __GRT_LOG__ << " Failed to train HMM for class " << classID << std::endl;

//Estimate the null rejection threshold for each class from the average training log likelihood
nullRejectionThresholds.resize(numClasses);

for(UINT k=0; k<numClasses; k++){
    classLabels[k] = classID;

    if( !convertDataToObservationSequence( classData, observationSequences ) ){

    Float loglikelihood = 0;
    Float avgLoglikelihood = 0;
    for(UINT i=0; i<observationSequences.size(); i++){
        loglikelihood = discreteModels[k].predict( observationSequences[i] );
        avgLoglikelihood += fabs( loglikelihood );

    nullRejectionThresholds[k] = -( avgLoglikelihood / Float( observationSequences.size() ) );
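//------------------------------------------------------------------------------------------
//Illustrative sketch (not part of HMM.cpp): driving the discrete training path above. The
//filename is a placeholder, the dataset's load(filename) helper is assumed, and the data is
//assumed to already hold 1-dimensional symbol indices in [0 numSymbols-1], e.g. produced by
//one of the GRT quantization algorithms.
#include <GRT/GRT.h>
using namespace GRT;

bool trainDiscreteHMM(){
    TimeSeriesClassificationData trainingData;
    if( !trainingData.load( "QuantizedGestures.grt" ) ) return false; //hypothetical file
    HMM hmm( HMM_DISCRETE, HMM_LEFTRIGHT );
    hmm.setNumStates( 10 );  //hidden states per class model
    hmm.setNumSymbols( 20 ); //must match the quantizer's codebook size
    return hmm.train_( trainingData ); //dispatches to train_discrete(...) when the type is HMM_DISCRETE
}
//------------------------------------------------------------------------------------------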
//HMM::train_continuous( TimeSeriesClassificationData &trainingData ) (fragments)
errorLog << __GRT_LOG__ << " There are no training samples to train the CHMM classifier!" << std::endl;

classLabels.resize( numClasses );
for(UINT k=0; k<numClasses; k++){

trainingData.scale(0, 1);

//One continuous model is trained for every training sample
const UINT numTrainingSamples = trainingData.getNumSamples();
continuousModels.resize( numTrainingSamples );

for(UINT k=0; k<numTrainingSamples; k++){
    continuousModels[k].setDownsampleFactor( downsampleFactor );
    continuousModels[k].setModelType( modelType );
    continuousModels[k].setDelta( delta );
    continuousModels[k].setSigma( sigma );
    continuousModels[k].setAutoEstimateSigma( autoEstimateSigma );
    continuousModels[k].enableScaling( false );

    if( !continuousModels[k].train_( trainingData[k] ) ){
        errorLog << __GRT_LOG__ << " Failed to train CHMM for sample " << k << std::endl;

warningLog << __GRT_LOG__ << " The committeeSize is larger than the number of training samples. Setting committeeSize to the number of training samples: " << trainingData.getNumSamples() << std::endl;

if( useNullRejection ){
    nullRejectionThresholds.resize(numClasses);
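//------------------------------------------------------------------------------------------
//Illustrative sketch (not part of HMM.cpp): driving the continuous training path above, which
//trains one ContinuousHiddenMarkovModel per training sample and therefore caps the committee
//size at the number of samples. The filename is a placeholder and the dataset's load(filename)
//helper is assumed.
#include <GRT/GRT.h>
using namespace GRT;

bool trainContinuousHMM(){
    TimeSeriesClassificationData trainingData;
    if( !trainingData.load( "Gestures.grt" ) ) return false; //hypothetical file
    HMM hmm( HMM_CONTINUOUS, HMM_LEFTRIGHT );
    hmm.enableScaling( true );    //train_continuous then scales each dimension to [0 1]
    hmm.setDownsampleFactor( 5 );
    hmm.setCommitteeSize( 5 );    //clamped to the number of training samples if larger, as warned above
    return hmm.train_( trainingData );
}
//------------------------------------------------------------------------------------------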
//HMM::predict_( VectorFloat &inputVector ) (fragment): dispatch on the HMM type
return predict_discrete( inputVector );

return predict_continuous( inputVector );

errorLog << __GRT_LOG__ << " Failed to predict, unknown HMM type!" << std::endl;
bool HMM::predict_discrete( VectorFloat &inputVector ){

    predictedClassLabel = 0;
    maxLikelihood = -10000;

    errorLog << __GRT_LOG__ << " The HMM classifier has not been trained!" << std::endl;

    if( inputVector.size() != numInputDimensions ){
        errorLog << __GRT_LOG__ << " The size of the input vector (" << inputVector.size() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;

    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);

    bestDistance = -99e+99;

    //The input to the discrete HMM is a single quantized symbol
    UINT newObservation = (UINT)inputVector[0];

    if( newObservation >= numSymbols ){
        errorLog << __GRT_LOG__ << " The new observation is not a valid symbol! It should be in the range [0 numSymbols-1]" << std::endl;

    for(UINT k=0; k<numClasses; k++){
        classDistances[k] = discreteModels[k].predict( newObservation );

        classLikelihoods[k] = grt_antilog( classDistances[k] );

        if( classDistances[k] > bestDistance ){
            bestDistance = classDistances[k];

        sum += classLikelihoods[k];

    //Normalize the class likelihoods
    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] /= sum;

    maxLikelihood = classLikelihoods[ bestIndex ];
    predictedClassLabel = classLabels[ bestIndex ];

    if( useNullRejection ){
        if( maxLikelihood > nullRejectionThresholds[ bestIndex ] ){
            predictedClassLabel = classLabels[ bestIndex ];
        }else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
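//------------------------------------------------------------------------------------------
//Illustrative sketch (not part of HMM.cpp): feeding one quantized symbol at a time to a trained
//discrete HMM. The symbol source is hypothetical, and getPredictedClassLabel() and
//getMaximumLikelihood() are inherited Classifier getters that are not shown in this listing.
#include <GRT/GRT.h>
#include <iostream>
using namespace GRT;

void predictSymbol( HMM &hmm, const UINT symbol ){
    VectorFloat sample(1);
    sample[0] = symbol;             //a single quantized observation in [0 numSymbols-1]
    if( hmm.predict_( sample ) ){   //forwards to predict_discrete(...) when the type is HMM_DISCRETE
        std::cout << "label: " << hmm.getPredictedClassLabel()
                  << " likelihood: " << hmm.getMaximumLikelihood() << std::endl;
    }
}
//------------------------------------------------------------------------------------------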
bool HMM::predict_continuous( VectorFloat &inputVector ){

    errorLog << __GRT_LOG__ << " The HMM classifier has not been trained!" << std::endl;

    if( inputVector.getSize() != numInputDimensions ){
        errorLog << __GRT_LOG__ << " The size of the input vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;

    //Scale the input vector if needed
    for(UINT i=0; i<numInputDimensions; i++){
        inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);

    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);

    std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
    std::fill(classDistances.begin(),classDistances.end(),0);

    bestDistance = -1000;

    Float minValue = -1000;

    const UINT numModels = (UINT)continuousModels.size();

    //Run the prediction for each continuous model
    for(UINT i=0; i<numModels; i++){

        if( continuousModels[i].predict_( inputVector ) ){
            results[i].value = continuousModels[i].getLoglikelihood();
            results[i].index = continuousModels[i].getClassLabel();

        errorLog << __GRT_LOG__ << " Prediction failed for model: " << i << std::endl;

        if( results[i].value < minValue ){
            if( !grt_isnan(results[i].value) ){
                minValue = results[i].value;

        if( results[i].value > bestDistance ){
            if( !grt_isnan(results[i].value) ){
                bestDistance = results[i].value;

    phase = continuousModels[ bestIndex ].getPhase();

    //Sort the model log likelihoods and combine the top committeeSize models
    std::sort(results.begin(),results.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

    const Float committeeWeight = 1.0 / committeeSize;
    for(UINT i=0; i<committeeSize; i++){

    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] = classDistances[k] / sum;

    for(UINT k=0; k<numClasses; k++){
        if( classDistances[k] > bestDistance ){
            bestDistance = classDistances[k];

    maxLikelihood = classLikelihoods[ bestIndex ];
    predictedClassLabel = classLabels[ bestIndex ];

    predictedClassLabel = 0;
//HMM::predict_( MatrixFloat &timeseries ) (fragment): dispatch on the HMM type
return predict_discrete( timeseries );

return predict_continuous( timeseries );

errorLog << __GRT_LOG__ << " Failed to predict, unknown HMM type!" << std::endl;
bool HMM::predict_discrete( MatrixFloat &timeseries ){

    errorLog << __GRT_LOG__ << " The HMM classifier has not been trained!" << std::endl;

    errorLog << __GRT_LOG__ << " The number of columns in the input matrix must be 1. It is: " << timeseries.getNumCols() << std::endl;

    //Convert the timeseries to a sequence of quantized symbols
    for(UINT i=0; i<M; i++){
        observationSequence[i] = (UINT)timeseries[i][0];

        if( observationSequence[i] >= numSymbols ){
            errorLog << __GRT_LOG__ << " The new observation is not a valid symbol! It should be in the range [0 numSymbols-1]" << std::endl;

    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);

    bestDistance = -99e+99;

    for(UINT k=0; k<numClasses; k++){
        classDistances[k] = discreteModels[k].predict( observationSequence );

        classLikelihoods[k] = grt_antilog( classDistances[k] );

        if( classDistances[k] > bestDistance ){
            bestDistance = classDistances[k];

        sum += classLikelihoods[k];

    //Normalize the class likelihoods
    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] /= sum;

    maxLikelihood = classLikelihoods[ bestIndex ];
    predictedClassLabel = classLabels[ bestIndex ];

    if( useNullRejection ){
        if( maxLikelihood > nullRejectionThresholds[ bestIndex ] ){
            predictedClassLabel = classLabels[ bestIndex ];
        }else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
bool HMM::predict_continuous( MatrixFloat &timeseries ){

    errorLog << __GRT_LOG__ << " The HMM classifier has not been trained!" << std::endl;

    if( timeseries.getNumCols() != numInputDimensions ){
        errorLog << __GRT_LOG__ << " The number of columns in the input matrix (" << timeseries.getNumCols() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;

    //Scale the timeseries if needed
    const UINT timeseriesLength = timeseries.getNumRows();
    for(UINT j=0; j<numInputDimensions; j++){
        for(UINT i=0; i<timeseriesLength; i++){
            timeseries[i][j] = scale(timeseries[i][j], ranges[j].minValue, ranges[j].maxValue, 0, 1);

    if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses,0);

    std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
    std::fill(classDistances.begin(),classDistances.end(),0);

    bestDistance = -1000;

    Float minValue = -1000;

    const UINT numModels = (UINT)continuousModels.size();

    //Run the prediction for each continuous model
    for(UINT i=0; i<numModels; i++){

        if( continuousModels[i].predict_( timeseries ) ){
            results[i].value = continuousModels[i].getLoglikelihood();
            results[i].index = continuousModels[i].getClassLabel();

        errorLog << __GRT_LOG__ << " Prediction failed for model: " << i << std::endl;

        if( results[i].value < minValue ){
            minValue = results[i].value;

        if( results[i].value > bestDistance ){
            bestDistance = results[i].value;

    phase = continuousModels[ bestIndex ].getPhase();

    //Sort the model log likelihoods and combine the top committeeSize models
    std::sort(results.begin(),results.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

    const Float committeeWeight = 1.0 / committeeSize;
    for(UINT i=0; i<committeeSize; i++){

    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] = classDistances[k] / sum;

    for(UINT k=0; k<numClasses; k++){
        if( classDistances[k] > bestDistance ){
            bestDistance = classDistances[k];

    maxLikelihood = classLikelihoods[ bestIndex ];
    predictedClassLabel = classLabels[ bestIndex ];

    predictedClassLabel = 0;
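//------------------------------------------------------------------------------------------
//Illustrative sketch (not part of HMM.cpp): classifying a complete timeseries with a trained
//continuous HMM. The MatrixFloat contents come from the caller and its column count must match
//the number of input dimensions the model was trained with; getPredictedClassLabel() and
//getMaximumLikelihood() are inherited Classifier getters not shown in this listing.
#include <GRT/GRT.h>
#include <iostream>
using namespace GRT;

bool classifyTimeseries( HMM &hmm, MatrixFloat &timeseries ){
    if( !hmm.predict_( timeseries ) ) return false; //forwards to predict_continuous(...) when the type is HMM_CONTINUOUS
    std::cout << "predicted class: " << hmm.getPredictedClassLabel()
              << " maximum likelihood: " << hmm.getMaximumLikelihood() << std::endl;
    return true;
}
//------------------------------------------------------------------------------------------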
//HMM::reset() (fragment)
for(size_t i=0; i<discreteModels.size(); i++){
    discreteModels[i].reset();

for(size_t i=0; i<continuousModels.size(); i++){
    continuousModels[i].reset();

//HMM::clear() (fragment)
discreteModels.clear();
continuousModels.clear();
//HMM::print() (fragments)
std::cout << "HMM Model\n";

std::cout << "HmmType: " << hmmType << std::endl;
std::cout << "ModelType: " << modelType << std::endl;
std::cout << "Delta: " << delta << std::endl;

std::cout << "NumStates: " << numStates << std::endl;
std::cout << "NumSymbols: " << numSymbols << std::endl;
std::cout << "NumRandomTrainingIterations: " << numRandomTrainingIterations << std::endl;
std::cout << "NumDiscreteModels: " << discreteModels.getSize() << std::endl;
std::cout << "DiscreteModels: " << std::endl;
for(size_t i=0; i<discreteModels.getSize(); i++){
    if( !discreteModels[i].print() ){
        errorLog << __GRT_LOG__ << " Failed to print discrete model " << i << std::endl;

std::cout << "DownsampleFactor: " << downsampleFactor << std::endl;
std::cout << "CommitteeSize: " << committeeSize << std::endl;
std::cout << "Sigma: " << sigma << std::endl;
std::cout << "AutoEstimateSigma: " << autoEstimateSigma << std::endl;
std::cout << "NumContinuousModels: " << continuousModels.getSize() << std::endl;
std::cout << "ContinuousModels: " << std::endl;
for(size_t i=0; i<continuousModels.getSize(); i++){
    if( !continuousModels[i].print() ){
        errorLog << __GRT_LOG__ << " Failed to print continuous model " << i << std::endl;
errorLog << __GRT_LOG__ << " File is not open!" << std::endl;

file << "HMM_MODEL_FILE_V2.0\n";

errorLog << __GRT_LOG__ << " Failed to save classifier base settings to file!" << std::endl;

file << "HmmType: " << hmmType << std::endl;
file << "ModelType: " << modelType << std::endl;
file << "Delta: " << delta << std::endl;

file << "NumStates: " << numStates << std::endl;
file << "NumSymbols: " << numSymbols << std::endl;
file << "NumRandomTrainingIterations: " << numRandomTrainingIterations << std::endl;
file << "NumDiscreteModels: " << discreteModels.getSize() << std::endl;
file << "DiscreteModels: " << std::endl;
for(size_t i=0; i<discreteModels.getSize(); i++){
    if( !discreteModels[i].save( file ) ){
        errorLog << __GRT_LOG__ << " Failed to save discrete model " << i << " to file!" << std::endl;

file << "DownsampleFactor: " << downsampleFactor << std::endl;
file << "CommitteeSize: " << committeeSize << std::endl;
file << "Sigma: " << sigma << std::endl;
file << "NumContinuousModels: " << continuousModels.getSize() << std::endl;
file << "ContinuousModels: " << std::endl;
for(UINT i=0; i<continuousModels.getSize(); i++){
    if( !continuousModels[i].save( file ) ){
        errorLog << __GRT_LOG__ << " Failed to save continuous model " << i << " to file!" << std::endl;
errorLog << __GRT_LOG__ << " File is not open!" << std::endl;

if(word != "HMM_MODEL_FILE_V2.0"){
    errorLog << __GRT_LOG__ << " Could not find Model File Header!" << std::endl;

errorLog << __GRT_LOG__ << " Failed to load base settings from file!" << std::endl;

if(word != "HmmType:"){
    errorLog << __GRT_LOG__ << " Could not find HmmType." << std::endl;

if(word != "ModelType:"){
    errorLog << __GRT_LOG__ << " Could not find ModelType." << std::endl;

if(word != "Delta:"){
    errorLog << __GRT_LOG__ << " Could not find Delta." << std::endl;

if(word != "NumStates:"){
    errorLog << __GRT_LOG__ << " Could not find NumStates." << std::endl;

if(word != "NumSymbols:"){
    errorLog << __GRT_LOG__ << " Could not find NumSymbols." << std::endl;

if(word != "NumRandomTrainingIterations:"){
    errorLog << __GRT_LOG__ << " Could not find NumRandomTrainingIterations." << std::endl;
file >> numRandomTrainingIterations;

if(word != "NumDiscreteModels:"){
    errorLog << __GRT_LOG__ << " Could not find NumDiscreteModels." << std::endl;

if(word != "DiscreteModels:"){
    errorLog << __GRT_LOG__ << " Could not find DiscreteModels." << std::endl;

discreteModels.resize(numModels);
for(size_t i=0; i<discreteModels.getSize(); i++){
    if( !discreteModels[i].load( file ) ){
        errorLog << __GRT_LOG__ << " Failed to load discrete model " << i << " from file!" << std::endl;

if(word != "DownsampleFactor:"){
    errorLog << __GRT_LOG__ << " Could not find DownsampleFactor." << std::endl;
file >> downsampleFactor;

if(word != "CommitteeSize:"){
    errorLog << __GRT_LOG__ << " Could not find CommitteeSize." << std::endl;
file >> committeeSize;

if(word != "Sigma:"){
    errorLog << __GRT_LOG__ << " Could not find Sigma." << std::endl;

if(word != "NumContinuousModels:"){
    errorLog << __GRT_LOG__ << " Could not find NumContinuousModels." << std::endl;

if(word != "ContinuousModels:"){
    errorLog << __GRT_LOG__ << " Could not find ContinuousModels." << std::endl;

continuousModels.resize(numModels);
for(size_t i=0; i<continuousModels.getSize(); i++){
    if( !continuousModels[i].load( file ) ){
        errorLog << __GRT_LOG__ << " Failed to load continuous model " << i << " from file!" << std::endl;
//HMM::convertDataToObservationSequence(...) (fragment): copy each 1-dimensional timeseries into a vector of symbol indices
observationSequences[i].resize( timeseries.getNumRows() );
for(UINT j=0; j<timeseries.getNumRows(); j++){
    if( timeseries[j][0] >= numSymbols ){
        errorLog << __GRT_LOG__ << " Found an observation sequence with a value outside of the symbol range! Value: " << timeseries[j][0] << std::endl;

    observationSequences[i][j] = (UINT)timeseries[j][0];
//Getters (fragments): getNumRandomTrainingIterations(), getDiscreteModels(), getContinuousModels()
return numRandomTrainingIterations;

return discreteModels;

return continuousModels;
//Setters (fragments): validate the new value before storing it
if( hmmType == HMM_DISCRETE || hmmType == HMM_CONTINUOUS ){

warningLog << __GRT_LOG__ << " Unknown HMM type!" << std::endl;

if( modelType == HMM_ERGODIC || modelType == HMM_LEFTRIGHT ){
    this->modelType = modelType;

warningLog << __GRT_LOG__ << " Unknown model type!" << std::endl;

warningLog << __GRT_LOG__ << " Delta must be greater than zero!" << std::endl;

if( downsampleFactor > 0 ){
    this->downsampleFactor = downsampleFactor;

if( committeeSize > 0 ){
    this->committeeSize = committeeSize;

if( numStates > 0 ){
    this->numStates = numStates;

warningLog << __GRT_LOG__ << " Num states must be greater than zero!" << std::endl;

if( numSymbols > 0 ){
    this->numSymbols = numSymbols;

warningLog << __GRT_LOG__ << " Num symbols must be greater than zero!" << std::endl;

if( numRandomTrainingIterations > 0 ){
    this->numRandomTrainingIterations = numRandomTrainingIterations;

warningLog << __GRT_LOG__ << " The number of random training iterations must be greater than zero!" << std::endl;

//HMM::setSigma(...) also pushes the new sigma down to any existing continuous models
this->sigma = sigma;
for(UINT i=0; i<continuousModels.getSize(); i++){
    continuousModels[i].setSigma( sigma );

bool HMM::setAutoEstimateSigma( const bool autoEstimateSigma ){
    this->autoEstimateSigma = autoEstimateSigma;
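//------------------------------------------------------------------------------------------
//Illustrative sketch (not part of HMM.cpp): a save/load round trip using the std::fstream
//overloads shown above. The filename is a placeholder.
#include <GRT/GRT.h>
#include <fstream>
using namespace GRT;

bool saveAndReload( HMM &trainedHMM ){
    std::fstream out( "HMMModel.grt", std::ios::out );
    if( !trainedHMM.save( out ) ) return false; //writes the HMM_MODEL_FILE_V2.0 header and settings
    out.close();

    std::fstream in( "HMMModel.grt", std::ios::in );
    HMM restored;
    bool ok = restored.load( in );              //parses the same key/value layout back in
    in.close();
    return ok;
}
//------------------------------------------------------------------------------------------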