21 #define GRT_DLL_EXPORTS
29 HMM::HMM(
const UINT hmmType,
const UINT modelType,
const UINT delta,
const bool useScaling,
const bool useNullRejection)
32 this->modelType = modelType;
34 this->useScaling = useScaling;
35 this->useNullRejection = useNullRejection;
47 autoEstimateSigma =
true;
49 supportsNullRejection =
false;
50 classifierMode = TIMESERIES_CLASSIFIER_MODE;
52 classifierType = classType;
53 debugLog.setProceedingText(
"[DEBUG HMM]");
54 errorLog.setProceedingText(
"[ERROR HMM]");
55 warningLog.setProceedingText(
"[WARNING HMM]");
59 classifierMode = TIMESERIES_CLASSIFIER_MODE;
61 classifierType = classType;
62 debugLog.setProceedingText(
"[DEBUG HMM]");
63 errorLog.setProceedingText(
"[ERROR HMM]");
64 warningLog.setProceedingText(
"[WARNING HMM]");
75 this->modelType = rhs.modelType;
76 this->delta = rhs.delta;
77 this->numStates = rhs.numStates;
78 this->numSymbols = rhs.numSymbols;
79 this->downsampleFactor = rhs.downsampleFactor;
80 this->committeeSize = rhs.committeeSize;
81 this->sigma = rhs.sigma;
82 this->autoEstimateSigma = rhs.autoEstimateSigma;
83 this->discreteModels = rhs.discreteModels;
84 this->continuousModels = rhs.continuousModels;
94 if( classifier == NULL )
return false;
99 HMM *ptr = (
HMM*)classifier;
103 this->modelType = ptr->modelType;
104 this->delta = ptr->delta;
105 this->numStates = ptr->numStates;
106 this->numSymbols = ptr->numSymbols;
107 this->downsampleFactor = ptr->downsampleFactor;
108 this->committeeSize = ptr->committeeSize;
109 this->sigma = ptr->sigma;
110 this->autoEstimateSigma = ptr->autoEstimateSigma;
111 this->discreteModels = ptr->discreteModels;
112 this->continuousModels = ptr->continuousModels;
121 errorLog <<
"train(ClassificationData trainingData) - The HMM classifier should be trained using the train(TimeSeriesClassificationData &trainingData) method" << std::endl;
130 return train_discrete( trainingData );
133 return train_continuous( trainingData );
137 errorLog <<
"train_(TimeSeriesClassificationData &trainingData) - Failed to train model, unknown HMM type!" << std::endl;
147 errorLog <<
"train_discrete(TimeSeriesClassificationData &trainingData) - There are no training samples to train the HMM classifer!" << std::endl;
152 errorLog <<
"train_discrete(TimeSeriesClassificationData &trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << std::endl;
159 discreteModels.
resize( numClasses );
160 classLabels.
resize( numClasses );
163 for(UINT k=0; k<numClasses; k++){
164 discreteModels[k].resetModel(numStates,numSymbols,modelType,delta);
165 discreteModels[k].setMaxNumEpochs( maxNumEpochs );
166 discreteModels[k].setMinChange( minChange );
170 for(UINT k=0; k<numClasses; k++){
173 classLabels[k] = classID;
178 if( !convertDataToObservationSequence( classData, observationSequences ) ){
183 if( !discreteModels[k].
train( observationSequences ) ){
184 errorLog <<
"train_discrete(TimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << classID << std::endl;
190 nullRejectionThresholds.
resize(numClasses);
192 for(UINT k=0; k<numClasses; k++){
195 classLabels[k] = classID;
200 if( !convertDataToObservationSequence( classData, observationSequences ) ){
205 Float loglikelihood = 0;
206 Float avgLoglikelihood = 0;
207 for(UINT i=0; i<observationSequences.size(); i++){
208 loglikelihood = discreteModels[k].predict( observationSequences[i] );
209 avgLoglikelihood += fabs( loglikelihood );
211 nullRejectionThresholds[k] = -( avgLoglikelihood / Float( observationSequences.size() ) );
226 errorLog <<
"train_continuous(TimeSeriesClassificationData &trainingData) - There are no training samples to train the CHMM classifer!" << std::endl;
233 classLabels.
resize( numClasses );
234 for(UINT k=0; k<numClasses; k++){
241 trainingData.
scale(0, 1);
245 const UINT numTrainingSamples = trainingData.
getNumSamples();
246 continuousModels.
resize( numTrainingSamples );
249 for(UINT k=0; k<numTrainingSamples; k++){
252 continuousModels[k].setDownsampleFactor( downsampleFactor );
253 continuousModels[k].setModelType( modelType );
254 continuousModels[k].setDelta( delta );
255 continuousModels[k].setSigma( sigma );
256 continuousModels[k].setAutoEstimateSigma( autoEstimateSigma );
257 continuousModels[k].enableScaling(
false );
260 if( !continuousModels[k].
train_( trainingData[k] ) ){
261 errorLog <<
"train_continuous(TimeSeriesClassificationData &trainingData) - Failed to train CHMM for sample " << k << std::endl;
268 warningLog <<
"train_continuous(TimeSeriesClassificationData &trainingData) - The committeeSize is larger than the number of training sample. Setting committeeSize to number of training samples: " << trainingData.
getNumSamples() << std::endl;
275 if( useNullRejection ){
277 nullRejectionThresholds.
resize(numClasses);
287 return predict_discrete( inputVector );
290 return predict_continuous( inputVector );
294 errorLog <<
"predict_(VectorFloat &inputVector) - Failed to predict, unknown HMM type!" << std::endl;
299 bool HMM::predict_discrete(
VectorFloat &inputVector ){
301 predictedClassLabel = 0;
302 maxLikelihood = -10000;
305 errorLog <<
"predict_(VectorFloat &inputVector) - The HMM classifier has not been trained!" << std::endl;
309 if( inputVector.size() != numInputDimensions ){
310 errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
314 if( classLikelihoods.size() != numClasses ) classLikelihoods.
resize(numClasses,0);
315 if( classDistances.size() != numClasses ) classDistances.
resize(numClasses,0);
318 bestDistance = -99e+99;
320 UINT newObservation = (UINT)inputVector[0];
322 if( newObservation >= numSymbols ){
323 errorLog <<
"predict_(VectorFloat &inputVector) - The new observation is not a valid symbol! It should be in the range [0 numSymbols-1]" << std::endl;
327 for(UINT k=0; k<numClasses; k++){
328 classDistances[k] = discreteModels[k].predict( newObservation );
331 classLikelihoods[k] = grt_antilog( classDistances[k] );
334 if( classDistances[k] > bestDistance ){
335 bestDistance = classDistances[k];
339 sum += classLikelihoods[k];
343 for(UINT k=0; k<numClasses; k++){
344 classLikelihoods[k] /= sum;
347 maxLikelihood = classLikelihoods[ bestIndex ];
348 predictedClassLabel = classLabels[ bestIndex ];
350 if( useNullRejection ){
351 if( maxLikelihood > nullRejectionThresholds[ bestIndex ] ){
352 predictedClassLabel = classLabels[ bestIndex ];
353 }
else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
359 bool HMM::predict_continuous(
VectorFloat &inputVector ){
362 errorLog <<
"predict_(VectorFloat &inputVector) - The HMM classifier has not been trained!" << std::endl;
366 if( inputVector.size() != numInputDimensions ){
367 errorLog <<
"predict_(VectorFloat &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
373 for(UINT i=0; i<numInputDimensions; i++){
374 inputVector[i] =
scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
378 if( classLikelihoods.size() != numClasses ) classLikelihoods.
resize(numClasses,0);
379 if( classDistances.size() != numClasses ) classDistances.
resize(numClasses,0);
381 std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
382 std::fill(classDistances.begin(),classDistances.end(),0);
384 bestDistance = -1000;
386 Float minValue = -1000;
388 const UINT numModels = (UINT)continuousModels.size();
390 for(UINT i=0; i<numModels; i++){
393 if( continuousModels[i].
predict_( inputVector ) ){
394 results[i].value = continuousModels[i].getLoglikelihood();
395 results[i].index = continuousModels[i].getClassLabel();
397 errorLog <<
"predict_(VectorFloat &inputVector) - Prediction failed for model: " << i << std::endl;
401 if( results[i].value < minValue ){
402 if( !grt_isnan(results[i].value) ){
403 minValue = results[i].value;
407 if( results[i].value > bestDistance ){
408 if( !grt_isnan(results[i].value) ){
409 bestDistance = results[i].value;
418 phase = continuousModels[ bestIndex ].getPhase();
421 std::sort(results.begin(),results.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
424 const Float committeeWeight = 1.0 / committeeSize;
425 for(UINT i=0; i<committeeSize; i++){
432 for(UINT k=0; k<numClasses; k++){
433 classLikelihoods[k] = classDistances[k] / sum;
437 for(UINT k=0; k<numClasses; k++){
438 if( classDistances[k] > bestDistance ){
439 bestDistance = classDistances[k];
444 maxLikelihood = classLikelihoods[ bestIndex ];
445 predictedClassLabel = classLabels[ bestIndex ];
449 predictedClassLabel = 0;
459 return predict_discrete( timeseries );
462 return predict_continuous( timeseries );
466 errorLog <<
"predict_(MatrixFloat &timeseries) - Failed to predict, unknown HMM type!" << std::endl;
472 bool HMM::predict_discrete(
MatrixFloat &timeseries){
475 errorLog <<
"predict_continuous(MatrixFloat &timeseries) - The HMM classifier has not been trained!" << std::endl;
480 errorLog <<
"predict_discrete(MatrixFloat &timeseries) The number of columns in the input matrix must be 1. It is: " << timeseries.
getNumCols() << std::endl;
488 for(UINT i=0; i<M; i++){
489 observationSequence[i] = (UINT)timeseries[i][0];
491 if( observationSequence[i] >= numSymbols ){
492 errorLog <<
"predict_discrete(VectorFloat &inputVector) - The new observation is not a valid symbol! It should be in the range [0 numSymbols-1]" << std::endl;
497 if( classLikelihoods.size() != numClasses ) classLikelihoods.
resize(numClasses,0);
498 if( classDistances.size() != numClasses ) classDistances.
resize(numClasses,0);
500 bestDistance = -99e+99;
503 for(UINT k=0; k<numClasses; k++){
504 classDistances[k] = discreteModels[k].predict( observationSequence );
507 classLikelihoods[k] = grt_antilog( classDistances[k] );
510 if( classDistances[k] > bestDistance ){
511 bestDistance = classDistances[k];
515 sum += classLikelihoods[k];
519 for(UINT k=0; k<numClasses; k++){
520 classLikelihoods[k] /= sum;
523 maxLikelihood = classLikelihoods[ bestIndex ];
524 predictedClassLabel = classLabels[ bestIndex ];
526 if( useNullRejection ){
527 if( maxLikelihood > nullRejectionThresholds[ bestIndex ] ){
528 predictedClassLabel = classLabels[ bestIndex ];
529 }
else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
536 bool HMM::predict_continuous(
MatrixFloat &timeseries){
539 errorLog <<
"predict_continuous(MatrixFloat &timeseries) - The HMM classifier has not been trained!" << std::endl;
543 if( timeseries.
getNumCols() != numInputDimensions ){
544 errorLog <<
"predict_continuous(MatrixFloat &timeseries) - The number of columns in the input matrix (" << timeseries.
getNumCols() <<
") does not match the num features in the model (" << numInputDimensions << std::endl;
550 const UINT timeseriesLength = timeseries.
getNumRows();
551 for(UINT j=0; j<numInputDimensions; j++){
552 for(UINT i=0; i<timeseriesLength; i++){
553 timeseries[i][j] =
scale(timeseries[i][j], ranges[j].minValue, ranges[j].maxValue, 0, 1);
558 if( classLikelihoods.size() != numClasses ) classLikelihoods.
resize(numClasses,0);
559 if( classDistances.size() != numClasses ) classDistances.
resize(numClasses,0);
561 std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
562 std::fill(classDistances.begin(),classDistances.end(),0);
564 bestDistance = -1000;
566 Float minValue = -1000;
568 const UINT numModels = (UINT)continuousModels.size();
570 for(UINT i=0; i<numModels; i++){
573 if( continuousModels[i].
predict_( timeseries ) ){
574 results[i].value = continuousModels[i].getLoglikelihood();
575 results[i].index = continuousModels[i].getClassLabel();
577 errorLog <<
"predict_(VectorFloat &inputVector) - Prediction failed for model: " << i << std::endl;
581 if( results[i].value < minValue ){
582 minValue = results[i].value;
585 if( results[i].value > bestDistance ){
586 bestDistance = results[i].value;
592 phase = continuousModels[ bestIndex ].getPhase();
595 std::sort(results.begin(),results.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
598 const Float committeeWeight = 1.0 / committeeSize;
599 for(UINT i=0; i<committeeSize; i++){
606 for(UINT k=0; k<numClasses; k++){
607 classLikelihoods[k] = classDistances[k] / sum;
611 for(UINT k=0; k<numClasses; k++){
612 if( classDistances[k] > bestDistance ){
613 bestDistance = classDistances[k];
618 maxLikelihood = classLikelihoods[ bestIndex ];
619 predictedClassLabel = classLabels[ bestIndex ];
623 predictedClassLabel = 0;
635 for(
size_t i=0; i<discreteModels.size(); i++){
636 discreteModels[i].reset();
640 for(
size_t i=0; i<continuousModels.size(); i++){
641 continuousModels[i].reset();
654 discreteModels.clear();
655 continuousModels.clear();
662 std::cout <<
"HMM Model\n";
665 std::cout <<
"HmmType: " <<
hmmType << std::endl;
666 std::cout <<
"ModelType: " << modelType << std::endl;
667 std::cout <<
"Delta: " << delta << std::endl;
672 std::cout <<
"NumStates: " << numStates << std::endl;
673 std::cout <<
"NumSymbols: " << numSymbols << std::endl;
674 std::cout <<
"NumRandomTrainingIterations: " << numRandomTrainingIterations << std::endl;
675 std::cout <<
"NumDiscreteModels: " << discreteModels.size() << std::endl;
676 std::cout <<
"DiscreteModels: " << std::endl;
677 for(
size_t i=0; i<discreteModels.size(); i++){
678 if( !discreteModels[i].
print() ){
679 errorLog <<
"print() - Failed to print discrete model " << i <<
" to file!" << std::endl;
685 std::cout <<
"DownsampleFactor: " << downsampleFactor << std::endl;
686 std::cout <<
"CommitteeSize: " << committeeSize << std::endl;
687 std::cout <<
"Sigma: " << sigma << std::endl;
688 std::cout <<
"AutoEstimateSigma: " << autoEstimateSigma << std::endl;
689 std::cout <<
"NumContinuousModels: " << continuousModels.size() << std::endl;
690 std::cout <<
"ContinuousModels: " << std::endl;
691 for(
size_t i=0; i<continuousModels.size(); i++){
692 if( !continuousModels[i].
print() ){
693 errorLog <<
"print() - Failed to print continuous model " << i <<
" to file!" << std::endl;
707 errorLog <<
"save( fstream &file ) - File is not open!" << std::endl;
712 file <<
"HMM_MODEL_FILE_V2.0\n";
716 errorLog <<
"save(fstream &file) - Failed to save classifier base settings to file!" << std::endl;
721 file <<
"HmmType: " <<
hmmType << std::endl;
722 file <<
"ModelType: " << modelType << std::endl;
723 file <<
"Delta: " << delta << std::endl;
728 file <<
"NumStates: " << numStates << std::endl;
729 file <<
"NumSymbols: " << numSymbols << std::endl;
730 file <<
"NumRandomTrainingIterations: " << numRandomTrainingIterations << std::endl;
731 file <<
"NumDiscreteModels: " << discreteModels.size() << std::endl;
732 file <<
"DiscreteModels: " << std::endl;
733 for(
size_t i=0; i<discreteModels.size(); i++){
734 if( !discreteModels[i].
save( file ) ){
735 errorLog <<
"save(fstream &file) - Failed to save discrete model " << i <<
" to file!" << std::endl;
741 file <<
"DownsampleFactor: " << downsampleFactor << std::endl;
742 file <<
"CommitteeSize: " << committeeSize << std::endl;
743 file <<
"Sigma: " << sigma << std::endl;
744 file <<
"NumContinuousModels: " << continuousModels.size() << std::endl;
745 file <<
"ContinuousModels: " << std::endl;
746 for(
size_t i=0; i<continuousModels.size(); i++){
747 if( !continuousModels[i].
save( file ) ){
748 errorLog <<
"save(fstream &file) - Failed to save continuous model " << i <<
" to file!" << std::endl;
764 errorLog <<
"load( fstream &file ) - File is not open!" << std::endl;
774 if(word !=
"HMM_MODEL_FILE_V2.0"){
775 errorLog <<
"load( fstream &file ) - Could not find Model File Header!" << std::endl;
781 errorLog <<
"load(string filename) - Failed to load base settings from file!" << std::endl;
787 if(word !=
"HmmType:"){
788 errorLog <<
"load( fstream &file ) - Could not find HmmType." << std::endl;
794 if(word !=
"ModelType:"){
795 errorLog <<
"load( fstream &file ) - Could not find ModelType." << std::endl;
801 if(word !=
"Delta:"){
802 errorLog <<
"load( fstream &file ) - Could not find Delta." << std::endl;
812 if(word !=
"NumStates:"){
813 errorLog <<
"load( fstream &file ) - Could not find NumStates." << std::endl;
819 if(word !=
"NumSymbols:"){
820 errorLog <<
"load( fstream &file ) - Could not find NumSymbols." << std::endl;
826 if(word !=
"NumRandomTrainingIterations:"){
827 errorLog <<
"load( fstream &file ) - Could not find NumRandomTrainingIterations." << std::endl;
830 file >> numRandomTrainingIterations;
833 if(word !=
"NumDiscreteModels:"){
834 errorLog <<
"load( fstream &file ) - Could not find NumDiscreteModels." << std::endl;
840 if(word !=
"DiscreteModels:"){
841 errorLog <<
"load( fstream &file ) - Could not find DiscreteModels." << std::endl;
846 discreteModels.
resize(numModels);
847 for(
size_t i=0; i<discreteModels.size(); i++){
848 if( !discreteModels[i].
load( file ) ){
849 errorLog <<
"load(fstream &file) - Failed to load discrete model " << i <<
" from file!" << std::endl;
858 if(word !=
"DownsampleFactor:"){
859 errorLog <<
"load( fstream &file ) - Could not find DownsampleFactor." << std::endl;
862 file >> downsampleFactor;
865 if(word !=
"CommitteeSize:"){
866 errorLog <<
"load( fstream &file ) - Could not find CommitteeSize." << std::endl;
869 file >> committeeSize;
872 if(word !=
"Sigma:"){
873 errorLog <<
"load( fstream &file ) - Could not find Sigma." << std::endl;
879 if(word !=
"NumContinuousModels:"){
880 errorLog <<
"load( fstream &file ) - Could not find NumContinuousModels." << std::endl;
886 if(word !=
"ContinuousModels:"){
887 errorLog <<
"load( fstream &file ) - Could not find ContinuousModels." << std::endl;
892 continuousModels.
resize(numModels);
893 for(
size_t i=0; i<continuousModels.size(); i++){
894 if( !continuousModels[i].
load( file ) ){
895 errorLog <<
"load(fstream &file) - Failed to load continuous model " << i <<
" from file!" << std::endl;
914 for(UINT j=0; j<timeseries.
getNumRows(); j++){
915 if( timeseries[j][0] >= numSymbols ){
916 errorLog <<
"train(TimeSeriesClassificationData &trainingData) - Found an observation sequence with a value outside of the symbol range! Value: " << timeseries[j][0] << std::endl;
919 observationSequences[i][j] = (UINT)timeseries[j][0];
947 return numRandomTrainingIterations;
951 return discreteModels;
955 return continuousModels;
962 if( hmmType == HMM_DISCRETE || hmmType == HMM_CONTINUOUS ){
967 warningLog <<
"setHMMType(const UINT hmmType) - Unknown HMM type!" << std::endl;
975 if( modelType == HMM_ERGODIC || modelType == HMM_LEFTRIGHT ){
976 this->modelType = modelType;
980 warningLog <<
"setModelType(const UINT modelType) - Unknown model type!" << std::endl;
993 warningLog <<
"setDelta(const UINT delta) - Delta must be greater than zero!" << std::endl;
1000 if( downsampleFactor > 0 ){
1001 this->downsampleFactor = downsampleFactor;
1009 if( committeeSize > 0 ){
1010 this->committeeSize = committeeSize;
1021 if( numStates > 0 ){
1022 this->numStates = numStates;
1026 warningLog <<
"setNumStates(const UINT numStates) - Num states must be greater than zero!" << std::endl;
1034 if( numSymbols > 0 ){
1035 this->numSymbols = numSymbols;
1039 warningLog <<
"setNumSymbols(const UINT numSymbols) - Num symbols must be greater than zero!" << std::endl;
1047 if( numRandomTrainingIterations > 0 ){
1048 this->numRandomTrainingIterations = numRandomTrainingIterations;
1052 warningLog <<
"setMaxNumIterations(const UINT maxNumIter) - The number of random training iterations must be greater than zero!" << std::endl;
1058 this->sigma = sigma;
1059 for(
size_t i=0; i<continuousModels.size(); i++){
1060 continuousModels[i].setSigma( sigma );
1067 bool HMM::setAutoEstimateSigma(
const bool autoEstimateSigma){
1071 this->autoEstimateSigma = autoEstimateSigma;
bool saveBaseSettingsToFile(std::fstream &file) const
bool setHMMType(const UINT hmmType)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
bool setDownsampleFactor(const UINT downsampleFactor)
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
std::string getClassifierType() const
virtual bool predict_(VectorFloat &inputVector)
virtual bool resize(const unsigned int size)
virtual bool deepCopyFrom(const Classifier *classifier)
bool setNumRandomTrainingIterations(const UINT numRandomTrainingIterations)
virtual bool load(std::fstream &file)
UINT getClassLabelIndexValue(UINT classLabel) const
bool setNumSymbols(const UINT numStates)
virtual bool train(ClassificationData trainingData)
Vector< DiscreteHiddenMarkovModel > getDiscreteModels() const
Vector< MinMax > getRanges() const
bool setModelType(const UINT modelType)
UINT getModelType() const
virtual bool print() const
Vector< ClassTracker > getClassTracker() const
HMM & operator=(const HMM &rhs)
UINT hmmType
Controls if this is a HMM_DISCRETE or a HMM_CONTINUOUS.
UINT getNumStates() const
bool setSigma(const Float sigma)
virtual bool train_(TimeSeriesClassificationData &trainingData)
HMM(const UINT hmmType=HMM_CONTINUOUS, const UINT modelType=HMM_LEFTRIGHT, const UINT delta=1, const bool useScaling=false, const bool useNullRejection=false)
bool copyBaseVariables(const Classifier *classifier)
bool loadBaseSettingsFromFile(std::fstream &file)
UINT getNumSymbols() const
unsigned int getNumRows() const
unsigned int getNumCols() const
bool setDelta(const UINT delta)
bool setCommitteeSize(const UINT committeeSize)
virtual bool save(std::fstream &file) const
bool scale(const Float minTarget, const Float maxTarget)
TimeSeriesClassificationData getClassData(const UINT classLabel) const
UINT getNumDimensions() const
This class acts as the main interface for using a Hidden Markov Model.
UINT getNumClasses() const
UINT getNumRandomTrainingIterations() const
bool setNumStates(const UINT numStates)
UINT getNumSamples() const
static Float sum(const VectorFloat &x)
Vector< ContinuousHiddenMarkovModel > getContinuousModels() const