api/0.1.0/_k_means_features_8cpp_source.html

 /*

  GRT MIT License

  Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>


  Permission is hereby granted, free of charge, to any person obtaining a copy of this software

  and associated documentation files (the "Software"), to deal in the Software without restriction,

  including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,

  and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,

  subject to the following conditions:


  The above copyright notice and this permission notice shall be included in all copies or substantial

  portions of the Software.


  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT

  LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

  */


 #include "KMeansFeatures.h"


 GRT_BEGIN_NAMESPACE


 //Define the static registerModule member, constructed with the string ID "KMeansFeatures".
 //NOTE(review): presumably this is what lets the FeatureExtraction base class look this module up by name - confirm against FeatureExtraction.h

 RegisterFeatureExtractionModule< KMeansFeatures > KMeansFeatures::registerModule("KMeansFeatures");


 //Main constructor.
 //@param numClustersPerLayer: the number of clusters to use in each layer (one element per layer)
 //@param alpha: stored in the alpha member
 //@param useScaling: if true, input data is scaled to [0 1] using the ranges found during training
 //If numClustersPerLayer is not empty then init(...) is called so the module is ready to be trained.
 KMeansFeatures::KMeansFeatures(const Vector< UINT > numClustersPerLayer,const Float alpha,const bool useScaling){

     //Keep a copy of the user supplied settings
     this->useScaling = useScaling;
     this->alpha = alpha;
     this->numClustersPerLayer = numClustersPerLayer;

     //Name this module
     classType = "KMeansFeatures";
     featureExtractionType = classType;

     //Prefix every log stream with the module name
     warningLog.setProceedingText("[WARNING KMeansFeatures]");
     errorLog.setProceedingText("[ERROR KMeansFeatures]");
     debugLog.setProceedingText("[DEBUG KMeansFeatures]");

     //An empty layer list leaves the module uninitialized
     if( numClustersPerLayer.getSize() != 0 ){
         init( numClustersPerLayer );
     }
 }


 //Copy constructor.
 //Sets up the module name and log prefixes, then delegates the actual copying to the assignment operator.
 //@param rhs: the instance to copy
 KMeansFeatures::KMeansFeatures(const KMeansFeatures &rhs){

     //Name this module
     classType = "KMeansFeatures";
     featureExtractionType = classType;

     //Prefix every log stream with the module name
     warningLog.setProceedingText("[WARNING KMeansFeatures]");
     errorLog.setProceedingText("[ERROR KMeansFeatures]");
     debugLog.setProceedingText("[DEBUG KMeansFeatures]");

     //Reuse the assignment operator to copy the data from rhs into this instance
     operator=( rhs );
 }


 //Destructor. The body is intentionally empty: nothing is released explicitly here.
 KMeansFeatures::~KMeansFeatures(){

     //Here you should add any specific code to cleanup your custom feature extraction module if needed

 }


 //Assignment operator.
 //Copies the layer configuration and the trained model (alpha, ranges and clusters) from rhs,
 //then copies the base class variables via copyBaseVariables(...).
 //Without the model members a copy of a trained instance would have no clusters, so
 //computeFeatures(...) on the copy would fail when projecting through the first layer.
 //@param rhs: the instance to copy
 //@return a reference to this instance
 KMeansFeatures& KMeansFeatures::operator=(const KMeansFeatures &rhs){

     if( this != &rhs ){

         //Copy the settings owned by this class
         this->numClustersPerLayer = rhs.numClustersPerLayer;
         this->alpha = rhs.alpha;

         //Copy the trained model
         this->ranges = rhs.ranges;
         this->clusters = rhs.clusters;

         //Copy the base variables
         copyBaseVariables( static_cast< const FeatureExtraction* >( &rhs ) );

         //NOTE(review): copyBaseVariables may already copy useScaling - the explicit copy is harmless either way
         this->useScaling = rhs.useScaling;
     }

     return *this;
 }


 bool KMeansFeatures::deepCopyFrom(const FeatureExtraction *featureExtraction){


     if( featureExtraction == NULL ) return false;


     if( this->getFeatureExtractionType() == featureExtraction->getFeatureExtractionType() ){


         //Cast the feature extraction pointer to a pointer to your custom feature extraction module

         //Then invoke the equals operator

         *this = *(KMeansFeatures*)featureExtraction;


         return true;

     }


     errorLog << "clone(FeatureExtraction *featureExtraction) -  FeatureExtraction Types Do Not Match!" << std::endl;


     return false;

 }


 bool KMeansFeatures::computeFeatures(const VectorFloat &inputVector){


     VectorFloat data( numInputDimensions );


     //Scale the input data if needed, if not just copy it

     if( useScaling ){

         for(UINT j=0; j<numInputDimensions; j++){

             data[j] = scale(inputVector[j],ranges[j].minValue,ranges[j].maxValue,0,1);

         }

     }else{

         for(UINT j=0; j<numInputDimensions; j++){

             data[j] = inputVector[j];

         }

     }


     const UINT numLayers = getNumLayers();

     for(UINT layer=0; layer<numLayers; layer++){

         if( !projectDataThroughLayer(data, featureVector, layer) ){

             errorLog << "computeFeatures(const VectorFloat &inputVector) - Failed to project data through layer: " << layer << std::endl;

             return false;

         }


         //The output of the current layer will become the input to the next layer unless it is the last layer

         if( layer+1 < numLayers ){

             data = featureVector;

         }

     }


     return true;

 }


 //Resets the module. This implementation does no work and always returns true.
 bool KMeansFeatures::reset(){

     return true;

 }


 //Saves the model to the file with the given name.
 //The file is opened for output and the work is delegated to saveModelToFile( std::fstream& ).
 //@param filename: the name of the file to write
 //@return true if the model was saved, false otherwise
 bool KMeansFeatures::saveModelToFile( std::string filename ) const{

     std::fstream file( filename.c_str(), std::ios::out );

     const bool saved = saveModelToFile( file );

     //Close explicitly on success; on failure the stream is closed when it goes out of scope
     if( saved ){
         file.close();
     }

     return saved;
 }


 //Loads the model from the file with the given name.
 //The file is opened for input and the work is delegated to loadModelFromFile( std::fstream& ).
 //@param filename: the name of the file to read
 //@return true if the model was loaded, false otherwise
 bool KMeansFeatures::loadModelFromFile( std::string filename ){

     std::fstream file( filename.c_str(), std::ios::in );

     const bool loaded = loadModelFromFile( file );

     //Close explicitly on success; on failure the stream is closed when it goes out of scope
     if( loaded ){
         file.close();
     }

     return loaded;
 }


 bool KMeansFeatures::saveModelToFile( std::fstream &file ) const{


     if( !file.is_open() ){

         errorLog << "saveModelToFile(fstream &file) - The file is not open!" << std::endl;

         return false;

     }


     //First, you should add a header (with no spaces) e.g.

     file << "KMEANS_FEATURES_FILE_V1.0" << std::endl;


     //Second, you should save the base feature extraction settings to the file

     if( !saveFeatureExtractionSettingsToFile( file ) ){

         errorLog << "saveFeatureExtractionSettingsToFile(fstream &file) - Failed to save base feature extraction settings to file!" << std::endl;

         return false;

     }


     file << "NumLayers: " << getNumLayers() << std::endl;

     file << "NumClustersPerLayer: ";

     for(UINT i=0; i<numClustersPerLayer.getSize(); i++){

         file << " " << numClustersPerLayer[i];

     }

     file << std::endl;


     file << "Alpha: " << alpha << std::endl;


     if( trained ){

         file << "Ranges: ";

         for(UINT i=0; i<ranges.getSize(); i++){

             file << ranges[i].minValue << " " << ranges[i].maxValue << " ";

         }

         file << std::endl;


         file << "Clusters: " << std::endl;

         for(UINT k=0; k<clusters.getSize(); k++){

             file << "NumRows: " << clusters[k].getNumRows() << std::endl;

             file << "NumCols: " << clusters[k].getNumCols() << std::endl;

             for(UINT i=0; i<clusters[k].getNumRows(); i++){

                 for(UINT j=0; j<clusters[k].getNumCols(); j++){

                     file << clusters[k][i][j];

                     if( j+1 < clusters[k].getNumCols() )

                         file << "\t";

                 }

                 file << std::endl;

             }

         }

     }


     return true;

 }


 bool KMeansFeatures::loadModelFromFile( std::fstream &file ){


     clear();


     if( !file.is_open() ){

         errorLog << "loadModelFromFile(fstream &file) - The file is not open!" << std::endl;

         return false;

     }


     std::string word;

     UINT numLayers = 0;

     UINT numRows = 0;

     UINT numCols = 0;


     //First, you should read and validate the header

     file >> word;

     if( word != "KMEANS_FEATURES_FILE_V1.0" ){

         errorLog << "loadModelFromFile(fstream &file) - Invalid file format!" << std::endl;

         return false;

     }


     //Second, you should load the base feature extraction settings to the file

     if( !loadFeatureExtractionSettingsFromFile( file ) ){

         errorLog << "loadFeatureExtractionSettingsFromFile(fstream &file) - Failed to load base feature extraction settings from file!" << std::endl;

         return false;

     }


     //Load the number of layers

     file >> word;

     if( word != "NumLayers:" ){

         errorLog << "loadModelFromFile(fstream &file) - Failed to read NumLayers header!" << std::endl;

         return false;

     }

     file >> numLayers;

     numClustersPerLayer.resize( numLayers );


     //Load the number clusters per layer

     file >> word;

     if( word != "NumClustersPerLayer:" ){

         errorLog << "loadModelFromFile(fstream &file) - Failed to read NumClustersPerLayer header!" << std::endl;

         return false;

     }

     for(UINT i=0; i<numClustersPerLayer.getSize(); i++){

         file >> numClustersPerLayer[i];

     }


     //Load the alpha parameter

     file >> word;

     if( word != "Alpha:" ){

         errorLog << "loadModelFromFile(fstream &file) - Failed to read Alpha header!" << std::endl;

         return false;

     }

     file >> alpha;


     //If the model has been trained then load it

     if( trained ){


         //Load the Ranges

         file >> word;

         if( word != "Ranges:" ){

             errorLog << "loadModelFromFile(fstream &file) - Failed to read Ranges header!" << std::endl;

             return false;

         }

         ranges.resize(numInputDimensions);

         for(UINT i=0; i<ranges.size(); i++){

             file >> ranges[i].minValue;

             file >> ranges[i].maxValue;

         }


         //Load the Clusters

         file >> word;

         if( word != "Clusters:" ){

             errorLog << "loadModelFromFile(fstream &file) - Failed to read Clusters header!" << std::endl;

             return false;

         }

         clusters.resize( numLayers );


         for(UINT k=0; k<clusters.size(); k++){


             //Load the NumRows

             file >> word;

             if( word != "NumRows:" ){

                 errorLog << "loadModelFromFile(fstream &file) - Failed to read NumRows header!" << std::endl;

                 return false;

             }

             file >> numRows;


             //Load the NumCols

             file >> word;

             if( word != "NumCols:" ){

                 errorLog << "loadModelFromFile(fstream &file) - Failed to read NumCols header!" << std::endl;

                 return false;

             }

             file >> numCols;


             clusters[k].resize(numRows, numCols);

             for(UINT i=0; i<clusters[k].getNumRows(); i++){

                 for(UINT j=0; j<clusters[k].getNumCols(); j++){

                     file >> clusters[k][i][j];

                 }

             }

         }

     }


     return true;

 }


 //Initializes the module with a new layer configuration.
 //clear() is always called first, even if the supplied configuration turns out to be empty.
 //@param numClustersPerLayer: the number of clusters to use in each layer, it must not be empty
 //@return true if the module was initialized, false if numClustersPerLayer is empty
 bool KMeansFeatures::init( const Vector< UINT > numClustersPerLayer ){

     clear();

     const bool hasLayers = numClustersPerLayer.size() != 0;
     if( !hasLayers ){
         return false;
     }

     //The module is now initialized but not yet trained
     trained = false;
     initialized = true;

     //Both dimensions stay at 0 until the KMeansFeatures has been trained
     numOutputDimensions = 0;
     numInputDimensions = 0;

     this->numClustersPerLayer = numClustersPerLayer;

     return true;
 }


 //Trains the model from labelled classification data.
 //The data is converted to a MatrixFloat and passed to train_( MatrixFloat& ).
 //@param trainingData: the data to train on
 //@return the result of train_( MatrixFloat& )
 bool KMeansFeatures::train_(ClassificationData &trainingData){

     //train_( MatrixFloat& ) takes a non-const reference, so the converted samples need a named local
     MatrixFloat samples( trainingData.getDataAsMatrixFloat() );

     const bool success = train_( samples );
     return success;
 }


 //Trains the model from time series classification data.
 //The data is converted to a MatrixFloat and passed to train_( MatrixFloat& ).
 //@param trainingData: the data to train on
 //@return the result of train_( MatrixFloat& )
 bool KMeansFeatures::train_(TimeSeriesClassificationData &trainingData){

     //train_( MatrixFloat& ) takes a non-const reference, so the converted samples need a named local
     MatrixFloat samples( trainingData.getDataAsMatrixFloat() );

     const bool success = train_( samples );
     return success;
 }


 //Trains the model from a classification data stream.
 //The data is converted to a MatrixFloat and passed to train_( MatrixFloat& ).
 //@param trainingData: the data to train on
 //@return the result of train_( MatrixFloat& )
 bool KMeansFeatures::train_(ClassificationDataStream &trainingData){

     //train_( MatrixFloat& ) takes a non-const reference, so the converted samples need a named local
     MatrixFloat samples( trainingData.getDataAsMatrixFloat() );

     const bool success = train_( samples );
     return success;
 }


 //Trains the model from unlabelled data.
 //The data is converted to a MatrixFloat and passed to train_( MatrixFloat& ).
 //@param trainingData: the data to train on
 //@return the result of train_( MatrixFloat& )
 bool KMeansFeatures::train_(UnlabelledData &trainingData){

     //train_( MatrixFloat& ) takes a non-const reference, so the converted samples need a named local
     MatrixFloat samples( trainingData.getDataAsMatrixFloat() );

     const bool success = train_( samples );
     return success;
 }


 //Trains one KMeans model per layer.
 //The first layer is trained on the (optionally scaled) input data; every following layer is trained on
 //the previous layer's data projected through that previous layer.
 //Any clusters from an earlier training run are discarded first. Without that, the new layers would be
 //appended after the stale ones and the projection for layer k would use the old layer k.
 //NOTE: trainingData is modified in place - it is scaled when useScaling is true and is replaced by the
 //projected data after each layer except the last.
 //@param trainingData: the training data, one sample per row
 //@return true if every layer was trained, false if the module is not initialized, has no layers,
 //        or a layer failed to train or project
 bool KMeansFeatures::train_(MatrixFloat &trainingData){

     if( !initialized ){
         errorLog << "train_(MatrixFloat &trainingData) - The quantizer has not been initialized!" << std::endl;
         return false;
     }

     //Guard the getSize()-1 index used for the output dimension below
     const UINT K = numClustersPerLayer.getSize();
     if( K == 0 ){
         errorLog << "train_(MatrixFloat &trainingData) - The number of layers is zero!" << std::endl;
         return false;
     }

     //Reset any previous model
     featureDataReady = false;
     trained = false;
     clusters.clear();

     const UINT M = trainingData.getNumRows();
     const UINT N = trainingData.getNumCols();

     numInputDimensions = N;
     numOutputDimensions = numClustersPerLayer[ K-1 ];

     //Scale the input data if needed
     ranges = trainingData.getRanges();
     if( useScaling ){
         for(UINT i=0; i<M; i++){
             for(UINT j=0; j<N; j++){
                 trainingData[i][j] = grt_scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0.0,1.0);
             }
         }
     }

     //Train the KMeans model at each layer
     for(UINT k=0; k<K; k++){
         KMeans kmeans;
         kmeans.setNumClusters( numClustersPerLayer[k] );
         kmeans.setComputeTheta( true );
         kmeans.setMinChange( minChange );
         kmeans.setMinNumEpochs( minNumEpochs );
         kmeans.setMaxNumEpochs( maxNumEpochs );

         trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << std::endl;
         if( !kmeans.train_( trainingData ) ){
             errorLog << "train_(MatrixFloat &trainingData) - Failed to train kmeans model at layer: " << k << std::endl;
             //Do not leave a partially trained model behind
             clusters.clear();
             return false;
         }

         //Save the clusters
         clusters.push_back( kmeans.getClusters() );

         //Project the data through the current layer to use as training data for the next layer
         if( k+1 != K ){
             MatrixFloat data( M, numClustersPerLayer[k] );
             VectorFloat input( trainingData.getNumCols() );
             VectorFloat output( data.getNumCols() );

             for(UINT i=0; i<M; i++){

                 //Copy the data into the sample
                 for(UINT j=0; j<input.getSize(); j++){
                     input[j] = trainingData[i][j];
                 }

                 //Project the sample through the current layer
                 if( !projectDataThroughLayer( input, output, k ) ){
                     errorLog << "train_(MatrixFloat &trainingData) - Failed to project sample through layer: " << k << std::endl;
                     //Do not leave a partially trained model behind
                     clusters.clear();
                     return false;
                 }

                 //Copy the result into the training data for the next layer
                 for(UINT j=0; j<output.getSize(); j++){
                     data[i][j] = output[j];
                 }
             }

             //Swap the data for the next layer
             trainingData = data;
         }
     }

     //Flag that the kmeans model has been trained
     trained = true;
     featureVector.resize( numOutputDimensions, 0 );

     return true;
 }


 bool KMeansFeatures::projectDataThroughLayer( const VectorFloat &input, VectorFloat &output, const UINT layer ){


     if( layer >= clusters.getSize() ){

         errorLog << "projectDataThroughLayer(...) - Layer out of bounds! It should be less than: " << clusters.getSize() << std::endl;

         return false;

     }


     const UINT M = clusters[ layer ].getNumRows();

     const UINT N = clusters[ layer ].getNumCols();


     if( input.getSize() != N ){

         errorLog << "projectDataThroughLayer(...) - The size of the input Vector (" << input.getSize() << ") does not match the size: " << N << std::endl;

         return false;

     }


     //Make sure the output Vector size is OK

     if( output.getSize() != M ){

         output.resize( M );

     }


     UINT i,j = 0;

     //Float gamma = 2.0*SQR(alpha);

     //Float gamma = 2.0*SQR( 1 );

     for(i=0; i<M; i++){

         output[i] = 0;

         for(j=0; j<N; j++){

             output[i] += grt_sqr( input[j] - clusters[layer][i][j] );

             //output[i] += input[j] * clusters[layer][i][j];

         }

         //cout << "i: " << i << " sum: " << output[i] << " output: " << 1.0/(1.0+exp(-output[i])) << std::endl;

         //cout << "i: " << i << " sum: " << output[i] << " output: " << exp( -output[i] / gamma ) << std::endl;

         //output[i] = exp( -output[i] / gamma );

         //output[i] = 1.0/(1.0+exp(-output[i]));

         output[i] = grt_sqrt( output[i] ); //L2 Norm


     }


     return true;

 }


 //Gets the number of layers, i.e. the number of elements in numClustersPerLayer.
 //@return the number of layers
 UINT KMeansFeatures::getNumLayers() const{
     const UINT layerCount = numClustersPerLayer.getSize();
     return layerCount;
 }


 //Gets the number of clusters configured for one layer.
 //@param layerIndex: the index of the layer, it must be less than the number of layers
 //@return the number of clusters in the layer, or 0 (with a warning) if layerIndex is out of bounds
 UINT KMeansFeatures::getLayerSize(const UINT layerIndex) const{

     const UINT layerCount = numClustersPerLayer.getSize();

     if( layerIndex < layerCount ){
         return numClustersPerLayer[layerIndex];
     }

     warningLog << "LayerIndex is out of bounds. It must be less than the number of layers: " << layerCount << std::endl;
     return 0;
 }


 //Gets the cluster matrices. The Vector is returned by value, so the caller receives its own copy.
 //train_( MatrixFloat& ) appends one MatrixFloat for each layer it trains.
 Vector< MatrixFloat > KMeansFeatures::getClusters() const{

     return clusters;

 }


 GRT_END_NAMESPACE

KMeansFeatures::KMeansFeatures
KMeansFeatures(const Vector< UINT > numClustersPerLayer=Vector< UINT >(1, 100), const Float alpha=0.2, const bool useScaling=true)
Definition: KMeansFeatures.cpp:28

MLBase::scale
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: MLBase.h:339

MatrixFloat
Definition: MatrixFloat.h:36

KMeansFeatures::deepCopyFrom
virtual bool deepCopyFrom(const FeatureExtraction *featureExtraction)
Definition: KMeansFeatures.cpp:74

FeatureExtraction::init
bool init()
Definition: FeatureExtraction.cpp:87

FeatureExtraction::saveFeatureExtractionSettingsToFile
bool saveFeatureExtractionSettingsToFile(std::fstream &file) const
Definition: FeatureExtraction.cpp:119

ClassificationDataStream
Definition: ClassificationDataStream.h:42

UnlabelledData::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: UnlabelledData.cpp:657

Vector::resize
virtual bool resize(const unsigned int size)
Definition: Vector.h:133

KMeans::train_
virtual bool train_(MatrixFloat &data)
Definition: KMeans.cpp:162

KMeansFeatures::train_
virtual bool train_(ClassificationData &trainingData)
Definition: KMeansFeatures.cpp:330

KMeansFeatures::computeFeatures
virtual bool computeFeatures(const VectorFloat &inputVector)
Definition: KMeansFeatures.cpp:92

MLBase::setMinChange
bool setMinChange(const Float minChange)
Definition: MLBase.cpp:282

UnlabelledData
Definition: UnlabelledData.h:38

Vector::getSize
unsigned int getSize() const
Definition: Vector.h:193

FeatureExtraction::getFeatureExtractionType
std::string getFeatureExtractionType() const
Definition: FeatureExtraction.cpp:164

KMeansFeatures.h

TimeSeriesClassificationData::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: TimeSeriesClassificationData.cpp:1062

KMeansFeatures::operator=
KMeansFeatures & operator=(const KMeansFeatures &rhs)
Definition: KMeansFeatures.cpp:63

RegisterFeatureExtractionModule< KMeansFeatures >

ClassificationData
Definition: ClassificationData.h:43

Matrix::getNumRows
unsigned int getNumRows() const
Definition: Matrix.h:542

ClassificationDataStream::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: ClassificationDataStream.cpp:919

Matrix::getNumCols
unsigned int getNumCols() const
Definition: Matrix.h:549

KMeansFeatures
Definition: KMeansFeatures.h:41

FeatureExtraction
Definition: FeatureExtraction.h:38

MLBase::setMinNumEpochs
bool setMinNumEpochs(const UINT minNumEpochs)
Definition: MLBase.cpp:277

KMeansFeatures::loadModelFromFile
virtual bool loadModelFromFile(std::string filename)
Definition: KMeansFeatures.cpp:141

KMeansFeatures::~KMeansFeatures
virtual ~KMeansFeatures()
Definition: KMeansFeatures.cpp:59

VectorFloat
Definition: VectorFloat.h:33

FeatureExtraction::loadFeatureExtractionSettingsFromFile
bool loadFeatureExtractionSettingsFromFile(std::fstream &file)
Definition: FeatureExtraction.cpp:133

MatrixFloat::getRanges
Vector< MinMax > getRanges() const
Definition: MatrixFloat.cpp:491

KMeans
Definition: KMeans.h:41

ClassificationData::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: ClassificationData.cpp:1476

TimeSeriesClassificationData
Definition: TimeSeriesClassificationData.h:42

FeatureExtraction::copyBaseVariables
bool copyBaseVariables(const FeatureExtraction *featureExtractionModule)
Definition: FeatureExtraction.cpp:62

Vector< UINT >

MLBase::setMaxNumEpochs
bool setMaxNumEpochs(const UINT maxNumEpochs)
Definition: MLBase.cpp:268

KMeansFeatures::reset
virtual bool reset()
Definition: KMeansFeatures.cpp:123

Clusterer::setNumClusters
bool setNumClusters(const UINT numClusters)
Definition: Clusterer.cpp:265

KMeansFeatures::saveModelToFile
virtual bool saveModelToFile(std::string filename) const
Definition: KMeansFeatures.cpp:127

FeatureExtraction::clear
virtual bool clear()
Definition: FeatureExtraction.cpp:107