api/0.2.0/_k_means_features_8cpp_source.html

 /*

  GRT MIT License

  Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>


  Permission is hereby granted, free of charge, to any person obtaining a copy of this software

  and associated documentation files (the "Software"), to deal in the Software without restriction,

  including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,

  and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,

  subject to the following conditions:


  The above copyright notice and this permission notice shall be included in all copies or substantial

  portions of the Software.


  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT

  LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

  */


 #define GRT_DLL_EXPORTS

 #include "KMeansFeatures.h"


 GRT_BEGIN_NAMESPACE


 //Register the KMeansFeatures module with the FeatureExtraction base class under the name "KMeansFeatures"
 //NOTE(review): presumably this allows the base class to create instances from that string - confirm against RegisterFeatureExtractionModule

 RegisterFeatureExtractionModule< KMeansFeatures > KMeansFeatures::registerModule("KMeansFeatures");


 //Main constructor.
 //Stores the requested layer layout, the alpha value and the scaling flag. If at least one layer has
 //been specified then init(...) is called so the module is flagged as initialized (but not trained).
 KMeansFeatures::KMeansFeatures(const Vector< UINT > numClustersPerLayer,const Float alpha,const bool useScaling){

     //Tag the instance with the name of this feature extraction module
     classType = "KMeansFeatures";
     featureExtractionType = classType;

     //Keep a copy of the user supplied settings
     this->useScaling = useScaling;
     this->alpha = alpha;
     this->numClustersPerLayer = numClustersPerLayer;

     //Prefix every log message with the module name
     warningLog.setProceedingText("[WARNING KMeansFeatures]");
     errorLog.setProceedingText("[ERROR KMeansFeatures]");
     debugLog.setProceedingText("[DEBUG KMeansFeatures]");

     //Nothing else to do if no layers were supplied
     if( numClustersPerLayer.size() == 0 ){
         return;
     }

     init( numClustersPerLayer );
 }


 //Copy constructor.
 //Sets up the module name and log prefixes, then delegates the actual copy to the assignment operator.
 KMeansFeatures::KMeansFeatures(const KMeansFeatures &rhs){

     //Tag the instance with the name of this feature extraction module
     classType = "KMeansFeatures";
     featureExtractionType = classType;

     //Prefix every log message with the module name
     debugLog.setProceedingText("[DEBUG KMeansFeatures]");
     errorLog.setProceedingText("[ERROR KMeansFeatures]");
     warningLog.setProceedingText("[WARNING KMeansFeatures]");

     //The assignment operator does the real work of copying rhs into this instance
     operator=( rhs );
 }


 //Destructor.
 //Intentionally empty: no cleanup code is required here.
 KMeansFeatures::~KMeansFeatures()
 {
 }


 //Assignment operator.
 //Copies the layer layout, the alpha value and the trained model data (ranges and clusters) from rhs,
 //then copies the base class variables via copyBaseVariables(...).
 //@param rhs: the instance to copy from
 //@return a reference to this instance
 KMeansFeatures& KMeansFeatures::operator=(const KMeansFeatures &rhs){

     //Self assignment is a no-op
     if( this == &rhs ){
         return *this;
     }

     //Copy the settings that belong to this module
     this->numClustersPerLayer = rhs.numClustersPerLayer;
     this->alpha = rhs.alpha;

     //Copy the model data, without these a copy of a trained instance could not compute any features
     this->ranges = rhs.ranges;
     this->clusters = rhs.clusters;

     //Copy the base variables (copyBaseVariables takes a const pointer, so no const needs to be cast away)
     copyBaseVariables( static_cast< const FeatureExtraction* >( &rhs ) );

     return *this;
 }


 bool KMeansFeatures::deepCopyFrom(const FeatureExtraction *featureExtraction){


  if( featureExtraction == NULL ) return false;


  if( this->getFeatureExtractionType() == featureExtraction->getFeatureExtractionType() ){


  //Cast the feature extraction pointer to a pointer to your custom feature extraction module

  //Then invoke the equals operator

  *this = *(KMeansFeatures*)featureExtraction;


  return true;

  }


  errorLog << "clone(FeatureExtraction *featureExtraction) - FeatureExtraction Types Do Not Match!" << std::endl;


  return false;

 }


 bool KMeansFeatures::computeFeatures(const VectorFloat &inputVector){


  VectorFloat data( numInputDimensions );


  //Scale the input data if needed, if not just copy it

  if( useScaling ){

  for(UINT j=0; j<numInputDimensions; j++){

  data[j] = scale(inputVector[j],ranges[j].minValue,ranges[j].maxValue,0,1);

  }

  }else{

  for(UINT j=0; j<numInputDimensions; j++){

  data[j] = inputVector[j];

  }

  }


  const UINT numLayers = getNumLayers();

  for(UINT layer=0; layer<numLayers; layer++){

  if( !projectDataThroughLayer(data, featureVector, layer) ){

  errorLog << "computeFeatures(const VectorFloat &inputVector) - Failed to project data through layer: " << layer << std::endl;

  return false;

  }


  //The output of the current layer will become the input to the next layer unless it is the last layer

  if( layer+1 < numLayers ){

  data = featureVector;

  }

  }


  return true;

 }


 //Reset function.
 //Nothing is modified by this function, it always reports success.
 //@return always true
 bool KMeansFeatures::reset()
 {
     const bool success = true;
     return success;
 }


 bool KMeansFeatures::saveModelToFile( std::string filename ) const{


  std::fstream file;

  file.open(filename.c_str(), std::ios::out);


  if( !saveModelToFile( file ) ){

  return false;

  }


  file.close();


  return true;

 }


 bool KMeansFeatures::loadModelFromFile( std::string filename ){


  std::fstream file;

  file.open(filename.c_str(), std::ios::in);


  if( !loadModelFromFile( file ) ){

  return false;

  }


  //Close the file

  file.close();


  return true;

 }


 bool KMeansFeatures::saveModelToFile( std::fstream &file ) const{


  if( !file.is_open() ){

  errorLog << "saveModelToFile(fstream &file) - The file is not open!" << std::endl;

  return false;

  }


  //First, you should add a header (with no spaces) e.g.

  file << "KMEANS_FEATURES_FILE_V1.0" << std::endl;


  //Second, you should save the base feature extraction settings to the file

  if( !saveFeatureExtractionSettingsToFile( file ) ){

  errorLog << "saveFeatureExtractionSettingsToFile(fstream &file) - Failed to save base feature extraction settings to file!" << std::endl;

  return false;

  }


  file << "NumLayers: " << getNumLayers() << std::endl;

  file << "NumClustersPerLayer: ";

  for(UINT i=0; i<numClustersPerLayer.getSize(); i++){

  file << " " << numClustersPerLayer[i];

  }

  file << std::endl;


  file << "Alpha: " << alpha << std::endl;


  if( trained ){

  file << "Ranges: ";

  for(UINT i=0; i<ranges.getSize(); i++){

  file << ranges[i].minValue << " " << ranges[i].maxValue << " ";

  }

  file << std::endl;


  file << "Clusters: " << std::endl;

  for(UINT k=0; k<clusters.getSize(); k++){

  file << "NumRows: " << clusters[k].getNumRows() << std::endl;

  file << "NumCols: " << clusters[k].getNumCols() << std::endl;

  for(UINT i=0; i<clusters[k].getNumRows(); i++){

  for(UINT j=0; j<clusters[k].getNumCols(); j++){

  file << clusters[k][i][j];

  if( j+1 < clusters[k].getNumCols() )

  file << "\t";

  }

  file << std::endl;

  }

  }

  }


  return true;

 }


 //Loads the model from an already opened file stream.
 //The expected layout is the one written by saveModelToFile(std::fstream&): a file header, the base
 //feature extraction settings, NumLayers, NumClustersPerLayer and Alpha, followed by the Ranges and
 //Clusters sections if the loaded settings flag the model as trained.
 //Every numeric read is validated, so a truncated or corrupt file is rejected instead of being loaded
 //as a model that contains garbage values.
 //@param file: the stream to read from, it must be open
 //@return true if the model was loaded, false if the file is not open, a section header is missing or
 //a value could not be read
 bool KMeansFeatures::loadModelFromFile( std::fstream &file ){

     clear();

     if( !file.is_open() ){
         errorLog << "loadModelFromFile(fstream &file) - The file is not open!" << std::endl;
         return false;
     }

     std::string word;
     UINT numLayers = 0;
     UINT numRows = 0;
     UINT numCols = 0;

     //First, read and validate the header
     file >> word;
     if( word != "KMEANS_FEATURES_FILE_V1.0" ){
         errorLog << "loadModelFromFile(fstream &file) - Invalid file format!" << std::endl;
         return false;
     }

     //Second, load the base feature extraction settings from the file
     if( !loadFeatureExtractionSettingsFromFile( file ) ){
         errorLog << "loadFeatureExtractionSettingsFromFile(fstream &file) - Failed to load base feature extraction settings from file!" << std::endl;
         return false;
     }

     //Load the number of layers
     file >> word;
     if( word != "NumLayers:" ){
         errorLog << "loadModelFromFile(fstream &file) - Failed to read NumLayers header!" << std::endl;
         return false;
     }
     file >> numLayers;
     if( file.fail() ){
         //Validate the value before it is used to size any buffers
         errorLog << "loadModelFromFile(fstream &file) - Failed to read NumLayers value!" << std::endl;
         clear();
         return false;
     }
     numClustersPerLayer.resize( numLayers );

     //Load the number clusters per layer
     file >> word;
     if( word != "NumClustersPerLayer:" ){
         errorLog << "loadModelFromFile(fstream &file) - Failed to read NumClustersPerLayer header!" << std::endl;
         return false;
     }
     for(UINT i=0; i<numClustersPerLayer.getSize(); i++){
         file >> numClustersPerLayer[i];
     }
     if( file.fail() ){
         errorLog << "loadModelFromFile(fstream &file) - Failed to read NumClustersPerLayer values!" << std::endl;
         clear();
         return false;
     }

     //Load the alpha parameter
     file >> word;
     if( word != "Alpha:" ){
         errorLog << "loadModelFromFile(fstream &file) - Failed to read Alpha header!" << std::endl;
         return false;
     }
     file >> alpha;
     if( file.fail() ){
         errorLog << "loadModelFromFile(fstream &file) - Failed to read Alpha value!" << std::endl;
         clear();
         return false;
     }

     //If the model has been trained then load it
     if( trained ){

         //Load the Ranges
         file >> word;
         if( word != "Ranges:" ){
             errorLog << "loadModelFromFile(fstream &file) - Failed to read Ranges header!" << std::endl;
             return false;
         }
         ranges.resize(numInputDimensions);
         for(UINT i=0; i<ranges.size(); i++){
             file >> ranges[i].minValue;
             file >> ranges[i].maxValue;
         }
         if( file.fail() ){
             errorLog << "loadModelFromFile(fstream &file) - Failed to read Ranges values!" << std::endl;
             clear();
             return false;
         }

         //Load the Clusters
         file >> word;
         if( word != "Clusters:" ){
             errorLog << "loadModelFromFile(fstream &file) - Failed to read Clusters header!" << std::endl;
             return false;
         }
         clusters.resize( numLayers );

         for(UINT k=0; k<clusters.size(); k++){

             //Load the NumRows
             file >> word;
             if( word != "NumRows:" ){
                 errorLog << "loadModelFromFile(fstream &file) - Failed to read NumRows header!" << std::endl;
                 return false;
             }
             file >> numRows;

             //Load the NumCols
             file >> word;
             if( word != "NumCols:" ){
                 errorLog << "loadModelFromFile(fstream &file) - Failed to read NumCols header!" << std::endl;
                 return false;
             }
             file >> numCols;

             //Validate the matrix size before it is used to allocate the matrix
             if( file.fail() ){
                 errorLog << "loadModelFromFile(fstream &file) - Failed to read the cluster matrix size for layer: " << k << std::endl;
                 clear();
                 return false;
             }

             clusters[k].resize(numRows, numCols);
             for(UINT i=0; i<clusters[k].getNumRows(); i++){
                 for(UINT j=0; j<clusters[k].getNumCols(); j++){
                     file >> clusters[k][i][j];
                 }
             }

             //A read that stops exactly at the end of the file does not set the fail bit, so a
             //complete file is still accepted here
             if( file.fail() ){
                 errorLog << "loadModelFromFile(fstream &file) - Failed to read the cluster data for layer: " << k << std::endl;
                 clear();
                 return false;
             }
         }
     }

     return true;
 }


 //Initializes the module with a new layer layout.
 //Any existing state is cleared first. On success the module is flagged as initialized but not trained.
 //@param numClustersPerLayer: the number of clusters to use in each layer, must not be empty
 //@return true if the module was initialized, false if no layers were given
 bool KMeansFeatures::init( const Vector< UINT > numClustersPerLayer ){

     clear();

     const bool hasLayers = numClustersPerLayer.size() != 0;
     if( !hasLayers ){
         return false;
     }

     this->numClustersPerLayer = numClustersPerLayer;

     //Both dimensions stay at 0 until the KMeansFeatures has been trained
     numOutputDimensions = 0;
     numInputDimensions = 0;

     //The feature extraction is now initialized, but it has not been trained yet
     trained = false;
     initialized = true;

     return true;
 }


 //Trains the model from ClassificationData.
 //The data is converted to a MatrixFloat and passed on to the MatrixFloat overload of train_.
 //@return the result of train_(MatrixFloat&)
 bool KMeansFeatures::train_(ClassificationData &trainingData){

     //A named copy is needed because the MatrixFloat overload takes a non-const reference
     MatrixFloat samples = trainingData.getDataAsMatrixFloat();

     const bool result = train_( samples );
     return result;
 }


 //Trains the model from TimeSeriesClassificationData.
 //The data is converted to a MatrixFloat and passed on to the MatrixFloat overload of train_.
 //@return the result of train_(MatrixFloat&)
 bool KMeansFeatures::train_(TimeSeriesClassificationData &trainingData){

     //A named copy is needed because the MatrixFloat overload takes a non-const reference
     MatrixFloat samples = trainingData.getDataAsMatrixFloat();

     const bool result = train_( samples );
     return result;
 }


 //Trains the model from a ClassificationDataStream.
 //The data is converted to a MatrixFloat and passed on to the MatrixFloat overload of train_.
 //@return the result of train_(MatrixFloat&)
 bool KMeansFeatures::train_(ClassificationDataStream &trainingData){

     //A named copy is needed because the MatrixFloat overload takes a non-const reference
     MatrixFloat samples = trainingData.getDataAsMatrixFloat();

     const bool result = train_( samples );
     return result;
 }


 //Trains the model from UnlabelledData.
 //The data is converted to a MatrixFloat and passed on to the MatrixFloat overload of train_.
 //@return the result of train_(MatrixFloat&)
 bool KMeansFeatures::train_(UnlabelledData &trainingData){

     //A named copy is needed because the MatrixFloat overload takes a non-const reference
     MatrixFloat samples = trainingData.getDataAsMatrixFloat();

     const bool result = train_( samples );
     return result;
 }


 //Trains one KMeans model per layer.
 //The first layer is trained on the (optionally scaled) training data. The data is then projected
 //through that layer and the projected data is used to train the next layer, and so on.
 //Any clusters from a previous training run are discarded first, so the module can be retrained.
 //NOTE: trainingData is modified, it is scaled in place if useScaling is set and it is replaced by
 //the projected data whenever there is more than one layer.
 //@param trainingData: the training data, one sample per row
 //@return true if every layer was trained, false if the module is not initialized, the data is empty,
 //no layers have been set, or the training or projection of any layer failed
 bool KMeansFeatures::train_(MatrixFloat &trainingData){

     if( !initialized ){
         errorLog << "train_(MatrixFloat &trainingData) - The quantizer has not been initialized!" << std::endl;
         return false;
     }

     //Reset any previous model. The clusters must be emptied because each layer is appended below,
     //otherwise the layers from an earlier training run would be used for the projection
     featureDataReady = false;
     trained = false;
     clusters.clear();

     const UINT M = trainingData.getNumRows();
     const UINT N = trainingData.getNumCols();
     const UINT K = numClustersPerLayer.getSize();

     if( M == 0 || N == 0 ){
         errorLog << "train_(MatrixFloat &trainingData) - The training data is empty!" << std::endl;
         return false;
     }

     //Guard the K-1 index below against an empty layer list
     if( K == 0 ){
         errorLog << "train_(MatrixFloat &trainingData) - The number of clusters per layer has not been set!" << std::endl;
         return false;
     }

     numInputDimensions = N;
     numOutputDimensions = numClustersPerLayer[ K-1 ];

     //Scale the input data if needed
     ranges = trainingData.getRanges();
     if( useScaling ){
         for(UINT i=0; i<M; i++){
             for(UINT j=0; j<N; j++){
                 trainingData[i][j] = grt_scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0.0,1.0);
             }
         }
     }

     //Train the KMeans model at each layer
     for(UINT k=0; k<K; k++){
         KMeans kmeans;
         kmeans.setNumClusters( numClustersPerLayer[k] );
         kmeans.setComputeTheta( true );
         kmeans.setMinChange( minChange );
         kmeans.setMinNumEpochs( minNumEpochs );
         kmeans.setMaxNumEpochs( maxNumEpochs );

         trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << std::endl;
         if( !kmeans.train_( trainingData ) ){
             errorLog << "train_(MatrixFloat &trainingData) - Failed to train kmeans model at layer: " << k << std::endl;
             return false;
         }

         //Save the clusters
         clusters.push_back( kmeans.getClusters() );

         //Project the data through the current layer to use as training data for the next layer
         if( k+1 != K ){
             MatrixFloat data( M, numClustersPerLayer[k] );
             VectorFloat input( trainingData.getNumCols() );
             VectorFloat output( data.getNumCols() );

             for(UINT i=0; i<M; i++){

                 //Copy the data into the sample
                 for(UINT j=0; j<input.getSize(); j++){
                     input[j] = trainingData[i][j];
                 }

                 //Project the sample through the current layer
                 if( !projectDataThroughLayer( input, output, k ) ){
                     errorLog << "train_(MatrixFloat &trainingData) - Failed to project sample through layer: " << k << std::endl;
                     return false;
                 }

                 //Copy the result into the training data for the next layer
                 for(UINT j=0; j<output.getSize(); j++){
                     data[i][j] = output[j];
                 }
             }

             //Swap the data for the next layer
             trainingData = data;
         }
     }

     //Flag that the kmeans model has been trained
     trained = true;
     featureVector.resize( numOutputDimensions, 0 );

     return true;
 }


 bool KMeansFeatures::projectDataThroughLayer( const VectorFloat &input, VectorFloat &output, const UINT layer ){


  if( layer >= clusters.getSize() ){

  errorLog << "projectDataThroughLayer(...) - Layer out of bounds! It should be less than: " << clusters.getSize() << std::endl;

  return false;

  }


  const UINT M = clusters[ layer ].getNumRows();

  const UINT N = clusters[ layer ].getNumCols();


  if( input.getSize() != N ){

  errorLog << "projectDataThroughLayer(...) - The size of the input Vector (" << input.getSize() << ") does not match the size: " << N << std::endl;

  return false;

  }


  //Make sure the output Vector size is OK

  if( output.getSize() != M ){

  output.resize( M );

  }


  UINT i,j = 0;

  //Float gamma = 2.0*SQR(alpha);

  //Float gamma = 2.0*SQR( 1 );

  for(i=0; i<M; i++){

  output[i] = 0;

  for(j=0; j<N; j++){

  output[i] += grt_sqr( input[j] - clusters[layer][i][j] );

  //output[i] += input[j] * clusters[layer][i][j];

  }

  //cout << "i: " << i << " sum: " << output[i] << " output: " << 1.0/(1.0+exp(-output[i])) << std::endl;

  //cout << "i: " << i << " sum: " << output[i] << " output: " << exp( -output[i] / gamma ) << std::endl;

  //output[i] = exp( -output[i] / gamma );

  //output[i] = 1.0/(1.0+exp(-output[i]));

  output[i] = grt_sqrt( output[i] ); //L2 Norm


  }


  return true;

 }


 //Gets the number of layers, this is the number of entries in numClustersPerLayer.
 //@return the number of layers
 UINT KMeansFeatures::getNumLayers() const{
     const UINT numLayers = numClustersPerLayer.getSize();
     return numLayers;
 }


 //Gets the number of clusters in a given layer.
 //@param layerIndex: the index of the layer, it must be less than the number of layers
 //@return the number of clusters in the layer, or 0 (with a warning) if the index is out of bounds
 UINT KMeansFeatures::getLayerSize(const UINT layerIndex) const{

     if( layerIndex < numClustersPerLayer.getSize() ){
         return numClustersPerLayer[layerIndex];
     }

     warningLog << "LayerIndex is out of bounds. It must be less than the number of layers: " << numClustersPerLayer.getSize() << std::endl;
     return 0;
 }


 //Gets a copy of the cluster matrices, one MatrixFloat per layer.
 //@return a copy of the clusters
 Vector< MatrixFloat > KMeansFeatures::getClusters() const
 {
     return clusters;
 }


 GRT_END_NAMESPACE

KMeansFeatures::KMeansFeatures
KMeansFeatures(const Vector< UINT > numClustersPerLayer=Vector< UINT >(1, 100), const Float alpha=0.2, const bool useScaling=true)
Definition: KMeansFeatures.cpp:29

MLBase::scale
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: MLBase.h:353

MatrixFloat
Definition: MatrixFloat.h:36

KMeansFeatures::deepCopyFrom
virtual bool deepCopyFrom(const FeatureExtraction *featureExtraction)
Definition: KMeansFeatures.cpp:75

FeatureExtraction::init
bool init()
Definition: FeatureExtraction.cpp:88

FeatureExtraction::saveFeatureExtractionSettingsToFile
bool saveFeatureExtractionSettingsToFile(std::fstream &file) const
Definition: FeatureExtraction.cpp:120

ClassificationDataStream
Definition: ClassificationDataStream.h:42

UnlabelledData::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: UnlabelledData.cpp:662

Vector::resize
virtual bool resize(const unsigned int size)
Definition: Vector.h:133

KMeans::train_
virtual bool train_(MatrixFloat &data)
Definition: KMeans.cpp:163

KMeansFeatures::train_
virtual bool train_(ClassificationData &trainingData)
Definition: KMeansFeatures.cpp:331

KMeansFeatures::computeFeatures
virtual bool computeFeatures(const VectorFloat &inputVector)
Definition: KMeansFeatures.cpp:93

Vector::getSize
UINT getSize() const
Definition: Vector.h:191

MLBase::setMinChange
bool setMinChange(const Float minChange)
Definition: MLBase.cpp:287

UnlabelledData
Definition: UnlabelledData.h:38

FeatureExtraction::getFeatureExtractionType
std::string getFeatureExtractionType() const
Definition: FeatureExtraction.cpp:165

KMeansFeatures.h

TimeSeriesClassificationData::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: TimeSeriesClassificationData.cpp:1067

KMeansFeatures::operator=
KMeansFeatures & operator=(const KMeansFeatures &rhs)
Definition: KMeansFeatures.cpp:64

RegisterFeatureExtractionModule< KMeansFeatures >

ClassificationData
Definition: ClassificationData.h:43

Matrix::getNumRows
unsigned int getNumRows() const
Definition: Matrix.h:542

ClassificationDataStream::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: ClassificationDataStream.cpp:920

Matrix::getNumCols
unsigned int getNumCols() const
Definition: Matrix.h:549

KMeansFeatures
Definition: KMeansFeatures.h:41

FeatureExtraction
Definition: FeatureExtraction.h:38

MLBase::setMinNumEpochs
bool setMinNumEpochs(const UINT minNumEpochs)
Definition: MLBase.cpp:282

KMeansFeatures::loadModelFromFile
virtual bool loadModelFromFile(std::string filename)
Definition: KMeansFeatures.cpp:142

KMeansFeatures::~KMeansFeatures
virtual ~KMeansFeatures()
Definition: KMeansFeatures.cpp:60

VectorFloat
Definition: VectorFloat.h:33

FeatureExtraction::loadFeatureExtractionSettingsFromFile
bool loadFeatureExtractionSettingsFromFile(std::fstream &file)
Definition: FeatureExtraction.cpp:134

MatrixFloat::getRanges
Vector< MinMax > getRanges() const
Definition: MatrixFloat.cpp:492

KMeans
Definition: KMeans.h:41

ClassificationData::getDataAsMatrixFloat
MatrixFloat getDataAsMatrixFloat() const
Definition: ClassificationData.cpp:1486

TimeSeriesClassificationData
Definition: TimeSeriesClassificationData.h:42

FeatureExtraction::copyBaseVariables
bool copyBaseVariables(const FeatureExtraction *featureExtractionModule)
Definition: FeatureExtraction.cpp:63

Vector< UINT >

MLBase::setMaxNumEpochs
bool setMaxNumEpochs(const UINT maxNumEpochs)
Definition: MLBase.cpp:273

KMeansFeatures::reset
virtual bool reset()
Definition: KMeansFeatures.cpp:124

Clusterer::setNumClusters
bool setNumClusters(const UINT numClusters)
Definition: Clusterer.cpp:266

KMeansFeatures::saveModelToFile
virtual bool saveModelToFile(std::string filename) const
Definition: KMeansFeatures.cpp:128

FeatureExtraction::clear
virtual bool clear()
Definition: FeatureExtraction.cpp:108