21 #define GRT_DLL_EXPORTS
31 classType =
"KMeansFeatures";
32 featureExtractionType = classType;
34 debugLog.setProceedingText(
"[DEBUG KMeansFeatures]");
35 errorLog.setProceedingText(
"[ERROR KMeansFeatures]");
36 warningLog.setProceedingText(
"[WARNING KMeansFeatures]");
38 this->numClustersPerLayer = numClustersPerLayer;
40 this->useScaling = useScaling;
42 if( numClustersPerLayer.size() > 0 ){
43 init( numClustersPerLayer );
49 classType =
"KMeansFeatures";
50 featureExtractionType = classType;
52 debugLog.setProceedingText(
"[DEBUG KMeansFeatures]");
53 errorLog.setProceedingText(
"[ERROR KMeansFeatures]");
54 warningLog.setProceedingText(
"[WARNING KMeansFeatures]");
67 this->numClustersPerLayer = rhs.numClustersPerLayer;
77 if( featureExtraction == NULL )
return false;
88 errorLog <<
"clone(FeatureExtraction *featureExtraction) - FeatureExtraction Types Do Not Match!" << std::endl;
99 for(UINT j=0; j<numInputDimensions; j++){
100 data[j] =
scale(inputVector[j],ranges[j].minValue,ranges[j].maxValue,0,1);
103 for(UINT j=0; j<numInputDimensions; j++){
104 data[j] = inputVector[j];
108 const UINT numLayers = getNumLayers();
109 for(UINT layer=0; layer<numLayers; layer++){
110 if( !projectDataThroughLayer(data, featureVector, layer) ){
111 errorLog <<
"computeFeatures(const VectorFloat &inputVector) - Failed to project data through layer: " << layer << std::endl;
116 if( layer+1 < numLayers ){
117 data = featureVector;
131 file.open(filename.c_str(), std::ios::out);
145 file.open(filename.c_str(), std::ios::in);
159 if( !file.is_open() ){
160 errorLog <<
"saveModelToFile(fstream &file) - The file is not open!" << std::endl;
165 file <<
"KMEANS_FEATURES_FILE_V1.0" << std::endl;
169 errorLog <<
"saveFeatureExtractionSettingsToFile(fstream &file) - Failed to save base feature extraction settings to file!" << std::endl;
173 file <<
"NumLayers: " << getNumLayers() << std::endl;
174 file <<
"NumClustersPerLayer: ";
175 for(UINT i=0; i<numClustersPerLayer.
getSize(); i++){
176 file <<
" " << numClustersPerLayer[i];
180 file <<
"Alpha: " << alpha << std::endl;
184 for(UINT i=0; i<ranges.
getSize(); i++){
185 file << ranges[i].minValue <<
" " << ranges[i].maxValue <<
" ";
189 file <<
"Clusters: " << std::endl;
190 for(UINT k=0; k<clusters.
getSize(); k++){
191 file <<
"NumRows: " << clusters[k].getNumRows() << std::endl;
192 file <<
"NumCols: " << clusters[k].getNumCols() << std::endl;
193 for(UINT i=0; i<clusters[k].getNumRows(); i++){
194 for(UINT j=0; j<clusters[k].getNumCols(); j++){
195 file << clusters[k][i][j];
196 if( j+1 < clusters[k].getNumCols() )
211 if( !file.is_open() ){
212 errorLog <<
"loadModelFromFile(fstream &file) - The file is not open!" << std::endl;
223 if( word !=
"KMEANS_FEATURES_FILE_V1.0" ){
224 errorLog <<
"loadModelFromFile(fstream &file) - Invalid file format!" << std::endl;
230 errorLog <<
"loadFeatureExtractionSettingsFromFile(fstream &file) - Failed to load base feature extraction settings from file!" << std::endl;
236 if( word !=
"NumLayers:" ){
237 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumLayers header!" << std::endl;
241 numClustersPerLayer.
resize( numLayers );
245 if( word !=
"NumClustersPerLayer:" ){
246 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumClustersPerLayer header!" << std::endl;
249 for(UINT i=0; i<numClustersPerLayer.
getSize(); i++){
250 file >> numClustersPerLayer[i];
255 if( word !=
"Alpha:" ){
256 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Alpha header!" << std::endl;
266 if( word !=
"Ranges:" ){
267 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Ranges header!" << std::endl;
270 ranges.
resize(numInputDimensions);
271 for(UINT i=0; i<ranges.size(); i++){
272 file >> ranges[i].minValue;
273 file >> ranges[i].maxValue;
278 if( word !=
"Clusters:" ){
279 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Clusters header!" << std::endl;
282 clusters.
resize( numLayers );
284 for(UINT k=0; k<clusters.size(); k++){
288 if( word !=
"NumRows:" ){
289 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumRows header!" << std::endl;
296 if( word !=
"NumCols:" ){
297 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumCols header!" << std::endl;
302 clusters[k].
resize(numRows, numCols);
303 for(UINT i=0; i<clusters[k].getNumRows(); i++){
304 for(UINT j=0; j<clusters[k].getNumCols(); j++){
305 file >> clusters[k][i][j];
318 if( numClustersPerLayer.size() == 0 )
return false;
320 this->numClustersPerLayer = numClustersPerLayer;
321 numInputDimensions = 0;
322 numOutputDimensions = 0;
354 errorLog <<
"train_(MatrixFloat &trainingData) - The quantizer has not been initialized!" << std::endl;
359 featureDataReady =
false;
364 numInputDimensions = N;
365 numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.
getSize()-1 ];
370 for(UINT i=0; i<M; i++){
371 for(UINT j=0; j<N; j++){
372 trainingData[i][j] = grt_scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0.0,1.0);
378 const UINT K = numClustersPerLayer.
getSize();
379 for(UINT k=0; k<K; k++){
382 kmeans.setComputeTheta(
true );
387 trainingLog <<
"Layer " << k+1 <<
"/" << K <<
" NumClusters: " << numClustersPerLayer[k] << std::endl;
388 if( !kmeans.
train_( trainingData ) ){
389 errorLog <<
"train_(MatrixFloat &trainingData) - Failed to train kmeans model at layer: " << k << std::endl;
394 clusters.push_back( kmeans.getClusters() );
402 for(UINT i=0; i<M; i++){
405 for(UINT j=0; j<input.getSize(); j++){
406 input[j] = trainingData[i][j];
410 if( !projectDataThroughLayer( input, output, k ) ){
411 errorLog <<
"train_(MatrixFloat &trainingData) - Failed to project sample through layer: " << k << std::endl;
416 for(UINT j=0; j<output.getSize(); j++){
417 data[i][j] = output[j];
429 featureVector.
resize( numOutputDimensions, 0 );
434 bool KMeansFeatures::projectDataThroughLayer(
const VectorFloat &input,
VectorFloat &output,
const UINT layer ){
436 if( layer >= clusters.
getSize() ){
437 errorLog <<
"projectDataThroughLayer(...) - Layer out of bounds! It should be less than: " << clusters.
getSize() << std::endl;
441 const UINT M = clusters[ layer ].getNumRows();
442 const UINT N = clusters[ layer ].getNumCols();
445 errorLog <<
"projectDataThroughLayer(...) - The size of the input Vector (" << input.
getSize() <<
") does not match the size: " << N << std::endl;
460 output[i] += grt_sqr( input[j] - clusters[layer][i][j] );
467 output[i] = grt_sqrt( output[i] );
474 UINT KMeansFeatures::getNumLayers()
const{
475 return numClustersPerLayer.
getSize();
478 UINT KMeansFeatures::getLayerSize(
const UINT layerIndex)
const{
479 if( layerIndex >= numClustersPerLayer.
getSize() ){
480 warningLog <<
"LayerIndex is out of bounds. It must be less than the number of layers: " << numClustersPerLayer.
getSize() << std::endl;
483 return numClustersPerLayer[layerIndex];
KMeansFeatures(const Vector< UINT > numClustersPerLayer=Vector< UINT >(1, 100), const Float alpha=0.2, const bool useScaling=true)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
virtual bool deepCopyFrom(const FeatureExtraction *featureExtraction)
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int size)
virtual bool train_(MatrixFloat &data)
virtual bool train_(ClassificationData &trainingData)
virtual bool computeFeatures(const VectorFloat &inputVector)
bool setMinChange(const Float minChange)
MatrixFloat getDataAsMatrixFloat() const
KMeansFeatures & operator=(const KMeansFeatures &rhs)
unsigned int getNumRows() const
MatrixFloat getDataAsMatrixFloat() const
unsigned int getNumCols() const
bool setMinNumEpochs(const UINT minNumEpochs)
virtual bool loadModelFromFile(std::string filename)
virtual ~KMeansFeatures()
Vector< MinMax > getRanges() const
MatrixFloat getDataAsMatrixFloat() const
bool setMaxNumEpochs(const UINT maxNumEpochs)
bool setNumClusters(const UINT numClusters)
virtual bool saveModelToFile(std::string filename) const