30 classType =
"KMeansFeatures";
31 featureExtractionType = classType;
33 debugLog.setProceedingText(
"[DEBUG KMeansFeatures]");
34 errorLog.setProceedingText(
"[ERROR KMeansFeatures]");
35 warningLog.setProceedingText(
"[WARNING KMeansFeatures]");
37 this->numClustersPerLayer = numClustersPerLayer;
39 this->useScaling = useScaling;
41 if( numClustersPerLayer.size() > 0 ){
42 init( numClustersPerLayer );
48 classType =
"KMeansFeatures";
49 featureExtractionType = classType;
51 debugLog.setProceedingText(
"[DEBUG KMeansFeatures]");
52 errorLog.setProceedingText(
"[ERROR KMeansFeatures]");
53 warningLog.setProceedingText(
"[WARNING KMeansFeatures]");
66 this->numClustersPerLayer = rhs.numClustersPerLayer;
76 if( featureExtraction == NULL )
return false;
87 errorLog <<
"clone(FeatureExtraction *featureExtraction) - FeatureExtraction Types Do Not Match!" << std::endl;
// Fragment of the feature-computation routine (its own error message names it
// computeFeatures(const VectorFloat &inputVector)). The listing omits the
// signature and several statements, so only the visible steps are described.
//
// Step 1a: copy the input into `data`, mapping each dimension from
// [ranges[j].minValue, ranges[j].maxValue] onto [0, 1].
98 for(UINT j=0; j<numInputDimensions; j++){
99 data[j] =
scale(inputVector[j],ranges[j].minValue,ranges[j].maxValue,0,1);
// Step 1b: alternative path that copies the input unchanged.
// NOTE(review): the condition choosing between 1a and 1b is on an omitted
// line - presumably the useScaling flag; confirm against the full source.
102 for(UINT j=0; j<numInputDimensions; j++){
103 data[j] = inputVector[j];
// Step 2: push the data through every layer in order; each call writes the
// layer's result into featureVector.
107 const UINT numLayers = getNumLayers();
108 for(UINT layer=0; layer<numLayers; layer++){
109 if( !projectDataThroughLayer(data, featureVector, layer) ){
110 errorLog <<
"computeFeatures(const VectorFloat &inputVector) - Failed to project data through layer: " << layer << std::endl;
// For every layer except the last, the layer's output becomes the input of
// the next layer; after the final layer featureVector is left as-is.
115 if( layer+1 < numLayers ){
116 data = featureVector;
130 file.open(filename.c_str(), std::ios::out);
144 file.open(filename.c_str(), std::ios::in);
// Fragment of the stream-based model writer (its own error message names it
// saveModelToFile(fstream &file)). Signature, return statements and closing
// braces are omitted from this listing.
//
// Guard: the caller must supply an already-open stream.
158 if( !file.is_open() ){
159 errorLog <<
"saveModelToFile(fstream &file) - The file is not open!" << std::endl;
// File-format tag; the loader compares against this exact string.
164 file <<
"KMEANS_FEATURES_FILE_V1.0" << std::endl;
// Error reported when saving the base feature-extraction settings fails
// (the call that is being checked is on an omitted line).
168 errorLog <<
"saveFeatureExtractionSettingsToFile(fstream &file) - Failed to save base feature extraction settings to file!" << std::endl;
// Layer topology: the layer count, then one cluster count per layer on a
// single line, each value preceded by a space.
172 file <<
"NumLayers: " << getNumLayers() << std::endl;
173 file <<
"NumClustersPerLayer: ";
174 for(UINT i=0; i<numClustersPerLayer.
getSize(); i++){
175 file <<
" " << numClustersPerLayer[i];
179 file <<
"Alpha: " << alpha << std::endl;
// One "min max " pair per entry of `ranges`.
// NOTE(review): the loader expects a "Ranges:" header before these values;
// it is presumably written on an omitted line - confirm.
183 for(UINT i=0; i<ranges.
getSize(); i++){
184 file << ranges[i].minValue <<
" " << ranges[i].maxValue <<
" ";
// One matrix per layer: its dimensions first, then the values row by row.
188 file <<
"Clusters: " << std::endl;
189 for(UINT k=0; k<clusters.
getSize(); k++){
190 file <<
"NumRows: " << clusters[k].getNumRows() << std::endl;
191 file <<
"NumCols: " << clusters[k].getNumCols() << std::endl;
192 for(UINT i=0; i<clusters[k].getNumRows(); i++){
193 for(UINT j=0; j<clusters[k].getNumCols(); j++){
194 file << clusters[k][i][j];
// True for every column except the last one in the row. The guarded
// statement is on an omitted line - presumably it writes a column separator.
195 if( j+1 < clusters[k].getNumCols() )
// Fragment of the stream-based model reader (its own error messages name it
// loadModelFromFile(fstream &file)). The signature, the `file >> word` reads
// that feed each header comparison, the return statements and the closing
// braces are omitted from this listing.
//
// The reader checks a sequence of whitespace-delimited header tokens in a
// fixed order and logs an error when a token does not match.
//
// Guard: the caller must supply an already-open stream.
210 if( !file.is_open() ){
211 errorLog <<
"loadModelFromFile(fstream &file) - The file is not open!" << std::endl;
// File-format tag check.
222 if( word !=
"KMEANS_FEATURES_FILE_V1.0" ){
223 errorLog <<
"loadModelFromFile(fstream &file) - Invalid file format!" << std::endl;
// Error reported when loading the base feature-extraction settings fails
// (the call that is being checked is on an omitted line).
229 errorLog <<
"loadFeatureExtractionSettingsFromFile(fstream &file) - Failed to load base feature extraction settings from file!" << std::endl;
235 if( word !=
"NumLayers:" ){
236 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumLayers header!" << std::endl;
// Size the per-layer cluster-count vector from the layer count.
// NOTE(review): the statement that reads numLayers is not visible here.
240 numClustersPerLayer.
resize( numLayers );
244 if( word !=
"NumClustersPerLayer:" ){
245 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumClustersPerLayer header!" << std::endl;
// Read one cluster count per layer.
248 for(UINT i=0; i<numClustersPerLayer.
getSize(); i++){
249 file >> numClustersPerLayer[i];
254 if( word !=
"Alpha:" ){
255 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Alpha header!" << std::endl;
265 if( word !=
"Ranges:" ){
266 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Ranges header!" << std::endl;
// One min/max pair per input dimension, in the order min then max.
269 ranges.
resize(numInputDimensions);
270 for(UINT i=0; i<ranges.size(); i++){
271 file >> ranges[i].minValue;
272 file >> ranges[i].maxValue;
277 if( word !=
"Clusters:" ){
278 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Clusters header!" << std::endl;
// One cluster matrix per layer.
281 clusters.
resize( numLayers );
283 for(UINT k=0; k<clusters.size(); k++){
287 if( word !=
"NumRows:" ){
288 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumRows header!" << std::endl;
295 if( word !=
"NumCols:" ){
296 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumCols header!" << std::endl;
// Allocate the layer's matrix, then fill it row by row from the stream.
// NOTE(review): the reads of numRows / numCols are not visible here.
301 clusters[k].
resize(numRows, numCols);
302 for(UINT i=0; i<clusters[k].getNumRows(); i++){
303 for(UINT j=0; j<clusters[k].getNumCols(); j++){
304 file >> clusters[k][i][j];
317 if( numClustersPerLayer.size() == 0 )
return false;
319 this->numClustersPerLayer = numClustersPerLayer;
320 numInputDimensions = 0;
321 numOutputDimensions = 0;
// Fragment of the training routine (its own error messages name it
// train_(MatrixFloat &trainingData)). The signature, several declarations
// (M, N, kmeans, data, input, output), conditions, return statements and
// closing braces are omitted from this listing.
//
// Error reported when training is attempted before initialisation (the
// condition being checked is on an omitted line).
353 errorLog <<
"train_(MatrixFloat &trainingData) - The quantizer has not been initialized!" << std::endl;
358 featureDataReady =
false;
// Input width is N; output width is the cluster count of the last layer.
363 numInputDimensions = N;
364 numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.
getSize()-1 ];
// Rescale every element from its column's [minValue, maxValue] onto
// [0.0, 1.0]. This assigns back into trainingData, so the caller's matrix is
// modified in place.
// NOTE(review): the condition guarding this loop is not visible - presumably
// the useScaling flag; confirm.
369 for(UINT i=0; i<M; i++){
370 for(UINT j=0; j<N; j++){
371 trainingData[i][j] = grt_scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0.0,1.0);
// Train one k-means model per layer, in order.
377 const UINT K = numClustersPerLayer.
getSize();
378 for(UINT k=0; k<K; k++){
381 kmeans.setComputeTheta(
true );
// The log line uses a 1-based layer number; the error below uses 0-based k.
386 trainingLog <<
"Layer " << k+1 <<
"/" << K <<
" NumClusters: " << numClustersPerLayer[k] << std::endl;
387 if( !kmeans.
train_( trainingData ) ){
388 errorLog <<
"train_(MatrixFloat &trainingData) - Failed to train kmeans model at layer: " << k << std::endl;
// Keep this layer's cluster matrix; projectDataThroughLayer(..., k) reads it.
393 clusters.push_back( kmeans.getClusters() );
// Project every training sample through the layer that was just trained and
// collect the results row by row in `data`.
// NOTE(review): how `data` feeds the next layer's training is on omitted
// lines - confirm against the full source.
401 for(UINT i=0; i<M; i++){
404 for(UINT j=0; j<input.getSize(); j++){
405 input[j] = trainingData[i][j];
409 if( !projectDataThroughLayer( input, output, k ) ){
410 errorLog <<
"train_(MatrixFloat &trainingData) - Failed to project sample through layer: " << k << std::endl;
415 for(UINT j=0; j<output.getSize(); j++){
416 data[i][j] = output[j];
// Size the feature vector to the output width, zero-filled.
428 featureVector.
resize( numOutputDimensions, 0 );
// Projects `input` through the cluster matrix of one layer, writing the
// result into `output`.
//
// @param input  vector to project; its size is compared against the layer's
//               column count (see the size-mismatch message below)
// @param output receives one value per index i (written as output[i])
// @param layer  index into `clusters`; must be less than clusters.getSize()
// @return bool; the return statements themselves are omitted from this
//         listing (computeFeatures and train_ treat `false` as failure)
//
// NOTE(review): loop headers, the size-check condition and any resizing of
// `output` are on omitted lines; the comments below cover visible lines only.
433 bool KMeansFeatures::projectDataThroughLayer(
const VectorFloat &input,
VectorFloat &output,
const UINT layer ){
// Reject a layer index that has no cluster matrix.
435 if( layer >= clusters.
getSize() ){
436 errorLog <<
"projectDataThroughLayer(...) - Layer out of bounds! It should be less than: " << clusters.
getSize() << std::endl;
// M = rows and N = columns of this layer's cluster matrix.
440 const UINT M = clusters[ layer ].getNumRows();
441 const UINT N = clusters[ layer ].getNumCols();
// Error reported when the input size differs from N.
444 errorLog <<
"projectDataThroughLayer(...) - The size of the input Vector (" << input.
getSize() <<
") does not match the size: " << N << std::endl;
// Accumulate the squared difference between the input and row i of the
// cluster matrix, then take the square root - presumably the Euclidean
// distance from the input to cluster i (enclosing loops not visible).
459 output[i] += grt_sqr( input[j] - clusters[layer][i][j] );
466 output[i] = grt_sqrt( output[i] );
// Returns the number of layers, i.e. the number of entries in
// numClustersPerLayer. (The closing brace is omitted from this listing.)
473 UINT KMeansFeatures::getNumLayers()
const{
474 return numClustersPerLayer.
getSize();
// Returns the configured number of clusters for the layer at `layerIndex`.
//
// @param layerIndex zero-based layer index; must be less than
//                   numClustersPerLayer.getSize()
// @return numClustersPerLayer[layerIndex] for an in-range index
//
// NOTE(review): an out-of-range index logs a warning; the value returned in
// that case is on an omitted line - confirm against the full source.
477 UINT KMeansFeatures::getLayerSize(
const UINT layerIndex)
const{
478 if( layerIndex >= numClustersPerLayer.
getSize() ){
479 warningLog <<
"LayerIndex is out of bounds. It must be less than the number of layers: " << numClustersPerLayer.
getSize() << std::endl;
482 return numClustersPerLayer[layerIndex];
KMeansFeatures(const Vector< UINT > numClustersPerLayer=Vector< UINT >(1, 100), const Float alpha=0.2, const bool useScaling=true)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
virtual bool deepCopyFrom(const FeatureExtraction *featureExtraction)
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int size)
virtual bool train_(MatrixFloat &data)
virtual bool train_(ClassificationData &trainingData)
virtual bool computeFeatures(const VectorFloat &inputVector)
bool setMinChange(const Float minChange)
unsigned int getSize() const
MatrixFloat getDataAsMatrixFloat() const
KMeansFeatures & operator=(const KMeansFeatures &rhs)
unsigned int getNumRows() const
MatrixFloat getDataAsMatrixFloat() const
unsigned int getNumCols() const
bool setMinNumEpochs(const UINT minNumEpochs)
virtual bool loadModelFromFile(std::string filename)
virtual ~KMeansFeatures()
Vector< MinMax > getRanges() const
MatrixFloat getDataAsMatrixFloat() const
bool setMaxNumEpochs(const UINT maxNumEpochs)
bool setNumClusters(const UINT numClusters)
virtual bool saveModelToFile(std::string filename) const