// Defines GRT_DLL_EXPORTS and then the static string KMeansFeatures::id, set to "KMeansFeatures".
// NOTE(review): the leading integers are line numbers from the original file that were fused into
// this listing; the lines between them (includes, namespace opening, ...) are not present here.
21 #define GRT_DLL_EXPORTS 27 std::string KMeansFeatures::id =
"KMeansFeatures";
// Constructor body fragment (the signature line and the closing braces are absent from this listing).
// Stores the requested per-layer cluster counts and the scaling flag on the instance.
35 this->numClustersPerLayer = numClustersPerLayer;
37 this->useScaling = useScaling;
// Only call init() when at least one layer was requested.
39 if( numClustersPerLayer.size() > 0 ){
40 init( numClustersPerLayer );
// Copies the per-layer cluster counts from `rhs` into this instance.
// NOTE(review): presumably part of the copy constructor or operator= — the enclosing
// function is not visible in this listing; confirm against the full file.
57 this->numClustersPerLayer = rhs.numClustersPerLayer;
// Fragment of a copy-from-base-pointer routine (presumably deepCopyFrom — signature not visible here).
// A NULL source pointer is rejected immediately.
67 if( featureExtraction == NULL )
return false;
// The copy is only attempted when both objects report the same id.
69 if( this->
getId() == featureExtraction->
getId() ){
// Logged when the ids differ; the lines performing the copy and the final return are absent from this listing.
78 errorLog << __GRT_LOG__ <<
" FeatureExtraction Types Do Not Match!" << std::endl;
// Fragment of the feature-computation routine (signature, guards and closing braces are absent).
// First variant: copy the input into `data`, mapping each dimension from its stored
// [minValue, maxValue] range to [0, 1] via scale().
89 for(UINT j=0; j<numInputDimensions; j++){
90 data[j] =
scale(inputVector[j],ranges[j].minValue,ranges[j].maxValue,0,1);
// Second variant: copy the input into `data` unchanged.
// NOTE(review): the condition selecting between the two loops is not visible — presumably useScaling; confirm.
93 for(UINT j=0; j<numInputDimensions; j++){
94 data[j] = inputVector[j];
// Push the data through every layer in order; each layer writes its result into featureVector.
98 const UINT numLayers = getNumLayers();
99 for(UINT layer=0; layer<numLayers; layer++){
100 if( !projectDataThroughLayer(data, featureVector, layer) ){
101 errorLog << __GRT_LOG__ <<
" Failed to project data through layer: " << layer << std::endl;
// For every layer except the last, the output of this layer becomes the input of the next.
106 if( layer+1 < numLayers ){
107 data = featureVector;
// Fragment of the routine that writes the model to an already-opened stream `file`
// (signature, return statements and closing braces are absent from this listing).
// The stream must already be open; otherwise an error is logged.
120 if( !file.is_open() ){
121 errorLog << __GRT_LOG__ <<
" The file is not open!" << std::endl;
// File-format header line; the loading code checks for this exact token.
126 file <<
"KMEANS_FEATURES_FILE_V1.0" << std::endl;
// Logged when writing the base-class settings fails (the call itself is not visible here).
130 errorLog << __GRT_LOG__ <<
" Failed to save base feature extraction settings to file!" << std::endl;
// Layer count, followed by the cluster count of every layer on one line.
134 file <<
"NumLayers: " << getNumLayers() << std::endl;
135 file <<
"NumClustersPerLayer: ";
136 for(UINT i=0; i<numClustersPerLayer.
getSize(); i++){
137 file <<
" " << numClustersPerLayer[i];
141 file <<
"Alpha: " << alpha << std::endl;
// One "min max" pair per entry of `ranges`, space separated.
// NOTE(review): the line writing the section label for the ranges is not visible in this listing.
145 for(UINT i=0; i<ranges.
getSize(); i++){
146 file << ranges[i].minValue <<
" " << ranges[i].maxValue <<
" ";
// One matrix per layer: its dimensions first, then the values row by row.
150 file <<
"Clusters: " << std::endl;
151 for(UINT k=0; k<clusters.
getSize(); k++){
152 file <<
"NumRows: " << clusters[k].getNumRows() << std::endl;
153 file <<
"NumCols: " << clusters[k].getNumCols() << std::endl;
154 for(UINT i=0; i<clusters[k].getNumRows(); i++){
155 for(UINT j=0; j<clusters[k].getNumCols(); j++){
156 file << clusters[k][i][j];
// True for every column except the last one in the row; the guarded statement is not visible
// (presumably a separator write — confirm).
157 if( j+1 < clusters[k].getNumCols() )
// Fragment of the routine that reads a model back from an already-opened stream `file`
// (signature, the `file >> word` reads, return statements and closing braces are absent from this listing).
// Each section is validated by comparing a token `word` against the expected label and logging on mismatch.
172 if( !file.is_open() ){
173 errorLog << __GRT_LOG__ <<
" The file is not open!" << std::endl;
// The first token must be the exact format header.
184 if( word !=
"KMEANS_FEATURES_FILE_V1.0" ){
185 errorLog << __GRT_LOG__ <<
" Invalid file format!" << std::endl;
// Logged when reading the base-class settings fails (the call itself is not visible here).
191 errorLog << __GRT_LOG__ <<
" Failed to load base feature extraction settings from file!" << std::endl;
197 if( word !=
"NumLayers:" ){
198 errorLog << __GRT_LOG__ <<
" Failed to read NumLayers header!" << std::endl;
// Size the per-layer vector from the layer count before its entries are read.
// NOTE(review): where numLayers is read is not visible in this listing.
202 numClustersPerLayer.
resize( numLayers );
206 if( word !=
"NumClustersPerLayer:" ){
207 errorLog << __GRT_LOG__ <<
" Failed to read NumClustersPerLayer header!" << std::endl;
210 for(UINT i=0; i<numClustersPerLayer.
getSize(); i++){
211 file >> numClustersPerLayer[i];
216 if( word !=
"Alpha:" ){
217 errorLog << __GRT_LOG__ <<
" Failed to read Alpha header!" << std::endl;
227 if( word !=
"Ranges:" ){
228 errorLog << __GRT_LOG__ <<
" Failed to read Ranges header!" << std::endl;
// One min/max pair is read per input dimension.
231 ranges.
resize(numInputDimensions);
232 for(UINT i=0; i<ranges.
getSize(); i++){
233 file >> ranges[i].minValue;
234 file >> ranges[i].maxValue;
239 if( word !=
"Clusters:" ){
240 errorLog << __GRT_LOG__ <<
" Failed to read Clusters header!" << std::endl;
// One cluster matrix per layer.
243 clusters.
resize( numLayers );
245 for(UINT k=0; k<clusters.
getSize(); k++){
249 if( word !=
"NumRows:" ){
250 errorLog << __GRT_LOG__ <<
" Failed to read NumRows header!" << std::endl;
257 if( word !=
"NumCols:" ){
258 errorLog << __GRT_LOG__ <<
" Failed to read NumCols header!" << std::endl;
// Allocate the matrix with the dimensions read above, then fill it row by row.
263 clusters[k].
resize(numRows, numCols);
264 for(UINT i=0; i<clusters[k].getNumRows(); i++){
265 for(UINT j=0; j<clusters[k].getNumCols(); j++){
266 file >> clusters[k][i][j];
// Fragment of the initialisation routine (signature and trailing lines are absent from this listing).
// An empty layer description is rejected.
279 if( numClustersPerLayer.
getSize() == 0 )
return false;
// Store the layer description and reset both dimension counters to zero.
281 this->numClustersPerLayer = numClustersPerLayer;
282 numInputDimensions = 0;
283 numOutputDimensions = 0;
// Fragment of train_(MatrixFloat &trainingData) (signature, declarations of M/N/kmeans/data/input/output,
// return statements and closing braces are absent from this listing).
// Logged when training is attempted before initialisation (the guarding condition is not visible).
315 errorLog <<
"train_(MatrixFloat &trainingData) - The quantizer has not been initialized!" << std::endl;
320 featureDataReady =
false;
// Input size is N; output size is the cluster count of the last layer.
325 numInputDimensions = N;
326 numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.
getSize()-1 ];
// Rescales every training value in place to [0, 1] using the stored per-column ranges.
// NOTE(review): this overwrites the caller's matrix; the condition guarding it is not visible — presumably useScaling.
331 for(UINT i=0; i<M; i++){
332 for(UINT j=0; j<N; j++){
333 trainingData[i][j] = grt_scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0.0,1.0);
// One k-means model is trained per layer.
339 const UINT K = numClustersPerLayer.
getSize();
340 for(UINT k=0; k<K; k++){
343 kmeans.setComputeTheta(
true );
348 trainingLog <<
"Layer " << k+1 <<
"/" << K <<
" NumClusters: " << numClustersPerLayer[k] << std::endl;
349 if( !kmeans.
train_( trainingData ) ){
350 errorLog <<
"train_(MatrixFloat &trainingData) - Failed to train kmeans model at layer: " << k << std::endl;
// Keep this layer's cluster matrix; projectDataThroughLayer() indexes `clusters` by layer.
355 clusters.push_back( kmeans.getClusters() );
// Project every training sample through the layer just trained and collect the outputs in `data`.
363 for(UINT i=0; i<M; i++){
366 for(UINT j=0; j<input.getSize(); j++){
367 input[j] = trainingData[i][j];
371 if( !projectDataThroughLayer( input, output, k ) ){
372 errorLog <<
"train_(MatrixFloat &trainingData) - Failed to project sample through layer: " << k << std::endl;
377 for(UINT j=0; j<output.getSize(); j++){
378 data[i][j] = output[j];
// Size the output feature vector to the final layer's width, zero-filled.
390 featureVector.
resize( numOutputDimensions, 0 );
// Projects `input` through the cluster matrix of the given layer and writes the result to `output`.
// @param input  vector whose size is checked against the layer's column count N
// @param output receives one value per cluster row i of the layer
// @param layer  index into `clusters`; must be less than clusters.getSize()
// NOTE(review): several lines (size check condition, output allocation, loop headers, returns, closing
// braces) are absent from this listing.
395 bool KMeansFeatures::projectDataThroughLayer(
const VectorFloat &input,
VectorFloat &output,
const UINT layer ){
397 if( layer >= clusters.
getSize() ){
398 errorLog << __GRT_LOG__ <<
" Layer out of bounds! It should be less than: " << clusters.
getSize() << std::endl;
// M = number of rows, N = number of columns of this layer's cluster matrix.
402 const UINT M = clusters[ layer ].getNumRows();
403 const UINT N = clusters[ layer ].getNumCols();
406 errorLog << __GRT_LOG__ <<
" The size of the input Vector (" << input.
getSize() <<
") does not match the size: " << N << std::endl;
// Accumulates grt_sqr of the per-dimension difference between the input and cluster row i,
// then applies grt_sqrt to the sum.
// NOTE(review): this is a Euclidean distance provided output[i] starts at zero — the reset is not visible here.
421 output[i] += grt_sqr( input[j] - clusters[layer][i][j] );
428 output[i] = grt_sqrt( output[i] );
// Returns the number of layers, i.e. the number of entries in numClustersPerLayer.
435 UINT KMeansFeatures::getNumLayers()
const{
436 return numClustersPerLayer.
getSize();
// Returns the cluster count configured for the layer at `layerIndex`.
// An out-of-range index triggers a warning.
// NOTE(review): the value returned in the out-of-range case is not visible in this listing.
439 UINT KMeansFeatures::getLayerSize(
const UINT layerIndex)
const{
440 if( layerIndex >= numClustersPerLayer.
getSize() ){
441 warningLog << __GRT_LOG__ <<
" LayerIndex is out of bounds. It must be less than the number of layers: " << numClustersPerLayer.
getSize() << std::endl;
444 return numClustersPerLayer[layerIndex];
std::string getId() const
KMeansFeatures(const Vector< UINT > numClustersPerLayer=Vector< UINT >(1, 100), const Float alpha=0.2, const bool useScaling=true)
static std::string getId()
virtual bool computeFeatures(const VectorFloat &inputVector) override
virtual bool save(std::fstream &file) const override
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int size)
virtual bool train_(MatrixFloat &data)
bool setMinChange(const Float minChange)
virtual bool load(std::fstream &file) override
MatrixFloat getDataAsMatrixFloat() const
KMeansFeatures & operator=(const KMeansFeatures &rhs)
unsigned int getNumRows() const
MatrixFloat getDataAsMatrixFloat() const
unsigned int getNumCols() const
bool setMinNumEpochs(const UINT minNumEpochs)
virtual bool deepCopyFrom(const FeatureExtraction *featureExtraction) override
virtual ~KMeansFeatures()
Vector< MinMax > getRanges() const
virtual bool train_(ClassificationData &trainingData) override
MatrixFloat getDataAsMatrixFloat() const
bool setMaxNumEpochs(const UINT maxNumEpochs)
virtual bool reset() override
bool setNumClusters(const UINT numClusters)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)