GestureRecognitionToolkit  Version: 0.1.0
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
SelfOrganizingMap.cpp
1 /*
2  GRT MIT License
3  Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5  Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6  and associated documentation files (the "Software"), to deal in the Software without restriction,
7  including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9  subject to the following conditions:
10 
11  The above copyright notice and this permission notice shall be included in all copies or substantial
12  portions of the Software.
13 
14  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15  LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19  */
20 
21 #include "SelfOrganizingMap.h"
22 
23 GRT_BEGIN_NAMESPACE
24 
25 //Register the SelfOrganizingMap class with the Clusterer base class
//This defines the static member SelfOrganizingMap::registerModule and constructs it with the name string "SelfOrganizingMap"
//NOTE(review): presumably this is what lets the Clusterer base class create this module from its name - confirm against RegisterClustererModule
26 RegisterClustererModule< SelfOrganizingMap > SelfOrganizingMap::registerModule("SelfOrganizingMap");
27 
28 SelfOrganizingMap::SelfOrganizingMap( const UINT networkSize, const UINT networkTypology, const UINT maxNumEpochs, const Float alphaStart, const Float alphaEnd ){
29 
30  this->numClusters = networkSize;
31  this->networkTypology = networkTypology;
32  this->maxNumEpochs = maxNumEpochs;
33  this->alphaStart = alphaStart;
34  this->alphaEnd = alphaEnd;
35 
36  classType = "SelfOrganizingMap";
37  clustererType = classType;
38  debugLog.setProceedingText("[DEBUG SelfOrganizingMap]");
39  errorLog.setProceedingText("[ERROR SelfOrganizingMap]");
40  trainingLog.setProceedingText("[TRAINING SelfOrganizingMap]");
41  warningLog.setProceedingText("[WARNING SelfOrganizingMap]");
42 }
43 
45 
46  classType = "SelfOrganizingMap";
47  clustererType = classType;
48  debugLog.setProceedingText("[DEBUG KMeans]");
49  errorLog.setProceedingText("[ERROR KMeans]");
50  trainingLog.setProceedingText("[TRAINING KMeans]");
51  warningLog.setProceedingText("[WARNING KMeans]");
52 
53  if( this != &rhs ){
54 
55  this->networkTypology = rhs.networkTypology;
56  this->alphaStart = rhs.alphaStart;
57  this->alphaEnd = rhs.alphaEnd;
58 
59  //Clone the Clusterer variables
60  copyBaseVariables( (Clusterer*)&rhs );
61  }
62 }
63 
65 
66 }
67 
69 
70  if( this != &rhs ){
71 
72  this->networkTypology = rhs.networkTypology;
73  this->alphaStart = rhs.alphaStart;
74  this->alphaEnd = rhs.alphaEnd;
75 
76  //Clone the Clusterer variables
77  copyBaseVariables( (Clusterer*)&rhs );
78  }
79 
80  return *this;
81 }
82 
84 
85  if( clusterer == NULL ) return false;
86 
87  if( this->getClustererType() == clusterer->getClustererType() ){
88  //Clone the SelfOrganizingMap values
89  SelfOrganizingMap *ptr = (SelfOrganizingMap*)clusterer;
90 
91  this->networkTypology = ptr->networkTypology;
92  this->alphaStart = ptr->alphaStart;
93  this->alphaEnd = ptr->alphaEnd;
94 
95  //Clone the Clusterer variables
96  return copyBaseVariables( clusterer );
97  }
98 
99  return false;
100 }
101 
103 
104  //Reset the base class
106 
107  return true;
108 }
109 
111 
112  //Reset the base class
114 
115  //Clear the SelfOrganizingMap models
116  neurons.clear();
117  networkWeights.clear();
118 
119  return true;
120 }
121 
123 
124  //Clear any previous models
125  clear();
126 
127  const UINT M = data.getNumRows();
128  const UINT N = data.getNumCols();
129  numInputDimensions = N;
130  numOutputDimensions = numClusters;
131  Random rand;
132 
133  //Setup the neurons
134  neurons.resize( numClusters );
135 
136  if( neurons.size() != numClusters ){
137  errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons Vector, there might not be enough memory!" << std::endl;
138  return false;
139  }
140 
141  for(UINT j=0; j<numClusters; j++){
142 
143  //Init the neuron
144  neurons[j].init( N, 0.5 );
145 
146  //Set the weights as a random training example
147  neurons[j].weights = data.getRowVector( rand.getRandomNumberInt(0, M) );
148  }
149 
150  //Setup the network weights
151  switch( networkTypology ){
152  case RANDOM_NETWORK:
153  networkWeights.resize(numClusters, numClusters);
154 
155  //Set the diagonal weights as 1 (as i==j)
156  for(UINT i=0; i<numClusters; i++){
157  networkWeights[i][i] = 1;
158  }
159 
160  //Randomize the other weights
161  UINT indexA = 0;
162  UINT indexB = 0;
163  Float weight = 0;
164  for(UINT i=0; i<numClusters*numClusters; i++){
165  indexA = rand.getRandomNumberInt(0, numClusters);
166  indexB = rand.getRandomNumberInt(0, numClusters);
167 
168  //Make sure the two random indexs are the same (as this is a diagonal and should be 1)
169  if( indexA != indexB ){
170  //Pick a random weight between these two neurons
171  weight = rand.getRandomNumberUniform(0,1);
172 
173  //The weight betwen neurons a and b is the mirrored
174  networkWeights[indexA][indexB] = weight;
175  networkWeights[indexB][indexA] = weight;
176  }
177  }
178  break;
179  }
180 
181  //Scale the data if needed
182  ranges = data.getRanges();
183  if( useScaling ){
184  for(UINT i=0; i<M; i++){
185  for(UINT j=0; j<numInputDimensions; j++){
186  data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
187  }
188  }
189  }
190 
191  Float error = 0;
192  Float lastError = 0;
193  Float trainingSampleError = 0;
194  Float delta = 0;
195  Float minChange = 0;
196  Float weightUpdate = 0;
197  Float weightUpdateSum = 0;
198  Float alpha = 1.0;
199  Float neuronDiff = 0;
200  UINT iter = 0;
201  bool keepTraining = true;
202  VectorFloat trainingSample;
203  Vector< UINT > randomTrainingOrder(M);
204 
205  //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
206  //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
207  //training samples. This random order is then used at each epoch.
208  for(UINT i=0; i<M; i++){
209  randomTrainingOrder[i] = i;
210  }
211  std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
212 
213  //Enter the main training loop
214  while( keepTraining ){
215 
216  //Update alpha based on the current iteration
217  alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
218 
219  //Run one epoch of training using the online best-matching-unit algorithm
220  error = 0;
221  for(UINT i=0; i<M; i++){
222 
223  trainingSampleError = 0;
224 
225  //Get the i'th random training sample
226  trainingSample = data.getRowVector( randomTrainingOrder[i] );
227 
228  //Find the best matching unit
229  Float dist = 0;
230  Float bestDist = grt_numeric_limits< Float >::max();
231  UINT bestIndex = 0;
232  for(UINT j=0; j<numClusters; j++){
233  dist = neurons[j].getSquaredWeightDistance( trainingSample );
234  if( dist < bestDist ){
235  bestDist = dist;
236  bestIndex = j;
237  }
238  }
239 
240  //Update the weights based on the distance to the winning neuron
241  //Neurons closer to the winning neuron will have their weights update more
242  for(UINT j=0; j<numClusters; j++){
243 
244  //Update the weights for the j'th neuron
245  weightUpdateSum = 0;
246  neuronDiff = 0;
247  for(UINT n=0; n<N; n++){
248  neuronDiff = trainingSample[n] - neurons[j][n];
249  weightUpdate = networkWeights[bestIndex][j] * alpha * neuronDiff;
250  neurons[j][n] += weightUpdate;
251  weightUpdateSum += neuronDiff;
252  }
253 
254  trainingSampleError += grt_sqr( weightUpdateSum );
255  }
256 
257  error += grt_sqrt( trainingSampleError / numClusters );
258  }
259 
260  //Compute the error
261  delta = fabs( error-lastError );
262  lastError = error;
263 
264  //Check to see if we should stop
265  if( delta <= minChange ){
266  converged = true;
267  keepTraining = false;
268  }
269 
270  if( grt_isinf( error ) ){
271  errorLog << "train_(MatrixFloat &data) - Training failed! Error is NAN!" << std::endl;
272  return false;
273  }
274 
275  if( ++iter >= maxNumEpochs ){
276  keepTraining = false;
277  }
278 
279  trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
280  }
281 
282  numTrainingIterationsToConverge = iter;
283  trained = true;
284 
285  return true;
286 }
287 
289  MatrixFloat data = trainingData.getDataAsMatrixFloat();
290  return train_(data);
291 }
292 
294  MatrixFloat data = trainingData.getDataAsMatrixFloat();
295  return train_(data);
296 }
297 
299 
300  if( !trained ){
301  return false;
302  }
303 
304  if( useScaling ){
305  for(UINT i=0; i<numInputDimensions; i++){
306  x[i] = scale(x[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
307  }
308  }
309 
310  if( mappedData.getSize() != numClusters )
311  mappedData.resize( numClusters );
312 
313  for(UINT i=0; i<numClusters; i++){
314  mappedData[i] = neurons[i].fire( x );
315  }
316 
317  return true;
318 }
319 
320 bool SelfOrganizingMap::saveModelToFile( std::fstream &file ) const{
321 
322  if( !trained ){
323  errorLog << "saveModelToFile(fstream &file) - Can't save model to file, the model has not been trained!" << std::endl;
324  return false;
325  }
326 
327  file << "GRT_SELF_ORGANIZING_MAP_MODEL_FILE_V1.0\n";
328 
329  if( !saveClustererSettingsToFile( file ) ){
330  errorLog << "saveModelToFile(fstream &file) - Failed to save cluster settings to file!" << std::endl;
331  return false;
332  }
333 
334  file << "NetworkTypology: " << networkTypology << std::endl;
335  file << "AlphaStart: " << alphaStart << std::endl;
336  file << "AlphaEnd: " << alphaEnd << std::endl;
337 
338  if( trained ){
339  file << "NetworkWeights: \n";
340  for(UINT i=0; i<networkWeights.getNumRows(); i++){
341  for(UINT j=0; j<networkWeights.getNumCols(); j++){
342  file << networkWeights[i][j];
343  if( j<networkWeights.getNumCols()-1 ) file << "\t";
344  }
345  file << "\n";
346  }
347 
348  file << "Neurons: \n";
349  for(UINT i=0; i<neurons.getSize(); i++){
350  if( !neurons[i].saveNeuronToFile( file ) ){
351  errorLog << "saveModelToFile(fstream &file) - Failed to save neuron to file!" << std::endl;
352  return false;
353  }
354  }
355  }
356 
357  return true;
358 
359 }
360 
361 bool SelfOrganizingMap::loadModelFromFile( std::fstream &file ){
362 
363  //Clear any previous model
364  clear();
365 
366  std::string word;
367  file >> word;
368  if( word != "GRT_SELF_ORGANIZING_MAP_MODEL_FILE_V1.0" ){
369  errorLog << "loadModelFromFile(fstream &file) - Failed to load file header!" << std::endl;
370  return false;
371  }
372 
373  if( !loadClustererSettingsFromFile( file ) ){
374  errorLog << "loadModelFromFile(fstream &file) - Failed to load cluster settings from file!" << std::endl;
375  return false;
376  }
377 
378  file >> word;
379  if( word != "NetworkTypology:" ){
380  errorLog << "loadModelFromFile(fstream &file) - Failed to load NetworkTypology header!" << std::endl;
381  return false;
382  }
383  file >> networkTypology;
384 
385  file >> word;
386  if( word != "AlphaStart:" ){
387  errorLog << "loadModelFromFile(fstream &file) - Failed to load AlphaStart header!" << std::endl;
388  return false;
389  }
390  file >> alphaStart;
391 
392  file >> word;
393  if( word != "AlphaEnd:" ){
394  errorLog << "loadModelFromFile(fstream &file) - Failed to load alphaEnd header!" << std::endl;
395  return false;
396  }
397  file >> alphaEnd;
398 
399  //Load the model if it has been trained
400  if( trained ){
401  file >> word;
402  if( word != "NetworkWeights:" ){
403  errorLog << "loadModelFromFile(fstream &file) - Failed to load NetworkWeights header!" << std::endl;
404  return false;
405  }
406 
407  networkWeights.resize(numClusters, numClusters);
408  for(UINT i=0; i<networkWeights.getNumRows(); i++){
409  for(UINT j=0; j<networkWeights.getNumCols(); j++){
410  file >> networkWeights[i][j];
411  }
412  }
413 
414  file >> word;
415  if( word != "Neurons:" ){
416  errorLog << "loadModelFromFile(fstream &file) - Failed to load Neurons header!" << std::endl;
417  return false;
418  }
419 
420  neurons.resize(numClusters);
421  for(UINT i=0; i<neurons.size(); i++){
422  if( !neurons[i].loadNeuronFromFile( file ) ){
423  errorLog << "loadModelFromFile(fstream &file) - Failed to save neuron to file!" << std::endl;
424  return false;
425  }
426  }
427  }
428 
429  return true;
430 }
431 
432 bool SelfOrganizingMap::validateNetworkTypology( const UINT networkTypology ){
433  if( networkTypology == RANDOM_NETWORK ) return true;
434 
435  warningLog << "validateNetworkTypology(const UINT networkTypology) - Unknown networkTypology!" << std::endl;
436 
437  return false;
438 }
439 
441  return numClusters;
442 }
443 
444 Float SelfOrganizingMap::getAlphaStart() const{
445  return alphaStart;
446 }
447 
448 Float SelfOrganizingMap::getAlphaEnd() const{
449  return alphaEnd;
450 }
451 
452 VectorFloat SelfOrganizingMap::getMappedData() const{
453  return mappedData;
454 }
455 
456 Vector< GaussNeuron > SelfOrganizingMap::getNeurons() const{
457  return neurons;
458 }
459 
460 const Vector< GaussNeuron >& SelfOrganizingMap::getNeuronsRef() const{
461  return neurons;
462 }
463 
464 MatrixFloat SelfOrganizingMap::getNetworkWeights() const{
465  return networkWeights;
466 }
467 
468 bool SelfOrganizingMap::setNetworkSize( const UINT networkSize ){
469  if( networkSize > 0 ){
470  this->numClusters = networkSize;
471  return true;
472  }
473 
474  warningLog << "setNetworkSize(const UINT networkSize) - The networkSize must be greater than 0!" << std::endl;
475 
476  return false;
477 }
478 
479 bool SelfOrganizingMap::setNetworkTypology( const UINT networkTypology ){
480  if( validateNetworkTypology( networkTypology ) ){
481  this->networkTypology = networkTypology;
482  return true;
483  }
484  return false;
485 }
486 
487 bool SelfOrganizingMap::setAlphaStart( const Float alphaStart ){
488 
489  if( alphaStart > 0 ){
490  this->alphaStart = alphaStart;
491  return true;
492  }
493 
494  warningLog << "setAlphaStart(const Float alphaStart) - AlphaStart must be greater than zero!" << std::endl;
495 
496  return false;
497 }
498 
499 bool SelfOrganizingMap::setAlphaEnd( const Float alphaEnd ){
500 
501  if( alphaEnd > 0 ){
502  this->alphaEnd = alphaEnd;
503  return true;
504  }
505 
506  warningLog << "setAlphaEnd(const Float alphaEnd) - AlphaEnd must be greater than zero!" << std::endl;
507 
508  return false;
509 }
510 
511 GRT_END_NAMESPACE
512 
void clear()
Definition: Matrix.h:522
virtual bool deepCopyFrom(const Clusterer *clusterer)
Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: MLBase.h:339
This class implements the Self Organizing Map clustering algorithm.
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Definition: Util.cpp:52
Definition: Random.h:40
std::string getClustererType() const
Definition: Clusterer.cpp:259
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int size)
Definition: Vector.h:133
virtual bool train_(MatrixFloat &trainingData)
UINT getNetworkSize() const
bool copyBaseVariables(const Clusterer *clusterer)
Definition: Clusterer.cpp:86
bool validateNetworkTypology(const UINT networkTypology)
bool loadClustererSettingsFromFile(std::fstream &file)
Definition: Clusterer.cpp:178
unsigned int getSize() const
Definition: Vector.h:193
virtual bool map_(VectorFloat &x)
SelfOrganizingMap(const UINT networkSize=20, const UINT networkTypology=RANDOM_NETWORK, const UINT maxNumEpochs=1000, const Float alphaStart=0.8, const Float alphaEnd=0.1)
bool saveClustererSettingsToFile(std::fstream &file) const
Definition: Clusterer.cpp:156
Vector< T > getRowVector(const unsigned int r) const
Definition: Matrix.h:171
virtual bool reset()
Definition: Clusterer.cpp:127
UINT numClusters
Number of clusters in the model.
Definition: Clusterer.h:249
unsigned int getNumRows() const
Definition: Matrix.h:542
unsigned int getNumCols() const
Definition: Matrix.h:549
virtual bool saveModelToFile(std::fstream &file) const
virtual bool loadModelFromFile(std::fstream &file)
Vector< MinMax > getRanges() const
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
Definition: Random.h:198
int getRandomNumberInt(int minRange, int maxRange)
Definition: Random.h:88
MatrixFloat getDataAsMatrixFloat() const
virtual bool resize(const unsigned int r, const unsigned int c)
Definition: Matrix.h:232
virtual bool clear()
Definition: Clusterer.cpp:141
SelfOrganizingMap & operator=(const SelfOrganizingMap &rhs)