30 classType =
"HierarchicalClustering";
31 clustererType = classType;
32 debugLog.setProceedingText(
"[DEBUG HierarchicalClustering]");
33 errorLog.setProceedingText(
"[ERROR HierarchicalClustering]");
34 trainingLog.setProceedingText(
"[TRAINING HierarchicalClustering]");
35 warningLog.setProceedingText(
"[WARNING HierarchicalClustering]");
39 classType =
"HierarchicalClustering";
40 clustererType = classType;
41 debugLog.setProceedingText(
"[DEBUG HierarchicalClustering]");
42 errorLog.setProceedingText(
"[ERROR HierarchicalClustering]");
43 trainingLog.setProceedingText(
"[TRAINING HierarchicalClustering]");
44 warningLog.setProceedingText(
"[WARNING HierarchicalClustering]");
58 this->clusters = rhs.clusters;
59 this->distanceMatrix = rhs.distanceMatrix;
70 if( clusterer == NULL )
return false;
78 this->clusters = ptr->clusters;
79 this->distanceMatrix = ptr->distanceMatrix;
101 distanceMatrix.
clear();
117 for(UINT i=0; i<M; i++){
118 for(UINT j=0; j<N; j++){
119 data[i][j] = trainingData[i][j];
137 for(UINT i=0; i<M; i++){
138 for(UINT j=0; j<N; j++){
139 data[i][j] = trainingData[i][j];
143 return train( data );
150 distanceMatrix.
clear();
161 distanceMatrix.
resize(M,M);
164 for(UINT i=0; i<M; i++){
165 for(UINT j=0; j<M; j++){
168 distanceMatrix[i][j] = squaredEuclideanDistance(data[i], data[j]);
174 UINT uniqueClusterID = 0;
176 for(UINT i=0; i<M; i++){
177 clusterData[i].uniqueClusterID = uniqueClusterID++;
178 clusterData[i].addSampleToCluster(i);
181 trainingLog <<
"Starting clustering..." << std::endl;
186 newLevel.level = level;
187 for(UINT i=0; i<M; i++){
188 newLevel.clusters.push_back( clusterData[i] );
190 clusters.push_back( newLevel );
194 bool keepClustering =
true;
196 while( keepClustering ){
201 UINT K = (UINT)clusterData.size();
202 for(UINT i=0; i<K; i++){
203 for(UINT j=0; j<K; j++){
205 Float dist = computeClusterDistance( clusterData[i], clusterData[j] );
207 if( dist < minDist ){
212 clusterPairs.clear();
213 clusterPairs.push_back( clusterPair );
221 keepClustering =
false;
222 warningLog <<
"train_(MatrixFloat &data) - Failed to find any cluster at level: " << level << std::endl;
228 newLevel.level = level;
232 newCluster.uniqueClusterID = uniqueClusterID++;
234 const UINT numClusterPairs = clusterPairs.
getSize();
236 for(UINT k=0; k<numClusterPairs; k++){
238 UINT numSamplesInClusterA = clusterData[ clusterPairs[k][0] ].getNumSamplesInCluster();
239 for(UINT i=0; i<numSamplesInClusterA; i++){
240 UINT index = clusterData[ clusterPairs[k][0] ][ i ];
241 newCluster.addSampleToCluster( index );
245 UINT numSamplesInClusterB = clusterData[ clusterPairs[k][1] ].getNumSamplesInCluster();
246 for(UINT i=0; i<numSamplesInClusterB; i++){
247 UINT index = clusterData[ clusterPairs[k][1] ][ i ];
248 newCluster.addSampleToCluster( index );
252 newCluster.clusterVariance = computeClusterVariance( newCluster, data );
255 UINT idA = clusterData[ clusterPairs[k][0] ].getUniqueClusterID();
256 UINT idB = clusterData[ clusterPairs[k][1] ].getUniqueClusterID();
259 while( iter != clusterData.end() ){
260 if( iter->getUniqueClusterID() == idA || iter->getUniqueClusterID() == idB ){
261 iter = clusterData.erase( iter );
262 if( ++numRemoved >= 2 )
break;
268 clusterData.push_back( newCluster );
271 newLevel.clusters.push_back( newCluster );
273 clusters.push_back( newLevel );
281 keepClustering =
false;
284 if( clusterData.size() == 0 ){
285 keepClustering =
false;
288 trainingLog <<
"Cluster level: " << level <<
" Number of clusters: " << clusters.back().getNumClusters() << std::endl;
297 clusterLabels[i] = i+1;
299 clusterLikelihoods.
resize(numClusters,0);
300 clusterDistances.
resize(numClusters,0);
305 bool HierarchicalClustering::printModel(){
307 UINT K = (UINT)clusters.size();
309 std::cout <<
"Hierarchical Clustering Model\n\n";
310 for(UINT k=0; k<K; k++){
314 numSamples += clusters[k][i].getNumSamplesInCluster();
317 std::cout <<
"Level: " << clusters[k].level <<
"\tNumClusters: " << numClusters <<
"\tNumSamples: " << numSamples << std::endl;
319 std::cout <<
"ClusterVariance: " << clusters[k][i].clusterVariance << std::endl;
320 std::cout <<
"Indexs: ";
321 UINT numSamplesInCluster = clusters[k][i].getNumSamplesInCluster();
322 for(UINT j=0; j<numSamplesInCluster; j++){
323 std::cout << clusters[k][i][j] <<
"\t";
325 std::cout << std::endl;
332 Float HierarchicalClustering::squaredEuclideanDistance(
const Float *a,
const Float *b){
334 for(UINT i=0; i<N; i++){
335 dist += SQR( a[i] - b[i] );
340 Float HierarchicalClustering::computeClusterDistance(
const ClusterInfo &clusterA,
const ClusterInfo &clusterB ){
343 const UINT numSamplesA = clusterA.getNumSamplesInCluster();
344 const UINT numSamplesB = clusterB.getNumSamplesInCluster();
347 for(UINT i=0; i<numSamplesA; i++){
348 for(UINT j=0; j<numSamplesB; j++){
349 if( distanceMatrix[ clusterA[i] ][ clusterB[j] ] < minDist ){
350 minDist = distanceMatrix[ clusterA[i] ][ clusterB[j] ];
358 Float HierarchicalClustering::computeClusterVariance(
const ClusterInfo &cluster,
const MatrixFloat &data ){
364 UINT numSamples = cluster.getNumSamplesInCluster();
365 for(UINT j=0; j<N; j++){
366 for(UINT i=0; i<numSamples; i++){
367 UINT index = cluster[i];
368 mean[j] += data[ index ][j];
370 mean[j] /= Float( numSamples );
374 for(UINT j=0; j<N; j++){
375 for(UINT i=0; i<numSamples; i++){
376 std[j] += grt_sqr( data[ cluster[i] ][j] - mean[j] );
378 std[j] = grt_sqrt( std[j] / Float( numSamples-1 ) );
382 for(UINT j=0; j<N; j++){
390 if( !file.is_open() ){
391 errorLog <<
"saveModelToFile(string filename) - Failed to open file!" << std::endl;
395 file <<
"GRT_HIERARCHICAL_CLUSTERING_FILE_V1.0\n";
398 errorLog <<
"saveModelToFile(fstream &file) - Failed to save cluster settings to file!" << std::endl;
403 file <<
"M: " << M << std::endl;
404 file <<
"N: " << N << std::endl;
405 file <<
"NumLevels: " << clusters.
getSize() << std::endl;
407 for(UINT i=0; i<clusters.
getSize(); i++){
408 file <<
"Level: " << clusters[i].getLevel() << std::endl;
409 file <<
"NumClusters: " << clusters[i].getNumClusters() << std::endl;
425 if( word !=
"GRT_HIERARCHICAL_CLUSTERING_FILE_V1.0" ){
430 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load cluster settings from file!" << std::endl;
virtual bool loadModelFromFile(std::fstream &file)
std::string getClustererType() const
virtual bool saveModelToFile(std::fstream &file) const
UINT getNumDimensions() const
virtual bool resize(const unsigned int size)
virtual bool train(ClassificationData trainingData)
virtual ~HierarchicalClustering()
UINT getNumSamples() const
bool copyBaseVariables(const Clusterer *clusterer)
bool loadClustererSettingsFromFile(std::fstream &file)
unsigned int getSize() const
UINT getNumSamples() const
virtual bool train_(MatrixFloat &trainingData)
bool saveClustererSettingsToFile(std::fstream &file) const
UINT numClusters
Number of clusters in the model.
unsigned int getNumRows() const
UINT getNumDimensions() const
unsigned int getNumCols() const
virtual bool deepCopyFrom(const Clusterer *clusterer)
HierarchicalClustering & operator=(const HierarchicalClustering &rhs)
virtual bool resize(const unsigned int r, const unsigned int c)
This class implements a basic Hierarchical Clustering algorithm.