26 this->datasetName = datasetName;
27 this->numDimensions = numDimensions;
28 this->infoText = infoText;
30 crossValidationSetup =
false;
31 useExternalRanges =
false;
32 allowNullGestureClass =
true;
34 infoLog.setProceedingText(
"[ClassificationData]");
35 debugLog.setProceedingText(
"[DEBUG ClassificationData]");
36 errorLog.setProceedingText(
"[ERROR ClassificationData]");
37 warningLog.setProceedingText(
"[WARNING ClassificationData]");
49 this->datasetName = rhs.datasetName;
50 this->infoText = rhs.infoText;
51 this->numDimensions = rhs.numDimensions;
52 this->totalNumSamples = rhs.totalNumSamples;
53 this->kFoldValue = rhs.kFoldValue;
54 this->crossValidationSetup = rhs.crossValidationSetup;
55 this->useExternalRanges = rhs.useExternalRanges;
56 this->allowNullGestureClass = rhs.allowNullGestureClass;
57 this->externalRanges = rhs.externalRanges;
58 this->classTracker = rhs.classTracker;
59 this->data = rhs.data;
60 this->crossValidationIndexs = rhs.crossValidationIndexs;
61 this->infoLog = rhs.infoLog;
62 this->debugLog = rhs.debugLog;
63 this->errorLog = rhs.errorLog;
64 this->warningLog = rhs.warningLog;
73 crossValidationSetup =
false;
74 crossValidationIndexs.clear();
79 if( numDimensions > 0 ){
84 this->numDimensions = numDimensions;
87 useExternalRanges =
false;
88 externalRanges.clear();
93 errorLog <<
"setNumDimensions(const UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << std::endl;
100 if( datasetName.find(
" ") == std::string::npos ){
101 this->datasetName = datasetName;
105 errorLog <<
"setDatasetName(const std::string datasetName) - The dataset name cannot contain any spaces!" << std::endl;
110 this->infoText = infoText;
116 for(UINT i=0; i<classTracker.
getSize(); i++){
117 if( classTracker[i].classLabel == classLabel ){
118 classTracker[i].className = className;
123 errorLog <<
"setClassNameForCorrespondingClassLabel(const std::string className,const UINT classLabel) - Failed to find class with label: " << classLabel << std::endl;
128 this->allowNullGestureClass = allowNullGestureClass;
134 if( sample.
getSize() != numDimensions ){
135 errorLog <<
"addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.
getSize() <<
") does not match the number of dimensions of the dataset (" << numDimensions <<
")" << std::endl;
140 if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
141 errorLog <<
"addSample(const UINT classLabel, VectorFloat &sample) - the class label can not be 0!" << std::endl;
146 crossValidationSetup =
false;
147 crossValidationIndexs.clear();
150 data.push_back( newSample );
153 if( classTracker.
getSize() == 0 ){
155 classTracker.push_back(tracker);
157 bool labelFound =
false;
158 for(UINT i=0; i<classTracker.
getSize(); i++){
159 if( classLabel == classTracker[i].classLabel ){
160 classTracker[i].counter++;
167 classTracker.push_back(tracker);
179 if( totalNumSamples == 0 ){
180 warningLog <<
"removeSample( const UINT index ) - Failed to remove sample, the training dataset is empty!" << std::endl;
184 if( index >= totalNumSamples ){
185 warningLog <<
"removeSample( const UINT index ) - Failed to remove sample, the index is out of bounds! Number of training samples: " << totalNumSamples <<
" index: " << index << std::endl;
190 crossValidationSetup =
false;
191 crossValidationIndexs.clear();
194 UINT classLabel = data[ index ].getClassLabel();
197 data.erase( data.begin()+index );
199 totalNumSamples = data.
getSize();
202 for(
size_t i=0; i<classTracker.
getSize(); i++){
203 if( classTracker[i].classLabel == classLabel ){
204 classTracker[i].counter--;
214 if( totalNumSamples == 0 ){
215 warningLog <<
"removeLastSample() - Failed to remove sample, the training dataset is empty!" << std::endl;
226 if( data.capacity() >= N )
return true;
238 for(
size_t i=0; i<classTracker.
getSize(); i++){
239 if( classTracker[i].classLabel == classLabel ){
240 warningLog <<
"addClass(const UINT classLabel,const std::string className) - Failed to add class, it already exists! Class label: " << classLabel << std::endl;
246 classTracker.push_back(
ClassTracker(classLabel,0,className) );
256 UINT numExamplesRemoved = 0;
257 UINT numExamplesToRemove = 0;
260 crossValidationSetup =
false;
261 crossValidationIndexs.clear();
264 for(UINT i=0; i<classTracker.
getSize(); i++){
265 if( classTracker[i].classLabel == classLabel ){
266 numExamplesToRemove = classTracker[i].counter;
267 classTracker.erase(classTracker.begin()+i);
273 if( numExamplesToRemove > 0 ){
275 while( numExamplesRemoved < numExamplesToRemove ){
276 if( data[i].getClassLabel() == classLabel ){
277 data.erase(data.begin()+i);
278 numExamplesRemoved++;
279 }
else if( ++i == data.
getSize() )
break;
283 totalNumSamples = data.
getSize();
285 return numExamplesRemoved;
289 bool oldClassLabelFound =
false;
290 bool newClassLabelAllReadyExists =
false;
291 UINT indexOfOldClassLabel = 0;
292 UINT indexOfNewClassLabel = 0;
295 for(UINT i=0; i<classTracker.
getSize(); i++){
296 if( classTracker[i].classLabel == oldClassLabel ){
297 indexOfOldClassLabel = i;
298 oldClassLabelFound =
true;
300 if( classTracker[i].classLabel == newClassLabel ){
301 indexOfNewClassLabel = i;
302 newClassLabelAllReadyExists =
true;
307 if( !oldClassLabelFound ){
312 for(UINT i=0; i<totalNumSamples; i++){
313 if( data[i].getClassLabel() == oldClassLabel ){
314 data[i].setClassLabel(newClassLabel);
319 if( newClassLabelAllReadyExists ){
321 classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
324 classTracker.push_back(
ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
328 classTracker.erase( classTracker.begin() + indexOfOldClassLabel );
338 if( externalRanges.size() != numDimensions )
return false;
340 this->externalRanges = externalRanges;
341 this->useExternalRanges = useExternalRanges;
347 if( externalRanges.
getSize() == numDimensions ){
348 this->useExternalRanges = useExternalRanges;
356 return scale(ranges,minTarget,maxTarget);
360 if( ranges.
getSize() != numDimensions )
return false;
363 for(UINT i=0; i<totalNumSamples; i++){
364 for(UINT j=0; j<numDimensions; j++){
365 data[i][j] = grt_scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
397 file.open(filename.c_str(), std::ios::out);
399 if( !file.is_open() ){
403 file <<
"GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0\n";
404 file <<
"DatasetName: " << datasetName << std::endl;
405 file <<
"InfoText: " << infoText << std::endl;
406 file <<
"NumDimensions: " << numDimensions << std::endl;
407 file <<
"TotalNumExamples: " << totalNumSamples << std::endl;
408 file <<
"NumberOfClasses: " << classTracker.size() << std::endl;
409 file <<
"ClassIDsAndCounters: " << std::endl;
411 for(UINT i=0; i<classTracker.size(); i++){
412 file << classTracker[i].classLabel <<
"\t" << classTracker[i].counter <<
"\t" << classTracker[i].className << std::endl;
415 file <<
"UseExternalRanges: " << useExternalRanges << std::endl;
417 if( useExternalRanges ){
418 for(UINT i=0; i<externalRanges.size(); i++){
419 file << externalRanges[i].minValue <<
"\t" << externalRanges[i].maxValue << std::endl;
425 for(UINT i=0; i<totalNumSamples; i++){
426 file << data[i].getClassLabel();
427 for(UINT j=0; j<numDimensions; j++){
428 file <<
"\t" << data[i][j];
440 file.open(filename.c_str(), std::ios::in);
444 if( !file.is_open() ){
445 errorLog <<
"loadDatasetFromFile(const std::string &filename) - could not open file!" << std::endl;
453 if(word !=
"GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0"){
454 errorLog <<
"loadDatasetFromFile(const std::string &filename) - could not find file header!" << std::endl;
461 if(word !=
"DatasetName:"){
462 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find DatasetName header!" << std::endl;
463 errorLog << word << std::endl;
470 if(word !=
"InfoText:"){
471 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find InfoText header!" << std::endl;
479 while( word !=
"NumDimensions:" ){
480 infoText += word +
" ";
485 if( word !=
"NumDimensions:" ){
486 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find NumDimensions header!" << std::endl;
490 file >> numDimensions;
494 if( word !=
"TotalNumTrainingExamples:" && word !=
"TotalNumExamples:" ){
495 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find TotalNumTrainingExamples header!" << std::endl;
499 file >> totalNumSamples;
503 if(word !=
"NumberOfClasses:"){
504 errorLog <<
"loadDatasetFromFile(string filename) - failed to find NumberOfClasses header!" << std::endl;
511 classTracker.
resize(numClasses);
515 if(word !=
"ClassIDsAndCounters:"){
516 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find ClassIDsAndCounters header!" << std::endl;
521 for(UINT i=0; i<classTracker.
getSize(); i++){
522 file >> classTracker[i].classLabel;
523 file >> classTracker[i].counter;
524 file >> classTracker[i].className;
529 if(word !=
"UseExternalRanges:"){
530 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find UseExternalRanges header!" << std::endl;
534 file >> useExternalRanges;
537 if( useExternalRanges ){
538 externalRanges.
resize(numDimensions);
539 for(UINT i=0; i<externalRanges.
getSize(); i++){
540 file >> externalRanges[i].minValue;
541 file >> externalRanges[i].maxValue;
547 if( word !=
"LabelledTrainingData:" && word !=
"Data:"){
548 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find LabelledTrainingData header!" << std::endl;
554 data.
resize( totalNumSamples, tempSample );
556 for(UINT i=0; i<totalNumSamples; i++){
560 for(UINT j=0; j<numDimensions; j++){
563 data[i].set(classLabel, sample);
577 file.open(filename.c_str(), std::ios::out );
579 if( !file.is_open() ){
584 for(UINT i=0; i<totalNumSamples; i++){
585 file << data[i].getClassLabel();
586 for(UINT j=0; j<numDimensions; j++){
587 file <<
"," << data[i][j];
600 datasetName =
"NOT_SET";
613 if( !parser.parseCSVFile(filename,
true) ){
614 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl;
618 if( !parser.getConsistentColumnSize() ){
619 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << std::endl;
623 if( parser.getColumnSize() <= 1 ){
624 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl;
629 numDimensions = parser.getColumnSize()-1;
642 totalNumSamples = parser.getRowSize();
643 for(UINT i=0; i<totalNumSamples; i++){
645 classLabel = grt_from_str< UINT >( parser[i][classLabelColumnIndex] );
648 data[i].setClassLabel( classLabel );
653 while( j != numDimensions ){
654 if( n != classLabelColumnIndex ){
655 data[i][j++] = grt_from_str< Float >( parser[i][n] );
661 if( classTracker.size() == 0 ){
663 classTracker.push_back(tracker);
665 bool labelFound =
false;
666 const size_t numClasses = classTracker.size();
667 for(
size_t i=0; i<numClasses; i++){
668 if( classLabel == classTracker[i].classLabel ){
669 classTracker[i].counter++;
676 classTracker.push_back(tracker);
696 sort(classTracker.begin(),classTracker.end(),ClassTracker::sortByClassLabelAscending);
709 crossValidationSetup =
false;
710 crossValidationIndexs.clear();
720 UINT randomIndex = 0;
722 if( useStratifiedSampling ){
727 for(UINT i=0; i<totalNumSamples; i++){
733 UINT numSamples = classData[k].
getSize();
734 for(UINT x=0; x<numSamples; x++){
739 SWAP(classData[k][ x ], classData[k][ randomIndex ]);
744 UINT numTrainingSamples = 0;
745 UINT numTestSamples = 0;
748 UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingSizePercentage) );
749 UINT numTestExamples = ((UINT)classData[k].size())-numTrainingExamples;
750 numTrainingSamples += numTrainingExamples;
751 numTestSamples += numTestExamples;
754 trainingSet.
reserve( numTrainingSamples );
755 testSet.
reserve( numTestSamples );
759 UINT numTrainingExamples = (UINT) floor( Float(classData[k].getSize()) / 100.0 * Float(trainingSizePercentage) );
762 for(UINT i=0; i<numTrainingExamples; i++){
763 trainingSet.
addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
765 for(UINT i=numTrainingExamples; i<classData[k].
getSize(); i++){
766 testSet.
addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
771 const UINT numTrainingExamples = (UINT) floor( Float(totalNumSamples) / 100.0 * Float(trainingSizePercentage) );
774 UINT randomIndex = 0;
775 for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
776 std::random_shuffle(indexs.begin(), indexs.end());
779 trainingSet.
reserve( numTrainingExamples );
780 testSet.
reserve( totalNumSamples-numTrainingExamples );
783 for(UINT i=0; i<numTrainingExamples; i++){
784 trainingSet.
addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
786 for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
787 testSet.
addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
806 errorLog <<
"merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.
getNumDimensions() <<
") does not match the number of dimensions of this dataset (" << numDimensions <<
")" << std::endl;
811 crossValidationSetup =
false;
812 crossValidationIndexs.clear();
819 addSample(labelledData[i].getClassLabel(), labelledData[i].getSample());
824 for(UINT i=0; i<classTracker.size(); i++){
836 crossValidationSetup =
false;
837 crossValidationIndexs.clear();
840 if( K > totalNumSamples ){
841 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be zero!" << std::endl;
846 if( K > totalNumSamples ){
847 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << std::endl;
852 if( useStratifiedSampling ){
853 for(UINT c=0; c<classTracker.size(); c++){
854 if( K > classTracker[c].counter ){
855 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
866 UINT numSamplesPerFold = (UINT) floor( totalNumSamples/Float(K) );
869 crossValidationIndexs.
resize(K);
873 UINT randomIndex = 0;
875 if( useStratifiedSampling ){
880 for(UINT i=0; i<totalNumSamples; i++){
886 UINT numSamples = (UINT)classData[c].size();
887 for(UINT x=0; x<numSamples; x++){
892 SWAP(classData[c][ x ] , classData[c][ randomIndex ]);
899 iter = classData[ c ].begin();
901 while( iter != classData[c].end() ){
902 crossValidationIndexs[ k ].push_back( *iter );
911 for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
912 for(UINT x=0; x<totalNumSamples; x++){
917 SWAP(indexs[ x ] , indexs[ randomIndex ]);
922 for(UINT i=0; i<totalNumSamples; i++){
924 crossValidationIndexs[ foldIndex ].push_back( indexs[i] );
927 if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
934 crossValidationSetup =
true;
945 if( !crossValidationSetup ){
946 errorLog <<
"getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << std::endl;
950 if( foldIndex >= kFoldValue )
return trainingData;
954 trainingData.
addClass( classTracker[k].classLabel, classTracker[k].className );
959 for(UINT k=0; k<kFoldValue; k++){
960 if( k != foldIndex ){
961 for(UINT i=0; i<crossValidationIndexs[k].
getSize(); i++){
963 index = crossValidationIndexs[k][i];
964 trainingData.
addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
981 if( !crossValidationSetup )
return testData;
983 if( foldIndex >= kFoldValue )
return testData;
987 testData.
addClass( classTracker[k].classLabel, classTracker[k].className );
990 testData.
reserve( crossValidationIndexs[ foldIndex ].getSize() );
994 for(UINT i=0; i<crossValidationIndexs[ foldIndex ].
getSize(); i++){
996 index = crossValidationIndexs[ foldIndex ][i];
997 testData.
addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
1013 for(UINT i=0; i<classTracker.
getSize(); i++){
1014 if( classTracker[i].classLabel == classLabel ){
1015 classData.
reserve( classTracker[i].counter );
1020 for(UINT i=0; i<totalNumSamples; i++){
1021 if( data[i].getClassLabel() == classLabel ){
1022 classData.
addSample(classLabel, data[i].getSample());
1037 if( numSamples == 0 ) numSamples = totalNumSamples;
1039 newDataset.
reserve( numSamples );
1044 for(UINT k=0; k<K; k++){
1045 newDataset.
addClass( classTracker[k].classLabel );
1048 if( balanceDataset ){
1051 for(UINT i=0; i<totalNumSamples; i++){
1056 UINT numSamplesPerClass = (UINT)floor( numSamples / Float(K) );
1059 UINT classIndex = 0;
1060 UINT classCounter = 0;
1061 UINT randomIndex = 0;
1062 for(UINT i=0; i<numSamples; i++){
1064 randomIndex = classIndexs[ classIndex ][ randomIndex ];
1065 newDataset.
addSample(data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample());
1066 if( classCounter++ >= numSamplesPerClass && classIndex+1 < K ){
1075 for(UINT i=0; i<numSamples; i++){
1077 newDataset.
addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
1095 if( totalNumSamples == 0 ){
1096 return regressionData;
1099 const UINT numInputDimensions = numDimensions;
1103 for(UINT i=0; i<totalNumSamples; i++){
1107 UINT classLabel = data[i].getClassLabel();
1109 if( classLabel > 0 ){
1110 targetVector[ classLabel-1 ] = 1;
1112 regressionData.
clear();
1113 return regressionData;
1116 regressionData.
addSample(data[i].getSample(),targetVector);
1119 return regressionData;
1126 if( totalNumSamples == 0 ){
1127 return unlabelledData;
1132 for(UINT i=0; i<totalNumSamples; i++){
1133 unlabelledData.
addSample( data[i].getSample() );
1136 return unlabelledData;
1142 for(UINT i=0; i<classTracker.
getSize(); i++){
1143 if( classTracker[i].classLabel < minClassLabel ){
1144 minClassLabel = classTracker[i].classLabel;
1148 return minClassLabel;
1153 UINT maxClassLabel = 0;
1155 for(UINT i=0; i<classTracker.
getSize(); i++){
1156 if( classTracker[i].classLabel > maxClassLabel ){
1157 maxClassLabel = classTracker[i].classLabel;
1161 return maxClassLabel;
1165 for(UINT k=0; k<classTracker.
getSize(); k++){
1166 if( classTracker[k].classLabel == classLabel ){
1170 warningLog <<
"getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel <<
" in class tracker!" << std::endl;
1176 for(UINT i=0; i<classTracker.
getSize(); i++){
1177 if( classTracker[i].classLabel == classLabel ){
1178 return classTracker[i].className;
1182 return "CLASS_LABEL_NOT_FOUND";
1186 std::string statsText;
1187 statsText +=
"DatasetName:\t" + datasetName +
"\n";
1188 statsText +=
"DatasetInfo:\t" + infoText +
"\n";
1189 statsText +=
"Number of Dimensions:\t" +
Util::toString( numDimensions ) +
"\n";
1190 statsText +=
"Number of Samples:\t" +
Util::toString( totalNumSamples ) +
"\n";
1192 statsText +=
"ClassStats:\n";
1195 statsText +=
"ClassLabel:\t" +
Util::toString( classTracker[k].classLabel );
1196 statsText +=
"\tNumber of Samples:\t" +
Util::toString(classTracker[k].counter);
1197 statsText +=
"\tClassName:\t" + classTracker[k].className +
"\n";
1202 statsText +=
"Dataset Ranges:\n";
1203 for(UINT j=0; j<ranges.size(); j++){
1213 if( useExternalRanges )
return externalRanges;
1218 if( totalNumSamples > 0 ){
1219 for(UINT j=0; j<numDimensions; j++){
1220 ranges[j].minValue = data[0][j];
1221 ranges[j].maxValue = data[0][j];
1222 for(UINT i=0; i<totalNumSamples; i++){
1223 if( data[i][j] < ranges[j].minValue ){ ranges[j].minValue = data[i][j]; }
1224 else if( data[i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[i][j]; }
1237 classLabels[i] = classTracker[i].classLabel;
1249 classSampleCounts[i] = classTracker[i].counter;
1252 return classSampleCounts;
1259 for(UINT j=0; j<numDimensions; j++){
1260 for(UINT i=0; i<totalNumSamples; i++){
1261 mean[j] += data[i][j];
1263 mean[j] /= Float(totalNumSamples);
1274 for(UINT j=0; j<numDimensions; j++){
1275 for(UINT i=0; i<totalNumSamples; i++){
1276 stdDev[j] += SQR(data[i][j]-mean[j]);
1278 stdDev[j] = sqrt( stdDev[j] / Float(totalNumSamples-1) );
1291 for(UINT i=0; i<ranges.size(); i++){
1292 binRange[i] = (ranges[i].maxValue-ranges[i].minValue)/Float(numBins);
1299 for(UINT i=0; i<M; i++){
1300 if( data[i].getClassLabel() == classLabel ){
1301 for(UINT j=0; j<N; j++){
1303 bool binFound =
false;
1304 for(UINT k=0; k<numBins-1; k++){
1305 if( data[i][j] >= ranges[i].minValue + (binRange[j]*k) && data[i][j] >= ranges[i].minValue + (binRange[j]*(k+1)) ){
1311 if( !binFound ) binIndex = numBins-1;
1312 histData[j][binIndex]++;
1318 if( norm == 0 )
return histData;
1323 histData[i][j] /= norm;
1337 for(UINT i=0; i<totalNumSamples; i++){
1339 for(UINT j=0; j<numDimensions; j++){
1340 mean[classIndex][j] += data[i][j];
1342 counter[ classIndex ]++;
1346 for(UINT j=0; j<numDimensions; j++){
1347 mean[k][j] = counter[k] > 0 ? mean[k][j]/counter[k] : 0;
1362 for(UINT i=0; i<totalNumSamples; i++){
1364 for(UINT j=0; j<numDimensions; j++){
1365 stdDev[classIndex][j] += SQR(data[i][j]-mean[classIndex][j]);
1367 counter[ classIndex ]++;
1371 for(UINT j=0; j<numDimensions; j++){
1372 stdDev[k][j] = sqrt( stdDev[k][j] / Float(counter[k]-1) );
1382 MatrixFloat covariance(numDimensions,numDimensions);
1384 for(UINT j=0; j<numDimensions; j++){
1385 for(UINT k=0; k<numDimensions; k++){
1386 for(UINT i=0; i<totalNumSamples; i++){
1387 covariance[j][k] += (data[i][j]-mean[j]) * (data[i][k]-mean[k]) ;
1389 covariance[j][k] /= Float(totalNumSamples-1);
1400 for(UINT k=0; k<K; k++){
1407 VectorFloat ClassificationData::getClassProbabilities()
const {
1412 const UINT K = (UINT)classLabels.size();
1416 for(UINT k=0; k<K; k++){
1417 for(UINT n=0; n<N; n++){
1418 if( classLabels[k] == classTracker[n].classLabel ){
1419 x[k] = classTracker[n].counter;
1420 sum += classTracker[n].counter;
1428 for(UINT k=0; k<K; k++){
1443 for(UINT k=0; k<K; k++){
1444 if( classTracker[k].classLabel == classLabel){
1445 N = classTracker[k].counter;
1452 for(UINT i=0; i<M; i++){
1453 if( data[i].getClassLabel() == classLabel ){
1454 classIndexes[index++] = i;
1458 return classIndexes;
1467 for(UINT i=0; i<M; i++){
1468 for(UINT j=0; j<N; j++){
1469 d[i][j] = data[i][j];
1481 for(UINT i=0; i<M; i++){
1482 for(UINT j=0; j<N; j++){
1483 d[i][j] = data[i][j];
1496 for(UINT k=0; k<numClasses; k++){
1497 for(UINT j=0; j<numDimensions; j++){
1506 for(UINT i=0; i<numSamples; i++){
1513 for(UINT j=0; j<numDimensions; j++){
1518 UINT classLabel = k + 1;
1525 return data.
save( filename );
bool saveDatasetToFile(const std::string &filename) const
bool setDatasetName(std::string datasetName)
bool loadDatasetFromFile(const std::string &filename)
static std::string toString(const int &i)
RegressionData reformatAsRegressionData() const
ClassificationData & operator=(const ClassificationData &rhs)
static bool generateGaussDataset(const std::string filename, const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
bool addSample(const VectorFloat &sample)
The ClassificationData is the main data structure for recording, labeling, managing, saving, and loading training data for supervised learning problems.
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
bool addSample(UINT classLabel, const VectorFloat &sample)
ClassificationData getTestFoldData(const UINT foldIndex) const
bool addClass(const UINT classLabel, const std::string className="NOT_SET")
Vector< ClassTracker > getClassTracker() const
ClassificationData getClassData(const UINT classLabel) const
virtual bool resize(const unsigned int size)
bool setNumDimensions(UINT numDimensions)
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
MatrixDouble getDataAsMatrixDouble() const
MatrixFloat getClassMean() const
Float getRandomNumberGauss(Float mu=0.0, Float sigma=1.0)
std::string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
bool setClassNameForCorrespondingClassLabel(std::string className, UINT classLabel)
Vector< UINT > getClassLabels() const
bool loadDatasetFromCSVFile(const std::string &filename, const UINT classLabelColumnIndex=0)
bool setAllowNullGestureClass(bool allowNullGestureClass)
UINT getMinimumClassLabel() const
Vector< MatrixFloat > getHistogramData(const UINT numBins) const
unsigned int getSize() const
UINT removeClass(const UINT classLabel)
ClassificationData(UINT numDimensions=0, std::string datasetName="NOT_SET", std::string infoText="")
bool setAllValues(const T &value)
bool setInputAndTargetDimensions(const UINT numInputDimensions, const UINT numTargetDimensions)
bool setInfoText(std::string infoText)
Vector< UINT > getNumSamplesPerClass() const
MatrixFloat getCovarianceMatrix() const
UnlabelledData reformatAsUnlabelledData() const
bool removeSample(const UINT index)
UINT getNumSamples() const
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
bool save(const std::string &filename) const
bool setNumDimensions(const UINT numDimensions)
bool enableExternalRangeScaling(const bool useExternalRanges)
bool setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
bool reserve(const UINT N)
bool saveDatasetToCSVFile(const std::string &filename) const
ClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
unsigned int getNumRows() const
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
Vector< MinMax > getRanges() const
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
bool merge(const ClassificationData &data)
VectorFloat getStdDev() const
Vector< UINT > getClassDataIndexes(const UINT classLabel) const
int getRandomNumberInt(int minRange, int maxRange)
MatrixFloat getDataAsMatrixFloat() const
static bool stringEndsWith(const std::string &str, const std::string &ending)
UINT getClassLabelIndexValue(const UINT classLabel) const
ClassificationData getBootstrappedDataset(UINT numSamples=0, bool balanceDataset=false) const
MatrixFloat getClassHistogramData(const UINT classLabel, const UINT numBins) const
ClassificationData getTrainingFoldData(const UINT foldIndex) const
UINT getMaximumClassLabel() const
bool scale(const Float minTarget, const Float maxTarget)
bool load(const std::string &filename)
MatrixFloat getClassStdDev() const
bool addSample(const VectorFloat &inputVector, const VectorFloat &targetVector)
std::string getStatsAsString() const
virtual ~ClassificationData()
VectorFloat getMean() const