21 #define GRT_DLL_EXPORTS 27 this->datasetName = datasetName;
28 this->numDimensions = numDimensions;
29 this->infoText = infoText;
31 crossValidationSetup =
false;
32 useExternalRanges =
false;
33 allowNullGestureClass =
true;
35 infoLog.
setKey(
"[ClassificationData]");
36 debugLog.
setKey(
"[DEBUG ClassificationData]");
37 errorLog.
setKey(
"[ERROR ClassificationData]");
38 warningLog.setKey(
"[WARNING ClassificationData]");
50 this->datasetName = rhs.datasetName;
51 this->infoText = rhs.infoText;
52 this->numDimensions = rhs.numDimensions;
53 this->totalNumSamples = rhs.totalNumSamples;
54 this->kFoldValue = rhs.kFoldValue;
55 this->crossValidationSetup = rhs.crossValidationSetup;
56 this->useExternalRanges = rhs.useExternalRanges;
57 this->allowNullGestureClass = rhs.allowNullGestureClass;
58 this->externalRanges = rhs.externalRanges;
59 this->classTracker = rhs.classTracker;
60 this->data = rhs.data;
61 this->crossValidationIndexs = rhs.crossValidationIndexs;
62 this->infoLog = rhs.infoLog;
63 this->debugLog = rhs.debugLog;
64 this->errorLog = rhs.errorLog;
65 this->warningLog = rhs.warningLog;
74 crossValidationSetup =
false;
75 crossValidationIndexs.clear();
80 if( numDimensions > 0 ){
85 this->numDimensions = numDimensions;
88 useExternalRanges =
false;
89 externalRanges.clear();
94 errorLog <<
"setNumDimensions(const UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << std::endl;
101 if( datasetName.find(
" ") == std::string::npos ){
102 this->datasetName = datasetName;
106 errorLog <<
"setDatasetName(const std::string datasetName) - The dataset name cannot contain any spaces!" << std::endl;
111 this->infoText = infoText;
117 for(UINT i=0; i<classTracker.
getSize(); i++){
118 if( classTracker[i].classLabel == classLabel ){
119 classTracker[i].className = className;
124 errorLog <<
"setClassNameForCorrespondingClassLabel(const std::string className,const UINT classLabel) - Failed to find class with label: " << classLabel << std::endl;
129 this->allowNullGestureClass = allowNullGestureClass;
135 if( sample.
getSize() != numDimensions ){
136 if( totalNumSamples == 0 ){
137 warningLog <<
"addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.
getSize() <<
") does not match the number of dimensions of the dataset (" << numDimensions <<
"), setting dimensionality to: " << numDimensions << std::endl;
138 numDimensions = sample.
getSize();
140 errorLog <<
"addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.
getSize() <<
") does not match the number of dimensions of the dataset (" << numDimensions <<
")" << std::endl;
146 if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
147 errorLog <<
"addSample(const UINT classLabel, VectorFloat &sample) - the class label can not be 0!" << std::endl;
152 crossValidationSetup =
false;
153 crossValidationIndexs.clear();
156 data.push_back( newSample );
159 if( classTracker.
getSize() == 0 ){
161 classTracker.push_back(tracker);
163 bool labelFound =
false;
164 for(UINT i=0; i<classTracker.
getSize(); i++){
165 if( classLabel == classTracker[i].classLabel ){
166 classTracker[i].counter++;
173 classTracker.push_back(tracker);
185 if( totalNumSamples == 0 ){
186 warningLog <<
"removeSample( const UINT index ) - Failed to remove sample, the training dataset is empty!" << std::endl;
190 if( index >= totalNumSamples ){
191 warningLog <<
"removeSample( const UINT index ) - Failed to remove sample, the index is out of bounds! Number of training samples: " << totalNumSamples <<
" index: " << index << std::endl;
196 crossValidationSetup =
false;
197 crossValidationIndexs.clear();
200 UINT classLabel = data[ index ].getClassLabel();
203 data.erase( data.begin()+index );
205 totalNumSamples = data.
getSize();
208 for(
size_t i=0; i<classTracker.
getSize(); i++){
209 if( classTracker[i].classLabel == classLabel ){
210 classTracker[i].counter--;
220 if( totalNumSamples == 0 ){
221 warningLog <<
"removeLastSample() - Failed to remove sample, the training dataset is empty!" << std::endl;
232 if( data.capacity() >= N )
return true;
244 for(
size_t i=0; i<classTracker.
getSize(); i++){
245 if( classTracker[i].classLabel == classLabel ){
246 warningLog <<
"addClass(const UINT classLabel,const std::string className) - Failed to add class, it already exists! Class label: " << classLabel << std::endl;
252 classTracker.push_back(
ClassTracker(classLabel,0,className) );
262 UINT numExamplesRemoved = 0;
263 UINT numExamplesToRemove = 0;
266 crossValidationSetup =
false;
267 crossValidationIndexs.clear();
270 for(UINT i=0; i<classTracker.
getSize(); i++){
271 if( classTracker[i].classLabel == classLabel ){
272 numExamplesToRemove = classTracker[i].counter;
273 classTracker.erase(classTracker.begin()+i);
279 if( numExamplesToRemove > 0 ){
281 while( numExamplesRemoved < numExamplesToRemove ){
282 if( data[i].getClassLabel() == classLabel ){
283 data.erase(data.begin()+i);
284 numExamplesRemoved++;
285 }
else if( ++i == data.
getSize() )
break;
289 totalNumSamples = data.
getSize();
291 return numExamplesRemoved;
295 bool oldClassLabelFound =
false;
296 bool newClassLabelAllReadyExists =
false;
297 UINT indexOfOldClassLabel = 0;
298 UINT indexOfNewClassLabel = 0;
301 for(UINT i=0; i<classTracker.
getSize(); i++){
302 if( classTracker[i].classLabel == oldClassLabel ){
303 indexOfOldClassLabel = i;
304 oldClassLabelFound =
true;
306 if( classTracker[i].classLabel == newClassLabel ){
307 indexOfNewClassLabel = i;
308 newClassLabelAllReadyExists =
true;
313 if( !oldClassLabelFound ){
318 for(UINT i=0; i<totalNumSamples; i++){
319 if( data[i].getClassLabel() == oldClassLabel ){
320 data[i].setClassLabel(newClassLabel);
325 if( newClassLabelAllReadyExists ){
327 classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
330 classTracker.push_back(
ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
334 classTracker.erase( classTracker.begin() + indexOfOldClassLabel );
344 if( externalRanges.size() != numDimensions )
return false;
346 this->externalRanges = externalRanges;
347 this->useExternalRanges = useExternalRanges;
353 if( externalRanges.
getSize() == numDimensions ){
354 this->useExternalRanges = useExternalRanges;
362 return scale(ranges,minTarget,maxTarget);
366 if( ranges.
getSize() != numDimensions )
return false;
369 for(UINT i=0; i<totalNumSamples; i++){
370 for(UINT j=0; j<numDimensions; j++){
371 data[i][j] = grt_scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
403 file.open(filename.c_str(), std::ios::out);
405 if( !file.is_open() ){
409 file <<
"GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0\n";
410 file <<
"DatasetName: " << datasetName << std::endl;
411 file <<
"InfoText: " << infoText << std::endl;
412 file <<
"NumDimensions: " << numDimensions << std::endl;
413 file <<
"TotalNumExamples: " << totalNumSamples << std::endl;
414 file <<
"NumberOfClasses: " << classTracker.size() << std::endl;
415 file <<
"ClassIDsAndCounters: " << std::endl;
417 for(UINT i=0; i<classTracker.size(); i++){
418 file << classTracker[i].classLabel <<
"\t" << classTracker[i].counter <<
"\t" << classTracker[i].className << std::endl;
421 file <<
"UseExternalRanges: " << useExternalRanges << std::endl;
423 if( useExternalRanges ){
424 for(UINT i=0; i<externalRanges.size(); i++){
425 file << externalRanges[i].minValue <<
"\t" << externalRanges[i].maxValue << std::endl;
431 for(UINT i=0; i<totalNumSamples; i++){
432 file << data[i].getClassLabel();
433 for(UINT j=0; j<numDimensions; j++){
434 file <<
"\t" << data[i][j];
446 file.open(filename.c_str(), std::ios::in);
450 if( !file.is_open() ){
451 errorLog <<
"loadDatasetFromFile(const std::string &filename) - could not open file!" << std::endl;
459 if(word !=
"GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0"){
460 errorLog <<
"loadDatasetFromFile(const std::string &filename) - could not find file header!" << std::endl;
467 if(word !=
"DatasetName:"){
468 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find DatasetName header!" << std::endl;
469 errorLog << word << std::endl;
476 if(word !=
"InfoText:"){
477 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find InfoText header!" << std::endl;
485 while( word !=
"NumDimensions:" ){
486 infoText += word +
" ";
491 if( word !=
"NumDimensions:" ){
492 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find NumDimensions header!" << std::endl;
496 file >> numDimensions;
500 if( word !=
"TotalNumTrainingExamples:" && word !=
"TotalNumExamples:" ){
501 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find TotalNumTrainingExamples header!" << std::endl;
505 file >> totalNumSamples;
509 if(word !=
"NumberOfClasses:"){
510 errorLog <<
"loadDatasetFromFile(string filename) - failed to find NumberOfClasses header!" << std::endl;
517 classTracker.
resize(numClasses);
521 if(word !=
"ClassIDsAndCounters:"){
522 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find ClassIDsAndCounters header!" << std::endl;
527 for(UINT i=0; i<classTracker.
getSize(); i++){
528 file >> classTracker[i].classLabel;
529 file >> classTracker[i].counter;
530 file >> classTracker[i].className;
535 if(word !=
"UseExternalRanges:"){
536 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find UseExternalRanges header!" << std::endl;
540 file >> useExternalRanges;
543 if( useExternalRanges ){
544 externalRanges.
resize(numDimensions);
545 for(UINT i=0; i<externalRanges.
getSize(); i++){
546 file >> externalRanges[i].minValue;
547 file >> externalRanges[i].maxValue;
553 if( word !=
"LabelledTrainingData:" && word !=
"Data:"){
554 errorLog <<
"loadDatasetFromFile(const std::string &filename) - failed to find LabelledTrainingData header!" << std::endl;
560 data.
resize( totalNumSamples, tempSample );
562 for(UINT i=0; i<totalNumSamples; i++){
566 for(UINT j=0; j<numDimensions; j++){
569 data[i].set(classLabel, sample);
583 file.open(filename.c_str(), std::ios::out );
585 if( !file.is_open() ){
590 for(UINT i=0; i<totalNumSamples; i++){
591 file << data[i].getClassLabel();
592 for(UINT j=0; j<numDimensions; j++){
593 file <<
"," << data[i][j];
606 datasetName =
"NOT_SET";
619 if( !parser.parseCSVFile(filename,
true) ){
620 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl;
624 if( !parser.getConsistentColumnSize() ){
625 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << std::endl;
629 if( parser.getColumnSize() <= 1 ){
630 errorLog <<
"loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl;
635 numDimensions = parser.getColumnSize()-1;
648 totalNumSamples = parser.getRowSize();
649 for(UINT i=0; i<totalNumSamples; i++){
651 classLabel = grt_from_str< UINT >( parser[i][classLabelColumnIndex] );
654 data[i].setClassLabel( classLabel );
659 while( j != numDimensions ){
660 if( n != classLabelColumnIndex ){
661 data[i][j++] = grt_from_str< Float >( parser[i][n] );
667 if( classTracker.size() == 0 ){
669 classTracker.push_back(tracker);
671 bool labelFound =
false;
672 const size_t numClasses = classTracker.size();
673 for(
size_t i=0; i<numClasses; i++){
674 if( classLabel == classTracker[i].classLabel ){
675 classTracker[i].counter++;
682 classTracker.push_back(tracker);
702 sort(classTracker.begin(),classTracker.end(),ClassTracker::sortByClassLabelAscending);
707 ClassificationData ClassificationData::partition(
const UINT trainingSizePercentage,
const bool useStratifiedSampling){
708 return split(trainingSizePercentage, useStratifiedSampling);
719 crossValidationSetup =
false;
720 crossValidationIndexs.clear();
732 trainingSet.classTracker.
resize( K );
733 testSet.classTracker.
resize( K );
734 for(UINT k=0; k<K; k++){
735 trainingSet.classTracker[k].classLabel = classTracker[k].classLabel;
736 testSet.classTracker[k].classLabel = classTracker[k].classLabel;
737 trainingSet.classTracker[k].counter = 0;
738 testSet.classTracker[k].counter = 0;
741 if( useStratifiedSampling ){
746 for(UINT i=0; i<totalNumSamples; i++){
751 for(UINT k=0; k<K; k++){
752 std::random_shuffle(classData[k].begin(), classData[k].end());
756 UINT numTrainingSamples = 0;
757 UINT numTestSamples = 0;
759 for(UINT k=0; k<K; k++){
760 UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingSizePercentage) );
761 UINT numTestExamples = ((UINT)classData[k].size())-numTrainingExamples;
762 numTrainingSamples += numTrainingExamples;
763 numTestSamples += numTestExamples;
766 trainingSet.
reserve( numTrainingSamples );
767 testSet.
reserve( numTestSamples );
770 for(UINT k=0; k<K; k++){
771 UINT numTrainingExamples = (UINT) floor( Float(classData[k].getSize()) / 100.0 * Float(trainingSizePercentage) );
774 for(UINT i=0; i<numTrainingExamples; i++){
775 trainingSet.
addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
777 for(UINT i=numTrainingExamples; i<classData[k].
getSize(); i++){
778 testSet.
addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
783 const UINT numTrainingExamples = (UINT) floor( Float(totalNumSamples) / 100.0 * Float(trainingSizePercentage) );
787 for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
788 std::random_shuffle(indexs.begin(), indexs.end());
791 trainingSet.
reserve( numTrainingExamples );
792 testSet.
reserve( totalNumSamples-numTrainingExamples );
795 for(UINT i=0; i<numTrainingExamples; i++){
796 trainingSet.
addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
798 for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
799 testSet.
addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
822 errorLog <<
"merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << otherData.
getNumDimensions() <<
") does not match the number of dimensions of this dataset (" << numDimensions <<
")" << std::endl;
827 crossValidationSetup =
false;
828 crossValidationIndexs.clear();
837 for(UINT i=0; i<M; i++){
838 addSample(otherData[i].getClassLabel(), otherData[i].getSample());
843 for(UINT i=0; i<classTracker.
getSize(); i++){
855 crossValidationSetup =
false;
856 crossValidationIndexs.clear();
860 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be zero!" << std::endl;
865 if( K > totalNumSamples ){
866 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << std::endl;
871 if( useStratifiedSampling ){
872 for(UINT c=0; c<classTracker.
getSize(); c++){
873 if( K > classTracker[c].counter ){
874 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
885 UINT numSamplesPerFold = (UINT) floor( totalNumSamples/Float(K) );
888 crossValidationIndexs.
resize(K);
892 UINT randomIndex = 0;
894 if( useStratifiedSampling ){
899 for(UINT i=0; i<totalNumSamples; i++){
905 UINT numSamples = (UINT)classData[c].size();
906 for(UINT x=0; x<numSamples; x++){
911 SWAP(classData[c][ x ] , classData[c][ randomIndex ]);
918 iter = classData[ c ].begin();
920 while( iter != classData[c].end() ){
921 crossValidationIndexs[ k ].push_back( *iter );
930 for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
931 for(UINT x=0; x<totalNumSamples; x++){
936 SWAP(indexs[ x ] , indexs[ randomIndex ]);
941 for(UINT i=0; i<totalNumSamples; i++){
943 crossValidationIndexs[ foldIndex ].push_back( indexs[i] );
946 if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
953 crossValidationSetup =
true;
964 if( !crossValidationSetup ){
965 errorLog <<
"getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << std::endl;
969 if( foldIndex >= kFoldValue )
return trainingData;
973 trainingData.
addClass( classTracker[k].classLabel, classTracker[k].className );
978 for(UINT k=0; k<kFoldValue; k++){
979 if( k != foldIndex ){
980 for(UINT i=0; i<crossValidationIndexs[k].
getSize(); i++){
982 index = crossValidationIndexs[k][i];
983 trainingData.
addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
1000 if( !crossValidationSetup )
return testData;
1002 if( foldIndex >= kFoldValue )
return testData;
1006 testData.
addClass( classTracker[k].classLabel, classTracker[k].className );
1009 testData.
reserve( crossValidationIndexs[ foldIndex ].getSize() );
1013 for(UINT i=0; i<crossValidationIndexs[ foldIndex ].
getSize(); i++){
1015 index = crossValidationIndexs[ foldIndex ][i];
1016 testData.
addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
1032 for(UINT i=0; i<classTracker.
getSize(); i++){
1033 if( classTracker[i].classLabel == classLabel ){
1034 classData.
reserve( classTracker[i].counter );
1039 for(UINT i=0; i<totalNumSamples; i++){
1040 if( data[i].getClassLabel() == classLabel ){
1041 classData.
addSample(classLabel, data[i].getSample());
1056 const UINT numBootstrapSamples = numSamples_ > 0 ? numSamples_ : totalNumSamples;
1058 grt_assert( numBootstrapSamples > 0 );
1060 newDataset.
reserve( numBootstrapSamples );
1065 for(UINT k=0; k<K; k++){
1066 newDataset.
addClass( classTracker[k].classLabel );
1069 if( balanceDataset ){
1072 for(UINT i=0; i<totalNumSamples; i++){
1077 UINT numSamplesPerClass = (UINT)floor( numBootstrapSamples / Float(K) );
1080 UINT classIndex = 0;
1081 UINT classCounter = 0;
1082 UINT randomIndex = 0;
1083 for(UINT i=0; i<numBootstrapSamples; i++){
1085 randomIndex = classIndexs[ classIndex ][ randomIndex ];
1086 newDataset.
addSample(data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample());
1087 if( classCounter++ >= numSamplesPerClass && classIndex+1 < K ){
1096 for(UINT i=0; i<numBootstrapSamples; i++){
1098 newDataset.
addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
1116 if( totalNumSamples == 0 ){
1117 return regressionData;
1120 const UINT numInputDimensions = numDimensions;
1124 for(UINT i=0; i<totalNumSamples; i++){
1128 UINT classLabel = data[i].getClassLabel();
1130 if( classLabel > 0 ){
1131 targetVector[ classLabel-1 ] = 1;
1133 regressionData.
clear();
1134 return regressionData;
1137 regressionData.
addSample(data[i].getSample(),targetVector);
1140 return regressionData;
1147 if( totalNumSamples == 0 ){
1148 return unlabelledData;
1153 for(UINT i=0; i<totalNumSamples; i++){
1154 unlabelledData.
addSample( data[i].getSample() );
1157 return unlabelledData;
1163 for(UINT i=0; i<classTracker.
getSize(); i++){
1164 if( classTracker[i].classLabel < minClassLabel ){
1165 minClassLabel = classTracker[i].classLabel;
1169 return minClassLabel;
1174 UINT maxClassLabel = 0;
1176 for(UINT i=0; i<classTracker.
getSize(); i++){
1177 if( classTracker[i].classLabel > maxClassLabel ){
1178 maxClassLabel = classTracker[i].classLabel;
1182 return maxClassLabel;
1186 for(UINT k=0; k<classTracker.
getSize(); k++){
1187 if( classTracker[k].classLabel == classLabel ){
1191 warningLog <<
"getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel <<
" in class tracker!" << std::endl;
1197 for(UINT i=0; i<classTracker.
getSize(); i++){
1198 if( classTracker[i].classLabel == classLabel ){
1199 return classTracker[i].className;
1203 return "CLASS_LABEL_NOT_FOUND";
1207 std::string statsText;
1208 statsText +=
"DatasetName:\t" + datasetName +
"\n";
1209 statsText +=
"DatasetInfo:\t" + infoText +
"\n";
1210 statsText +=
"Number of Dimensions:\t" +
Util::toString( numDimensions ) +
"\n";
1211 statsText +=
"Number of Samples:\t" +
Util::toString( totalNumSamples ) +
"\n";
1213 statsText +=
"ClassStats:\n";
1216 statsText +=
"ClassLabel:\t" +
Util::toString( classTracker[k].classLabel );
1217 statsText +=
"\tNumber of Samples:\t" +
Util::toString(classTracker[k].counter);
1218 statsText +=
"\tClassName:\t" + classTracker[k].className +
"\n";
1223 statsText +=
"Dataset Ranges:\n";
1224 for(UINT j=0; j<ranges.size(); j++){
1234 if( useExternalRanges )
return externalRanges;
1239 if( totalNumSamples > 0 ){
1240 for(UINT j=0; j<numDimensions; j++){
1241 ranges[j].minValue = data[0][j];
1242 ranges[j].maxValue = data[0][j];
1243 for(UINT i=0; i<totalNumSamples; i++){
1244 if( data[i][j] < ranges[j].minValue ){ ranges[j].minValue = data[i][j]; }
1245 else if( data[i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[i][j]; }
1258 classLabels[i] = classTracker[i].classLabel;
1270 classSampleCounts[i] = classTracker[i].counter;
1273 return classSampleCounts;
1280 for(UINT j=0; j<numDimensions; j++){
1281 for(UINT i=0; i<totalNumSamples; i++){
1282 mean[j] += data[i][j];
1284 mean[j] /= Float(totalNumSamples);
1295 for(UINT j=0; j<numDimensions; j++){
1296 for(UINT i=0; i<totalNumSamples; i++){
1297 stdDev[j] += SQR(data[i][j]-mean[j]);
1299 stdDev[j] = sqrt( stdDev[j] / Float(totalNumSamples-1) );
1312 for(UINT i=0; i<ranges.size(); i++){
1313 binRange[i] = (ranges[i].maxValue-ranges[i].minValue)/Float(numBins);
1320 for(UINT i=0; i<M; i++){
1321 if( data[i].getClassLabel() == classLabel ){
1322 for(UINT j=0; j<N; j++){
1324 bool binFound =
false;
1325 for(UINT k=0; k<numBins-1; k++){
1326 if( data[i][j] >= ranges[i].minValue + (binRange[j]*k) && data[i][j] >= ranges[i].minValue + (binRange[j]*(k+1)) ){
1332 if( !binFound ) binIndex = numBins-1;
1333 histData[j][binIndex]++;
1339 if( norm == 0 )
return histData;
1344 histData[i][j] /= norm;
1358 for(UINT i=0; i<totalNumSamples; i++){
1360 for(UINT j=0; j<numDimensions; j++){
1361 mean[classIndex][j] += data[i][j];
1363 counter[ classIndex ]++;
1367 for(UINT j=0; j<numDimensions; j++){
1368 mean[k][j] = counter[k] > 0 ? mean[k][j]/counter[k] : 0;
1383 for(UINT i=0; i<totalNumSamples; i++){
1385 for(UINT j=0; j<numDimensions; j++){
1386 stdDev[classIndex][j] += SQR(data[i][j]-mean[classIndex][j]);
1388 counter[ classIndex ]++;
1392 for(UINT j=0; j<numDimensions; j++){
1393 stdDev[k][j] = sqrt( stdDev[k][j] / Float(counter[k]-1) );
1403 MatrixFloat covariance(numDimensions,numDimensions);
1405 for(UINT j=0; j<numDimensions; j++){
1406 for(UINT k=0; k<numDimensions; k++){
1407 for(UINT i=0; i<totalNumSamples; i++){
1408 covariance[j][k] += (data[i][j]-mean[j]) * (data[i][k]-mean[k]) ;
1410 covariance[j][k] /= Float(totalNumSamples-1);
1421 for(UINT k=0; k<K; k++){
1428 VectorFloat ClassificationData::getClassProbabilities()
const {
1433 const UINT K = (UINT)classLabels.size();
1437 for(UINT k=0; k<K; k++){
1438 for(UINT n=0; n<N; n++){
1439 if( classLabels[k] == classTracker[n].classLabel ){
1440 x[k] = classTracker[n].counter;
1441 sum += classTracker[n].counter;
1449 for(UINT k=0; k<K; k++){
1464 for(UINT k=0; k<K; k++){
1465 if( classTracker[k].classLabel == classLabel){
1466 N = classTracker[k].counter;
1473 for(UINT i=0; i<M; i++){
1474 if( data[i].getClassLabel() == classLabel ){
1475 classIndexes[index++] = i;
1479 return classIndexes;
1488 for(UINT i=0; i<M; i++){
1489 for(UINT j=0; j<N; j++){
1490 d[i][j] = data[i][j];
1502 for(UINT i=0; i<M; i++){
1503 for(UINT j=0; j<N; j++){
1504 d[i][j] = data[i][j];
1517 return data.
save( filename );
1526 for(UINT k=0; k<numClasses; k++){
1527 for(UINT j=0; j<numDimensions; j++){
1537 for(UINT i=0; i<numSamples; i++){
1544 for(UINT j=0; j<numDimensions; j++){
1549 UINT classLabel = k + 1;
1566 for(UINT k=0; k<numClasses; k++){
1567 for(UINT j=0; j<numDimensions; j++){
1568 model[k][j] =
Util::scale(k,0,numClasses-1,-range,range,
true);
1577 for(UINT i=0; i<numSamples; i++){
1584 for(UINT j=0; j<numDimensions; j++){
1589 UINT classLabel = k + 1;
bool saveDatasetToFile(const std::string &filename) const
bool setDatasetName(std::string datasetName)
bool loadDatasetFromFile(const std::string &filename)
static std::string toString(const int &i)
RegressionData reformatAsRegressionData() const
ClassificationData & operator=(const ClassificationData &rhs)
static bool generateGaussDataset(const std::string filename, const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
bool addSample(const VectorFloat &sample)
bool setAllowNullGestureClass(const bool allowNullGestureClass)
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
ClassificationData getTestFoldData(const UINT foldIndex) const
bool addClass(const UINT classLabel, const std::string className="NOT_SET")
bool addSample(const UINT classLabel, const VectorFloat &sample)
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
Vector< ClassTracker > getClassTracker() const
This file contains the Random class, a useful wrapper for generating cross-platform random numbers...
ClassificationData getClassData(const UINT classLabel) const
virtual bool resize(const unsigned int size)
bool setNumDimensions(UINT numDimensions)
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
MatrixDouble getDataAsMatrixDouble() const
virtual bool setKey(const std::string &key)
Sets the key that gets written at the start of each message; this will be written in the format 'key ...
MatrixFloat getClassMean() const
Float getRandomNumberGauss(Float mu=0.0, Float sigma=1.0)
std::string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
Vector< UINT > getClassLabels() const
bool loadDatasetFromCSVFile(const std::string &filename, const UINT classLabelColumnIndex=0)
UINT getMinimumClassLabel() const
Vector< MatrixFloat > getHistogramData(const UINT numBins) const
UINT removeClass(const UINT classLabel)
ClassificationData(UINT numDimensions=0, std::string datasetName="NOT_SET", std::string infoText="")
bool setAllValues(const T &value)
bool setInputAndTargetDimensions(const UINT numInputDimensions, const UINT numTargetDimensions)
bool setInfoText(std::string infoText)
Vector< UINT > getNumSamplesPerClass() const
MatrixFloat getCovarianceMatrix() const
UnlabelledData reformatAsUnlabelledData() const
bool removeSample(const UINT index)
UINT getNumSamples() const
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
bool save(const std::string &filename) const
bool setNumDimensions(const UINT numDimensions)
bool enableExternalRangeScaling(const bool useExternalRanges)
bool setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
bool saveDatasetToCSVFile(const std::string &filename) const
static ClassificationData generateGaussLinearDataset(const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const Float range=10, const Float sigma=1)
unsigned int getNumRows() const
UINT getNumDimensions() const
UINT getNumClasses() const
unsigned int getNumCols() const
Vector< MinMax > getRanges() const
Float getRandomNumberUniform(Float minRange=0.0, Float maxRange=1.0)
bool merge(const ClassificationData &data)
ClassificationData split(const UINT splitPercentage, const bool useStratifiedSampling=false)
VectorFloat getStdDev() const
Vector< UINT > getClassDataIndexes(const UINT classLabel) const
int getRandomNumberInt(int minRange, int maxRange)
bool reserve(const UINT M)
MatrixFloat getDataAsMatrixFloat() const
static bool stringEndsWith(const std::string &str, const std::string &ending)
bool setClassNameForCorrespondingClassLabel(const std::string className, const UINT classLabel)
ClassificationData getBootstrappedDataset(const UINT numSamples=0, const bool balanceDataset=false) const
This class stores the class label and raw data for a single labelled classification sample...
UINT getClassLabelIndexValue(const UINT classLabel) const
MatrixFloat getClassHistogramData(const UINT classLabel, const UINT numBins) const
ClassificationData getTrainingFoldData(const UINT foldIndex) const
UINT getMaximumClassLabel() const
bool scale(const Float minTarget, const Float maxTarget)
bool load(const std::string &filename)
MatrixFloat getClassStdDev() const
bool addSample(const VectorFloat &inputVector, const VectorFloat &targetVector)
std::string getStatsAsString() const
virtual ~ClassificationData()
VectorFloat getMean() const