33 trackingClass =
false;
35 debugLog.setProceedingText(
"[DEBUG ClassificationDataStream]");
36 errorLog.setProceedingText(
"[ERROR ClassificationDataStream]");
37 warningLog.setProceedingText(
"[WARNING ClassificationDataStream]");
39 if( numDimensions > 0 ){
55 this->totalNumSamples = rhs.totalNumSamples;
56 this->lastClassID = rhs.lastClassID;
57 this->playbackIndex = rhs.playbackIndex;
58 this->trackingClass = rhs.trackingClass;
61 this->data = rhs.data;
62 this->classTracker = rhs.classTracker;
63 this->timeSeriesPositionTracker = rhs.timeSeriesPositionTracker;
75 trackingClass =
false;
78 timeSeriesPositionTracker.clear();
82 if( numDimensions > 0 ){
92 errorLog <<
"setNumDimensions(const UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << std::endl;
100 if( datasetName.find(
" ") == std::string::npos ){
105 errorLog <<
"setDatasetName(const std::string datasetName) - The dataset name cannot contain any spaces!" << std::endl;
116 for(UINT i=0; i<classTracker.size(); i++){
117 if( classTracker[i].classLabel == classLabel ){
118 classTracker[i].className = className;
123 errorLog <<
"setClassNameForCorrespondingClassLabel(const std::string className,const UINT classLabel) - Failed to find class with label: " << classLabel << std::endl;
130 errorLog <<
"addSample(const UINT classLabel, VectorFloat sample) - the size of the new sample (" << sample.size() <<
") does not match the number of dimensions of the dataset (" <<
numDimensions <<
")" << std::endl;
134 bool searchForNewClass =
true;
136 if( classLabel != lastClassID ){
138 timeSeriesPositionTracker[ timeSeriesPositionTracker.size()-1 ].setEndIndex( totalNumSamples-1 );
139 }
else searchForNewClass =
false;
142 if( searchForNewClass ){
143 bool newClass =
true;
145 for(UINT k=0; k<classTracker.size(); k++){
146 if( classTracker[k].classLabel == classLabel ){
148 classTracker[k].counter++;
153 classTracker.push_back( newCounter );
157 trackingClass =
true;
158 lastClassID = classLabel;
160 timeSeriesPositionTracker.push_back( newTracker );
164 data.push_back( labelledSample );
172 errorLog <<
"addSample(const UINT classLabel, const MatrixFloat &sample) - the number of columns in the sample (" << sample.
getNumCols() <<
") does not match the number of dimensions of the dataset (" <<
numDimensions <<
")" << std::endl;
176 bool searchForNewClass =
true;
178 if( classLabel != lastClassID ){
180 timeSeriesPositionTracker[ timeSeriesPositionTracker.size()-1 ].setEndIndex( totalNumSamples-1 );
181 }
else searchForNewClass =
false;
184 if( searchForNewClass ){
185 bool newClass =
true;
187 for(UINT k=0; k<classTracker.size(); k++){
188 if( classTracker[k].classLabel == classLabel ){
190 classTracker[k].counter += sample.
getNumRows();
195 classTracker.push_back( newCounter );
199 trackingClass =
true;
200 lastClassID = classLabel;
202 timeSeriesPositionTracker.push_back( newTracker );
207 data.push_back( labelledSample );
208 data.back().setClassLabel( classLabel );
210 data.back()[j] = sample[i][j];
220 if( totalNumSamples > 0 ){
223 UINT classLabel = data[ totalNumSamples-1 ].getClassLabel();
226 data.erase( data.end()-1 );
228 totalNumSamples = (UINT)data.size();
231 for(UINT i=0; i<classTracker.size(); i++){
232 if( classTracker[i].classLabel == classLabel ){
233 classTracker[i].counter--;
239 if( !trackingClass ){
240 UINT endIndex = timeSeriesPositionTracker[ timeSeriesPositionTracker.size()-1 ].getEndIndex();
241 timeSeriesPositionTracker[ timeSeriesPositionTracker.size()-1 ].setEndIndex( endIndex-1 );
251 UINT numExamplesRemoved = 0;
252 UINT numExamplesToRemove = 0;
255 for(UINT i=0; i<classTracker.size(); i++){
256 if( classTracker[i].classLabel == classLabel ){
257 numExamplesToRemove = classTracker[i].counter;
258 classTracker.erase(classTracker.begin()+i);
264 if( numExamplesToRemove > 0 ){
266 while( numExamplesRemoved < numExamplesToRemove ){
267 if( data[i].getClassLabel() == classLabel ){
268 data.erase(data.begin()+i);
269 numExamplesRemoved++;
270 }
else if( ++i == data.size() )
break;
277 while( iter != timeSeriesPositionTracker.end() ){
278 if( iter->getClassLabel() == classLabel ){
279 UINT length = iter->getLength();
283 while( updateIter != timeSeriesPositionTracker.end() ){
284 updateIter->setStartIndex( updateIter->getStartIndex() - length );
285 updateIter->setEndIndex( updateIter->getEndIndex() - length );
290 iter = timeSeriesPositionTracker.erase( iter );
294 totalNumSamples = (UINT)data.size();
296 return numExamplesRemoved;
300 bool oldClassLabelFound =
false;
301 bool newClassLabelAllReadyExists =
false;
302 UINT indexOfOldClassLabel = 0;
303 UINT indexOfNewClassLabel = 0;
306 for(UINT i=0; i<classTracker.size(); i++){
307 if( classTracker[i].classLabel == oldClassLabel ){
308 indexOfOldClassLabel = i;
309 oldClassLabelFound =
true;
311 if( classTracker[i].classLabel == newClassLabel ){
312 indexOfNewClassLabel = i;
313 newClassLabelAllReadyExists =
true;
318 if( !oldClassLabelFound ){
323 for(UINT i=0; i<totalNumSamples; i++){
324 if( data[i].getClassLabel() == oldClassLabel ){
325 data[i].set(newClassLabel, data[i].getSample());
330 if( newClassLabelAllReadyExists ){
332 classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
335 classTracker.erase( classTracker.begin() + indexOfOldClassLabel );
338 classTracker.push_back(
ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
342 for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
343 if( timeSeriesPositionTracker[i].getClassLabel() == oldClassLabel ){
344 timeSeriesPositionTracker[i].setClassLabel( newClassLabel );
371 return scale(ranges,minTarget,maxTarget);
378 for(UINT i=0; i<totalNumSamples; i++){
380 data[i][j] =
Util::scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
387 if( playbackIndex < totalNumSamples ){
388 this->playbackIndex = playbackIndex;
397 UINT index = playbackIndex++ % totalNumSamples;
398 return data[ index ];
403 for(UINT x=0; x<timeSeriesPositionTracker.size(); x++){
404 if( timeSeriesPositionTracker[x].getClassLabel() == classLabel && timeSeriesPositionTracker[x].getEndIndex() > 0){
406 for(UINT i=timeSeriesPositionTracker[x].getStartIndex(); i<timeSeriesPositionTracker[x].getEndIndex(); i++){
407 timeSeries.
push_back( data[ i ].getSample() );
409 classData.
addSample(classLabel,timeSeries);
416 UINT minClassLabel = 99999;
418 for(UINT i=0; i<classTracker.size(); i++){
419 if( classTracker[i].classLabel < minClassLabel ){
420 minClassLabel = classTracker[i].classLabel;
424 return minClassLabel;
429 UINT maxClassLabel = 0;
431 for(UINT i=0; i<classTracker.size(); i++){
432 if( classTracker[i].classLabel > maxClassLabel ){
433 maxClassLabel = classTracker[i].classLabel;
437 return maxClassLabel;
441 for(UINT k=0; k<classTracker.size(); k++){
442 if( classTracker[k].classLabel == classLabel ){
446 warningLog <<
"getClassLabelIndexValue(const UINT classLabel) - Failed to find class label: " << classLabel <<
" in class tracker!" << std::endl;
452 for(UINT i=0; i<classTracker.size(); i++){
453 if( classTracker[i].classLabel == classLabel ){
454 return classTracker[i].className;
457 return "CLASS_LABEL_NOT_FOUND";
468 if( totalNumSamples > 0 ){
470 ranges[j].minValue = data[0][0];
471 ranges[j].maxValue = data[0][0];
472 for(UINT i=0; i<totalNumSamples; i++){
473 if( data[i][j] < ranges[j].minValue ){ ranges[j].minValue = data[i][j]; }
474 else if( data[i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[i][j]; }
506 file.open(filename.c_str(), std::ios::out);
508 if( !file.is_open() ){
509 errorLog <<
"saveDatasetToFile(const std::string &filename) - Failed to open file!" << std::endl;
515 trackingClass =
false;
516 timeSeriesPositionTracker[ timeSeriesPositionTracker.size()-1 ].setEndIndex( totalNumSamples-1 );
519 file <<
"GRT_LABELLED_CONTINUOUS_TIME_SERIES_CLASSIFICATION_FILE_V1.0\n";
520 file <<
"DatasetName: " <<
datasetName << std::endl;
521 file <<
"InfoText: " <<
infoText << std::endl;
523 file <<
"TotalNumSamples: " << totalNumSamples << std::endl;
524 file <<
"NumberOfClasses: " << classTracker.size() << std::endl;
525 file <<
"ClassIDsAndCounters: " << std::endl;
526 for(UINT i=0; i<classTracker.size(); i++){
527 file << classTracker[i].classLabel <<
"\t" << classTracker[i].counter << std::endl;
530 file <<
"NumberOfPositionTrackers: " << timeSeriesPositionTracker.size() << std::endl;
531 file <<
"TimeSeriesPositionTrackers: " << std::endl;
532 for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
533 file << timeSeriesPositionTracker[i].getClassLabel() <<
"\t" << timeSeriesPositionTracker[i].getStartIndex() <<
"\t" << timeSeriesPositionTracker[i].getEndIndex() << std::endl;
544 file <<
"LabelledContinuousTimeSeriesClassificationData:\n";
545 for(UINT i=0; i<totalNumSamples; i++){
546 file << data[i].getClassLabel();
548 file <<
"\t" << data[i][j];
560 file.open(filename.c_str(), std::ios::in);
562 UINT numTrackingPoints = 0;
565 if( !file.is_open() ){
566 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to open file!" << std::endl;
574 if(word !=
"GRT_LABELLED_CONTINUOUS_TIME_SERIES_CLASSIFICATION_FILE_V1.0"){
576 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find file header!" << std::endl;
582 if(word !=
"DatasetName:"){
583 errorLog <<
"loadDatasetFromFile(string filename) - failed to find DatasetName!" << std::endl;
590 if(word !=
"InfoText:"){
591 errorLog <<
"loadDatasetFromFile(string filename) - failed to find InfoText!" << std::endl;
599 while( word !=
"NumDimensions:" ){
605 if(word !=
"NumDimensions:"){
606 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find NumDimensions!" << std::endl;
614 if(word !=
"TotalNumSamples:"){
615 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find TotalNumSamples!" << std::endl;
619 file >> totalNumSamples;
623 if(word !=
"NumberOfClasses:"){
624 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find NumberOfClasses!" << std::endl;
631 classTracker.
resize(numClasses);
635 if(word !=
"ClassIDsAndCounters:"){
636 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find ClassIDsAndCounters!" << std::endl;
641 for(UINT i=0; i<classTracker.size(); i++){
642 file >> classTracker[i].classLabel;
643 file >> classTracker[i].counter;
648 if(word !=
"NumberOfPositionTrackers:"){
649 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find NumberOfPositionTrackers!" << std::endl;
653 file >> numTrackingPoints;
654 timeSeriesPositionTracker.
resize( numTrackingPoints );
658 if(word !=
"TimeSeriesPositionTrackers:"){
659 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find TimeSeriesPositionTrackers!" << std::endl;
664 for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
671 timeSeriesPositionTracker[i].setTracker(startIndex,endIndex,classLabel);
676 if(word !=
"UseExternalRanges:"){
677 errorLog <<
"loadDatasetFromFile(string filename) - failed to find DatasetName!" << std::endl;
684 if( useExternalRanges ){
694 if(word !=
"LabelledContinuousTimeSeriesClassificationData:"){
695 errorLog<<
"loadDatasetFromFile(string fileName) - Failed to find LabelledContinuousTimeSeriesClassificationData!" << std::endl;
706 for(UINT i=0; i<totalNumSamples; i++){
713 data[i].set(classLabel,sample);
722 file.open(filename.c_str(), std::ios::out );
724 if( !file.is_open() ){
730 for(UINT i=0; i<data.size(); i++){
731 file << data[i].getClassLabel();
733 file <<
"," << data[i][j];
754 if( !parser.parseCSVFile(filename,
true) ){
755 errorLog <<
"loadDatasetFromCSVFile(const std::string filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl;
759 if( !parser.getConsistentColumnSize() ){
760 errorLog <<
"loadDatasetFromCSVFile(const std::string filename,const UINT classLabelColumnIndex) - The CSV file does not have a consistent number of columns!" << std::endl;
764 if( parser.getColumnSize() <= 1 ){
765 errorLog <<
"loadDatasetFromCSVFile(const std::string filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl;
775 for(UINT i=0; i<parser.getRowSize(); i++){
783 if( n != classLabelColumnIndex ){
791 warningLog <<
"loadDatasetFromCSVFile(const std::string filename,const UINT classLabelColumnIndex) - Could not add sample " << i <<
" to the dataset!" << std::endl;
800 std::cout <<
"DatasetName:\t" <<
datasetName << std::endl;
801 std::cout <<
"DatasetInfo:\t" <<
infoText << std::endl;
802 std::cout <<
"Number of Dimensions:\t" <<
numDimensions << std::endl;
803 std::cout <<
"Number of Samples:\t" << totalNumSamples << std::endl;
804 std::cout <<
"Number of Classes:\t" <<
getNumClasses() << std::endl;
805 std::cout <<
"ClassStats:\n";
808 std::cout <<
"ClassLabel:\t" << classTracker[k].classLabel;
809 std::cout <<
"\tNumber of Samples:\t" << classTracker[k].counter;
810 std::cout <<
"\tClassName:\t" << classTracker[k].className << std::endl;
813 std::cout <<
"TimeSeriesMarkerStats:\n";
814 for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
815 std::cout <<
"ClassLabel: " << timeSeriesPositionTracker[i].getClassLabel();
816 std::cout <<
"\tStartIndex: " << timeSeriesPositionTracker[i].getStartIndex();
817 std::cout <<
"\tEndIndex: " << timeSeriesPositionTracker[i].getEndIndex();
818 std::cout <<
"\tLength: " << timeSeriesPositionTracker[i].getLength() << std::endl;
823 std::cout <<
"Dataset Ranges:\n";
824 for(UINT j=0; j<ranges.size(); j++){
825 std::cout <<
"[" << j+1 <<
"] Min:\t" << ranges[j].minValue <<
"\tMax: " << ranges[j].maxValue << std::endl;
835 if( endIndex >= totalNumSamples ){
836 warningLog <<
"getSubset(const UINT startIndex,const UINT endIndex) - The endIndex is greater than or equal to the number of samples in the current dataset!" << std::endl;
840 if( startIndex >= endIndex ){
841 warningLog <<
"getSubset(const UINT startIndex,const UINT endIndex) - The startIndex is greater than or equal to the endIndex!" << std::endl;
851 for(UINT i=startIndex; i<=endIndex; i++){
852 subset.
addSample(data[i].getClassLabel(), data[i].getSample());
866 const UINT numTimeseries = (UINT)timeSeriesPositionTracker.size();
867 for(UINT i=0; i<numTimeseries; i++){
868 addSample = includeNullGestures ?
true : timeSeriesPositionTracker[i].getClassLabel() != GRT_DEFAULT_NULL_CLASS_LABEL;
885 for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
886 addSample = includeNullGestures ?
true : timeSeriesPositionTracker[i].getClassLabel() != GRT_DEFAULT_NULL_CLASS_LABEL;
889 for(UINT j=0; j<dataSegment.
getNumRows(); j++){
890 classificationData.
addSample(timeSeriesPositionTracker[i].getClassLabel(), dataSegment.
getRow(j) );
895 return classificationData;
901 warningLog <<
"getTimeSeriesData(TimeSeriesPositionTracker trackerInfo) - Invalid tracker indexs!" << std::endl;
907 UINT M = endIndex > 0 ? trackerInfo.
getLength() : totalNumSamples - startIndex;
911 for(UINT i=0; i<M; i++){
912 for(UINT j=0; j<N; j++){
913 tsData[i][j] = data[ i+startIndex ][j];
923 for(UINT i=0; i<M; i++){
924 for(UINT j=0; j<N; j++){
925 matrixData[i][j] = data[i][j];
932 const UINT K = (UINT)classTracker.size();
935 for(UINT i=0; i<K; i++){
936 classLabels[i] = classTracker[i].classLabel;
bool loadDatasetFromCSVFile(const std::string &filename, const UINT classLabelColumnIndex=0)
TimeSeriesClassificationData getTimeSeriesClassificationData(const bool includeNullGestures=false) const
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
UINT getNumSamples() const
bool enableExternalRangeScaling(const bool useExternalRanges)
bool addSample(UINT classLabel, const VectorFloat &sample)
bool save(const std::string &filename)
static Float scale(const Float &x, const Float &minSource, const Float &maxSource, const Float &minTarget, const Float &maxTarget, const bool constrain=false)
bool setDatasetName(const std::string datasetName)
static Float stringToFloat(const std::string &s)
ClassificationDataStream(const UINT numDimensions=0, const std::string datasetName="NOT_SET", const std::string infoText="")
bool setNumDimensions(const UINT numDimensions)
ClassificationDataStream & operator=(const ClassificationDataStream &rhs)
bool resetPlaybackIndex(const UINT playbackIndex)
UINT getStartIndex() const
virtual bool resize(const unsigned int size)
bool setNumDimensions(UINT numDimensions)
bool setExternalRanges(const Vector< MinMax > &externalRanges, const bool useExternalRanges=false)
std::string getClassNameForCorrespondingClassLabel(const UINT classLabel)
bool saveDatasetToCSVFile(const std::string &filename)
bool scale(const Float minTarget, const Float maxTarget)
virtual ~ClassificationDataStream()
bool load(const std::string &filename)
DebugLog debugLog
Default debugging log.
bool setAllowNullGestureClass(bool allowNullGestureClass)
ClassificationData getClassificationData(const bool includeNullGestures=false) const
Vector< UINT > getClassLabels() const
ErrorLog errorLog
Default error log.
bool useExternalRanges
A flag to show if the dataset should be scaled using the externalRanges values.
ClassificationDataStream getSubset(const UINT startIndex, const UINT endIndex) const
UINT getNumDimensions() const
bool loadDatasetFromFile(const std::string &filename)
Vector< MinMax > externalRanges
A Vector containing a set of externalRanges set by the user.
ClassificationSample getNextSample()
std::string infoText
Some infoText about the dataset.
UINT getMaximumClassLabel() const
bool setInfoText(const std::string infoText)
UINT numDimensions
The number of dimensions in the dataset.
std::string datasetName
The name of the dataset.
The ClassificationDataStream is the main data structure for recording, labeling, managing, saving, and loading datasets that can be used to test the continuous classification abilities of the GRT supervised learning algorithms.
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
UINT getMinimumClassLabel() const
unsigned int getNumRows() const
MatrixFloat getDataAsMatrixFloat() const
unsigned int getNumCols() const
bool addSample(const UINT classLabel, const VectorFloat &sample)
bool saveDatasetToFile(const std::string &filename)
bool addSample(const UINT classLabel, const MatrixFloat &trainingSample)
VectorFloat getRow(const unsigned int r) const
UINT getNumClasses() const
bool setNumDimensions(const UINT numDimensions)
std::string getInfoText() const
static bool stringEndsWith(const std::string &str, const std::string &ending)
MatrixFloat getTimeSeriesData(const TimeSeriesPositionTracker &trackerInfo) const
Vector< MinMax > getRanges() const
static int stringToInt(const std::string &s)
WarningLog warningLog
Default warning log.
TimeSeriesClassificationData getAllTrainingExamplesWithClassLabel(const UINT classLabel) const
bool push_back(const Vector< T > &sample)
UINT getClassLabelIndexValue(const UINT classLabel) const
bool setClassNameForCorrespondingClassLabel(const std::string className, const UINT classLabel)
bool setAllowNullGestureClass(const bool allowNullGestureClass)
std::string getDatasetName() const