Marsyas
0.6.0-alpha
|
00001 #ifndef MARSYAS_WEKADATA_H 00002 #define MARSYAS_WEKADATA_H 00003 00004 #include <marsyas/system/MarSystem.h> 00005 #include <list> 00006 #include <vector> 00007 #include <cfloat> 00008 #include <cstdlib> 00009 #include <cstring> 00010 00011 namespace Marsyas 00012 { 00013 //This class represents a collection of row data read from a weka arff file. 00014 //It is organized as a vector collection of vector pointers. 00015 //It is optimized for fast sorting and shuffling of the data. It is not intended 00016 //that the data change once it is loaded. 00017 // 00018 //It is also assumed that the last column of each row is the class attribute. 00019 //All data items are mrs_real, including the class attribute, however the class 00020 //attribute should be interpreted as an mrs_natural. 00021 class WekaData : public std::vector<std::vector<mrs_real>*> 00022 { 00023 public: 00024 WekaData(); 00025 virtual ~WekaData(); 00026 00027 //create the table. Will clear contents first and fix the number of columns. 00028 void Create(mrs_natural cols); 00029 00030 //clear all data from the table 00031 void Clear(); 00032 00033 //randomly shuffle the data in the table 00034 void Shuffle(); 00035 00036 // NormMaxMin normalize the data with minimums and maximums 00037 void NormMaxMin(); 00038 00039 void NormMaxMinRow(realvec& in); 00040 00041 //sort the table based on an attribute. sorts in ascending order. 00042 void Sort(mrs_natural attIndex); 00043 00044 //add rows of data to the table 00045 void Append(const realvec& in); 00046 void Append(std::vector<mrs_real> *); 00047 00048 // Add a filename to the table 00049 void AppendFilename(mrs_string); 00050 00051 //return the number of columns(including the class attribute) 00052 inline mrs_natural getCols()const {return cols_;} 00053 inline mrs_natural getRows()const {return rows_;} 00054 00055 //get the class attribute for a row and convert to a int 00056 mrs_natural GetClass(mrs_natural row)const; 00057 00058 // get the filename for a row 00059 mrs_string GetFilename(mrs_natural row)const; 00060 00061 //debug helper funtion to dump table to an ascii file 00062 void Dump(const std::string& filename, const std::vector<std::string>& classNames)const; 00063 00064 //get the minimums_ and maximums_ attribute 00065 mrs_realvec GetMinimums()const; 00066 mrs_realvec GetMaximums()const; 00067 00068 void setFold(bool); // data is used for a fold 00069 // don't Clear 00070 00071 00072 private: 00073 00074 00075 mrs_natural cols_; 00076 mrs_natural rows_; 00077 mrs_bool isFold_; 00078 realvec minimums_; 00079 realvec maximums_; 00080 std::vector<mrs_real> *data_; 00081 std::vector<mrs_string> filenames_; 00082 //some sorting private functions 00083 void quickSort(mrs_natural attIndex, mrs_natural left, mrs_natural right); 00084 void swapRows(mrs_natural l, mrs_natural r); 00085 mrs_natural partition(mrs_natural attIndex, mrs_natural l, mrs_natural r); 00086 00087 };//class WekaData 00088 } 00089 #endif