Marsyas  0.6.0-alpha
/usr/src/RPM/BUILD/marsyas-0.6.0/src/marsyas/WekaData.h
Go to the documentation of this file.
00001 #ifndef MARSYAS_WEKADATA_H
00002 #define MARSYAS_WEKADATA_H
00003 
00004 #include <marsyas/system/MarSystem.h>
00005 #include <list>
00006 #include <vector>
00007 #include <cfloat>
00008 #include <cstdlib>
00009 #include <cstring>
00010 
00011 namespace Marsyas
00012 {
00013 //This class represents a collection of row data read from a weka arff file.
00014 //It is organized as a vector collection of vector pointers.
00015 //It is optimized for fast sorting and shuffling of the data. It is not intended
00016 //that the data change once it is loaded.
00017 //
00018 //It is also assumed that the last column of each row is the class attribute.
00019 //All data items are mrs_real, including the class attribute, however the class
00020 //attribute should be interpreted as an mrs_natural.
00021 class WekaData : public std::vector<std::vector<mrs_real>*> // table of rows; each element points to one row's values
00022 {
00023 public:
00024   WekaData();
00025   virtual ~WekaData(); // NOTE(review): std::vector's destructor is not virtual; avoid deleting a WekaData through a std::vector pointer
00026   // NOTE(review): rows are held as raw pointers and no copy constructor/assignment is declared here; confirm copy semantics in WekaData.cpp
00027   // Create the table. Will clear the contents first and fix the number of columns.
00028   void Create(mrs_natural cols);
00029 
00030   // Clear all data from the table.
00031   void Clear();
00032 
00033   // Randomly shuffle the data in the table.
00034   void Shuffle();
00035 
00036   // NormMaxMin: normalize the data with the minimums and maximums.
00037   void NormMaxMin();
00038   // Presumably applies the same min/max normalization to the single row `in`, in place (non-const reference) -- TODO confirm in WekaData.cpp
00039   void NormMaxMinRow(realvec& in);
00040 
00041   // Sort the table based on the attribute (column) attIndex. Sorts in ascending order.
00042   void Sort(mrs_natural attIndex);
00043 
00044   // Add rows of data to the table.
00045   void Append(const realvec& in); // takes the values from a realvec
00046   void Append(std::vector<mrs_real> *); // takes a row pointer; NOTE(review): ownership is not visible here -- confirm whether the table frees it
00047 
00048   // Add a filename to the table (see GetFilename for the per-row lookup).
00049   void AppendFilename(mrs_string);
00050 
00051   // Return the number of columns (including the class attribute) and the number of rows.
00052   inline mrs_natural getCols()const {return cols_;}
00053   inline mrs_natural getRows()const {return rows_;} // returns the rows_ member, not std::vector::size()
00054 
00055   // Get the class attribute for a row, converted to an integer (mrs_natural).
00056   mrs_natural GetClass(mrs_natural row)const;
00057 
00058   // Get the filename for a row.
00059   mrs_string GetFilename(mrs_natural row)const;
00060 
00061   // Debug helper function to dump the table to an ASCII file.
00062   void Dump(const std::string& filename, const std::vector<std::string>& classNames)const;
00063 
00064   // Get the minimums_ and maximums_ attributes (returned by value).
00065   mrs_realvec GetMinimums()const;
00066   mrs_realvec GetMaximums()const;
00067 
00068   void setFold(bool);               // flag that this data is used for a fold
00069   // Original note for fold data: "don't Clear". Presumably Clear() must not release rows of a fold -- TODO confirm in WekaData.cpp
00070 
00071 
00072 private:
00073 
00074   // Table state. cols_ and rows_ are the values returned by getCols() and getRows().
00075   mrs_natural cols_;
00076   mrs_natural rows_;
00077   mrs_bool isFold_; // presumably the flag set through setFold() -- confirm
00078   realvec minimums_; // exposed through GetMinimums()
00079   realvec maximums_; // exposed through GetMaximums()
00080   std::vector<mrs_real> *data_; // NOTE(review): purpose and ownership are not visible in this header -- confirm in WekaData.cpp
00081   std::vector<mrs_string> filenames_; // presumably the filenames added with AppendFilename() -- confirm
00082   // Private sorting helpers; the names suggest a quicksort used by Sort() -- confirm in WekaData.cpp
00083   void quickSort(mrs_natural attIndex, mrs_natural left, mrs_natural right);
00084   void swapRows(mrs_natural l, mrs_natural r);
00085   mrs_natural partition(mrs_natural attIndex, mrs_natural l, mrs_natural r);
00086 
00087 };//class WekaData
00088 }
00089 #endif