Marsyas  0.6.0-alpha
/usr/src/RPM/BUILD/marsyas-0.6.0/src/marsyas/marsystems/WekaSink.cpp
Go to the documentation of this file.
00001 /*
00002 ** Copyright (C) 1998-2010 George Tzanetakis <gtzan@cs.uvic.ca>
00003 **
00004 ** This program is free software; you can redistribute it and/or modify
00005 ** it under the terms of the GNU General Public License as published by
00006 ** the Free Software Foundation; either version 2 of the License, or
00007 ** (at your option) any later version.
00008 **
00009 ** This program is distributed in the hope that it will be useful,
00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 ** GNU General Public License for more details.
00013 **
00014 ** You should have received a copy of the GNU General Public License
00015 ** along with this program; if not, write to the Free Software
00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017 */
00018 
00019 #include "WekaSink.h"
00020 #include "../common_source.h"
00021 
00022 using namespace std;
00023 using namespace Marsyas;
00024 
00025 WekaSink::WekaSink(mrs_string name) : MarSystem("WekaSink",name)
00026 {
00027   mos_ = NULL;
00028   stabilizingTicks_ = 0;
00029   addControls();
00030 }
00031 
00032 WekaSink::~WekaSink()
00033 {
00034   // Close the output stream if required.
00035   if (mos_ != NULL)
00036   {
00037     mos_->close();
00038     delete mos_;
00039   }
00040 }
00041 
00042 WekaSink::WekaSink(const WekaSink& a) : MarSystem(a)
00043 {
00044   mos_ = NULL;
00045   stabilizingTicks_ = 0;
00046 
00047   ctrl_regression_ = getControl("mrs_bool/regression");
00048   ctrl_putHeader_ = getControl("mrs_bool/putHeader");
00049   ctrl_labelNames_ = getControl("mrs_string/labelNames");
00050   ctrl_nLabels_ = getControl("mrs_natural/nLabels");
00051   ctrl_precision_ = getControl("mrs_natural/precision");
00052   ctrl_downsample_ = getControl("mrs_natural/downsample");
00053   ctrl_filename_ = getControl("mrs_string/filename");
00054   ctrl_currentlyPlaying_ = getControl("mrs_string/currentlyPlaying");
00055   ctrl_inject_ = getControl("mrs_bool/inject");
00056   ctrl_injectComment_ = getControl("mrs_string/injectComment");
00057   ctrl_injectVector_ = getControl("mrs_realvec/injectVector");
00058   ctrl_onlyStable_ = getControl("mrs_bool/onlyStable");
00059   ctrl_resetStable_ = getControl("mrs_bool/resetStable");
00060 }
00061 
00062 MarSystem*
00063 WekaSink::clone() const
00064 {
00065   return new WekaSink(*this);
00066 }
00067 
00068 void
00069 WekaSink::addControls()
00070 {
00071   addctrl("mrs_natural/precision", 6, ctrl_precision_);
00072   setctrlState("mrs_natural/precision", true);
00073   addctrl("mrs_string/filename", "weka.arff", ctrl_filename_);
00074   setctrlState("mrs_string/filename", true);
00075   addctrl("mrs_natural/nLabels", 2, ctrl_nLabels_);
00076   addctrl("mrs_natural/downsample", 1, ctrl_downsample_);
00077   setctrlState("mrs_natural/downsample", true);
00078   addctrl("mrs_string/labelNames", "Music,Speech", ctrl_labelNames_);
00079   setctrlState("mrs_string/labelNames", true);
00080 
00081 
00082   addctrl("mrs_bool/regression", false, ctrl_regression_);
00083   addctrl("mrs_string/currentlyPlaying", "", ctrl_currentlyPlaying_);
00084   addctrl("mrs_bool/putHeader", false, ctrl_putHeader_);
00085   setctrlState(ctrl_putHeader_, true);
00086   addctrl("mrs_bool/inject", false, ctrl_inject_);
00087   setctrlState(ctrl_inject_, true);
00088   addctrl("mrs_string/injectComment", "", ctrl_injectComment_);
00089   setctrlState(ctrl_injectComment_, true);
00090   addctrl("mrs_realvec/injectVector", realvec(), ctrl_injectVector_);
00091   setctrlState(ctrl_injectVector_, true);
00092 
00093   addctrl("mrs_bool/onlyStable", false, ctrl_onlyStable_);
00094   setctrlState(ctrl_onlyStable_, true);
00095   addctrl("mrs_bool/resetStable", false, ctrl_resetStable_);
00096 }
00097 
00098 void
00099 WekaSink::putHeader(mrs_string inObsNames)
00100 {
00101   //updctrl(ctrl_putHeader_, false);
00102   ctrl_putHeader_->setValue(true);
00103 
00104   // Only write the header when we are dealing with a new file, i.e. when
00105   // the filename setting differs from the filename we were (previously)
00106   // writing to.
00107   if ((filename_ != ctrl_filename_->to<mrs_string>()))
00108   {
00109     // Close the previously used output file if needed and cleanup.
00110     if (mos_ != NULL)
00111     {
00112       mos_->close();
00113       delete mos_;
00114       // TODO: do something about this ugly hack.
00115       if (filename_ == "weka.arff")
00116       {
00117         remove(filename_.c_str());
00118       }
00119     }
00120 
00121     // Set the current filename to the new value.
00122     filename_ = ctrl_filename_->to<mrs_string>();
00123 
00124     // Open a new output stream.
00125     mos_ = new ofstream;
00126     mos_->open(filename_.c_str());
00127 
00128     // General header stuff.
00129     (*mos_) << "% Created by Marsyas" << endl;
00130     (*mos_) << "@relation " << filename_ << endl;
00131 
00132     // The number of attributes is one less than the number of input
00133     // observations because we assume the last observation is for the label?
00134     // TODO: why this assumption? What if a use case requires two labels per
00135     // feature vector or no labels?
00136     // There is no such assumption is the WEKA ARFF format anyway.
00137     mrs_natural nAttributes = ctrl_inObservations_->to<mrs_natural>() - 1;
00138     mrs_natural nLabels = ctrl_nLabels_->to<mrs_natural>();
00139 
00140     // Print the attribute names.
00141     // TODO: this is could be done way more elegant
00142     // (e.g. using a 'split()' or 'explode()' function).
00143     mrs_natural i;
00144     for (i =0; i < nAttributes; ++i)
00145     {
00146       mrs_string inObsName;
00147       mrs_string temp;
00148       inObsName = inObsNames.substr(0, inObsNames.find(","));
00149       temp = inObsNames.substr(inObsNames.find(",") + 1, inObsNames.length());
00150       inObsNames = temp;
00151       // TODO: what's the point of using an extra ostringstream here?
00152       ostringstream oss;
00153       // oss << "attribute" << i;
00154       (*mos_) << "@attribute " << inObsName << " real" << endl;
00155     }
00156 
00157     // The attribute for the label.
00158     if (!ctrl_regression_->isTrue())
00159     {
00160       (*mos_) << "@attribute output {";
00161       // TODO: this could be done way more elegant
00162       // (e.g. with a 'join()' or 'implode()' function).
00163       for (i=0; i < nLabels; ++i)
00164       {
00165         // TODO: what's the point of using an extra ostringstream here?
00166         ostringstream oss;
00167         // oss << "label" << i;
00168         oss << labelNames_[i];
00169         (*mos_) << oss.str();
00170         if (i < nLabels - 1)
00171         {
00172           (*mos_) << ",";
00173         }
00174         // (*mos_) << "@attribute output {music,speech}" << endl;
00175       }
00176       (*mos_) << "}" << endl;
00177     }
00178     else
00179     {
00180       (*mos_) << "@attribute output real" << endl;
00181     }
00182 
00183     // End of header, now we are ready for outputting the data.
00184     (*mos_) << "\n\n@data" << endl;
00185   }
00186 }
00187 
00188 void
00189 WekaSink::myUpdate(MarControlPtr sender)
00190 {
00191   MRSDIAG("WekaSink.cpp - WekaSink:myUpdate");
00192 
00193   MarSystem::myUpdate(sender);
00194 
00195   // (Re)build the list of label names.
00196   mrs_string labelNames = ctrl_labelNames_->to<mrs_string>();
00197 
00198   labelNames_.clear();
00199   // TODO: this could be done way more elegant
00200   // (e.g. by using a split() or explode() function).
00201   for (int i = 0; i < ctrl_nLabels_->to<mrs_natural>(); ++i)
00202   {
00203     mrs_string labelName;
00204     mrs_string temp;
00205     labelName = labelNames.substr(0, labelNames.find(","));
00206     temp = labelNames.substr(labelNames.find(",") + 1, labelNames.length());
00207     labelNames = temp;
00208     labelNames_.push_back(labelName);
00209   }
00210 
00211   downsample_ = ctrl_downsample_->to<mrs_natural>();
00212   ctrl_israte_->setValue(israte_ / downsample_, NOUPDATE);
00213 
00214 
00215 
00216   // If not muted: write the header with the observation names.
00217   if (!ctrl_mute_->isTrue())
00218   {
00219     mrs_string onObsNames = ctrl_onObsNames_->to<mrs_string>();
00220     putHeader(onObsNames);
00221   }
00222 
00223 
00224 
00225 
00226   // Optional injecting of additional data.
00227   // TODO: this should be refactored together with the printing
00228   // from WekaSink::myProcess().
00229   if (!ctrl_mute_->isTrue())
00230   {
00231     if (ctrl_inject_->isTrue())
00232     {
00233       (* mos_) << ctrl_injectComment_->to<mrs_string>() << endl;
00234       (* mos_) << "% srate " << israte_ << endl;
00235       ctrl_inject_->setValue(false, NOUPDATE);
00236       MarControlAccessor acc_injectVector(ctrl_injectVector_);
00237       realvec& injectVector = acc_injectVector.to<mrs_realvec>();
00238 
00239       for (mrs_natural j=0; j < injectVector.getSize() - 1; j++)
00240       {
00241         (*mos_) << fixed << setprecision(precision_) << injectVector(j) << ",";
00242       }
00243       // TODO: the following assumes that the last item is a label, which is
00244       // not always true, see ctrl_regression.
00245       int label = (int)injectVector(injectVector.getSize() - 1);
00246       // TODO: what's the point of all those ostringstreams?
00247       ostringstream oss;
00248       oss << labelNames_[label];
00249       (*mos_) << oss.str();
00250       (*mos_) << endl;
00251     }
00252   }
00253   precision_ = ctrl_precision_->to<mrs_natural>();
00254 
00255 
00256 
00257   // initalize the downsample count
00258   count_ = 0;
00259 }
00260 
00261 void
00262 WekaSink::myProcess(realvec& in, realvec& out)
00263 {
00264   mrs_natural o,t;
00265   // If muted: just copy input to output.
00266   if (ctrl_mute_->isTrue())
00267   {
00268     for (o=0; o < inObservations_; o++)
00269     {
00270       for (t = 0; t < inSamples_; t++)
00271       {
00272         out(o,t) =  in(o,t);
00273       }
00274     }
00275     return;
00276   }
00277 
00278   mrs_bool print_line;
00279   if (ctrl_onlyStable_->isTrue())
00280   {
00281     stabilizingTicks_++;
00282     // under normal circumstances, do we print it?
00283     // use <= because we just incremented it.
00284     if (stabilizingTicks_ <= ctrl_inStabilizingDelay_->to<mrs_natural>()) {
00285       print_line = false;
00286     } else {
00287       print_line = true;
00288     }
00289     // what about special circumstances?
00290     if (ctrl_resetStable_->isTrue()) {
00291       stabilizingTicks_ = 0;
00292       // end of file
00293       if (ctrl_currentlyPlaying_->to<mrs_string>() == prev_playing_) {
00294         print_line = false;
00295       }
00296     }
00297   } else {
00298     print_line = true;
00299   }
00300 
00301   // Counter for handling the decimation (see ctrl_downsample).
00302 
00303   mrs_natural label_class = 0;
00304 
00305 
00306   for (t = 0; t < inSamples_; t++)
00307   {
00308     // Add a comment about the current input file.
00309     if (ctrl_currentlyPlaying_->to<mrs_string>() != prev_playing_)
00310     {
00311       (*mos_) << "% filename " << ctrl_currentlyPlaying_->to<mrs_string>() << endl;
00312       (*mos_) << "% srate " << israte_ << endl;
00313       prev_playing_ = ctrl_currentlyPlaying_->to<mrs_string>();
00314     }
00315 
00316     // round value, in case of weird floating-point effects
00317     label_class = (mrs_natural) (in(inObservations_ - 1, t) + 0.5);
00318 
00319     // Output all but last feature values.
00320     // TODO: this should be refactored together with the injection stuff from
00321     // WekaSink::myUpdate().
00322     for (o=0; o < inObservations_; o++)
00323     {
00324       out(o,t) = in(o,t);
00325       if ((label_class >= 0) || (ctrl_regression_->isTrue()))
00326       {
00327         if (o < inObservations_ - 1)
00328         {
00329           if ((count_ % downsample_) == 0)
00330           {
00331             if (print_line)
00332             {
00333               if ( out(o,t) != out(o,t) )   // Jen's NaN check for MIREX 05
00334               {
00335                 // (*mos_) << fixed << setprecision(precision_) << 0. << ",";
00336                 // DO NOT OUTPUT FEATURES
00337                 // (*mos_) << fixed << setprecision(precision_) << 0. << ",";
00338                 //notPrint = true;
00339                 (*mos_) << "?" << ",";
00340               }
00341               else
00342               {
00343                 (*mos_) << fixed << setprecision(precision_) << out(o,t) << ",";
00344                 //notPrint = false;
00345               }
00346             }
00347           }
00348         }
00349       }
00350     }
00351 
00352     // Output last value (e.g. as label).
00353     ostringstream oss;
00354     if ((count_ % downsample_) == 0)
00355     {
00356       if (print_line)
00357       {
00358         if (!ctrl_regression_->isTrue())
00359         {
00360           if (label_class >= 0)
00361           {
00362             //  if (!notPrint)
00363             //{
00364             if (label_class >= (mrs_natural)labelNames_.size())
00365             {
00366               MRSWARN("WekaSink: label number is too big");
00367               oss << "non-label";
00368             }
00369             else
00370             {
00371               oss << labelNames_[label_class];
00372             }
00373             (*mos_) << oss.str();
00374             (*mos_) << endl;
00375           }
00376           //  else
00377           //{
00378           //  cout << "skipping instance" << endl;
00379           //}
00380           //}
00381         }
00382         else
00383         {
00384           (*mos_) << in(inObservations_ - 1, t);
00385           (*mos_) << endl;
00386         }
00387       }
00388     }
00389   }
00390   count_++;
00391 }