Marsyas
0.6.0-alpha
|
00001 /* 00002 ** Copyright (C) 1998-2010 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #include "WekaSink.h" 00020 #include "../common_source.h" 00021 00022 using namespace std; 00023 using namespace Marsyas; 00024 00025 WekaSink::WekaSink(mrs_string name) : MarSystem("WekaSink",name) 00026 { 00027 mos_ = NULL; 00028 stabilizingTicks_ = 0; 00029 addControls(); 00030 } 00031 00032 WekaSink::~WekaSink() 00033 { 00034 // Close the output stream if required. 00035 if (mos_ != NULL) 00036 { 00037 mos_->close(); 00038 delete mos_; 00039 } 00040 } 00041 00042 WekaSink::WekaSink(const WekaSink& a) : MarSystem(a) 00043 { 00044 mos_ = NULL; 00045 stabilizingTicks_ = 0; 00046 00047 ctrl_regression_ = getControl("mrs_bool/regression"); 00048 ctrl_putHeader_ = getControl("mrs_bool/putHeader"); 00049 ctrl_labelNames_ = getControl("mrs_string/labelNames"); 00050 ctrl_nLabels_ = getControl("mrs_natural/nLabels"); 00051 ctrl_precision_ = getControl("mrs_natural/precision"); 00052 ctrl_downsample_ = getControl("mrs_natural/downsample"); 00053 ctrl_filename_ = getControl("mrs_string/filename"); 00054 ctrl_currentlyPlaying_ = getControl("mrs_string/currentlyPlaying"); 00055 ctrl_inject_ = getControl("mrs_bool/inject"); 00056 ctrl_injectComment_ = getControl("mrs_string/injectComment"); 00057 ctrl_injectVector_ = getControl("mrs_realvec/injectVector"); 00058 ctrl_onlyStable_ = getControl("mrs_bool/onlyStable"); 00059 ctrl_resetStable_ = getControl("mrs_bool/resetStable"); 00060 } 00061 00062 MarSystem* 00063 WekaSink::clone() const 00064 { 00065 return new WekaSink(*this); 00066 } 00067 00068 void 00069 WekaSink::addControls() 00070 { 00071 addctrl("mrs_natural/precision", 6, ctrl_precision_); 00072 setctrlState("mrs_natural/precision", true); 00073 addctrl("mrs_string/filename", "weka.arff", ctrl_filename_); 00074 setctrlState("mrs_string/filename", true); 00075 addctrl("mrs_natural/nLabels", 2, ctrl_nLabels_); 00076 addctrl("mrs_natural/downsample", 1, ctrl_downsample_); 00077 setctrlState("mrs_natural/downsample", true); 00078 addctrl("mrs_string/labelNames", "Music,Speech", ctrl_labelNames_); 00079 setctrlState("mrs_string/labelNames", true); 00080 00081 00082 addctrl("mrs_bool/regression", false, ctrl_regression_); 00083 addctrl("mrs_string/currentlyPlaying", "", ctrl_currentlyPlaying_); 00084 addctrl("mrs_bool/putHeader", false, ctrl_putHeader_); 00085 setctrlState(ctrl_putHeader_, true); 00086 addctrl("mrs_bool/inject", false, ctrl_inject_); 00087 setctrlState(ctrl_inject_, true); 00088 addctrl("mrs_string/injectComment", "", ctrl_injectComment_); 00089 setctrlState(ctrl_injectComment_, true); 00090 addctrl("mrs_realvec/injectVector", realvec(), ctrl_injectVector_); 00091 setctrlState(ctrl_injectVector_, true); 00092 00093 addctrl("mrs_bool/onlyStable", false, ctrl_onlyStable_); 00094 setctrlState(ctrl_onlyStable_, true); 00095 addctrl("mrs_bool/resetStable", false, ctrl_resetStable_); 00096 } 00097 00098 void 00099 WekaSink::putHeader(mrs_string inObsNames) 00100 { 00101 //updctrl(ctrl_putHeader_, false); 00102 ctrl_putHeader_->setValue(true); 00103 00104 // Only write the header when we are dealing with a new file, i.e. when 00105 // the filename setting differs from the filename we were (previously) 00106 // writing to. 00107 if ((filename_ != ctrl_filename_->to<mrs_string>())) 00108 { 00109 // Close the previously used output file if needed and cleanup. 00110 if (mos_ != NULL) 00111 { 00112 mos_->close(); 00113 delete mos_; 00114 // TODO: do something about this ugly hack. 00115 if (filename_ == "weka.arff") 00116 { 00117 remove(filename_.c_str()); 00118 } 00119 } 00120 00121 // Set the current filename to the new value. 00122 filename_ = ctrl_filename_->to<mrs_string>(); 00123 00124 // Open a new output stream. 00125 mos_ = new ofstream; 00126 mos_->open(filename_.c_str()); 00127 00128 // General header stuff. 00129 (*mos_) << "% Created by Marsyas" << endl; 00130 (*mos_) << "@relation " << filename_ << endl; 00131 00132 // The number of attributes is one less than the number of input 00133 // observations because we assume the last observation is for the label? 00134 // TODO: why this assumption? What if a use case requires two labels per 00135 // feature vector or no labels? 00136 // There is no such assumption is the WEKA ARFF format anyway. 00137 mrs_natural nAttributes = ctrl_inObservations_->to<mrs_natural>() - 1; 00138 mrs_natural nLabels = ctrl_nLabels_->to<mrs_natural>(); 00139 00140 // Print the attribute names. 00141 // TODO: this is could be done way more elegant 00142 // (e.g. using a 'split()' or 'explode()' function). 00143 mrs_natural i; 00144 for (i =0; i < nAttributes; ++i) 00145 { 00146 mrs_string inObsName; 00147 mrs_string temp; 00148 inObsName = inObsNames.substr(0, inObsNames.find(",")); 00149 temp = inObsNames.substr(inObsNames.find(",") + 1, inObsNames.length()); 00150 inObsNames = temp; 00151 // TODO: what's the point of using an extra ostringstream here? 00152 ostringstream oss; 00153 // oss << "attribute" << i; 00154 (*mos_) << "@attribute " << inObsName << " real" << endl; 00155 } 00156 00157 // The attribute for the label. 00158 if (!ctrl_regression_->isTrue()) 00159 { 00160 (*mos_) << "@attribute output {"; 00161 // TODO: this could be done way more elegant 00162 // (e.g. with a 'join()' or 'implode()' function). 00163 for (i=0; i < nLabels; ++i) 00164 { 00165 // TODO: what's the point of using an extra ostringstream here? 00166 ostringstream oss; 00167 // oss << "label" << i; 00168 oss << labelNames_[i]; 00169 (*mos_) << oss.str(); 00170 if (i < nLabels - 1) 00171 { 00172 (*mos_) << ","; 00173 } 00174 // (*mos_) << "@attribute output {music,speech}" << endl; 00175 } 00176 (*mos_) << "}" << endl; 00177 } 00178 else 00179 { 00180 (*mos_) << "@attribute output real" << endl; 00181 } 00182 00183 // End of header, now we are ready for outputting the data. 00184 (*mos_) << "\n\n@data" << endl; 00185 } 00186 } 00187 00188 void 00189 WekaSink::myUpdate(MarControlPtr sender) 00190 { 00191 MRSDIAG("WekaSink.cpp - WekaSink:myUpdate"); 00192 00193 MarSystem::myUpdate(sender); 00194 00195 // (Re)build the list of label names. 00196 mrs_string labelNames = ctrl_labelNames_->to<mrs_string>(); 00197 00198 labelNames_.clear(); 00199 // TODO: this could be done way more elegant 00200 // (e.g. by using a split() or explode() function). 00201 for (int i = 0; i < ctrl_nLabels_->to<mrs_natural>(); ++i) 00202 { 00203 mrs_string labelName; 00204 mrs_string temp; 00205 labelName = labelNames.substr(0, labelNames.find(",")); 00206 temp = labelNames.substr(labelNames.find(",") + 1, labelNames.length()); 00207 labelNames = temp; 00208 labelNames_.push_back(labelName); 00209 } 00210 00211 downsample_ = ctrl_downsample_->to<mrs_natural>(); 00212 ctrl_israte_->setValue(israte_ / downsample_, NOUPDATE); 00213 00214 00215 00216 // If not muted: write the header with the observation names. 00217 if (!ctrl_mute_->isTrue()) 00218 { 00219 mrs_string onObsNames = ctrl_onObsNames_->to<mrs_string>(); 00220 putHeader(onObsNames); 00221 } 00222 00223 00224 00225 00226 // Optional injecting of additional data. 00227 // TODO: this should be refactored together with the printing 00228 // from WekaSink::myProcess(). 00229 if (!ctrl_mute_->isTrue()) 00230 { 00231 if (ctrl_inject_->isTrue()) 00232 { 00233 (* mos_) << ctrl_injectComment_->to<mrs_string>() << endl; 00234 (* mos_) << "% srate " << israte_ << endl; 00235 ctrl_inject_->setValue(false, NOUPDATE); 00236 MarControlAccessor acc_injectVector(ctrl_injectVector_); 00237 realvec& injectVector = acc_injectVector.to<mrs_realvec>(); 00238 00239 for (mrs_natural j=0; j < injectVector.getSize() - 1; j++) 00240 { 00241 (*mos_) << fixed << setprecision(precision_) << injectVector(j) << ","; 00242 } 00243 // TODO: the following assumes that the last item is a label, which is 00244 // not always true, see ctrl_regression. 00245 int label = (int)injectVector(injectVector.getSize() - 1); 00246 // TODO: what's the point of all those ostringstreams? 00247 ostringstream oss; 00248 oss << labelNames_[label]; 00249 (*mos_) << oss.str(); 00250 (*mos_) << endl; 00251 } 00252 } 00253 precision_ = ctrl_precision_->to<mrs_natural>(); 00254 00255 00256 00257 // initalize the downsample count 00258 count_ = 0; 00259 } 00260 00261 void 00262 WekaSink::myProcess(realvec& in, realvec& out) 00263 { 00264 mrs_natural o,t; 00265 // If muted: just copy input to output. 00266 if (ctrl_mute_->isTrue()) 00267 { 00268 for (o=0; o < inObservations_; o++) 00269 { 00270 for (t = 0; t < inSamples_; t++) 00271 { 00272 out(o,t) = in(o,t); 00273 } 00274 } 00275 return; 00276 } 00277 00278 mrs_bool print_line; 00279 if (ctrl_onlyStable_->isTrue()) 00280 { 00281 stabilizingTicks_++; 00282 // under normal circumstances, do we print it? 00283 // use <= because we just incremented it. 00284 if (stabilizingTicks_ <= ctrl_inStabilizingDelay_->to<mrs_natural>()) { 00285 print_line = false; 00286 } else { 00287 print_line = true; 00288 } 00289 // what about special circumstances? 00290 if (ctrl_resetStable_->isTrue()) { 00291 stabilizingTicks_ = 0; 00292 // end of file 00293 if (ctrl_currentlyPlaying_->to<mrs_string>() == prev_playing_) { 00294 print_line = false; 00295 } 00296 } 00297 } else { 00298 print_line = true; 00299 } 00300 00301 // Counter for handling the decimation (see ctrl_downsample). 00302 00303 mrs_natural label_class = 0; 00304 00305 00306 for (t = 0; t < inSamples_; t++) 00307 { 00308 // Add a comment about the current input file. 00309 if (ctrl_currentlyPlaying_->to<mrs_string>() != prev_playing_) 00310 { 00311 (*mos_) << "% filename " << ctrl_currentlyPlaying_->to<mrs_string>() << endl; 00312 (*mos_) << "% srate " << israte_ << endl; 00313 prev_playing_ = ctrl_currentlyPlaying_->to<mrs_string>(); 00314 } 00315 00316 // round value, in case of weird floating-point effects 00317 label_class = (mrs_natural) (in(inObservations_ - 1, t) + 0.5); 00318 00319 // Output all but last feature values. 00320 // TODO: this should be refactored together with the injection stuff from 00321 // WekaSink::myUpdate(). 00322 for (o=0; o < inObservations_; o++) 00323 { 00324 out(o,t) = in(o,t); 00325 if ((label_class >= 0) || (ctrl_regression_->isTrue())) 00326 { 00327 if (o < inObservations_ - 1) 00328 { 00329 if ((count_ % downsample_) == 0) 00330 { 00331 if (print_line) 00332 { 00333 if ( out(o,t) != out(o,t) ) // Jen's NaN check for MIREX 05 00334 { 00335 // (*mos_) << fixed << setprecision(precision_) << 0. << ","; 00336 // DO NOT OUTPUT FEATURES 00337 // (*mos_) << fixed << setprecision(precision_) << 0. << ","; 00338 //notPrint = true; 00339 (*mos_) << "?" << ","; 00340 } 00341 else 00342 { 00343 (*mos_) << fixed << setprecision(precision_) << out(o,t) << ","; 00344 //notPrint = false; 00345 } 00346 } 00347 } 00348 } 00349 } 00350 } 00351 00352 // Output last value (e.g. as label). 00353 ostringstream oss; 00354 if ((count_ % downsample_) == 0) 00355 { 00356 if (print_line) 00357 { 00358 if (!ctrl_regression_->isTrue()) 00359 { 00360 if (label_class >= 0) 00361 { 00362 // if (!notPrint) 00363 //{ 00364 if (label_class >= (mrs_natural)labelNames_.size()) 00365 { 00366 MRSWARN("WekaSink: label number is too big"); 00367 oss << "non-label"; 00368 } 00369 else 00370 { 00371 oss << labelNames_[label_class]; 00372 } 00373 (*mos_) << oss.str(); 00374 (*mos_) << endl; 00375 } 00376 // else 00377 //{ 00378 // cout << "skipping instance" << endl; 00379 //} 00380 //} 00381 } 00382 else 00383 { 00384 (*mos_) << in(inObservations_ - 1, t); 00385 (*mos_) << endl; 00386 } 00387 } 00388 } 00389 } 00390 count_++; 00391 }