Marsyas  0.6.0-alpha
/usr/src/RPM/BUILD/marsyas-0.6.0/src/marsyas/marsystems/BICchangeDetector.cpp
Go to the documentation of this file.
00001 /*
00002 ** Copyright (C) 1998-2011 George Tzanetakis <gtzan@cs.uvic.ca>
00003 **
00004 ** This program is free software; you can redistribute it and/or modify
00005 ** it under the terms of the GNU General Public License as published by
00006 ** the Free Software Foundation; either version 2 of the License, or
00007 ** (at your option) any later version.
00008 **
00009 ** This program is distributed in the hope that it will be useful,
00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 ** GNU General Public License for more details.
00013 **
00014 ** You should have received a copy of the GNU General Public License
00015 ** along with this program; if not, write to the Free Software
00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017 */
00018 
00019 #include "BICchangeDetector.h"
00020 #include "Memory.h"
00021 #include <marsyas/NumericLib.h>
00022 
00023 
00024 using std::ostringstream;
00025 using std::cout;
00026 using std::endl;
00027 
00028 using namespace Marsyas;
00029 
00036 BICchangeDetector::BICchangeDetector(mrs_string name):MarSystem("BICchangeDetector", name)
00037 {
00038   prevDists_ = new Memory("cirMem");
00039   BICTick_ = 0;
00040   nfeats_ = 0;
00041   addControls();
00042   pdists_.create(nrPrevDists_);
00043 }
00044 
00051 BICchangeDetector::BICchangeDetector(const BICchangeDetector& a) : MarSystem(a)
00052 {
00053   prevDists_ = new Memory(*(a.prevDists_));
00054 
00055   BICTick_ = 0;
00056 
00057   nfeats_ = 0;
00058   ctrl_reset_ = getctrl("mrs_bool/reset");
00059   ctrl_alpha1_= getctrl("mrs_real/alpha1");
00060   ctrl_lambda_= getctrl("mrs_real/lambda");
00061   ctrl_prevDists_= getctrl("mrs_natural/prevDists");
00062   ctrl_hopMS_ = getctrl("mrs_natural/hopMillis");
00063   nrPrevDists_ = ctrl_prevDists_->to<mrs_natural>();
00064   pdists_.create(nrPrevDists_);
00065   prev_change_time_ = 0.0;
00066 }
00067 
00068 BICchangeDetector::~BICchangeDetector()
00069 {
00070   delete prevDists_;
00071 }
00072 
00073 MarSystem*
00074 
00080 BICchangeDetector::clone() const
00081 {
00082   return new BICchangeDetector(*this);
00083 }
00084 
00095 void
00096 BICchangeDetector::addControls()
00097 {
00098   addctrl("mrs_bool/reset", true, ctrl_reset_);
00099   setctrlState(ctrl_reset_, true);
00100 
00101   addctrl("mrs_real/alpha1", 0.4, ctrl_alpha1_);
00102   addctrl("mrs_real/lambda", 0.6, ctrl_lambda_);
00103   addctrl("mrs_natural/prevDists",3,ctrl_prevDists_);
00104   addctrl("mrs_natural/hopMillis",16,ctrl_hopMS_);
00105 
00106   dynThres_ = 0.0;
00107   prevDists_->updControl("mrs_natural/inSamples", 1);
00108   prevDists_->updControl("mrs_natural/inObservations", 1);
00109   nrPrevDists_ = getctrl("mrs_natural/prevDists")->to<mrs_natural>();
00110   prevDists_->updControl("mrs_natural/memSize", nrPrevDists_); //store 3 previous distances, for dynamic thresholding
00111   prev_change_time_ = 0.0;
00112 
00113 }
00114 
00137 void
00138 BICchangeDetector::myUpdate(MarControlPtr sender)
00139 {
00140   MarSystem::myUpdate(sender);
00141 
00142   //BICchangeDetector must receive as input a vector of feature frames as depicted bellow,
00143   //which includes 4 speech sub-segments (C1, C2, C3, C4) that will be used for detecting
00144   //speaker changes:
00145   // |------------------------|
00146   //      C1        C2
00147   // |----+----|----+----|
00148   //      |----+----|----+----|
00149   //           C3        C4
00150   // |--->|
00151   //   hop
00152   //
00153   // For now, hop is set fixed as 1/5 of inSamples [!]
00154   //
00155   if(segFrames_ != ctrl_inSamples_->to<mrs_natural>()*2/5 ||
00156       nfeats_ != ctrl_inObservations_->to<mrs_natural>()) //hardcoded [!]
00157   {
00158     segFrames_ = ctrl_inSamples_->to<mrs_natural>()*2/5; // hardcoded [!]
00159     segHop_ = ctrl_inSamples_->to<mrs_natural>()*1/5; // hardcoded [!]
00160 
00161 
00162     hopSeconds_ = 0.001*segHop_*(mrs_real)ctrl_hopMS_->to<mrs_natural>();
00163     nfeats_ = ctrl_inObservations_->to<mrs_natural>();
00164     /* C1_.allocate(nfeats, segFrames_);
00165        C2_.allocate(nfeats, segFrames_);
00166        C3_.allocate(nfeats, segFrames_);
00167        C4_.allocate(nfeats, segFrames_);
00168     */
00169     // there is no allocate anymore in realvec
00170     // not sure how it compiled. Gustavo ?
00171 
00172     // cooplogic - this shouldn't be needed since it is
00173     //  done every time in myProcess
00174     //      C1_.create(nfeats_, segFrames_);
00175     //      C2_.create(nfeats_, segFrames_);
00176     //      C3_.create(nfeats_, segFrames_);
00177     //      C4_.create(nfeats_, segFrames_);
00178 
00179   }
00180 
00181   if(ctrl_reset_->to<bool>())
00182   {
00183     QGMMmodel_.resetModel();
00184     prevDists_->updControl("mrs_bool/reset", true);
00185     pdists_.setval(0.0);
00186     pIndex_ = 0;
00187     ctrl_reset_->setValue(false, NOUPDATE);
00188   }
00189 }
00190 
00191 void
00192 BICchangeDetector::myProcess(realvec& in, realvec& out)
00193 {
00194 
00195   // skip initial hops that have zeroes
00196   if (BICTick_ < 5)
00197   {
00198     BICTick_ ++;
00199     return;
00200   }
00201 
00202 
00203 
00204 
00205   mrs_natural o,t;
00206   // [!note!] if CX_ matrices are reused they need to be resized since
00207   // they meanwhile were assigned to covariance matrices (10x10)
00208   C1_.create(nfeats_, segFrames_);
00209   C2_.create(nfeats_, segFrames_);
00210   C3_.create(nfeats_, segFrames_);
00211   C4_.create(nfeats_, segFrames_);
00212 
00213 
00214   for(o=0; o < inObservations_; ++o)
00215   {
00216     //get segments => use pointers to "in" instead of copies?! [!]
00217     for(t=0; t < segFrames_ ; ++t)
00218     {
00219       C1_(o, t) = in(o, t);
00220       C2_(o, t) = in(o, t + segFrames_);
00221       C3_(o, t) = in(o, t + segHop_);
00222       C4_(o, t) = in(o, t + segHop_ + segFrames_);
00223     }
00224 
00225     //bypass input to output unchanged [!]
00226     for(t=0; t < inSamples_; ++t)
00227       out(o, t) = in(o, t);
00228   }
00229 
00230 
00231 
00232 
00233 
00234   //calculate covariance matrix for each segment
00235   realvec tmp;
00236 
00237   C1_.covariance(tmp);
00238   C1_ = tmp;
00239 
00240   C2_.covariance(tmp);
00241   C2_ = tmp;
00242   C3_.covariance(tmp);
00243   C3_ = tmp;
00244   C4_.covariance(tmp);
00245   C4_ = tmp;
00246 
00247 
00248 
00249   //update current qGMM model, using the first sub-segment data, C1_
00250   QGMMmodel_.updateModel(C1_, segFrames_);
00251 
00252   //calculate divergenceShape between sub-segment pairs
00253   dist12_ = NumericLib::divergenceShape(C1_, C2_);
00254   dist34_ = NumericLib::divergenceShape(C3_, C4_);
00255 
00256   //calculate bhattacharyyaShape between sub-segment pairs => should be an option! [!]
00257   //mrs_real dist12 = realvec::bhattacharyyaShape(C1_, C2_);
00258   //mrs_real dist34 = realvec::bhattacharyyaShape(C3_, C4_);
00259 
00260   //calculate dynamic threshold, using the distance values from
00261   //the previously stored sub-segment pairs
00262   // (NOTE:this implementation is slightly different from the one used in marsyas0.1
00263   //       implementation, where the mean calculation was performed only on the values
00264   //       actually sent to the circular buffer (i.e. avoids calculating mean of an
00265   //       "empty" circular buffer, as will happen here in the first process() calls...)
00266   //    pdists_ = prevDists_->getctrl("mrs_realvec/processedData")->to<mrs_realvec>(); // processedData: does this work?!?!?!?!?! [?]
00267 
00268 
00269 
00270   dynThres_ = pdists_.mean() * ctrl_alpha1_->to<mrs_real>();
00271 
00272 
00273 
00274   // get the values for the previous (i.e. left) and next (i.e. right) distances (in time)
00275   // so we can later check if the current distance is a local maximum (i.e. a peak)
00276   //    mrs_real distanceLeft = pdists_(pdists_.getSize()-1); //i.e. the previous distance value
00277   mrs_real distanceLeft = pdists_(pIndex_); //i.e. the previous distance value
00278   pIndex_ = (pIndex_ + 1) % nrPrevDists_;
00279 
00280   mrs_real distanceRight = dist34_; //i.e the next distance value
00281 
00282 
00283   //just to avoid "spurious" peaks in the audio stream borders (i.e. start and end of stream)...
00284   if(distanceLeft == 0.0)
00285     distanceLeft = dist12_;
00286   if(distanceRight == 0.0)
00287     distanceRight = dist12_;
00288 
00289 
00290 
00291   //store current distance(C1,C2) in the circular buffer for next iteration
00292   realvec newDist(1);
00293   newDist(0) = dist12_;
00294   prevDists_->process(newDist, pdists_);
00295 
00296   //check for a potential change (based only on distances!)
00297   // (i.e. distance is local maxima and is above the dynamic threshold)
00298   //        time_t currTime = ((mrs_real)BICTick_)*0.675;
00299   //time_t currTime = ((mrs_real)BICTick_-2)*hopSeconds_; // debug only
00300 
00301 
00302   mrs_real change_time = ((mrs_real)BICTick_-2) * hopSeconds_;
00303 
00304 
00305   //tm * currTm = gmtime(&currTime);  // for debug only
00306 
00307 
00308   if (dist12_ > distanceRight && dist12_ > distanceLeft && dist12_ > dynThres_)
00309   {
00310 
00311     mrs_real confidence = 1.0 - dynThres_/dist12_;
00312 
00313 
00314 
00315     //if this a potential change point, validate it using BIC and the current model
00316     BICdist_ = QGMMmodel_.BICdistance(C2_, segFrames_, ctrl_lambda_->to<mrs_real>());
00317 
00318 
00319 //  cout> << name_ << ": Potential change, with confidence " << confidence
00320 //       << " at " << currTm->tm_hour << "h::"
00321 //       << currTm->tm_min << "m::"
00322 //       << currTm->tm_sec << "s" << endl;
00323 
00324 
00325 
00326 
00327 
00328     //Apply BIC criteria
00329     if (BICdist_ > 0.0)
00330     {
00331       //BIC validated the change point!
00332 
00333       //reset current model, because we will now start a new one
00334       // (we could also store these models for future use - e.g. clustering)
00335       QGMMmodel_.resetModel();
00336 
00337       //do something to mark the detected change POINT:
00338       // - print a message
00339       // - write to a file
00340       // - generate a sound (e.g. a beep)
00341       // - ...
00342       //            mrs_real confidence = 1.0 - dynThres_/dist12_;
00343       // cout << " confirmed!";
00344 
00345       if (confidence > 0.0)
00346       {
00347 
00348         cout  << prev_change_time_ << "\t" << change_time << "\t" << confidence << endl;
00349         prev_change_time_ = change_time;
00350 
00351 
00352 //        cout  << prev_change_time_ << "\t" << change_time << "\t" << confidence << endl;
00353 //        prev_change_time_ = change_time;
00354       }
00355 
00356     }
00357     else
00358     {
00359       // cout << " UNCONFIRMED.";
00360       //BIC rejected potential change point
00361       //do something here? Probably not...
00362     }
00363     // cout << endl;
00364   }
00365   //    cout << "TESTING TICKS: "
00366   //         << " at " << currTm->tm_hour << "h::"
00367   //         << currTm->tm_min << "m::"
00368   //         << currTm->tm_sec << "s"
00369   //         << endl;
00370 
00371 
00372   ++BICTick_;
00373 
00374 }