Marsyas
0.6.0-alpha
|
00001 /* 00002 ** Copyright (C) 1998-2011 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #include "BICchangeDetector.h" 00020 #include "Memory.h" 00021 #include <marsyas/NumericLib.h> 00022 00023 00024 using std::ostringstream; 00025 using std::cout; 00026 using std::endl; 00027 00028 using namespace Marsyas; 00029 00036 BICchangeDetector::BICchangeDetector(mrs_string name):MarSystem("BICchangeDetector", name) 00037 { 00038 prevDists_ = new Memory("cirMem"); 00039 BICTick_ = 0; 00040 nfeats_ = 0; 00041 addControls(); 00042 pdists_.create(nrPrevDists_); 00043 } 00044 00051 BICchangeDetector::BICchangeDetector(const BICchangeDetector& a) : MarSystem(a) 00052 { 00053 prevDists_ = new Memory(*(a.prevDists_)); 00054 00055 BICTick_ = 0; 00056 00057 nfeats_ = 0; 00058 ctrl_reset_ = getctrl("mrs_bool/reset"); 00059 ctrl_alpha1_= getctrl("mrs_real/alpha1"); 00060 ctrl_lambda_= getctrl("mrs_real/lambda"); 00061 ctrl_prevDists_= getctrl("mrs_natural/prevDists"); 00062 ctrl_hopMS_ = getctrl("mrs_natural/hopMillis"); 00063 nrPrevDists_ = ctrl_prevDists_->to<mrs_natural>(); 00064 pdists_.create(nrPrevDists_); 00065 prev_change_time_ = 0.0; 00066 } 00067 00068 BICchangeDetector::~BICchangeDetector() 00069 { 00070 delete prevDists_; 00071 } 00072 00073 MarSystem* 00074 00080 BICchangeDetector::clone() const 00081 { 00082 return new BICchangeDetector(*this); 00083 } 00084 00095 void 00096 BICchangeDetector::addControls() 00097 { 00098 addctrl("mrs_bool/reset", true, ctrl_reset_); 00099 setctrlState(ctrl_reset_, true); 00100 00101 addctrl("mrs_real/alpha1", 0.4, ctrl_alpha1_); 00102 addctrl("mrs_real/lambda", 0.6, ctrl_lambda_); 00103 addctrl("mrs_natural/prevDists",3,ctrl_prevDists_); 00104 addctrl("mrs_natural/hopMillis",16,ctrl_hopMS_); 00105 00106 dynThres_ = 0.0; 00107 prevDists_->updControl("mrs_natural/inSamples", 1); 00108 prevDists_->updControl("mrs_natural/inObservations", 1); 00109 nrPrevDists_ = getctrl("mrs_natural/prevDists")->to<mrs_natural>(); 00110 prevDists_->updControl("mrs_natural/memSize", nrPrevDists_); //store 3 previous distances, for dynamic thresholding 00111 prev_change_time_ = 0.0; 00112 00113 } 00114 00137 void 00138 BICchangeDetector::myUpdate(MarControlPtr sender) 00139 { 00140 MarSystem::myUpdate(sender); 00141 00142 //BICchangeDetector must receive as input a vector of feature frames as depicted bellow, 00143 //which includes 4 speech sub-segments (C1, C2, C3, C4) that will be used for detecting 00144 //speaker changes: 00145 // |------------------------| 00146 // C1 C2 00147 // |----+----|----+----| 00148 // |----+----|----+----| 00149 // C3 C4 00150 // |--->| 00151 // hop 00152 // 00153 // For now, hop is set fixed as 1/5 of inSamples [!] 00154 // 00155 if(segFrames_ != ctrl_inSamples_->to<mrs_natural>()*2/5 || 00156 nfeats_ != ctrl_inObservations_->to<mrs_natural>()) //hardcoded [!] 00157 { 00158 segFrames_ = ctrl_inSamples_->to<mrs_natural>()*2/5; // hardcoded [!] 00159 segHop_ = ctrl_inSamples_->to<mrs_natural>()*1/5; // hardcoded [!] 00160 00161 00162 hopSeconds_ = 0.001*segHop_*(mrs_real)ctrl_hopMS_->to<mrs_natural>(); 00163 nfeats_ = ctrl_inObservations_->to<mrs_natural>(); 00164 /* C1_.allocate(nfeats, segFrames_); 00165 C2_.allocate(nfeats, segFrames_); 00166 C3_.allocate(nfeats, segFrames_); 00167 C4_.allocate(nfeats, segFrames_); 00168 */ 00169 // there is no allocate anymore in realvec 00170 // not sure how it compiled. Gustavo ? 00171 00172 // cooplogic - this shouldn't be needed since it is 00173 // done every time in myProcess 00174 // C1_.create(nfeats_, segFrames_); 00175 // C2_.create(nfeats_, segFrames_); 00176 // C3_.create(nfeats_, segFrames_); 00177 // C4_.create(nfeats_, segFrames_); 00178 00179 } 00180 00181 if(ctrl_reset_->to<bool>()) 00182 { 00183 QGMMmodel_.resetModel(); 00184 prevDists_->updControl("mrs_bool/reset", true); 00185 pdists_.setval(0.0); 00186 pIndex_ = 0; 00187 ctrl_reset_->setValue(false, NOUPDATE); 00188 } 00189 } 00190 00191 void 00192 BICchangeDetector::myProcess(realvec& in, realvec& out) 00193 { 00194 00195 // skip initial hops that have zeroes 00196 if (BICTick_ < 5) 00197 { 00198 BICTick_ ++; 00199 return; 00200 } 00201 00202 00203 00204 00205 mrs_natural o,t; 00206 // [!note!] if CX_ matrices are reused they need to be resized since 00207 // they meanwhile were assigned to covariance matrices (10x10) 00208 C1_.create(nfeats_, segFrames_); 00209 C2_.create(nfeats_, segFrames_); 00210 C3_.create(nfeats_, segFrames_); 00211 C4_.create(nfeats_, segFrames_); 00212 00213 00214 for(o=0; o < inObservations_; ++o) 00215 { 00216 //get segments => use pointers to "in" instead of copies?! [!] 00217 for(t=0; t < segFrames_ ; ++t) 00218 { 00219 C1_(o, t) = in(o, t); 00220 C2_(o, t) = in(o, t + segFrames_); 00221 C3_(o, t) = in(o, t + segHop_); 00222 C4_(o, t) = in(o, t + segHop_ + segFrames_); 00223 } 00224 00225 //bypass input to output unchanged [!] 00226 for(t=0; t < inSamples_; ++t) 00227 out(o, t) = in(o, t); 00228 } 00229 00230 00231 00232 00233 00234 //calculate covariance matrix for each segment 00235 realvec tmp; 00236 00237 C1_.covariance(tmp); 00238 C1_ = tmp; 00239 00240 C2_.covariance(tmp); 00241 C2_ = tmp; 00242 C3_.covariance(tmp); 00243 C3_ = tmp; 00244 C4_.covariance(tmp); 00245 C4_ = tmp; 00246 00247 00248 00249 //update current qGMM model, using the first sub-segment data, C1_ 00250 QGMMmodel_.updateModel(C1_, segFrames_); 00251 00252 //calculate divergenceShape between sub-segment pairs 00253 dist12_ = NumericLib::divergenceShape(C1_, C2_); 00254 dist34_ = NumericLib::divergenceShape(C3_, C4_); 00255 00256 //calculate bhattacharyyaShape between sub-segment pairs => should be an option! [!] 00257 //mrs_real dist12 = realvec::bhattacharyyaShape(C1_, C2_); 00258 //mrs_real dist34 = realvec::bhattacharyyaShape(C3_, C4_); 00259 00260 //calculate dynamic threshold, using the distance values from 00261 //the previously stored sub-segment pairs 00262 // (NOTE:this implementation is slightly different from the one used in marsyas0.1 00263 // implementation, where the mean calculation was performed only on the values 00264 // actually sent to the circular buffer (i.e. avoids calculating mean of an 00265 // "empty" circular buffer, as will happen here in the first process() calls...) 00266 // pdists_ = prevDists_->getctrl("mrs_realvec/processedData")->to<mrs_realvec>(); // processedData: does this work?!?!?!?!?! [?] 00267 00268 00269 00270 dynThres_ = pdists_.mean() * ctrl_alpha1_->to<mrs_real>(); 00271 00272 00273 00274 // get the values for the previous (i.e. left) and next (i.e. right) distances (in time) 00275 // so we can later check if the current distance is a local maximum (i.e. a peak) 00276 // mrs_real distanceLeft = pdists_(pdists_.getSize()-1); //i.e. the previous distance value 00277 mrs_real distanceLeft = pdists_(pIndex_); //i.e. the previous distance value 00278 pIndex_ = (pIndex_ + 1) % nrPrevDists_; 00279 00280 mrs_real distanceRight = dist34_; //i.e the next distance value 00281 00282 00283 //just to avoid "spurious" peaks in the audio stream borders (i.e. start and end of stream)... 00284 if(distanceLeft == 0.0) 00285 distanceLeft = dist12_; 00286 if(distanceRight == 0.0) 00287 distanceRight = dist12_; 00288 00289 00290 00291 //store current distance(C1,C2) in the circular buffer for next iteration 00292 realvec newDist(1); 00293 newDist(0) = dist12_; 00294 prevDists_->process(newDist, pdists_); 00295 00296 //check for a potential change (based only on distances!) 00297 // (i.e. distance is local maxima and is above the dynamic threshold) 00298 // time_t currTime = ((mrs_real)BICTick_)*0.675; 00299 //time_t currTime = ((mrs_real)BICTick_-2)*hopSeconds_; // debug only 00300 00301 00302 mrs_real change_time = ((mrs_real)BICTick_-2) * hopSeconds_; 00303 00304 00305 //tm * currTm = gmtime(&currTime); // for debug only 00306 00307 00308 if (dist12_ > distanceRight && dist12_ > distanceLeft && dist12_ > dynThres_) 00309 { 00310 00311 mrs_real confidence = 1.0 - dynThres_/dist12_; 00312 00313 00314 00315 //if this a potential change point, validate it using BIC and the current model 00316 BICdist_ = QGMMmodel_.BICdistance(C2_, segFrames_, ctrl_lambda_->to<mrs_real>()); 00317 00318 00319 // cout> << name_ << ": Potential change, with confidence " << confidence 00320 // << " at " << currTm->tm_hour << "h::" 00321 // << currTm->tm_min << "m::" 00322 // << currTm->tm_sec << "s" << endl; 00323 00324 00325 00326 00327 00328 //Apply BIC criteria 00329 if (BICdist_ > 0.0) 00330 { 00331 //BIC validated the change point! 00332 00333 //reset current model, because we will now start a new one 00334 // (we could also store these models for future use - e.g. clustering) 00335 QGMMmodel_.resetModel(); 00336 00337 //do something to mark the detected change POINT: 00338 // - print a message 00339 // - write to a file 00340 // - generate a sound (e.g. a beep) 00341 // - ... 00342 // mrs_real confidence = 1.0 - dynThres_/dist12_; 00343 // cout << " confirmed!"; 00344 00345 if (confidence > 0.0) 00346 { 00347 00348 cout << prev_change_time_ << "\t" << change_time << "\t" << confidence << endl; 00349 prev_change_time_ = change_time; 00350 00351 00352 // cout << prev_change_time_ << "\t" << change_time << "\t" << confidence << endl; 00353 // prev_change_time_ = change_time; 00354 } 00355 00356 } 00357 else 00358 { 00359 // cout << " UNCONFIRMED."; 00360 //BIC rejected potential change point 00361 //do something here? Probably not... 00362 } 00363 // cout << endl; 00364 } 00365 // cout << "TESTING TICKS: " 00366 // << " at " << currTm->tm_hour << "h::" 00367 // << currTm->tm_min << "m::" 00368 // << currTm->tm_sec << "s" 00369 // << endl; 00370 00371 00372 ++BICTick_; 00373 00374 }