qm-dsp
1.8
|
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 00002 00003 /* 00004 QM DSP Library 00005 00006 Centre for Digital Music, Queen Mary, University of London. 00007 This file copyright 2008-2009 Matthew Davies and QMUL. 00008 00009 This program is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU General Public License as 00011 published by the Free Software Foundation; either version 2 of the 00012 License, or (at your option) any later version. See the file 00013 COPYING included with this distribution for more information. 00014 */ 00015 00016 #include "TempoTrackV2.h" 00017 00018 #include <cmath> 00019 #include <cstdlib> 00020 #include <iostream> 00021 00022 #include "maths/MathUtilities.h" 00023 00024 #define EPS 0.0000008 // just some arbitrary small number 00025 00026 TempoTrackV2::TempoTrackV2(float rate, size_t increment) : 00027 m_rate(rate), m_increment(increment) { } 00028 TempoTrackV2::~TempoTrackV2() { } 00029 00030 void 00031 TempoTrackV2::filter_df(d_vec_t &df) 00032 { 00033 d_vec_t a(3); 00034 d_vec_t b(3); 00035 d_vec_t lp_df(df.size()); 00036 00037 //equivalent in matlab to [b,a] = butter(2,0.4); 00038 a[0] = 1.0000; 00039 a[1] = -0.3695; 00040 a[2] = 0.1958; 00041 b[0] = 0.2066; 00042 b[1] = 0.4131; 00043 b[2] = 0.2066; 00044 00045 double inp1 = 0.; 00046 double inp2 = 0.; 00047 double out1 = 0.; 00048 double out2 = 0.; 00049 00050 00051 // forwards filtering 00052 for (unsigned int i = 0;i < df.size();i++) 00053 { 00054 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2; 00055 inp2 = inp1; 00056 inp1 = df[i]; 00057 out2 = out1; 00058 out1 = lp_df[i]; 00059 } 00060 00061 // copy forwards filtering to df... 00062 // but, time-reversed, ready for backwards filtering 00063 for (unsigned int i = 0;i < df.size();i++) 00064 { 00065 df[i] = lp_df[df.size()-i-1]; 00066 } 00067 00068 for (unsigned int i = 0;i < df.size();i++) 00069 { 00070 lp_df[i] = 0.; 00071 } 00072 00073 inp1 = 0.; inp2 = 0.; 00074 out1 = 0.; out2 = 0.; 00075 00076 // backwards filetering on time-reversed df 00077 for (unsigned int i = 0;i < df.size();i++) 00078 { 00079 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2; 00080 inp2 = inp1; 00081 inp1 = df[i]; 00082 out2 = out1; 00083 out1 = lp_df[i]; 00084 } 00085 00086 // write the re-reversed (i.e. forward) version back to df 00087 for (unsigned int i = 0;i < df.size();i++) 00088 { 00089 df[i] = lp_df[df.size()-i-1]; 00090 } 00091 } 00092 00093 00094 // MEPD 28/11/12 00095 // This function now allows for a user to specify an inputtempo (in BPM) 00096 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities 00097 // with a gaussian which is centered around the input tempo 00098 // Note, if inputtempo = 120 and constraintempo = false, then functionality is 00099 // as it was before 00100 void 00101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df, 00102 vector<double> &beat_period, 00103 vector<double> &tempi, 00104 double inputtempo, bool constraintempo) 00105 { 00106 // to follow matlab.. split into 512 sample frames with a 128 hop size 00107 // calculate the acf, 00108 // then the rcf.. and then stick the rcfs as columns of a matrix 00109 // then call viterbi decoding with weight vector and transition matrix 00110 // and get best path 00111 00112 unsigned int wv_len = 128; 00113 00114 // MEPD 28/11/12 00115 // the default value of inputtempo in the beat tracking plugin is 120 00116 // so if the user specifies a different inputtempo, the rayparam will be updated 00117 // accordingly. 00118 // note: 60*44100/512 is a magic number 00119 // this might (will?) break if a user specifies a different frame rate for the onset detection function 00120 double rayparam = (60*44100/512)/inputtempo; 00121 00122 // these debug statements can be removed. 00123 // std::cerr << "inputtempo" << inputtempo << std::endl; 00124 // std::cerr << "rayparam" << rayparam << std::endl; 00125 // std::cerr << "constraintempo" << constraintempo << std::endl; 00126 00127 // make rayleigh weighting curve 00128 d_vec_t wv(wv_len); 00129 00130 // check whether or not to use rayleigh weighting (if constraintempo is false) 00131 // or use gaussian weighting it (constraintempo is true) 00132 if (constraintempo) 00133 { 00134 for (unsigned int i=0; i<wv.size(); i++) 00135 { 00136 // MEPD 28/11/12 00137 // do a gaussian weighting instead of rayleigh 00138 wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) ); 00139 } 00140 } 00141 else 00142 { 00143 for (unsigned int i=0; i<wv.size(); i++) 00144 { 00145 // MEPD 28/11/12 00146 // standard rayleigh weighting over periodicities 00147 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.))); 00148 } 00149 } 00150 00151 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds) 00152 unsigned int winlen = 512; 00153 unsigned int step = 128; 00154 00155 // matrix to store output of comb filter bank, increment column of matrix at each frame 00156 d_mat_t rcfmat; 00157 int col_counter = -1; 00158 00159 // main loop for beat period calculation 00160 for (unsigned int i=0; i+winlen<df.size(); i+=step) 00161 { 00162 // get dfframe 00163 d_vec_t dfframe(winlen); 00164 for (unsigned int k=0; k<winlen; k++) 00165 { 00166 dfframe[k] = df[i+k]; 00167 } 00168 // get rcf vector for current frame 00169 d_vec_t rcf(wv_len); 00170 get_rcf(dfframe,wv,rcf); 00171 00172 rcfmat.push_back( d_vec_t() ); // adds a new column 00173 col_counter++; 00174 for (unsigned int j=0; j<rcf.size(); j++) 00175 { 00176 rcfmat[col_counter].push_back( rcf[j] ); 00177 } 00178 } 00179 00180 // now call viterbi decoding function 00181 viterbi_decode(rcfmat,wv,beat_period,tempi); 00182 } 00183 00184 00185 void 00186 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf) 00187 { 00188 // calculate autocorrelation function 00189 // then rcf 00190 // just hard code for now... don't really need separate functions to do this 00191 00192 // make acf 00193 00194 d_vec_t dfframe(dfframe_in); 00195 00196 MathUtilities::adaptiveThreshold(dfframe); 00197 00198 d_vec_t acf(dfframe.size()); 00199 00200 00201 for (unsigned int lag=0; lag<dfframe.size(); lag++) 00202 { 00203 double sum = 0.; 00204 double tmp = 0.; 00205 00206 for (unsigned int n=0; n<(dfframe.size()-lag); n++) 00207 { 00208 tmp = dfframe[n] * dfframe[n+lag]; 00209 sum += tmp; 00210 } 00211 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag)); 00212 } 00213 00214 // now apply comb filtering 00215 int numelem = 4; 00216 00217 for (unsigned int i = 2;i < rcf.size();i++) // max beat period 00218 { 00219 for (int a = 1;a <= numelem;a++) // number of comb elements 00220 { 00221 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements 00222 { 00223 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row 00224 } 00225 } 00226 } 00227 00228 // apply adaptive threshold to rcf 00229 MathUtilities::adaptiveThreshold(rcf); 00230 00231 double rcfsum =0.; 00232 for (unsigned int i=0; i<rcf.size(); i++) 00233 { 00234 rcf[i] += EPS ; 00235 rcfsum += rcf[i]; 00236 } 00237 00238 // normalise rcf to sum to unity 00239 for (unsigned int i=0; i<rcf.size(); i++) 00240 { 00241 rcf[i] /= (rcfsum + EPS); 00242 } 00243 } 00244 00245 void 00246 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi) 00247 { 00248 // following Kevin Murphy's Viterbi decoding to get best path of 00249 // beat periods through rfcmat 00250 00251 // make transition matrix 00252 d_mat_t tmat; 00253 for (unsigned int i=0;i<wv.size();i++) 00254 { 00255 tmat.push_back ( d_vec_t() ); // adds a new column 00256 for (unsigned int j=0; j<wv.size(); j++) 00257 { 00258 tmat[i].push_back(0.); // fill with zeros initially 00259 } 00260 } 00261 00262 // variance of Gaussians in transition matrix 00263 // formed of Gaussians on diagonal - implies slow tempo change 00264 double sigma = 8.; 00265 // don't want really short beat periods, or really long ones 00266 for (unsigned int i=20;i <wv.size()-20; i++) 00267 { 00268 for (unsigned int j=20; j<wv.size()-20; j++) 00269 { 00270 double mu = static_cast<double>(i); 00271 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) ); 00272 } 00273 } 00274 00275 // parameters for Viterbi decoding... this part is taken from 00276 // Murphy's matlab 00277 00278 d_mat_t delta; 00279 i_mat_t psi; 00280 for (unsigned int i=0;i <rcfmat.size(); i++) 00281 { 00282 delta.push_back( d_vec_t()); 00283 psi.push_back( i_vec_t()); 00284 for (unsigned int j=0; j<rcfmat[i].size(); j++) 00285 { 00286 delta[i].push_back(0.); // fill with zeros initially 00287 psi[i].push_back(0); // fill with zeros initially 00288 } 00289 } 00290 00291 00292 unsigned int T = delta.size(); 00293 00294 if (T < 2) return; // can't do anything at all meaningful 00295 00296 unsigned int Q = delta[0].size(); 00297 00298 // initialize first column of delta 00299 for (unsigned int j=0; j<Q; j++) 00300 { 00301 delta[0][j] = wv[j] * rcfmat[0][j]; 00302 psi[0][j] = 0; 00303 } 00304 00305 double deltasum = 0.; 00306 for (unsigned int i=0; i<Q; i++) 00307 { 00308 deltasum += delta[0][i]; 00309 } 00310 for (unsigned int i=0; i<Q; i++) 00311 { 00312 delta[0][i] /= (deltasum + EPS); 00313 } 00314 00315 00316 for (unsigned int t=1; t<T; t++) 00317 { 00318 d_vec_t tmp_vec(Q); 00319 00320 for (unsigned int j=0; j<Q; j++) 00321 { 00322 for (unsigned int i=0; i<Q; i++) 00323 { 00324 tmp_vec[i] = delta[t-1][i] * tmat[j][i]; 00325 } 00326 00327 delta[t][j] = get_max_val(tmp_vec); 00328 00329 psi[t][j] = get_max_ind(tmp_vec); 00330 00331 delta[t][j] *= rcfmat[t][j]; 00332 } 00333 00334 // normalise current delta column 00335 double deltasum = 0.; 00336 for (unsigned int i=0; i<Q; i++) 00337 { 00338 deltasum += delta[t][i]; 00339 } 00340 for (unsigned int i=0; i<Q; i++) 00341 { 00342 delta[t][i] /= (deltasum + EPS); 00343 } 00344 } 00345 00346 i_vec_t bestpath(T); 00347 d_vec_t tmp_vec(Q); 00348 for (unsigned int i=0; i<Q; i++) 00349 { 00350 tmp_vec[i] = delta[T-1][i]; 00351 } 00352 00353 // find starting point - best beat period for "last" frame 00354 bestpath[T-1] = get_max_ind(tmp_vec); 00355 00356 // backtrace through index of maximum values in psi 00357 for (unsigned int t=T-2; t>0 ;t--) 00358 { 00359 bestpath[t] = psi[t+1][bestpath[t+1]]; 00360 } 00361 00362 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0 00363 bestpath[0] = psi[1][bestpath[1]]; 00364 00365 unsigned int lastind = 0; 00366 for (unsigned int i=0; i<T; i++) 00367 { 00368 unsigned int step = 128; 00369 for (unsigned int j=0; j<step; j++) 00370 { 00371 lastind = i*step+j; 00372 beat_period[lastind] = bestpath[i]; 00373 } 00374 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl; 00375 } 00376 00377 //fill in the last values... 00378 for (unsigned int i=lastind; i<beat_period.size(); i++) 00379 { 00380 beat_period[i] = beat_period[lastind]; 00381 } 00382 00383 for (unsigned int i = 0; i < beat_period.size(); i++) 00384 { 00385 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]); 00386 } 00387 } 00388 00389 double 00390 TempoTrackV2::get_max_val(const d_vec_t &df) 00391 { 00392 double maxval = 0.; 00393 for (unsigned int i=0; i<df.size(); i++) 00394 { 00395 if (maxval < df[i]) 00396 { 00397 maxval = df[i]; 00398 } 00399 } 00400 00401 return maxval; 00402 } 00403 00404 int 00405 TempoTrackV2::get_max_ind(const d_vec_t &df) 00406 { 00407 double maxval = 0.; 00408 int ind = 0; 00409 for (unsigned int i=0; i<df.size(); i++) 00410 { 00411 if (maxval < df[i]) 00412 { 00413 maxval = df[i]; 00414 ind = i; 00415 } 00416 } 00417 00418 return ind; 00419 } 00420 00421 void 00422 TempoTrackV2::normalise_vec(d_vec_t &df) 00423 { 00424 double sum = 0.; 00425 for (unsigned int i=0; i<df.size(); i++) 00426 { 00427 sum += df[i]; 00428 } 00429 00430 for (unsigned int i=0; i<df.size(); i++) 00431 { 00432 df[i]/= (sum + EPS); 00433 } 00434 } 00435 00436 // MEPD 28/11/12 00437 // this function has been updated to allow the "alpha" and "tightness" parameters 00438 // of the dynamic program to be set by the user 00439 // the default value of alpha = 0.9 and tightness = 4 00440 void 00441 TempoTrackV2::calculateBeats(const vector<double> &df, 00442 const vector<double> &beat_period, 00443 vector<double> &beats, double alpha, double tightness) 00444 { 00445 if (df.empty() || beat_period.empty()) return; 00446 00447 d_vec_t cumscore(df.size()); // store cumulative score 00448 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant) 00449 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function 00450 00451 for (unsigned int i=0; i<df.size(); i++) 00452 { 00453 localscore[i] = df[i]; 00454 backlink[i] = -1; 00455 } 00456 00457 //double tightness = 4.; 00458 //double alpha = 0.9; 00459 // MEPD 28/11/12 00460 // debug statements that can be removed. 00461 // std::cerr << "alpha" << alpha << std::endl; 00462 // std::cerr << "tightness" << tightness << std::endl; 00463 00464 // main loop 00465 for (unsigned int i=0; i<localscore.size(); i++) 00466 { 00467 int prange_min = -2*beat_period[i]; 00468 int prange_max = round(-0.5*beat_period[i]); 00469 00470 // transition range 00471 d_vec_t txwt (prange_max - prange_min + 1); 00472 d_vec_t scorecands (txwt.size()); 00473 00474 for (unsigned int j=0;j<txwt.size();j++) 00475 { 00476 double mu = static_cast<double> (beat_period[i]); 00477 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2)); 00478 00479 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J 00480 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE()); 00481 00482 int cscore_ind = i+prange_min+j; 00483 if (cscore_ind >= 0) 00484 { 00485 scorecands[j] = txwt[j] * cumscore[cscore_ind]; 00486 } 00487 } 00488 00489 // find max value and index of maximum value 00490 double vv = get_max_val(scorecands); 00491 int xx = get_max_ind(scorecands); 00492 00493 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i]; 00494 backlink[i] = i+prange_min+xx; 00495 00496 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl; 00497 } 00498 00499 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR 00500 d_vec_t tmp_vec; 00501 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++) 00502 { 00503 tmp_vec.push_back(cumscore[i]); 00504 } 00505 00506 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ; 00507 00508 // can happen if no results obtained earlier (e.g. input too short) 00509 if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1; 00510 00511 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE) 00512 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0 00513 i_vec_t ibeats; 00514 ibeats.push_back(startpoint); 00515 // std::cerr << "startpoint = " << startpoint << std::endl; 00516 while (backlink[ibeats.back()] > 0) 00517 { 00518 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl; 00519 int b = ibeats.back(); 00520 if (backlink[b] == b) break; // shouldn't happen... haha 00521 ibeats.push_back(backlink[b]); 00522 } 00523 00524 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS 00525 for (unsigned int i=0; i<ibeats.size(); i++) 00526 { 00527 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) ); 00528 } 00529 } 00530 00531