qm-dsp  1.8
TempoTrackV2.cpp
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
00002 
00003 /*
00004     QM DSP Library
00005 
00006     Centre for Digital Music, Queen Mary, University of London.
00007     This file copyright 2008-2009 Matthew Davies and QMUL.
00008 
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of the
00012     License, or (at your option) any later version.  See the file
00013     COPYING included with this distribution for more information.
00014 */
00015 
00016 #include "TempoTrackV2.h"
00017 
00018 #include <cmath>
00019 #include <cstdlib>
00020 #include <iostream>
00021 
00022 #include "maths/MathUtilities.h"
00023 
00024 #define   EPS 0.0000008 // just some arbitrary small number
00025 
00026 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
00027     m_rate(rate), m_increment(increment) { }
00028 TempoTrackV2::~TempoTrackV2() { }
00029 
00030 void
00031 TempoTrackV2::filter_df(d_vec_t &df)
00032 {
00033     d_vec_t a(3);
00034     d_vec_t b(3);
00035     d_vec_t     lp_df(df.size());
00036 
00037     //equivalent in matlab to [b,a] = butter(2,0.4);
00038     a[0] = 1.0000;
00039     a[1] = -0.3695;
00040     a[2] = 0.1958;
00041     b[0] = 0.2066;
00042     b[1] = 0.4131;
00043     b[2] = 0.2066;
00044 
00045     double inp1 = 0.;
00046     double inp2 = 0.;
00047     double out1 = 0.;
00048     double out2 = 0.;
00049 
00050 
00051     // forwards filtering
00052     for (unsigned int i = 0;i < df.size();i++)
00053     {
00054         lp_df[i] =  b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
00055         inp2 = inp1;
00056         inp1 = df[i];
00057         out2 = out1;
00058         out1 = lp_df[i];
00059     }
00060 
00061     // copy forwards filtering to df...
00062     // but, time-reversed, ready for backwards filtering
00063     for (unsigned int i = 0;i < df.size();i++)
00064     {
00065         df[i] = lp_df[df.size()-i-1];
00066     }
00067 
00068     for (unsigned int i = 0;i < df.size();i++)
00069     {
00070         lp_df[i] = 0.;
00071     }
00072 
00073     inp1 = 0.; inp2 = 0.;
00074     out1 = 0.; out2 = 0.;
00075 
00076   // backwards filetering on time-reversed df
00077     for (unsigned int i = 0;i < df.size();i++)
00078     {
00079         lp_df[i] =  b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
00080         inp2 = inp1;
00081         inp1 = df[i];
00082         out2 = out1;
00083         out1 = lp_df[i];
00084     }
00085 
00086   // write the re-reversed (i.e. forward) version back to df
00087     for (unsigned int i = 0;i < df.size();i++)
00088     {
00089         df[i] = lp_df[df.size()-i-1];
00090     }
00091 }
00092 
00093 
00094 // MEPD 28/11/12
00095 // This function now allows for a user to specify an inputtempo (in BPM)
00096 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
00097 // with a gaussian which is centered around the input tempo
00098 // Note, if inputtempo = 120 and constraintempo = false, then functionality is
00099 // as it was before
00100 void
00101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
00102                                   vector<double> &beat_period,
00103                                   vector<double> &tempi,
00104                                   double inputtempo, bool constraintempo)
00105 {
00106     // to follow matlab.. split into 512 sample frames with a 128 hop size
00107     // calculate the acf,
00108     // then the rcf.. and then stick the rcfs as columns of a matrix
00109     // then call viterbi decoding with weight vector and transition matrix
00110     // and get best path
00111 
00112     unsigned int wv_len = 128;
00113 
00114     // MEPD 28/11/12
00115     // the default value of inputtempo in the beat tracking plugin is 120
00116     // so if the user specifies a different inputtempo, the rayparam will be updated
00117     // accordingly.
00118     // note: 60*44100/512 is a magic number
00119     // this might (will?) break if a user specifies a different frame rate for the onset detection function
00120     double rayparam = (60*44100/512)/inputtempo;
00121 
00122     // these debug statements can be removed.
00123 //    std::cerr << "inputtempo" << inputtempo << std::endl;
00124 //    std::cerr << "rayparam" << rayparam << std::endl;
00125 //    std::cerr << "constraintempo" << constraintempo << std::endl;
00126 
00127     // make rayleigh weighting curve
00128     d_vec_t wv(wv_len);
00129 
00130     // check whether or not to use rayleigh weighting (if constraintempo is false)
00131     // or use gaussian weighting it (constraintempo is true)
00132     if (constraintempo)
00133     {
00134         for (unsigned int i=0; i<wv.size(); i++)
00135         {
00136             // MEPD 28/11/12
00137             // do a gaussian weighting instead of rayleigh
00138             wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
00139         }
00140     }
00141     else
00142     {
00143         for (unsigned int i=0; i<wv.size(); i++)
00144         {
00145             // MEPD 28/11/12
00146             // standard rayleigh weighting over periodicities
00147             wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
00148         }
00149     }
00150 
00151     // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
00152     unsigned int winlen = 512;
00153     unsigned int step = 128;
00154 
00155     // matrix to store output of comb filter bank, increment column of matrix at each frame
00156     d_mat_t rcfmat;
00157     int col_counter = -1;
00158 
00159     // main loop for beat period calculation
00160     for (unsigned int i=0; i+winlen<df.size(); i+=step)
00161     {
00162         // get dfframe
00163         d_vec_t dfframe(winlen);
00164         for (unsigned int k=0; k<winlen; k++)
00165         {
00166             dfframe[k] = df[i+k];
00167         }
00168         // get rcf vector for current frame
00169         d_vec_t rcf(wv_len);
00170         get_rcf(dfframe,wv,rcf);
00171 
00172         rcfmat.push_back( d_vec_t() ); // adds a new column
00173         col_counter++;
00174         for (unsigned int j=0; j<rcf.size(); j++)
00175         {
00176             rcfmat[col_counter].push_back( rcf[j] );
00177         }
00178     }
00179 
00180     // now call viterbi decoding function
00181     viterbi_decode(rcfmat,wv,beat_period,tempi);
00182 }
00183 
00184 
00185 void
00186 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
00187 {
00188     // calculate autocorrelation function
00189     // then rcf
00190     // just hard code for now... don't really need separate functions to do this
00191 
00192     // make acf
00193 
00194     d_vec_t dfframe(dfframe_in);
00195 
00196     MathUtilities::adaptiveThreshold(dfframe);
00197 
00198     d_vec_t acf(dfframe.size());
00199 
00200 
00201     for (unsigned int lag=0; lag<dfframe.size(); lag++)
00202     {
00203         double sum = 0.;
00204         double tmp = 0.;
00205 
00206         for (unsigned int n=0; n<(dfframe.size()-lag); n++)
00207         {
00208             tmp = dfframe[n] * dfframe[n+lag];
00209             sum += tmp;
00210         }
00211         acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
00212     }
00213 
00214     // now apply comb filtering
00215     int numelem = 4;
00216 
00217     for (unsigned int i = 2;i < rcf.size();i++) // max beat period
00218     {
00219         for (int a = 1;a <= numelem;a++) // number of comb elements
00220         {
00221             for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
00222             {
00223                 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.);     // calculate value for comb filter row
00224             }
00225         }
00226     }
00227 
00228     // apply adaptive threshold to rcf
00229     MathUtilities::adaptiveThreshold(rcf);
00230 
00231     double rcfsum =0.;
00232     for (unsigned int i=0; i<rcf.size(); i++)
00233     {
00234         rcf[i] += EPS ;
00235         rcfsum += rcf[i];
00236     }
00237 
00238     // normalise rcf to sum to unity
00239     for (unsigned int i=0; i<rcf.size(); i++)
00240     {
00241         rcf[i] /= (rcfsum + EPS);
00242     }
00243 }
00244 
00245 void
00246 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
00247 {
00248     // following Kevin Murphy's Viterbi decoding to get best path of
00249     // beat periods through rfcmat
00250 
00251     // make transition matrix
00252     d_mat_t tmat;
00253     for (unsigned int i=0;i<wv.size();i++)
00254     {
00255         tmat.push_back ( d_vec_t() ); // adds a new column
00256         for (unsigned int j=0; j<wv.size(); j++)
00257         {
00258             tmat[i].push_back(0.); // fill with zeros initially
00259         }
00260     }
00261 
00262     // variance of Gaussians in transition matrix
00263     // formed of Gaussians on diagonal - implies slow tempo change
00264     double sigma = 8.;
00265     // don't want really short beat periods, or really long ones
00266     for (unsigned int i=20;i <wv.size()-20; i++)
00267     {
00268         for (unsigned int j=20; j<wv.size()-20; j++)
00269         {
00270             double mu = static_cast<double>(i);
00271             tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
00272         }
00273     }
00274 
00275     // parameters for Viterbi decoding... this part is taken from
00276     // Murphy's matlab
00277 
00278     d_mat_t delta;
00279     i_mat_t psi;
00280     for (unsigned int i=0;i <rcfmat.size(); i++)
00281     {
00282         delta.push_back( d_vec_t());
00283         psi.push_back( i_vec_t());
00284         for (unsigned int j=0; j<rcfmat[i].size(); j++)
00285         {
00286             delta[i].push_back(0.); // fill with zeros initially
00287             psi[i].push_back(0); // fill with zeros initially
00288         }
00289     }
00290 
00291 
00292     unsigned int T = delta.size();
00293 
00294     if (T < 2) return; // can't do anything at all meaningful
00295 
00296     unsigned int Q = delta[0].size();
00297 
00298     // initialize first column of delta
00299     for (unsigned int j=0; j<Q; j++)
00300     {
00301         delta[0][j] = wv[j] * rcfmat[0][j];
00302         psi[0][j] = 0;
00303     }
00304 
00305     double deltasum = 0.;
00306     for (unsigned int i=0; i<Q; i++)
00307     {
00308         deltasum += delta[0][i];
00309     }
00310     for (unsigned int i=0; i<Q; i++)
00311     {
00312         delta[0][i] /= (deltasum + EPS);
00313     }
00314 
00315 
00316     for (unsigned int t=1; t<T; t++)
00317     {
00318         d_vec_t tmp_vec(Q);
00319 
00320         for (unsigned int j=0; j<Q; j++)
00321         {
00322             for (unsigned int i=0; i<Q; i++)
00323             {
00324                 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
00325             }
00326 
00327             delta[t][j] = get_max_val(tmp_vec);
00328 
00329             psi[t][j] = get_max_ind(tmp_vec);
00330 
00331             delta[t][j] *= rcfmat[t][j];
00332         }
00333 
00334         // normalise current delta column
00335         double deltasum = 0.;
00336         for (unsigned int i=0; i<Q; i++)
00337         {
00338             deltasum += delta[t][i];
00339         }
00340         for (unsigned int i=0; i<Q; i++)
00341         {
00342             delta[t][i] /= (deltasum + EPS);
00343         }
00344     }
00345 
00346     i_vec_t bestpath(T);
00347     d_vec_t tmp_vec(Q);
00348     for (unsigned int i=0; i<Q; i++)
00349     {
00350         tmp_vec[i] = delta[T-1][i];
00351     }
00352 
00353     // find starting point - best beat period for "last" frame
00354     bestpath[T-1] = get_max_ind(tmp_vec);
00355 
00356     // backtrace through index of maximum values in psi
00357     for (unsigned int t=T-2; t>0 ;t--)
00358     {
00359         bestpath[t] = psi[t+1][bestpath[t+1]];
00360     }
00361 
00362     // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
00363     bestpath[0] = psi[1][bestpath[1]];
00364 
00365     unsigned int lastind = 0;
00366     for (unsigned int i=0; i<T; i++)
00367     {
00368         unsigned int step = 128;
00369         for (unsigned int j=0; j<step; j++)
00370         {
00371             lastind = i*step+j;
00372             beat_period[lastind] = bestpath[i];
00373         }
00374 //        std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
00375     }
00376 
00377     //fill in the last values...
00378     for (unsigned int i=lastind; i<beat_period.size(); i++)
00379     {
00380         beat_period[i] = beat_period[lastind];
00381     }
00382 
00383     for (unsigned int i = 0; i < beat_period.size(); i++)
00384     {
00385         tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
00386     }
00387 }
00388 
00389 double
00390 TempoTrackV2::get_max_val(const d_vec_t &df)
00391 {
00392     double maxval = 0.;
00393     for (unsigned int i=0; i<df.size(); i++)
00394     {
00395         if (maxval < df[i])
00396         {
00397             maxval = df[i];
00398         }
00399     }
00400 
00401     return maxval;
00402 }
00403 
00404 int
00405 TempoTrackV2::get_max_ind(const d_vec_t &df)
00406 {
00407     double maxval = 0.;
00408     int ind = 0;
00409     for (unsigned int i=0; i<df.size(); i++)
00410     {
00411         if (maxval < df[i])
00412         {
00413             maxval = df[i];
00414             ind = i;
00415         }
00416     }
00417 
00418     return ind;
00419 }
00420 
00421 void
00422 TempoTrackV2::normalise_vec(d_vec_t &df)
00423 {
00424     double sum = 0.;
00425     for (unsigned int i=0; i<df.size(); i++)
00426     {
00427         sum += df[i];
00428     }
00429 
00430     for (unsigned int i=0; i<df.size(); i++)
00431     {
00432         df[i]/= (sum + EPS);
00433     }
00434 }
00435 
00436 // MEPD 28/11/12
00437 // this function has been updated to allow the "alpha" and "tightness" parameters
00438 // of the dynamic program to be set by the user
00439 // the default value of alpha = 0.9 and tightness = 4
00440 void
00441 TempoTrackV2::calculateBeats(const vector<double> &df,
00442                              const vector<double> &beat_period,
00443                              vector<double> &beats, double alpha, double tightness)
00444 {
00445     if (df.empty() || beat_period.empty()) return;
00446 
00447     d_vec_t cumscore(df.size()); // store cumulative score
00448     i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
00449     d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
00450 
00451     for (unsigned int i=0; i<df.size(); i++)
00452     {
00453         localscore[i] = df[i];
00454         backlink[i] = -1;
00455     }
00456 
00457     //double tightness = 4.;
00458     //double alpha = 0.9;
00459     // MEPD 28/11/12
00460     // debug statements that can be removed.
00461 //    std::cerr << "alpha" << alpha << std::endl;
00462 //    std::cerr << "tightness" << tightness << std::endl;
00463 
00464     // main loop
00465     for (unsigned int i=0; i<localscore.size(); i++)
00466     {
00467         int prange_min = -2*beat_period[i];
00468         int prange_max = round(-0.5*beat_period[i]);
00469 
00470         // transition range
00471         d_vec_t txwt (prange_max - prange_min + 1);
00472         d_vec_t scorecands (txwt.size());
00473 
00474         for (unsigned int j=0;j<txwt.size();j++)
00475         {
00476             double mu = static_cast<double> (beat_period[i]);
00477             txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
00478 
00479             // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
00480             // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION:  D_VEC_T SCORECANDS (TXWT.SIZE());
00481 
00482             int cscore_ind = i+prange_min+j;
00483             if (cscore_ind >= 0)
00484             {
00485                 scorecands[j] = txwt[j] * cumscore[cscore_ind];
00486             }
00487         }
00488 
00489         // find max value and index of maximum value
00490         double vv = get_max_val(scorecands);
00491         int xx = get_max_ind(scorecands);
00492 
00493         cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
00494         backlink[i] = i+prange_min+xx;
00495 
00496 //        std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
00497     }
00498 
00499     // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
00500     d_vec_t tmp_vec;
00501     for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
00502     {
00503         tmp_vec.push_back(cumscore[i]);
00504     }
00505 
00506     int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
00507 
00508     // can happen if no results obtained earlier (e.g. input too short)
00509     if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
00510 
00511     // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
00512     //  BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
00513     i_vec_t ibeats;
00514     ibeats.push_back(startpoint);
00515 //    std::cerr << "startpoint = " << startpoint << std::endl;
00516     while (backlink[ibeats.back()] > 0)
00517     {
00518 //        std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
00519         int b = ibeats.back();
00520         if (backlink[b] == b) break; // shouldn't happen... haha
00521         ibeats.push_back(backlink[b]);
00522     }
00523 
00524     // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
00525     for (unsigned int i=0; i<ibeats.size(); i++)
00526     {
00527         beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
00528     }
00529 }
00530 
00531