svcore  1.9
FFTModel.cpp
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
00002 
00003 /*
00004     Sonic Visualiser
00005     An audio file viewer and annotation editor.
00006     Centre for Digital Music, Queen Mary, University of London.
00007     This file copyright 2006 Chris Cannam.
00008     
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of the
00012     License, or (at your option) any later version.  See the file
00013     COPYING included with this distribution for more information.
00014 */
00015 
00016 #include "FFTModel.h"
00017 #include "DenseTimeValueModel.h"
00018 #include "AggregateWaveModel.h"
00019 
00020 #include "base/Profiler.h"
00021 #include "base/Pitch.h"
00022 
00023 #include <algorithm>
00024 
00025 #include <cassert>
00026 
00027 #ifndef __GNUC__
00028 #include <alloca.h>
00029 #endif
00030 
00031 FFTModel::FFTModel(const DenseTimeValueModel *model,
00032                    int channel,
00033                    WindowType windowType,
00034                    int windowSize,
00035                    int windowIncrement,
00036                    int fftSize,
00037                    bool polar,
00038                    StorageAdviser::Criteria criteria,
00039                    int fillFromColumn) :
00041     m_server(0),
00042     m_xshift(0),
00043     m_yshift(0)
00044 {
00045     setSourceModel(const_cast<DenseTimeValueModel *>(model)); 
00046 
00047     m_server = getServer(model,
00048                          channel,
00049                          windowType,
00050                          windowSize,
00051                          windowIncrement,
00052                          fftSize,
00053                          polar,
00054                          criteria,
00055                          fillFromColumn);
00056 
00057     if (!m_server) return; // caller should check isOK()
00058 
00059     int xratio = windowIncrement / m_server->getWindowIncrement();
00060     int yratio = m_server->getFFTSize() / fftSize;
00061 
00062     while (xratio > 1) {
00063         if (xratio & 0x1) {
00064             cerr << "ERROR: FFTModel: Window increment ratio "
00065                       << windowIncrement << " / "
00066                       << m_server->getWindowIncrement()
00067                       << " must be a power of two" << endl;
00068             assert(!(xratio & 0x1));
00069         }
00070         ++m_xshift;
00071         xratio >>= 1;
00072     }
00073 
00074     while (yratio > 1) {
00075         if (yratio & 0x1) {
00076             cerr << "ERROR: FFTModel: FFT size ratio "
00077                       << m_server->getFFTSize() << " / " << fftSize
00078                       << " must be a power of two" << endl;
00079             assert(!(yratio & 0x1));
00080         }
00081         ++m_yshift;
00082         yratio >>= 1;
00083     }
00084 }
00085 
00086 FFTModel::~FFTModel()
00087 {
00088     if (m_server) FFTDataServer::releaseInstance(m_server);
00089 }
00090 
00091 void
00092 FFTModel::sourceModelAboutToBeDeleted()
00093 {
00094     if (m_sourceModel) {
00095         cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << endl;
00096         if (m_server) {
00097             FFTDataServer::releaseInstance(m_server);
00098             m_server = 0;
00099         }
00100         FFTDataServer::modelAboutToBeDeleted(m_sourceModel);
00101     }
00102 }
00103 
00104 FFTDataServer *
00105 FFTModel::getServer(const DenseTimeValueModel *model,
00106                     int channel,
00107                     WindowType windowType,
00108                     int windowSize,
00109                     int windowIncrement,
00110                     int fftSize,
00111                     bool polar,
00112                     StorageAdviser::Criteria criteria,
00113                     int fillFromColumn)
00114 {
00115     // Obviously, an FFT model of channel C (where C != -1) of an
00116     // aggregate model is the same as the FFT model of the appropriate
00117     // channel of whichever model that aggregate channel is drawn
00118     // from.  We should use that model here, in case we already have
00119     // the data for it or will be wanting the same data again later.
00120 
00121     // If the channel is -1 (i.e. mixture of all channels), then we
00122     // can't do this shortcut unless the aggregate model only has one
00123     // channel or contains exactly all of the channels of a single
00124     // other model.  That isn't very likely -- if it were the case,
00125     // why would we be using an aggregate model?
00126 
00127     if (channel >= 0) {
00128 
00129         const AggregateWaveModel *aggregate =
00130             dynamic_cast<const AggregateWaveModel *>(model);
00131 
00132         if (aggregate && channel < aggregate->getComponentCount()) {
00133 
00134             AggregateWaveModel::ModelChannelSpec spec =
00135                 aggregate->getComponent(channel);
00136 
00137             return getServer(spec.model,
00138                              spec.channel,
00139                              windowType,
00140                              windowSize,
00141                              windowIncrement,
00142                              fftSize,
00143                              polar,
00144                              criteria,
00145                              fillFromColumn);
00146         }
00147     }
00148 
00149     // The normal case
00150 
00151     return FFTDataServer::getFuzzyInstance(model,
00152                                            channel,
00153                                            windowType,
00154                                            windowSize,
00155                                            windowIncrement,
00156                                            fftSize,
00157                                            polar,
00158                                            criteria,
00159                                            fillFromColumn);
00160 }
00161 
00162 int
00163 FFTModel::getSampleRate() const
00164 {
00165     return isOK() ? m_server->getModel()->getSampleRate() : 0;
00166 }
00167 
00168 FFTModel::Column
00169 FFTModel::getColumn(int x) const
00170 {
00171     Profiler profiler("FFTModel::getColumn", false);
00172 
00173     Column result;
00174 
00175     result.clear();
00176     int h = getHeight();
00177     result.reserve(h);
00178 
00179 #ifdef __GNUC__
00180     float magnitudes[h];
00181 #else
00182     float *magnitudes = (float *)alloca(h * sizeof(float));
00183 #endif
00184 
00185     if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
00186 
00187         for (int y = 0; y < h; ++y) {
00188             result.push_back(magnitudes[y]);
00189         }
00190 
00191     } else {
00192         for (int i = 0; i < h; ++i) result.push_back(0.f);
00193     }
00194 
00195     return result;
00196 }
00197 
00198 QString
00199 FFTModel::getBinName(int n) const
00200 {
00201     int sr = getSampleRate();
00202     if (!sr) return "";
00203     QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
00204     return name;
00205 }
00206 
00207 bool
00208 FFTModel::estimateStableFrequency(int x, int y, float &frequency)
00209 {
00210     if (!isOK()) return false;
00211 
00212     int sampleRate = m_server->getModel()->getSampleRate();
00213 
00214     int fftSize = m_server->getFFTSize() >> m_yshift;
00215     frequency = (float(y) * sampleRate) / fftSize;
00216 
00217     if (x+1 >= getWidth()) return false;
00218 
00219     // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
00220     // At hopsize h and sample rate sr, one hop happens in h/sr sec.
00221     // At window size w, for bin b, f is b*sr/w.
00222     // thus 2pi phase shift happens in w/(b*sr) sec.
00223     // We need to know what phase shift we expect from h/sr sec.
00224     // -> 2pi * ((h/sr) / (w/(b*sr)))
00225     //  = 2pi * ((h * b * sr) / (w * sr))
00226     //  = 2pi * (h * b) / w.
00227 
00228     float oldPhase = getPhaseAt(x, y);
00229     float newPhase = getPhaseAt(x+1, y);
00230 
00231     int incr = getResolution();
00232 
00233     float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
00234 
00235     float phaseError = princargf(newPhase - expectedPhase);
00236 
00237 //    bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
00238 
00239     // The new frequency estimate based on the phase error resulting
00240     // from assuming the "native" frequency of this bin
00241 
00242     frequency =
00243         (sampleRate * (expectedPhase + phaseError - oldPhase)) /
00244         (2 * M_PI * incr);
00245 
00246     return true;
00247 }
00248 
00249 FFTModel::PeakLocationSet
00250 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
00251 {
00252     Profiler profiler("FFTModel::getPeaks");
00253 
00254     FFTModel::PeakLocationSet peaks;
00255     if (!isOK()) return peaks;
00256 
00257     if (ymax == 0 || ymax > getHeight() - 1) {
00258         ymax = getHeight() - 1;
00259     }
00260 
00261     if (type == AllPeaks) {
00262         int minbin = ymin;
00263         if (minbin > 0) minbin = minbin - 1;
00264         int maxbin = ymax;
00265         if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
00266         const int n = maxbin - minbin + 1;
00267 #ifdef __GNUC__
00268         float values[n];
00269 #else
00270         float *values = (float *)alloca(n * sizeof(float));
00271 #endif
00272         getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
00273         for (int bin = ymin; bin <= ymax; ++bin) {
00274             if (bin == minbin || bin == maxbin) continue;
00275             if (values[bin - minbin] > values[bin - minbin - 1] &&
00276                 values[bin - minbin] > values[bin - minbin + 1]) {
00277                 peaks.insert(bin);
00278             }
00279         }
00280         return peaks;
00281     }
00282 
00283     Column values = getColumn(x);
00284 
00285     float mean = 0.f;
00286     for (int i = 0; i < values.size(); ++i) mean += values[i];
00287     if (values.size() >0) mean /= values.size();
00288 
00289     // For peak picking we use a moving median window, picking the
00290     // highest value within each continuous region of values that
00291     // exceed the median.  For pitch adaptivity, we adjust the window
00292     // size to a roughly constant pitch range (about four tones).
00293 
00294     int sampleRate = getSampleRate();
00295 
00296     std::deque<float> window;
00297     std::vector<int> inrange;
00298     float dist = 0.5;
00299 
00300     int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
00301     int halfWin = medianWinSize/2;
00302 
00303     int binmin;
00304     if (ymin > halfWin) binmin = ymin - halfWin;
00305     else binmin = 0;
00306 
00307     int binmax;
00308     if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
00309     else binmax = values.size()-1;
00310 
00311     int prevcentre = 0;
00312 
00313     for (int bin = binmin; bin <= binmax; ++bin) {
00314 
00315         float value = values[bin];
00316 
00317         window.push_back(value);
00318 
00319         // so-called median will actually be the dist*100'th percentile
00320         medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
00321         halfWin = medianWinSize/2;
00322 
00323         while ((int)window.size() > medianWinSize) {
00324             window.pop_front();
00325         }
00326 
00327         int actualSize = window.size();
00328 
00329         if (type == MajorPitchAdaptivePeaks) {
00330             if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
00331             else binmax = values.size()-1;
00332         }
00333 
00334         std::deque<float> sorted(window);
00335         std::sort(sorted.begin(), sorted.end());
00336         float median = sorted[int(sorted.size() * dist)];
00337 
00338         int centrebin = 0;
00339         if (bin > actualSize/2) centrebin = bin - actualSize/2;
00340         
00341         while (centrebin > prevcentre || bin == binmin) {
00342 
00343             if (centrebin > prevcentre) ++prevcentre;
00344 
00345             float centre = values[prevcentre];
00346 
00347             if (centre > median) {
00348                 inrange.push_back(centrebin);
00349             }
00350 
00351             if (centre <= median || centrebin+1 == values.size()) {
00352                 if (!inrange.empty()) {
00353                     int peakbin = 0;
00354                     float peakval = 0.f;
00355                     for (int i = 0; i < (int)inrange.size(); ++i) {
00356                         if (i == 0 || values[inrange[i]] > peakval) {
00357                             peakval = values[inrange[i]];
00358                             peakbin = inrange[i];
00359                         }
00360                     }
00361                     inrange.clear();
00362                     if (peakbin >= ymin && peakbin <= ymax) {
00363                         peaks.insert(peakbin);
00364                     }
00365                 }
00366             }
00367 
00368             if (bin == binmin) break;
00369         }
00370     }
00371 
00372     return peaks;
00373 }
00374 
00375 int
00376 FFTModel::getPeakPickWindowSize(PeakPickType type, int sampleRate,
00377                                 int bin, float &percentile) const
00378 {
00379     percentile = 0.5;
00380     if (type == MajorPeaks) return 10;
00381     if (bin == 0) return 3;
00382 
00383     int fftSize = m_server->getFFTSize() >> m_yshift;
00384     float binfreq = (float(sampleRate) * bin) / fftSize;
00385     float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
00386 
00387     int hibin = lrintf((hifreq * fftSize) / sampleRate);
00388     int medianWinSize = hibin - bin;
00389     if (medianWinSize < 3) medianWinSize = 3;
00390 
00391     percentile = 0.5 + (binfreq / sampleRate);
00392 
00393     return medianWinSize;
00394 }
00395 
00396 FFTModel::PeakSet
00397 FFTModel::getPeakFrequencies(PeakPickType type, int x,
00398                              int ymin, int ymax)
00399 {
00400     Profiler profiler("FFTModel::getPeakFrequencies");
00401 
00402     PeakSet peaks;
00403     if (!isOK()) return peaks;
00404     PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
00405 
00406     int sampleRate = getSampleRate();
00407     int fftSize = m_server->getFFTSize() >> m_yshift;
00408     int incr = getResolution();
00409 
00410     // This duplicates some of the work of estimateStableFrequency to
00411     // allow us to retrieve the phases in two separate vertical
00412     // columns, instead of jumping back and forth between columns x and
00413     // x+1, which may be significantly slower if re-seeking is needed
00414 
00415     std::vector<float> phases;
00416     for (PeakLocationSet::iterator i = locations.begin();
00417          i != locations.end(); ++i) {
00418         phases.push_back(getPhaseAt(x, *i));
00419     }
00420 
00421     int phaseIndex = 0;
00422     for (PeakLocationSet::iterator i = locations.begin();
00423          i != locations.end(); ++i) {
00424         float oldPhase = phases[phaseIndex];
00425         float newPhase = getPhaseAt(x+1, *i);
00426         float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
00427         float phaseError = princargf(newPhase - expectedPhase);
00428         float frequency =
00429             (sampleRate * (expectedPhase + phaseError - oldPhase))
00430             / (2 * M_PI * incr);
00431 //        bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
00432 //        if (stable)
00433         peaks[*i] = frequency;
00434         ++phaseIndex;
00435     }
00436 
00437     return peaks;
00438 }
00439 
00440 Model *
00441 FFTModel::clone() const
00442 {
00443     return new FFTModel(*this);
00444 }
00445 
00446 FFTModel::FFTModel(const FFTModel &model) :
00447     DenseThreeDimensionalModel(),
00448     m_server(model.m_server),
00449     m_xshift(model.m_xshift),
00450     m_yshift(model.m_yshift)
00451 {
00452     FFTDataServer::claimInstance(m_server);
00453 }
00454