svcore
1.9
|
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 00002 00003 /* 00004 Sonic Visualiser 00005 An audio file viewer and annotation editor. 00006 Centre for Digital Music, Queen Mary, University of London. 00007 This file copyright 2006 Chris Cannam. 00008 00009 This program is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU General Public License as 00011 published by the Free Software Foundation; either version 2 of the 00012 License, or (at your option) any later version. See the file 00013 COPYING included with this distribution for more information. 00014 */ 00015 00016 #include "FFTModel.h" 00017 #include "DenseTimeValueModel.h" 00018 #include "AggregateWaveModel.h" 00019 00020 #include "base/Profiler.h" 00021 #include "base/Pitch.h" 00022 00023 #include <algorithm> 00024 00025 #include <cassert> 00026 00027 #ifndef __GNUC__ 00028 #include <alloca.h> 00029 #endif 00030 00031 FFTModel::FFTModel(const DenseTimeValueModel *model, 00032 int channel, 00033 WindowType windowType, 00034 int windowSize, 00035 int windowIncrement, 00036 int fftSize, 00037 bool polar, 00038 StorageAdviser::Criteria criteria, 00039 int fillFromColumn) : 00041 m_server(0), 00042 m_xshift(0), 00043 m_yshift(0) 00044 { 00045 setSourceModel(const_cast<DenseTimeValueModel *>(model)); 00046 00047 m_server = getServer(model, 00048 channel, 00049 windowType, 00050 windowSize, 00051 windowIncrement, 00052 fftSize, 00053 polar, 00054 criteria, 00055 fillFromColumn); 00056 00057 if (!m_server) return; // caller should check isOK() 00058 00059 int xratio = windowIncrement / m_server->getWindowIncrement(); 00060 int yratio = m_server->getFFTSize() / fftSize; 00061 00062 while (xratio > 1) { 00063 if (xratio & 0x1) { 00064 cerr << "ERROR: FFTModel: Window increment ratio " 00065 << windowIncrement << " / " 00066 << m_server->getWindowIncrement() 00067 << " must be a power of two" << endl; 00068 assert(!(xratio & 0x1)); 00069 } 00070 ++m_xshift; 00071 xratio >>= 1; 00072 } 00073 00074 while (yratio > 1) { 00075 if (yratio & 0x1) { 00076 cerr << "ERROR: FFTModel: FFT size ratio " 00077 << m_server->getFFTSize() << " / " << fftSize 00078 << " must be a power of two" << endl; 00079 assert(!(yratio & 0x1)); 00080 } 00081 ++m_yshift; 00082 yratio >>= 1; 00083 } 00084 } 00085 00086 FFTModel::~FFTModel() 00087 { 00088 if (m_server) FFTDataServer::releaseInstance(m_server); 00089 } 00090 00091 void 00092 FFTModel::sourceModelAboutToBeDeleted() 00093 { 00094 if (m_sourceModel) { 00095 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << endl; 00096 if (m_server) { 00097 FFTDataServer::releaseInstance(m_server); 00098 m_server = 0; 00099 } 00100 FFTDataServer::modelAboutToBeDeleted(m_sourceModel); 00101 } 00102 } 00103 00104 FFTDataServer * 00105 FFTModel::getServer(const DenseTimeValueModel *model, 00106 int channel, 00107 WindowType windowType, 00108 int windowSize, 00109 int windowIncrement, 00110 int fftSize, 00111 bool polar, 00112 StorageAdviser::Criteria criteria, 00113 int fillFromColumn) 00114 { 00115 // Obviously, an FFT model of channel C (where C != -1) of an 00116 // aggregate model is the same as the FFT model of the appropriate 00117 // channel of whichever model that aggregate channel is drawn 00118 // from. We should use that model here, in case we already have 00119 // the data for it or will be wanting the same data again later. 00120 00121 // If the channel is -1 (i.e. mixture of all channels), then we 00122 // can't do this shortcut unless the aggregate model only has one 00123 // channel or contains exactly all of the channels of a single 00124 // other model. That isn't very likely -- if it were the case, 00125 // why would we be using an aggregate model? 00126 00127 if (channel >= 0) { 00128 00129 const AggregateWaveModel *aggregate = 00130 dynamic_cast<const AggregateWaveModel *>(model); 00131 00132 if (aggregate && channel < aggregate->getComponentCount()) { 00133 00134 AggregateWaveModel::ModelChannelSpec spec = 00135 aggregate->getComponent(channel); 00136 00137 return getServer(spec.model, 00138 spec.channel, 00139 windowType, 00140 windowSize, 00141 windowIncrement, 00142 fftSize, 00143 polar, 00144 criteria, 00145 fillFromColumn); 00146 } 00147 } 00148 00149 // The normal case 00150 00151 return FFTDataServer::getFuzzyInstance(model, 00152 channel, 00153 windowType, 00154 windowSize, 00155 windowIncrement, 00156 fftSize, 00157 polar, 00158 criteria, 00159 fillFromColumn); 00160 } 00161 00162 int 00163 FFTModel::getSampleRate() const 00164 { 00165 return isOK() ? m_server->getModel()->getSampleRate() : 0; 00166 } 00167 00168 FFTModel::Column 00169 FFTModel::getColumn(int x) const 00170 { 00171 Profiler profiler("FFTModel::getColumn", false); 00172 00173 Column result; 00174 00175 result.clear(); 00176 int h = getHeight(); 00177 result.reserve(h); 00178 00179 #ifdef __GNUC__ 00180 float magnitudes[h]; 00181 #else 00182 float *magnitudes = (float *)alloca(h * sizeof(float)); 00183 #endif 00184 00185 if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) { 00186 00187 for (int y = 0; y < h; ++y) { 00188 result.push_back(magnitudes[y]); 00189 } 00190 00191 } else { 00192 for (int i = 0; i < h; ++i) result.push_back(0.f); 00193 } 00194 00195 return result; 00196 } 00197 00198 QString 00199 FFTModel::getBinName(int n) const 00200 { 00201 int sr = getSampleRate(); 00202 if (!sr) return ""; 00203 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2)); 00204 return name; 00205 } 00206 00207 bool 00208 FFTModel::estimateStableFrequency(int x, int y, float &frequency) 00209 { 00210 if (!isOK()) return false; 00211 00212 int sampleRate = m_server->getModel()->getSampleRate(); 00213 00214 int fftSize = m_server->getFFTSize() >> m_yshift; 00215 frequency = (float(y) * sampleRate) / fftSize; 00216 00217 if (x+1 >= getWidth()) return false; 00218 00219 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. 00220 // At hopsize h and sample rate sr, one hop happens in h/sr sec. 00221 // At window size w, for bin b, f is b*sr/w. 00222 // thus 2pi phase shift happens in w/(b*sr) sec. 00223 // We need to know what phase shift we expect from h/sr sec. 00224 // -> 2pi * ((h/sr) / (w/(b*sr))) 00225 // = 2pi * ((h * b * sr) / (w * sr)) 00226 // = 2pi * (h * b) / w. 00227 00228 float oldPhase = getPhaseAt(x, y); 00229 float newPhase = getPhaseAt(x+1, y); 00230 00231 int incr = getResolution(); 00232 00233 float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize; 00234 00235 float phaseError = princargf(newPhase - expectedPhase); 00236 00237 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize)); 00238 00239 // The new frequency estimate based on the phase error resulting 00240 // from assuming the "native" frequency of this bin 00241 00242 frequency = 00243 (sampleRate * (expectedPhase + phaseError - oldPhase)) / 00244 (2 * M_PI * incr); 00245 00246 return true; 00247 } 00248 00249 FFTModel::PeakLocationSet 00250 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax) 00251 { 00252 Profiler profiler("FFTModel::getPeaks"); 00253 00254 FFTModel::PeakLocationSet peaks; 00255 if (!isOK()) return peaks; 00256 00257 if (ymax == 0 || ymax > getHeight() - 1) { 00258 ymax = getHeight() - 1; 00259 } 00260 00261 if (type == AllPeaks) { 00262 int minbin = ymin; 00263 if (minbin > 0) minbin = minbin - 1; 00264 int maxbin = ymax; 00265 if (maxbin < getHeight() - 1) maxbin = maxbin + 1; 00266 const int n = maxbin - minbin + 1; 00267 #ifdef __GNUC__ 00268 float values[n]; 00269 #else 00270 float *values = (float *)alloca(n * sizeof(float)); 00271 #endif 00272 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1); 00273 for (int bin = ymin; bin <= ymax; ++bin) { 00274 if (bin == minbin || bin == maxbin) continue; 00275 if (values[bin - minbin] > values[bin - minbin - 1] && 00276 values[bin - minbin] > values[bin - minbin + 1]) { 00277 peaks.insert(bin); 00278 } 00279 } 00280 return peaks; 00281 } 00282 00283 Column values = getColumn(x); 00284 00285 float mean = 0.f; 00286 for (int i = 0; i < values.size(); ++i) mean += values[i]; 00287 if (values.size() >0) mean /= values.size(); 00288 00289 // For peak picking we use a moving median window, picking the 00290 // highest value within each continuous region of values that 00291 // exceed the median. For pitch adaptivity, we adjust the window 00292 // size to a roughly constant pitch range (about four tones). 00293 00294 int sampleRate = getSampleRate(); 00295 00296 std::deque<float> window; 00297 std::vector<int> inrange; 00298 float dist = 0.5; 00299 00300 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist); 00301 int halfWin = medianWinSize/2; 00302 00303 int binmin; 00304 if (ymin > halfWin) binmin = ymin - halfWin; 00305 else binmin = 0; 00306 00307 int binmax; 00308 if (ymax + halfWin < values.size()) binmax = ymax + halfWin; 00309 else binmax = values.size()-1; 00310 00311 int prevcentre = 0; 00312 00313 for (int bin = binmin; bin <= binmax; ++bin) { 00314 00315 float value = values[bin]; 00316 00317 window.push_back(value); 00318 00319 // so-called median will actually be the dist*100'th percentile 00320 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist); 00321 halfWin = medianWinSize/2; 00322 00323 while ((int)window.size() > medianWinSize) { 00324 window.pop_front(); 00325 } 00326 00327 int actualSize = window.size(); 00328 00329 if (type == MajorPitchAdaptivePeaks) { 00330 if (ymax + halfWin < values.size()) binmax = ymax + halfWin; 00331 else binmax = values.size()-1; 00332 } 00333 00334 std::deque<float> sorted(window); 00335 std::sort(sorted.begin(), sorted.end()); 00336 float median = sorted[int(sorted.size() * dist)]; 00337 00338 int centrebin = 0; 00339 if (bin > actualSize/2) centrebin = bin - actualSize/2; 00340 00341 while (centrebin > prevcentre || bin == binmin) { 00342 00343 if (centrebin > prevcentre) ++prevcentre; 00344 00345 float centre = values[prevcentre]; 00346 00347 if (centre > median) { 00348 inrange.push_back(centrebin); 00349 } 00350 00351 if (centre <= median || centrebin+1 == values.size()) { 00352 if (!inrange.empty()) { 00353 int peakbin = 0; 00354 float peakval = 0.f; 00355 for (int i = 0; i < (int)inrange.size(); ++i) { 00356 if (i == 0 || values[inrange[i]] > peakval) { 00357 peakval = values[inrange[i]]; 00358 peakbin = inrange[i]; 00359 } 00360 } 00361 inrange.clear(); 00362 if (peakbin >= ymin && peakbin <= ymax) { 00363 peaks.insert(peakbin); 00364 } 00365 } 00366 } 00367 00368 if (bin == binmin) break; 00369 } 00370 } 00371 00372 return peaks; 00373 } 00374 00375 int 00376 FFTModel::getPeakPickWindowSize(PeakPickType type, int sampleRate, 00377 int bin, float &percentile) const 00378 { 00379 percentile = 0.5; 00380 if (type == MajorPeaks) return 10; 00381 if (bin == 0) return 3; 00382 00383 int fftSize = m_server->getFFTSize() >> m_yshift; 00384 float binfreq = (float(sampleRate) * bin) / fftSize; 00385 float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); 00386 00387 int hibin = lrintf((hifreq * fftSize) / sampleRate); 00388 int medianWinSize = hibin - bin; 00389 if (medianWinSize < 3) medianWinSize = 3; 00390 00391 percentile = 0.5 + (binfreq / sampleRate); 00392 00393 return medianWinSize; 00394 } 00395 00396 FFTModel::PeakSet 00397 FFTModel::getPeakFrequencies(PeakPickType type, int x, 00398 int ymin, int ymax) 00399 { 00400 Profiler profiler("FFTModel::getPeakFrequencies"); 00401 00402 PeakSet peaks; 00403 if (!isOK()) return peaks; 00404 PeakLocationSet locations = getPeaks(type, x, ymin, ymax); 00405 00406 int sampleRate = getSampleRate(); 00407 int fftSize = m_server->getFFTSize() >> m_yshift; 00408 int incr = getResolution(); 00409 00410 // This duplicates some of the work of estimateStableFrequency to 00411 // allow us to retrieve the phases in two separate vertical 00412 // columns, instead of jumping back and forth between columns x and 00413 // x+1, which may be significantly slower if re-seeking is needed 00414 00415 std::vector<float> phases; 00416 for (PeakLocationSet::iterator i = locations.begin(); 00417 i != locations.end(); ++i) { 00418 phases.push_back(getPhaseAt(x, *i)); 00419 } 00420 00421 int phaseIndex = 0; 00422 for (PeakLocationSet::iterator i = locations.begin(); 00423 i != locations.end(); ++i) { 00424 float oldPhase = phases[phaseIndex]; 00425 float newPhase = getPhaseAt(x+1, *i); 00426 float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize; 00427 float phaseError = princargf(newPhase - expectedPhase); 00428 float frequency = 00429 (sampleRate * (expectedPhase + phaseError - oldPhase)) 00430 / (2 * M_PI * incr); 00431 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize)); 00432 // if (stable) 00433 peaks[*i] = frequency; 00434 ++phaseIndex; 00435 } 00436 00437 return peaks; 00438 } 00439 00440 Model * 00441 FFTModel::clone() const 00442 { 00443 return new FFTModel(*this); 00444 } 00445 00446 FFTModel::FFTModel(const FFTModel &model) : 00447 DenseThreeDimensionalModel(), 00448 m_server(model.m_server), 00449 m_xshift(model.m_xshift), 00450 m_yshift(model.m_yshift) 00451 { 00452 FFTDataServer::claimInstance(m_server); 00453 } 00454