qm-dsp
1.8
|
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 00002 00003 /* 00004 * ClusterMeltSegmenter.cpp 00005 * 00006 * Created by Mark Levy on 23/03/2006. 00007 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. 00008 00009 This program is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU General Public License as 00011 published by the Free Software Foundation; either version 2 of the 00012 License, or (at your option) any later version. See the file 00013 COPYING included with this distribution for more information. 00014 */ 00015 00016 #include <cfloat> 00017 #include <cmath> 00018 00019 #include "ClusterMeltSegmenter.h" 00020 #include "cluster_segmenter.h" 00021 #include "segment.h" 00022 00023 #include "dsp/transforms/FFT.h" 00024 #include "dsp/chromagram/ConstantQ.h" 00025 #include "dsp/rateconversion/Decimator.h" 00026 #include "dsp/mfcc/MFCC.h" 00027 00028 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : 00029 window(NULL), 00030 fft(NULL), 00031 constq(NULL), 00032 mfcc(NULL), 00033 featureType(params.featureType), 00034 hopSize(params.hopSize), 00035 windowSize(params.windowSize), 00036 fmin(params.fmin), 00037 fmax(params.fmax), 00038 nbins(params.nbins), 00039 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c 00040 nHMMStates(params.nHMMStates), 00041 nclusters(params.nclusters), 00042 histogramLength(params.histogramLength), 00043 neighbourhoodLimit(params.neighbourhoodLimit), 00044 decimator(NULL) 00045 { 00046 } 00047 00048 void ClusterMeltSegmenter::initialise(int fs) 00049 { 00050 samplerate = fs; 00051 00052 if (featureType == FEATURE_TYPE_CONSTQ || 00053 featureType == FEATURE_TYPE_CHROMA) { 00054 00055 // run internal processing at 11025 or thereabouts 00056 int internalRate = 11025; 00057 int decimationFactor = samplerate / internalRate; 00058 if (decimationFactor < 1) decimationFactor = 1; 00059 00060 // must be a power of two 00061 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; 00062 00063 if (decimationFactor > Decimator::getHighestSupportedFactor()) { 00064 decimationFactor = Decimator::getHighestSupportedFactor(); 00065 } 00066 00067 if (decimationFactor > 1) { 00068 decimator = new Decimator(getWindowsize(), decimationFactor); 00069 } 00070 00071 CQConfig config; 00072 config.FS = samplerate / decimationFactor; 00073 config.min = fmin; 00074 config.max = fmax; 00075 config.BPO = nbins; 00076 config.CQThresh = 0.0054; 00077 00078 constq = new ConstantQ(config); 00079 constq->sparsekernel(); 00080 00081 ncoeff = constq->getK(); 00082 00083 fft = new FFTReal(constq->getfftlength()); 00084 00085 } else if (featureType == FEATURE_TYPE_MFCC) { 00086 00087 // run internal processing at 22050 or thereabouts 00088 int internalRate = 22050; 00089 int decimationFactor = samplerate / internalRate; 00090 if (decimationFactor < 1) decimationFactor = 1; 00091 00092 // must be a power of two 00093 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; 00094 00095 if (decimationFactor > Decimator::getHighestSupportedFactor()) { 00096 decimationFactor = Decimator::getHighestSupportedFactor(); 00097 } 00098 00099 if (decimationFactor > 1) { 00100 decimator = new Decimator(getWindowsize(), decimationFactor); 00101 } 00102 00103 MFCCConfig config(samplerate / decimationFactor); 00104 config.fftsize = 2048; 00105 config.nceps = 19; 00106 config.want_c0 = true; 00107 00108 mfcc = new MFCC(config); 00109 ncoeff = config.nceps + 1; 00110 } 00111 } 00112 00113 ClusterMeltSegmenter::~ClusterMeltSegmenter() 00114 { 00115 delete window; 00116 delete constq; 00117 delete decimator; 00118 delete fft; 00119 } 00120 00121 int 00122 ClusterMeltSegmenter::getWindowsize() 00123 { 00124 return static_cast<int>(windowSize * samplerate + 0.001); 00125 } 00126 00127 int 00128 ClusterMeltSegmenter::getHopsize() 00129 { 00130 return static_cast<int>(hopSize * samplerate + 0.001); 00131 } 00132 00133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) 00134 { 00135 if (featureType == FEATURE_TYPE_CONSTQ || 00136 featureType == FEATURE_TYPE_CHROMA) { 00137 extractFeaturesConstQ(samples, nsamples); 00138 } else if (featureType == FEATURE_TYPE_MFCC) { 00139 extractFeaturesMFCC(samples, nsamples); 00140 } 00141 } 00142 00143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) 00144 { 00145 if (!constq) { 00146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " 00147 << "No const-q: initialise not called?" 00148 << std::endl; 00149 return; 00150 } 00151 00152 if (nsamples < getWindowsize()) { 00153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; 00154 return; 00155 } 00156 00157 int fftsize = constq->getfftlength(); 00158 00159 if (!window || window->getSize() != fftsize) { 00160 delete window; 00161 window = new Window<double>(HammingWindow, fftsize); 00162 } 00163 00164 vector<double> cq(ncoeff); 00165 00166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; 00167 00168 const double *psource = samples; 00169 int pcount = nsamples; 00170 00171 if (decimator) { 00172 pcount = nsamples / decimator->getFactor(); 00173 double *decout = new double[pcount]; 00174 decimator->process(samples, decout); 00175 psource = decout; 00176 } 00177 00178 int origin = 0; 00179 00180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; 00181 00182 int frames = 0; 00183 00184 double *frame = new double[fftsize]; 00185 double *real = new double[fftsize]; 00186 double *imag = new double[fftsize]; 00187 double *cqre = new double[ncoeff]; 00188 double *cqim = new double[ncoeff]; 00189 00190 while (origin <= pcount) { 00191 00192 // always need at least one fft window per block, but after 00193 // that we want to avoid having any incomplete ones 00194 if (origin > 0 && origin + fftsize >= pcount) break; 00195 00196 for (int i = 0; i < fftsize; ++i) { 00197 if (origin + i < pcount) { 00198 frame[i] = psource[origin + i]; 00199 } else { 00200 frame[i] = 0.0; 00201 } 00202 } 00203 00204 for (int i = 0; i < fftsize/2; ++i) { 00205 double value = frame[i]; 00206 frame[i] = frame[i + fftsize/2]; 00207 frame[i + fftsize/2] = value; 00208 } 00209 00210 window->cut(frame); 00211 00212 fft->forward(frame, real, imag); 00213 00214 constq->process(real, imag, cqre, cqim); 00215 00216 for (int i = 0; i < ncoeff; ++i) { 00217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); 00218 } 00219 ++frames; 00220 00221 origin += fftsize/2; 00222 } 00223 00224 delete [] cqre; 00225 delete [] cqim; 00226 delete [] real; 00227 delete [] imag; 00228 delete [] frame; 00229 00230 for (int i = 0; i < ncoeff; ++i) { 00231 cq[i] /= frames; 00232 } 00233 00234 if (decimator) delete[] psource; 00235 00236 features.push_back(cq); 00237 } 00238 00239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) 00240 { 00241 if (!mfcc) { 00242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " 00243 << "No mfcc: initialise not called?" 00244 << std::endl; 00245 return; 00246 } 00247 00248 if (nsamples < getWindowsize()) { 00249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; 00250 return; 00251 } 00252 00253 int fftsize = mfcc->getfftlength(); 00254 00255 vector<double> cc(ncoeff); 00256 00257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; 00258 00259 const double *psource = samples; 00260 int pcount = nsamples; 00261 00262 if (decimator) { 00263 pcount = nsamples / decimator->getFactor(); 00264 double *decout = new double[pcount]; 00265 decimator->process(samples, decout); 00266 psource = decout; 00267 } 00268 00269 int origin = 0; 00270 int frames = 0; 00271 00272 double *frame = new double[fftsize]; 00273 double *ccout = new double[ncoeff]; 00274 00275 while (origin <= pcount) { 00276 00277 // always need at least one fft window per block, but after 00278 // that we want to avoid having any incomplete ones 00279 if (origin > 0 && origin + fftsize >= pcount) break; 00280 00281 for (int i = 0; i < fftsize; ++i) { 00282 if (origin + i < pcount) { 00283 frame[i] = psource[origin + i]; 00284 } else { 00285 frame[i] = 0.0; 00286 } 00287 } 00288 00289 mfcc->process(frame, ccout); 00290 00291 for (int i = 0; i < ncoeff; ++i) { 00292 cc[i] += ccout[i]; 00293 } 00294 ++frames; 00295 00296 origin += fftsize/2; 00297 } 00298 00299 delete [] ccout; 00300 delete [] frame; 00301 00302 for (int i = 0; i < ncoeff; ++i) { 00303 cc[i] /= frames; 00304 } 00305 00306 if (decimator) delete[] psource; 00307 00308 features.push_back(cc); 00309 } 00310 00311 void ClusterMeltSegmenter::segment(int m) 00312 { 00313 nclusters = m; 00314 segment(); 00315 } 00316 00317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) 00318 { 00319 features = f; 00320 featureType = FEATURE_TYPE_UNKNOWN; 00321 } 00322 00323 void ClusterMeltSegmenter::segment() 00324 { 00325 delete constq; 00326 constq = 0; 00327 delete mfcc; 00328 mfcc = 0; 00329 delete decimator; 00330 decimator = 0; 00331 00332 if (features.size() < histogramLength) return; 00333 /* 00334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() 00335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; 00336 */ 00337 // copy the features to a native array and use the existing C segmenter... 00338 double** arrFeatures = new double*[features.size()]; 00339 for (int i = 0; i < features.size(); i++) 00340 { 00341 if (featureType == FEATURE_TYPE_UNKNOWN) { 00342 arrFeatures[i] = new double[features[0].size()]; 00343 for (int j = 0; j < features[0].size(); j++) 00344 arrFeatures[i][j] = features[i][j]; 00345 } else { 00346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope 00347 for (int j = 0; j < ncoeff; j++) 00348 arrFeatures[i][j] = features[i][j]; 00349 } 00350 } 00351 00352 q = new int[features.size()]; 00353 00354 if (featureType == FEATURE_TYPE_UNKNOWN || 00355 featureType == FEATURE_TYPE_MFCC) 00356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, 00357 nclusters, neighbourhoodLimit); 00358 else 00359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, 00360 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); 00361 00362 // convert the cluster assignment sequence to a segmentation 00363 makeSegmentation(q, features.size()); 00364 00365 // de-allocate arrays 00366 delete [] q; 00367 for (int i = 0; i < features.size(); i++) 00368 delete [] arrFeatures[i]; 00369 delete [] arrFeatures; 00370 00371 // clear the features 00372 clear(); 00373 } 00374 00375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len) 00376 { 00377 segmentation.segments.clear(); 00378 segmentation.nsegtypes = nclusters; 00379 segmentation.samplerate = samplerate; 00380 00381 Segment segment; 00382 segment.start = 0; 00383 segment.type = q[0]; 00384 00385 for (int i = 1; i < len; i++) 00386 { 00387 if (q[i] != q[i-1]) 00388 { 00389 segment.end = i * getHopsize(); 00390 segmentation.segments.push_back(segment); 00391 segment.type = q[i]; 00392 segment.start = segment.end; 00393 } 00394 } 00395 segment.end = len * getHopsize(); 00396 segmentation.segments.push_back(segment); 00397 } 00398