qm-dsp  1.8
ClusterMeltSegmenter.cpp
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
00002 
00003 /*
00004  * ClusterMeltSegmenter.cpp
00005  *
00006  * Created by Mark Levy on 23/03/2006.
00007  * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
00008 
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of the
00012     License, or (at your option) any later version.  See the file
00013     COPYING included with this distribution for more information.
00014  */
00015 
00016 #include <cfloat>
00017 #include <cmath>
00018 
00019 #include "ClusterMeltSegmenter.h"
00020 #include "cluster_segmenter.h"
00021 #include "segment.h"
00022 
00023 #include "dsp/transforms/FFT.h"
00024 #include "dsp/chromagram/ConstantQ.h"
00025 #include "dsp/rateconversion/Decimator.h"
00026 #include "dsp/mfcc/MFCC.h"
00027 
00028 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
00029     window(NULL),
00030     fft(NULL),
00031     constq(NULL),
00032     mfcc(NULL),
00033     featureType(params.featureType),
00034     hopSize(params.hopSize),
00035     windowSize(params.windowSize),
00036     fmin(params.fmin),
00037     fmax(params.fmax),
00038     nbins(params.nbins),
00039     ncomponents(params.ncomponents),    // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
00040     nHMMStates(params.nHMMStates),
00041     nclusters(params.nclusters),
00042     histogramLength(params.histogramLength),
00043     neighbourhoodLimit(params.neighbourhoodLimit),
00044     decimator(NULL)
00045 {
00046 }
00047 
00048 void ClusterMeltSegmenter::initialise(int fs)
00049 {
00050     samplerate = fs;
00051 
00052     if (featureType == FEATURE_TYPE_CONSTQ ||
00053         featureType == FEATURE_TYPE_CHROMA) {
00054         
00055         // run internal processing at 11025 or thereabouts
00056         int internalRate = 11025;
00057         int decimationFactor = samplerate / internalRate;
00058         if (decimationFactor < 1) decimationFactor = 1;
00059 
00060         // must be a power of two
00061         while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
00062 
00063         if (decimationFactor > Decimator::getHighestSupportedFactor()) {
00064             decimationFactor = Decimator::getHighestSupportedFactor();
00065         }
00066 
00067         if (decimationFactor > 1) {
00068             decimator = new Decimator(getWindowsize(), decimationFactor);
00069         }
00070 
00071         CQConfig config;
00072         config.FS = samplerate / decimationFactor;
00073         config.min = fmin;
00074         config.max = fmax;
00075         config.BPO = nbins;
00076         config.CQThresh = 0.0054;
00077 
00078         constq = new ConstantQ(config);
00079         constq->sparsekernel();
00080         
00081         ncoeff = constq->getK();
00082 
00083         fft = new FFTReal(constq->getfftlength());
00084         
00085     } else if (featureType == FEATURE_TYPE_MFCC) {
00086 
00087         // run internal processing at 22050 or thereabouts
00088         int internalRate = 22050;
00089         int decimationFactor = samplerate / internalRate;
00090         if (decimationFactor < 1) decimationFactor = 1;
00091 
00092         // must be a power of two
00093         while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
00094 
00095         if (decimationFactor > Decimator::getHighestSupportedFactor()) {
00096             decimationFactor = Decimator::getHighestSupportedFactor();
00097         }
00098 
00099         if (decimationFactor > 1) {
00100             decimator = new Decimator(getWindowsize(), decimationFactor);
00101         }
00102 
00103         MFCCConfig config(samplerate / decimationFactor);
00104         config.fftsize = 2048;
00105         config.nceps = 19;
00106         config.want_c0 = true;
00107 
00108         mfcc = new MFCC(config);
00109         ncoeff = config.nceps + 1;
00110     }
00111 }
00112 
00113 ClusterMeltSegmenter::~ClusterMeltSegmenter() 
00114 {
00115     delete window;
00116     delete constq;
00117     delete decimator;
00118     delete fft;
00119 }
00120 
00121 int
00122 ClusterMeltSegmenter::getWindowsize()
00123 {
00124     return static_cast<int>(windowSize * samplerate + 0.001);
00125 }
00126 
00127 int
00128 ClusterMeltSegmenter::getHopsize()
00129 {
00130     return static_cast<int>(hopSize * samplerate + 0.001);
00131 }
00132 
00133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
00134 {
00135     if (featureType == FEATURE_TYPE_CONSTQ ||
00136         featureType == FEATURE_TYPE_CHROMA) {
00137         extractFeaturesConstQ(samples, nsamples);
00138     } else if (featureType == FEATURE_TYPE_MFCC) {
00139         extractFeaturesMFCC(samples, nsamples);
00140     }
00141 }
00142 
00143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
00144 {
00145     if (!constq) {
00146         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
00147                   << "No const-q: initialise not called?"
00148                   << std::endl;
00149         return;
00150     }
00151 
00152     if (nsamples < getWindowsize()) {
00153         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
00154         return;
00155     }
00156 
00157     int fftsize = constq->getfftlength();
00158 
00159     if (!window || window->getSize() != fftsize) {
00160         delete window;
00161         window = new Window<double>(HammingWindow, fftsize);
00162     }
00163 
00164     vector<double> cq(ncoeff);
00165 
00166     for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
00167     
00168     const double *psource = samples;
00169     int pcount = nsamples;
00170 
00171     if (decimator) {
00172         pcount = nsamples / decimator->getFactor();
00173         double *decout = new double[pcount];
00174         decimator->process(samples, decout);
00175         psource = decout;
00176     }
00177     
00178     int origin = 0;
00179     
00180 //    std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
00181 
00182     int frames = 0;
00183 
00184     double *frame = new double[fftsize];
00185     double *real = new double[fftsize];
00186     double *imag = new double[fftsize];
00187     double *cqre = new double[ncoeff];
00188     double *cqim = new double[ncoeff];
00189 
00190     while (origin <= pcount) {
00191 
00192         // always need at least one fft window per block, but after
00193         // that we want to avoid having any incomplete ones
00194         if (origin > 0 && origin + fftsize >= pcount) break;
00195 
00196         for (int i = 0; i < fftsize; ++i) {
00197             if (origin + i < pcount) {
00198                 frame[i] = psource[origin + i];
00199             } else {
00200                 frame[i] = 0.0;
00201             }
00202         }
00203 
00204         for (int i = 0; i < fftsize/2; ++i) {
00205             double value = frame[i];
00206             frame[i] = frame[i + fftsize/2];
00207             frame[i + fftsize/2] = value;
00208         }
00209 
00210         window->cut(frame);
00211         
00212         fft->forward(frame, real, imag);
00213         
00214         constq->process(real, imag, cqre, cqim);
00215         
00216         for (int i = 0; i < ncoeff; ++i) {
00217             cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
00218         }
00219         ++frames;
00220 
00221         origin += fftsize/2;
00222     }
00223 
00224     delete [] cqre;
00225     delete [] cqim;
00226     delete [] real;
00227     delete [] imag;
00228     delete [] frame;
00229 
00230     for (int i = 0; i < ncoeff; ++i) {
00231         cq[i] /= frames;
00232     }
00233 
00234     if (decimator) delete[] psource;
00235 
00236     features.push_back(cq);
00237 }
00238 
00239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
00240 {
00241     if (!mfcc) {
00242         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
00243                   << "No mfcc: initialise not called?"
00244                   << std::endl;
00245         return;
00246     }
00247 
00248     if (nsamples < getWindowsize()) {
00249         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
00250         return;
00251     }
00252 
00253     int fftsize = mfcc->getfftlength();
00254 
00255     vector<double> cc(ncoeff);
00256 
00257     for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
00258     
00259     const double *psource = samples;
00260     int pcount = nsamples;
00261 
00262     if (decimator) {
00263         pcount = nsamples / decimator->getFactor();
00264         double *decout = new double[pcount];
00265         decimator->process(samples, decout);
00266         psource = decout;
00267     }
00268 
00269     int origin = 0;
00270     int frames = 0;
00271 
00272     double *frame = new double[fftsize];
00273     double *ccout = new double[ncoeff];
00274 
00275     while (origin <= pcount) {
00276 
00277         // always need at least one fft window per block, but after
00278         // that we want to avoid having any incomplete ones
00279         if (origin > 0 && origin + fftsize >= pcount) break;
00280 
00281         for (int i = 0; i < fftsize; ++i) {
00282             if (origin + i < pcount) {
00283                 frame[i] = psource[origin + i];
00284             } else {
00285                 frame[i] = 0.0;
00286             }
00287         }
00288 
00289         mfcc->process(frame, ccout);
00290         
00291         for (int i = 0; i < ncoeff; ++i) {
00292             cc[i] += ccout[i];
00293         }
00294         ++frames;
00295 
00296         origin += fftsize/2;
00297     }
00298 
00299     delete [] ccout;
00300     delete [] frame;
00301 
00302     for (int i = 0; i < ncoeff; ++i) {
00303         cc[i] /= frames;
00304     }
00305 
00306     if (decimator) delete[] psource;
00307 
00308     features.push_back(cc);
00309 }
00310 
00311 void ClusterMeltSegmenter::segment(int m)
00312 {
00313     nclusters = m;
00314     segment();
00315 }
00316 
00317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
00318 {
00319     features = f;
00320     featureType = FEATURE_TYPE_UNKNOWN;
00321 }
00322 
00323 void ClusterMeltSegmenter::segment()
00324 {
00325     delete constq;
00326     constq = 0;
00327     delete mfcc;
00328     mfcc = 0;
00329     delete decimator;
00330     decimator = 0;
00331 
00332     if (features.size() < histogramLength) return;
00333 /*    
00334     std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
00335               << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
00336 */
00337     // copy the features to a native array and use the existing C segmenter...
00338     double** arrFeatures = new double*[features.size()];        
00339     for (int i = 0; i < features.size(); i++)
00340     {
00341         if (featureType == FEATURE_TYPE_UNKNOWN) {
00342             arrFeatures[i] = new double[features[0].size()];
00343             for (int j = 0; j < features[0].size(); j++)
00344                 arrFeatures[i][j] = features[i][j];     
00345         } else {
00346             arrFeatures[i] = new double[ncoeff+1];      // allow space for the normalised envelope
00347             for (int j = 0; j < ncoeff; j++)
00348                 arrFeatures[i][j] = features[i][j];     
00349         }
00350     }
00351         
00352     q = new int[features.size()];
00353         
00354     if (featureType == FEATURE_TYPE_UNKNOWN ||
00355         featureType == FEATURE_TYPE_MFCC)
00356         cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, 
00357                         nclusters, neighbourhoodLimit);
00358     else
00359         constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, 
00360                        nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
00361         
00362     // convert the cluster assignment sequence to a segmentation
00363     makeSegmentation(q, features.size());               
00364         
00365     // de-allocate arrays
00366     delete [] q;
00367     for (int i = 0; i < features.size(); i++)
00368         delete [] arrFeatures[i];
00369     delete [] arrFeatures;
00370         
00371     // clear the features
00372     clear();
00373 }
00374 
00375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
00376 {
00377     segmentation.segments.clear();
00378     segmentation.nsegtypes = nclusters;
00379     segmentation.samplerate = samplerate;
00380         
00381     Segment segment;
00382     segment.start = 0;
00383     segment.type = q[0];
00384         
00385     for (int i = 1; i < len; i++)
00386     {
00387         if (q[i] != q[i-1])
00388         {
00389             segment.end = i * getHopsize();
00390             segmentation.segments.push_back(segment);
00391             segment.type = q[i];
00392             segment.start = segment.end;
00393         }
00394     }
00395     segment.end = len * getHopsize();
00396     segmentation.segments.push_back(segment);
00397 }
00398