qm-dsp  1.8
MFCC.cpp
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
00002 
00003 /*
00004     QM DSP Library
00005 
00006     Centre for Digital Music, Queen Mary, University of London.
00007     This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
00008 
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of the
00012     License, or (at your option) any later version.  See the file
00013     COPYING included with this distribution for more information.
00014 */
00015 
00016 #include <cmath>
00017 #include <cstdlib>
00018 #include <cstring>
00019 
00020 #include "MFCC.h"
00021 #include "dsp/transforms/FFT.h"
00022 #include "base/Window.h"
00023 
00024 MFCC::MFCC(MFCCConfig config)
00025 {
00026     int i,j;
00027 
00028     /* Calculate at startup */
00029     double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
00030   
00031     lowestFrequency   = 66.6666666;
00032     linearFilters     = 13;
00033     linearSpacing     = 66.66666666;
00034     logFilters        = 27;
00035     logSpacing        = 1.0711703;
00036   
00037     /* FFT and analysis window sizes */
00038     fftSize           = config.fftsize;
00039     fft               = new FFTReal(fftSize);
00040 
00041     totalFilters      = linearFilters + logFilters;
00042     logPower          = config.logpower;
00043   
00044     samplingRate      = config.FS;
00045   
00046     /* The number of cepstral componenents */
00047     nceps             = config.nceps;
00048 
00049     /* Set if user want C0 */
00050     WANT_C0           = (config.want_c0 ? 1 : 0);
00051   
00052     /* Allocate space for feature vector */
00053     if (WANT_C0 == 1) {
00054         ceps              = (double*)calloc(nceps+1, sizeof(double));
00055     } else {
00056         ceps              = (double*)calloc(nceps, sizeof(double));
00057     }
00058  
00059     /* Allocate space for local vectors */
00060     mfccDCTMatrix     = (double**)calloc(nceps+1, sizeof(double*));
00061     for (i = 0; i < nceps+1; i++) {
00062         mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); 
00063     }
00064 
00065     mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
00066     for (i = 0; i < totalFilters; i++) {
00067         mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); 
00068     }
00069     
00070     freqs  = (double*)calloc(totalFilters+2,sizeof(double));
00071     
00072     lower  = (double*)calloc(totalFilters,sizeof(double));
00073     center = (double*)calloc(totalFilters,sizeof(double));
00074     upper  = (double*)calloc(totalFilters,sizeof(double));
00075     
00076     triangleHeight = (double*)calloc(totalFilters,sizeof(double));
00077     fftFreqs       = (double*)calloc(fftSize,sizeof(double));
00078   
00079     for (i = 0; i < linearFilters; i++) {
00080         freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
00081     }
00082   
00083     for (i = linearFilters; i < totalFilters+2; i++) {
00084         freqs[i] = freqs[linearFilters-1] * 
00085             pow(logSpacing, (double)(i-linearFilters+1));
00086     }
00087   
00088     /* Define lower, center and upper */
00089     memcpy(lower,  freqs,totalFilters*sizeof(double));
00090     memcpy(center, &freqs[1],totalFilters*sizeof(double));
00091     memcpy(upper,  &freqs[2],totalFilters*sizeof(double));
00092     
00093     for (i=0;i<totalFilters;i++){
00094         triangleHeight[i] = 2./(upper[i]-lower[i]);
00095     }
00096   
00097     for (i=0;i<fftSize;i++){
00098         fftFreqs[i] = ((double) i / ((double) fftSize ) * 
00099                        (double) samplingRate);
00100     }
00101 
00102     /* Build now the mccFilterWeight matrix */
00103     for (i=0;i<totalFilters;i++){
00104 
00105         for (j=0;j<fftSize;j++) {
00106       
00107             if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
00108           
00109                 mfccFilterWeights[i][j] = triangleHeight[i] * 
00110                     (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); 
00111           
00112             }
00113             else
00114             {
00115                 mfccFilterWeights[i][j] = 0.0;
00116             }
00117 
00118             if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
00119 
00120                 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
00121                     + triangleHeight[i] * (upper[i]-fftFreqs[j]) 
00122                     / (upper[i]-center[i]);
00123             }
00124             else
00125             {
00126                 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
00127             }
00128         }
00129 
00130     }
00131 
00132     /*
00133      * We calculate now mfccDCT matrix 
00134      * NB: +1 because of the DC component
00135      */
00136 
00137     const double pi = 3.14159265358979323846264338327950288;
00138   
00139     for (i = 0; i < nceps+1; i++) {
00140         for (j = 0; j < totalFilters; j++) {
00141             mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))  
00142                 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
00143         }
00144     }
00145 
00146     for (j = 0; j < totalFilters; j++){
00147         mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
00148     }
00149    
00150     /* The analysis window */
00151     window      = new Window<double>(config.window, fftSize);
00152 
00153     /* Allocate memory for the FFT */
00154     realOut     = (double*)calloc(fftSize, sizeof(double));
00155     imagOut     = (double*)calloc(fftSize, sizeof(double));
00156 
00157     earMag      = (double*)calloc(totalFilters, sizeof(double));
00158     fftMag      = (double*)calloc(fftSize/2, sizeof(double));
00159   
00160     free(freqs);
00161     free(lower);
00162     free(center);
00163     free(upper);
00164     free(triangleHeight);
00165     free(fftFreqs);
00166 }
00167 
00168 MFCC::~MFCC()
00169 {
00170     int i;
00171   
00172     /* Free the structure */
00173     for (i = 0; i < nceps+1; i++) {
00174         free(mfccDCTMatrix[i]);
00175     }
00176     free(mfccDCTMatrix);
00177     
00178     for (i = 0; i < totalFilters; i++) {
00179         free(mfccFilterWeights[i]);
00180     }
00181     free(mfccFilterWeights);
00182     
00183     /* Free the feature vector */
00184     free(ceps);
00185     
00186     /* The analysis window */
00187     delete window;
00188 
00189     free(earMag);
00190     free(fftMag);
00191     
00192     /* Free the FFT */
00193     free(realOut);
00194     free(imagOut);
00195 
00196     delete fft;
00197 }
00198 
00199 
00200 /*
00201  * 
00202  * Extract the MFCC on the input frame 
00203  * 
00204  */ 
00205 int MFCC::process(const double *inframe, double *outceps)
00206 {
00207     double *inputData = (double *)malloc(fftSize * sizeof(double));
00208     for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
00209 
00210     window->cut(inputData);
00211   
00212     /* Calculate the fft on the input frame */
00213     fft->forward(inputData, realOut, imagOut);
00214 
00215     free(inputData);
00216 
00217     return process(realOut, imagOut, outceps);
00218 }
00219 
00220 int MFCC::process(const double *real, const double *imag, double *outceps)
00221 {
00222     int i, j;
00223 
00224     for (i = 0; i < fftSize/2; ++i) {
00225         fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
00226     }
00227 
00228     for (i = 0; i < totalFilters; ++i) {
00229         earMag[i] = 0.0;
00230     }
00231 
00232     /* Multiply by mfccFilterWeights */
00233     for (i = 0; i < totalFilters; i++) {
00234         double tmp = 0.0;
00235         for (j = 0; j < fftSize/2; j++) {
00236             tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
00237         }
00238         if (tmp > 0) earMag[i] = log10(tmp);
00239         else earMag[i] = 0.0;
00240 
00241         if (logPower != 1.0) {
00242             earMag[i] = pow(earMag[i], logPower);
00243         }
00244     }
00245 
00246     /*
00247      * 
00248      * Calculate now the cepstral coefficients 
00249      * with or without the DC component
00250      *
00251      */
00252   
00253     if (WANT_C0 == 1) {
00254      
00255         for (i = 0; i < nceps+1; i++) {
00256             double tmp = 0.;
00257             for (j = 0; j < totalFilters; j++){
00258                 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
00259             }
00260             outceps[i] = tmp;
00261         }
00262     }
00263     else 
00264     {  
00265         for (i = 1; i < nceps+1; i++) {
00266             double tmp = 0.;
00267             for (j = 0; j < totalFilters; j++){
00268                 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
00269             }
00270             outceps[i-1] = tmp;
00271         }
00272     }
00273     
00274     return nceps;
00275 }
00276