qm-dsp
1.8
|
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 00002 00003 /* 00004 QM DSP Library 00005 00006 Centre for Digital Music, Queen Mary, University of London. 00007 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL. 00008 00009 This program is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU General Public License as 00011 published by the Free Software Foundation; either version 2 of the 00012 License, or (at your option) any later version. See the file 00013 COPYING included with this distribution for more information. 00014 */ 00015 00016 #include <cmath> 00017 #include <cstdlib> 00018 #include <cstring> 00019 00020 #include "MFCC.h" 00021 #include "dsp/transforms/FFT.h" 00022 #include "base/Window.h" 00023 00024 MFCC::MFCC(MFCCConfig config) 00025 { 00026 int i,j; 00027 00028 /* Calculate at startup */ 00029 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs; 00030 00031 lowestFrequency = 66.6666666; 00032 linearFilters = 13; 00033 linearSpacing = 66.66666666; 00034 logFilters = 27; 00035 logSpacing = 1.0711703; 00036 00037 /* FFT and analysis window sizes */ 00038 fftSize = config.fftsize; 00039 fft = new FFTReal(fftSize); 00040 00041 totalFilters = linearFilters + logFilters; 00042 logPower = config.logpower; 00043 00044 samplingRate = config.FS; 00045 00046 /* The number of cepstral componenents */ 00047 nceps = config.nceps; 00048 00049 /* Set if user want C0 */ 00050 WANT_C0 = (config.want_c0 ? 1 : 0); 00051 00052 /* Allocate space for feature vector */ 00053 if (WANT_C0 == 1) { 00054 ceps = (double*)calloc(nceps+1, sizeof(double)); 00055 } else { 00056 ceps = (double*)calloc(nceps, sizeof(double)); 00057 } 00058 00059 /* Allocate space for local vectors */ 00060 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); 00061 for (i = 0; i < nceps+1; i++) { 00062 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); 00063 } 00064 00065 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); 00066 for (i = 0; i < totalFilters; i++) { 00067 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); 00068 } 00069 00070 freqs = (double*)calloc(totalFilters+2,sizeof(double)); 00071 00072 lower = (double*)calloc(totalFilters,sizeof(double)); 00073 center = (double*)calloc(totalFilters,sizeof(double)); 00074 upper = (double*)calloc(totalFilters,sizeof(double)); 00075 00076 triangleHeight = (double*)calloc(totalFilters,sizeof(double)); 00077 fftFreqs = (double*)calloc(fftSize,sizeof(double)); 00078 00079 for (i = 0; i < linearFilters; i++) { 00080 freqs[i] = lowestFrequency + ((double)i) * linearSpacing; 00081 } 00082 00083 for (i = linearFilters; i < totalFilters+2; i++) { 00084 freqs[i] = freqs[linearFilters-1] * 00085 pow(logSpacing, (double)(i-linearFilters+1)); 00086 } 00087 00088 /* Define lower, center and upper */ 00089 memcpy(lower, freqs,totalFilters*sizeof(double)); 00090 memcpy(center, &freqs[1],totalFilters*sizeof(double)); 00091 memcpy(upper, &freqs[2],totalFilters*sizeof(double)); 00092 00093 for (i=0;i<totalFilters;i++){ 00094 triangleHeight[i] = 2./(upper[i]-lower[i]); 00095 } 00096 00097 for (i=0;i<fftSize;i++){ 00098 fftFreqs[i] = ((double) i / ((double) fftSize ) * 00099 (double) samplingRate); 00100 } 00101 00102 /* Build now the mccFilterWeight matrix */ 00103 for (i=0;i<totalFilters;i++){ 00104 00105 for (j=0;j<fftSize;j++) { 00106 00107 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) { 00108 00109 mfccFilterWeights[i][j] = triangleHeight[i] * 00110 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); 00111 00112 } 00113 else 00114 { 00115 mfccFilterWeights[i][j] = 0.0; 00116 } 00117 00118 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { 00119 00120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] 00121 + triangleHeight[i] * (upper[i]-fftFreqs[j]) 00122 / (upper[i]-center[i]); 00123 } 00124 else 00125 { 00126 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; 00127 } 00128 } 00129 00130 } 00131 00132 /* 00133 * We calculate now mfccDCT matrix 00134 * NB: +1 because of the DC component 00135 */ 00136 00137 const double pi = 3.14159265358979323846264338327950288; 00138 00139 for (i = 0; i < nceps+1; i++) { 00140 for (j = 0; j < totalFilters; j++) { 00141 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) 00142 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi); 00143 } 00144 } 00145 00146 for (j = 0; j < totalFilters; j++){ 00147 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; 00148 } 00149 00150 /* The analysis window */ 00151 window = new Window<double>(config.window, fftSize); 00152 00153 /* Allocate memory for the FFT */ 00154 realOut = (double*)calloc(fftSize, sizeof(double)); 00155 imagOut = (double*)calloc(fftSize, sizeof(double)); 00156 00157 earMag = (double*)calloc(totalFilters, sizeof(double)); 00158 fftMag = (double*)calloc(fftSize/2, sizeof(double)); 00159 00160 free(freqs); 00161 free(lower); 00162 free(center); 00163 free(upper); 00164 free(triangleHeight); 00165 free(fftFreqs); 00166 } 00167 00168 MFCC::~MFCC() 00169 { 00170 int i; 00171 00172 /* Free the structure */ 00173 for (i = 0; i < nceps+1; i++) { 00174 free(mfccDCTMatrix[i]); 00175 } 00176 free(mfccDCTMatrix); 00177 00178 for (i = 0; i < totalFilters; i++) { 00179 free(mfccFilterWeights[i]); 00180 } 00181 free(mfccFilterWeights); 00182 00183 /* Free the feature vector */ 00184 free(ceps); 00185 00186 /* The analysis window */ 00187 delete window; 00188 00189 free(earMag); 00190 free(fftMag); 00191 00192 /* Free the FFT */ 00193 free(realOut); 00194 free(imagOut); 00195 00196 delete fft; 00197 } 00198 00199 00200 /* 00201 * 00202 * Extract the MFCC on the input frame 00203 * 00204 */ 00205 int MFCC::process(const double *inframe, double *outceps) 00206 { 00207 double *inputData = (double *)malloc(fftSize * sizeof(double)); 00208 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i]; 00209 00210 window->cut(inputData); 00211 00212 /* Calculate the fft on the input frame */ 00213 fft->forward(inputData, realOut, imagOut); 00214 00215 free(inputData); 00216 00217 return process(realOut, imagOut, outceps); 00218 } 00219 00220 int MFCC::process(const double *real, const double *imag, double *outceps) 00221 { 00222 int i, j; 00223 00224 for (i = 0; i < fftSize/2; ++i) { 00225 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]); 00226 } 00227 00228 for (i = 0; i < totalFilters; ++i) { 00229 earMag[i] = 0.0; 00230 } 00231 00232 /* Multiply by mfccFilterWeights */ 00233 for (i = 0; i < totalFilters; i++) { 00234 double tmp = 0.0; 00235 for (j = 0; j < fftSize/2; j++) { 00236 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]); 00237 } 00238 if (tmp > 0) earMag[i] = log10(tmp); 00239 else earMag[i] = 0.0; 00240 00241 if (logPower != 1.0) { 00242 earMag[i] = pow(earMag[i], logPower); 00243 } 00244 } 00245 00246 /* 00247 * 00248 * Calculate now the cepstral coefficients 00249 * with or without the DC component 00250 * 00251 */ 00252 00253 if (WANT_C0 == 1) { 00254 00255 for (i = 0; i < nceps+1; i++) { 00256 double tmp = 0.; 00257 for (j = 0; j < totalFilters; j++){ 00258 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j]; 00259 } 00260 outceps[i] = tmp; 00261 } 00262 } 00263 else 00264 { 00265 for (i = 1; i < nceps+1; i++) { 00266 double tmp = 0.; 00267 for (j = 0; j < totalFilters; j++){ 00268 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j]; 00269 } 00270 outceps[i-1] = tmp; 00271 } 00272 } 00273 00274 return nceps; 00275 } 00276