Marsyas
0.6.0-alpha
|
00001 /* 00002 ** Copyright (C) 1998-2011 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #include "CARFAC.h" 00020 #include <cstddef> 00021 00022 using std::cout; 00023 using std::endl; 00024 using std::ostream; 00025 using std::size_t; 00026 00027 namespace Marsyas 00028 { 00029 00030 filter_state_class::filter_state_class() 00031 { 00032 init = false; 00033 } 00034 00035 filter_state_class::~filter_state_class() 00036 { 00037 } 00038 00039 std::vector<double> filter_state_class::FilterStep(CF_class &CF, double input_waves, std::vector<double> &filterstep_detect) 00040 { 00041 if (!init) { 00042 filterstep_inputs.resize(CF.n_ch); 00043 filterstep_zA.resize(CF.n_ch); 00044 filterstep_zB.resize(CF.n_ch); 00045 filterstep_zY.resize(CF.n_ch); 00046 filterstep_z1.resize(CF.n_ch); 00047 filterstep_z2.resize(CF.n_ch); 00048 } 00049 00050 // Use each stage previous Y as input to next. 00051 filterstep_inputs[0] = input_waves; 00052 00053 for (unsigned int i=0; i < zY_memory.size()-1; i++) { 00054 filterstep_inputs[i+1] = zY_memory[i]; 00055 } 00056 00057 // AGC interpolation. 00058 for (int i=0; i < CF.n_ch; i++) { 00059 zB_memory[i] = zB_memory[i] + dzB_memory[i]; 00060 double filterstep_r = CF.filter_coeffs.r_coeffs[i] - CF.filter_coeffs.c_coeffs[i] * (zA_memory[i] + zB_memory[i]); 00061 00062 // Now reduce filter_state by r and rotate with the fixed cos/sin 00063 // coeffs. 00064 double z1_mem = z1_memory[i]; 00065 z1_memory[i] = ((filterstep_r * 00066 (CF.filter_coeffs.a_coeffs[i] * z1_memory[i] - 00067 CF.filter_coeffs.c_coeffs[i] * z2_memory[i])) 00068 + filterstep_inputs[i]); 00069 z2_memory[i] = filterstep_r * (CF.filter_coeffs.c_coeffs[i] * z1_mem + 00070 CF.filter_coeffs.a_coeffs[i] * z2_memory[i]); 00071 } 00072 00073 // Update the "velocity" for cubic nonlinearity, into zA. 00074 for (int i=0; i<CF.n_ch; i++) { 00075 zA_memory[i] = pow(((z2_memory[i] - z2_memory[i]) * CF.filter_coeffs.velocity_scale), 2); 00076 } 00077 00078 for (int i=0; i<CF.n_ch; i++) { 00079 // Simulate Sigmoidal OHC effect on damping. 00080 zA_memory[i] = (1 - pow((1 - zA_memory[i]), 4)) / 4; // soft max at 0.25 00081 // Get outputs from inputs and new z2 values. 00082 zY_memory[i] = CF.filter_coeffs.g_coeffs[i] * (filterstep_inputs[i] + CF.filter_coeffs.h_coeffs[i] * z2_memory[i]); 00083 } 00084 00085 // TODO(dicklyon): Generalize to a detection nonlinearity. 00086 double maxval = 0.0; 00087 for (int i=0; i<CF.n_ch; i++) { 00088 filterstep_detect[i] = zY_memory[i] > maxval ? zY_memory[i] : maxval; 00089 } 00090 00091 for (int i=0; i<CF.n_ch; i++) { 00092 double output = zY_memory[i]; 00093 double detect = output > 0.0 ? output : 0.0; 00094 filterstep_detect[i] = detect; 00095 detect_accum[i] += detect; 00096 } 00097 00098 return filterstep_detect; 00099 } 00100 00101 00102 ostream& operator<<(ostream& o, std::vector<double> a) 00103 { 00104 size_t max_x = (a.size() < 5) ? a.size() : 5; 00105 for (size_t i=0; i<max_x; i++) { 00106 o << a[i] << " "; 00107 } 00108 return o; 00109 } 00110 00111 ostream& operator<<(ostream& o, std::vector<std::vector<double> > a) 00112 { 00113 size_t max_x = (a.size() < 5) ? a.size() : 5; 00114 size_t max_y = (a[0].size() < 5) ? a[0].size() : 5; 00115 for (size_t i=0; i<max_x; i++) { 00116 for (size_t j=0; j<max_y; j++) { 00117 o << a[i][j] << " "; 00118 } 00119 o << endl << "\t\t\t"; 00120 } 00121 return o; 00122 } 00123 00124 ostream& operator<<(ostream& o, const filter_state_class& l) 00125 { 00126 o << "\tz1_memory=" << l.z1_memory << endl; 00127 o << "\tz2_memory=" << l.z2_memory << endl; 00128 o << "\tzA_memory=" << l.zA_memory << endl; 00129 o << "\tzB_memory=" << l.zB_memory << endl; 00130 o << "\tdzB_memory=" << l.dzB_memory << endl; 00131 o << "\tzY_memory=" << l.zY_memory << endl; 00132 o << "\tdetect_accum=" << l.detect_accum << endl; 00133 00134 return o; 00135 } 00136 00138 00139 AGC_state_class::AGC_state_class() 00140 { 00141 } 00142 00143 AGC_state_class::~AGC_state_class() 00144 { 00145 } 00146 00147 ostream& operator<<(ostream& o, const AGC_state_class& l) 00148 { 00149 o << "**AGC_state_class" << endl; 00150 o << "\tsum_AGC=" << l.sum_AGC << endl; 00151 00152 for (int i = 0; i < 4; i++) { 00153 o << "\tAGC_memory(" << i << ")="; 00154 for (int j = 0; j < 5; j++) { 00155 o << l.AGC_memory[j][i] << " "; 00156 } 00157 o << endl; 00158 } 00159 return o; 00160 } 00161 00163 00164 strobe_state_class::strobe_state_class() 00165 { 00166 } 00167 00168 strobe_state_class::~strobe_state_class() 00169 { 00170 } 00171 00172 ostream& operator<<(ostream& o, const strobe_state_class& l) 00173 { 00174 o << "**strobe_state_class" << endl; 00175 o << "\tlastdata=" << l.lastdata << endl; 00176 o << "\tthresholds=" << l.thresholds << endl; 00177 // o << "\ttrigger_index=" << l.trigger_index << endl; 00178 // o << "\tsai_index=" << l.sai_index << endl; 00179 00180 return o; 00181 } 00182 00184 00185 filter_coeffs_class::filter_coeffs_class() 00186 { 00187 } 00188 00189 filter_coeffs_class::~filter_coeffs_class() 00190 { 00191 } 00192 00193 void filter_coeffs_class::init(double v, int n_ch) 00194 { 00195 velocity_scale = v; 00196 00197 r_coeffs.assign(n_ch,0); 00198 a_coeffs.assign(n_ch,0); 00199 c_coeffs.assign(n_ch,0); 00200 h_coeffs.assign(n_ch,0); 00201 g_coeffs.assign(n_ch,0); 00202 00203 } 00204 00205 ostream& operator<<(ostream& o, const filter_coeffs_class& l) 00206 { 00207 o << "**filter_coeffs_class" << endl; 00208 o << "\t\tvelocity_scale=" << l.velocity_scale << endl; 00209 00210 o << "\t\tr_coeffs=" << l.r_coeffs << endl; 00211 o << "\t\ta_coeffs=" << l.a_coeffs << endl; 00212 o << "\t\tc_coeffs=" << l.c_coeffs << endl; 00213 o << "\t\th_coeffs=" << l.h_coeffs << endl; 00214 o << "\t\tg_coeffs=" << l.g_coeffs << endl; 00215 00216 return o; 00217 } 00218 00219 00221 00222 CF_AGC_params_class::CF_AGC_params_class() 00223 { 00224 n_stages = 4; 00225 time_constants.push_back(1*0.002); 00226 time_constants.push_back(4*0.002); 00227 time_constants.push_back(16*0.002); 00228 time_constants.push_back(64*0.002); 00229 00230 AGC_stage_gain = 2; 00231 decimation = 16; 00232 00233 AGC1_scales.push_back(1*1); 00234 AGC1_scales.push_back(2*1); 00235 AGC1_scales.push_back(3*1); 00236 AGC1_scales.push_back(4*1); 00237 00238 AGC2_scales.push_back(1*1.5); 00239 AGC2_scales.push_back(2*1.5); 00240 AGC2_scales.push_back(3*1.5); 00241 AGC2_scales.push_back(4*1.5); 00242 00243 detect_scale = 0.002; 00244 AGC_mix_coeff = 0.25; 00245 } 00246 00247 00248 CF_AGC_params_class::~CF_AGC_params_class() 00249 { 00250 } 00251 00252 ostream& operator<<(ostream& o, const CF_AGC_params_class& l) 00253 { 00254 o << "**CF_AGC_params_class" << endl; 00255 o << "\t\tn_stages=" << l.n_stages << endl; 00256 00257 o << "\t\ttime_constants=["; 00258 for (unsigned int i=0; i<l.time_constants.size(); i++) { 00259 o << l.time_constants[i] << " "; 00260 } 00261 o << "]" << endl; 00262 00263 o << "\t\tAGC_stage_gain=" << l.AGC_stage_gain << endl; 00264 o << "\t\tdecimation=" << l.decimation << endl; 00265 00266 o << "\t\tAGC1_scales="; 00267 for (unsigned int i=0; i<l.AGC1_scales.size(); i++) { 00268 o << l.AGC1_scales[i] << " "; 00269 } 00270 o << endl; 00271 00272 o << "\t\tAGC2_scales="; 00273 for (unsigned int i=0; i<l.AGC2_scales.size(); i++) { 00274 o << l.AGC2_scales[i] << " "; 00275 } 00276 o << endl; 00277 00278 o << "\t\tdetect_scale=" << l.detect_scale << endl; 00279 o << "\t\tAGC_mix_coeff=" << l.AGC_mix_coeff << endl; 00280 00281 return o; 00282 } 00283 00285 00286 AGC_coeffs_class::AGC_coeffs_class() 00287 { 00288 } 00289 00290 AGC_coeffs_class::~AGC_coeffs_class() 00291 { 00292 } 00293 00294 ostream& operator<<(ostream& o, const AGC_coeffs_class& l) 00295 { 00296 o << "**AGC_coeffs_class" << endl; 00297 o << "\t\tdetect_scale=" << l.detect_scale << endl; 00298 o << "\t\tAGC_stage_gain=" << l.AGC_stage_gain << endl; 00299 o << "\t\tAGC_mix_coeff=" << l.AGC_mix_coeff << endl; 00300 o << "\t\tAGC_epsilon=["; 00301 for (unsigned int i=0; i<l.AGC_epsilon.size(); i++) { 00302 o << l.AGC_epsilon[i] << " "; 00303 } 00304 o << "]" << endl; 00305 00306 return o; 00307 } 00308 00310 00311 CF_filter_params_class::CF_filter_params_class() 00312 { 00313 velocity_scale = 0.002; // for the cubic nonlinearity 00314 min_zeta = 0.15; 00315 first_pole_theta = 0.78 * PI; 00316 zero_ratio = sqrt((float)2.0); 00317 ERB_per_step = 0.3333; // assume G&M's ERB formula 00318 min_pole_Hz = 40; 00319 } 00320 00321 CF_filter_params_class::~CF_filter_params_class() 00322 { 00323 } 00324 00325 00326 ostream& operator<<(ostream& o, const CF_filter_params_class& l) 00327 { 00328 o << "**CF_filter_params_class" << endl; 00329 o << "\t\tvelocity_scale=" << l.velocity_scale << endl; 00330 o << "\t\tmin_zeta=" << l.min_zeta << endl; 00331 o << "\t\tfirst_pole_theta=" << l.first_pole_theta << endl; 00332 o << "\t\tzero_ratio=" << l.zero_ratio << endl; 00333 o << "\t\tERB_per_step=" << l.ERB_per_step << endl; 00334 o << "\t\tmin_pole_Hz=" << l.min_pole_Hz << endl; 00335 return o; 00336 } 00337 00339 00340 CF_class::CF_class() 00341 { 00342 CARFAC_Design(); 00343 00344 // Default value for number of microphones 00345 n_mics = 2; 00346 } 00347 00348 CF_class::~CF_class() 00349 { 00350 } 00351 00352 // 00353 // Original Docs 00354 // ------------- 00355 // 00356 // function CF = CARFAC_Design(fs, CF_filter_params, ... 00357 // CF_AGC_params, ERB_min_BW, ERB_Q) 00358 // 00359 // This function designs the CARFAC (Cascade of Asymmetric Resonators with 00360 // Fast-Acting Compression); that is, it take bundles of parameters and 00361 // computes all the filter coefficients needed to run it. 00362 // 00363 // fs is sample rate (per second) 00364 // CF_filter_params bundles all the PZFC parameters 00365 // CF_AGC_params bundles all the AGC parameters 00366 // 00367 // See other functions for designing and characterizing the CARFAC: 00368 // [naps, CF] = CARFAC_Run(CF, input_waves) 00369 // transfns = CARFAC_Transfer_Functions(CF, to_channels, from_channels) 00370 // 00371 // All args are defaultable; for sample/default args see the code; they 00372 // make 96 channels at default fs = 22050, 114 channels at 44100. 00373 // 00374 00375 // TODO(snessnet) - Have CARFAC_Design take parameters like the original function 00376 void CF_class::CARFAC_Design(double _fs, double _ERB_break_freq, double _ERB_Q) 00377 { 00378 AGC_coeffs.detect_scale = CF_AGC_params.detect_scale; 00379 AGC_coeffs.AGC_stage_gain = CF_AGC_params.AGC_stage_gain; 00380 AGC_coeffs.AGC_mix_coeff = CF_AGC_params.AGC_mix_coeff; 00381 00382 // TODO(snessnet) - We should get this from israte, but this won't 00383 // be setup until we load the soundfile. For now require 22050Hz 00384 // sound files. 00385 if (_fs == -1) { 00386 fs = 22050; 00387 } 00388 00389 // first figure out how many filter stages (PZFC/CARFAC channels): 00390 double pole_Hz = CF_filter_params.first_pole_theta * fs / (2 * PI); 00391 n_ch = 0; 00392 while (pole_Hz > CF_filter_params.min_pole_Hz) { 00393 n_ch = n_ch + 1; 00394 pole_Hz = pole_Hz - CF_filter_params.ERB_per_step * ERB_Hz(pole_Hz, _ERB_break_freq, _ERB_Q); 00395 } 00396 00397 pole_freqs.assign(n_ch,0); 00398 pole_Hz = CF_filter_params.first_pole_theta * fs / (2 * PI); 00399 for(int ch = 0; ch < n_ch; ch++) { 00400 pole_freqs[ch] = pole_Hz; 00401 pole_Hz = pole_Hz - CF_filter_params.ERB_per_step * ERB_Hz(pole_Hz, _ERB_break_freq, _ERB_Q); 00402 } 00403 00404 // Design the AGC and Filters 00405 CARFAC_DesignFilters(); 00406 CARFAC_DesignAGC(); 00407 } 00408 00409 // Calculate the Equivalent Rectangular Bandwith of a given frequency 00410 double CF_class::ERB_Hz(double CF_Hz, double ERB_break_freq, double ERB_Q) 00411 { 00412 if (ERB_Q == -1) { 00413 ERB_Q = 1000/(24.7*4.37); // 9.2645 00414 } 00415 00416 if (ERB_break_freq == -1) { 00417 ERB_break_freq = 1000/4.37; // 228.833 00418 } 00419 00420 double ERB = (ERB_break_freq + CF_Hz) / ERB_Q; 00421 return ERB; 00422 } 00423 00424 // From the input filter_params, sampling frequency and 00425 // pole_frequencies, create all the filter coefficients. 00426 void CF_class::CARFAC_DesignFilters() 00427 { 00428 int n_ch = (int) pole_freqs.size(); 00429 00430 filter_coeffs.init(CF_filter_params.velocity_scale, n_ch); 00431 00432 // zero_ratio comes in via h. In book's circuit D, zero_ratio is 00433 // 1/sqrt(a), and that a is here 1 / (1+f) where h = f*c. 00434 // solve for f: 1/zero_ratio^2 = 1 / (1+f) 00435 // zero_ratio^2 = 1+f => f = zero_ratio^2 - 1 00436 double f = pow(CF_filter_params.zero_ratio,2) - 1; // nominally 1 for half-octave 00437 00438 // Make pole positions, s and c coeffs, h and g coeffs, etc., which 00439 // mostly depend on the pole angle theta. 00440 std::vector<double> theta(n_ch); 00441 for (unsigned int i = 0; i < theta.size(); i++) { 00442 theta[i] = pole_freqs[i] * (2 * PI / fs); 00443 } 00444 00445 // Different possible interpretations for min-damping r: 00446 // r = exp(-theta * CF_filter_params.min_zeta); 00447 std::vector<double> r(n_ch); 00448 for (unsigned int i = 0; i < r.size(); i++) { 00449 r[i] = (1 - (sin(theta[i]) * CF_filter_params.min_zeta)); // higher Q at highest thetas 00450 } 00451 filter_coeffs.r_coeffs = r; 00452 00453 // Undamped coupled-form coefficients: 00454 for (unsigned int i = 0; i < theta.size(); i++) { 00455 filter_coeffs.a_coeffs[i] = cos(theta[i]); 00456 filter_coeffs.c_coeffs[i] = sin(theta[i]); 00457 } 00458 00459 // The zeros follow via the h_coeffs 00460 std::vector<double> h(n_ch); 00461 for (unsigned int i = 0; i < theta.size(); i++) { 00462 h[i] = sin(theta[i]) * f; 00463 } 00464 filter_coeffs.h_coeffs = h; 00465 00466 // Aim for unity DC gain at min damping, here; or could try r^2 00467 std::vector<double> r2 = r; 00468 for (unsigned int i = 0; i < theta.size(); i++) { 00469 filter_coeffs.g_coeffs[i] = 00470 1 / (1 + h[i] * r2[i] * sin(theta[i]) / (1 - 2 * r2[i] * cos(theta[i]) + pow(r2[i], 2))); 00471 } 00472 } 00473 00474 // Calculate the parameters for the AGC step 00475 void CF_class::CARFAC_DesignAGC() 00476 { 00477 std::vector<double> AGC1_scales = CF_AGC_params.AGC1_scales; 00478 std::vector<double> AGC2_scales = CF_AGC_params.AGC2_scales; 00479 00480 int n_AGC_stages = CF_AGC_params.n_stages; 00481 AGC_coeffs.AGC_epsilon.assign(n_AGC_stages, 0); 00482 AGC_coeffs.AGC1_polez.assign(n_AGC_stages, 0); 00483 AGC_coeffs.AGC2_polez.assign(n_AGC_stages, 0); 00484 int decim = CF_AGC_params.decimation; 00485 00486 for (int stage = 0; stage < n_AGC_stages; stage++) { 00487 double tau = CF_AGC_params.time_constants[stage]; 00488 00489 // Epsilon is how much new input to take at each update step. 00490 AGC_coeffs.AGC_epsilon[stage] = 1 - exp(-decim / (tau * fs)); 00491 00492 // And these are the smoothing scales and poles for decimated rate. 00493 double ntimes = tau * (fs / decim); // Effective number of times the smoothing is done 00494 00495 // Divide the spatial variance by effective number of smoothings: 00496 double t = (pow(AGC1_scales[stage],2)) / ntimes; // Adjust scale per step for diffusion 00497 AGC_coeffs.AGC1_polez[stage] = 1 + 1/t - sqrt(pow(1+1/t,2) - 1); 00498 t = (pow(AGC2_scales[stage],2)) / ntimes; // Adjust scale per step for diffusion. 00499 AGC_coeffs.AGC2_polez[stage] = 1 + 1/t - sqrt(pow(1+1/t,2) - 1); 00500 } 00501 } 00502 00503 // 00504 // Initialize state for n_mics channels (default 1). 00505 // 00506 // TODO(dicklyon): Review whether storing state in the same struct as 00507 // the design is a good thing, or whether we want another level of 00508 // object. I like fewer structs and class types. function CF_struct 00509 // = CARFAC_Init(CF_struct, n_mics) 00510 // 00511 // Initialize state for n_mics channels (default 1). 00512 // 00513 // TODO(dicklyon): Review whether storing state in the same struct as 00514 // the design is a good thing, or whether we want another 00515 // level of object. I like fewer structs and class types. 00516 // 00517 void CF_class::CARFAC_Init(int n_mics) 00518 { 00519 std::vector<double> AGC_time_constants = CF_AGC_params.time_constants; 00520 int n_AGC_stages = (int) AGC_time_constants.size(); 00521 filter_state_class tmp_filter_state; 00522 tmp_filter_state.z1_memory.assign(n_ch,0.0); 00523 tmp_filter_state.z2_memory.assign(n_ch,0.0); 00524 tmp_filter_state.zA_memory.assign(n_ch,0.0); 00525 tmp_filter_state.zB_memory.assign(n_ch,0.0); 00526 tmp_filter_state.dzB_memory.assign(n_ch,0.0); 00527 tmp_filter_state.zY_memory.assign(n_ch,0.0); 00528 tmp_filter_state.detect_accum.assign(n_ch,0.0); 00529 00530 for (int mic = 0; mic < n_mics; mic++) { 00531 filter_state.push_back(tmp_filter_state); 00532 } 00533 00534 // AGC loop filters' state: 00535 AGC_state_class tmp_AGC_state; 00536 tmp_AGC_state.sum_AGC.assign(n_ch,0.0); 00537 00538 std::vector<double> tmp_AGC_memory(n_ch); 00539 for (int i = 0; i < n_AGC_stages; i++) { 00540 tmp_AGC_state.AGC_memory.push_back(tmp_AGC_memory); 00541 } 00542 00543 for (int mic = 0; mic < n_mics; mic++) { 00544 AGC_state.push_back(tmp_AGC_state); 00545 } 00546 00547 // Strobe state 00548 strobe_threshold_start = 0.0100; 00549 strobe_state_class tmp_strobe_state; 00550 tmp_strobe_state.lastdata.assign(n_ch,0.0); 00551 tmp_strobe_state.thresholds.assign(n_ch,strobe_threshold_start); 00552 tmp_strobe_state.trigger_index.assign(n_ch,0); 00553 tmp_strobe_state.sai_index.assign(n_ch,0); 00554 00555 for (int mic = 0; mic < n_mics; mic++) { 00556 strobe_state.push_back(tmp_strobe_state); 00557 } 00558 00559 } 00560 00561 ostream& operator<<(ostream& o, const CF_class& l) 00562 { 00563 o << "*CF_class" << endl; 00564 if (l.printcoeffs) { 00565 o << "\tfs=" << l.fs << endl; 00566 o << "\tn_ch=" << l.n_ch << endl; 00567 o << "\tn_mics=" << l.n_mics << endl; 00568 o << "\tCF_filter_params=" << l.CF_filter_params << endl; 00569 o << "\tCF_AGC_params=" << l.CF_AGC_params << endl; 00570 o << "\tfilter_coeffs=" << l.filter_coeffs << endl; 00571 o << "\tAGC_coeffs=" << l.AGC_coeffs << endl; 00572 } 00573 00574 if (l.printstate) { 00575 for (unsigned int i=0; i<l.filter_state.size(); i++) { 00576 o << "filter_state(" << i+1 << ")" << endl; 00577 o << l.filter_state[i]; 00578 00579 o << "AGC_state(" << i+1 << ")" << endl; 00580 o << l.AGC_state[i]; 00581 } 00582 } 00583 00584 return o; 00585 } 00586 }