svcore
1.9
|
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2006 Chris Cannam and QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version. See the file
    COPYING included with this distribution for more information.
*/

#include "FeatureExtractionModelTransformer.h"

#include "plugin/FeatureExtractionPluginFactory.h"
#include "plugin/PluginXml.h"
#include <vamp-hostsdk/Plugin.h>

#include "data/model/Model.h"
#include "base/Window.h"
#include "base/Exceptions.h"
#include "data/model/SparseOneDimensionalModel.h"
#include "data/model/SparseTimeValueModel.h"
#include "data/model/EditableDenseThreeDimensionalModel.h"
#include "data/model/DenseTimeValueModel.h"
#include "data/model/NoteModel.h"
#include "data/model/FlexiNoteModel.h"
#include "data/model/RegionModel.h"
#include "data/model/FFTModel.h"
#include "data/model/WaveFileModel.h"
#include "rdf/PluginRDFDescription.h"

#include "TransformFactory.h"

#include <iostream>

#include <QSettings>

// Construct a transformer for a single transform. The Vamp plugin
// itself is loaded and configured by initialise(), which also records
// any failure message in m_message.
FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
                                                                     const Transform &transform) :
    ModelTransformer(in, transform),
    m_plugin(0)
{
//    SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;

    initialise();
}

// Construct a transformer for a list of transforms. All transforms in
// the list must be identical except in their choice of plugin output
// (initialise() checks this); they then share a single plugin instance
// and a single processing pass over the input model.
FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
                                                                     const Transforms &transforms) :
    ModelTransformer(in, transforms),
    m_plugin(0)
{
//    SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;

    initialise();
}

// Return true if t1 and t2 differ at most in their choice of plugin
// output: compare t1 against a copy of t2 whose output has been
// overwritten with t1's.
static bool
areTransformsSimilar(const Transform &t1, const Transform &t2)
{
    Transform t2o(t2);
    t2o.setOutput(t1.getOutput());
    return t1 == t2o;
}

// Load and configure the plugin named by the first transform, check
// that the requested outputs exist, and create one output model per
// transform. Returns false (with an explanation in m_message) on any
// failure.
bool
FeatureExtractionModelTransformer::initialise()
{
    // All transforms must use the same plugin, parameters, and
    // inputs: they can differ only in choice of plugin output. So we
    // initialise based purely on the first transform in the list (but
    // first check that they are actually similar as promised)

    for (int j = 1; j < (int)m_transforms.size(); ++j) {
        if (!areTransformsSimilar(m_transforms[0], m_transforms[j])) {
            m_message = tr("Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output");
            return false;
        }
    }

    Transform primaryTransform = m_transforms[0];

    QString pluginId = primaryTransform.getPluginIdentifier();

    FeatureExtractionPluginFactory *factory =
        FeatureExtractionPluginFactory::instanceFor(pluginId);

    if (!factory) {
        m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
        return false;
    }

    DenseTimeValueModel *input = getConformingInput();
    if (!input) {
        m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
        return false;
    }

    m_plugin = factory->instantiatePlugin(pluginId,
                                          input->getSampleRate());
    if (!m_plugin) {
        m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
        return false;
    }

    TransformFactory::getInstance()->makeContextConsistentWithPlugin
        (primaryTransform, m_plugin);

    TransformFactory::getInstance()->setPluginParameters
        (primaryTransform, m_plugin);

    // If the input has more channels than the plugin can take, we
    // fall back to a single channel (run()/getFrames() mix down in
    // that case)
    int channelCount = input->getChannelCount();
    if ((int)m_plugin->getMaxChannelCount() < channelCount) {
        channelCount = 1;
    }
    if ((int)m_plugin->getMinChannelCount() > channelCount) {
        m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
            .arg(pluginId)
            .arg(m_plugin->getMinChannelCount())
            .arg(m_plugin->getMaxChannelCount())
            .arg(input->getChannelCount());
        return false;
    }

    SVDEBUG << "Initialising feature extraction plugin with channels = "
            << channelCount << ", step = " << primaryTransform.getStepSize()
            << ", block = " << primaryTransform.getBlockSize() << endl;

    if (!m_plugin->initialise(channelCount,
                              primaryTransform.getStepSize(),
                              primaryTransform.getBlockSize())) {

        // The plugin rejected the requested step/block sizes. Zero
        // them out so that makeContextConsistentWithPlugin replaces
        // them with the plugin's own preferred values, then retry
        // once with those.
        int pstep = primaryTransform.getStepSize();
        int pblock = primaryTransform.getBlockSize();

        primaryTransform.setStepSize(0);
        primaryTransform.setBlockSize(0);
        TransformFactory::getInstance()->makeContextConsistentWithPlugin
            (primaryTransform, m_plugin);

        if (primaryTransform.getStepSize() != pstep ||
            primaryTransform.getBlockSize() != pblock) {

            if (!m_plugin->initialise(channelCount,
                                      primaryTransform.getStepSize(),
                                      primaryTransform.getBlockSize())) {

                m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
                return false;

            } else {

                // Second attempt succeeded: warn the user that we
                // substituted the plugin's defaults
                m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
                    .arg(pluginId)
                    .arg(pstep)
                    .arg(pblock)
                    .arg(primaryTransform.getStepSize())
                    .arg(primaryTransform.getBlockSize());
            }

        } else {

            m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
            return false;
        }
    }

    // Warn (but do not fail) if the transform was saved against a
    // different plugin version than the one we actually loaded
    if (primaryTransform.getPluginVersion() != "") {
        QString pv = QString("%1").arg(m_plugin->getPluginVersion());
        if (pv != primaryTransform.getPluginVersion()) {
            QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
                .arg(primaryTransform.getPluginVersion())
                .arg(pluginId)
                .arg(pv);
            if (m_message != "") {
                m_message = QString("%1; %2").arg(vm).arg(m_message);
            } else {
                m_message = vm;
            }
        }
    }

    Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();

    if (outputs.empty()) {
        m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
        return false;
    }

    // Resolve each transform's requested output to an output index;
    // an empty output name matches the plugin's first output
    for (int j = 0; j < (int)m_transforms.size(); ++j) {

        for (int i = 0; i < (int)outputs.size(); ++i) {
//            SVDEBUG << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput() << "\"" << endl;
            if (m_transforms[j].getOutput() == "" ||
                outputs[i].identifier == m_transforms[j].getOutput().toStdString()) {
                m_outputNos.push_back(i);
                m_descriptors.push_back(new Vamp::Plugin::OutputDescriptor(outputs[i]));
                m_fixedRateFeatureNos.push_back(-1); // we increment before use
                break;
            }
        }

        // Nothing was pushed for this transform: its output name did
        // not match any plugin output
        if ((int)m_descriptors.size() <= j) {
            m_message = tr("Plugin \"%1\" has no output named \"%2\"")
                .arg(pluginId)
                .arg(m_transforms[j].getOutput());
            return false;
        }
    }

    for (int j = 0; j < (int)m_transforms.size(); ++j) {
        createOutputModels(j);
    }

    return true;
}

// Create the output model for transform index n, choosing the model
// class from the plugin output descriptor: instants, notes/regions,
// sparse time-value sets, or a dense 3-D grid. The created model is
// appended to m_outputs.
void
FeatureExtractionModelTransformer::createOutputModels(int n)
{
    DenseTimeValueModel *input = getConformingInput();

//    cerr << "FeatureExtractionModelTransformer::createOutputModel: sample type " << m_descriptor->sampleType << ", rate " << m_descriptor->sampleRate << endl;

    PluginRDFDescription description(m_transforms[n].getPluginIdentifier());
    QString outputId = m_transforms[n].getOutput();

    int binCount = 1;
    float minValue = 0.0, maxValue = 0.0;
    bool haveExtents = false;
    bool haveBinCount = m_descriptors[n]->hasFixedBinCount;

    if (haveBinCount) {
        binCount = m_descriptors[n]->binCount;
    }

    m_needAdditionalModels[n] = false;

//    cerr << "FeatureExtractionModelTransformer: output bin count "
//         << binCount << endl;

    if (binCount > 0 && m_descriptors[n]->hasKnownExtents) {
        minValue = m_descriptors[n]->minValue;
        maxValue = m_descriptors[n]->maxValue;
        haveExtents = true;
    }

    int modelRate = input->getSampleRate();
    int modelResolution = 1;

    if (m_descriptors[n]->sampleType !=
        Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
        if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
            cerr << "WARNING: plugin reports output sample rate as "
                 << m_descriptors[n]->sampleRate << " (can't display features with finer resolution than the input rate of " << input->getSampleRate() << ")" << endl;
        }
    }

    switch (m_descriptors[n]->sampleType) {

    case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
        if (m_descriptors[n]->sampleRate != 0.0) {
            modelResolution = int(modelRate / m_descriptors[n]->sampleRate + 0.001);
        }
        break;

    case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
        modelResolution = m_transforms[n].getStepSize();
        break;

    case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
        // (An output rate above the input rate is clamped to a
        // resolution of 1 frame, matching the warning above)
        if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
            modelResolution = 1;
        } else {
            modelResolution = int(round(input->getSampleRate() /
                                        m_descriptors[n]->sampleRate));
        }
        break;
    }

    // Vamp API v1 plugins cannot report feature durations
    bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);

    Model *out = 0;

    if (binCount == 0 &&
        (preDurationPlugin || !m_descriptors[n]->hasDuration)) {

        // Anything with no value and no duration is an instant

        out = new SparseOneDimensionalModel(modelRate, modelResolution, false);
        QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
        out->setRDFTypeURI(outputEventTypeURI);

    } else if ((preDurationPlugin && binCount > 1 &&
                (m_descriptors[n]->sampleType ==
                 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
               (!preDurationPlugin && m_descriptors[n]->hasDuration)) {

        // For plugins using the old v1 API without explicit duration,
        // we treat anything that has multiple bins (i.e. that has the
        // potential to have value and duration) and a variable sample
        // rate as a note model, taking its values as pitch, duration
        // and velocity (if present) respectively. This is the same
        // behaviour as always applied by SV to these plugins in the
        // past.

        // For plugins with the newer API, we treat anything with
        // duration as either a note model with pitch and velocity, or
        // a region model.

        // How do we know whether it's an interval or note model?
        // What's the essential difference? Is a note model any
        // interval model using a Hz or "MIDI pitch" scale? There
        // isn't really a reliable test for "MIDI pitch"... Does a
        // note model always have velocity? This is a good question
        // to be addressed by accompanying RDF, but for the moment we
        // will do the following...

        bool isNoteModel = false;

        // Regions have only value (and duration -- we can't extract a
        // region model from an old-style plugin that doesn't support
        // duration)
        if (binCount > 1) isNoteModel = true;

        // Regions do not have units of Hz or MIDI things (a sweeping
        // assumption!)
        if (m_descriptors[n]->unit == "Hz" ||
            m_descriptors[n]->unit.find("MIDI") != std::string::npos ||
            m_descriptors[n]->unit.find("midi") != std::string::npos) {
            isNoteModel = true;
        }

        // If we had a "sparse 3D model", we would have the additional
        // problem of determining whether to use that here (if bin
        // count > 1). But we don't.

        // User preference: use the experimental flexi-note model
        // instead of the plain note model
        QSettings settings;
        settings.beginGroup("Transformer");
        bool flexi = settings.value("use-flexi-note-model", false).toBool();
        settings.endGroup();

        cerr << "flexi = " << flexi << endl;

        if (isNoteModel && !flexi) {

            NoteModel *model;
            if (haveExtents) {
                model = new NoteModel
                    (modelRate, modelResolution, minValue, maxValue, false);
            } else {
                model = new NoteModel
                    (modelRate, modelResolution, false);
            }
            model->setScaleUnits(m_descriptors[n]->unit.c_str());
            out = model;

        } else if (isNoteModel && flexi) {

            FlexiNoteModel *model;
            if (haveExtents) {
                model = new FlexiNoteModel
                    (modelRate, modelResolution, minValue, maxValue, false);
            } else {
                model = new FlexiNoteModel
                    (modelRate, modelResolution, false);
            }
            model->setScaleUnits(m_descriptors[n]->unit.c_str());
            out = model;

        } else {

            RegionModel *model;
            if (haveExtents) {
                model = new RegionModel
                    (modelRate, modelResolution, minValue, maxValue, false);
            } else {
                model = new RegionModel
                    (modelRate, modelResolution, false);
            }
            model->setScaleUnits(m_descriptors[n]->unit.c_str());
            out = model;
        }

        QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
        out->setRDFTypeURI(outputEventTypeURI);

    } else if (binCount == 1 ||
               (m_descriptors[n]->sampleType ==
                Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {

        // Anything that is not a 1D, note, or interval model and that
        // has only one value per result must be a sparse time value
        // model.

        // Anything that is not a 1D, note, or interval model and that
        // has a variable sample rate is treated as a set of sparse
        // time value models, one per output bin, because we lack a
        // sparse 3D model.

        // Anything that is not a 1D, note, or interval model and that
        // has a fixed sample rate but an unknown number of values per
        // result is also treated as a set of sparse time value models.

        // For sets of sparse time value models, we create a single
        // model first as the "standard" output and then create models
        // for bins 1+ in the additional model map (mapping the output
        // descriptor to a list of models indexed by bin-1). But we
        // don't create the additional models yet, as this case has to
        // work even if the number of bins is unknown at this point --
        // we create an additional model (copying its parameters from
        // the default one) each time a new bin is encountered.

        if (!haveBinCount || binCount > 1) {
            m_needAdditionalModels[n] = true;
        }

        SparseTimeValueModel *model;
        if (haveExtents) {
            model = new SparseTimeValueModel
                (modelRate, modelResolution, minValue, maxValue, false);
        } else {
            model = new SparseTimeValueModel
                (modelRate, modelResolution, false);
        }

        Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
        model->setScaleUnits(outputs[m_outputNos[n]].unit.c_str());

        out = model;

        QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
        out->setRDFTypeURI(outputEventTypeURI);

    } else {

        // Anything that is not a 1D, note, or interval model and that
        // has a fixed sample rate and more than one value per result
        // must be a dense 3D model.

        EditableDenseThreeDimensionalModel *model =
            new EditableDenseThreeDimensionalModel
            (modelRate, modelResolution, binCount,
             EditableDenseThreeDimensionalModel::BasicMultirateCompression,
             false);

        if (!m_descriptors[n]->binNames.empty()) {
            std::vector<QString> names;
            for (int i = 0; i < (int)m_descriptors[n]->binNames.size(); ++i) {
                names.push_back(m_descriptors[n]->binNames[i].c_str());
            }
            model->setBinNames(names);
        }

        out = model;

        QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
        out->setRDFTypeURI(outputSignalTypeURI);
    }

    if (out) {
        out->setSourceModel(input);
        m_outputs.push_back(out);
    }
}

// Delete the plugin instance and the copied output descriptors.
// NOTE(review): the output models in m_outputs are not deleted here;
// presumably ownership passes to whoever retrieves them -- confirm
// against callers.
FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()
{
//    SVDEBUG << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << endl;
    delete m_plugin;
    for (int j = 0; j < (int)m_descriptors.size(); ++j) {
        delete m_descriptors[j];
    }
}

// Collect every additional (per-bin) model created so far into a flat
// list, skipping null entries.
FeatureExtractionModelTransformer::Models
FeatureExtractionModelTransformer::getAdditionalOutputModels()
{
    Models mm;
    for (AdditionalModelMap::iterator i = m_additionalModels.begin();
         i != m_additionalModels.end(); ++i) {
        for (std::map<int, SparseTimeValueModel *>::iterator j =
                 i->second.begin();
             j != i->second.end(); ++j) {
            SparseTimeValueModel *m = j->second;
            if (m) mm.push_back(m);
        }
    }
    return mm;
}

// True if any output was flagged (in createOutputModels) as needing
// one sparse time-value model per bin.
bool
FeatureExtractionModelTransformer::willHaveAdditionalOutputModels()
{
    for (std::map<int, bool>::const_iterator i =
             m_needAdditionalModels.begin();
         i != m_needAdditionalModels.end(); ++i) {
        if (i->second) return true;
    }
    return false;
}

// Return (creating lazily on first use) the additional sparse
// time-value model for bin binNo of output n, cloning its parameters
// from that output's primary model. Returns 0 for binNo 0 (the
// primary model covers that bin) or if output n does not use
// additional models.
SparseTimeValueModel *
FeatureExtractionModelTransformer::getAdditionalModel(int n, int binNo)
{
//    std::cerr << "getAdditionalModel(" << n << ", " << binNo << ")" << std::endl;

    if (binNo == 0) {
        std::cerr << "Internal error: binNo == 0 in getAdditionalModel (should be using primary model)" << std::endl;
        return 0;
    }

    if (!m_needAdditionalModels[n]) return 0;
    if (!isOutput<SparseTimeValueModel>(n)) return 0;
    if (m_additionalModels[n][binNo]) return m_additionalModels[n][binNo];

    std::cerr << "getAdditionalModel(" << n << ", " << binNo << "): creating" << std::endl;

    SparseTimeValueModel *baseModel = getConformingOutput<SparseTimeValueModel>(n);
    if (!baseModel) return 0;

    std::cerr << "getAdditionalModel(" << n << ", " << binNo << "): (from " << baseModel << ")" << std::endl;

    SparseTimeValueModel *additional =
        new SparseTimeValueModel(baseModel->getSampleRate(),
                                 baseModel->getResolution(),
                                 baseModel->getValueMinimum(),
                                 baseModel->getValueMaximum(),
                                 false);

    additional->setScaleUnits(baseModel->getScaleUnits());
    additional->setRDFTypeURI(baseModel->getRDFTypeURI());

    m_additionalModels[n][binNo] = additional;
    return additional;
}

// Downcast the input model to DenseTimeValueModel; returns 0 (with a
// debug warning) if the input is not of that type.
DenseTimeValueModel *
FeatureExtractionModelTransformer::getConformingInput()
{
//    SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << endl;

    DenseTimeValueModel *dtvm =
        dynamic_cast<DenseTimeValueModel *>(getInputModel());
    if (!dtvm) {
        SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl;
    }
    return dtvm;
}

// Main processing loop: feed the input model to the plugin block by
// block (via FFT models if the plugin wants frequency-domain input),
// route the returned features to the output models, and keep the
// models' completion percentage up to date. Honour m_abandoned
// throughout.
void
FeatureExtractionModelTransformer::run()
{
    DenseTimeValueModel *input = getConformingInput();
    if (!input) return;

    if (m_outputs.empty()) return;

    Transform primaryTransform = m_transforms[0];

    // Poll until the input model has finished loading
    while (!input->isReady() && !m_abandoned) {
        cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl;
        usleep(500000);
    }
    if (m_abandoned) return;

    int sampleRate = input->getSampleRate();

    int channelCount = input->getChannelCount();
    if ((int)m_plugin->getMaxChannelCount() < channelCount) {
        channelCount = 1;
    }

    // +2 gives room for the interleaved real/imag pair of the
    // blockSize/2'th frequency bin in the frequency-domain case
    float **buffers = new float*[channelCount];
    for (int ch = 0; ch < channelCount; ++ch) {
        buffers[ch] = new float[primaryTransform.getBlockSize() + 2];
    }

    int stepSize = primaryTransform.getStepSize();
    int blockSize = primaryTransform.getBlockSize();

    bool frequencyDomain = (m_plugin->getInputDomain() ==
                            Vamp::Plugin::FrequencyDomain);
    std::vector<FFTModel *> fftModels;

    if (frequencyDomain) {
        for (int ch = 0; ch < channelCount; ++ch) {
            FFTModel *model = new FFTModel
                (getConformingInput(),
                 channelCount == 1 ? m_input.getChannel() : ch,
                 primaryTransform.getWindowType(),
                 blockSize,
                 stepSize,
                 blockSize,
                 false,
                 StorageAdviser::PrecisionCritical);
            if (!model->isOK()) {
                delete model;
                // Mark all outputs complete so they are not left
                // dangling at a partial percentage before we bail out
                for (int j = 0; j < (int)m_outputNos.size(); ++j) {
                    setCompletion(j, 100);
                }
                throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
            }
            model->resume();
            fftModels.push_back(model);
        }
    }

    long startFrame = m_input.getModel()->getStartFrame();
    long endFrame = m_input.getModel()->getEndFrame();

    RealTime contextStartRT = primaryTransform.getStartTime();
    RealTime contextDurationRT = primaryTransform.getDuration();

    long contextStart =
        RealTime::realTime2Frame(contextStartRT, sampleRate);

    long contextDuration =
        RealTime::realTime2Frame(contextDurationRT, sampleRate);

    if (contextStart == 0 || contextStart < startFrame) {
        contextStart = startFrame;
    }

    // A zero duration means "to the end of the model"
    if (contextDuration == 0) {
        contextDuration = endFrame - contextStart;
    }
    if (contextStart + contextDuration > endFrame) {
        contextDuration = endFrame - contextStart;
    }

    long blockFrame = contextStart;

    long prevCompletion = 0;

    for (int j = 0; j < (int)m_outputNos.size(); ++j) {
        setCompletion(j, 0);
    }

    float *reals = 0;
    float *imaginaries = 0;
    if (frequencyDomain) {
        reals = new float[blockSize/2 + 1];
        imaginaries = new float[blockSize/2 + 1];
    }

    QString error = "";

    while (!m_abandoned) {

        // Frequency-domain timestamps refer to the centre of the
        // window, hence the half-block offset in the end test
        if (frequencyDomain) {
            if (blockFrame - int(blockSize)/2 >
                contextStart + contextDuration) break;
        } else {
            if (blockFrame >=
                contextStart + contextDuration) break;
        }

//	SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame "
//		  << blockFrame << ", endFrame " << endFrame << ", blockSize "
//                  << blockSize << endl;

        long completion =
            (((blockFrame - contextStart) / stepSize) * 99) /
            (contextDuration / stepSize + 1);

        // channelCount is either m_input.getModel()->channelCount or 1

        if (frequencyDomain) {
            for (int ch = 0; ch < channelCount; ++ch) {
                int column = (blockFrame - startFrame) / stepSize;
                fftModels[ch]->getValuesAt(column, reals, imaginaries);
                // Interleave into the layout the Vamp API expects:
                // re[0], im[0], re[1], im[1], ...
                for (int i = 0; i <= blockSize/2; ++i) {
                    buffers[ch][i*2] = reals[i];
                    buffers[ch][i*2+1] = imaginaries[i];
                }
                error = fftModels[ch]->getError();
                if (error != "") {
                    cerr << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl;
                    m_abandoned = true;
                    m_message = error;
                }
            }
        } else {
            getFrames(channelCount, blockFrame, blockSize, buffers);
        }

        if (m_abandoned) break;

        Vamp::Plugin::FeatureSet features = m_plugin->process
            (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));

        if (m_abandoned) break;

        for (int j = 0; j < (int)m_outputNos.size(); ++j) {
            for (int fi = 0; fi < (int)features[m_outputNos[j]].size(); ++fi) {
                Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
                addFeature(j, blockFrame, feature);
            }
        }

        if (blockFrame == contextStart || completion > prevCompletion) {
            for (int j = 0; j < (int)m_outputNos.size(); ++j) {
                setCompletion(j, completion);
            }
            prevCompletion = completion;
        }

        blockFrame += stepSize;
    }

    if (!m_abandoned) {
        Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();

        for (int j = 0; j < (int)m_outputNos.size(); ++j) {
            for (int fi = 0; fi < (int)features[m_outputNos[j]].size(); ++fi) {
                Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
                addFeature(j, blockFrame, feature);
            }
        }
    }

    for (int j = 0; j < (int)m_outputNos.size(); ++j) {
        setCompletion(j, 100);
    }

    if (frequencyDomain) {
        for (int ch = 0; ch < channelCount; ++ch) {
            delete fftModels[ch];
        }
        delete[] reals;
        delete[] imaginaries;
    }

    for (int ch = 0; ch < channelCount; ++ch) {
        delete[] buffers[ch];
    }
    delete[] buffers;
}

// Fill buffers (channelCount x size) with time-domain samples starting
// at startFrame, zero-padding anything before frame 0 or beyond the
// available data. With channelCount == 1 and a multi-channel input the
// channels are mixed down to their mean.
void
FeatureExtractionModelTransformer::getFrames(int channelCount,
                                             long startFrame, long size,
                                             float **buffers)
{
    long offset = 0;

    if (startFrame < 0) {
        for (int c = 0; c < channelCount; ++c) {
            for (int i = 0; i < size && startFrame + i < 0; ++i) {
                buffers[c][i] = 0.0f;
            }
        }
        offset = -startFrame;
        size -= offset;
        if (size <= 0) return;
        startFrame = 0;
    }

    DenseTimeValueModel *input = getConformingInput();
    if (!input) return;

    long got = 0;

    if (channelCount == 1) {

        got = input->getData(m_input.getChannel(), startFrame, size,
                             buffers[0] + offset);

        if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
            // use mean instead of sum, as plugin input
            float cc = float(input->getChannelCount());
            for (long i = 0; i < size; ++i) {
                buffers[0][i + offset] /= cc;
            }
        }

    } else {

        // Shift the write pointers by offset so getData fills past any
        // zero-padding written above
        float **writebuf = buffers;
        if (offset > 0) {
            writebuf = new float *[channelCount];
            for (int i = 0; i < channelCount; ++i) {
                writebuf[i] = buffers[i] + offset;
            }
        }

        got = input->getData(0, channelCount-1, startFrame, size, writebuf);

        if (writebuf != buffers) delete[] writebuf;
    }

    // Zero-fill whatever getData could not supply
    while (got < size) {
        for (int c = 0; c < channelCount; ++c) {
            buffers[c][got + offset] = 0.0;
        }
        ++got;
    }
}

// Add one feature returned by the plugin to the output model for
// transform index n. The feature's frame position is derived from the
// output's sample type (block position, explicit timestamp, or a
// running fixed-rate counter), and the target model class decides how
// the feature's values/duration/label are interpreted.
void
FeatureExtractionModelTransformer::addFeature(int n,
                                              int blockFrame,
                                              const Vamp::Plugin::Feature &feature)
{
    int inputRate = m_input.getModel()->getSampleRate();

//    cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = "
//         << blockFrame << ", hasTimestamp = " << feature.hasTimestamp
//         << ", timestamp = " << feature.timestamp << ", hasDuration = "
//         << feature.hasDuration << ", duration = " << feature.duration
//         << endl;

    int frame = blockFrame;

    if (m_descriptors[n]->sampleType ==
        Vamp::Plugin::OutputDescriptor::VariableSampleRate) {

        if (!feature.hasTimestamp) {
            cerr
                << "WARNING: FeatureExtractionModelTransformer::addFeature: "
                << "Feature has variable sample rate but no timestamp!"
                << endl;
            return;
        } else {
            frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
        }

    } else if (m_descriptors[n]->sampleType ==
               Vamp::Plugin::OutputDescriptor::FixedSampleRate) {

        // Fixed-rate features advance an implicit counter; an explicit
        // timestamp resets the counter to the matching feature number
        if (!feature.hasTimestamp) {
            ++m_fixedRateFeatureNos[n];
        } else {
            RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
            m_fixedRateFeatureNos[n] =
                lrint(ts.toDouble() * m_descriptors[n]->sampleRate);
        }

//        cerr << "m_fixedRateFeatureNo = " << m_fixedRateFeatureNo
//             << ", m_descriptor->sampleRate = " << m_descriptor->sampleRate
//             << ", inputRate = " << inputRate
//             << " giving frame = ";
        frame = lrintf((m_fixedRateFeatureNos[n] / m_descriptors[n]->sampleRate)
                       * int(inputRate));
    }

    if (frame < 0) {
        cerr
            << "WARNING: FeatureExtractionModelTransformer::addFeature: "
            << "Negative frame counts are not supported (frame = " << frame
            << " from timestamp " << feature.timestamp
            << "), dropping feature"
            << endl;
        return;
    }

    // Rather than repeat the complicated tests from the constructor
    // to determine what sort of model we must be adding the features
    // to, we instead test what sort of model the constructor decided
    // to create.

    if (isOutput<SparseOneDimensionalModel>(n)) {

        SparseOneDimensionalModel *model =
            getConformingOutput<SparseOneDimensionalModel>(n);
        if (!model) return;

        model->addPoint(SparseOneDimensionalModel::Point
                        (frame, feature.label.c_str()));

    } else if (isOutput<SparseTimeValueModel>(n)) {

        SparseTimeValueModel *model =
            getConformingOutput<SparseTimeValueModel>(n);
        if (!model) return;

        for (int i = 0; i < (int)feature.values.size(); ++i) {

            float value = feature.values[i];

            QString label = feature.label.c_str();
            if (feature.values.size() > 1) {
                label = QString("[%1] %2").arg(i+1).arg(label);
            }

            // Bin 0 goes to the primary model; bins 1+ to the lazily
            // created additional models
            SparseTimeValueModel *targetModel = model;

            if (m_needAdditionalModels[n] && i > 0) {
                targetModel = getAdditionalModel(n, i);
                if (!targetModel) targetModel = model;
//                std::cerr << "adding point to model " << targetModel
//                          << " for output " << n << " bin " << i << std::endl;
            }

            targetModel->addPoint
                (SparseTimeValueModel::Point(frame, value, label));
        }

    } else if (isOutput<FlexiNoteModel>(n) || isOutput<NoteModel>(n) || isOutput<RegionModel>(n)) { //GF: Added Note Model

        // Feature values are consumed positionally: value (pitch for
        // notes), then duration (if not explicit), then velocity
        int index = 0;

        float value = 0.0;
        if ((int)feature.values.size() > index) {
            value = feature.values[index++];
        }

        float duration = 1;
        if (feature.hasDuration) {
            duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
        } else {
            if ((int)feature.values.size() > index) {
                duration = feature.values[index++];
            }
        }

        if (isOutput<FlexiNoteModel>(n)) { // GF: added for flexi note model

            float velocity = 100;
            if ((int)feature.values.size() > index) {
                velocity = feature.values[index++];
            }
            if (velocity < 0) velocity = 127;
            if (velocity > 127) velocity = 127;

            FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
            if (!model) return;
            model->addPoint(FlexiNoteModel::Point(frame, value, // value is pitch
                                                  lrintf(duration),
                                                  velocity / 127.f,
                                                  feature.label.c_str()));
            // GF: end -- added for flexi note model
        } else if (isOutput<NoteModel>(n)) {

            float velocity = 100;
            if ((int)feature.values.size() > index) {
                velocity = feature.values[index++];
            }
            if (velocity < 0) velocity = 127;
            if (velocity > 127) velocity = 127;

            NoteModel *model = getConformingOutput<NoteModel>(n);
            if (!model) return;
            model->addPoint(NoteModel::Point(frame, value, // value is pitch
                                             lrintf(duration),
                                             velocity / 127.f,
                                             feature.label.c_str()));
        } else {

            RegionModel *model = getConformingOutput<RegionModel>(n);
            if (!model) return;

            if (feature.hasDuration && !feature.values.empty()) {

                // One region per value, all sharing the same extent
                for (int i = 0; i < (int)feature.values.size(); ++i) {

                    float value = feature.values[i];

                    QString label = feature.label.c_str();
                    if (feature.values.size() > 1) {
                        label = QString("[%1] %2").arg(i+1).arg(label);
                    }

                    model->addPoint(RegionModel::Point(frame, value,
                                                       lrintf(duration),
                                                       label));
                }
            } else {

                model->addPoint(RegionModel::Point(frame, value,
                                                   lrintf(duration),
                                                   feature.label.c_str()));
            }
        }

    } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {

        DenseThreeDimensionalModel::Column values =
            DenseThreeDimensionalModel::Column::fromStdVector(feature.values);

        EditableDenseThreeDimensionalModel *model =
            getConformingOutput<EditableDenseThreeDimensionalModel>(n);
        if (!model) return;

//        cerr << "(note: model resolution = " << model->getResolution() << ")"
//             << endl;

        if (!feature.hasTimestamp && m_fixedRateFeatureNos[n] >= 0) {
            model->setColumn(m_fixedRateFeatureNos[n], values);
        } else {
            model->setColumn(frame / model->getResolution(), values);
        }

    } else {
        SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl;
    }
}

// Propagate a completion percentage to the output model for transform
// index n, abandoning the whole run if that model has itself been
// abandoned. Dispatches on the concrete model type because the model
// classes share no common setCompletion interface here.
void
FeatureExtractionModelTransformer::setCompletion(int n, int completion)
{
//    SVDEBUG << "FeatureExtractionModelTransformer::setCompletion("
//              << completion << ")" << endl;

    if (isOutput<SparseOneDimensionalModel>(n)) {

        SparseOneDimensionalModel *model =
            getConformingOutput<SparseOneDimensionalModel>(n);
        if (!model) return;
        if (model->isAbandoning()) abandon();
        model->setCompletion(completion, true);

    } else if (isOutput<SparseTimeValueModel>(n)) {

        SparseTimeValueModel *model =
            getConformingOutput<SparseTimeValueModel>(n);
        if (!model) return;
        if (model->isAbandoning()) abandon();
        model->setCompletion(completion, true);

    } else if (isOutput<NoteModel>(n)) {

        NoteModel *model = getConformingOutput<NoteModel>(n);
        if (!model) return;
        if (model->isAbandoning()) abandon();
        model->setCompletion(completion, true);

    } else if (isOutput<FlexiNoteModel>(n)) {

        FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
        if (!model) return;
        if (model->isAbandoning()) abandon();
        model->setCompletion(completion, true);

    } else if (isOutput<RegionModel>(n)) {

        RegionModel *model = getConformingOutput<RegionModel>(n);
        if (!model) return;
        if (model->isAbandoning()) abandon();
        model->setCompletion(completion, true);

    } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {

        EditableDenseThreeDimensionalModel *model =
            getConformingOutput<EditableDenseThreeDimensionalModel>(n);
        if (!model) return;
        if (model->isAbandoning()) abandon();
        model->setCompletion(completion, true);
    }
}