svcore
1.9
|
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 00002 00003 /* 00004 Sonic Visualiser 00005 An audio file viewer and annotation editor. 00006 Centre for Digital Music, Queen Mary, University of London. 00007 This file copyright 2006 Chris Cannam. 00008 00009 This program is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU General Public License as 00011 published by the Free Software Foundation; either version 2 of the 00012 License, or (at your option) any later version. See the file 00013 COPYING included with this distribution for more information. 00014 */ 00015 00016 #include "CSVFileReader.h" 00017 00018 #include "model/Model.h" 00019 #include "base/RealTime.h" 00020 #include "base/StringBits.h" 00021 #include "model/SparseOneDimensionalModel.h" 00022 #include "model/SparseTimeValueModel.h" 00023 #include "model/EditableDenseThreeDimensionalModel.h" 00024 #include "model/RegionModel.h" 00025 #include "model/NoteModel.h" 00026 #include "DataFileReaderFactory.h" 00027 00028 #include <QFile> 00029 #include <QString> 00030 #include <QRegExp> 00031 #include <QStringList> 00032 #include <QTextStream> 00033 00034 #include <iostream> 00035 #include <map> 00036 00037 CSVFileReader::CSVFileReader(QString path, CSVFormat format, 00038 int mainModelSampleRate) : 00039 m_format(format), 00040 m_file(0), 00041 m_warnings(0), 00042 m_mainModelSampleRate(mainModelSampleRate) 00043 { 00044 m_file = new QFile(path); 00045 bool good = false; 00046 00047 if (!m_file->exists()) { 00048 m_error = QFile::tr("File \"%1\" does not exist").arg(path); 00049 } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) { 00050 m_error = QFile::tr("Failed to open file \"%1\"").arg(path); 00051 } else { 00052 good = true; 00053 } 00054 00055 if (!good) { 00056 delete m_file; 00057 m_file = 0; 00058 } 00059 } 00060 00061 CSVFileReader::~CSVFileReader() 00062 { 00063 SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl; 00064 00065 if (m_file) { 00066 SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl; 00067 m_file->close(); 00068 } 00069 delete m_file; 00070 } 00071 00072 bool 00073 CSVFileReader::isOK() const 00074 { 00075 return (m_file != 0); 00076 } 00077 00078 QString 00079 CSVFileReader::getError() const 00080 { 00081 return m_error; 00082 } 00083 00084 int 00085 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate, 00086 int windowSize) const 00087 { 00088 QRegExp nonNumericRx("[^0-9eE.,+-]"); 00089 int warnLimit = 10; 00090 00091 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); 00092 00093 int calculatedFrame = 0; 00094 00095 bool ok = false; 00096 QString numeric = s; 00097 numeric.remove(nonNumericRx); 00098 00099 if (timeUnits == CSVFormat::TimeSeconds) { 00100 00101 double time = numeric.toDouble(&ok); 00102 if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok); 00103 calculatedFrame = int(time * sampleRate + 0.5); 00104 00105 } else { 00106 00107 long n = numeric.toLong(&ok); 00108 if (n >= 0) calculatedFrame = n; 00109 00110 if (timeUnits == CSVFormat::TimeWindows) { 00111 calculatedFrame *= windowSize; 00112 } 00113 } 00114 00115 if (!ok) { 00116 if (m_warnings < warnLimit) { 00117 cerr << "WARNING: CSVFileReader::load: " 00118 << "Bad time format (\"" << s 00119 << "\") in data line " 00120 << lineno+1 << endl; 00121 } else if (m_warnings == warnLimit) { 00122 cerr << "WARNING: Too many warnings" << endl; 00123 } 00124 ++m_warnings; 00125 } 00126 00127 return calculatedFrame; 00128 } 00129 00130 Model * 00131 CSVFileReader::load() const 00132 { 00133 if (!m_file) return 0; 00134 00135 CSVFormat::ModelType modelType = m_format.getModelType(); 00136 CSVFormat::TimingType timingType = m_format.getTimingType(); 00137 CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits(); 00138 int sampleRate = m_format.getSampleRate(); 00139 int windowSize = m_format.getWindowSize(); 00140 QChar separator = m_format.getSeparator(); 00141 bool allowQuoting = m_format.getAllowQuoting(); 00142 00143 if (timingType == CSVFormat::ExplicitTiming) { 00144 if (modelType == CSVFormat::ThreeDimensionalModel) { 00145 // This will be overridden later if more than one line 00146 // appears in our file, but we want to choose a default 00147 // that's likely to be visible 00148 windowSize = 1024; 00149 } else { 00150 windowSize = 1; 00151 } 00152 if (timeUnits == CSVFormat::TimeSeconds) { 00153 sampleRate = m_mainModelSampleRate; 00154 } 00155 } 00156 00157 SparseOneDimensionalModel *model1 = 0; 00158 SparseTimeValueModel *model2 = 0; 00159 RegionModel *model2a = 0; 00160 NoteModel *model2b = 0; 00161 EditableDenseThreeDimensionalModel *model3 = 0; 00162 Model *model = 0; 00163 00164 QTextStream in(m_file); 00165 in.seek(0); 00166 00167 unsigned int warnings = 0, warnLimit = 10; 00168 unsigned int lineno = 0; 00169 00170 float min = 0.0, max = 0.0; 00171 00172 int frameNo = 0; 00173 int duration = 0; 00174 int endFrame = 0; 00175 00176 bool haveAnyValue = false; 00177 bool haveEndTime = false; 00178 bool pitchLooksLikeMIDI = true; 00179 00180 int startFrame = 0; // for calculation of dense model resolution 00181 bool firstEverValue = true; 00182 00183 std::map<QString, int> labelCountMap; 00184 00185 int valueColumns = 0; 00186 for (int i = 0; i < m_format.getColumnCount(); ++i) { 00187 if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) { 00188 ++valueColumns; 00189 } 00190 } 00191 00192 while (!in.atEnd()) { 00193 00194 // QTextStream's readLine doesn't cope with old-style Mac 00195 // CR-only line endings. Why did they bother making the class 00196 // cope with more than one sort of line ending, if it still 00197 // can't be configured to cope with all the common sorts? 00198 00199 // For the time being we'll deal with this case (which is 00200 // relatively uncommon for us, but still necessary to handle) 00201 // by reading the entire file using a single readLine, and 00202 // splitting it. For CR and CR/LF line endings this will just 00203 // read a line at a time, and that's obviously OK. 00204 00205 QString chunk = in.readLine(); 00206 QStringList lines = chunk.split('\r', QString::SkipEmptyParts); 00207 00208 for (int li = 0; li < lines.size(); ++li) { 00209 00210 QString line = lines[li]; 00211 00212 if (line.startsWith("#")) continue; 00213 00214 QStringList list = StringBits::split(line, separator, allowQuoting); 00215 if (!model) { 00216 00217 switch (modelType) { 00218 00219 case CSVFormat::OneDimensionalModel: 00220 model1 = new SparseOneDimensionalModel(sampleRate, windowSize); 00221 model = model1; 00222 break; 00223 00224 case CSVFormat::TwoDimensionalModel: 00225 model2 = new SparseTimeValueModel(sampleRate, windowSize, false); 00226 model = model2; 00227 break; 00228 00229 case CSVFormat::TwoDimensionalModelWithDuration: 00230 model2a = new RegionModel(sampleRate, windowSize, false); 00231 model = model2a; 00232 break; 00233 00234 case CSVFormat::TwoDimensionalModelWithDurationAndPitch: 00235 model2b = new NoteModel(sampleRate, windowSize, false); 00236 model = model2b; 00237 break; 00238 00239 case CSVFormat::ThreeDimensionalModel: 00240 model3 = new EditableDenseThreeDimensionalModel 00241 (sampleRate, 00242 windowSize, 00243 valueColumns, 00244 EditableDenseThreeDimensionalModel::NoCompression); 00245 model = model3; 00246 break; 00247 } 00248 } 00249 00250 float value = 0.f; 00251 float pitch = 0.f; 00252 QString label = ""; 00253 00254 duration = 0.f; 00255 haveEndTime = false; 00256 00257 for (int i = 0; i < list.size(); ++i) { 00258 00259 QString s = list[i]; 00260 00261 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i); 00262 00263 switch (purpose) { 00264 00265 case CSVFormat::ColumnUnknown: 00266 break; 00267 00268 case CSVFormat::ColumnStartTime: 00269 frameNo = convertTimeValue(s, lineno, sampleRate, windowSize); 00270 break; 00271 00272 case CSVFormat::ColumnEndTime: 00273 endFrame = convertTimeValue(s, lineno, sampleRate, windowSize); 00274 haveEndTime = true; 00275 break; 00276 00277 case CSVFormat::ColumnDuration: 00278 duration = convertTimeValue(s, lineno, sampleRate, windowSize); 00279 break; 00280 00281 case CSVFormat::ColumnValue: 00282 value = s.toFloat(); 00283 haveAnyValue = true; 00284 break; 00285 00286 case CSVFormat::ColumnPitch: 00287 pitch = s.toFloat(); 00288 if (pitch < 0.f || pitch > 127.f) { 00289 pitchLooksLikeMIDI = false; 00290 } 00291 break; 00292 00293 case CSVFormat::ColumnLabel: 00294 label = s; 00295 ++labelCountMap[label]; 00296 break; 00297 } 00298 } 00299 00300 if (haveEndTime) { // ... calculate duration now all cols read 00301 if (endFrame > frameNo) { 00302 duration = endFrame - frameNo; 00303 } 00304 } 00305 00306 if (modelType == CSVFormat::OneDimensionalModel) { 00307 00308 SparseOneDimensionalModel::Point point(frameNo, label); 00309 model1->addPoint(point); 00310 00311 } else if (modelType == CSVFormat::TwoDimensionalModel) { 00312 00313 SparseTimeValueModel::Point point(frameNo, value, label); 00314 model2->addPoint(point); 00315 00316 } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) { 00317 00318 RegionModel::Point point(frameNo, value, duration, label); 00319 model2a->addPoint(point); 00320 00321 } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) { 00322 00323 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f); 00324 NoteModel::Point point(frameNo, pitch, duration, level, label); 00325 model2b->addPoint(point); 00326 00327 } else if (modelType == CSVFormat::ThreeDimensionalModel) { 00328 00329 DenseThreeDimensionalModel::Column values; 00330 00331 for (int i = 0; i < list.size(); ++i) { 00332 00333 if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) { 00334 continue; 00335 } 00336 00337 bool ok = false; 00338 float value = list[i].toFloat(&ok); 00339 00340 values.push_back(value); 00341 00342 if (firstEverValue || value < min) min = value; 00343 if (firstEverValue || value > max) max = value; 00344 00345 if (firstEverValue) { 00346 startFrame = frameNo; 00347 model3->setStartFrame(startFrame); 00348 } else if (lineno == 1 && 00349 timingType == CSVFormat::ExplicitTiming) { 00350 model3->setResolution(frameNo - startFrame); 00351 } 00352 00353 firstEverValue = false; 00354 00355 if (!ok) { 00356 if (warnings < warnLimit) { 00357 cerr << "WARNING: CSVFileReader::load: " 00358 << "Non-numeric value \"" 00359 << list[i] 00360 << "\" in data line " << lineno+1 00361 << ":" << endl; 00362 cerr << line << endl; 00363 ++warnings; 00364 } else if (warnings == warnLimit) { 00365 // cerr << "WARNING: Too many warnings" << endl; 00366 } 00367 } 00368 } 00369 00370 // SVDEBUG << "Setting bin values for count " << lineno << ", frame " 00371 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl; 00372 00373 model3->setColumn(lineno, values); 00374 } 00375 00376 ++lineno; 00377 if (timingType == CSVFormat::ImplicitTiming || 00378 list.size() == 0) { 00379 frameNo += windowSize; 00380 } 00381 } 00382 } 00383 00384 if (!haveAnyValue) { 00385 if (model2a) { 00386 // assign values for regions based on label frequency; we 00387 // have this in our labelCountMap, sort of 00388 00389 std::map<int, std::map<QString, float> > countLabelValueMap; 00390 for (std::map<QString, int>::iterator i = labelCountMap.begin(); 00391 i != labelCountMap.end(); ++i) { 00392 countLabelValueMap[i->second][i->first] = 0.f; 00393 } 00394 00395 float v = 0.f; 00396 for (std::map<int, std::map<QString, float> >::iterator i = 00397 countLabelValueMap.end(); i != countLabelValueMap.begin(); ) { 00398 --i; 00399 for (std::map<QString, float>::iterator j = i->second.begin(); 00400 j != i->second.end(); ++j) { 00401 j->second = v; 00402 v = v + 1.f; 00403 } 00404 } 00405 00406 std::map<RegionModel::Point, RegionModel::Point, 00407 RegionModel::Point::Comparator> pointMap; 00408 for (RegionModel::PointList::const_iterator i = 00409 model2a->getPoints().begin(); 00410 i != model2a->getPoints().end(); ++i) { 00411 RegionModel::Point p(*i); 00412 v = countLabelValueMap[labelCountMap[p.label]][p.label]; 00413 RegionModel::Point pp(p.frame, v, p.duration, p.label); 00414 pointMap[p] = pp; 00415 } 00416 00417 for (std::map<RegionModel::Point, RegionModel::Point>::iterator i = 00418 pointMap.begin(); i != pointMap.end(); ++i) { 00419 model2a->deletePoint(i->first); 00420 model2a->addPoint(i->second); 00421 } 00422 } 00423 } 00424 00425 if (model2b) { 00426 if (pitchLooksLikeMIDI) { 00427 model2b->setScaleUnits("MIDI Pitch"); 00428 } else { 00429 model2b->setScaleUnits("Hz"); 00430 } 00431 } 00432 00433 if (model3) { 00434 model3->setMinimumLevel(min); 00435 model3->setMaximumLevel(max); 00436 } 00437 00438 return model; 00439 } 00440