svcore  1.9
CSVFileReader.cpp
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
00002 
00003 /*
00004     Sonic Visualiser
00005     An audio file viewer and annotation editor.
00006     Centre for Digital Music, Queen Mary, University of London.
00007     This file copyright 2006 Chris Cannam.
00008     
00009     This program is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU General Public License as
00011     published by the Free Software Foundation; either version 2 of the
00012     License, or (at your option) any later version.  See the file
00013     COPYING included with this distribution for more information.
00014 */
00015 
00016 #include "CSVFileReader.h"
00017 
00018 #include "model/Model.h"
00019 #include "base/RealTime.h"
00020 #include "base/StringBits.h"
00021 #include "model/SparseOneDimensionalModel.h"
00022 #include "model/SparseTimeValueModel.h"
00023 #include "model/EditableDenseThreeDimensionalModel.h"
00024 #include "model/RegionModel.h"
00025 #include "model/NoteModel.h"
00026 #include "DataFileReaderFactory.h"
00027 
00028 #include <QFile>
00029 #include <QString>
00030 #include <QRegExp>
00031 #include <QStringList>
00032 #include <QTextStream>
00033 
00034 #include <iostream>
00035 #include <map>
00036 
00037 CSVFileReader::CSVFileReader(QString path, CSVFormat format,
00038                              int mainModelSampleRate) :
00039     m_format(format),
00040     m_file(0),
00041     m_warnings(0),
00042     m_mainModelSampleRate(mainModelSampleRate)
00043 {
00044     m_file = new QFile(path);
00045     bool good = false;
00046     
00047     if (!m_file->exists()) {
00048         m_error = QFile::tr("File \"%1\" does not exist").arg(path);
00049     } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
00050         m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
00051     } else {
00052         good = true;
00053     }
00054 
00055     if (!good) {
00056         delete m_file;
00057         m_file = 0;
00058     }
00059 }
00060 
00061 CSVFileReader::~CSVFileReader()
00062 {
00063     SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
00064 
00065     if (m_file) {
00066         SVDEBUG << "CSVFileReader::CSVFileReader: Closing file" << endl;
00067         m_file->close();
00068     }
00069     delete m_file;
00070 }
00071 
00072 bool
00073 CSVFileReader::isOK() const
00074 {
00075     return (m_file != 0);
00076 }
00077 
00078 QString
00079 CSVFileReader::getError() const
00080 {
00081     return m_error;
00082 }
00083 
00084 int
00085 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate,
00086                                 int windowSize) const
00087 {
00088     QRegExp nonNumericRx("[^0-9eE.,+-]");
00089     int warnLimit = 10;
00090 
00091     CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
00092 
00093     int calculatedFrame = 0;
00094 
00095     bool ok = false;
00096     QString numeric = s;
00097     numeric.remove(nonNumericRx);
00098     
00099     if (timeUnits == CSVFormat::TimeSeconds) {
00100 
00101         double time = numeric.toDouble(&ok);
00102         if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
00103         calculatedFrame = int(time * sampleRate + 0.5);
00104         
00105     } else {
00106         
00107         long n = numeric.toLong(&ok);
00108         if (n >= 0) calculatedFrame = n;
00109         
00110         if (timeUnits == CSVFormat::TimeWindows) {
00111             calculatedFrame *= windowSize;
00112         }
00113     }
00114     
00115     if (!ok) {
00116         if (m_warnings < warnLimit) {
00117             cerr << "WARNING: CSVFileReader::load: "
00118                       << "Bad time format (\"" << s
00119                       << "\") in data line "
00120                       << lineno+1 << endl;
00121         } else if (m_warnings == warnLimit) {
00122             cerr << "WARNING: Too many warnings" << endl;
00123         }
00124         ++m_warnings;
00125     }
00126 
00127     return calculatedFrame;
00128 }
00129 
00130 Model *
00131 CSVFileReader::load() const
00132 {
00133     if (!m_file) return 0;
00134 
00135     CSVFormat::ModelType modelType = m_format.getModelType();
00136     CSVFormat::TimingType timingType = m_format.getTimingType();
00137     CSVFormat::TimeUnits timeUnits = m_format.getTimeUnits();
00138     int sampleRate = m_format.getSampleRate();
00139     int windowSize = m_format.getWindowSize();
00140     QChar separator = m_format.getSeparator();
00141     bool allowQuoting = m_format.getAllowQuoting();
00142 
00143     if (timingType == CSVFormat::ExplicitTiming) {
00144         if (modelType == CSVFormat::ThreeDimensionalModel) {
00145             // This will be overridden later if more than one line
00146             // appears in our file, but we want to choose a default
00147             // that's likely to be visible
00148             windowSize = 1024;
00149         } else {
00150             windowSize = 1;
00151         }
00152         if (timeUnits == CSVFormat::TimeSeconds) {
00153             sampleRate = m_mainModelSampleRate;
00154         }
00155     }
00156 
00157     SparseOneDimensionalModel *model1 = 0;
00158     SparseTimeValueModel *model2 = 0;
00159     RegionModel *model2a = 0;
00160     NoteModel *model2b = 0;
00161     EditableDenseThreeDimensionalModel *model3 = 0;
00162     Model *model = 0;
00163 
00164     QTextStream in(m_file);
00165     in.seek(0);
00166 
00167     unsigned int warnings = 0, warnLimit = 10;
00168     unsigned int lineno = 0;
00169 
00170     float min = 0.0, max = 0.0;
00171 
00172     int frameNo = 0;
00173     int duration = 0;
00174     int endFrame = 0;
00175 
00176     bool haveAnyValue = false;
00177     bool haveEndTime = false;
00178     bool pitchLooksLikeMIDI = true;
00179 
00180     int startFrame = 0; // for calculation of dense model resolution
00181     bool firstEverValue = true;
00182 
00183     std::map<QString, int> labelCountMap;
00184     
00185     int valueColumns = 0;
00186     for (int i = 0; i < m_format.getColumnCount(); ++i) {
00187         if (m_format.getColumnPurpose(i) == CSVFormat::ColumnValue) {
00188             ++valueColumns;
00189         }
00190     }
00191 
00192     while (!in.atEnd()) {
00193 
00194         // QTextStream's readLine doesn't cope with old-style Mac
00195         // CR-only line endings.  Why did they bother making the class
00196         // cope with more than one sort of line ending, if it still
00197         // can't be configured to cope with all the common sorts?
00198 
00199         // For the time being we'll deal with this case (which is
00200         // relatively uncommon for us, but still necessary to handle)
00201         // by reading the entire file using a single readLine, and
00202         // splitting it.  For CR and CR/LF line endings this will just
00203         // read a line at a time, and that's obviously OK.
00204 
00205         QString chunk = in.readLine();
00206         QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
00207         
00208         for (int li = 0; li < lines.size(); ++li) {
00209 
00210             QString line = lines[li];
00211 
00212             if (line.startsWith("#")) continue;
00213 
00214             QStringList list = StringBits::split(line, separator, allowQuoting);
00215             if (!model) {
00216 
00217                 switch (modelType) {
00218 
00219                 case CSVFormat::OneDimensionalModel:
00220                     model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
00221                     model = model1;
00222                     break;
00223                 
00224                 case CSVFormat::TwoDimensionalModel:
00225                     model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
00226                     model = model2;
00227                     break;
00228                 
00229                 case CSVFormat::TwoDimensionalModelWithDuration:
00230                     model2a = new RegionModel(sampleRate, windowSize, false);
00231                     model = model2a;
00232                     break;
00233                 
00234                 case CSVFormat::TwoDimensionalModelWithDurationAndPitch:
00235                     model2b = new NoteModel(sampleRate, windowSize, false);
00236                     model = model2b;
00237                     break;
00238                 
00239                 case CSVFormat::ThreeDimensionalModel:
00240                     model3 = new EditableDenseThreeDimensionalModel
00241                         (sampleRate,
00242                          windowSize,
00243                          valueColumns,
00244                          EditableDenseThreeDimensionalModel::NoCompression);
00245                     model = model3;
00246                     break;
00247                 }
00248             }
00249 
00250             float value = 0.f;
00251             float pitch = 0.f;
00252             QString label = "";
00253 
00254             duration = 0.f;
00255             haveEndTime = false;
00256 
00257             for (int i = 0; i < list.size(); ++i) {
00258 
00259                 QString s = list[i];
00260 
00261                 CSVFormat::ColumnPurpose purpose = m_format.getColumnPurpose(i);
00262 
00263                 switch (purpose) {
00264 
00265                 case CSVFormat::ColumnUnknown:
00266                     break;
00267 
00268                 case CSVFormat::ColumnStartTime:
00269                     frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
00270                     break;
00271                 
00272                 case CSVFormat::ColumnEndTime:
00273                     endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
00274                     haveEndTime = true;
00275                     break;
00276 
00277                 case CSVFormat::ColumnDuration:
00278                     duration = convertTimeValue(s, lineno, sampleRate, windowSize);
00279                     break;
00280 
00281                 case CSVFormat::ColumnValue:
00282                     value = s.toFloat();
00283                     haveAnyValue = true;
00284                     break;
00285 
00286                 case CSVFormat::ColumnPitch:
00287                     pitch = s.toFloat();
00288                     if (pitch < 0.f || pitch > 127.f) {
00289                         pitchLooksLikeMIDI = false;
00290                     }
00291                     break;
00292 
00293                 case CSVFormat::ColumnLabel:
00294                     label = s;
00295                     ++labelCountMap[label];
00296                     break;
00297                 }
00298             }
00299 
00300             if (haveEndTime) { // ... calculate duration now all cols read
00301                 if (endFrame > frameNo) {
00302                     duration = endFrame - frameNo;
00303                 }
00304             }
00305 
00306             if (modelType == CSVFormat::OneDimensionalModel) {
00307             
00308                 SparseOneDimensionalModel::Point point(frameNo, label);
00309                 model1->addPoint(point);
00310 
00311             } else if (modelType == CSVFormat::TwoDimensionalModel) {
00312 
00313                 SparseTimeValueModel::Point point(frameNo, value, label);
00314                 model2->addPoint(point);
00315 
00316             } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
00317 
00318                 RegionModel::Point point(frameNo, value, duration, label);
00319                 model2a->addPoint(point);
00320 
00321             } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
00322 
00323                 float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
00324                 NoteModel::Point point(frameNo, pitch, duration, level, label);
00325                 model2b->addPoint(point);
00326 
00327             } else if (modelType == CSVFormat::ThreeDimensionalModel) {
00328 
00329                 DenseThreeDimensionalModel::Column values;
00330 
00331                 for (int i = 0; i < list.size(); ++i) {
00332 
00333                     if (m_format.getColumnPurpose(i) != CSVFormat::ColumnValue) {
00334                         continue;
00335                     }
00336 
00337                     bool ok = false;
00338                     float value = list[i].toFloat(&ok);
00339 
00340                     values.push_back(value);
00341             
00342                     if (firstEverValue || value < min) min = value;
00343                     if (firstEverValue || value > max) max = value;
00344                     
00345                     if (firstEverValue) {
00346                         startFrame = frameNo;
00347                         model3->setStartFrame(startFrame);
00348                     } else if (lineno == 1 &&
00349                                timingType == CSVFormat::ExplicitTiming) {
00350                         model3->setResolution(frameNo - startFrame);
00351                     }
00352                     
00353                     firstEverValue = false;
00354 
00355                     if (!ok) {
00356                         if (warnings < warnLimit) {
00357                             cerr << "WARNING: CSVFileReader::load: "
00358                                       << "Non-numeric value \""
00359                                       << list[i]
00360                                       << "\" in data line " << lineno+1
00361                                       << ":" << endl;
00362                             cerr << line << endl;
00363                             ++warnings;
00364                         } else if (warnings == warnLimit) {
00365 //                            cerr << "WARNING: Too many warnings" << endl;
00366                         }
00367                     }
00368                 }
00369         
00370 //                SVDEBUG << "Setting bin values for count " << lineno << ", frame "
00371 //                          << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
00372 
00373                 model3->setColumn(lineno, values);
00374             }
00375 
00376             ++lineno;
00377             if (timingType == CSVFormat::ImplicitTiming ||
00378                 list.size() == 0) {
00379                 frameNo += windowSize;
00380             }
00381         }
00382     }
00383 
00384     if (!haveAnyValue) {
00385         if (model2a) {
00386             // assign values for regions based on label frequency; we
00387             // have this in our labelCountMap, sort of
00388 
00389             std::map<int, std::map<QString, float> > countLabelValueMap;
00390             for (std::map<QString, int>::iterator i = labelCountMap.begin();
00391                  i != labelCountMap.end(); ++i) {
00392                 countLabelValueMap[i->second][i->first] = 0.f;
00393             }
00394 
00395             float v = 0.f;
00396             for (std::map<int, std::map<QString, float> >::iterator i =
00397                      countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
00398                 --i;
00399                 for (std::map<QString, float>::iterator j = i->second.begin();
00400                      j != i->second.end(); ++j) {
00401                     j->second = v;
00402                     v = v + 1.f;
00403                 }
00404             }
00405 
00406             std::map<RegionModel::Point, RegionModel::Point,
00407                 RegionModel::Point::Comparator> pointMap;
00408             for (RegionModel::PointList::const_iterator i =
00409                      model2a->getPoints().begin();
00410                  i != model2a->getPoints().end(); ++i) {
00411                 RegionModel::Point p(*i);
00412                 v = countLabelValueMap[labelCountMap[p.label]][p.label];
00413                 RegionModel::Point pp(p.frame, v, p.duration, p.label);
00414                 pointMap[p] = pp;
00415             }
00416 
00417             for (std::map<RegionModel::Point, RegionModel::Point>::iterator i = 
00418                      pointMap.begin(); i != pointMap.end(); ++i) {
00419                 model2a->deletePoint(i->first);
00420                 model2a->addPoint(i->second);
00421             }
00422         }
00423     }
00424                 
00425     if (model2b) {
00426         if (pitchLooksLikeMIDI) {
00427             model2b->setScaleUnits("MIDI Pitch");
00428         } else {
00429             model2b->setScaleUnits("Hz");
00430         }
00431     }
00432 
00433     if (model3) {
00434         model3->setMinimumLevel(min);
00435         model3->setMaximumLevel(max);
00436     }
00437 
00438     return model;
00439 }
00440