svcore
1.9
|
00001 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 00002 00003 /* 00004 Sonic Visualiser 00005 An audio file viewer and annotation editor. 00006 Centre for Digital Music, Queen Mary, University of London. 00007 This file copyright 2006 Chris Cannam. 00008 00009 This program is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU General Public License as 00011 published by the Free Software Foundation; either version 2 of the 00012 License, or (at your option) any later version. See the file 00013 COPYING included with this distribution for more information. 00014 */ 00015 00016 #include "CSVFormat.h" 00017 00018 #include "base/StringBits.h" 00019 00020 #include <QFile> 00021 #include <QString> 00022 #include <QRegExp> 00023 #include <QStringList> 00024 #include <QTextStream> 00025 00026 #include <iostream> 00027 00028 CSVFormat::CSVFormat(QString path) : 00029 m_separator(""), 00030 m_sampleRate(44100), 00031 m_windowSize(1024), 00032 m_allowQuoting(true) 00033 { 00034 guessFormatFor(path); 00035 } 00036 00037 void 00038 CSVFormat::guessFormatFor(QString path) 00039 { 00040 m_modelType = TwoDimensionalModel; 00041 m_timingType = ExplicitTiming; 00042 m_timeUnits = TimeSeconds; 00043 00044 m_maxExampleCols = 0; 00045 m_columnCount = 0; 00046 m_variableColumnCount = false; 00047 00048 m_example.clear(); 00049 m_columnQualities.clear(); 00050 m_columnPurposes.clear(); 00051 m_prevValues.clear(); 00052 00053 QFile file(path); 00054 if (!file.exists()) return; 00055 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; 00056 00057 QTextStream in(&file); 00058 in.seek(0); 00059 00060 int lineno = 0; 00061 00062 while (!in.atEnd()) { 00063 00064 // See comment about line endings in CSVFileReader::load() 00065 00066 QString chunk = in.readLine(); 00067 QStringList lines = chunk.split('\r', QString::SkipEmptyParts); 00068 00069 for (int li = 0; li < lines.size(); ++li) { 00070 00071 QString line = lines[li]; 00072 if (line.startsWith("#") || line == "") continue; 00073 00074 guessQualities(line, lineno); 00075 00076 ++lineno; 00077 } 00078 00079 if (lineno >= 50) break; 00080 } 00081 00082 guessPurposes(); 00083 } 00084 00085 void 00086 CSVFormat::guessSeparator(QString line) 00087 { 00088 char candidates[] = { ',', '\t', ' ', '|', '/', ':' }; 00089 for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) { 00090 if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) { 00091 m_separator = candidates[i]; 00092 return; 00093 } 00094 } 00095 m_separator = " "; 00096 } 00097 00098 void 00099 CSVFormat::guessQualities(QString line, int lineno) 00100 { 00101 if (m_separator == "") guessSeparator(line); 00102 00103 QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting); 00104 00105 int cols = list.size(); 00106 if (lineno == 0 || (cols < m_columnCount)) m_columnCount = cols; 00107 if (cols != m_columnCount) m_variableColumnCount = true; 00108 00109 // All columns are regarded as having these qualities until we see 00110 // something that indicates otherwise: 00111 00112 ColumnQualities defaultQualities = 00113 ColumnNumeric | ColumnIntegral | ColumnIncreasing; 00114 00115 for (int i = 0; i < cols; ++i) { 00116 00117 while (m_columnQualities.size() <= i) { 00118 m_columnQualities.push_back(defaultQualities); 00119 m_prevValues.push_back(0.f); 00120 } 00121 00122 QString s(list[i]); 00123 bool ok = false; 00124 00125 ColumnQualities qualities = m_columnQualities[i]; 00126 00127 bool numeric = (qualities & ColumnNumeric); 00128 bool integral = (qualities & ColumnIntegral); 00129 bool increasing = (qualities & ColumnIncreasing); 00130 bool large = (qualities & ColumnLarge); // this one defaults to off 00131 00132 float value = 0.f; 00133 00135 00136 if (numeric) { 00137 value = s.toFloat(&ok); 00138 if (!ok) { 00139 value = (float)StringBits::stringToDoubleLocaleFree(s, &ok); 00140 } 00141 if (ok) { 00142 if (lineno < 2 && value > 1000.f) large = true; 00143 } else { 00144 numeric = false; 00145 } 00146 } 00147 00148 if (numeric) { 00149 00150 if (integral) { 00151 if (s.contains('.') || s.contains(',')) { 00152 integral = false; 00153 } 00154 } 00155 00156 if (increasing) { 00157 if (lineno > 0 && value <= m_prevValues[i]) { 00158 increasing = false; 00159 } 00160 } 00161 00162 m_prevValues[i] = value; 00163 } 00164 00165 m_columnQualities[i] = 00166 (numeric ? ColumnNumeric : 0) | 00167 (integral ? ColumnIntegral : 0) | 00168 (increasing ? ColumnIncreasing : 0) | 00169 (large ? ColumnLarge : 0); 00170 } 00171 00172 if (lineno < 10) { 00173 m_example.push_back(list); 00174 if (lineno == 0 || cols > m_maxExampleCols) { 00175 m_maxExampleCols = cols; 00176 } 00177 } 00178 00179 // cerr << "Estimated column qualities: "; 00180 // for (int i = 0; i < m_columnCount; ++i) { 00181 // cerr << int(m_columnQualities[i]) << " "; 00182 // } 00183 // cerr << endl; 00184 } 00185 00186 void 00187 CSVFormat::guessPurposes() 00188 { 00189 m_timingType = CSVFormat::ImplicitTiming; 00190 m_timeUnits = CSVFormat::TimeWindows; 00191 00192 int timingColumnCount = 0; 00193 00194 for (int i = 0; i < m_columnCount; ++i) { 00195 00196 ColumnPurpose purpose = ColumnUnknown; 00197 bool primary = (i == 0); 00198 00199 ColumnQualities qualities = m_columnQualities[i]; 00200 00201 bool numeric = (qualities & ColumnNumeric); 00202 bool integral = (qualities & ColumnIntegral); 00203 bool increasing = (qualities & ColumnIncreasing); 00204 bool large = (qualities & ColumnLarge); 00205 00206 bool timingColumn = (numeric && increasing); 00207 00208 if (timingColumn) { 00209 00210 ++timingColumnCount; 00211 00212 if (primary) { 00213 00214 purpose = ColumnStartTime; 00215 00216 m_timingType = ExplicitTiming; 00217 00218 if (integral && large) { 00219 m_timeUnits = TimeAudioFrames; 00220 } else { 00221 m_timeUnits = TimeSeconds; 00222 } 00223 00224 } else { 00225 00226 if (timingColumnCount == 2 && m_timingType == ExplicitTiming) { 00227 purpose = ColumnEndTime; 00228 } 00229 } 00230 } 00231 00232 if (purpose == ColumnUnknown) { 00233 if (numeric) { 00234 purpose = ColumnValue; 00235 } else { 00236 purpose = ColumnLabel; 00237 } 00238 } 00239 00240 setColumnPurpose(i, purpose); 00241 } 00242 00243 int valueCount = 0; 00244 for (int i = 0; i < m_columnCount; ++i) { 00245 if (m_columnPurposes[i] == ColumnValue) ++valueCount; 00246 } 00247 00248 if (valueCount == 2 && timingColumnCount == 1) { 00249 // If we have exactly two apparent value columns and only one 00250 // timing column, but one value column is integral and the 00251 // other is not, guess that whichever one matches the integral 00252 // status of the time column is either duration or end time 00253 if (m_timingType == ExplicitTiming) { 00254 int a = -1, b = -1; 00255 for (int i = 0; i < m_columnCount; ++i) { 00256 if (m_columnPurposes[i] == ColumnValue) { 00257 if (a == -1) a = i; 00258 else b = i; 00259 } 00260 } 00261 if ((m_columnQualities[a] & ColumnIntegral) != 00262 (m_columnQualities[b] & ColumnIntegral)) { 00263 int timecol = a; 00264 if ((m_columnQualities[a] & ColumnIntegral) != 00265 (m_columnQualities[0] & ColumnIntegral)) { 00266 timecol = b; 00267 } 00268 if (m_columnQualities[timecol] & ColumnIncreasing) { 00269 // This shouldn't happen; should have been settled above 00270 m_columnPurposes[timecol] = ColumnEndTime; 00271 } else { 00272 m_columnPurposes[timecol] = ColumnDuration; 00273 } 00274 --valueCount; 00275 } 00276 } 00277 } 00278 00279 if (timingColumnCount > 1) { 00280 m_modelType = TwoDimensionalModelWithDuration; 00281 } else { 00282 if (valueCount == 0) { 00283 m_modelType = OneDimensionalModel; 00284 } else if (valueCount == 1) { 00285 m_modelType = TwoDimensionalModel; 00286 } else { 00287 m_modelType = ThreeDimensionalModel; 00288 } 00289 } 00290 00291 // cerr << "Estimated column purposes: "; 00292 // for (int i = 0; i < m_columnCount; ++i) { 00293 // cerr << int(m_columnPurposes[i]) << " "; 00294 // } 00295 // cerr << endl; 00296 00297 // cerr << "Estimated model type: " << m_modelType << endl; 00298 // cerr << "Estimated timing type: " << m_timingType << endl; 00299 // cerr << "Estimated units: " << m_timeUnits << endl; 00300 } 00301 00302 CSVFormat::ColumnPurpose 00303 CSVFormat::getColumnPurpose(int i) 00304 { 00305 while (m_columnPurposes.size() <= i) { 00306 m_columnPurposes.push_back(ColumnUnknown); 00307 } 00308 return m_columnPurposes[i]; 00309 } 00310 00311 CSVFormat::ColumnPurpose 00312 CSVFormat::getColumnPurpose(int i) const 00313 { 00314 if (m_columnPurposes.size() <= i) { 00315 return ColumnUnknown; 00316 } 00317 return m_columnPurposes[i]; 00318 } 00319 00320 void 00321 CSVFormat::setColumnPurpose(int i, ColumnPurpose p) 00322 { 00323 while (m_columnPurposes.size() <= i) { 00324 m_columnPurposes.push_back(ColumnUnknown); 00325 } 00326 m_columnPurposes[i] = p; 00327 } 00328 00329 00330 00331