Marsyas
0.6.0-alpha
|
00001 /* 00002 ** Copyright (C) 1998-2010 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #include <marsyas/common_source.h> 00020 #include <marsyas/Collection.h> 00021 #include <algorithm> 00022 #include <iterator> 00023 #include <time.h> // for srand(time(NULL)) 00024 00025 00026 00027 using std::ostringstream; 00028 using std::vector; 00029 using std::ifstream; 00030 using std::ofstream; 00031 using std::ostream_iterator; 00032 using std::endl; 00033 using std::ostream; 00034 using std::istream; 00035 using std::string; 00036 00037 // if the directory doesn't exist, we need to make it a "". 00038 static std::string marsyas_datadir_ = 00039 std::getenv("MARSYAS_DATADIR") == NULL ? 00040 "" : std::getenv("MARSYAS_DATADIR"); 00041 00042 namespace Marsyas 00043 { 00044 00045 // Utility function. Should move this somewhere publicly accessible for code re-use. 00046 mrs_string join(const vector<mrs_string>& v, const mrs_string delim) 00047 { 00048 ostringstream os; 00049 copy(v.begin(), v.end(), ostream_iterator<mrs_string>(os, delim.c_str())); 00050 00051 return os.str(); 00052 } 00053 00054 Collection::Collection() 00055 { 00056 collectionList_.reserve(1024); 00057 hasLabels_ = false; 00058 store_labels_ = true; 00059 // initialize random number generation. 00060 srand( (unsigned int) time( NULL) ); 00061 } 00062 00063 Collection::~Collection() 00064 { 00065 } 00066 00067 void 00068 Collection::setName(mrs_string name) 00069 { 00070 name_ = name; 00071 } 00072 00073 void 00074 Collection::store_labels(mrs_bool store) 00075 { 00076 store_labels_ = store; 00077 } 00078 00079 00080 void 00081 Collection::read(mrs_string filename) 00082 { 00083 ifstream is(filename.c_str()); 00084 name_ = filename.substr(0, filename.rfind(".", filename.length())); 00085 00086 is >> (*this); 00087 } 00088 00089 00090 void 00091 Collection::write(mrs_string filename) 00092 { 00093 ofstream os(filename.c_str()); 00094 os << (*this) << endl; 00095 } 00096 00097 void 00098 Collection::labelAll(mrs_string label) 00099 { 00100 if (hasLabels_ == false) 00101 { 00102 hasLabels_ = true; 00103 labelList_.reserve(collectionList_.size()); 00104 for (mrs_natural i = 0; i < (mrs_natural)collectionList_.size(); ++i) 00105 labelList_.push_back(label); 00106 } 00107 else 00108 { 00109 for (mrs_natural i=0; i < (mrs_natural)collectionList_.size(); ++i) 00110 labelList_[i] = label; 00111 } 00112 } 00113 00114 ostream& 00115 operator<<(ostream& o, const Collection& l) 00116 { 00117 // o << "# MARSYAS Collection " << endl; 00118 // o << "# name = " << l.name_ << endl << endl; 00119 for (mrs_natural i=0; i < (mrs_natural)l.collectionList_.size(); ++i) 00120 { 00121 o << l.collectionList_[i]; 00122 if (l.hasLabels_) 00123 o << "\t" << l.labelList_[i]; 00124 o << endl; 00125 } 00126 //o << endl; 00127 return o; 00128 } 00129 00130 00131 00132 mrs_natural 00133 Collection::size() 00134 { 00135 return (mrs_natural) collectionList_.size(); 00136 } 00137 00138 mrs_natural 00139 Collection::getSize() 00140 { 00141 return (mrs_natural) collectionList_.size(); 00142 } 00143 00144 00145 mrs_string 00146 Collection::name() 00147 { 00148 return name_; 00149 } 00150 00151 void 00152 Collection::add(mrs_string entry) 00153 { 00154 collectionList_.push_back(entry); 00155 hasLabels_ = false; 00156 } 00157 00158 00159 00160 void 00161 Collection::clear() 00162 { 00163 collectionList_.clear(); 00164 labelList_.clear(); 00165 00166 // Do not clear labelNames so that multiple collections 00167 // can share the same label set 00168 // maybe at some point make this behavior controllable 00169 } 00170 00171 00172 void 00173 Collection::add(mrs_string entry, mrs_string label) 00174 { 00175 00176 collectionList_.push_back(entry); 00177 hasLabels_ = true; 00178 labelList_.push_back(label); 00179 00180 if (store_labels_) { 00181 if (find(labelNames_.begin(), labelNames_.end(), label) == labelNames_.end()) { 00182 labelNames_.push_back(label); 00183 } 00184 sort(labelNames_.begin(), labelNames_.end()); 00185 } 00186 00187 } 00188 00189 00190 00191 00192 mrs_natural 00193 Collection::getNumLabels() 00194 { 00195 return (mrs_natural) labelNames_.size(); 00196 } 00197 00198 mrs_string 00199 Collection::labelName(mrs_natural i) 00200 { 00201 if (i >= 0 && i < (mrs_natural)labelNames_.size()) 00202 return labelNames_[i]; 00203 00204 return EMPTYSTRING; 00205 } 00206 00207 mrs_string 00208 Collection::getLabelNames() 00209 { 00210 return join(labelNames_, ","); 00211 } 00212 00213 mrs_bool 00214 Collection::hasLabels() 00215 { 00216 return hasLabels_; 00217 } 00218 00219 void 00220 Collection::shuffle() 00221 { 00222 // Use a Fisher-Yates shuffle 00223 // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle 00224 mrs_natural n = (mrs_natural)collectionList_.size(); 00225 while (n > 1) 00226 { 00227 // Generate a random index in the range [0, n). 00228 mrs_natural k = (mrs_natural)(n * ((mrs_real)rand() / ((mrs_real)(RAND_MAX) + (mrs_real)1))); 00229 00230 n--; 00231 swap(collectionList_[n], collectionList_[k]); 00232 if (hasLabels_) 00233 swap(labelList_[n], labelList_[k]); 00234 } 00235 } 00236 00237 mrs_string 00238 Collection::toLongString() 00239 { 00240 return join(collectionList_, ","); 00241 } 00242 00243 mrs_natural 00244 Collection::labelNum(mrs_string label) 00245 { 00246 00247 vector<mrs_string>::iterator it = find(labelNames_.begin(), labelNames_.end(), label); 00248 if (it == labelNames_.end()) 00249 return -1; 00250 00251 return (mrs_natural) distance(labelNames_.begin(), it); 00252 00253 } 00254 mrs_real 00255 Collection::regression_label(mrs_natural i) 00256 { 00257 if (hasLabels_ && i >= 0 && i < (mrs_natural)labelList_.size()) { 00258 return (mrs_real) atof(labelList_[i].c_str()); 00259 } 00260 return 0.0; 00261 } 00262 00263 mrs_string 00264 Collection::labelEntry(mrs_natural i) 00265 { 00266 if (hasLabels_ && i >= 0 && i < (mrs_natural)labelList_.size()) 00267 return labelList_[i]; 00268 00269 return "No label"; 00270 } 00271 00272 mrs_string 00273 Collection::entry(mrs_natural i) 00274 { 00275 if (i >= 0 && i < (mrs_natural)collectionList_.size()) 00276 return collectionList_[i]; 00277 00278 return mrs_string(); 00279 } 00280 00281 00282 void 00283 Collection::concatenate(vector<Collection> cls) 00284 { 00285 for (mrs_natural cj = 0; cj < (mrs_natural)cls.size(); cj++) 00286 { 00287 Collection l = cls[cj]; 00288 if (l.hasLabels_) 00289 hasLabels_ = true; 00290 00291 for (mrs_natural i = 0; i < l.size(); ++i) 00292 add(l.entry(i), l.labelEntry(i)); 00293 } 00294 } 00295 00296 00297 /* I can't be bothered to think about this myself, so copied from 00298 http://stackoverflow.com/questions/3418231/c-replace-part-of-a-string-with-another-string 00299 -gp */ 00300 bool replace(std::string& str, const std::string& from, const std::string& to) { 00301 string::size_type start_pos = str.find(from); 00302 if(start_pos == std::string::npos) 00303 return false; 00304 str.replace(start_pos, from.length(), to); 00305 return true; 00306 } 00307 00308 00309 istream& 00310 operator>>(istream& i, Collection& l) 00311 { 00312 MRSDIAG("Collection.cpp - operator>>"); 00313 00314 mrs_string fileEntry; 00315 while (getline(i, fileEntry)) 00316 { 00317 // Skip blank lines. 00318 if (fileEntry.empty()) 00319 continue; 00320 00321 // Skip comment lines. 00322 if (fileEntry[0] == '#') 00323 continue; 00324 00325 // Check to see if there is a label. Could use rfind for efficiency 00326 // if we were sure there weren't tabs after the label. 00327 if (marsyas_datadir_.length() > 0) { 00328 replace(fileEntry, "MARSYAS_DATADIR", marsyas_datadir_); 00329 } 00330 mrs_string::size_type loc = fileEntry.find('\t', 0); 00331 if (loc != mrs_string::npos) 00332 { 00333 mrs_string file = fileEntry.substr(0, loc); 00334 mrs_string label = fileEntry.substr(loc+1, fileEntry.size()); 00335 l.add(file, label); 00336 } 00337 else { 00338 l.add(fileEntry); 00339 } 00340 } 00341 00342 return i; 00343 } 00344 }