/* Marsyas 0.6.0-alpha — GMMClassifier source listing */
00001 /* 00002 ** Copyright (C) 1998-2010 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #include "GMMClassifier.h" 00020 #include "../common_source.h" 00021 #include <marsyas/NumericLib.h> 00022 00023 00024 using std::ostringstream; 00025 using std::vector; 00026 00027 using namespace Marsyas; 00028 00029 GMMClassifier::GMMClassifier(mrs_string name):MarSystem("GMMClassifier",name) 00030 { 00031 prev_mode_= "predict"; 00032 classSize_ = -1; 00033 featSize_ = -1; 00034 nMixtures_ = -1; 00035 addControls(); 00036 } 00037 00038 00039 GMMClassifier::GMMClassifier(const GMMClassifier& a):MarSystem(a) 00040 { 00041 ctrl_mode_ = getctrl("mrs_string/mode"); 00042 ctrl_nClasses_ = getctrl("mrs_natural/nClasses"); 00043 ctrl_nMixtures_ = getctrl("mrs_natural/nMixtures"); 00044 ctrl_iterations_ = getctrl("mrs_natural/iterations"); 00045 ctrl_kiterations_ = getctrl("mrs_natural/kiterations"); 00046 ctrl_eiterations_ = getctrl("mrs_natural/eiterations"); 00047 00048 prev_mode_ = "predict"; 00049 classSize_ = -1; 00050 featSize_ = -1; 00051 nMixtures_ = -1; 00052 } 00053 00054 00055 GMMClassifier::~GMMClassifier() 00056 { 00057 } 00058 00059 00060 MarSystem* 00061 
GMMClassifier::clone() const 00062 { 00063 return new GMMClassifier(*this); 00064 } 00065 00066 void 00067 GMMClassifier::addControls() 00068 { 00069 addctrl("mrs_string/mode", "train", ctrl_mode_); 00070 ctrl_mode_->setState(true); 00071 00072 addctrl("mrs_natural/nClasses", -1, ctrl_nClasses_); 00073 ctrl_nClasses_->setState(true); 00074 00075 addctrl("mrs_natural/nMixtures", -1, ctrl_nMixtures_); 00076 ctrl_nMixtures_->setState(true); 00077 00078 addctrl("mrs_natural/iterations", 200, ctrl_iterations_); 00079 addctrl("mrs_natural/kiterations", 100, ctrl_kiterations_); 00080 addctrl("mrs_natural/eiterations", 20, ctrl_eiterations_); 00081 } 00082 00083 void 00084 GMMClassifier::initialize() 00085 { 00086 mrs_natural trainSize = trainMatrix_.getCols(); 00087 00088 realvec temp(featSize_); 00089 realvec randstep(featSize_); 00090 00091 mrs_natural seedSize = 5; //FIXME: hardcoded; change to a control? 00092 mrs_real rind; 00093 rind = ((mrs_real)rand() / (mrs_real)(RAND_MAX))*trainSize; 00094 00095 for (mrs_natural cl=0; cl < classSize_; cl++) 00096 { 00097 for (mrs_natural k=0; k < nMixtures_; k++) 00098 { 00100 // Compute feature Means for current class and mixture 00102 temp.setval(0.0); 00103 for (mrs_natural c=0; c < seedSize; ++c) 00104 { 00105 //randomly select a number of feat.vectors from a class 00106 rind = ((mrs_real)rand() / (mrs_real)(RAND_MAX))*trainSize; 00107 while (trainMatrix_(labelRow_,(mrs_natural)rind)!= cl) 00108 rind = ((mrs_real)rand() / (mrs_real)(RAND_MAX))*trainSize; 00109 00110 //accumulate randomly selected feature vector into temp realvec 00111 for(mrs_natural f=0; f < featSize_; ++f) 00112 temp(f) += trainMatrix_(f, (mrs_natural)rind); 00113 } 00114 temp /= seedSize; //compute mean 00115 //store result for current class and mixture 00116 for(mrs_natural f=0; f < featSize_; ++f) 00117 means_[cl](f, k) = temp(f); 00118 00120 // Compute feature Variances for current class and mixture 00122 // count the number of examples of this class 
in the training matrix 00123 mrs_natural classExamples = 0; 00124 vector<mrs_natural> classCols; 00125 for(mrs_natural c=0; c < trainSize; ++c) 00126 if(trainMatrix_(labelRow_, c) == cl) 00127 { 00128 classExamples++; 00129 classCols.push_back(c); 00130 } 00131 00132 // copy all feature vector from this class into a temp matrix 00133 // so we can compute some statistics (i.e. variance) 00134 realvec classFeatMatrix(featSize_, classExamples); 00135 for(mrs_natural c=0; c < classExamples; ++c) 00136 { 00137 for(mrs_natural f=0; f < featSize_; ++f) 00138 classFeatMatrix(f, c) = trainMatrix_(f, classCols[c]); 00139 } 00140 00141 //compute variance of the features (i.e. observations) 00142 classFeatMatrix.varObs(temp); 00143 00144 //store result for current class and mixture 00145 for(mrs_natural f=0; f < featSize_; ++f) 00146 vars_[cl](f, k) = temp(f); 00147 } 00148 00150 // Compute feature Covariances for current class and mixture 00152 for (mrs_natural k=0; k < nMixtures_; k++) 00153 for (mrs_natural f=0; f < featSize_; f++) 00154 { 00155 if (vars_[cl](f,k) != 0.0) 00156 covars_[cl](f,k) = 1.0 / vars_[cl](f,k); 00157 else 00158 covars_[cl](f,k) = 0.0; 00159 } 00160 00162 // Set initial weights for current class and mixture 00164 weights_[cl].setval(1.0 / nMixtures_); 00165 } 00166 00168 // Perform K-Means 00170 mrs_real dist = 0.0; 00171 mrs_natural min_k = 0; 00172 00173 likelihoods_.create(classSize_, nMixtures_); 00174 00175 for (mrs_natural i=0; i < kiterations_; ++i) 00176 { 00177 likelihoods_.setval(0.0); 00178 00179 //init omeans_ with the values of means_ 00180 for (mrs_natural cl = 0; cl < classSize_; cl++) 00181 for (mrs_natural k=0; k < nMixtures_; k++) 00182 for (mrs_natural f=0; f < featSize_; f++) 00183 { 00184 omeans_[cl](f,k) = means_[cl](f,k); 00185 } 00186 00187 // set each class's means to zero (for all mixtures) 00188 for (mrs_natural cl=0; cl < classSize_; cl++) 00189 { 00190 means_[cl].setval(0.0); 00191 } 00192 00193 //for all the feature 
vectors (i.e. examples) in the trainMatrix... 00194 for (mrs_natural c=0; c < trainSize; ++c) 00195 { 00196 mrs_real min = 100000000; 00197 00198 //get this feature vector class label 00199 mrs_natural cl = (mrs_natural)trainMatrix_(labelRow_, c); 00200 trainMatrix_.getCol(c, temp); 00201 00202 // look for the minimum distance of this training feat. vec 00203 // to the existing mixtures 00204 for (mrs_natural k=0; k < nMixtures_; k++) 00205 { 00206 //get omean and covar for each mixture in this class 00207 realvec omean; 00208 omeans_[cl].getCol(k, omean); 00209 realvec covar; 00210 covars_[cl].getCol(k, covar); 00211 00212 //compute distance between feat.vector and the mean vector 00213 dist = NumericLib::mahalanobisDistance(temp, omean, covar); 00214 00215 if (dist < min) 00216 { 00217 min = dist; 00218 min_k = k; 00219 } 00220 } 00221 00222 //update closest mixture with the current feature vector 00223 for (mrs_natural f=0; f < featSize_; f++) 00224 { 00225 means_[cl](f, min_k) += temp(f); 00226 } 00227 00228 // increment the counter of the current class and selected mixture 00229 likelihoods_(cl,min_k)++; 00230 } 00231 00232 //compute means for all classes 00233 for (mrs_natural cl=0; cl < classSize_; cl++) 00234 { 00235 for (mrs_natural k=0; k < nMixtures_; k++) 00236 for (mrs_natural f=0; f < featSize_; f++) 00237 { 00238 if (likelihoods_(cl,k) != 0.0) 00239 means_[cl](f,k) /= likelihoods_(cl, k); 00240 } 00241 // cout << "KMEANS CLASS = " << cl << endl; 00242 // cout << "Means = " << means_[cl] << endl; 00243 } 00244 }//end of k-means iterations 00245 00246 classSizes_.create(classSize_); 00247 sum_.create(classSize_); 00248 likelihoods_.create(classSize_, nMixtures_); 00249 accumVec_.create(featSize_); //FIXME: row? 00250 temp_.create(featSize_); //FIXME: ? 
00251 sprobs_.create(classSize_,nMixtures_); 00252 00253 probs_.reserve(classSize_); 00254 ssprobs_.reserve(classSize_); 00255 for (mrs_natural cl=0; cl < classSize_; ++cl) 00256 { 00257 probs_.push_back(realvec(trainSize, nMixtures_)); 00258 ssprobs_.push_back(realvec(featSize_, nMixtures_)); 00259 } 00260 } 00261 00262 mrs_real 00263 GMMClassifier::gaussian(mrs_natural cl, mrs_natural k, realvec& vec) 00264 { 00265 mrs_real res; 00266 mrs_real temp; 00267 mrs_real det = 1.0; 00268 00269 for (mrs_natural f=0; f < featSize_; f++) 00270 det *= (vars_[cl])(f,k); 00271 00272 res = 1 / (factor_ * det); 00273 00274 realvec mean; 00275 means_[cl].getCol(k, mean); 00276 realvec covar; 00277 covars_[cl].getCol(k, covar); 00278 temp = NumericLib::mahalanobisDistance(vec, mean, covar); 00279 00280 res *= exp(-temp*0.5); 00281 00282 return res; 00283 } 00284 00285 void 00286 GMMClassifier::doEM() 00287 { 00288 realvec featVec; 00289 mrs_natural cl; 00290 00291 //init to zero 00292 classSizes_.setval(0.0); 00293 sum_.setval(0.0); 00294 sprobs_.setval(0.0); 00295 accumVec_.setval(0.0); 00296 for (cl=0; cl < classSize_; cl++) 00297 ssprobs_[cl].setval(0.0); 00298 00299 mrs_natural trainSize = trainMatrix_.getCols(); 00300 mrs_real prob; 00301 mrs_real sum; 00302 00303 //for all feat.vecs in trainMatrix... 00304 for (mrs_natural c=0; c < trainSize; ++c) 00305 { 00306 //get class label of current feature vector 00307 cl = (mrs_natural)trainMatrix_(labelRow_, c); 00308 classSizes_(cl)++; 00309 sum = 0.0; 00310 00311 //get current feature vector 00312 trainMatrix_.getCol(c, featVec); 00313 00314 //calculate the probability of the feat.Vector 00315 //belonging to each one of the mixtures 00316 for (mrs_natural k=0; k < nMixtures_; k++) 00317 { 00318 //compute the probablity p(x|k) 00319 likelihoods_(cl,k) = gaussian(cl,k, featVec); 00320 //accumulated probability (Sum_k[p(x|k)]) 00321 sum += likelihoods_(cl,k); 00322 } 00323 00324 //for each mixture... 
00325 for (mrs_natural k=0; k < nMixtures_; k++) 00326 { 00327 // compute posteriori probablility: 00328 // p(k|x) = p(x|k)/sum[p(x|k)] = p(x|k)/p(x) 00329 if (sum != 0.0) 00330 prob = likelihoods_(cl,k) / sum; 00331 else 00332 { 00333 prob = 0.0000000001; 00334 } 00335 //store posteriori probabilities (p(k|x)) 00336 probs_[cl](c,k) = prob; 00337 00338 //posterior probability for each class (p(cl|x)) 00339 sprobs_(cl,k) += prob; 00340 00341 //compute x*p(k|x) 00342 temp_ = featVec; 00343 temp_ *= prob; 00344 00345 //compute p(cl|x)*x 00346 ssprobs_[cl].getCol(k, accumVec_); 00347 accumVec_ += temp_; 00348 00349 //store it 00350 for(mrs_natural f=0; f < featSize_; ++f) 00351 ssprobs_[cl](f, k) = accumVec_(f); 00352 } 00353 } 00354 00355 for (cl = 0; cl < classSize_; cl++) 00356 for (mrs_natural k=0; k < nMixtures_; k++) 00357 { 00358 weights_[cl](k) = sprobs_(cl,k) / classSizes_(cl); 00359 ssprobs_[cl].getCol(k, temp_); 00360 if (sprobs_(cl,k) != 0.0) 00361 { 00362 //compute p(cl|x)*x/p(cl|x) 00363 temp_ /= sprobs_(cl,k); 00364 //store it 00365 for(mrs_natural f=0; f < featSize_; ++f) 00366 means_[cl](f,k) = temp_(f); 00367 } 00368 } 00369 00370 for (cl=0; cl < classSize_; cl++) 00371 ssprobs_[cl].setval(0.0); 00372 00373 00374 //for each feat.Vec in the trainMatrix... 
00375 for (mrs_natural t=0; t < trainSize; t++) 00376 { 00377 //get its class label 00378 cl = (mrs_natural)trainMatrix_(labelRow_, t); 00379 00380 //get the feat.Vector 00381 trainMatrix_.getCol(t, featVec); 00382 00383 //for each mixture, compute: 00384 // p(x|k)(x-uk)(x-uk)T 00385 for (mrs_natural k=0; k < nMixtures_; k++) 00386 { 00387 prob = (probs_[cl])(t,k); 00388 temp_ = featVec; 00389 realvec means; 00390 means_[cl].getCol(k, means); 00391 temp_ -= means; 00392 temp_.sqr(); 00393 temp_ *= prob; 00394 00395 ssprobs_[cl].getCol(k, accumVec_); 00396 accumVec_ += temp_; 00397 00398 //store it 00399 for(mrs_natural f=0; f < featSize_; ++f) 00400 ssprobs_[cl](f, k) = accumVec_(f); 00401 } 00402 } 00403 00404 for (cl = 0; cl < classSize_; cl++) 00405 { 00406 for (mrs_natural k=0; k < nMixtures_; k++) 00407 { 00408 ssprobs_[cl].getCol(k, temp_); 00409 temp_ *= (1.0 / (sprobs_(cl,k)));// -1.0)); //FIXME: confirm this? 00410 00411 //store it 00412 for(mrs_natural f=0; f < featSize_; ++f) 00413 vars_[cl](f, k) = temp_(f); 00414 } 00415 00416 for (mrs_natural k=0; k < nMixtures_; k++) 00417 for (mrs_natural f=0; f < featSize_; f++) 00418 { 00419 if (vars_[cl](f, k) > 0.0) 00420 covars_[cl](f, k) = 1.0 / (vars_[cl](f, k)); 00421 else 00422 { 00423 covars_[cl](f, k) = 10000000.0; 00424 vars_[cl](f, k) = 0.0000001; 00425 } 00426 } 00427 } 00428 } 00429 00430 void 00431 GMMClassifier::myUpdate(MarControlPtr sender) 00432 { 00433 (void) sender; //suppress warning of unused parameter(s) 00434 MRSDIAG("GMMClassifier.cpp - GMMClassifier:myUpdate"); 00435 00436 setctrl("mrs_natural/onSamples", getctrl("mrs_natural/inSamples")); 00437 setctrl("mrs_natural/onObservations", (mrs_natural)2); 00438 setctrl("mrs_real/osrate", getctrl("mrs_real/israte")); 00439 setctrl("mrs_string/onObsNames", "GT_label, Predicted_label,"); 00440 00441 mrs_string mode = getctrl("mrs_string/mode")->to<mrs_string>(); 00442 00443 mrs_natural classSize = 
getctrl("mrs_natural/nClasses")->to<mrs_natural>(); 00444 mrs_natural nMixtures = getctrl("mrs_natural/nMixtures")->to<mrs_natural>(); 00445 mrs_natural featSize = inObservations_-1; //last observation at input is the label row! 00446 00447 // Initialize internal variables 00448 // (check for changes in nr of classes, nr of features or 00449 // nr of gaussian mixtures) 00450 if((classSize != classSize_) || (nMixtures != nMixtures_) || 00451 (featSize != featSize_)) 00452 { 00453 classSize_ = classSize; 00454 nMixtures_ = nMixtures; 00455 featSize_ = featSize; 00456 labelRow_ = featSize_; 00457 00458 factor_ = pow(sqrt(TWOPI), (mrs_real)featSize_); 00459 00460 //init 00461 means_.clear(); 00462 omeans_.clear(); 00463 vars_.clear(); 00464 covars_.clear(); 00465 weights_.clear(); 00466 means_.reserve(classSize_); 00467 omeans_.reserve(classSize_); 00468 vars_.reserve(classSize_); 00469 covars_.reserve(classSize_); 00470 weights_.reserve(classSize_); 00471 00472 // populate above vectors with realvecs for each class 00473 for (mrs_natural cl=0; cl < classSize_; cl++) 00474 { 00475 realvec cmeans(featSize_, nMixtures_); 00476 realvec ocmeans(featSize_, nMixtures_); 00477 realvec cvars(featSize_, nMixtures_); 00478 realvec ccovars(featSize_, nMixtures_); 00479 realvec cweights(nMixtures_); 00480 00481 // Vectors of realvec for each class 00482 means_.push_back(cmeans); 00483 omeans_.push_back(ocmeans); 00484 vars_.push_back(cvars); 00485 covars_.push_back(ccovars); 00486 weights_.push_back(cweights); 00487 } 00488 } 00489 00490 //change from TRAIN to PREDICT mode 00491 if ((prev_mode_ == "train") && (mode == "predict")) 00492 { 00493 initialize(); 00494 00495 for (mrs_natural i=0; i < iterations_ ; ++i) 00496 { 00497 doEM(); 00498 } 00499 00500 prev_mode_ = mode; 00501 } 00502 } 00503 00504 void 00505 GMMClassifier::myProcess(realvec& in, realvec& out) 00506 { 00507 mrs_string mode = ctrl_mode_->to<mrs_string>(); 00508 00509 // reset 00510 if ((prev_mode_ == "predict") 
&& (mode == "train")) 00511 { 00512 //just drop all accumulated feature vectors and 00513 //copy take the new ones from the input 00514 trainMatrix_ = in; 00515 } 00516 00517 if (mode == "train") 00518 { 00519 MRSASSERT(trainMatrix_.getRows() == inObservations_); 00520 00521 //stretch to acommodate input feat Vecs 00522 mrs_natural storedFeatVecs = trainMatrix_.getCols(); 00523 trainMatrix_.stretch(inObservations_, storedFeatVecs + inSamples_); 00524 00525 //append input data 00526 for(mrs_natural c=0; c < inSamples_; ++c) 00527 for(mrs_natural r = 0; r < inObservations_; ++r) 00528 trainMatrix_(r, c+storedFeatVecs) = in(r,c); 00529 } 00530 00531 if (mode == "predict") 00532 { 00533 mrs_real maxProb = 0.0; 00534 mrs_natural maxClass = 0; 00535 mrs_real prob; 00536 mrs_real dist; 00537 realvec vec; 00538 realvec means; 00539 realvec covars; 00540 00541 MRSASSERT(trainMatrix_.getRows() == inObservations_); 00542 00543 for(mrs_natural t=0; t < inSamples_; ++t) 00544 { 00545 00546 in.getCol(t, vec); 00547 00548 for (mrs_natural cl=0; cl < classSize_; cl++) 00549 { 00550 for (mrs_natural k=0; k < nMixtures_; k++) 00551 { 00552 means_[cl].getCol(k, means); 00553 covars_[cl].getCol(k, covars); 00554 dist = NumericLib::mahalanobisDistance(vec, means, covars); 00555 likelihoods_(cl,k) = weights_[cl](k) / dist; 00556 } 00557 prob = 0.0; 00558 for (mrs_natural k=0; k < nMixtures_; k++) 00559 { 00560 prob += likelihoods_(cl,k); 00561 } 00562 if (prob > maxProb) 00563 { 00564 maxProb = prob; 00565 maxClass = cl; 00566 } 00567 } 00568 out(0,t) = in(labelRow_, t); 00569 out(1,t) = (mrs_real)maxClass; //FIXME: what about he maxProb (i.e. Confidence)? 00570 } 00571 } 00572 00573 prev_mode_ = mode; 00574 }