Marsyas
0.6.0-alpha
|
00001 /* 00002 ** Copyright (C) 1998-2006 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #include "PeakClusterSelect.h" 00020 #include "../common_source.h" 00021 #include <algorithm> 00022 00023 using std::min; 00024 using std::max; 00025 using std::ostringstream; 00026 using namespace Marsyas; 00027 00028 static std::ofstream outTextFile; 00029 //#define MTLB_DBG_LOG 00030 //#define DBG_FILE_OUT 00031 #ifdef DBG_FILE_OUT 00032 static const std::string outFileName("d:/temp/peakcluster.txt"); 00033 #endif 00034 00035 00036 PeakClusterSelect::PeakClusterSelect(mrs_string name):MarSystem("PeakClusterSelect", name) 00037 { 00038 addControls(); 00039 } 00040 00041 PeakClusterSelect::PeakClusterSelect(const PeakClusterSelect& a) : MarSystem(a) 00042 { 00043 ctrl_numClustersToKeep_ = getctrl("mrs_natural/numClustersToKeep"); 00044 } 00045 00046 PeakClusterSelect::~PeakClusterSelect() 00047 { 00048 #ifdef DBG_FILE_OUT 00049 outTextFile.close (); 00050 #endif 00051 } 00052 00053 MarSystem* 00054 PeakClusterSelect::clone() const 00055 { 00056 return new PeakClusterSelect(*this); 00057 } 00058 00059 void 00060 PeakClusterSelect::addControls() 00061 { 00062 addctrl("mrs_natural/numClustersToKeep", 1, ctrl_numClustersToKeep_); 00063 ctrl_numClustersToKeep_->setState(false); 00064 } 00065 00066 void 00067 PeakClusterSelect::myUpdate(MarControlPtr sender) 00068 { 00069 (void) sender; //suppress warning of unused parameter(s) 00070 MRSDIAG("PeakClusterSelect.cpp - PeakClusterSelect:myUpdate"); 00071 00072 ctrl_onObservations_->setValue(1, NOUPDATE); 00073 ctrl_onSamples_->setValue(ctrl_inSamples_, NOUPDATE); 00074 ctrl_osrate_->setValue(ctrl_israte_, NOUPDATE); 00075 ctrl_onObsNames_->setValue("peakLabels", NOUPDATE); 00076 00077 #ifdef DBG_FILE_OUT 00078 if (!outTextFile.good () || ! outTextFile.is_open ()) 00079 outTextFile.open(outFileName.c_str ()); 00080 #endif 00081 } 00082 00083 void 00084 PeakClusterSelect::sort(realvec& rv, mrs_natural dimension, mrs_natural left, mrs_natural right, mrs_bool sortColumns) 00085 { 00086 if( left < right ) 00087 { 00088 int part = partition(rv, dimension, left, right, sortColumns); 00089 sort(rv, dimension, left, part-1, sortColumns); 00090 sort(rv, dimension, part+1, right, sortColumns); 00091 } 00092 } 00093 00094 int 00095 PeakClusterSelect::partition(realvec& rv, mrs_natural dimension, mrs_natural left, mrs_natural right, mrs_bool sortColumns) 00096 { 00097 // Not quite fair, but good enough random partitioning 00098 int pivot_i = rand()%(right-left+1) + left; 00099 // Place pivot val at the end of the series 00100 swap(rv, pivot_i, right, sortColumns); 00101 00102 mrs_real pivot_val; 00103 if( sortColumns ) 00104 pivot_val = rv(dimension,right); 00105 else 00106 pivot_val = rv(right,dimension); 00107 00108 int i = left-1; 00109 00110 if( sortColumns ) 00111 { 00112 for( int j=left ; j<right ; j++ ) 00113 { 00114 if( rv(dimension,j) <= pivot_val ) 00115 { 00116 ++i; 00117 swap(rv, i, j, sortColumns); 00118 } 00119 } 00120 // re-insert pivot val 00121 swap(rv, i+1, right, sortColumns); 00122 } 00123 else 00124 { 00125 for( int j=left ; j<right ; j++ ) 00126 { 00127 if( rv(j,dimension) <= pivot_val ) 00128 { 00129 ++i; 00130 swap(rv, i, j, sortColumns); 00131 } 00132 } 00133 // re-insert pivot val 00134 swap(rv, i+1, right, sortColumns); 00135 } 00136 00137 return i+1; 00138 } 00139 00140 void 00141 PeakClusterSelect::swap(realvec& rv, mrs_natural sample1, mrs_natural sample2, mrs_bool swapColumns) 00142 { 00143 if( swapColumns ) // swap two columns 00144 { 00145 int rows = rv.getRows(); 00146 mrs_real tmp; 00147 00148 for( int i=0 ; i<rows ; ++i ) 00149 { 00150 tmp = rv(i, sample1); 00151 rv(i,sample1) = rv(i,sample2); 00152 rv(i,sample2) = tmp; 00153 } 00154 } 00155 else // swap two rows 00156 { 00157 int cols = rv.getCols(); 00158 mrs_real tmp; 00159 00160 for( int i=0 ; i<cols ; ++i ) 00161 { 00162 tmp = rv(sample1,i); 00163 rv(sample1,i) = rv(sample2,i); 00164 rv(sample2,i) = tmp; 00165 } 00166 } 00167 } 00168 00169 void 00170 PeakClusterSelect::myProcess(realvec& in, realvec& out) 00171 { 00172 mrs_natural t; 00173 mrs_natural numClustersToKeep = ctrl_numClustersToKeep_->to<mrs_natural>(); 00174 mrs_natural curNumClusters=-1, i, j, k, curClusterLabel; 00175 mrs_natural numPeaks = ctrl_inSamples_->to<mrs_natural>(); 00176 00177 // Determine number of clusters in input by finding maximum index plus 1 00178 for (t = 0; t < inSamples_; t++) 00179 if( in(0,t) > curNumClusters ) 00180 curNumClusters = (mrs_natural)in(0,t); 00181 curNumClusters += 1; 00182 00183 mrs_realvec intraClusterSimilarities(3, curNumClusters); 00184 mrs_realvec clusterSimilarity (curNumClusters, curNumClusters); 00185 mrs_realvec clusterNorm (curNumClusters, curNumClusters); 00186 mrs_realvec keepMe(curNumClusters); 00187 mrs_real intraSimKeepThresh = .5; 00188 00189 clusterSimilarity.setval (0.); 00190 clusterNorm.setval (0.); 00191 keepMe.setval (1.); 00192 00193 for( i=0 ; i<curNumClusters ; ++i ) 00194 { 00195 // Store cluster index in first row 00196 // (so upon sorting of density values, 00197 // we have reference to the cluster) 00198 intraClusterSimilarities(0,i) = i; 00199 // Keep track of the number of peaks in each cluster 00200 intraClusterSimilarities(1,i) = 0; 00201 // Finally, keep track of the sum of intra-cluster similarities 00202 intraClusterSimilarities(2,i) = 0; 00203 } 00204 00205 00206 // compute the similarity of each peak to each each other 00207 // accumulate per cluster pair 00208 for( i=0 ; i<numPeaks ; ++i ) 00209 { 00210 mrs_realvec numAcc(curNumClusters); 00211 mrs_realvec similarity(curNumClusters); 00212 00213 numAcc.setval (0.); 00214 similarity.setval (0.); 00215 for( j=0 ; j<numPeaks ; j++ ) 00216 { 00217 if (i==j) 00218 continue; 00219 similarity((mrs_natural)(in(0,j)+.1)) += in (i+1, j); 00220 clusterNorm ((mrs_natural)(in(0,i)+.1),(mrs_natural)(in(0,j)+.1)) += 1; 00221 } 00222 for (k = 0; k < curNumClusters; k++) 00223 clusterSimilarity ((mrs_natural)(in(0,i)+.1), k) += similarity(k); 00224 } 00225 00226 for (i = 0; i < curNumClusters; i++) 00227 for (j = 0; j < curNumClusters; j++) 00228 clusterSimilarity(i,j) /= (clusterNorm(i,j)>0)? clusterNorm(i,j) : 1.; 00229 00230 // compute something similar to silhouette coefficients 00231 mrs_realvec silhouetteCoeffs (curNumClusters); 00232 silhouetteCoeffs.setval(0.); 00233 00234 for (k = 0; k < curNumClusters; k++) 00235 { 00236 mrs_real selfSim, maximum, minSim = 0;//1e37; 00237 selfSim = clusterSimilarity(k,k); 00238 for (i = 0; i < curNumClusters; i++) 00239 { 00240 if (i == k) 00241 continue; 00242 //minSim = (clusterSimilarity(k,i) < minSim)?clusterSimilarity(k,i) : minSim; 00243 minSim += clusterSimilarity(k,i); 00244 } 00245 minSim /= (curNumClusters-1); 00246 00247 if ((maximum = max(selfSim, minSim)) != 0) 00248 silhouetteCoeffs(k) = (selfSim - minSim)/ maximum; 00249 } 00250 00251 // update output values 00252 for (k = 0; k < curNumClusters; k++) 00253 intraClusterSimilarities(2,k) = clusterSimilarity(k,k); 00254 00255 #ifdef MARSYAS_MATLAB 00256 #ifdef MTLB_DBG_LOG 00257 //MATLAB_PUT(clusterSimilarity, "sim"); 00258 //MATLAB_PUT(curNumClusters, "numClust"); 00259 //MATLAB_EVAL ("figure(13),imagesc(0:(numClust-1),0:(numClust-1),sim,[0 1]),colorbar"); 00260 //MATLAB_PUT(silhouetteCoeffs, "sil"); 00261 //MATLAB_EVAL ("figure(14),plot(0:(length(sil)-1), sil),axis([0 length(sil)-1 0 1]), grid on"); 00262 #endif 00263 #endif 00264 00265 00266 00267 // (Quick) Sort by intra-cluster similarity density 00268 sort( intraClusterSimilarities, 2, 0, curNumClusters-1 ); 00269 00270 00271 #ifdef DBG_FILE_OUT 00272 if (outTextFile.good ()) 00273 { 00274 if (curNumClusters < 6) 00275 { 00276 for (k = 0; k < 6-curNumClusters; k++) 00277 outTextFile << 0 <<"\t"; 00278 } 00279 for (k = 0; k < curNumClusters; k++) 00280 outTextFile << silhouetteCoeffs(k) <<"\t"; ; 00281 //outTextFile << intraClusterSimilarities(2,k) <<"\t"; ; 00282 00283 outTextFile << std::endl; 00284 } 00285 #endif 00286 00287 // ignore setting of numclusters2Keep and set it by threshold of the intraclustersimilarity 00288 if (numClustersToKeep == 0) 00289 { 00290 const mrs_real intraThreshBounds[2] = {.3,.6}; 00291 const mrs_real silThreshBound = 1./curNumClusters; 00292 intraSimKeepThresh = max((mrs_real)intraThreshBounds[0],min((mrs_real)intraThreshBounds[1],(mrs_real).5*((mrs_real)intraClusterSimilarities(2,0) + (mrs_real)intraClusterSimilarities(2, curNumClusters-1)))); 00293 for (k = 0; k < curNumClusters; k++) 00294 { 00295 // intra cluster similarity threshold 00296 if (intraClusterSimilarities(2,k) < intraSimKeepThresh) 00297 keepMe(k) = 0; 00298 00299 // silhouette coefficient threshold 00300 if (silhouetteCoeffs((mrs_natural)(intraClusterSimilarities(0,k)+.1)) < silThreshBound) 00301 keepMe(k) = 0; 00302 } 00303 numClustersToKeep = (mrs_natural)(keepMe.sum () +.1); 00304 00305 if (numClustersToKeep == curNumClusters) 00306 keepMe(0) = 0; // always abandon one cluster! 00307 //std::cout << numClustersToKeep << " " << std::endl; 00308 } 00309 else 00310 { 00311 for (k = 0; k < (curNumClusters - numClustersToKeep); k++) 00312 keepMe(k) = 0; 00313 } 00314 00315 for (t = 0; t < inSamples_; t++) 00316 { 00317 curClusterLabel = (mrs_natural)in(0,t); 00318 out(0,t) = curClusterLabel; //signals clusters to be synthesized 00319 00320 for( k=0 ; k < curNumClusters; k++) 00321 { 00322 if( curClusterLabel == intraClusterSimilarities(0,k) ) 00323 { 00324 if (keepMe(k) < .5) 00325 out(0,t) = (curClusterLabel)? -curClusterLabel : -1; //negative value signals clusters to be discarded, zero is mapped to -1 00326 break; 00327 } 00328 } 00329 } 00330 }