// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/ui/SGInterface.h> 00013 #include <shogun/ui/GUIKernel.h> 00014 #include <shogun/ui/GUIPluginEstimate.h> 00015 00016 #include <shogun/lib/config.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/io/CSVFile.h> 00019 #include <shogun/kernel/Kernel.h> 00020 #include <shogun/kernel/CombinedKernel.h> 00021 #include <shogun/kernel/Chi2Kernel.h> 00022 #include <shogun/kernel/LinearKernel.h> 00023 #include <shogun/kernel/string/LinearStringKernel.h> 00024 #include <shogun/kernel/string/WeightedDegreeStringKernel.h> 00025 #include <shogun/kernel/WeightedDegreeRBFKernel.h> 00026 #include <shogun/kernel/string/SpectrumMismatchRBFKernel.h> 00027 #include <shogun/kernel/string/WeightedDegreePositionStringKernel.h> 00028 #include <shogun/kernel/string/FixedDegreeStringKernel.h> 00029 #include <shogun/kernel/string/LocalityImprovedStringKernel.h> 00030 #include <shogun/kernel/string/SimpleLocalityImprovedStringKernel.h> 00031 #include <shogun/kernel/PolyKernel.h> 00032 #include <shogun/kernel/CustomKernel.h> 00033 #include <shogun/kernel/ConstKernel.h> 00034 #include <shogun/kernel/string/PolyMatchWordStringKernel.h> 00035 #include <shogun/kernel/string/PolyMatchStringKernel.h> 00036 #include <shogun/kernel/string/LocalAlignmentStringKernel.h> 00037 #include <shogun/kernel/string/MatchWordStringKernel.h> 00038 #include <shogun/kernel/string/CommWordStringKernel.h> 00039 #include <shogun/kernel/string/WeightedCommWordStringKernel.h> 00040 #include 
<shogun/kernel/string/CommUlongStringKernel.h> 00041 #include <shogun/kernel/string/HistogramWordStringKernel.h> 00042 #include <shogun/kernel/string/SalzbergWordStringKernel.h> 00043 #include <shogun/kernel/GaussianKernel.h> 00044 #include <shogun/kernel/GaussianShiftKernel.h> 00045 #include <shogun/kernel/SigmoidKernel.h> 00046 #include <shogun/kernel/DiagKernel.h> 00047 #include <shogun/kernel/string/OligoStringKernel.h> 00048 #include <shogun/kernel/DistanceKernel.h> 00049 #include <shogun/kernel/TensorProductPairKernel.h> 00050 #include <shogun/kernel/normalizer/AvgDiagKernelNormalizer.h> 00051 #include <shogun/kernel/normalizer/RidgeKernelNormalizer.h> 00052 #include <shogun/kernel/normalizer/FirstElementKernelNormalizer.h> 00053 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h> 00054 #include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h> 00055 #include <shogun/kernel/normalizer/VarianceKernelNormalizer.h> 00056 #include <shogun/kernel/normalizer/ScatterKernelNormalizer.h> 00057 #include <shogun/classifier/svm/SVM.h> 00058 #include <shogun/kernel/normalizer/ZeroMeanCenterKernelNormalizer.h> 00059 #include <shogun/kernel/WaveletKernel.h> 00060 00061 #include <string.h> 00062 00063 using namespace shogun; 00064 00065 CGUIKernel::CGUIKernel(CSGInterface* ui_) 00066 : CSGObject(), ui(ui_) 00067 { 00068 kernel=NULL; 00069 } 00070 00071 CGUIKernel::~CGUIKernel() 00072 { 00073 SG_UNREF(kernel); 00074 } 00075 00076 CKernel* CGUIKernel::get_kernel() 00077 { 00078 return kernel; 00079 } 00080 00081 CKernel* CGUIKernel::create_oligo(int32_t size, int32_t k, float64_t width) 00082 { 00083 CKernel* kern=new COligoStringKernel(size, k, width); 00084 SG_DEBUG("created OligoStringKernel (%p) with size %d, k %d, width %f.\n", kern, size, k, width) 00085 00086 return kern; 00087 } 00088 00089 CKernel* CGUIKernel::create_diag(int32_t size, float64_t diag) 00090 { 00091 CKernel* kern=new CDiagKernel(size, diag); 00092 if (!kern) 00093 SG_ERROR("Couldn't 
create DiagKernel with size %d, diag %f.\n", size, diag) 00094 else 00095 SG_DEBUG("created DiagKernel (%p) with size %d, diag %f.\n", kern, size, diag) 00096 00097 return kern; 00098 } 00099 00100 CKernel* CGUIKernel::create_const(int32_t size, float64_t c) 00101 { 00102 CKernel* kern=new CConstKernel(c); 00103 if (!kern) 00104 SG_ERROR("Couldn't create ConstKernel with c %f.\n", c) 00105 else 00106 SG_DEBUG("created ConstKernel (%p) with c %f.\n", kern, c) 00107 00108 kern->set_cache_size(size); 00109 00110 return kern; 00111 } 00112 00113 CKernel* CGUIKernel::create_custom(float64_t* kmatrix, int32_t num_feat, int32_t num_vec, bool source_is_diag, bool dest_is_diag) 00114 { 00115 CCustomKernel* kern=new CCustomKernel(); 00116 SG_DEBUG("created CustomKernel (%p).\n", kern) 00117 00118 SGMatrix<float64_t> km=SGMatrix<float64_t>(kmatrix, num_feat, num_vec); 00119 00120 if (source_is_diag && dest_is_diag && num_feat==1) 00121 { 00122 kern->set_triangle_kernel_matrix_from_triangle( 00123 SGVector<float64_t>(kmatrix, num_vec)); 00124 } 00125 else if (!source_is_diag && dest_is_diag && num_vec==num_feat) 00126 kern->set_triangle_kernel_matrix_from_full(km); 00127 else 00128 kern->set_full_kernel_matrix_from_full(km); 00129 00130 return kern; 00131 } 00132 00133 00134 CKernel* CGUIKernel::create_gaussianshift( 00135 int32_t size, float64_t width, int32_t max_shift, int32_t shift_step) 00136 { 00137 CKernel* kern=new CGaussianShiftKernel(size, width, max_shift, shift_step); 00138 if (!kern) 00139 SG_ERROR("Couldn't create GaussianShiftKernel with size %d, width %f, max_shift %d, shift_step %d.\n", size, width, max_shift, shift_step) 00140 else 00141 SG_DEBUG("created GaussianShiftKernel (%p) with size %d, width %f, max_shift %d, shift_step %d.\n", kern, size, width, max_shift, shift_step) 00142 00143 return kern; 00144 } 00145 00146 CKernel* CGUIKernel::create_sparsegaussian(int32_t size, float64_t width) 00147 { 00148 CKernel* kern=new CGaussianKernel(size, width); 
00149 if (!kern) 00150 SG_ERROR("Couldn't create GaussianKernel with size %d, width %f.\n", size, width) 00151 else 00152 SG_DEBUG("created GaussianKernel (%p) with size %d, width %f.\n", kern, size, width) 00153 00154 return kern; 00155 } 00156 00157 CKernel* CGUIKernel::create_gaussian(int32_t size, float64_t width) 00158 { 00159 CKernel* kern=new CGaussianKernel(size, width); 00160 if (!kern) 00161 SG_ERROR("Couldn't create GaussianKernel with size %d, width %f.\n", size, width) 00162 else 00163 SG_DEBUG("created GaussianKernel (%p) with size %d, width %f.\n", kern, size, width) 00164 00165 return kern; 00166 } 00167 00168 CKernel* CGUIKernel::create_sigmoid( 00169 int32_t size, float64_t gamma, float64_t coef0) 00170 { 00171 CKernel* kern=new CSigmoidKernel(size, gamma, coef0); 00172 if (!kern) 00173 SG_ERROR("Couldn't create SigmoidKernel with size %d, gamma %f, coef0 %f.\n", size, gamma, coef0) 00174 else 00175 SG_DEBUG("created SigmoidKernel (%p) with size %d, gamma %f, coef0 %f.\n", kern, size, gamma, coef0) 00176 00177 return kern; 00178 } 00179 CKernel* CGUIKernel::create_wavelet( 00180 int32_t size, float64_t Wdilation, float64_t Wtranslation) 00181 { 00182 CKernel* kern=new CWaveletKernel(size, Wdilation, Wtranslation); 00183 if (!kern) 00184 SG_ERROR("Couldn't create WaveletKernel with size %d, Wdilation %f, Wtranslation %f.\n", size, Wdilation, Wtranslation) 00185 else 00186 SG_DEBUG("created WaveletKernel (%p) with size %d, Wdilation %f, Wtranslation %f.\n", kern, size, Wdilation, Wtranslation) 00187 00188 return kern; 00189 } 00190 CKernel* CGUIKernel::create_sparsepoly( 00191 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00192 { 00193 CKernel* kern=new CPolyKernel(size, degree, inhomogene); 00194 if (!normalize) 00195 kern->set_normalizer(new CIdentityKernelNormalizer()); 00196 SG_DEBUG("created PolyKernel with size %d, degree %d, inhomogene %d normalize %d.\n", kern, size, degree, inhomogene, normalize) 00197 00198 return kern; 
00199 } 00200 00201 CKernel* CGUIKernel::create_poly( 00202 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00203 { 00204 CKernel* kern=new CPolyKernel(size, degree, inhomogene); 00205 if (!normalize) 00206 kern->set_normalizer(new CIdentityKernelNormalizer()); 00207 SG_DEBUG("created PolyKernel (%p) with size %d, degree %d, inhomogene %d, normalize %d.\n", kern, size, degree, inhomogene, normalize) 00208 00209 return kern; 00210 } 00211 00212 CKernel* CGUIKernel::create_localityimprovedstring( 00213 int32_t size, int32_t length, int32_t inner_degree, int32_t outer_degree, 00214 EKernelType ktype) 00215 { 00216 CKernel* kern=NULL; 00217 00218 if (ktype==K_SIMPLELOCALITYIMPROVED) 00219 { 00220 kern=new CSimpleLocalityImprovedStringKernel( 00221 size, length, inner_degree, outer_degree); 00222 } 00223 else if (ktype==K_LOCALITYIMPROVED) 00224 { 00225 kern=new CLocalityImprovedStringKernel( 00226 size, length, inner_degree, outer_degree); 00227 } 00228 00229 if (!kern) 00230 SG_ERROR("Couldn't create (Simple)LocalityImprovedStringKernel with size %d, length %d, inner_degree %d, outer_degree %d.\n", size, length, inner_degree, outer_degree) 00231 else 00232 SG_DEBUG("created (Simple)LocalityImprovedStringKernel with size %d, length %d, inner_degree %d, outer_degree %d.\n", kern, size, length, inner_degree, outer_degree) 00233 00234 return kern; 00235 } 00236 00237 CKernel* CGUIKernel::create_weighteddegreestring( 00238 int32_t size, int32_t order, int32_t max_mismatch, bool use_normalization, 00239 int32_t mkl_stepsize, bool block_computation, int32_t single_degree) 00240 { 00241 float64_t* weights=get_weights(order, max_mismatch); 00242 00243 int32_t i=0; 00244 if (single_degree>=0) 00245 { 00246 ASSERT(single_degree<order) 00247 for (i=0; i<order; i++) 00248 { 00249 if (i!=single_degree) 00250 weights[i]=0; 00251 else 00252 weights[i]=1; 00253 } 00254 } 00255 00256 CKernel* kern=new CWeightedDegreeStringKernel(SGVector<float64_t>(weights, order)); 
00257 00258 SG_DEBUG("created WeightedDegreeStringKernel (%p) with size %d, order %d, " 00259 "max_mismatch %d, use_normalization %d, mkl_stepsize %d, " 00260 "block_computation %d, single_degree %d.\n", 00261 kern, size, order, max_mismatch, (int) use_normalization, mkl_stepsize, 00262 block_computation, single_degree); 00263 00264 if (!use_normalization) 00265 kern->set_normalizer(new CIdentityKernelNormalizer()); 00266 00267 ((CWeightedDegreeStringKernel*) kern)-> 00268 set_use_block_computation(block_computation); 00269 ((CWeightedDegreeStringKernel*) kern)->set_max_mismatch(max_mismatch); 00270 ((CWeightedDegreeStringKernel*) kern)->set_mkl_stepsize(mkl_stepsize); 00271 ((CWeightedDegreeStringKernel*) kern)->set_which_degree(single_degree); 00272 00273 return kern; 00274 } 00275 00276 CKernel* CGUIKernel::create_weighteddegreepositionstring( 00277 int32_t size, int32_t order, int32_t max_mismatch, int32_t length, 00278 int32_t center, float64_t step) 00279 { 00280 int32_t i=0; 00281 int32_t* shifts=SG_MALLOC(int32_t, length); 00282 00283 for (i=center; i<length; i++) 00284 shifts[i]=(int32_t) floor(((float64_t) (i-center))/step); 00285 00286 for (i=center-1; i>=0; i--) 00287 shifts[i]=(int32_t) floor(((float64_t) (center-i))/step); 00288 00289 for (i=0; i<length; i++) 00290 { 00291 if (shifts[i]>length) 00292 shifts[i]=length; 00293 } 00294 00295 for (i=0; i<length; i++) 00296 SG_INFO("shift[%i]=%i\n", i, shifts[i]) 00297 00298 float64_t* weights=get_weights(order, max_mismatch); 00299 00300 CKernel* kern=new CWeightedDegreePositionStringKernel(size, SGVector<float64_t>(weights, order*(1+max_mismatch)), order, max_mismatch, SGVector<int32_t>(shifts, length).clone()); 00301 if (!kern) 00302 SG_ERROR("Couldn't create WeightedDegreePositionStringKernel with size %d, order %d, max_mismatch %d, length %d, center %d, step %f.\n", size, order, max_mismatch, length, center, step) 00303 else 00304 SG_DEBUG("created WeightedDegreePositionStringKernel with size %d, order 
%d, max_mismatch %d, length %d, center %d, step %f.\n", kern, size, order, max_mismatch, length, center, step) 00305 00306 return kern; 00307 } 00308 00309 CKernel* CGUIKernel::create_weighteddegreepositionstring3( 00310 int32_t size, int32_t order, int32_t max_mismatch, int32_t* shifts, 00311 int32_t length, int32_t mkl_stepsize, float64_t* position_weights) 00312 { 00313 float64_t* weights=get_weights(order, max_mismatch); 00314 00315 CKernel* kern=new CWeightedDegreePositionStringKernel(size, SGVector<float64_t>(weights, order*(1+max_mismatch)), order, max_mismatch, SGVector<int32_t>(shifts, length, false).clone(), mkl_stepsize); 00316 kern->set_normalizer(new CIdentityKernelNormalizer()); 00317 00318 SG_DEBUG("created WeightedDegreePositionStringKernel (%p) with size %d, order %d, max_mismatch %d, length %d and position_weights (MKL stepsize: %d).\n", kern, size, order, max_mismatch, length, mkl_stepsize) 00319 00320 if (!position_weights) 00321 { 00322 position_weights=SG_MALLOC(float64_t, length); 00323 for (int32_t i=0; i<length; i++) 00324 position_weights[i]=1.0/length; 00325 } 00326 ((CWeightedDegreePositionStringKernel*) kern)-> 00327 set_position_weights(SGVector<float64_t>(position_weights, length)); 00328 00329 return kern; 00330 } 00331 00332 CKernel* CGUIKernel::create_weighteddegreepositionstring2( 00333 int32_t size, int32_t order, int32_t max_mismatch, int32_t* shifts, 00334 int32_t length, bool use_normalization) 00335 { 00336 float64_t* weights=get_weights(order, max_mismatch); 00337 00338 CKernel* kern=new CWeightedDegreePositionStringKernel(size, SGVector<float64_t>(weights, order*(1+max_mismatch)), order, max_mismatch, SGVector<int32_t>(shifts, length, false).clone()); 00339 if (!use_normalization) 00340 kern->set_normalizer(new CIdentityKernelNormalizer()); 00341 00342 00343 SG_DEBUG("created WeightedDegreePositionStringKernel (%p) with size %d, order %d, max_mismatch %d, length %d, use_normalization %d.\n", kern, size, order, max_mismatch, 
length, use_normalization) 00344 00345 return kern; 00346 } 00347 00348 float64_t* CGUIKernel::get_weights(int32_t order, int32_t max_mismatch) 00349 { 00350 float64_t *weights=SG_MALLOC(float64_t, order*(1+max_mismatch)); 00351 float64_t sum=0; 00352 int32_t i=0; 00353 00354 for (i=0; i<order; i++) 00355 { 00356 weights[i]=order-i; 00357 sum+=weights[i]; 00358 } 00359 for (i=0; i<order; i++) 00360 weights[i]/=sum; 00361 00362 for (i=0; i<order; i++) 00363 { 00364 for (int32_t j=1; j<=max_mismatch; j++) 00365 { 00366 if (j<i+1) 00367 { 00368 int32_t nk=CMath::nchoosek(i+1, j); 00369 weights[i+j*order]=weights[i]/(nk*CMath::pow(3, j)); 00370 } 00371 else 00372 weights[i+j*order]=0; 00373 } 00374 } 00375 00376 return weights; 00377 } 00378 00379 CKernel* CGUIKernel::create_weighteddegreerbf(int32_t size, int32_t degree, int32_t nof_properties, float64_t width) 00380 { 00381 CKernel* kern=new CWeightedDegreeRBFKernel(size, width, degree, nof_properties); 00382 if (!kern) 00383 SG_ERROR("Couldn't create WeightedDegreeRBFKernel with size %d, width %f, degree %d, nof_properties %d.\n", size, width, degree, nof_properties) 00384 else 00385 SG_DEBUG("created WeightedDegreeRBFKernel (%p) with size %d, width %f, degree %d, nof_properties %d.\n", kern, size, width, degree, nof_properties) 00386 00387 return kern; 00388 } 00389 00390 CKernel* CGUIKernel::create_spectrummismatchrbf(int32_t size, float64_t* AA_matrix, int32_t nr, int32_t nc, int32_t max_mismatch, int32_t degree, float64_t width) 00391 { 00392 00393 CKernel* kern = new CSpectrumMismatchRBFKernel(size, AA_matrix, nr, nc, degree, max_mismatch, width); 00394 if (!kern) 00395 SG_ERROR("Couldn't create SpectrumMismatchRBFKernel with size %d, width %f, degree %d, max_mismatch %d.\n", size, width, degree, max_mismatch) 00396 else 00397 SG_DEBUG("created SpectrumMismatchRBFKernel (%p) with size %d, width %f, degree %d, max_mismatch %d.\n", kern, size, width, degree, max_mismatch) 00398 00399 return kern; 00400 00401 } 
00402 00403 00404 CKernel* CGUIKernel::create_localalignmentstring(int32_t size) 00405 { 00406 CKernel* kern=new CLocalAlignmentStringKernel(size); 00407 if (!kern) 00408 SG_ERROR("Couldn't create LocalAlignmentStringKernel with size %d.\n", size) 00409 else 00410 SG_DEBUG("created LocalAlignmentStringKernel (%p) with size %d.\n", kern, size) 00411 00412 return kern; 00413 } 00414 00415 CKernel* CGUIKernel::create_fixeddegreestring(int32_t size, int32_t d) 00416 { 00417 CKernel* kern=new CFixedDegreeStringKernel(size, d); 00418 if (!kern) 00419 SG_ERROR("Couldn't create FixedDegreeStringKernel with size %d and d %d.\n", size, d) 00420 else 00421 SG_DEBUG("created FixedDegreeStringKernel (%p) with size %d and d %d.\n", kern, size, d) 00422 00423 return kern; 00424 } 00425 00426 CKernel* CGUIKernel::create_chi2(int32_t size, float64_t width) 00427 { 00428 CKernel* kern=new CChi2Kernel(size, width); 00429 if (!kern) 00430 SG_ERROR("Couldn't create Chi2Kernel with size %d and width %f.\n", size, width) 00431 else 00432 SG_DEBUG("created Chi2Kernel (%p) with size %d and width %f.\n", kern, size, width) 00433 00434 return kern; 00435 } 00436 00437 CKernel* CGUIKernel::create_commstring( 00438 int32_t size, bool use_sign, char* norm_str, EKernelType ktype) 00439 { 00440 CKernel* kern=NULL; 00441 00442 if (!norm_str) 00443 norm_str= (char*) "FULL"; 00444 00445 if (ktype==K_COMMULONGSTRING) 00446 kern=new CCommUlongStringKernel(size, use_sign); 00447 else if (ktype==K_COMMWORDSTRING) 00448 kern=new CCommWordStringKernel(size, use_sign); 00449 else if (ktype==K_WEIGHTEDCOMMWORDSTRING) 00450 kern=new CWeightedCommWordStringKernel(size, use_sign); 00451 00452 SG_DEBUG("created WeightedCommWord/CommWord/CommUlongStringKernel (%p) with size %d, use_sign %d norm_str %s.\n", kern, size, use_sign, norm_str) 00453 00454 00455 if (strncmp(norm_str, "NO", 2)==0) 00456 { 00457 kern->set_normalizer(new CIdentityKernelNormalizer()); 00458 } 00459 else if (strncmp(norm_str, "FULL", 4)==0) 
00460 { 00461 //nop, as this one is default 00462 } 00463 else 00464 SG_ERROR("Unsupported Normalizer requested, supports only FULL and NO\n") 00465 00466 return kern; 00467 } 00468 00469 CKernel* CGUIKernel::create_matchwordstring( 00470 int32_t size, int32_t d, bool normalize) 00471 { 00472 CKernel* kern=new CMatchWordStringKernel(size, d); 00473 SG_DEBUG("created MatchWordStringKernel (%p) with size %d and d %d.\n", kern, size, d) 00474 if (!normalize) 00475 kern->set_normalizer(new CIdentityKernelNormalizer()); 00476 00477 return kern; 00478 } 00479 00480 CKernel* CGUIKernel::create_polymatchstring( 00481 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00482 { 00483 CKernel* kern=new CPolyMatchStringKernel(size, degree, inhomogene); 00484 SG_DEBUG("created PolyMatchStringKernel (%p) with size %d, degree %d, inhomogene %d normalize %d.\n", kern, size, degree, inhomogene, normalize) 00485 if (!normalize) 00486 kern->set_normalizer(new CIdentityKernelNormalizer()); 00487 00488 return kern; 00489 } 00490 00491 CKernel* CGUIKernel::create_polymatchwordstring( 00492 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00493 { 00494 CKernel* kern=new CPolyMatchWordStringKernel(size, degree, inhomogene); 00495 SG_DEBUG("created PolyMatchWordStringKernel (%p) with size %d, degree %d, inhomogene %d, normalize %d.\n", kern, size, degree, inhomogene, normalize) 00496 if (!normalize) 00497 kern->set_normalizer(new CIdentityKernelNormalizer()); 00498 00499 return kern; 00500 } 00501 00502 CKernel* CGUIKernel::create_salzbergword(int32_t size) 00503 { 00504 SG_INFO("Getting estimator.\n") 00505 CPluginEstimate* estimator=ui->ui_pluginestimate->get_estimator(); 00506 if (!estimator) 00507 SG_ERROR("No estimator set.\n") 00508 00509 CKernel* kern=new CSalzbergWordStringKernel(size, estimator); 00510 if (!kern) 00511 SG_ERROR("Couldn't create SalzbergWordString with size %d.\n", size) 00512 else 00513 SG_DEBUG("created SalzbergWordString (%p) with size 
%d.\n", kern, size) 00514 00515 /* 00516 // prior stuff 00517 SG_INFO("Getting labels.\n") 00518 CLabels* train_labels=ui->ui_labels->get_train_labels(); 00519 if (!train_labels) 00520 { 00521 SG_INFO("Assign train labels first!\n") 00522 return NULL; 00523 } 00524 ((CSalzbergWordStringKernel *) kern)->set_prior_probs_from_labels(train_labels); 00525 */ 00526 00527 return kern; 00528 } 00529 00530 CKernel* CGUIKernel::create_histogramword(int32_t size) 00531 { 00532 SG_INFO("Getting estimator.\n") 00533 CPluginEstimate* estimator=ui->ui_pluginestimate->get_estimator(); 00534 if (!estimator) 00535 SG_ERROR("No estimator set.\n") 00536 00537 CKernel* kern=new CHistogramWordStringKernel(size, estimator); 00538 if (!kern) 00539 SG_ERROR("Couldn't create HistogramWordString with size %d.\n", size) 00540 else 00541 SG_DEBUG("created HistogramWordString (%p) with size %d.\n", kern, size) 00542 00543 return kern; 00544 } 00545 00546 CKernel* CGUIKernel::create_linearbyte(int32_t size, float64_t scale) 00547 { 00548 size=0; 00549 CKernel* kern=new CLinearKernel(); 00550 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00551 SG_DEBUG("created LinearByteKernel (%p) with size %d and scale %f.\n", kern, size, scale) 00552 00553 return kern; 00554 } 00555 00556 CKernel* CGUIKernel::create_linearword(int32_t size, float64_t scale) 00557 { 00558 size=0; 00559 CKernel* kern=new CLinearKernel(); 00560 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00561 SG_DEBUG("created LinearWordKernel (%p) with size %d and scale %f.\n", kern, size, scale) 00562 00563 return kern; 00564 } 00565 00566 CKernel* CGUIKernel::create_linearstring(int32_t size, float64_t scale) 00567 { 00568 size=0; 00569 CKernel* kern=NULL; 00570 kern=new CLinearStringKernel(); 00571 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00572 00573 SG_DEBUG("created LinearStringKernel (%p) with size %d and scale %f.\n", kern, size, scale) 00574 00575 return kern; 00576 } 00577 00578 CKernel* 
CGUIKernel::create_linear(int32_t size, float64_t scale) 00579 { 00580 size=0; 00581 CKernel* kern=new CLinearKernel(); 00582 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00583 00584 SG_DEBUG("created LinearKernel (%p) with size %d and scale %f.\n", kern, size, scale) 00585 00586 return kern; 00587 } 00588 00589 CKernel* CGUIKernel::create_sparselinear(int32_t size, float64_t scale) 00590 { 00591 size=0; 00592 CKernel* kern=new CLinearKernel(); 00593 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00594 00595 SG_DEBUG("created LinearKernel (%p) with size %d and scale %f.\n", kern, size, scale) 00596 00597 return kern; 00598 } 00599 00600 CKernel* CGUIKernel::create_tppk(int32_t size, float64_t* km, int32_t rows, int32_t cols) 00601 { 00602 CCustomKernel* k=new CCustomKernel(); 00603 k->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(km, rows, cols)); 00604 00605 CKernel* kern=new CTensorProductPairKernel(size, k); 00606 00607 SG_DEBUG("created TPPK (%p) with size %d and km %p, rows %d, cols %d.\n", kern, size, km, rows, cols) 00608 00609 return kern; 00610 } 00611 00612 CKernel* CGUIKernel::create_distance(int32_t size, float64_t width) 00613 { 00614 CDistance* dist=ui->ui_distance->get_distance(); 00615 if (!dist) 00616 SG_ERROR("No distance set for DistanceKernel.\n") 00617 00618 CKernel* kern=new CDistanceKernel(size, width, dist); 00619 if (!kern) 00620 SG_ERROR("Couldn't create DistanceKernel with size %d and width %f.\n", size, width) 00621 else 00622 SG_DEBUG("created DistanceKernel (%p) with size %d and width %f.\n", kern, size, width) 00623 00624 return kern; 00625 } 00626 00627 CKernel* CGUIKernel::create_combined( 00628 int32_t size, bool append_subkernel_weights) 00629 { 00630 CKernel* kern=new CCombinedKernel(size, append_subkernel_weights); 00631 if (!kern) 00632 SG_ERROR("Couldn't create CombinedKernel with size %d and append_subkernel_weights %d.\n", size, append_subkernel_weights) 00633 else 00634 SG_DEBUG("created 
CombinedKernel (%p) with size %d and append_subkernel_weights %d.\n", kern, size, append_subkernel_weights) 00635 00636 return kern; 00637 } 00638 00639 bool CGUIKernel::set_normalization(char* normalization, float64_t c, float64_t r) 00640 { 00641 CKernel* k=kernel; 00642 00643 if (k && k->get_kernel_type()==K_COMBINED) 00644 k=((CCombinedKernel*) kernel)->get_last_kernel(); 00645 00646 if (!k) 00647 SG_ERROR("No kernel available.\n") 00648 00649 if (strncmp(normalization, "IDENTITY", 8)==0) 00650 { 00651 SG_INFO("Identity Normalization (==NO NORMALIZATION) selected\n") 00652 return k->set_normalizer(new CIdentityKernelNormalizer()); 00653 } 00654 else if (strncmp(normalization,"AVGDIAG", 7)==0) 00655 { 00656 SG_INFO("Average Kernel Diagonal Normalization selected\n") 00657 return k->set_normalizer(new CAvgDiagKernelNormalizer(c)); 00658 } 00659 else if (strncmp(normalization,"RIDGE", 5)==0) 00660 { 00661 SG_INFO("Ridge Kernel Normalization selected\n") 00662 return k->set_normalizer(new CRidgeKernelNormalizer(r, c)); 00663 } 00664 else if (strncmp(normalization,"SQRTDIAG", 8)==0) 00665 { 00666 SG_INFO("Sqrt Diagonal Normalization selected\n") 00667 return k->set_normalizer(new CSqrtDiagKernelNormalizer()); 00668 } 00669 else if (strncmp(normalization,"FIRSTELEMENT", 12)==0) 00670 { 00671 SG_INFO("First Element Normalization selected\n") 00672 return k->set_normalizer(new CFirstElementKernelNormalizer()); 00673 } 00674 else if (strncmp(normalization,"VARIANCE", 8)==0) 00675 { 00676 SG_INFO("Variance Normalization selected\n") 00677 return k->set_normalizer(new CVarianceKernelNormalizer()); 00678 } 00679 else if (strncmp(normalization,"SCATTER", 7)==0) 00680 { 00681 SG_INFO("Scatter Normalization selected\n") 00682 CLabels* train_labels=ui->ui_labels->get_train_labels(); 00683 ASSERT(train_labels) 00684 return k->set_normalizer(new CScatterKernelNormalizer(c,r, train_labels)); 00685 } 00686 else if (strncmp(normalization,"ZEROMEANCENTER", 13)==0) 00687 { 00688 
SG_INFO("Zero Mean Center Normalization selected\n") 00689 return k->set_normalizer(new CZeroMeanCenterKernelNormalizer()); 00690 } 00691 else 00692 SG_ERROR("Wrong kernel normalizer name.\n") 00693 00694 SG_UNREF(k); 00695 00696 return false; 00697 } 00698 00699 bool CGUIKernel::set_kernel(CKernel* kern) 00700 { 00701 if (kern) 00702 { 00703 SG_DEBUG("deleting old kernel (%p).\n", kernel) 00704 SG_REF(kern); 00705 SG_UNREF(kernel); 00706 kernel=kern; 00707 SG_DEBUG("set new kernel (%p).\n", kern) 00708 00709 return true; 00710 } 00711 else 00712 return false; 00713 } 00714 00715 bool CGUIKernel::init_kernel_optimization() 00716 { 00717 CSVM* svm=(CSVM*) ui->ui_classifier->get_classifier(); 00718 if (svm) 00719 { 00720 if (kernel->has_property(KP_LINADD)) 00721 { 00722 int32_t num_sv=svm->get_num_support_vectors(); 00723 int32_t* sv_idx=SG_MALLOC(int32_t, num_sv); 00724 float64_t* sv_weight=SG_MALLOC(float64_t, num_sv); 00725 00726 for (int32_t i=0; i<num_sv; i++) 00727 { 00728 sv_idx[i]=svm->get_support_vector(i); 00729 sv_weight[i]=svm->get_alpha(i); 00730 } 00731 00732 bool ret=kernel->init_optimization(num_sv, sv_idx, sv_weight); 00733 00734 SG_FREE(sv_idx); 00735 SG_FREE(sv_weight); 00736 00737 if (!ret) 00738 SG_ERROR("Initialization of kernel optimization failed\n") 00739 return ret; 00740 } 00741 } 00742 else 00743 SG_ERROR("Create SVM first!\n") 00744 00745 return true; 00746 } 00747 00748 bool CGUIKernel::delete_kernel_optimization() 00749 { 00750 if (kernel && kernel->has_property(KP_LINADD) && kernel->get_is_initialized()) 00751 kernel->delete_optimization(); 00752 00753 return true; 00754 } 00755 00756 00757 bool CGUIKernel::init_kernel(const char* target) 00758 { 00759 if (!kernel) 00760 SG_ERROR("No kernel available.\n") 00761 00762 // no need to init custom kernel 00763 if (kernel->get_kernel_type() == K_CUSTOM || !target) 00764 { 00765 initialized=true; 00766 return true; 00767 } 00768 00769 EFeatureClass k_fclass=kernel->get_feature_class(); 00770 
EFeatureType k_ftype=kernel->get_feature_type(); 00771 00772 if (!strncmp(target, "TRAIN", 5)) 00773 { 00774 CFeatures* train=ui->ui_features->get_train_features(); 00775 00776 if (train) 00777 { 00778 EFeatureClass fclass=train->get_feature_class(); 00779 EFeatureType ftype=train->get_feature_type(); 00780 if ((k_fclass==fclass || k_fclass==C_ANY || fclass==C_ANY) && 00781 (k_ftype==ftype || k_ftype==F_ANY || ftype==F_ANY)) 00782 00783 { 00784 SG_INFO("Initialising kernel with TRAIN DATA, train: %p\n", train) 00785 kernel->init(train, train); 00786 initialized=true; 00787 } 00788 else 00789 SG_ERROR("Kernel can not process this train feature type: %d %d.\n", fclass, ftype) 00790 } 00791 else 00792 SG_DEBUG("Not initing kernel - no train features assigned.\n") 00793 } 00794 else if (!strncmp(target, "TEST", 4)) 00795 { 00796 CFeatures* train=ui->ui_features->get_train_features(); 00797 CFeatures* test=ui->ui_features->get_test_features(); 00798 if (train && test) 00799 { 00800 EFeatureClass fclass=test->get_feature_class(); 00801 EFeatureType ftype=test->get_feature_type(); 00802 if ((k_fclass==fclass || k_fclass==C_ANY || fclass==C_ANY) && 00803 (k_ftype==ftype || k_ftype==F_ANY || ftype==F_ANY)) 00804 00805 { 00806 if (!initialized) 00807 { 00808 EFeatureClass tr_fclass=train->get_feature_class(); 00809 EFeatureType tr_ftype=train->get_feature_type(); 00810 if ((k_fclass==tr_fclass || k_fclass==C_ANY || tr_fclass==C_ANY) && 00811 (k_ftype==tr_ftype || k_ftype==F_ANY || tr_ftype==F_ANY)) 00812 { 00813 SG_INFO("Initialising kernel with TRAIN DATA, train: %p\n", train) 00814 kernel->init(train, train); 00815 initialized=true; 00816 } 00817 else 00818 SG_ERROR("Kernel can not process this train feature type: %d %d.\n", fclass, ftype) 00819 } 00820 00821 SG_INFO("Initialising kernel with TEST DATA, train: %p test %p\n", train, test) 00822 // lhs -> always train_features; rhs -> always test_features 00823 kernel->init(train, test); 00824 } 00825 else 00826 
SG_ERROR("Kernel can not process this test feature type: %d %d.\n", fclass, ftype) 00827 } 00828 else 00829 SG_DEBUG("Not initing kernel - no train and test features assigned.\n") 00830 } 00831 else 00832 SG_ERROR("Unknown target %s.\n", target) 00833 00834 return true; 00835 } 00836 00837 bool CGUIKernel::save_kernel(char* filename) 00838 { 00839 if (kernel && initialized) 00840 { 00841 CCSVFile* file=new CCSVFile(filename); 00842 try 00843 { 00844 kernel->save(file); 00845 } 00846 catch (...) 00847 { 00848 SG_ERROR("Writing to file %s failed!\n", filename) 00849 } 00850 00851 SG_UNREF(file); 00852 SG_INFO("Successfully written kernel to \"%s\" !\n", filename) 00853 return true; 00854 } 00855 else 00856 SG_ERROR("No kernel set / kernel not initialized!\n") 00857 00858 return false; 00859 } 00860 00861 bool CGUIKernel::add_kernel(CKernel* kern, float64_t weight) 00862 { 00863 if (!kern) 00864 SG_ERROR("Given kernel to add is invalid.\n") 00865 00866 if (!kernel) 00867 { 00868 kernel= new CCombinedKernel(20, false); 00869 SG_REF(kernel); 00870 } 00871 00872 if (kernel->get_kernel_type()!=K_COMBINED) 00873 { 00874 CKernel* first_elem=kernel; 00875 kernel= new CCombinedKernel(20, false); 00876 SG_REF(kernel); 00877 ((CCombinedKernel*) kernel)->append_kernel(first_elem); 00878 } 00879 00880 if (!kernel) 00881 SG_ERROR("Combined kernel object could not be created.\n") 00882 00883 kern->set_combined_kernel_weight(weight); 00884 00885 bool success=((CCombinedKernel*) kernel)->append_kernel(kern); 00886 00887 initialized=true; 00888 if (success) 00889 ((CCombinedKernel*) kernel)->list_kernels(); 00890 else 00891 SG_ERROR("Adding of kernel failed.\n") 00892 00893 return success; 00894 } 00895 00896 00897 bool CGUIKernel::del_last_kernel() 00898 { 00899 if (!kernel) 00900 SG_ERROR("No kernel available.\n") 00901 00902 if (kernel->get_kernel_type()!=K_COMBINED) 00903 SG_ERROR("Need a combined kernel for deleting the last kernel in it.\n") 00904 00905 if (((CCombinedKernel*) 
kernel)->get_num_kernels()>0) 00906 return ((CCombinedKernel*) kernel)-> 00907 delete_kernel(((CCombinedKernel*) kernel)->get_num_kernels()-1); 00908 else 00909 SG_ERROR("No kernel available to delete.\n") 00910 00911 return false; 00912 } 00913 00914 bool CGUIKernel::clean_kernel() 00915 { 00916 SG_UNREF(kernel); 00917 kernel=NULL; 00918 return true; 00919 } 00920 00921 #ifdef USE_SVMLIGHT 00922 bool CGUIKernel::resize_kernel_cache(int32_t size) 00923 { 00924 if (!kernel) 00925 SG_ERROR("No kernel available.\n") 00926 00927 kernel->resize_kernel_cache(size); 00928 return true; 00929 } 00930 #endif //USE_SVMLIGHT 00931 00932 bool CGUIKernel::set_optimization_type(char* opt_type) 00933 { 00934 EOptimizationType opt=SLOWBUTMEMEFFICIENT; 00935 if (!kernel) 00936 SG_ERROR("No kernel available.\n") 00937 00938 if (strncmp(opt_type, "FASTBUTMEMHUNGRY", 16)==0) 00939 { 00940 SG_INFO("FAST METHOD selected\n") 00941 opt=FASTBUTMEMHUNGRY; 00942 kernel->set_optimization_type(opt); 00943 00944 return true; 00945 } 00946 else if (strncmp(opt_type,"SLOWBUTMEMEFFICIENT", 19)==0) 00947 { 00948 SG_INFO("MEMORY EFFICIENT METHOD selected\n") 00949 opt=SLOWBUTMEMEFFICIENT; 00950 kernel->set_optimization_type(opt); 00951 00952 return true; 00953 } 00954 else 00955 SG_ERROR("Wrong kernel optimization type.\n") 00956 00957 return false; 00958 } 00959 00960 bool CGUIKernel::precompute_subkernels() 00961 { 00962 if (!kernel) 00963 SG_ERROR("No kernel available.\n") 00964 00965 if (kernel->get_kernel_type()!=K_COMBINED) 00966 SG_ERROR("Not a combined kernel.\n") 00967 00968 return ((CCombinedKernel*) kernel)->precompute_subkernels(); 00969 }