SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 #include <shogun/ui/GUIClassifier.h> 00012 #include <shogun/ui/SGInterface.h> 00013 00014 #include <shogun/lib/config.h> 00015 #include <shogun/io/SGIO.h> 00016 00017 #include <shogun/features/SparseFeatures.h> 00018 #include <shogun/features/RealFileFeatures.h> 00019 #include <shogun/labels/Labels.h> 00020 00021 #include <shogun/kernel/AUCKernel.h> 00022 00023 #include <shogun/multiclass/KNN.h> 00024 #include <shogun/clustering/KMeans.h> 00025 #include <shogun/clustering/Hierarchical.h> 00026 #include <shogun/classifier/PluginEstimate.h> 00027 00028 #include <shogun/classifier/LDA.h> 00029 #include <shogun/classifier/LPM.h> 00030 #include <shogun/classifier/LPBoost.h> 00031 #include <shogun/classifier/Perceptron.h> 00032 00033 #include <shogun/machine/LinearMachine.h> 00034 00035 #ifdef USE_SVMLIGHT 00036 #include <shogun/classifier/svm/SVMLight.h> 00037 #include <shogun/classifier/svm/SVMLightOneClass.h> 00038 #include <shogun/regression/svr/SVRLight.h> 00039 #endif //USE_SVMLIGHT 00040 00041 #include <shogun/classifier/mkl/MKLClassification.h> 00042 #include <shogun/regression/svr/MKLRegression.h> 00043 #include <shogun/classifier/mkl/MKLOneClass.h> 00044 #include <shogun/classifier/mkl/MKLMulticlass.h> 00045 #include <shogun/classifier/svm/LibSVM.h> 00046 #include <shogun/multiclass/LaRank.h> 00047 #include <shogun/classifier/svm/GPBTSVM.h> 00048 #include <shogun/classifier/svm/LibSVMOneClass.h> 00049 #include <shogun/multiclass/MulticlassLibSVM.h> 00050 00051 #include <shogun/regression/svr/LibSVR.h> 00052 #include <shogun/regression/KernelRidgeRegression.h> 00053 00054 #include <shogun/classifier/svm/LibLinear.h> 00055 #include <shogun/classifier/svm/MPDSVM.h> 00056 #include <shogun/classifier/svm/GNPPSVM.h> 00057 #include <shogun/multiclass/GMNPSVM.h> 00058 #include <shogun/multiclass/ScatterSVM.h> 00059 00060 #include <shogun/classifier/svm/SVMLin.h> 00061 #include <shogun/classifier/svm/SVMOcas.h> 00062 #include <shogun/classifier/svm/SVMSGD.h> 00063 #include <shogun/classifier/svm/WDSVMOcas.h> 00064 00065 #include <shogun/io/SerializableAsciiFile.h> 00066 00067 using namespace shogun; 00068 00069 CGUIClassifier::CGUIClassifier(CSGInterface* ui_) 00070 : CSGObject(), ui(ui_) 00071 { 00072 constraint_generator=NULL; 00073 classifier=NULL; 00074 max_train_time=0; 00075 00076 // Perceptron parameters 00077 perceptron_learnrate=0.1; 00078 perceptron_maxiter=1000; 00079 00080 // SVM parameters 00081 svm_qpsize=41; 00082 svm_bufsize=3000; 00083 svm_max_qpsize=1000; 00084 mkl_norm=1; 00085 ent_lambda=0; 00086 mkl_block_norm=4; 00087 svm_C1=1; 00088 svm_C2=1; 00089 C_mkl=0; 00090 mkl_use_interleaved=true; 00091 svm_weight_epsilon=1e-5; 00092 svm_epsilon=1e-5; 00093 svm_tube_epsilon=1e-2; 00094 svm_nu=0.5; 00095 svm_use_shrinking = true ; 00096 00097 svm_use_bias = true; 00098 svm_use_batch_computation = true ; 00099 svm_use_linadd = true ; 00100 svm_do_auc_maximization = false ; 00101 00102 // KRR parameters 00103 krr_tau=1; 00104 00105 solver_type=ST_AUTO; 00106 } 00107 00108 CGUIClassifier::~CGUIClassifier() 00109 { 00110 SG_UNREF(classifier); 00111 SG_UNREF(constraint_generator); 00112 } 00113 00114 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d) 00115 { 00116 if (strcmp(name,"LIBSVM_ONECLASS")==0) 00117 { 00118 SG_UNREF(classifier); 00119 classifier = new CLibSVMOneClass(); 00120 SG_INFO("created SVMlibsvm object for oneclass\n") 00121 } 00122 else if (strcmp(name,"LIBSVM_MULTICLASS")==0) 00123 { 00124 SG_UNREF(classifier); 00125 classifier = new CMulticlassLibSVM(); 00126 SG_INFO("created SVMlibsvm object for multiclass\n") 00127 } 00128 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0) 00129 { 00130 SG_UNREF(classifier); 00131 classifier= new CMulticlassLibSVM(LIBSVM_NU_SVC); 00132 SG_INFO("created SVMlibsvm object for multiclass\n") 00133 } 00134 #ifdef USE_SVMLIGHT 00135 else if (strcmp(name,"SCATTERSVM_NO_BIAS_SVMLIGHT")==0) 00136 { 00137 SG_UNREF(classifier); 00138 classifier= new CScatterSVM(NO_BIAS_SVMLIGHT); 00139 SG_INFO("created ScatterSVM NO BIAS SVMLIGHT object\n") 00140 } 00141 #endif //USE_SVMLIGHT 00142 else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0) 00143 { 00144 SG_UNREF(classifier); 00145 classifier= new CScatterSVM(NO_BIAS_LIBSVM); 00146 SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") 00147 } 00148 else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0) 00149 { 00150 SG_UNREF(classifier); 00151 classifier= new CScatterSVM(TEST_RULE1); 00152 SG_INFO("created ScatterSVM TESTRULE1 object\n") 00153 } 00154 else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0) 00155 { 00156 SG_UNREF(classifier); 00157 classifier= new CScatterSVM(TEST_RULE2); 00158 SG_INFO("created ScatterSVM TESTRULE2 object\n") 00159 } 00160 else if (strcmp(name,"LIBSVM_NU")==0) 00161 { 00162 SG_UNREF(classifier); 00163 classifier= new CLibSVM(LIBSVM_NU_SVC); 00164 SG_INFO("created SVMlibsvm object\n") 00165 } 00166 else if (strcmp(name,"LIBSVM")==0) 00167 { 00168 SG_UNREF(classifier); 00169 classifier= new CLibSVM(); 00170 SG_INFO("created SVMlibsvm object\n") 00171 } 00172 else if (strcmp(name,"LARANK")==0) 00173 { 00174 SG_UNREF(classifier); 00175 classifier= new CLaRank(); 00176 SG_INFO("created LaRank object\n") 00177 } 00178 #ifdef USE_SVMLIGHT 00179 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0)) 00180 { 00181 SG_UNREF(classifier); 00182 classifier= new CSVMLight(); 00183 SG_INFO("created SVMLight object\n") 00184 } 00185 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0) 00186 { 00187 SG_UNREF(classifier); 00188 classifier= new CSVMLightOneClass(); 00189 SG_INFO("created SVMLightOneClass object\n") 00190 } 00191 else if (strcmp(name,"SVRLIGHT")==0) 00192 { 00193 SG_UNREF(classifier); 00194 classifier= new CSVRLight(); 00195 SG_INFO("created SVRLight object\n") 00196 } 00197 #endif //USE_SVMLIGHT 00198 else if (strcmp(name,"GPBTSVM")==0) 00199 { 00200 SG_UNREF(classifier); 00201 classifier= new CGPBTSVM(); 00202 SG_INFO("created GPBT-SVM object\n") 00203 } 00204 else if (strcmp(name,"MPDSVM")==0) 00205 { 00206 SG_UNREF(classifier); 00207 classifier= new CMPDSVM(); 00208 SG_INFO("created MPD-SVM object\n") 00209 } 00210 else if (strcmp(name,"GNPPSVM")==0) 00211 { 00212 SG_UNREF(classifier); 00213 classifier= new CGNPPSVM(); 00214 SG_INFO("created GNPP-SVM object\n") 00215 } 00216 else if (strcmp(name,"GMNPSVM")==0) 00217 { 00218 SG_UNREF(classifier); 00219 classifier= new CGMNPSVM(); 00220 SG_INFO("created GMNP-SVM object\n") 00221 } 00222 else if (strcmp(name,"LIBSVR")==0) 00223 { 00224 SG_UNREF(classifier); 00225 classifier= new CLibSVR(); 00226 SG_INFO("created SVRlibsvm object\n") 00227 } 00228 #ifdef HAVE_LAPACK 00229 else if (strcmp(name, "KERNELRIDGEREGRESSION")==0) 00230 { 00231 SG_UNREF(classifier); 00232 classifier=new CKernelRidgeRegression(krr_tau, ui->ui_kernel->get_kernel(), 00233 ui->ui_labels->get_train_labels()); 00234 SG_INFO("created KernelRidgeRegression object %p\n", classifier) 00235 } 00236 #endif //HAVE_LAPACK 00237 else if (strcmp(name,"PERCEPTRON")==0) 00238 { 00239 SG_UNREF(classifier); 00240 classifier= new CPerceptron(); 00241 SG_INFO("created Perceptron object\n") 00242 } 00243 #ifdef HAVE_LAPACK 00244 else if (strncmp(name,"LIBLINEAR",9)==0) 00245 { 00246 LIBLINEAR_SOLVER_TYPE st=L2R_LR; 00247 00248 if (strcmp(name,"LIBLINEAR_L2R_LR")==0) 00249 { 00250 st=L2R_LR; 00251 SG_INFO("created LibLinear l2 regularized logistic regression object\n") 00252 } 00253 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0) 00254 { 00255 st=L2R_L2LOSS_SVC_DUAL; 00256 SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") 00257 } 00258 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0) 00259 { 00260 st=L2R_L2LOSS_SVC; 00261 SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") 00262 } 00263 else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0) 00264 { 00265 st=L1R_L2LOSS_SVC; 00266 SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") 00267 } 00268 else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0) 00269 { 00270 st=L2R_L1LOSS_SVC_DUAL; 00271 SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") 00272 } 00273 else 00274 SG_ERROR("unknown liblinear type\n") 00275 00276 SG_UNREF(classifier); 00277 classifier= new CLibLinear(st); 00278 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2); 00279 ((CLibLinear*) classifier)->set_epsilon(svm_epsilon); 00280 ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias); 00281 } 00282 else if (strcmp(name,"LDA")==0) 00283 { 00284 SG_UNREF(classifier); 00285 classifier= new CLDA(); 00286 SG_INFO("created LDA object\n") 00287 } 00288 #endif //HAVE_LAPACK 00289 #ifdef USE_CPLEX 00290 else if (strcmp(name,"LPM")==0) 00291 { 00292 SG_UNREF(classifier); 00293 classifier= new CLPM(); 00294 ((CLPM*) classifier)->set_C(svm_C1, svm_C2); 00295 ((CLPM*) classifier)->set_epsilon(svm_epsilon); 00296 ((CLPM*) classifier)->set_bias_enabled(svm_use_bias); 00297 ((CLPM*) classifier)->set_max_train_time(max_train_time); 00298 SG_INFO("created LPM object\n") 00299 } 00300 else if (strcmp(name,"LPBOOST")==0) 00301 { 00302 SG_UNREF(classifier); 00303 classifier= new CLPBoost(); 00304 ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2); 00305 ((CLPBoost*) classifier)->set_epsilon(svm_epsilon); 00306 ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias); 00307 ((CLPBoost*) classifier)->set_max_train_time(max_train_time); 00308 SG_INFO("created LPBoost object\n") 00309 } 00310 #endif //USE_CPLEX 00311 else if (strncmp(name,"KNN", strlen("KNN"))==0) 00312 { 00313 SG_UNREF(classifier); 00314 classifier= new CKNN(); 00315 SG_INFO("created KNN object\n") 00316 } 00317 else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0) 00318 { 00319 SG_UNREF(classifier); 00320 classifier= new CKMeans(); 00321 SG_INFO("created KMeans object\n") 00322 } 00323 else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0) 00324 { 00325 SG_UNREF(classifier); 00326 classifier= new CHierarchical(); 00327 SG_INFO("created Hierarchical clustering object\n") 00328 } 00329 else if (strcmp(name,"SVMLIN")==0) 00330 { 00331 SG_UNREF(classifier); 00332 classifier= new CSVMLin(); 00333 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2); 00334 ((CSVMLin*) classifier)->set_epsilon(svm_epsilon); 00335 ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias); 00336 SG_INFO("created SVMLin object\n") 00337 } 00338 else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0) 00339 { 00340 SG_UNREF(classifier); 00341 classifier= new CWDSVMOcas(SVM_OCAS); 00342 00343 ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00344 ((CWDSVMOcas*) classifier)->set_degree(d, from_d); 00345 ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00346 ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00347 ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00348 SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) 00349 } 00350 else if (strcmp(name,"SVMOCAS")==0) 00351 { 00352 SG_UNREF(classifier); 00353 classifier= new CSVMOcas(SVM_OCAS); 00354 00355 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00356 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00357 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00358 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00359 SG_INFO("created SVM Ocas(OCAS) object\n") 00360 } 00361 else if (strcmp(name,"SVMSGD")==0) 00362 { 00363 SG_UNREF(classifier); 00364 classifier= new CSVMSGD(svm_C1); 00365 ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias); 00366 SG_INFO("created SVM SGD object\n") 00367 } 00368 else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0)) 00369 { 00370 SG_UNREF(classifier); 00371 classifier= new CSVMOcas(SVM_BMRM); 00372 00373 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00374 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00375 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00376 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00377 SG_INFO("created SVM Ocas(BMRM/PERF) object\n") 00378 } 00379 else if (strcmp(name,"MKL_CLASSIFICATION")==0) 00380 { 00381 SG_UNREF(classifier); 00382 classifier= new CMKLClassification(); 00383 } 00384 else if (strcmp(name,"MKL_ONECLASS")==0) 00385 { 00386 SG_UNREF(classifier); 00387 classifier= new CMKLOneClass(); 00388 } 00389 else if (strcmp(name,"MKL_MULTICLASS")==0) 00390 { 00391 SG_UNREF(classifier); 00392 classifier= new CMKLMulticlass(); 00393 } 00394 else if (strcmp(name,"MKL_REGRESSION")==0) 00395 { 00396 SG_UNREF(classifier); 00397 classifier= new CMKLRegression(); 00398 } 00399 else 00400 { 00401 SG_ERROR("Unknown classifier %s.\n", name) 00402 return false; 00403 } 00404 SG_REF(classifier); 00405 00406 return (classifier!=NULL); 00407 } 00408 00409 bool CGUIClassifier::train_mkl_multiclass() 00410 { 00411 CMKLMulticlass* mkl= (CMKLMulticlass*) classifier; 00412 if (!mkl) 00413 SG_ERROR("No MKL available.\n") 00414 00415 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00416 if (!trainlabels) 00417 SG_ERROR("No trainlabels available.\n") 00418 00419 CKernel* kernel=ui->ui_kernel->get_kernel(); 00420 if (!kernel) 00421 SG_ERROR("No kernel available.\n") 00422 00423 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00424 00425 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00426 SG_ERROR("Kernel not initialized / no train features available.\n") 00427 00428 int32_t num_vec=kernel->get_num_vec_lhs(); 00429 if (trainlabels->get_num_labels() != num_vec) 00430 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec) 00431 00432 SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon) 00433 00434 mkl->set_mkl_epsilon(svm_weight_epsilon); 00435 mkl->set_mkl_norm(mkl_norm); 00436 //mkl->set_max_num_mkliters(-1); 00437 mkl->set_solver_type(solver_type); 00438 mkl->set_bias_enabled(svm_use_bias); 00439 mkl->set_epsilon(svm_epsilon); 00440 mkl->set_max_train_time(max_train_time); 00441 mkl->set_tube_epsilon(svm_tube_epsilon); 00442 mkl->set_nu(svm_nu); 00443 mkl->set_C(svm_C1); 00444 mkl->set_qpsize(svm_qpsize); 00445 mkl->set_shrinking_enabled(svm_use_shrinking); 00446 mkl->set_linadd_enabled(svm_use_linadd); 00447 mkl->set_batch_computation_enabled(svm_use_batch_computation); 00448 00449 ((CKernelMulticlassMachine*) mkl)->set_labels(trainlabels); 00450 ((CKernelMulticlassMachine*) mkl)->set_kernel(kernel); 00451 00452 return mkl->train(); 00453 } 00454 00455 bool CGUIClassifier::train_mkl() 00456 { 00457 CMKL* mkl= (CMKL*) classifier; 00458 if (!mkl) 00459 SG_ERROR("No SVM available.\n") 00460 00461 bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS); 00462 CLabels* trainlabels=NULL; 00463 if(!oneclass) 00464 trainlabels=ui->ui_labels->get_train_labels(); 00465 else 00466 SG_INFO("Training one class mkl.\n") 00467 if (!trainlabels && !oneclass) 00468 SG_ERROR("No trainlabels available.\n") 00469 00470 CKernel* kernel=ui->ui_kernel->get_kernel(); 00471 if (!kernel) 00472 SG_ERROR("No kernel available.\n") 00473 00474 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00475 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00476 SG_ERROR("Kernel not initialized.\n") 00477 00478 int32_t num_vec=kernel->get_num_vec_lhs(); 00479 if (!oneclass && trainlabels->get_num_labels() != num_vec) 00480 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec) 00481 00482 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon) 00483 00484 if (constraint_generator) 00485 mkl->set_constraint_generator(constraint_generator); 00486 mkl->set_solver_type(solver_type); 00487 mkl->set_bias_enabled(svm_use_bias); 00488 mkl->set_epsilon(svm_epsilon); 00489 mkl->set_max_train_time(max_train_time); 00490 mkl->set_tube_epsilon(svm_tube_epsilon); 00491 mkl->set_nu(svm_nu); 00492 mkl->set_C(svm_C1, svm_C2); 00493 mkl->set_qpsize(svm_qpsize); 00494 mkl->set_shrinking_enabled(svm_use_shrinking); 00495 mkl->set_linadd_enabled(svm_use_linadd); 00496 mkl->set_batch_computation_enabled(svm_use_batch_computation); 00497 mkl->set_mkl_epsilon(svm_weight_epsilon); 00498 mkl->set_mkl_norm(mkl_norm); 00499 mkl->set_elasticnet_lambda(ent_lambda); 00500 mkl->set_mkl_block_norm(mkl_block_norm); 00501 mkl->set_C_mkl(C_mkl); 00502 mkl->set_interleaved_optimization_enabled(mkl_use_interleaved); 00503 00504 if (svm_do_auc_maximization) 00505 { 00506 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel); 00507 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels); 00508 ((CKernelMachine*) mkl)->set_labels(auc_labels); 00509 ((CKernelMachine*) mkl)->set_kernel(auc_kernel); 00510 SG_UNREF(auc_labels); 00511 } 00512 else 00513 { 00514 if(!oneclass) 00515 ((CKernelMachine*) mkl)->set_labels(trainlabels); 00516 ((CKernelMachine*) mkl)->set_kernel(kernel); 00517 } 00518 00519 bool result=mkl->train(); 00520 00521 return result; 00522 } 00523 00524 bool CGUIClassifier::train_svm() 00525 { 00526 EMachineType type = classifier->get_classifier_type(); 00527 00528 if (!classifier) 00529 SG_ERROR("No SVM available.\n") 00530 00531 bool oneclass=(type==CT_LIBSVMONECLASS); 00532 CLabels* trainlabels=NULL; 00533 if(!oneclass) 00534 trainlabels=ui->ui_labels->get_train_labels(); 00535 else 00536 SG_INFO("Training one class svm.\n") 00537 if (!trainlabels && !oneclass) 00538 SG_ERROR("No trainlabels available.\n") 00539 00540 CKernel* kernel=ui->ui_kernel->get_kernel(); 00541 if (!kernel) 00542 SG_ERROR("No kernel available.\n") 00543 00544 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00545 00546 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00547 SG_ERROR("Kernel not initialized / no train features available.\n") 00548 00549 int32_t num_vec=kernel->get_num_vec_lhs(); 00550 if (!oneclass && trainlabels->get_num_labels() != num_vec) 00551 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec) 00552 00553 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon) 00554 00555 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS) 00556 { 00557 CMulticlassSVM* svm = (CMulticlassSVM*)classifier; 00558 svm->set_solver_type(solver_type); 00559 svm->set_bias_enabled(svm_use_bias); 00560 svm->set_epsilon(svm_epsilon); 00561 svm->set_max_train_time(max_train_time); 00562 svm->set_tube_epsilon(svm_tube_epsilon); 00563 svm->set_nu(svm_nu); 00564 svm->set_C(svm_C1); 00565 svm->set_qpsize(svm_qpsize); 00566 svm->set_shrinking_enabled(svm_use_shrinking); 00567 svm->set_linadd_enabled(svm_use_linadd); 00568 svm->set_batch_computation_enabled(svm_use_batch_computation); 00569 } 00570 else 00571 { 00572 CSVM* svm = (CSVM*)classifier; 00573 svm->set_solver_type(solver_type); 00574 svm->set_bias_enabled(svm_use_bias); 00575 svm->set_epsilon(svm_epsilon); 00576 svm->set_max_train_time(max_train_time); 00577 svm->set_tube_epsilon(svm_tube_epsilon); 00578 svm->set_nu(svm_nu); 00579 svm->set_C(svm_C1, svm_C2); 00580 svm->set_qpsize(svm_qpsize); 00581 svm->set_shrinking_enabled(svm_use_shrinking); 00582 svm->set_linadd_enabled(svm_use_linadd); 00583 svm->set_batch_computation_enabled(svm_use_batch_computation); 00584 } 00585 00586 if (type==CT_MKLMULTICLASS) 00587 { 00588 ((CMKLMulticlass *)classifier)->set_mkl_epsilon(svm_weight_epsilon); 00589 } 00590 00591 if (svm_do_auc_maximization) 00592 { 00593 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel); 00594 CLabels* auc_labels = auc_kernel->setup_auc_maximization(trainlabels); 00595 ((CKernelMachine*)classifier)->set_labels(auc_labels); 00596 ((CKernelMachine*)classifier)->set_kernel(auc_kernel); 00597 SG_UNREF(auc_labels); 00598 } 00599 else 00600 { 00601 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS) 00602 { 00603 ((CKernelMulticlassMachine*)classifier)->set_labels(trainlabels); 00604 ((CKernelMulticlassMachine*)classifier)->set_kernel(kernel); 00605 } 00606 else 00607 { 00608 if(!oneclass) 00609 ((CKernelMachine*)classifier)->set_labels(trainlabels); 00610 00611 ((CKernelMachine*)classifier)->set_kernel(kernel); 00612 } 00613 } 00614 00615 bool result = classifier->train(); 00616 00617 return result; 00618 } 00619 00620 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter) 00621 { 00622 bool result=false; 00623 CDistance* distance=ui->ui_distance->get_distance(); 00624 00625 if (!distance) 00626 SG_ERROR("No distance available\n") 00627 00628 if (!ui->ui_distance->init_distance("TRAIN")) 00629 SG_ERROR("Initializing distance with train features failed.\n") 00630 00631 ((CDistanceMachine*) classifier)->set_distance(distance); 00632 00633 EMachineType type=classifier->get_classifier_type(); 00634 switch (type) 00635 { 00636 case CT_KMEANS: 00637 { 00638 ((CKMeans*) classifier)->set_k(k); 00639 ((CKMeans*) classifier)->set_max_iter(max_iter); 00640 result=((CKMeans*) classifier)->train(); 00641 break; 00642 } 00643 case CT_HIERARCHICAL: 00644 { 00645 ((CHierarchical*) classifier)->set_merges(k); 00646 result=((CHierarchical*) classifier)->train(); 00647 break; 00648 } 00649 default: 00650 SG_ERROR("Unknown clustering type %d\n", type) 00651 } 00652 00653 return result; 00654 } 00655 00656 bool CGUIClassifier::train_knn(int32_t k) 00657 { 00658 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00659 CDistance* distance=ui->ui_distance->get_distance(); 00660 00661 bool result=false; 00662 00663 if (trainlabels) 00664 { 00665 if (distance) 00666 { 00667 if (!ui->ui_distance->init_distance("TRAIN")) 00668 SG_ERROR("Initializing distance with train features failed.\n") 00669 ((CKNN*) classifier)->set_labels(trainlabels); 00670 ((CKNN*) classifier)->set_distance(distance); 00671 ((CKNN*) classifier)->set_k(k); 00672 result=((CKNN*) classifier)->train(); 00673 } 00674 else 00675 SG_ERROR("No distance available.\n") 00676 } 00677 else 00678 SG_ERROR("No labels available\n") 00679 00680 return result; 00681 } 00682 00683 bool CGUIClassifier::train_krr() 00684 { 00685 #ifdef HAVE_LAPACK 00686 CKernelRidgeRegression* krr= (CKernelRidgeRegression*) classifier; 00687 if (!krr) 00688 SG_ERROR("No SVM available.\n") 00689 00690 CLabels* trainlabels=NULL; 00691 trainlabels=ui->ui_labels->get_train_labels(); 00692 if (!trainlabels) 00693 SG_ERROR("No trainlabels available.\n") 00694 00695 CKernel* kernel=ui->ui_kernel->get_kernel(); 00696 if (!kernel) 00697 SG_ERROR("No kernel available.\n") 00698 00699 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00700 00701 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00702 SG_ERROR("Kernel not initialized / no train features available.\n") 00703 00704 int32_t num_vec=kernel->get_num_vec_lhs(); 00705 if (trainlabels->get_num_labels() != num_vec) 00706 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec) 00707 00708 00709 // Set training labels and kernel 00710 krr->set_labels(trainlabels); 00711 krr->set_kernel(kernel); 00712 00713 bool result=krr->train(); 00714 return result; 00715 #else 00716 return false; 00717 #endif 00718 } 00719 00720 bool CGUIClassifier::train_linear(float64_t gamma) 00721 { 00722 ASSERT(classifier) 00723 EMachineType ctype = classifier->get_classifier_type(); 00724 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 00725 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00726 bool result=false; 00727 00728 if (!trainfeatures) 00729 SG_ERROR("No trainfeatures available.\n") 00730 00731 if (!trainfeatures->has_property(FP_DOT)) 00732 SG_ERROR("Trainfeatures not based on DotFeatures.\n") 00733 00734 if (!trainlabels) 00735 SG_ERROR("No labels available\n") 00736 00737 if (ctype==CT_PERCEPTRON) 00738 { 00739 ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate); 00740 ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter); 00741 } 00742 00743 #ifdef HAVE_LAPACK 00744 if (ctype==CT_LDA) 00745 { 00746 if (trainfeatures->get_feature_type()!=F_DREAL || 00747 trainfeatures->get_feature_class()!=C_DENSE) 00748 SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n") 00749 ((CLDA*) classifier)->set_gamma(gamma); 00750 } 00751 #endif 00752 00753 if (ctype==CT_SVMOCAS) 00754 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00755 #ifdef HAVE_LAPACK 00756 else if (ctype==CT_LIBLINEAR) 00757 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2); 00758 #endif 00759 else if (ctype==CT_SVMLIN) 00760 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2); 00761 else if (ctype==CT_SVMSGD) 00762 ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2); 00763 else if (ctype==CT_LPM || ctype==CT_LPBOOST) 00764 { 00765 if (trainfeatures->get_feature_class()!=C_SPARSE || 00766 trainfeatures->get_feature_type()!=F_DREAL) 00767 SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n") 00768 } 00769 00770 ((CLinearMachine*) classifier)->set_labels(trainlabels); 00771 ((CLinearMachine*) classifier)->set_features((CDenseFeatures<float64_t>*) trainfeatures); 00772 result=((CLinearMachine*) classifier)->train(); 00773 00774 return result; 00775 } 00776 00777 bool CGUIClassifier::train_wdocas() 00778 { 00779 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 00780 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00781 00782 bool result=false; 00783 00784 if (!trainfeatures) 00785 SG_ERROR("No trainfeatures available.\n") 00786 00787 if (trainfeatures->get_feature_class()!=C_STRING || 00788 trainfeatures->get_feature_type()!=F_BYTE ) 00789 SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n") 00790 00791 if (!trainlabels) 00792 SG_ERROR("No labels available.\n") 00793 00794 ((CWDSVMOcas*) classifier)->set_labels(trainlabels); 00795 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures); 00796 result=((CWDSVMOcas*) classifier)->train(); 00797 00798 return result; 00799 } 00800 00801 bool CGUIClassifier::load(char* filename, char* type) 00802 { 00803 bool result=false; 00804 00805 if (new_classifier(type)) 00806 { 00807 FILE* model_file=fopen(filename, "r"); 00808 CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(model_file,'r'); 00809 00810 if (ascii_file) 00811 { 00812 if (classifier && classifier->load_serializable(ascii_file)) 00813 { 00814 SG_DEBUG("file successfully read.\n") 00815 result=true; 00816 } 00817 else 00818 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename) 00819 00820 delete ascii_file; 00821 } 00822 else 00823 SG_ERROR("Opening file %s failed.\n", filename) 00824 00825 return result; 00826 } 00827 else 00828 SG_ERROR("Type %s of SVM/Classifier unknown.\n", type) 00829 00830 return false; 00831 } 00832 00833 bool CGUIClassifier::save(char* param) 00834 { 00835 bool result=false; 00836 param=SGIO::skip_spaces(param); 00837 00838 if (classifier) 00839 { 00840 FILE* file=fopen(param, "w"); 00841 CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(file,'w'); 00842 00843 if ((!ascii_file) || (!classifier->save_serializable(ascii_file))) 00844 printf("writing to file %s failed!\n", param); 00845 else 00846 { 00847 printf("successfully written classifier into \"%s\" !\n", param); 00848 result=true; 00849 } 00850 00851 if (ascii_file) 00852 delete ascii_file; 00853 } 00854 else 00855 SG_ERROR("create classifier first\n") 00856 00857 return result; 00858 } 00859 00860 bool CGUIClassifier::set_perceptron_parameters( 00861 float64_t learnrate, int32_t maxiter) 00862 { 00863 if (learnrate<=0) 00864 perceptron_learnrate=0.01; 00865 else 00866 perceptron_learnrate=learnrate; 00867 00868 if (maxiter<=0) 00869 perceptron_maxiter=1000; 00870 else 00871 perceptron_maxiter=maxiter; 00872 SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter) 00873 00874 return true; 00875 } 00876 00877 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon) 00878 { 00879 if (epsilon<0) 00880 svm_epsilon=1e-4; 00881 else 00882 svm_epsilon=epsilon; 00883 SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon) 00884 00885 return true; 00886 } 00887 00888 bool CGUIClassifier::set_max_train_time(float64_t max) 00889 { 00890 if (max>0) 00891 { 00892 max_train_time=max; 00893 SG_INFO("Set to max_train_time=%f.\n", max_train_time) 00894 } 00895 else 00896 SG_INFO("Disabling max_train_time.\n") 00897 00898 return true; 00899 } 00900 00901 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon) 00902 { 00903 if (!classifier) 00904 SG_ERROR("No regression method allocated\n") 00905 00906 if (classifier->get_classifier_type() != CT_LIBSVR && 00907 classifier->get_classifier_type() != CT_SVRLIGHT && 00908 classifier->get_classifier_type() != CT_MKLREGRESSION ) 00909 { 00910 SG_ERROR("Underlying method not capable of SV-regression\n") 00911 } 00912 00913 if (tube_epsilon<0) 00914 svm_tube_epsilon=1e-2; 00915 svm_tube_epsilon=tube_epsilon; 00916 00917 ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon); 00918 SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon) 00919 00920 return true; 00921 } 00922 00923 bool CGUIClassifier::set_svm_nu(float64_t nu) 00924 { 00925 if (nu<0 || nu>1) 00926 nu=0.5; 00927 00928 svm_nu=nu; 00929 SG_INFO("Set to nu=%f.\n", svm_nu) 00930 00931 return true; 00932 } 00933 00934 bool CGUIClassifier::set_svm_mkl_parameters( 00935 float64_t weight_epsilon, float64_t C, float64_t norm) 00936 { 00937 if (weight_epsilon<0) 00938 weight_epsilon=1e-4; 00939 if (C<0) 00940 C=0; 00941 if (norm<0) 00942 SG_ERROR("MKL norm >= 0\n") 00943 00944 svm_weight_epsilon=weight_epsilon; 00945 C_mkl=C; 00946 mkl_norm=norm; 00947 00948 SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon) 00949 SG_INFO("Set to C_mkl=%f.\n", C_mkl) 00950 SG_INFO("Set to mkl_norm=%f.\n", mkl_norm) 00951 00952 return true; 00953 } 00954 00955 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda) 00956 { 00957 if (lambda<0 || lambda>1) 00958 SG_ERROR("0 <= ent_lambda <= 1\n") 00959 00960 ent_lambda = lambda; 00961 return true; 00962 } 00963 00964 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm) 00965 { 00966 if (mkl_bnorm<1) 00967 SG_ERROR("1 <= mkl_block_norm <= inf\n") 00968 00969 mkl_block_norm=mkl_bnorm; 00970 return true; 00971 } 00972 00973 00974 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2) 00975 { 00976 if (C1<0) 00977 svm_C1=1.0; 00978 else 00979 svm_C1=C1; 00980 00981 if (C2<0) 00982 svm_C2=svm_C1; 00983 else 00984 svm_C2=C2; 00985 00986 SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2) 00987 00988 return true; 00989 } 00990 00991 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize) 00992 { 00993 if (qpsize<2) 00994 svm_qpsize=41; 00995 else 00996 svm_qpsize=qpsize; 00997 SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize) 00998 00999 return true; 01000 } 01001 01002 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize) 01003 { 01004 if (max_qpsize<50) 01005 svm_max_qpsize=50; 01006 else 01007 svm_max_qpsize=max_qpsize; 01008 SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize) 01009 01010 return true; 01011 } 01012 01013 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize) 01014 { 01015 if (svm_bufsize<0) 01016 svm_bufsize=3000; 01017 else 01018 svm_bufsize=bufsize; 01019 SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize) 01020 01021 return true ; 01022 } 01023 01024 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled) 01025 { 01026 svm_use_shrinking=enabled; 01027 if (svm_use_shrinking) 01028 SG_INFO("Enabling shrinking optimization.\n") 01029 else 01030 SG_INFO("Disabling shrinking optimization.\n") 01031 01032 return true; 01033 } 01034 01035 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled) 01036 { 01037 svm_use_batch_computation=enabled; 01038 if (svm_use_batch_computation) 01039 SG_INFO("Enabling batch computation.\n") 01040 else 01041 SG_INFO("Disabling batch computation.\n") 01042 01043 return true; 01044 } 01045 01046 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled) 01047 { 01048 svm_use_linadd=enabled; 01049 if (svm_use_linadd) 01050 SG_INFO("Enabling LINADD optimization.\n") 01051 else 01052 SG_INFO("Disabling LINADD optimization.\n") 01053 01054 return true; 01055 } 01056 01057 bool CGUIClassifier::set_svm_bias_enabled(bool enabled) 01058 { 01059 svm_use_bias=enabled; 01060 if (svm_use_bias) 01061 SG_INFO("Enabling svm bias.\n") 01062 else 01063 SG_INFO("Disabling svm bias.\n") 01064 01065 return true; 01066 } 01067 01068 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled) 01069 { 01070 mkl_use_interleaved=enabled; 01071 if (mkl_use_interleaved) 01072 SG_INFO("Enabling mkl interleaved optimization.\n") 01073 else 01074 SG_INFO("Disabling mkl interleaved optimization.\n") 01075 01076 return true; 01077 } 01078 01079 bool CGUIClassifier::set_do_auc_maximization(bool do_auc) 01080 { 01081 svm_do_auc_maximization=do_auc; 01082 01083 if (svm_do_auc_maximization) 01084 SG_INFO("Enabling AUC maximization.\n") 01085 else 01086 SG_INFO("Disabling AUC maximization.\n") 01087 01088 return true; 01089 } 01090 01091 01092 CLabels* CGUIClassifier::classify() 01093 { 01094 ASSERT(classifier) 01095 01096 switch (classifier->get_classifier_type()) 01097 { 01098 case CT_LIGHT: 01099 case CT_LIGHTONECLASS: 01100 case CT_LIBSVM: 01101 case CT_SCATTERSVM: 01102 case CT_MPD: 01103 case CT_GPBT: 01104 case CT_CPLEXSVM: 01105 case CT_GMNPSVM: 01106 case CT_GNPPSVM: 01107 case CT_LIBSVR: 01108 case CT_LIBSVMMULTICLASS: 01109 case CT_LIBSVMONECLASS: 01110 case CT_SVRLIGHT: 01111 case CT_MKLCLASSIFICATION: 01112 case CT_MKLMULTICLASS: 01113 case CT_MKLREGRESSION: 01114 case CT_MKLONECLASS: 01115 case CT_KERNELRIDGEREGRESSION: 01116 return classify_kernelmachine(); 01117 case CT_KNN: 01118 return classify_distancemachine(); 01119 case CT_PERCEPTRON: 01120 case CT_LDA: 01121 return classify_linear(); 01122 case CT_SVMLIN: 01123 case CT_SVMPERF: 01124 case CT_SVMOCAS: 01125 case CT_SVMSGD: 01126 case CT_LPM: 01127 case CT_LPBOOST: 01128 case CT_LIBLINEAR: 01129 return classify_linear(); 01130 case CT_WDSVMOCAS: 01131 return classify_byte_linear(); 01132 default: 01133 SG_ERROR("unknown classifier type\n") 01134 break; 01135 }; 01136 01137 return NULL; 01138 } 01139 01140 CLabels* CGUIClassifier::classify_kernelmachine() 01141 { 01142 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01143 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01144 01145 if (!classifier) 01146 SG_ERROR("No kernelmachine available.\n") 01147 01148 bool success=true; 01149 01150 REQUIRE(ui->ui_kernel->get_kernel(), "No kernel set"); 01151 if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM) 01152 { 01153 if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED 01154 && ( !trainfeatures || !testfeatures )) 01155 { 01156 SG_DEBUG("skipping initialisation of combined kernel " 01157 "as train/test features are unavailable\n") 01158 } 01159 else 01160 { 01161 if (!trainfeatures) 01162 SG_ERROR("No training features available.\n") 01163 if (!testfeatures) 01164 SG_ERROR("No test features available.\n") 01165 01166 success=ui->ui_kernel->init_kernel("TEST"); 01167 } 01168 } 01169 01170 if (!success || !ui->ui_kernel->is_initialized()) 01171 SG_ERROR("Kernel not initialized.\n") 01172 01173 EMachineType type = classifier->get_classifier_type(); 01174 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS || 01175 type==CT_MKLMULTICLASS) 01176 { 01177 CKernelMulticlassMachine* kmcm = (CKernelMulticlassMachine*) classifier; 01178 kmcm->set_kernel(ui->ui_kernel->get_kernel()); 01179 } 01180 else 01181 { 01182 CKernelMachine* km=(CKernelMachine*) classifier; 01183 km->set_kernel(ui->ui_kernel->get_kernel()); 01184 km->set_batch_computation_enabled(svm_use_batch_computation); 01185 } 01186 01187 SG_INFO("Starting kernel machine testing.\n") 01188 return classifier->apply(); 01189 } 01190 01191 bool CGUIClassifier::get_trained_classifier( 01192 float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias, 01193 int32_t& brows, int32_t& bcols, 01194 int32_t idx) // which SVM for Multiclass 01195 { 01196 ASSERT(classifier) 01197 01198 switch (classifier->get_classifier_type()) 01199 { 01200 case CT_SCATTERSVM: 01201 case CT_GNPPSVM: 01202 case CT_LIBSVMMULTICLASS: 01203 case CT_LIGHT: 01204 case CT_LIGHTONECLASS: 01205 case CT_LIBSVM: 01206 case CT_MPD: 01207 case CT_GPBT: 01208 case CT_CPLEXSVM: 01209 case CT_GMNPSVM: 01210 case CT_LIBSVR: 01211 case CT_LIBSVMONECLASS: 01212 case CT_SVRLIGHT: 01213 case CT_MKLCLASSIFICATION: 01214 case CT_MKLREGRESSION: 01215 case CT_MKLONECLASS: 01216 case CT_MKLMULTICLASS: 01217 case CT_KERNELRIDGEREGRESSION: 01218 return get_svm(weights, rows, cols, bias, brows, bcols, idx); 01219 break; 01220 case CT_PERCEPTRON: 01221 case CT_LDA: 01222 case CT_LPM: 01223 case CT_LPBOOST: 01224 case CT_SVMOCAS: 01225 case CT_SVMSGD: 01226 case CT_SVMLIN: 01227 case CT_SVMPERF: 01228 case CT_LIBLINEAR: 01229 return get_linear(weights, rows, cols, bias, brows, bcols); 01230 break; 01231 case CT_KMEANS: 01232 case CT_HIERARCHICAL: 01233 return get_clustering(weights, rows, cols, bias, brows, bcols); 01234 break; 01235 case CT_KNN: 01236 SG_ERROR("not implemented") 01237 break; 01238 default: 01239 SG_ERROR("unknown classifier type\n") 01240 break; 01241 }; 01242 return false; 01243 } 01244 01245 01246 int32_t CGUIClassifier::get_num_svms() 01247 { 01248 ASSERT(classifier) 01249 return ((CMulticlassSVM*) classifier)->get_num_machines(); 01250 } 01251 01252 bool CGUIClassifier::get_svm( 01253 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias, 01254 int32_t& brows, int32_t& bcols, int32_t idx) 01255 { 01256 CSVM* svm=(CSVM*) classifier; 01257 01258 if (idx>-1) // should be MulticlassSVM 01259 svm=((CMulticlassSVM*) svm)->get_svm(idx); 01260 01261 if (svm) 01262 { 01263 brows=1; 01264 bcols=1; 01265 bias=SG_MALLOC(float64_t, 1); 01266 *bias=svm->get_bias(); 01267 01268 rows=svm->get_num_support_vectors(); 01269 cols=2; 01270 weights=SG_MALLOC(float64_t, rows*cols); 01271 01272 for (int32_t i=0; i<rows; i++) 01273 { 01274 weights[i]=svm->get_alpha(i); 01275 weights[i+rows]=svm->get_support_vector(i); 01276 } 01277 01278 return true; 01279 } 01280 01281 return false; 01282 } 01283 01284 bool CGUIClassifier::get_clustering( 01285 float64_t* ¢ers, int32_t& rows, int32_t& cols, float64_t*& radi, 01286 int32_t& brows, int32_t& bcols) 01287 { 01288 if (!classifier) 01289 return false; 01290 01291 switch (classifier->get_classifier_type()) 01292 { 01293 case CT_KMEANS: 01294 { 01295 CKMeans* clustering=(CKMeans*) classifier; 01296 01297 bcols=1; 01298 SGVector<float64_t> r=clustering->get_radiuses(); 01299 brows=r.vlen; 01300 radi=SG_MALLOC(float64_t, brows); 01301 memcpy(radi, r.vector, sizeof(float64_t)*brows); 01302 01303 cols=1; 01304 SGMatrix<float64_t> c=clustering->get_cluster_centers(); 01305 rows=c.num_rows; 01306 cols=c.num_cols; 01307 centers=SG_MALLOC(float64_t, rows*cols); 01308 memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols); 01309 break; 01310 } 01311 01312 case CT_HIERARCHICAL: 01313 { 01314 CHierarchical* clustering=(CHierarchical*) classifier; 01315 01316 // radi == merge_distances, centers == pairs 01317 bcols=1; 01318 SGVector<float64_t> r=clustering->get_merge_distances(); 01319 brows=r.vlen; 01320 radi=SG_MALLOC(float64_t, brows); 01321 memcpy(radi, r.vector, sizeof(float64_t)*brows); 01322 01323 SGMatrix<int32_t> p=clustering->get_cluster_pairs(); 01324 rows=p.num_rows; 01325 cols=p.num_cols; 01326 centers=SG_MALLOC(float64_t, rows*cols); 01327 for (int32_t i=0; i<rows*cols; i++) 01328 centers[i]=(float64_t) p.matrix[i]; 01329 01330 break; 01331 } 01332 01333 default: 01334 SG_ERROR("internal error - unknown clustering type\n") 01335 } 01336 01337 return true; 01338 } 01339 01340 bool CGUIClassifier::get_linear( 01341 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias, 01342 int32_t& brows, int32_t& bcols) 01343 { 01344 CLinearMachine* linear=(CLinearMachine*) classifier; 01345 01346 if (!linear) 01347 return false; 01348 01349 bias=SG_MALLOC(float64_t, 1); 01350 *bias=linear->get_bias(); 01351 brows=1; 01352 bcols=1; 01353 01354 SGVector<float64_t> w=linear->get_w(); 01355 cols=1; 01356 rows=w.vlen; 01357 01358 weights= SG_MALLOC(float64_t, w.vlen); 01359 memcpy(weights, w.vector, sizeof(float64_t)*w.vlen); 01360 01361 return true; 01362 } 01363 01364 CLabels* CGUIClassifier::classify_distancemachine() 01365 { 01366 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01367 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01368 01369 if (!classifier) 01370 { 01371 SG_ERROR("no kernelmachine available\n") 01372 return NULL; 01373 } 01374 if (!trainfeatures) 01375 { 01376 SG_ERROR("no training features available\n") 01377 return NULL; 01378 } 01379 01380 if (!testfeatures) 01381 { 01382 SG_ERROR("no test features available\n") 01383 return NULL; 01384 } 01385 01386 bool success=ui->ui_distance->init_distance("TEST"); 01387 01388 if (!success || !ui->ui_distance->is_initialized()) 01389 { 01390 SG_ERROR("distance not initialized\n") 01391 return NULL; 01392 } 01393 01394 ((CDistanceMachine*) classifier)->set_distance( 01395 ui->ui_distance->get_distance()); 01396 SG_INFO("starting distance machine testing\n") 01397 return classifier->apply(); 01398 } 01399 01400 01401 CLabels* CGUIClassifier::classify_linear() 01402 { 01403 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01404 01405 if (!classifier) 01406 { 01407 SG_ERROR("no classifier available\n") 01408 return NULL; 01409 } 01410 if (!testfeatures) 01411 { 01412 SG_ERROR("no test features available\n") 01413 return NULL; 01414 } 01415 if (!(testfeatures->has_property(FP_DOT))) 01416 { 01417 SG_ERROR("testfeatures not based on DotFeatures\n") 01418 return NULL; 01419 } 01420 01421 ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures); 01422 SG_INFO("starting linear classifier testing\n") 01423 return classifier->apply(); 01424 } 01425 01426 CLabels* CGUIClassifier::classify_byte_linear() 01427 { 01428 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01429 01430 if (!classifier) 01431 { 01432 SG_ERROR("no svm available\n") 01433 return NULL; 01434 } 01435 if (!testfeatures) 01436 { 01437 SG_ERROR("no test features available\n") 01438 return NULL; 01439 } 01440 if (testfeatures->get_feature_class() != C_STRING || 01441 testfeatures->get_feature_type() != F_BYTE ) 01442 { 01443 SG_ERROR("testfeatures not of class STRING type BYTE\n") 01444 return NULL; 01445 } 01446 01447 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures); 01448 SG_INFO("starting linear classifier testing\n") 01449 return classifier->apply(); 01450 } 01451 01452 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result) 01453 { 01454 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01455 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01456 01457 if (!classifier) 01458 { 01459 SG_ERROR("no svm available\n") 01460 return false; 01461 } 01462 01463 if (!ui->ui_kernel->is_initialized()) 01464 { 01465 SG_ERROR("kernel not initialized\n") 01466 return false; 01467 } 01468 01469 if (!ui->ui_kernel->get_kernel() || 01470 ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM) 01471 { 01472 if (!trainfeatures) 01473 { 01474 SG_ERROR("no training features available\n") 01475 return false; 01476 } 01477 01478 if (!testfeatures) 01479 { 01480 SG_ERROR("no test features available\n") 01481 return false; 01482 } 01483 } 01484 01485 ((CKernelMachine*) classifier)->set_kernel( 01486 ui->ui_kernel->get_kernel()); 01487 01488 result=((CKernelMachine*)classifier)->apply_one(idx); 01489 return true ; 01490 } 01491 01492 01493 bool CGUIClassifier::set_krr_tau(float64_t tau) 01494 { 01495 #ifdef HAVE_LAPACK 01496 krr_tau=tau; 01497 ((CKernelRidgeRegression*) classifier)->set_tau(krr_tau); 01498 SG_INFO("Set to krr_tau=%f.\n", krr_tau) 01499 01500 return true; 01501 #else 01502 return false; 01503 #endif 01504 } 01505 01506 bool CGUIClassifier::set_solver(char* solver) 01507 { 01508 ESolverType s=ST_AUTO; 01509 01510 if (strncmp(solver,"NEWTON", 6)==0) 01511 { 01512 SG_INFO("Using NEWTON solver.\n") 01513 s=ST_NEWTON; 01514 } 01515 else if (strncmp(solver,"DIRECT", 6)==0) 01516 { 01517 SG_INFO("Using DIRECT solver\n") 01518 s=ST_DIRECT; 01519 } 01520 else if (strncmp(solver,"BLOCK_NORM", 9)==0) 01521 { 01522 SG_INFO("Using BLOCK_NORM solver\n") 01523 s=ST_BLOCK_NORM; 01524 } 01525 else if (strncmp(solver,"ELASTICNET", 10)==0) 01526 { 01527 SG_INFO("Using ELASTICNET solver\n") 01528 s=ST_ELASTICNET; 01529 } 01530 else if (strncmp(solver,"AUTO", 4)==0) 01531 { 01532 SG_INFO("Automagically determining solver.\n") 01533 s=ST_AUTO; 01534 } 01535 #ifdef USE_CPLEX 01536 else if (strncmp(solver, "CPLEX", 5)==0) 01537 { 01538 SG_INFO("USING CPLEX METHOD selected\n") 01539 s=ST_CPLEX; 01540 } 01541 #endif 01542 #ifdef USE_GLPK 01543 else if (strncmp(solver,"GLPK", 4)==0) 01544 { 01545 SG_INFO("Using GLPK solver\n") 01546 s=ST_GLPK; 01547 } 01548 #endif 01549 else 01550 SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver) 01551 01552 01553 solver_type=s; 01554 return true; 01555 } 01556 01557 bool CGUIClassifier::set_constraint_generator(char* name) 01558 { 01559 if (strcmp(name,"LIBSVM_ONECLASS")==0) 01560 { 01561 SG_UNREF(constraint_generator); 01562 constraint_generator = new CLibSVMOneClass(); 01563 SG_INFO("created SVMlibsvm object for oneclass\n") 01564 } 01565 else if (strcmp(name,"LIBSVM_NU")==0) 01566 { 01567 SG_UNREF(constraint_generator); 01568 constraint_generator= new CLibSVM(LIBSVM_NU_SVC); 01569 SG_INFO("created SVMlibsvm object\n") 01570 } 01571 else if (strcmp(name,"LIBSVM")==0) 01572 { 01573 SG_UNREF(constraint_generator); 01574 constraint_generator= new CLibSVM(); 01575 SG_INFO("created SVMlibsvm object\n") 01576 } 01577 #ifdef USE_SVMLIGHT 01578 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0)) 01579 { 01580 SG_UNREF(constraint_generator); 01581 constraint_generator= new CSVMLight(); 01582 SG_INFO("created SVMLight object\n") 01583 } 01584 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0) 01585 { 01586 SG_UNREF(constraint_generator); 01587 constraint_generator= new CSVMLightOneClass(); 01588 SG_INFO("created SVMLightOneClass object\n") 01589 } 01590 else if (strcmp(name,"SVRLIGHT")==0) 01591 { 01592 SG_UNREF(constraint_generator); 01593 constraint_generator= new CSVRLight(); 01594 SG_INFO("created SVRLight object\n") 01595 } 01596 #endif //USE_SVMLIGHT 01597 else if (strcmp(name,"GPBTSVM")==0) 01598 { 01599 SG_UNREF(constraint_generator); 01600 constraint_generator= new CGPBTSVM(); 01601 SG_INFO("created GPBT-SVM object\n") 01602 } 01603 else if (strcmp(name,"MPDSVM")==0) 01604 { 01605 SG_UNREF(constraint_generator); 01606 constraint_generator= new CMPDSVM(); 01607 SG_INFO("created MPD-SVM object\n") 01608 } 01609 else if (strcmp(name,"GNPPSVM")==0) 01610 { 01611 SG_UNREF(constraint_generator); 01612 constraint_generator= new CGNPPSVM(); 01613 SG_INFO("created GNPP-SVM object\n") 01614 } 01615 else if (strcmp(name,"LIBSVR")==0) 01616 { 01617 SG_UNREF(constraint_generator); 01618 constraint_generator= new CLibSVR(); 01619 SG_INFO("created SVRlibsvm object\n") 01620 } 01621 else 01622 { 01623 SG_ERROR("Unknown SV-classifier %s.\n", name) 01624 return false; 01625 } 01626 SG_REF(constraint_generator); 01627 01628 return (constraint_generator!=NULL); 01629 }