SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 2009 Soeren Sonnnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/common.h> 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/kernel/AUCKernel.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/labels/BinaryLabels.h> 00017 00018 using namespace shogun; 00019 00020 void 00021 CAUCKernel::init() 00022 { 00023 SG_ADD((CSGObject**) &subkernel, "subkernel", "The subkernel.", 00024 MS_AVAILABLE); 00025 } 00026 00027 CAUCKernel::CAUCKernel() 00028 : CDotKernel(0), subkernel(NULL) 00029 { 00030 init(); 00031 } 00032 00033 CAUCKernel::CAUCKernel(int32_t size, CKernel* s) 00034 : CDotKernel(size), subkernel(s) 00035 { 00036 init(); 00037 SG_REF(subkernel); 00038 } 00039 00040 CAUCKernel::~CAUCKernel() 00041 { 00042 SG_UNREF(subkernel); 00043 cleanup(); 00044 } 00045 00046 CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels) 00047 { 00048 SG_INFO("setting up AUC maximization\n") 00049 ASSERT(labels) 00050 ASSERT(labels->get_label_type() == LT_BINARY) 00051 labels->ensure_valid(); 00052 00053 // get the original labels 00054 SGVector<int32_t> int_labels=((CBinaryLabels*) labels)->get_int_labels(); 00055 ASSERT(subkernel->get_num_vec_rhs()==int_labels.vlen) 00056 00057 // count positive and negative 00058 int32_t num_pos=0; 00059 int32_t num_neg=0; 00060 00061 for (int32_t i=0; i<int_labels.vlen; i++) 00062 { 00063 if (int_labels.vector[i]==1) 00064 num_pos++; 00065 else 00066 num_neg++; 00067 } 00068 00069 // create AUC features and labels (alternate labels) 00070 int32_t num_auc = num_pos*num_neg; 00071 SG_INFO("num_pos: %i num_neg: %i num_auc: %i\n", num_pos, num_neg, num_auc) 00072 00073 SGMatrix<uint16_t> features_auc(2,num_auc); 00074 int32_t* labels_auc = SG_MALLOC(int32_t, num_auc); 00075 int32_t n=0 ; 00076 00077 for (int32_t i=0; i<int_labels.vlen; i++) 00078 { 00079 if (int_labels.vector[i]!=1) 00080 continue; 00081 00082 for (int32_t j=0; j<int_labels.vlen; j++) 00083 { 00084 if (int_labels.vector[j]!=-1) 00085 continue; 00086 00087 // create about as many positively as negatively labeled examples 00088 if (n%2==0) 00089 { 00090 features_auc.matrix[n*2]=i; 00091 features_auc.matrix[n*2+1]=j; 00092 labels_auc[n]=1; 00093 } 00094 else 00095 { 00096 features_auc.matrix[n*2]=j; 00097 features_auc.matrix[n*2+1]=i; 00098 labels_auc[n]=-1; 00099 } 00100 00101 n++; 00102 ASSERT(n<=num_auc) 00103 } 00104 } 00105 00106 // create label object and attach it to svm 00107 CBinaryLabels* lab_auc = new CBinaryLabels(num_auc); 00108 lab_auc->set_int_labels(SGVector<int32_t>(labels_auc, num_auc, false)); 00109 SG_REF(lab_auc); 00110 00111 // create feature object 00112 CDenseFeatures<uint16_t>* f = new CDenseFeatures<uint16_t>(0); 00113 f->set_feature_matrix(features_auc); 00114 00115 // create AUC kernel and attach the features 00116 init(f,f); 00117 00118 SG_FREE(labels_auc); 00119 00120 return lab_auc; 00121 } 00122 00123 00124 bool CAUCKernel::init(CFeatures* l, CFeatures* r) 00125 { 00126 CDotKernel::init(l, r); 00127 init_normalizer(); 00128 return true; 00129 } 00130 00131 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b) 00132 { 00133 int32_t alen, blen; 00134 bool afree, bfree; 00135 00136 uint16_t* avec=((CDenseFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree); 00137 uint16_t* bvec=((CDenseFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree); 00138 00139 ASSERT(alen==2) 00140 ASSERT(blen==2) 00141 00142 ASSERT(subkernel && subkernel->has_features()) 00143 00144 float64_t k11,k12,k21,k22; 00145 int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1]; 00146 00147 k11 = subkernel->kernel(idx_a1,idx_b1); 00148 k12 = subkernel->kernel(idx_a1,idx_b2); 00149 k21 = subkernel->kernel(idx_a2,idx_b1); 00150 k22 = subkernel->kernel(idx_a2,idx_b2); 00151 00152 float64_t result = k11+k22-k21-k12; 00153 00154 ((CDenseFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree); 00155 ((CDenseFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree); 00156 00157 return result; 00158 }