SHOGUN
v3.2.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Sergey Lisitsyn
 */

#include <shogun/classifier/FeatureBlockLogisticRegression.h>
#include <shogun/lib/slep/slep_solver.h>
#include <shogun/lib/slep/slep_options.h>

#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/lib/IndexBlockTree.h>

namespace shogun
{

CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() :
	CLinearMachine()
{
	init();
	register_parameters();
}

CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression(
	float64_t z, CDotFeatures* train_features,
	CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
	CLinearMachine()
{
	init();
	set_feature_relation(feature_relation);
	set_z(z);
	set_features(train_features);
	set_labels(train_labels);
	register_parameters();
}

void CFeatureBlockLogisticRegression::init()
{
	m_feature_relation=NULL;
	m_z=0.0;
	m_q=2.0;
	m_termination=0;
	m_regularization=0;
	m_tolerance=1e-3;
	m_max_iter=1000;
}

CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression()
{
	SG_UNREF(m_feature_relation);
}

void CFeatureBlockLogisticRegression::register_parameters()
{
	SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
	SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
	SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
	SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
	SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
	SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
	SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
}

CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const
{
	SG_REF(m_feature_relation);
	return m_feature_relation;
}

void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation)
{
	SG_REF(feature_relation);
	SG_UNREF(m_feature_relation);
	m_feature_relation = feature_relation;
}

int32_t CFeatureBlockLogisticRegression::get_max_iter() const
{
	return m_max_iter;
}

int32_t CFeatureBlockLogisticRegression::get_regularization() const
{
	return m_regularization;
}

int32_t CFeatureBlockLogisticRegression::get_termination() const
{
	return m_termination;
}

float64_t CFeatureBlockLogisticRegression::get_tolerance() const
{
	return m_tolerance;
}

float64_t CFeatureBlockLogisticRegression::get_z() const
{
	return m_z;
}

float64_t CFeatureBlockLogisticRegression::get_q() const
{
	return m_q;
}

void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter)
{
	ASSERT(max_iter>=0)
	m_max_iter = max_iter;
}

void
CFeatureBlockLogisticRegression::set_regularization(int32_t regularization)
{
	ASSERT(regularization==0 || regularization==1)
	m_regularization = regularization;
}

void CFeatureBlockLogisticRegression::set_termination(int32_t termination)
{
	ASSERT(termination>=0 && termination<=4)
	m_termination = termination;
}

void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance)
{
	ASSERT(tolerance>0.0)
	m_tolerance = tolerance;
}

void CFeatureBlockLogisticRegression::set_z(float64_t z)
{
	m_z = z;
}

void CFeatureBlockLogisticRegression::set_q(float64_t q)
{
	m_q = q;
}

bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
{
	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")

		set_features((CDotFeatures*)data);
	}

	ASSERT(features)
	ASSERT(m_labels)

	// copy the binary labels into a dense vector for the SLEP solver
	int32_t n_vecs = m_labels->get_num_labels();
	SGVector<float64_t> y(n_vecs);
	for (int32_t i=0; i<n_vecs; i++)
		y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

	// configure the SLEP solver with the current hyperparameters
	slep_options options = slep_options::default_options();
	options.q = m_q;
	options.regularization = m_regularization;
	options.termination = m_termination;
	options.tolerance = m_tolerance;
	options.max_iter = m_max_iter;
	options.loss = LOGISTIC;

	EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
	switch (relation_type)
	{
		case GROUP:
		{
			// group relation: L1/Lq regularization over disjoint feature blocks
			CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation;
			SGVector<index_t> ind = feature_group->get_SLEP_ind();
			options.ind = ind.vector;
			options.n_feature_blocks = ind.vlen-1;
			if (ind[ind.vlen-1] > features->get_dim_feature_space())
				SG_ERROR("Group of features covers more features than available\n")

			// uniform weight for each feature block
			options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
			for (int32_t i=0; i<options.n_feature_blocks; i++)
				options.gWeight[i] = 1.0;
			options.mode = FEATURE_GROUP;
			options.loss = LOGISTIC;
			options.n_nodes = 0;
			slep_result_t result = slep_solver(features, y.vector, m_z, options);

			SG_FREE(options.gWeight);
			// copy the learned weights and bias from the solver result
			int32_t n_feats = features->get_dim_feature_space();
			SGVector<float64_t> new_w(n_feats);
			for (int i=0; i<n_feats; i++)
				new_w[i] = result.w[i];
			set_bias(result.c[0]);

			w = new_w;
		}
		break;
		case TREE:
		{
			// tree relation: hierarchical (tree-structured) group regularization
			CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation;

			SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
			SGVector<float64_t> G;
			if (feature_tree->is_general())
			{
				G = feature_tree->get_SLEP_G();
				options.general = true;
			}
			options.ind_t = ind_t.vector;
			options.G = G.vector;
			options.n_nodes = ind_t.vlen/3;
			options.n_feature_blocks = ind_t.vlen/3;
			options.mode = FEATURE_TREE;
			options.loss = LOGISTIC;

			slep_result_t result = slep_solver(features, y.vector, m_z, options);

			// copy the learned weights and bias from the solver result
			int32_t n_feats = features->get_dim_feature_space();
			SGVector<float64_t> new_w(n_feats);
			for (int i=0; i<n_feats; i++)
				new_w[i] = result.w[i];

			set_bias(result.c[0]);

			w = new_w;
		}
		break;
		default:
			SG_ERROR("Unsupported feature relation type\n")
	}

	return true;
}

float64_t CFeatureBlockLogisticRegression::apply_one(int32_t vec_idx)
{
	// map the score w'x+b into [-1,1], consistent with apply_get_outputs below
	return 2.0/(1.0+CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias))) - 1.0;
}

SGVector<float64_t> CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data)
{
	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")

		set_features((CDotFeatures*) data);
	}

	if (!features)
		return SGVector<float64_t>();

	int32_t num=features->get_num_vectors();
	ASSERT(num>0)
	ASSERT(w.vlen==features->get_dim_feature_space())

	// compute w'x+b for all vectors and squash each score into [-1,1]
	float64_t* out=SG_MALLOC(float64_t, num);
	features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
	for (int32_t i=0; i<num; i++)
		out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
	return SGVector<float64_t>(out,num);
}

}
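/* Usage sketch (illustration only, not part of the original file). It assumes
 * dense real-valued features, binary labels and a single feature group spanning
 * all dimensions; the variable names, helper function and the regularization
 * value 0.1 are made up for the example.
 *
 *   #include <shogun/features/DenseFeatures.h>
 *   #include <shogun/labels/BinaryLabels.h>
 *   #include <shogun/lib/IndexBlock.h>
 *   #include <shogun/lib/IndexBlockGroup.h>
 *   #include <shogun/classifier/FeatureBlockLogisticRegression.h>
 *
 *   using namespace shogun;
 *
 *   void train_example(CDenseFeatures<float64_t>* feats, CBinaryLabels* labels)
 *   {
 *       // one block covering all features; several blocks would give group sparsity
 *       CIndexBlockGroup* group = new CIndexBlockGroup();
 *       group->add_block(new CIndexBlock(0, feats->get_num_features()));
 *
 *       CFeatureBlockLogisticRegression* clf =
 *           new CFeatureBlockLogisticRegression(0.1, feats, labels, group);
 *       clf->set_q(2.0);  // q of the L1/Lq norm, 2.0 is the default
 *       clf->train();
 *
 *       CBinaryLabels* predicted = clf->apply_binary(feats);
 *       SG_UNREF(predicted);
 *       SG_UNREF(clf);
 *   }
 */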