SHOGUN v3.2.0
FeatureBlockLogisticRegression.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Sergey Lisitsyn
 */

#include <shogun/classifier/FeatureBlockLogisticRegression.h>
#include <shogun/lib/slep/slep_solver.h>
#include <shogun/lib/slep/slep_options.h>

#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/lib/IndexBlockTree.h>

namespace shogun
{

CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() :
    CLinearMachine()
{
    init();
    register_parameters();
}

CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression(
     float64_t z, CDotFeatures* train_features,
     CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
    CLinearMachine()
{
    init();
    set_feature_relation(feature_relation);
    set_z(z);
    set_features(train_features);
    set_labels(train_labels);
    register_parameters();
}

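/* Sets all members to their defaults (no feature relation, z=0, q=2,
 * termination=0, regularization=0, tolerance=1e-3, max_iter=1000);
 * both constructors call this before applying user-supplied values. */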
void CFeatureBlockLogisticRegression::init()
{
    m_feature_relation=NULL;
    m_z=0.0;
    m_q=2.0;
    m_termination=0;
    m_regularization=0;
    m_tolerance=1e-3;
    m_max_iter=1000;
}

CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression()
{
    SG_UNREF(m_feature_relation);
}

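/* Registers the hyperparameters with Shogun's parameter framework so they
 * are serialized; z and q are additionally made available to model selection. */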
void CFeatureBlockLogisticRegression::register_parameters()
{
    SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
    SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
    SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
    SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
    SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
    SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
    SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
}

CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const
{
    SG_REF(m_feature_relation);
    return m_feature_relation;
}

void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation)
{
    SG_REF(feature_relation);
    SG_UNREF(m_feature_relation);
    m_feature_relation = feature_relation;
}

int32_t CFeatureBlockLogisticRegression::get_max_iter() const
{
    return m_max_iter;
}

int32_t CFeatureBlockLogisticRegression::get_regularization() const
{
    return m_regularization;
}

int32_t CFeatureBlockLogisticRegression::get_termination() const
{
    return m_termination;
}

float64_t CFeatureBlockLogisticRegression::get_tolerance() const
{
    return m_tolerance;
}

float64_t CFeatureBlockLogisticRegression::get_z() const
{
    return m_z;
}

float64_t CFeatureBlockLogisticRegression::get_q() const
{
    return m_q;
}

void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter)
{
    ASSERT(max_iter>=0)
    m_max_iter = max_iter;
}

void CFeatureBlockLogisticRegression::set_regularization(int32_t regularization)
{
    ASSERT(regularization==0 || regularization==1)
    m_regularization = regularization;
}

void CFeatureBlockLogisticRegression::set_termination(int32_t termination)
{
    ASSERT(termination>=0 && termination<=4)
    m_termination = termination;
}

void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance)
{
    ASSERT(tolerance>0.0)
    m_tolerance = tolerance;
}

void CFeatureBlockLogisticRegression::set_z(float64_t z)
{
    m_z = z;
}

void CFeatureBlockLogisticRegression::set_q(float64_t q)
{
    m_q = q;
}

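/* Trains via the SLEP solver: copies the binary labels into a dense vector,
 * fills slep_options from the stored hyperparameters, dispatches on the
 * feature relation (FEATURE_GROUP or FEATURE_TREE), and stores the resulting
 * weight vector and bias in the linear machine. */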
bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
{
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }

    ASSERT(features)
    ASSERT(m_labels)

    int32_t n_vecs = m_labels->get_num_labels();
    SGVector<float64_t> y(n_vecs);
    for (int32_t i=0; i<n_vecs; i++)
        y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

    // fill SLEP solver options from the stored hyperparameters
    slep_options options = slep_options::default_options();
    options.q = m_q;
    options.regularization = m_regularization;
    options.termination = m_termination;
    options.tolerance = m_tolerance;
    options.max_iter = m_max_iter;
    options.loss = LOGISTIC;

    EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
    switch (relation_type)
    {
        case GROUP:
        {
            // group relation: each feature block gets unit weight in the L1/Lq penalty
            CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation;
            SGVector<index_t> ind = feature_group->get_SLEP_ind();
            options.ind = ind.vector;
            options.n_feature_blocks = ind.vlen-1;
            if (ind[ind.vlen-1] > features->get_dim_feature_space())
                SG_ERROR("Group of features covers more features than available\n")

            options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
            for (int32_t i=0; i<options.n_feature_blocks; i++)
                options.gWeight[i] = 1.0;
            options.mode = FEATURE_GROUP;
            options.loss = LOGISTIC;
            options.n_nodes = 0;
            slep_result_t result = slep_solver(features, y.vector, m_z, options);

            SG_FREE(options.gWeight);
            int32_t n_feats = features->get_dim_feature_space();
            SGVector<float64_t> new_w(n_feats);
            for (int32_t i=0; i<n_feats; i++)
                new_w[i] = result.w[i];
            set_bias(result.c[0]);

            w = new_w;
        }
        break;
        case TREE:
        {
            // tree relation: blocks come from an index block tree; general trees
            // additionally supply the G array
            CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation;

            SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
            SGVector<float64_t> G;
            if (feature_tree->is_general())
            {
                G = feature_tree->get_SLEP_G();
                options.general = true;
            }
            options.ind_t = ind_t.vector;
            options.G = G.vector;
            options.n_nodes = ind_t.vlen/3;
            options.n_feature_blocks = ind_t.vlen/3;
            options.mode = FEATURE_TREE;
            options.loss = LOGISTIC;

            slep_result_t result = slep_solver(features, y.vector, m_z, options);

            int32_t n_feats = features->get_dim_feature_space();
            SGVector<float64_t> new_w(n_feats);
            for (int32_t i=0; i<n_feats; i++)
                new_w[i] = result.w[i];

            set_bias(result.c[0]);

            w = new_w;
        }
        break;
        default:
            SG_ERROR("Unsupported feature relation type\n")
    }

    return true;
}

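/* Per-vector classification output; see apply_get_outputs() for the batch version. */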
float64_t CFeatureBlockLogisticRegression::apply_one(int32_t vec_idx)
{
    // same (-1,1) logistic mapping as apply_get_outputs()
    return 2.0/(1.0+CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias))) - 1.0;
}

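/* Computes dense dot products with the trained weights plus bias and squashes
 * them through 2/(1+exp(-f))-1, so the outputs lie in (-1,1). */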
SGVector<float64_t> CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data)
{
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }

    if (!features)
        return SGVector<float64_t>();

    int32_t num=features->get_num_vectors();
    ASSERT(num>0)
    ASSERT(w.vlen==features->get_dim_feature_space())

    float64_t* out=SG_MALLOC(float64_t, num);
    features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
    for (int32_t i=0; i<num; i++)
        out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
    return SGVector<float64_t>(out,num);
}

}
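
Below is a minimal usage sketch (not part of the original file). It assumes dense 64-bit features, a single-block CIndexBlockGroup covering both feature dimensions, and an illustrative regularization coefficient z=0.3; the data values are toy numbers.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/lib/IndexBlock.h>
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/classifier/FeatureBlockLogisticRegression.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy data: four 2-dimensional vectors with +1/-1 labels (illustrative values)
    SGMatrix<float64_t> X(2, 4);
    X(0,0)=-1.0; X(1,0)=-1.0;
    X(0,1)=-1.0; X(1,1)= 1.0;
    X(0,2)= 1.0; X(1,2)=-1.0;
    X(0,3)= 1.0; X(1,3)= 1.0;
    SGVector<float64_t> y(4);
    y[0]=-1.0; y[1]=-1.0; y[2]=1.0; y[3]=1.0;

    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
    CBinaryLabels* labels = new CBinaryLabels(y);

    // group relation with a single block covering feature indices [0,2)
    CIndexBlockGroup* group = new CIndexBlockGroup();
    group->add_block(new CIndexBlock(0, 2));

    // z=0.3 is an arbitrary regularization strength for this sketch
    CFeatureBlockLogisticRegression* clf =
        new CFeatureBlockLogisticRegression(0.3, features, labels, group);
    clf->train();

    CBinaryLabels* predictions = clf->apply_binary(features);
    predictions->get_labels().display_vector("predictions");

    SG_UNREF(predictions);
    SG_UNREF(clf);
    exit_shogun();
    return 0;
}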