SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Factor.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Shell Hu
00008  * Copyright (C) 2013 Shell Hu
00009  */
00010 
00011 #include <shogun/structure/Factor.h>
00012 #include <shogun/base/Parameter.h>
00013 
00014 using namespace shogun;
00015 
00016 CFactor::CFactor() : CSGObject()
00017 {
00018     SG_UNSTABLE("CFactor::CFactor()", "\n");
00019     init();
00020 }
00021 
00022 CFactor::CFactor(CTableFactorType* ftype,
00023     SGVector<int32_t> var_index,
00024     SGVector<float64_t> data) : CSGObject()
00025 {
00026     init();
00027     m_factor_type = ftype;
00028     m_var_index = var_index;
00029     m_data = data;
00030     m_is_data_dep = true;
00031 
00032     ASSERT(m_factor_type != NULL);
00033     ASSERT(m_factor_type->get_cardinalities().size() == m_var_index.size());
00034 
00035     if (m_data.size() == 0)
00036         m_is_data_dep = false;
00037 
00038     if (ftype->is_table() && m_is_data_dep)
00039         m_energies.resize_vector(ftype->get_num_assignments());
00040 
00041     SG_REF(m_factor_type);
00042     SG_REF(m_data_source);
00043 }
00044 
00045 CFactor::CFactor(CTableFactorType* ftype,
00046     SGVector<int32_t> var_index,
00047     SGSparseVector<float64_t> data_sparse) : CSGObject()
00048 {
00049     init();
00050     m_factor_type = ftype;
00051     m_var_index = var_index;
00052     m_data_sparse = data_sparse;
00053     m_is_data_dep = true;
00054 
00055     ASSERT(m_factor_type != NULL);
00056     ASSERT(m_factor_type->get_cardinalities().size() == m_var_index.size());
00057 
00058     if (m_data_sparse.num_feat_entries == 0)
00059         m_is_data_dep = false;
00060 
00061     if (ftype->is_table() && m_is_data_dep)
00062         m_energies.resize_vector(ftype->get_num_assignments());
00063 
00064     SG_REF(m_factor_type);
00065     SG_REF(m_data_source);
00066 }
00067 
00068 CFactor::CFactor(CTableFactorType* ftype,
00069     SGVector<int32_t> var_index,
00070     CFactorDataSource* data_source) : CSGObject()
00071 {
00072     init();
00073     m_factor_type = ftype;
00074     m_var_index = var_index;
00075     m_data_source = data_source;
00076     m_is_data_dep = true;
00077 
00078     ASSERT(m_factor_type != NULL);
00079     ASSERT(m_factor_type->get_cardinalities().size() == m_var_index.size());
00080     ASSERT(m_data_source != NULL);
00081 
00082     if (ftype->is_table())
00083         m_energies.resize_vector(ftype->get_num_assignments());
00084 
00085     SG_REF(m_factor_type);
00086     SG_REF(m_data_source);
00087 }
00088 
00089 CFactor::~CFactor()
00090 {
00091     SG_UNREF(m_factor_type);
00092     SG_UNREF(m_data_source);
00093 }
00094 
00095 CTableFactorType* CFactor::get_factor_type() const
00096 {
00097     SG_REF(m_factor_type);
00098     return m_factor_type;
00099 }
00100 
00101 void CFactor::set_factor_type(CTableFactorType* ftype)
00102 {
00103     m_factor_type = ftype;
00104     SG_REF(m_factor_type);
00105 }
00106 
00107 const SGVector<int32_t> CFactor::get_variables() const
00108 {
00109     return m_var_index;
00110 }
00111 
00112 void CFactor::set_variables(SGVector<int32_t> vars)
00113 {
00114     m_var_index = vars.clone();
00115 }
00116 
00117 const SGVector<int32_t> CFactor::get_cardinalities() const
00118 {
00119     return m_factor_type->get_cardinalities();
00120 }
00121 
00122 SGVector<float64_t> CFactor::get_data() const
00123 {
00124     if (m_data_source != NULL)
00125         return m_data_source->get_data();
00126 
00127     return m_data;
00128 }
00129 
00130 SGSparseVector<float64_t> CFactor::get_data_sparse() const
00131 {
00132     if (m_data_source != NULL)
00133         return m_data_source->get_data_sparse();
00134 
00135     return m_data_sparse;
00136 }
00137 
00138 void CFactor::set_data(SGVector<float64_t> data_dense)
00139 {
00140     m_data = data_dense.clone();
00141     m_is_data_dep = true;
00142 }
00143 
00144 void CFactor::set_data_sparse(SGSparseVectorEntry<float64_t>* data_sparse,
00145     int32_t dlen)
00146 {
00147     m_data_sparse = SGSparseVector<float64_t>(data_sparse, dlen);
00148     m_is_data_dep = true;
00149 }
00150 
00151 bool CFactor::is_data_dependent() const
00152 {
00153     return m_is_data_dep;
00154 }
00155 
00156 bool CFactor::is_data_sparse() const
00157 {
00158     if (m_data_source != NULL)
00159         return m_data_source->is_sparse();
00160 
00161     return (m_data.size() == 0);
00162 }
00163 
00164 SGVector<float64_t> CFactor::get_energies() const
00165 {
00166     if (is_data_dependent() == false && m_energies.size() == 0)
00167     {
00168         const SGVector<float64_t> ft_energies = m_factor_type->get_w();
00169         ASSERT(ft_energies.size() == m_factor_type->get_num_assignments());
00170         return ft_energies;
00171     }
00172     return m_energies;
00173 }
00174 
00175 float64_t CFactor::get_energy(int32_t index) const
00176 {
00177     return get_energies()[index]; // note for data indep, we get m_w not m_energies
00178 }
00179 
00180 void CFactor::set_energies(SGVector<float64_t> ft_energies)
00181 {
00182     REQUIRE(m_factor_type->get_num_assignments() == ft_energies.size(),
00183         "%s::set_energies(): ft_energies is not a valid energy table!\n", get_name());
00184 
00185     m_energies = ft_energies;
00186 }
00187 
00188 void CFactor::set_energy(int32_t ei, float64_t value)
00189 {
00190     REQUIRE(ei >= 0 && ei < m_factor_type->get_num_assignments(),
00191         "%s::set_energy(): ei is out of index!\n", get_name());
00192 
00193     REQUIRE(is_data_dependent(), "%s::set_energy(): \
00194         energy table is fixed in data dependent factor!\n", get_name());
00195 
00196     m_energies[ei] = value;
00197 }
00198 
00199 float64_t CFactor::evaluate_energy(const SGVector<int32_t> state) const
00200 {
00201     int32_t index = m_factor_type->index_from_universe_assignment(state, m_var_index);
00202     return get_energy(index);
00203 }
00204 
00205 void CFactor::compute_energies()
00206 {
00207     if (is_data_dependent() == false)
00208         return;
00209 
00210     // For some factor types the size of the energy table is determined only
00211     // after an initialization step from training data.
00212     if (m_energies.size() == 0)
00213         m_energies.resize_vector(m_factor_type->get_num_assignments());
00214 
00215     const SGVector<float64_t> H = get_data();
00216     const SGSparseVector<float64_t> H_sparse = get_data_sparse();
00217 
00218     if (H_sparse.num_feat_entries == 0)
00219         m_factor_type->compute_energies(H, m_energies);
00220     else
00221         m_factor_type->compute_energies(H_sparse, m_energies);
00222 }
00223 
00224 void CFactor::compute_gradients(
00225     const SGVector<float64_t> marginals,
00226     SGVector<float64_t>& parameter_gradient,
00227     float64_t mult) const
00228 {
00229     const SGVector<float64_t> H = get_data();
00230     const SGSparseVector<float64_t> H_sparse = get_data_sparse();
00231 
00232     if (H_sparse.num_feat_entries == 0)
00233         m_factor_type->compute_gradients(H, marginals, parameter_gradient, mult);
00234     else
00235         m_factor_type->compute_gradients(H_sparse, marginals, parameter_gradient, mult);
00236 }
00237 
00238 void CFactor::init()
00239 {
00240     SG_ADD((CSGObject**)&m_factor_type, "type_name", "Factor type name", MS_NOT_AVAILABLE);
00241     SG_ADD(&m_var_index, "var_index", "Factor variable index", MS_NOT_AVAILABLE);
00242     SG_ADD(&m_energies, "energies", "Factor energies", MS_NOT_AVAILABLE);
00243     SG_ADD((CSGObject**)&m_data_source, "data_source", "Factor data source", MS_NOT_AVAILABLE);
00244     SG_ADD(&m_data, "data", "Factor data", MS_NOT_AVAILABLE);
00245     SG_ADD(&m_data_sparse, "data_sparse", "Sparse factor data", MS_NOT_AVAILABLE);
00246     SG_ADD(&m_is_data_dep, "is_data_dep", "Factor is data dependent or not", MS_NOT_AVAILABLE);
00247 
00248     m_factor_type=NULL;
00249     m_data_source=NULL;
00250     m_is_data_dep = false;
00251 }
00252 
00253 CFactorDataSource::CFactorDataSource() : CSGObject()
00254 {
00255     init();
00256 }
00257 
00258 CFactorDataSource::CFactorDataSource(SGVector<float64_t> dense)
00259     : CSGObject()
00260 {
00261     init();
00262     m_dense = dense;
00263 }
00264 
00265 CFactorDataSource::CFactorDataSource(SGSparseVector<float64_t> sparse)
00266     : CSGObject()
00267 {
00268     init();
00269     m_sparse = sparse;
00270 }
00271 
00272 CFactorDataSource::~CFactorDataSource()
00273 {
00274 }
00275 
00276 bool CFactorDataSource::is_sparse() const
00277 {
00278     return (m_dense.size() == 0);
00279 }
00280 
00281 SGVector<float64_t> CFactorDataSource::get_data() const
00282 {
00283     return m_dense;
00284 }
00285 
00286 SGSparseVector<float64_t> CFactorDataSource::get_data_sparse() const
00287 {
00288     return m_sparse;
00289 }
00290 
00291 void CFactorDataSource::set_data(SGVector<float64_t> dense)
00292 {
00293     m_dense = dense.clone();
00294 }
00295 
00296 void CFactorDataSource::set_data_sparse(SGSparseVectorEntry<float64_t>* sparse,
00297     int32_t dlen)
00298 {
00299     m_sparse = SGSparseVector<float64_t>(sparse, dlen);
00300 }
00301 
00302 void CFactorDataSource::init()
00303 {
00304     SG_ADD(&m_dense, "dense", "Shared data", MS_NOT_AVAILABLE);
00305     SG_ADD(&m_sparse, "sparse", "Shared sparse data", MS_NOT_AVAILABLE);
00306 }
00307 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation