SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Shell Hu 00008 * Copyright (C) 2013 Shell Hu 00009 */ 00010 00011 #include <shogun/structure/Factor.h> 00012 #include <shogun/base/Parameter.h> 00013 00014 using namespace shogun; 00015 00016 CFactor::CFactor() : CSGObject() 00017 { 00018 SG_UNSTABLE("CFactor::CFactor()", "\n"); 00019 init(); 00020 } 00021 00022 CFactor::CFactor(CTableFactorType* ftype, 00023 SGVector<int32_t> var_index, 00024 SGVector<float64_t> data) : CSGObject() 00025 { 00026 init(); 00027 m_factor_type = ftype; 00028 m_var_index = var_index; 00029 m_data = data; 00030 m_is_data_dep = true; 00031 00032 ASSERT(m_factor_type != NULL); 00033 ASSERT(m_factor_type->get_cardinalities().size() == m_var_index.size()); 00034 00035 if (m_data.size() == 0) 00036 m_is_data_dep = false; 00037 00038 if (ftype->is_table() && m_is_data_dep) 00039 m_energies.resize_vector(ftype->get_num_assignments()); 00040 00041 SG_REF(m_factor_type); 00042 SG_REF(m_data_source); 00043 } 00044 00045 CFactor::CFactor(CTableFactorType* ftype, 00046 SGVector<int32_t> var_index, 00047 SGSparseVector<float64_t> data_sparse) : CSGObject() 00048 { 00049 init(); 00050 m_factor_type = ftype; 00051 m_var_index = var_index; 00052 m_data_sparse = data_sparse; 00053 m_is_data_dep = true; 00054 00055 ASSERT(m_factor_type != NULL); 00056 ASSERT(m_factor_type->get_cardinalities().size() == m_var_index.size()); 00057 00058 if (m_data_sparse.num_feat_entries == 0) 00059 m_is_data_dep = false; 00060 00061 if (ftype->is_table() && m_is_data_dep) 00062 m_energies.resize_vector(ftype->get_num_assignments()); 00063 00064 SG_REF(m_factor_type); 00065 SG_REF(m_data_source); 00066 } 00067 00068 CFactor::CFactor(CTableFactorType* ftype, 00069 SGVector<int32_t> var_index, 00070 CFactorDataSource* data_source) : CSGObject() 00071 { 00072 init(); 00073 m_factor_type = ftype; 00074 m_var_index = var_index; 00075 m_data_source = data_source; 00076 m_is_data_dep = true; 00077 00078 ASSERT(m_factor_type != NULL); 00079 ASSERT(m_factor_type->get_cardinalities().size() == m_var_index.size()); 00080 ASSERT(m_data_source != NULL); 00081 00082 if (ftype->is_table()) 00083 m_energies.resize_vector(ftype->get_num_assignments()); 00084 00085 SG_REF(m_factor_type); 00086 SG_REF(m_data_source); 00087 } 00088 00089 CFactor::~CFactor() 00090 { 00091 SG_UNREF(m_factor_type); 00092 SG_UNREF(m_data_source); 00093 } 00094 00095 CTableFactorType* CFactor::get_factor_type() const 00096 { 00097 SG_REF(m_factor_type); 00098 return m_factor_type; 00099 } 00100 00101 void CFactor::set_factor_type(CTableFactorType* ftype) 00102 { 00103 m_factor_type = ftype; 00104 SG_REF(m_factor_type); 00105 } 00106 00107 const SGVector<int32_t> CFactor::get_variables() const 00108 { 00109 return m_var_index; 00110 } 00111 00112 void CFactor::set_variables(SGVector<int32_t> vars) 00113 { 00114 m_var_index = vars.clone(); 00115 } 00116 00117 const SGVector<int32_t> CFactor::get_cardinalities() const 00118 { 00119 return m_factor_type->get_cardinalities(); 00120 } 00121 00122 SGVector<float64_t> CFactor::get_data() const 00123 { 00124 if (m_data_source != NULL) 00125 return m_data_source->get_data(); 00126 00127 return m_data; 00128 } 00129 00130 SGSparseVector<float64_t> CFactor::get_data_sparse() const 00131 { 00132 if (m_data_source != NULL) 00133 return m_data_source->get_data_sparse(); 00134 00135 return m_data_sparse; 00136 } 00137 00138 void CFactor::set_data(SGVector<float64_t> data_dense) 00139 { 00140 m_data = data_dense.clone(); 00141 m_is_data_dep = true; 00142 } 00143 00144 void CFactor::set_data_sparse(SGSparseVectorEntry<float64_t>* data_sparse, 00145 int32_t dlen) 00146 { 00147 m_data_sparse = SGSparseVector<float64_t>(data_sparse, dlen); 00148 m_is_data_dep = true; 00149 } 00150 00151 bool CFactor::is_data_dependent() const 00152 { 00153 return m_is_data_dep; 00154 } 00155 00156 bool CFactor::is_data_sparse() const 00157 { 00158 if (m_data_source != NULL) 00159 return m_data_source->is_sparse(); 00160 00161 return (m_data.size() == 0); 00162 } 00163 00164 SGVector<float64_t> CFactor::get_energies() const 00165 { 00166 if (is_data_dependent() == false && m_energies.size() == 0) 00167 { 00168 const SGVector<float64_t> ft_energies = m_factor_type->get_w(); 00169 ASSERT(ft_energies.size() == m_factor_type->get_num_assignments()); 00170 return ft_energies; 00171 } 00172 return m_energies; 00173 } 00174 00175 float64_t CFactor::get_energy(int32_t index) const 00176 { 00177 return get_energies()[index]; // note for data indep, we get m_w not m_energies 00178 } 00179 00180 void CFactor::set_energies(SGVector<float64_t> ft_energies) 00181 { 00182 REQUIRE(m_factor_type->get_num_assignments() == ft_energies.size(), 00183 "%s::set_energies(): ft_energies is not a valid energy table!\n", get_name()); 00184 00185 m_energies = ft_energies; 00186 } 00187 00188 void CFactor::set_energy(int32_t ei, float64_t value) 00189 { 00190 REQUIRE(ei >= 0 && ei < m_factor_type->get_num_assignments(), 00191 "%s::set_energy(): ei is out of index!\n", get_name()); 00192 00193 REQUIRE(is_data_dependent(), "%s::set_energy(): \ 00194 energy table is fixed in data dependent factor!\n", get_name()); 00195 00196 m_energies[ei] = value; 00197 } 00198 00199 float64_t CFactor::evaluate_energy(const SGVector<int32_t> state) const 00200 { 00201 int32_t index = m_factor_type->index_from_universe_assignment(state, m_var_index); 00202 return get_energy(index); 00203 } 00204 00205 void CFactor::compute_energies() 00206 { 00207 if (is_data_dependent() == false) 00208 return; 00209 00210 // For some factor types the size of the energy table is determined only 00211 // after an initialization step from training data. 00212 if (m_energies.size() == 0) 00213 m_energies.resize_vector(m_factor_type->get_num_assignments()); 00214 00215 const SGVector<float64_t> H = get_data(); 00216 const SGSparseVector<float64_t> H_sparse = get_data_sparse(); 00217 00218 if (H_sparse.num_feat_entries == 0) 00219 m_factor_type->compute_energies(H, m_energies); 00220 else 00221 m_factor_type->compute_energies(H_sparse, m_energies); 00222 } 00223 00224 void CFactor::compute_gradients( 00225 const SGVector<float64_t> marginals, 00226 SGVector<float64_t>& parameter_gradient, 00227 float64_t mult) const 00228 { 00229 const SGVector<float64_t> H = get_data(); 00230 const SGSparseVector<float64_t> H_sparse = get_data_sparse(); 00231 00232 if (H_sparse.num_feat_entries == 0) 00233 m_factor_type->compute_gradients(H, marginals, parameter_gradient, mult); 00234 else 00235 m_factor_type->compute_gradients(H_sparse, marginals, parameter_gradient, mult); 00236 } 00237 00238 void CFactor::init() 00239 { 00240 SG_ADD((CSGObject**)&m_factor_type, "type_name", "Factor type name", MS_NOT_AVAILABLE); 00241 SG_ADD(&m_var_index, "var_index", "Factor variable index", MS_NOT_AVAILABLE); 00242 SG_ADD(&m_energies, "energies", "Factor energies", MS_NOT_AVAILABLE); 00243 SG_ADD((CSGObject**)&m_data_source, "data_source", "Factor data source", MS_NOT_AVAILABLE); 00244 SG_ADD(&m_data, "data", "Factor data", MS_NOT_AVAILABLE); 00245 SG_ADD(&m_data_sparse, "data_sparse", "Sparse factor data", MS_NOT_AVAILABLE); 00246 SG_ADD(&m_is_data_dep, "is_data_dep", "Factor is data dependent or not", MS_NOT_AVAILABLE); 00247 00248 m_factor_type=NULL; 00249 m_data_source=NULL; 00250 m_is_data_dep = false; 00251 } 00252 00253 CFactorDataSource::CFactorDataSource() : CSGObject() 00254 { 00255 init(); 00256 } 00257 00258 CFactorDataSource::CFactorDataSource(SGVector<float64_t> dense) 00259 : CSGObject() 00260 { 00261 init(); 00262 m_dense = dense; 00263 } 00264 00265 CFactorDataSource::CFactorDataSource(SGSparseVector<float64_t> sparse) 00266 : CSGObject() 00267 { 00268 init(); 00269 m_sparse = sparse; 00270 } 00271 00272 CFactorDataSource::~CFactorDataSource() 00273 { 00274 } 00275 00276 bool CFactorDataSource::is_sparse() const 00277 { 00278 return (m_dense.size() == 0); 00279 } 00280 00281 SGVector<float64_t> CFactorDataSource::get_data() const 00282 { 00283 return m_dense; 00284 } 00285 00286 SGSparseVector<float64_t> CFactorDataSource::get_data_sparse() const 00287 { 00288 return m_sparse; 00289 } 00290 00291 void CFactorDataSource::set_data(SGVector<float64_t> dense) 00292 { 00293 m_dense = dense.clone(); 00294 } 00295 00296 void CFactorDataSource::set_data_sparse(SGSparseVectorEntry<float64_t>* sparse, 00297 int32_t dlen) 00298 { 00299 m_sparse = SGSparseVector<float64_t>(sparse, dlen); 00300 } 00301 00302 void CFactorDataSource::init() 00303 { 00304 SG_ADD(&m_dense, "dense", "Shared data", MS_NOT_AVAILABLE); 00305 SG_ADD(&m_sparse, "sparse", "Shared sparse data", MS_NOT_AVAILABLE); 00306 } 00307