SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Alesis Novik 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 #include <shogun/lib/config.h> 00011 00012 #ifdef HAVE_LAPACK 00013 00014 #include <shogun/distributions/Gaussian.h> 00015 #include <shogun/mathematics/Math.h> 00016 #include <shogun/base/Parameter.h> 00017 00018 using namespace shogun; 00019 00020 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL) 00021 { 00022 register_params(); 00023 } 00024 00025 CGaussian::CGaussian(const SGVector<float64_t> mean, SGMatrix<float64_t> cov, 00026 ECovType cov_type) : CDistribution(), m_d(), m_u(), m_cov_type(cov_type) 00027 { 00028 ASSERT(mean.vlen==cov.num_rows) 00029 ASSERT(cov.num_rows==cov.num_cols) 00030 00031 m_mean=mean; 00032 00033 if (cov.num_rows==1) 00034 m_cov_type=SPHERICAL; 00035 00036 decompose_cov(cov); 00037 init(); 00038 register_params(); 00039 } 00040 00041 void CGaussian::init() 00042 { 00043 m_constant=CMath::log(2*M_PI)*m_mean.vlen; 00044 switch (m_cov_type) 00045 { 00046 case FULL: 00047 case DIAG: 00048 for (int32_t i=0; i<m_d.vlen; i++) 00049 m_constant+=CMath::log(m_d.vector[i]); 00050 break; 00051 case SPHERICAL: 00052 m_constant+=m_mean.vlen*CMath::log(m_d.vector[0]); 00053 break; 00054 } 00055 } 00056 00057 CGaussian::~CGaussian() 00058 { 00059 } 00060 00061 bool CGaussian::train(CFeatures* data) 00062 { 00063 // init features with data if necessary and assure type is correct 00064 if (data) 00065 { 00066 if (!data->has_property(FP_DOT)) 00067 SG_ERROR("Specified features are not of type CDotFeatures\n") 00068 set_features(data); 00069 } 00070 00071 CDotFeatures* dotdata=(CDotFeatures *) data; 00072 m_mean=dotdata->get_mean(); 00073 SGMatrix<float64_t> cov=dotdata->get_cov(); 00074 decompose_cov(cov); 00075 init(); 00076 return true; 00077 } 00078 00079 int32_t CGaussian::get_num_model_parameters() 00080 { 00081 switch (m_cov_type) 00082 { 00083 case FULL: 00084 return m_u.num_rows*m_u.num_cols+m_d.vlen+m_mean.vlen; 00085 case DIAG: 00086 return m_d.vlen+m_mean.vlen; 00087 case SPHERICAL: 00088 return 1+m_mean.vlen; 00089 } 00090 return 0; 00091 } 00092 00093 float64_t CGaussian::get_log_model_parameter(int32_t num_param) 00094 { 00095 SG_NOTIMPLEMENTED 00096 return 0; 00097 } 00098 00099 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example) 00100 { 00101 SG_NOTIMPLEMENTED 00102 return 0; 00103 } 00104 00105 float64_t CGaussian::get_log_likelihood_example(int32_t num_example) 00106 { 00107 ASSERT(features->has_property(FP_DOT)) 00108 SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example); 00109 float64_t answer=compute_log_PDF(v); 00110 return answer; 00111 } 00112 00113 float64_t CGaussian::compute_log_PDF(SGVector<float64_t> point) 00114 { 00115 ASSERT(m_mean.vector && m_d.vector) 00116 ASSERT(point.vlen == m_mean.vlen) 00117 float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen); 00118 memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen); 00119 00120 for (int32_t i = 0; i < m_mean.vlen; i++) 00121 difference[i] -= m_mean.vector[i]; 00122 00123 float64_t answer=m_constant; 00124 00125 if (m_cov_type==FULL) 00126 { 00127 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen); 00128 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen, 00129 1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1); 00130 00131 for (int32_t i=0; i<m_d.vlen; i++) 00132 answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i]; 00133 00134 SG_FREE(temp_holder); 00135 } 00136 else if (m_cov_type==DIAG) 00137 { 00138 for (int32_t i=0; i<m_mean.vlen; i++) 00139 answer+=difference[i]*difference[i]/m_d.vector[i]; 00140 } 00141 else 00142 { 00143 for (int32_t i=0; i<m_mean.vlen; i++) 00144 answer+=difference[i]*difference[i]/m_d.vector[0]; 00145 } 00146 00147 SG_FREE(difference); 00148 00149 return -0.5*answer; 00150 } 00151 00152 SGVector<float64_t> CGaussian::get_mean() 00153 { 00154 return m_mean; 00155 } 00156 00157 void CGaussian::set_mean(SGVector<float64_t> mean) 00158 { 00159 if (mean.vlen==1) 00160 m_cov_type=SPHERICAL; 00161 00162 m_mean=mean; 00163 } 00164 00165 void CGaussian::set_cov(SGMatrix<float64_t> cov) 00166 { 00167 ASSERT(cov.num_rows==cov.num_cols) 00168 ASSERT(cov.num_rows==m_mean.vlen) 00169 decompose_cov(cov); 00170 init(); 00171 } 00172 00173 void CGaussian::set_d(const SGVector<float64_t> d) 00174 { 00175 m_d = d; 00176 init(); 00177 } 00178 00179 SGMatrix<float64_t> CGaussian::get_cov() 00180 { 00181 float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen); 00182 memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen); 00183 00184 if (m_cov_type==FULL) 00185 { 00186 if (!m_u.matrix) 00187 SG_ERROR("Unitary matrix not set\n") 00188 00189 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); 00190 float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); 00191 memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen); 00192 for(int32_t i=0; i<m_d.vlen; i++) 00193 diag_holder[i*m_d.vlen+i]=m_d.vector[i]; 00194 00195 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 00196 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen, 00197 diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen); 00198 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 00199 m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen, 00200 m_u.matrix, m_d.vlen, 0, cov, m_d.vlen); 00201 00202 SG_FREE(diag_holder); 00203 SG_FREE(temp_holder); 00204 } 00205 else if (m_cov_type==DIAG) 00206 { 00207 for (int32_t i=0; i<m_d.vlen; i++) 00208 cov[i*m_d.vlen+i]=m_d.vector[i]; 00209 } 00210 else 00211 { 00212 for (int32_t i=0; i<m_mean.vlen; i++) 00213 cov[i*m_mean.vlen+i]=m_d.vector[0]; 00214 } 00215 return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);//fix needed 00216 } 00217 00218 void CGaussian::register_params() 00219 { 00220 m_parameters->add(&m_u, "m_u", "Unitary matrix."); 00221 m_parameters->add(&m_d, "m_d", "Diagonal."); 00222 m_parameters->add(&m_mean, "m_mean", "Mean."); 00223 m_parameters->add(&m_constant, "m_constant", "Constant part."); 00224 m_parameters->add((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type."); 00225 } 00226 00227 void CGaussian::decompose_cov(SGMatrix<float64_t> cov) 00228 { 00229 switch (m_cov_type) 00230 { 00231 case FULL: 00232 m_u=SGMatrix<float64_t>(cov.num_rows,cov.num_rows); 00233 memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows); 00234 00235 m_d.vector=SGMatrix<float64_t>::compute_eigenvectors(m_u.matrix, cov.num_rows, cov.num_rows); 00236 m_d.vlen=cov.num_rows; 00237 m_u.num_rows=cov.num_rows; 00238 m_u.num_cols=cov.num_rows; 00239 break; 00240 case DIAG: 00241 m_d.vector=SG_MALLOC(float64_t, cov.num_rows); 00242 00243 for (int32_t i=0; i<cov.num_rows; i++) 00244 m_d.vector[i]=cov.matrix[i*cov.num_rows+i]; 00245 00246 m_d.vlen=cov.num_rows; 00247 break; 00248 case SPHERICAL: 00249 m_d.vector=SG_MALLOC(float64_t, 1); 00250 00251 m_d.vector[0]=cov.matrix[0]; 00252 m_d.vlen=1; 00253 break; 00254 } 00255 } 00256 00257 SGVector<float64_t> CGaussian::sample() 00258 { 00259 SG_DEBUG("Entering\n"); 00260 float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen); 00261 memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t)); 00262 00263 switch (m_cov_type) 00264 { 00265 case FULL: 00266 case DIAG: 00267 for (int32_t i=0; i<m_mean.vlen; i++) 00268 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]); 00269 00270 break; 00271 case SPHERICAL: 00272 for (int32_t i=0; i<m_mean.vlen; i++) 00273 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]); 00274 00275 break; 00276 } 00277 00278 float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen); 00279 00280 for (int32_t i=0; i<m_mean.vlen; i++) 00281 random_vec[i]=CMath::randn_double(); 00282 00283 if (m_cov_type==FULL) 00284 { 00285 float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); 00286 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 00287 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen, 00288 r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen); 00289 SG_FREE(r_matrix); 00290 r_matrix=temp_matrix; 00291 } 00292 00293 float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen); 00294 00295 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen, 00296 1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1); 00297 00298 for (int32_t i=0; i<m_mean.vlen; i++) 00299 samp[i]+=m_mean.vector[i]; 00300 00301 SG_FREE(random_vec); 00302 SG_FREE(r_matrix); 00303 00304 SG_DEBUG("Leaving\n"); 00305 return SGVector<float64_t>(samp, m_mean.vlen, false);//fix needed 00306 } 00307 00308 CGaussian* CGaussian::obtain_from_generic(CDistribution* distribution) 00309 { 00310 if (!distribution) 00311 return NULL; 00312 00313 CGaussian* casted=dynamic_cast<CGaussian*>(distribution); 00314 if (!casted) 00315 return NULL; 00316 00317 /* since an additional reference is returned */ 00318 SG_REF(casted); 00319 return casted; 00320 } 00321 00322 #endif // HAVE_LAPACK