// SHOGUN v3.2.0 -- preprocessor/KernelPCA.cpp
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Soeren Sonnenburg 00008 * Copyright (C) 2011 Berlin Institute of Technology 00009 */ 00010 00011 #include <shogun/preprocessor/KernelPCA.h> 00012 #ifdef HAVE_LAPACK 00013 #include <shogun/lib/config.h> 00014 #include <shogun/mathematics/Math.h> 00015 00016 #include <string.h> 00017 #include <stdlib.h> 00018 00019 #include <shogun/mathematics/lapack.h> 00020 #include <shogun/lib/common.h> 00021 #include <shogun/kernel/Kernel.h> 00022 #include <shogun/preprocessor/DimensionReductionPreprocessor.h> 00023 #include <shogun/features/Features.h> 00024 #include <shogun/io/SGIO.h> 00025 00026 using namespace shogun; 00027 00028 CKernelPCA::CKernelPCA() : CDimensionReductionPreprocessor() 00029 { 00030 init(); 00031 } 00032 00033 CKernelPCA::CKernelPCA(CKernel* k) : CDimensionReductionPreprocessor() 00034 { 00035 init(); 00036 set_kernel(k); 00037 } 00038 00039 void CKernelPCA::init() 00040 { 00041 m_initialized = false; 00042 m_init_features = NULL; 00043 m_transformation_matrix = SGMatrix<float64_t>(NULL, 0, 0, false); 00044 m_bias_vector = SGVector<float64_t>(NULL, 0, false); 00045 00046 SG_ADD(&m_transformation_matrix, "transformation_matrix", 00047 "matrix used to transform data", MS_NOT_AVAILABLE); 00048 SG_ADD(&m_bias_vector, "bias_vector", 00049 "bias vector used to transform data", MS_NOT_AVAILABLE); 00050 } 00051 00052 void CKernelPCA::cleanup() 00053 { 00054 m_transformation_matrix = SGMatrix<float64_t>(); 00055 00056 if (m_init_features) 00057 SG_UNREF(m_init_features); 00058 00059 m_initialized = false; 00060 } 00061 00062 CKernelPCA::~CKernelPCA() 00063 { 00064 if (m_init_features) 00065 SG_UNREF(m_init_features); 00066 } 00067 00068 bool 
CKernelPCA::init(CFeatures* features) 00069 { 00070 if (!m_initialized && m_kernel) 00071 { 00072 SG_REF(features); 00073 m_init_features = features; 00074 00075 m_kernel->init(features,features); 00076 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix(); 00077 m_kernel->cleanup(); 00078 int32_t n = kernel_matrix.num_cols; 00079 int32_t m = kernel_matrix.num_rows; 00080 ASSERT(n==m) 00081 00082 float64_t* bias_tmp = SGMatrix<float64_t>::get_column_sum(kernel_matrix.matrix, n,n); 00083 SGVector<float64_t>::scale_vector(-1.0/n, bias_tmp, n); 00084 float64_t s = SGVector<float64_t>::sum(bias_tmp, n)/n; 00085 SGVector<float64_t>::add_scalar(-s, bias_tmp, n); 00086 00087 SGMatrix<float64_t>::center_matrix(kernel_matrix.matrix, n, m); 00088 00089 float64_t* eigenvalues=SGMatrix<float64_t>::compute_eigenvectors(kernel_matrix.matrix, n, n); 00090 00091 for (int32_t i=0; i<n; i++) 00092 { 00093 //normalize and trap divide by zero and negative eigenvalues 00094 for (int32_t j=0; j<n; j++) 00095 kernel_matrix.matrix[i*n+j]/=CMath::sqrt(CMath::max(1e-16,eigenvalues[i])); 00096 } 00097 00098 SG_FREE(eigenvalues); 00099 00100 m_transformation_matrix = SGMatrix<float64_t>(kernel_matrix.matrix,n,n); 00101 kernel_matrix.matrix = NULL; 00102 m_bias_vector = SGVector<float64_t>(n); 00103 SGVector<float64_t>::fill_vector(m_bias_vector.vector, m_bias_vector.vlen, 0.0); 00104 00105 cblas_dgemv(CblasColMajor, CblasTrans, 00106 n, n, 1.0, m_transformation_matrix.matrix, n, 00107 bias_tmp, 1, 0.0, m_bias_vector.vector, 1); 00108 00109 float64_t* rowsum = SGMatrix<float64_t>::get_row_sum(m_transformation_matrix.matrix, n, n); 00110 SGVector<float64_t>::scale_vector(1.0/n, rowsum, n); 00111 00112 for (int32_t i=0; i<n; i++) 00113 { 00114 for (int32_t j=0; j<n; j++) 00115 m_transformation_matrix.matrix[j+n*i] -= rowsum[i]; 00116 } 00117 SG_FREE(rowsum); 00118 SG_FREE(bias_tmp); 00119 00120 m_initialized=true; 00121 SG_INFO("Done\n") 00122 return true; 00123 } 00124 return false; 
00125 } 00126 00127 00128 SGMatrix<float64_t> CKernelPCA::apply_to_feature_matrix(CFeatures* features) 00129 { 00130 ASSERT(m_initialized) 00131 CDenseFeatures<float64_t>* simple_features = (CDenseFeatures<float64_t>*)features; 00132 00133 int32_t num_vectors = simple_features->get_num_vectors(); 00134 int32_t i,j,k; 00135 int32_t n = m_transformation_matrix.num_cols; 00136 00137 m_kernel->init(features,m_init_features); 00138 00139 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors); 00140 00141 for (i=0; i<num_vectors; i++) 00142 { 00143 for (j=0; j<m_target_dim; j++) 00144 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j]; 00145 00146 for (j=0; j<n; j++) 00147 { 00148 float64_t kij = m_kernel->kernel(i,j); 00149 00150 for (k=0; k<m_target_dim; k++) 00151 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j]; 00152 } 00153 } 00154 00155 m_kernel->cleanup(); 00156 simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors)); 00157 return ((CDenseFeatures<float64_t>*)features)->get_feature_matrix(); 00158 } 00159 00160 SGVector<float64_t> CKernelPCA::apply_to_feature_vector(SGVector<float64_t> vector) 00161 { 00162 ASSERT(m_initialized) 00163 SGVector<float64_t> result = SGVector<float64_t>(m_target_dim); 00164 m_kernel->init(new CDenseFeatures<float64_t>(SGMatrix<float64_t>(vector.vector,vector.vlen,1)), 00165 m_init_features); 00166 00167 int32_t j,k; 00168 int32_t n = m_transformation_matrix.num_cols; 00169 00170 for (j=0; j<m_target_dim; j++) 00171 result.vector[j] = m_bias_vector.vector[j]; 00172 00173 for (j=0; j<n; j++) 00174 { 00175 float64_t kj = m_kernel->kernel(0,j); 00176 00177 for (k=0; k<m_target_dim; k++) 00178 result.vector[k] += kj*m_transformation_matrix.matrix[(n-k-1)*n+j]; 00179 } 00180 00181 m_kernel->cleanup(); 00182 return result; 00183 } 00184 00185 CDenseFeatures<float64_t>* CKernelPCA::apply_to_string_features(CFeatures* 
features) 00186 { 00187 ASSERT(m_initialized) 00188 00189 int32_t num_vectors = features->get_num_vectors(); 00190 int32_t i,j,k; 00191 int32_t n = m_transformation_matrix.num_cols; 00192 00193 m_kernel->init(features,m_init_features); 00194 00195 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors); 00196 00197 for (i=0; i<num_vectors; i++) 00198 { 00199 for (j=0; j<m_target_dim; j++) 00200 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j]; 00201 00202 for (j=0; j<n; j++) 00203 { 00204 float64_t kij = m_kernel->kernel(i,j); 00205 00206 for (k=0; k<m_target_dim; k++) 00207 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j]; 00208 } 00209 } 00210 00211 m_kernel->cleanup(); 00212 00213 return new CDenseFeatures<float64_t>(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors)); 00214 } 00215 00216 #endif