SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Heiko Strathmann 00008 */ 00009 00010 #include <shogun/lib/config.h> 00011 00012 #include <shogun/features/DataGenerator.h> 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/distributions/Gaussian.h> 00015 00016 using namespace shogun; 00017 00018 CDataGenerator::CDataGenerator() : CSGObject() 00019 { 00020 init(); 00021 } 00022 00023 CDataGenerator::~CDataGenerator() 00024 { 00025 00026 } 00027 00028 void CDataGenerator::init() 00029 { 00030 } 00031 00032 SGMatrix<float64_t> CDataGenerator::generate_checkboard_data(int32_t num_classes, 00033 int32_t dim, int32_t num_points, float64_t overlap) 00034 { 00035 int32_t points_per_class = num_points / num_classes; 00036 00037 int32_t grid_size = (int32_t ) CMath::ceil(CMath::sqrt((float64_t ) num_classes)); 00038 float64_t cell_size = (float64_t ) 1 / grid_size; 00039 SGVector<float64_t> grid_idx(dim); 00040 for (index_t i=0; i<dim; i++) 00041 grid_idx[i] = 0; 00042 00043 SGMatrix<float64_t> points(dim+1, num_points); 00044 int32_t points_idx = 0; 00045 for (int32_t class_idx=0; class_idx<num_classes; class_idx++) 00046 { 00047 SGVector<float64_t> class_dim_centers(dim); 00048 for (index_t i=0; i<dim; i++) 00049 class_dim_centers[i] = (grid_idx[i] * cell_size + (grid_idx[i] + 1) * cell_size) / 2; 00050 00051 for (index_t p=points_idx; p<points_per_class+points_idx; p++) 00052 { 00053 for (index_t i=0; i<dim; i++) 00054 { 00055 do 00056 { 00057 points(i, p) = CMath::normal_random(class_dim_centers[i], cell_size*0.5); 00058 if ((points(i, p)>(grid_idx[i]+1)*cell_size) || 00059 (points(i, p)<grid_idx[i]*cell_size)) 00060 { 00061 if (!(CMath::random(0.0, 1.0)<overlap)) 00062 continue; 00063 } 00064 break; 00065 } while (true); 00066 } 00067 points(dim, p) = class_idx; 00068 } 00069 points_idx += points_per_class; 00070 for (index_t i=dim-1; i>=0; i--) 00071 { 00072 grid_idx[i]++; 00073 if (grid_idx[i]>=grid_size) 00074 grid_idx[i] = 0; 00075 else 00076 break; 00077 } 00078 } 00079 return points; 00080 } 00081 00082 SGMatrix<float64_t> CDataGenerator::generate_mean_data(index_t m, 00083 index_t dim, float64_t mean_shift, 00084 SGMatrix<float64_t> target) 00085 { 00086 /* evtl. allocate space */ 00087 SGMatrix<float64_t> result=SGMatrix<float64_t>::get_allocated_matrix( 00088 dim, 2*m, target); 00089 00090 /* fill matrix with normal data */ 00091 for (index_t i=0; i<2*m; ++i) 00092 { 00093 for (index_t j=0; j<dim; ++j) 00094 result(j,i)=CMath::randn_double(); 00095 00096 /* mean shift for second half */ 00097 if (i>=m) 00098 result(0,i)+=mean_shift; 00099 } 00100 00101 return result; 00102 } 00103 00104 SGMatrix<float64_t> CDataGenerator::generate_sym_mix_gauss(index_t m, 00105 float64_t d, float64_t angle, SGMatrix<float64_t> target) 00106 { 00107 /* evtl. allocate space */ 00108 SGMatrix<float64_t> result=SGMatrix<float64_t>::get_allocated_matrix( 00109 2, m, target); 00110 00111 /* rotation matrix */ 00112 SGMatrix<float64_t> rot=SGMatrix<float64_t>(2,2); 00113 rot(0, 0)=CMath::cos(angle); 00114 rot(0, 1)=-CMath::sin(angle); 00115 rot(1, 0)=CMath::sin(angle); 00116 rot(1, 1)=CMath::cos(angle); 00117 00118 /* generate signal in each dimension which is an equal mixture of two 00119 * Gaussians */ 00120 for (index_t i=0; i<m; ++i) 00121 { 00122 result(0,i)=CMath::randn_double() + (CMath::random(0, 1) ? d : -d); 00123 result(1,i)=CMath::randn_double() + (CMath::random(0, 1) ? d : -d); 00124 } 00125 00126 /* rotate result */ 00127 if (angle) 00128 result=SGMatrix<float64_t>::matrix_multiply(rot, result); 00129 00130 return result; 00131 } 00132 #ifdef HAVE_LAPACK 00133 SGMatrix<float64_t> CDataGenerator::generate_gaussians(index_t m, index_t n, index_t dim) 00134 { 00135 /* evtl. allocate space */ 00136 SGMatrix<float64_t> result = 00137 SGMatrix<float64_t>::get_allocated_matrix(dim, n*m); 00138 00139 float64_t grid_distance = 5.0; 00140 for (index_t i = 0; i < n; ++i) 00141 { 00142 SGVector<float64_t> mean(dim); 00143 SGMatrix<float64_t> cov = SGMatrix<float64_t>::create_identity_matrix(dim, 1.0); 00144 00145 mean.zero(); 00146 for (index_t k = 0; k < dim; ++k) 00147 { 00148 mean[k] = (i+1)*grid_distance; 00149 if (k % (i+1) == 0) 00150 mean[k] *= -1; 00151 } 00152 CGaussian* g = new CGaussian(mean, cov, DIAG); 00153 for (index_t j = 0; j < m; ++j) 00154 { 00155 SGVector<float64_t> v = g->sample(); 00156 memcpy((result.matrix+j*result.num_rows+i*m*dim), v.vector, dim*sizeof(float64_t)); 00157 SG_FREE(v.vector); 00158 } 00159 00160 SG_UNREF(g); 00161 } 00162 00163 return result; 00164 } 00165 #endif /* HAVE_LAPACK */