SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
DataGenerator.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Heiko Strathmann
00008  */
00009 
00010 #include <shogun/lib/config.h>
00011 
00012 #include <shogun/features/DataGenerator.h>
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/distributions/Gaussian.h>
00015 
00016 using namespace shogun;
00017 
00018 CDataGenerator::CDataGenerator() : CSGObject()
00019 {
00020     init();
00021 }
00022 
00023 CDataGenerator::~CDataGenerator()
00024 {
00025 
00026 }
00027 
00028 void CDataGenerator::init()
00029 {
00030 }
00031 
00032 SGMatrix<float64_t> CDataGenerator::generate_checkboard_data(int32_t num_classes,
00033         int32_t dim, int32_t num_points, float64_t overlap)
00034 {
00035     int32_t points_per_class = num_points / num_classes;
00036 
00037     int32_t grid_size = (int32_t ) CMath::ceil(CMath::sqrt((float64_t ) num_classes));
00038     float64_t cell_size = (float64_t ) 1 / grid_size;
00039     SGVector<float64_t> grid_idx(dim);
00040     for (index_t i=0; i<dim; i++)
00041         grid_idx[i] = 0;
00042 
00043     SGMatrix<float64_t> points(dim+1, num_points);
00044     int32_t points_idx = 0;
00045     for (int32_t class_idx=0; class_idx<num_classes; class_idx++)
00046     {
00047         SGVector<float64_t> class_dim_centers(dim);
00048         for (index_t i=0; i<dim; i++)
00049             class_dim_centers[i] = (grid_idx[i] * cell_size + (grid_idx[i] + 1) * cell_size) / 2;
00050 
00051         for (index_t p=points_idx; p<points_per_class+points_idx; p++)
00052         {
00053             for (index_t i=0; i<dim; i++)
00054             {
00055                 do
00056                 {
00057                     points(i, p) = CMath::normal_random(class_dim_centers[i], cell_size*0.5);
00058                     if ((points(i, p)>(grid_idx[i]+1)*cell_size) ||
00059                             (points(i, p)<grid_idx[i]*cell_size))
00060                     {
00061                         if (!(CMath::random(0.0, 1.0)<overlap))
00062                             continue;
00063                     }
00064                     break;
00065                 } while (true);
00066             }
00067             points(dim, p) = class_idx;
00068         }
00069         points_idx += points_per_class;
00070         for (index_t i=dim-1; i>=0; i--)
00071         {
00072             grid_idx[i]++;
00073             if (grid_idx[i]>=grid_size)
00074                 grid_idx[i] = 0;
00075             else
00076                 break;
00077         }
00078     }
00079     return points;
00080 }
00081 
00082 SGMatrix<float64_t> CDataGenerator::generate_mean_data(index_t m,
00083         index_t dim, float64_t mean_shift,
00084         SGMatrix<float64_t> target)
00085 {
00086     /* evtl. allocate space */
00087     SGMatrix<float64_t> result=SGMatrix<float64_t>::get_allocated_matrix(
00088             dim, 2*m, target);
00089 
00090     /* fill matrix with normal data */
00091     for (index_t i=0; i<2*m; ++i)
00092     {
00093         for (index_t j=0; j<dim; ++j)
00094             result(j,i)=CMath::randn_double();
00095 
00096         /* mean shift for second half */
00097         if (i>=m)
00098             result(0,i)+=mean_shift;
00099     }
00100 
00101     return result;
00102 }
00103 
00104 SGMatrix<float64_t> CDataGenerator::generate_sym_mix_gauss(index_t m,
00105         float64_t d, float64_t angle, SGMatrix<float64_t> target)
00106 {
00107     /* evtl. allocate space */
00108     SGMatrix<float64_t> result=SGMatrix<float64_t>::get_allocated_matrix(
00109             2, m, target);
00110 
00111     /* rotation matrix */
00112     SGMatrix<float64_t> rot=SGMatrix<float64_t>(2,2);
00113     rot(0, 0)=CMath::cos(angle);
00114     rot(0, 1)=-CMath::sin(angle);
00115     rot(1, 0)=CMath::sin(angle);
00116     rot(1, 1)=CMath::cos(angle);
00117 
00118     /* generate signal in each dimension which is an equal mixture of two
00119      * Gaussians */
00120     for (index_t i=0; i<m; ++i)
00121     {
00122         result(0,i)=CMath::randn_double() + (CMath::random(0, 1) ? d : -d);
00123         result(1,i)=CMath::randn_double() + (CMath::random(0, 1) ? d : -d);
00124     }
00125 
00126     /* rotate result */
00127     if (angle)
00128         result=SGMatrix<float64_t>::matrix_multiply(rot, result);
00129 
00130     return result;
00131 }
00132 #ifdef HAVE_LAPACK
00133 SGMatrix<float64_t> CDataGenerator::generate_gaussians(index_t m, index_t n, index_t dim)
00134 {
00135     /* evtl. allocate space */
00136     SGMatrix<float64_t> result =
00137         SGMatrix<float64_t>::get_allocated_matrix(dim, n*m);
00138 
00139     float64_t grid_distance = 5.0;
00140     for (index_t i = 0; i < n; ++i)
00141     {
00142         SGVector<float64_t> mean(dim);
00143         SGMatrix<float64_t> cov = SGMatrix<float64_t>::create_identity_matrix(dim, 1.0);
00144 
00145         mean.zero();
00146         for (index_t k = 0; k < dim; ++k)
00147         {
00148             mean[k] = (i+1)*grid_distance;
00149             if (k % (i+1) == 0)
00150                 mean[k] *= -1;
00151         }
00152         CGaussian* g = new CGaussian(mean, cov, DIAG);
00153         for (index_t j = 0; j < m; ++j)
00154         {
00155             SGVector<float64_t> v = g->sample();
00156             memcpy((result.matrix+j*result.num_rows+i*m*dim), v.vector, dim*sizeof(float64_t));
00157             SG_FREE(v.vector);
00158         }
00159 
00160         SG_UNREF(g);
00161     }
00162 
00163     return result;
00164 }
00165 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation