SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
RescaleFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Viktor Gal
00008  * Copyright (C) 2013 Viktor Gal
00009  */
00010 
00011 #include <shogun/preprocessor/RescaleFeatures.h>
00012 
00013 using namespace shogun;
00014 
00015 CRescaleFeatures::CRescaleFeatures()
00016  : CDensePreprocessor<float64_t>(),
00017  m_initialized(false)
00018 {
00019     register_parameters();
00020 }
00021 
00022 CRescaleFeatures::~CRescaleFeatures()
00023 {
00024     cleanup();
00025 }
00026 
00027 bool CRescaleFeatures::init(CFeatures* features)
00028 {
00029     if (!m_initialized)
00030     {
00031         ASSERT(features->get_feature_class()==C_DENSE);
00032         ASSERT(features->get_feature_type()==F_DREAL);
00033 
00034         CDenseFeatures<float64_t>* simple_features=(CDenseFeatures<float64_t>*) features;
00035         int32_t num_examples = simple_features->get_num_vectors();
00036         int32_t num_features = simple_features->get_num_features();
00037         REQUIRE(num_examples > 1,
00038                         "number of feature vectors should be at least 2!\n");
00039 
00040         SG_INFO("Extracting min and range values for each feature\n")
00041 
00042         m_min = SGVector<float64_t>(num_features);
00043         m_range = SGVector<float64_t>(num_features);
00044         SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
00045         for (index_t i = 0; i < num_features; i++)
00046         {
00047             SGVector<float64_t> vec = feature_matrix.get_row_vector(i);
00048             float64_t cur_min = vec[0];
00049             float64_t cur_max = vec[0];
00050 
00051             /* find the max and min values in one loop */
00052             for (index_t j = 1; j < vec.vlen; j++)
00053             {
00054                 cur_min = CMath::min(vec[j], cur_min);
00055                 cur_max = CMath::max(vec[j], cur_max);
00056             }
00057 
00058             /* only rescale if range > 0 */
00059             if ((cur_max - cur_min) > 0) {
00060                 m_min[i] = cur_min;
00061                 m_range[i] = 1.0/(cur_max - cur_min);
00062             }
00063             else {
00064                 m_min[i] = 0.0;
00065                 m_range[i] = 1.0;
00066             }
00067         }
00068 
00069         m_initialized = true;
00070 
00071         return true;
00072     }
00073 
00074     return false;
00075 }
00076 
00077 void CRescaleFeatures::cleanup()
00078 {
00079     m_initialized = false;
00080 }
00081 
00082 SGMatrix<float64_t> CRescaleFeatures::apply_to_feature_matrix(CFeatures* features)
00083 {
00084     ASSERT(m_initialized);
00085 
00086     SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
00087     ASSERT(feature_matrix.num_rows == m_min.vlen);
00088 
00089     for (index_t i = 0; i < feature_matrix.num_cols; i++)
00090     {
00091         float64_t* vec = feature_matrix.get_column_vector(i);
00092         SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec, -1.0, m_min.vector, feature_matrix.num_rows);
00093         for (index_t j = 0; j < feature_matrix.num_rows; j++) {
00094             vec[j] *= m_range[j];
00095         }
00096     }
00097 
00098     return feature_matrix;
00099 }
00100 
00101 SGVector<float64_t> CRescaleFeatures::apply_to_feature_vector(SGVector<float64_t> vector)
00102 {
00103     ASSERT(m_initialized);
00104     ASSERT(m_min.vlen == vector.vlen);
00105 
00106     float64_t* ret = SG_MALLOC(float64_t, vector.vlen);
00107     SGVector<float64_t>::add(ret, 1.0, vector.vector, -1.0, m_min.vector, vector.vlen);
00108     for (index_t i = 0; i < vector.vlen; i++) {
00109         ret[i] *= m_range[i];
00110     }
00111 
00112     return SGVector<float64_t>(ret,vector.vlen);
00113 }
00114 
00115 void CRescaleFeatures::register_parameters()
00116 {
00117     SG_ADD(&m_min, "min", "minimum values of each feature", MS_NOT_AVAILABLE);
00118     SG_ADD(&m_range, "range", "Reciprocal of the range of each feature", MS_NOT_AVAILABLE);
00119     SG_ADD(&m_initialized, "initialized", "Indicator of the state of the preprocessor.", MS_NOT_AVAILABLE);
00120 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation