SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 20013 Viktor Gal 00008 * Copyright (C) 2013 Viktor Gal 00009 */ 00010 00011 #include <shogun/preprocessor/RescaleFeatures.h> 00012 00013 using namespace shogun; 00014 00015 CRescaleFeatures::CRescaleFeatures() 00016 : CDensePreprocessor<float64_t>(), 00017 m_initialized(false) 00018 { 00019 register_parameters(); 00020 } 00021 00022 CRescaleFeatures::~CRescaleFeatures() 00023 { 00024 cleanup(); 00025 } 00026 00027 bool CRescaleFeatures::init(CFeatures* features) 00028 { 00029 if (!m_initialized) 00030 { 00031 ASSERT(features->get_feature_class()==C_DENSE); 00032 ASSERT(features->get_feature_type()==F_DREAL); 00033 00034 CDenseFeatures<float64_t>* simple_features=(CDenseFeatures<float64_t>*) features; 00035 int32_t num_examples = simple_features->get_num_vectors(); 00036 int32_t num_features = simple_features->get_num_features(); 00037 REQUIRE(num_examples > 1, 00038 "number of feature vectors should be at least 2!\n"); 00039 00040 SG_INFO("Extracting min and range values for each feature\n") 00041 00042 m_min = SGVector<float64_t>(num_features); 00043 m_range = SGVector<float64_t>(num_features); 00044 SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix(); 00045 for (index_t i = 0; i < num_features; i++) 00046 { 00047 SGVector<float64_t> vec = feature_matrix.get_row_vector(i); 00048 float64_t cur_min = vec[0]; 00049 float64_t cur_max = vec[0]; 00050 00051 /* find the max and min values in one loop */ 00052 for (index_t j = 1; j < vec.vlen; j++) 00053 { 00054 cur_min = CMath::min(vec[j], cur_min); 00055 cur_max = CMath::max(vec[j], cur_max); 00056 } 00057 00058 /* only rescale if range > 0 */ 00059 if ((cur_max - cur_min) > 0) { 00060 m_min[i] = cur_min; 00061 m_range[i] = 1.0/(cur_max - cur_min); 00062 } 00063 else { 00064 m_min[i] = 0.0; 00065 m_range[i] = 1.0; 00066 } 00067 } 00068 00069 m_initialized = true; 00070 00071 return true; 00072 } 00073 00074 return false; 00075 } 00076 00077 void CRescaleFeatures::cleanup() 00078 { 00079 m_initialized = false; 00080 } 00081 00082 SGMatrix<float64_t> CRescaleFeatures::apply_to_feature_matrix(CFeatures* features) 00083 { 00084 ASSERT(m_initialized); 00085 00086 SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix(); 00087 ASSERT(feature_matrix.num_rows == m_min.vlen); 00088 00089 for (index_t i = 0; i < feature_matrix.num_cols; i++) 00090 { 00091 float64_t* vec = feature_matrix.get_column_vector(i); 00092 SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec, -1.0, m_min.vector, feature_matrix.num_rows); 00093 for (index_t j = 0; j < feature_matrix.num_rows; j++) { 00094 vec[j] *= m_range[j]; 00095 } 00096 } 00097 00098 return feature_matrix; 00099 } 00100 00101 SGVector<float64_t> CRescaleFeatures::apply_to_feature_vector(SGVector<float64_t> vector) 00102 { 00103 ASSERT(m_initialized); 00104 ASSERT(m_min.vlen == vector.vlen); 00105 00106 float64_t* ret = SG_MALLOC(float64_t, vector.vlen); 00107 SGVector<float64_t>::add(ret, 1.0, vector.vector, -1.0, m_min.vector, vector.vlen); 00108 for (index_t i = 0; i < vector.vlen; i++) { 00109 ret[i] *= m_range[i]; 00110 } 00111 00112 return SGVector<float64_t>(ret,vector.vlen); 00113 } 00114 00115 void CRescaleFeatures::register_parameters() 00116 { 00117 SG_ADD(&m_min, "min", "minimum values of each feature", MS_NOT_AVAILABLE); 00118 SG_ADD(&m_range, "range", "Reciprocal of the range of each feature", MS_NOT_AVAILABLE); 00119 SG_ADD(&m_initialized, "initialized", "Indicator of the state of the preprocessor.", MS_NOT_AVAILABLE); 00120 }