// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Written (W) 2011-2012 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/features/Features.h> 00014 #include <shogun/preprocessor/Preprocessor.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/base/Parameter.h> 00017 #include <shogun/lib/DynamicObjectArray.h> 00018 00019 #include <string.h> 00020 00021 using namespace shogun; 00022 00023 CFeatures::CFeatures(int32_t size) 00024 : CSGObject() 00025 { 00026 init(); 00027 cache_size = size; 00028 } 00029 00030 CFeatures::CFeatures(const CFeatures& orig) 00031 : CSGObject(orig) 00032 { 00033 init(); 00034 00035 preproc = orig.preproc; 00036 preprocessed = orig.preprocessed; 00037 SG_REF(preproc); 00038 SG_REF(preprocessed); 00039 } 00040 00041 CFeatures::CFeatures(CFile* loader) 00042 : CSGObject() 00043 { 00044 init(); 00045 00046 load(loader); 00047 SG_INFO("Feature object loaded (%p)\n",this) 00048 } 00049 00050 CFeatures::~CFeatures() 00051 { 00052 clean_preprocessors(); 00053 SG_UNREF(m_subset_stack); 00054 SG_UNREF(preproc); 00055 SG_UNREF(preprocessed); 00056 } 00057 00058 void CFeatures::init() 00059 { 00060 SG_ADD(&properties, "properties", "Feature properties", MS_NOT_AVAILABLE); 00061 SG_ADD(&cache_size, "cache_size", "Size of cache in MB", MS_NOT_AVAILABLE); 00062 00063 SG_ADD((CSGObject**) &preproc, "preproc", "Array of preprocessors.", 00064 MS_NOT_AVAILABLE); 00065 SG_ADD((CSGObject**) &preprocessed, "preprocessed", "Array of preprocessed.", 00066 MS_NOT_AVAILABLE); 00067 00068 SG_ADD((CSGObject**)&m_subset_stack, 
"subset_stack", "Stack of subsets", 00069 MS_NOT_AVAILABLE); 00070 00071 m_subset_stack=new CSubsetStack(); 00072 SG_REF(m_subset_stack); 00073 00074 properties = FP_NONE; 00075 cache_size = 0; 00076 preproc = new CDynamicObjectArray(); 00077 preprocessed = new CDynamicArray<bool>(); 00078 SG_REF(preproc); 00079 SG_REF(preprocessed); 00080 } 00081 00082 void CFeatures::add_preprocessor(CPreprocessor* p) 00083 { 00084 ASSERT(p) 00085 00086 preproc->push_back(p); 00087 preprocessed->push_back(false); 00088 } 00089 00090 CPreprocessor* CFeatures::get_preprocessor(int32_t num) const 00091 { 00092 if (num<preproc->get_num_elements() && num>=0) 00093 { 00094 return (CPreprocessor*) preproc->get_element(num); 00095 } 00096 else 00097 return NULL; 00098 } 00099 00100 int32_t CFeatures::get_num_preprocessed() const 00101 { 00102 int32_t num=0; 00103 00104 for (int32_t i=0; i<preproc->get_num_elements(); i++) 00105 { 00106 if ((*preprocessed)[i]) 00107 num++; 00108 } 00109 00110 return num; 00111 } 00112 00113 void CFeatures::clean_preprocessors() 00114 { 00115 preproc->reset_array(); 00116 preprocessed->reset_array(); 00117 } 00118 00119 void CFeatures::del_preprocessor(int32_t num) 00120 { 00121 if (num<preproc->get_num_elements() && num>=0) 00122 { 00123 preproc->delete_element(num); 00124 preprocessed->delete_element(num); 00125 } 00126 } 00127 00128 void CFeatures::list_preprocessors() 00129 { 00130 int32_t num_preproc = preproc->get_num_elements(); 00131 00132 for (int32_t i=0; i<num_preproc; i++) 00133 { 00134 SG_INFO("preproc[%d]=%s applied=%s\n",i, 00135 preproc->get_element(i)->get_name(), 00136 preprocessed->get_element(i) ? 
"true" : "false"); 00137 } 00138 } 00139 00140 void CFeatures::set_preprocessed(int32_t num) 00141 { 00142 ASSERT(num<preprocessed->get_num_elements() && num>=0); 00143 (*preprocessed)[num]=true; 00144 } 00145 00146 bool CFeatures::is_preprocessed(int32_t num) const 00147 { 00148 ASSERT(num<preprocessed->get_num_elements() && num>=0); 00149 return (*preprocessed)[num]; 00150 } 00151 00152 int32_t CFeatures::get_num_preprocessors() const 00153 { 00154 return preproc->get_num_elements(); 00155 } 00156 00157 int32_t CFeatures::get_cache_size() const 00158 { 00159 return cache_size; 00160 } 00161 00162 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors) 00163 { 00164 SG_NOTIMPLEMENTED 00165 return false; 00166 } 00167 00168 void CFeatures::list_feature_obj() const 00169 { 00170 SG_INFO("%p - ", this) 00171 switch (get_feature_class()) 00172 { 00173 case C_UNKNOWN: 00174 SG_INFO("C_UNKNOWN ") 00175 break; 00176 case C_DENSE: 00177 SG_INFO("C_DENSE ") 00178 break; 00179 case C_SPARSE: 00180 SG_INFO("C_SPARSE ") 00181 break; 00182 case C_STRING: 00183 SG_INFO("C_STRING ") 00184 break; 00185 case C_COMBINED: 00186 SG_INFO("C_COMBINED ") 00187 break; 00188 case C_COMBINED_DOT: 00189 SG_INFO("C_COMBINED_DOT ") 00190 break; 00191 case C_WD: 00192 SG_INFO("C_WD ") 00193 break; 00194 case C_SPEC: 00195 SG_INFO("C_SPEC ") 00196 break; 00197 case C_WEIGHTEDSPEC: 00198 SG_INFO("C_WEIGHTEDSPEC ") 00199 break; 00200 case C_STREAMING_DENSE: 00201 SG_INFO("C_STREAMING_DENSE ") 00202 break; 00203 case C_STREAMING_SPARSE: 00204 SG_INFO("C_STREAMING_SPARSE ") 00205 break; 00206 case C_STREAMING_STRING: 00207 SG_INFO("C_STREAMING_STRING ") 00208 break; 00209 case C_STREAMING_VW: 00210 SG_INFO("C_STREAMING_VW ") 00211 break; 00212 case C_ANY: 00213 SG_INFO("C_ANY ") 00214 break; 00215 default: 00216 SG_ERROR("ERROR UNKNOWN FEATURE CLASS") 00217 } 00218 00219 switch (get_feature_type()) 00220 { 00221 case F_UNKNOWN: 00222 SG_INFO("F_UNKNOWN \n") 00223 break; 00224 case 
F_CHAR: 00225 SG_INFO("F_CHAR \n") 00226 break; 00227 case F_BYTE: 00228 SG_INFO("F_BYTE \n") 00229 break; 00230 case F_SHORT: 00231 SG_INFO("F_SHORT \n") 00232 break; 00233 case F_WORD: 00234 SG_INFO("F_WORD \n") 00235 break; 00236 case F_INT: 00237 SG_INFO("F_INT \n") 00238 break; 00239 case F_UINT: 00240 SG_INFO("F_UINT \n") 00241 break; 00242 case F_LONG: 00243 SG_INFO("F_LONG \n") 00244 break; 00245 case F_ULONG: 00246 SG_INFO("F_ULONG \n") 00247 break; 00248 case F_SHORTREAL: 00249 SG_INFO("F_SHORTEAL \n") 00250 break; 00251 case F_DREAL: 00252 SG_INFO("F_DREAL \n") 00253 break; 00254 case F_LONGREAL: 00255 SG_INFO("F_LONGREAL \n") 00256 break; 00257 case F_ANY: 00258 SG_INFO("F_ANY \n") 00259 break; 00260 default: 00261 SG_ERROR("ERROR UNKNOWN FEATURE TYPE\n") 00262 } 00263 } 00264 00265 00266 void CFeatures::load(CFile* loader) 00267 { 00268 SG_SET_LOCALE_C; 00269 SG_NOTIMPLEMENTED 00270 SG_RESET_LOCALE; 00271 } 00272 00273 void CFeatures::save(CFile* writer) 00274 { 00275 SG_SET_LOCALE_C; 00276 SG_NOTIMPLEMENTED 00277 SG_RESET_LOCALE; 00278 } 00279 00280 bool CFeatures::check_feature_compatibility(CFeatures* f) const 00281 { 00282 bool result=false; 00283 00284 if (f) 00285 { 00286 result= ( (this->get_feature_class() == f->get_feature_class()) && 00287 (this->get_feature_type() == f->get_feature_type())); 00288 } 00289 return result; 00290 } 00291 00292 bool CFeatures::has_property(EFeatureProperty p) const 00293 { 00294 return (properties & p) != 0; 00295 } 00296 00297 void CFeatures::set_property(EFeatureProperty p) 00298 { 00299 properties |= p; 00300 } 00301 00302 void CFeatures::unset_property(EFeatureProperty p) 00303 { 00304 properties &= (properties | p) ^ p; 00305 } 00306 00307 void CFeatures::add_subset(SGVector<index_t> subset) 00308 { 00309 m_subset_stack->add_subset(subset); 00310 subset_changed_post(); 00311 } 00312 00313 void CFeatures::remove_subset() 00314 { 00315 m_subset_stack->remove_subset(); 00316 subset_changed_post(); 00317 } 00318 
00319 void CFeatures::remove_all_subsets() 00320 { 00321 m_subset_stack->remove_all_subsets(); 00322 subset_changed_post(); 00323 } 00324 00325 CSubsetStack* CFeatures::get_subset_stack() 00326 { 00327 return m_subset_stack; 00328 } 00329 00330 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices) 00331 { 00332 SG_ERROR("%s::copy_subset(): copy_subset and therefore model storage of " 00333 "CMachine (required for cross-validation and model-selection is " 00334 "not yet implemented yet. Ask developers!\n", get_name()); 00335 return NULL; 00336 }