SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Features.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2011-2012 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/features/Features.h>
00014 #include <shogun/preprocessor/Preprocessor.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/base/Parameter.h>
00017 #include <shogun/lib/DynamicObjectArray.h>
00018 
00019 #include <string.h>
00020 
00021 using namespace shogun;
00022 
00023 CFeatures::CFeatures(int32_t size)
00024 : CSGObject()
00025 {
00026     init();
00027     cache_size = size;
00028 }
00029 
00030 CFeatures::CFeatures(const CFeatures& orig)
00031 : CSGObject(orig)
00032 {
00033     init();
00034 
00035     preproc = orig.preproc;
00036     preprocessed = orig.preprocessed;
00037     SG_REF(preproc);
00038     SG_REF(preprocessed);
00039 }
00040 
00041 CFeatures::CFeatures(CFile* loader)
00042 : CSGObject()
00043 {
00044     init();
00045 
00046     load(loader);
00047     SG_INFO("Feature object loaded (%p)\n",this)
00048 }
00049 
00050 CFeatures::~CFeatures()
00051 {
00052     clean_preprocessors();
00053     SG_UNREF(m_subset_stack);
00054     SG_UNREF(preproc);
00055     SG_UNREF(preprocessed);
00056 }
00057 
00058 void CFeatures::init()
00059 {
00060     SG_ADD(&properties, "properties", "Feature properties", MS_NOT_AVAILABLE);
00061     SG_ADD(&cache_size, "cache_size", "Size of cache in MB", MS_NOT_AVAILABLE);
00062 
00063     SG_ADD((CSGObject**) &preproc, "preproc", "Array of preprocessors.",
00064            MS_NOT_AVAILABLE);
00065     SG_ADD((CSGObject**) &preprocessed, "preprocessed", "Array of preprocessed.",
00066            MS_NOT_AVAILABLE);
00067 
00068     SG_ADD((CSGObject**)&m_subset_stack, "subset_stack", "Stack of subsets",
00069            MS_NOT_AVAILABLE);
00070 
00071     m_subset_stack=new CSubsetStack();
00072     SG_REF(m_subset_stack);
00073 
00074     properties = FP_NONE;
00075     cache_size = 0;
00076     preproc = new CDynamicObjectArray();
00077     preprocessed = new CDynamicArray<bool>();
00078     SG_REF(preproc);
00079     SG_REF(preprocessed);
00080 }
00081 
00082 void CFeatures::add_preprocessor(CPreprocessor* p)
00083 {
00084     ASSERT(p)
00085 
00086     preproc->push_back(p);
00087     preprocessed->push_back(false);
00088 }
00089 
00090 CPreprocessor* CFeatures::get_preprocessor(int32_t num) const
00091 {
00092     if (num<preproc->get_num_elements() && num>=0)
00093     {
00094       return (CPreprocessor*) preproc->get_element(num);
00095     }
00096     else
00097         return NULL;
00098 }
00099 
00100 int32_t CFeatures::get_num_preprocessed() const
00101 {
00102     int32_t num=0;
00103 
00104     for (int32_t i=0; i<preproc->get_num_elements(); i++)
00105     {
00106       if ((*preprocessed)[i])
00107             num++;
00108     }
00109 
00110     return num;
00111 }
00112 
00113 void CFeatures::clean_preprocessors()
00114 {
00115     preproc->reset_array();
00116     preprocessed->reset_array();
00117 }
00118 
00119 void CFeatures::del_preprocessor(int32_t num)
00120 {
00121     if (num<preproc->get_num_elements() && num>=0)
00122     {
00123         preproc->delete_element(num);
00124         preprocessed->delete_element(num);
00125     }
00126 }
00127 
00128 void CFeatures::list_preprocessors()
00129 {
00130     int32_t num_preproc = preproc->get_num_elements();
00131 
00132     for (int32_t i=0; i<num_preproc; i++)
00133     {
00134         SG_INFO("preproc[%d]=%s applied=%s\n",i,
00135                 preproc->get_element(i)->get_name(),
00136                 preprocessed->get_element(i) ? "true" : "false");
00137     }
00138 }
00139 
00140 void CFeatures::set_preprocessed(int32_t num)
00141 {
00142     ASSERT(num<preprocessed->get_num_elements() && num>=0);
00143     (*preprocessed)[num]=true;
00144 }
00145 
00146 bool CFeatures::is_preprocessed(int32_t num) const
00147 {
00148     ASSERT(num<preprocessed->get_num_elements() && num>=0);
00149     return (*preprocessed)[num];
00150 }
00151 
00152 int32_t CFeatures::get_num_preprocessors() const
00153 {
00154     return preproc->get_num_elements();
00155 }
00156 
00157 int32_t CFeatures::get_cache_size() const
00158 {
00159     return cache_size;
00160 }
00161 
00162 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
00163 {
00164     SG_NOTIMPLEMENTED
00165     return false;
00166 }
00167 
00168 void CFeatures::list_feature_obj() const
00169 {
00170     SG_INFO("%p - ", this)
00171     switch (get_feature_class())
00172     {
00173         case C_UNKNOWN:
00174             SG_INFO("C_UNKNOWN ")
00175             break;
00176         case C_DENSE:
00177             SG_INFO("C_DENSE ")
00178             break;
00179         case C_SPARSE:
00180             SG_INFO("C_SPARSE ")
00181             break;
00182         case C_STRING:
00183             SG_INFO("C_STRING ")
00184             break;
00185         case C_COMBINED:
00186             SG_INFO("C_COMBINED ")
00187             break;
00188         case C_COMBINED_DOT:
00189             SG_INFO("C_COMBINED_DOT ")
00190             break;
00191         case C_WD:
00192             SG_INFO("C_WD ")
00193             break;
00194         case C_SPEC:
00195             SG_INFO("C_SPEC ")
00196             break;
00197         case C_WEIGHTEDSPEC:
00198             SG_INFO("C_WEIGHTEDSPEC ")
00199             break;
00200         case C_STREAMING_DENSE:
00201             SG_INFO("C_STREAMING_DENSE ")
00202             break;
00203         case C_STREAMING_SPARSE:
00204             SG_INFO("C_STREAMING_SPARSE ")
00205             break;
00206         case C_STREAMING_STRING:
00207             SG_INFO("C_STREAMING_STRING ")
00208             break;
00209         case C_STREAMING_VW:
00210             SG_INFO("C_STREAMING_VW ")
00211             break;
00212         case C_ANY:
00213             SG_INFO("C_ANY ")
00214             break;
00215         default:
00216          SG_ERROR("ERROR UNKNOWN FEATURE CLASS")
00217     }
00218 
00219     switch (get_feature_type())
00220     {
00221         case F_UNKNOWN:
00222             SG_INFO("F_UNKNOWN \n")
00223             break;
00224         case F_CHAR:
00225             SG_INFO("F_CHAR \n")
00226             break;
00227         case F_BYTE:
00228             SG_INFO("F_BYTE \n")
00229             break;
00230         case F_SHORT:
00231             SG_INFO("F_SHORT \n")
00232             break;
00233         case F_WORD:
00234             SG_INFO("F_WORD \n")
00235             break;
00236         case F_INT:
00237             SG_INFO("F_INT \n")
00238             break;
00239         case F_UINT:
00240             SG_INFO("F_UINT \n")
00241             break;
00242         case F_LONG:
00243             SG_INFO("F_LONG \n")
00244             break;
00245         case F_ULONG:
00246             SG_INFO("F_ULONG \n")
00247             break;
00248         case F_SHORTREAL:
00249             SG_INFO("F_SHORTEAL \n")
00250             break;
00251         case F_DREAL:
00252             SG_INFO("F_DREAL \n")
00253             break;
00254         case F_LONGREAL:
00255             SG_INFO("F_LONGREAL \n")
00256             break;
00257         case F_ANY:
00258             SG_INFO("F_ANY \n")
00259             break;
00260         default:
00261          SG_ERROR("ERROR UNKNOWN FEATURE TYPE\n")
00262     }
00263 }
00264 
00265 
00266 void CFeatures::load(CFile* loader)
00267 {
00268     SG_SET_LOCALE_C;
00269     SG_NOTIMPLEMENTED
00270     SG_RESET_LOCALE;
00271 }
00272 
00273 void CFeatures::save(CFile* writer)
00274 {
00275     SG_SET_LOCALE_C;
00276     SG_NOTIMPLEMENTED
00277     SG_RESET_LOCALE;
00278 }
00279 
00280 bool CFeatures::check_feature_compatibility(CFeatures* f) const
00281 {
00282     bool result=false;
00283 
00284     if (f)
00285     {
00286         result= ( (this->get_feature_class() == f->get_feature_class()) &&
00287                 (this->get_feature_type() == f->get_feature_type()));
00288     }
00289     return result;
00290 }
00291 
00292 bool CFeatures::has_property(EFeatureProperty p) const
00293 {
00294     return (properties & p) != 0;
00295 }
00296 
00297 void CFeatures::set_property(EFeatureProperty p)
00298 {
00299     properties |= p;
00300 }
00301 
00302 void CFeatures::unset_property(EFeatureProperty p)
00303 {
00304     properties &= (properties | p) ^ p;
00305 }
00306 
00307 void CFeatures::add_subset(SGVector<index_t> subset)
00308 {
00309     m_subset_stack->add_subset(subset);
00310     subset_changed_post();
00311 }
00312 
00313 void CFeatures::remove_subset()
00314 {
00315     m_subset_stack->remove_subset();
00316     subset_changed_post();
00317 }
00318 
00319 void CFeatures::remove_all_subsets()
00320 {
00321     m_subset_stack->remove_all_subsets();
00322     subset_changed_post();
00323 }
00324 
00325 CSubsetStack* CFeatures::get_subset_stack()
00326 {
00327     return m_subset_stack;
00328 }
00329 
00330 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices)
00331 {
00332     SG_ERROR("%s::copy_subset(): copy_subset and therefore model storage of "
00333             "CMachine (required for cross-validation and model-selection is "
00334             "not yet implemented yet. Ask developers!\n", get_name());
00335     return NULL;
00336 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation