SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
DataType.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Written (W) 2011-2013 Heiko Strathmann
00009  * Copyright (C) 2010 Berlin Institute of Technology
00010  */
00011 
00012 #include <string.h>
00013 
00014 #include <shogun/base/SGObject.h>
00015 #include <shogun/lib/DataType.h>
00016 #include <shogun/lib/SGString.h>
00017 #include <shogun/lib/SGSparseVector.h>
00018 
00019 using namespace shogun;
00020 
00021 TSGDataType::TSGDataType(EContainerType ctype, EStructType stype,
00022                          EPrimitiveType ptype)
00023 {
00024     m_ctype = ctype, m_stype = stype, m_ptype = ptype;
00025     m_length_y = m_length_x = NULL;
00026 }
00027 
00028 TSGDataType::TSGDataType(EContainerType ctype, EStructType stype,
00029                          EPrimitiveType ptype, index_t* length)
00030 {
00031     m_ctype = ctype, m_stype = stype, m_ptype = ptype;
00032     m_length_y = length, m_length_x = NULL;
00033 }
00034 
00035 TSGDataType::TSGDataType(EContainerType ctype, EStructType stype,
00036                          EPrimitiveType ptype, index_t* length_y,
00037                          index_t* length_x)
00038 {
00039     m_ctype = ctype, m_stype = stype, m_ptype = ptype;
00040     m_length_y = length_y, m_length_x = length_x;
00041 }
00042 
00043 bool
00044 TSGDataType::operator==(const TSGDataType& a)
00045 {
00046     /* handle CT_SG* and SG_* ambiguity */
00047     bool ctype_equal=false;
00048     if ((m_ctype==CT_VECTOR && a.m_ctype==CT_SGVECTOR) ||
00049             (m_ctype==CT_SGVECTOR && a.m_ctype==CT_VECTOR) ||
00050             (m_ctype==CT_MATRIX && a.m_ctype==CT_SGMATRIX) ||
00051             (m_ctype==CT_SGMATRIX && a.m_ctype==CT_MATRIX) ||
00052             (m_ctype==a.m_ctype))
00053         ctype_equal=true;
00054 
00055     bool result = ctype_equal && m_stype == a.m_stype
00056         && m_ptype == a.m_ptype;
00057 
00058     result &= m_length_y != NULL && a.m_length_y != NULL
00059         ? *m_length_y == *a.m_length_y: m_length_y == a.m_length_y;
00060     result &= m_length_x != NULL && a.m_length_x != NULL
00061         ? *m_length_x == *a.m_length_x: m_length_x == a.m_length_x;
00062 
00063     return result;
00064 }
00065 
00066 bool TSGDataType::equals_without_length(TSGDataType other)
00067 {
00068     if (m_ctype!=other.m_ctype)
00069     {
00070         SG_SDEBUG("leaving TSGDataType::equals_without_length(): container types are "
00071                 "different\n");
00072         return false;
00073     }
00074 
00075     if (m_stype!=other.m_stype)
00076     {
00077         SG_SDEBUG("leaving TSGDataType::equals_without_length(): struct types are "
00078                 "different\n");
00079         return false;
00080     }
00081 
00082     if (m_ptype!=other.m_ptype)
00083     {
00084         SG_SDEBUG("leaving TSGDataType::equals_without_length(): primitive types are "
00085                 "different\n");
00086         return false;
00087     }
00088 
00089     SG_SDEBUG("leaving TSGDataType::equals_without_length(): data types "
00090             "without lengths are equal\n");
00091     return true;
00092 }
00093 
00094 bool TSGDataType::equals(TSGDataType other)
00095 {
00096     SG_SDEBUG("entering TSGDataType::equals()\n");
00097 
00098     if (!equals_without_length(other))
00099     {
00100         SG_SDEBUG("leaving TSGDataType::equals(): Data types without lengths "
00101                 "are not equal\n");
00102         return false;
00103     }
00104 
00105     if ((!m_length_y && other.m_length_y) || (m_length_y && !other.m_length_y))
00106     {
00107         SG_SDEBUG("leaving TSGDataType::equals(): length_y is at %p while "
00108                 "other's length_y is at %p\n", m_length_y, other.m_length_y);
00109         return false;
00110     }
00111 
00112     if (m_length_y && other.m_length_y)
00113     {
00114         if (*m_length_y!=*other.m_length_y)
00115         {
00116             SG_SDEBUG("leaving TSGDataType::equals(): length_y=%d while "
00117                     "other's length_y=%d\n", *m_length_y, *other.m_length_y);
00118             return false;
00119         }
00120     }
00121 
00122     if ((!m_length_x && other.m_length_x) || (m_length_x && !other.m_length_x))
00123     {
00124         SG_SDEBUG("leaving TSGDataType::equals(): m_length_x is at %p while "
00125                 "other's m_length_x is at %p\n", m_length_x, other.m_length_x);
00126         return false;
00127     }
00128 
00129     if (m_length_x && other.m_length_x)
00130     {
00131         if (*m_length_x!=*other.m_length_x)
00132         {
00133             SG_SDEBUG("leaving TSGDataType::equals(): m_length_x=%d while "
00134                     "other's m_length_x=%d\n", *m_length_x, *other.m_length_x);
00135             return false;
00136         }
00137     }
00138 
00139     SG_SDEBUG("leaving TSGDataType::equals(): datatypes are equal\n");
00140     return true;
00141 }
00142 
00143 void
00144 TSGDataType::to_string(char* dest, size_t n) const
00145 {
00146     char* p = dest;
00147 
00148     switch (m_ctype) {
00149     case CT_SCALAR: strncpy(p, "", n); break;
00150     case CT_VECTOR: strncpy(p, "Vector<", n); break;
00151     case CT_SGVECTOR: strncpy(p, "SGVector<", n); break;
00152     case CT_MATRIX: strncpy(p, "Matrix<", n); break;
00153     case CT_SGMATRIX: strncpy(p, "SGMatrix<", n); break;
00154     case CT_NDARRAY: strncpy(p, "N-Dimensional Array<", n); break;
00155     case CT_UNDEFINED: default: strncpy(p, "Undefined", n); break;
00156     }
00157 
00158     if (m_ctype != CT_UNDEFINED)
00159     {
00160         size_t np = strlen(p);
00161         stype_to_string(p + np, m_stype, m_ptype, n - np - 2);
00162     }
00163 
00164     switch (m_ctype) {
00165     case CT_SCALAR: break;
00166     case CT_VECTOR:
00167     case CT_SGVECTOR:
00168     case CT_MATRIX:
00169     case CT_SGMATRIX:
00170     case CT_NDARRAY: strcat(p, ">"); break;
00171     case CT_UNDEFINED: default: break;
00172     }
00173 }
00174 
00175 size_t
00176 TSGDataType::sizeof_stype() const
00177 {
00178     return sizeof_stype(m_stype, m_ptype);
00179 }
00180 
00181 size_t
00182 TSGDataType::sizeof_ptype() const
00183 {
00184     return sizeof_ptype(m_ptype);
00185 }
00186 
00187 size_t
00188 TSGDataType::sizeof_stype(EStructType stype, EPrimitiveType ptype)
00189 {
00190     switch (stype) {
00191     case ST_NONE: return sizeof_ptype(ptype);
00192     case ST_STRING:
00193         switch (ptype) {
00194         case PT_BOOL: return sizeof (SGString<bool>);
00195         case PT_CHAR: return sizeof (SGString<char>);
00196         case PT_INT8: return sizeof (SGString<int8_t>);
00197         case PT_UINT8: return sizeof (SGString<uint8_t>);
00198         case PT_INT16: return sizeof (SGString<int16_t>);
00199         case PT_UINT16: return sizeof (SGString<uint16_t>);
00200         case PT_INT32: return sizeof (SGString<int32_t>);
00201         case PT_UINT32: return sizeof (SGString<uint32_t>);
00202         case PT_INT64: return sizeof (SGString<int64_t>);
00203         case PT_UINT64: return sizeof (SGString<uint64_t>);
00204         case PT_FLOAT32: return sizeof (SGString<float32_t>);
00205         case PT_FLOAT64: return sizeof (SGString<float64_t>);
00206         case PT_FLOATMAX: return sizeof (SGString<floatmax_t>);
00207         case PT_COMPLEX128:
00208             SG_SWARNING("TGSDataType::sizeof_stype(): Strings are"
00209                 " not supported for complex128_t\n");
00210             return -1;
00211         case PT_SGOBJECT:
00212             SG_SWARNING("TGSDataType::sizeof_stype(): Strings are"
00213                 " not supported for SGObject\n");
00214             return -1;
00215         case PT_UNDEFINED: default:
00216             SG_SERROR("Implementation error: undefined primitive type\n");
00217             break;
00218         }
00219         break;
00220     case ST_SPARSE:
00221         switch (ptype) {
00222         case PT_BOOL: return sizeof (SGSparseVector<bool>);
00223         case PT_CHAR: return sizeof (SGSparseVector<char>);
00224         case PT_INT8: return sizeof (SGSparseVector<int8_t>);
00225         case PT_UINT8: return sizeof (SGSparseVector<uint8_t>);
00226         case PT_INT16: return sizeof (SGSparseVector<int16_t>);
00227         case PT_UINT16: return sizeof (SGSparseVector<uint16_t>);
00228         case PT_INT32: return sizeof (SGSparseVector<int32_t>);
00229         case PT_UINT32: return sizeof (SGSparseVector<uint32_t>);
00230         case PT_INT64: return sizeof (SGSparseVector<int64_t>);
00231         case PT_UINT64: return sizeof (SGSparseVector<uint64_t>);
00232         case PT_FLOAT32: return sizeof (SGSparseVector<float32_t>);
00233         case PT_FLOAT64: return sizeof (SGSparseVector<float64_t>);
00234         case PT_FLOATMAX: return sizeof (SGSparseVector<floatmax_t>);
00235         case PT_COMPLEX128: return sizeof (SGSparseVector<complex128_t>);
00236         case PT_SGOBJECT: return -1;
00237         case PT_UNDEFINED: default:
00238             SG_SERROR("Implementation error: undefined primitive type\n");
00239             break;
00240         }
00241         break;
00242     case ST_UNDEFINED: default:
00243         SG_SERROR("Implementation error: undefined structure type\n");
00244         break;
00245     }
00246 
00247     return -1;
00248 }
00249 
00250 size_t
00251 TSGDataType::sizeof_ptype(EPrimitiveType ptype)
00252 {
00253     switch (ptype) {
00254     case PT_BOOL: return sizeof (bool);
00255     case PT_CHAR: return sizeof (char);
00256     case PT_INT8: return sizeof (int8_t);
00257     case PT_UINT8: return sizeof (uint8_t);
00258     case PT_INT16: return sizeof (int16_t);
00259     case PT_UINT16: return sizeof (uint16_t);
00260     case PT_INT32: return sizeof (int32_t);
00261     case PT_UINT32: return sizeof (uint32_t);
00262     case PT_INT64: return sizeof (int64_t);
00263     case PT_UINT64: return sizeof (uint64_t);
00264     case PT_FLOAT32: return sizeof (float32_t);
00265     case PT_FLOAT64: return sizeof (float64_t);
00266     case PT_FLOATMAX: return sizeof (floatmax_t);
00267     case PT_COMPLEX128: return sizeof (complex128_t);
00268     case PT_SGOBJECT: return sizeof (CSGObject*);
00269     case PT_UNDEFINED: default:
00270         SG_SERROR("Implementation error: undefined primitive type\n");
00271         break;
00272     }
00273 
00274     return -1;
00275 }
00276 
00277 size_t
00278 TSGDataType::sizeof_sparseentry(EPrimitiveType ptype)
00279 {
00280     switch (ptype) {
00281     case PT_BOOL: return sizeof (SGSparseVectorEntry<bool>);
00282     case PT_CHAR: return sizeof (SGSparseVectorEntry<char>);
00283     case PT_INT8: return sizeof (SGSparseVectorEntry<int8_t>);
00284     case PT_UINT8: return sizeof (SGSparseVectorEntry<uint8_t>);
00285     case PT_INT16: return sizeof (SGSparseVectorEntry<int16_t>);
00286     case PT_UINT16: return sizeof (SGSparseVectorEntry<uint16_t>);
00287     case PT_INT32: return sizeof (SGSparseVectorEntry<int32_t>);
00288     case PT_UINT32: return sizeof (SGSparseVectorEntry<uint32_t>);
00289     case PT_INT64: return sizeof (SGSparseVectorEntry<int64_t>);
00290     case PT_UINT64: return sizeof (SGSparseVectorEntry<uint64_t>);
00291     case PT_FLOAT32: return sizeof (SGSparseVectorEntry<float32_t>);
00292     case PT_FLOAT64: return sizeof (SGSparseVectorEntry<float64_t>);
00293     case PT_FLOATMAX: return sizeof (SGSparseVectorEntry<floatmax_t>);
00294     case PT_COMPLEX128: return sizeof (SGSparseVectorEntry<complex128_t>);
00295     case PT_SGOBJECT: return -1;
00296     case PT_UNDEFINED: default:
00297         SG_SERROR("Implementation error: undefined primitive type\n");
00298         break;
00299     }
00300 
00301     return -1;
00302 }
00303 
00304 #define ENTRY_OFFSET(k, type)                                   \
00305     ((char*) &((SGSparseVectorEntry<type>*) (k))->entry - (char*) (k))
00306 size_t
00307 TSGDataType::offset_sparseentry(EPrimitiveType ptype)
00308 {
00309     size_t result = -1; void* x = &result;
00310 
00311     switch (ptype) {
00312     case PT_BOOL: result = ENTRY_OFFSET(x, bool); break;
00313     case PT_CHAR: result = ENTRY_OFFSET(x, char); break;
00314     case PT_INT8: result = ENTRY_OFFSET(x, int8_t); break;
00315     case PT_UINT8: result = ENTRY_OFFSET(x, uint8_t); break;
00316     case PT_INT16: result = ENTRY_OFFSET(x, int16_t); break;
00317     case PT_UINT16: result = ENTRY_OFFSET(x, uint16_t); break;
00318     case PT_INT32: result = ENTRY_OFFSET(x, int32_t); break;
00319     case PT_UINT32: result = ENTRY_OFFSET(x, uint32_t); break;
00320     case PT_INT64: result = ENTRY_OFFSET(x, int64_t); break;
00321     case PT_UINT64: result = ENTRY_OFFSET(x, uint64_t); break;
00322     case PT_FLOAT32: result = ENTRY_OFFSET(x, float32_t); break;
00323     case PT_FLOAT64: result = ENTRY_OFFSET(x, float64_t); break;
00324     case PT_FLOATMAX: result = ENTRY_OFFSET(x, floatmax_t); break;
00325     case PT_COMPLEX128: result = ENTRY_OFFSET(x, complex128_t); break;
00326     case PT_SGOBJECT: return -1;
00327     case PT_UNDEFINED: default:
00328         SG_SERROR("Implementation error: undefined primitive type\n");
00329         break;
00330     }
00331 
00332     return result;
00333 }
00334 
00335 void
00336 TSGDataType::stype_to_string(char* dest, EStructType stype,
00337                              EPrimitiveType ptype, size_t n)
00338 {
00339     char* p = dest;
00340 
00341     switch (stype) {
00342     case ST_NONE: strncpy(p, "", n); break;
00343     case ST_STRING: strncpy(p, "String<", n); break;
00344     case ST_SPARSE: strncpy(p, "Sparse<", n); break;
00345     case ST_UNDEFINED: default:
00346         SG_SERROR("Implementation error: undefined structure type\n");
00347         break;
00348     }
00349 
00350     size_t np = strlen(p);
00351     ptype_to_string(p + np, ptype, n - np - 2);
00352 
00353     switch (stype) {
00354     case ST_NONE: break;
00355     case ST_STRING: case ST_SPARSE:
00356         strcat(p, ">"); break;
00357     case ST_UNDEFINED: default:
00358         SG_SERROR("Implementation error: undefined structure type\n");
00359         break;
00360     }
00361 }
00362 
00363 void
00364 TSGDataType::ptype_to_string(char* dest, EPrimitiveType ptype,
00365                              size_t n)
00366 {
00367     char* p = dest;
00368 
00369     switch (ptype) {
00370     case PT_BOOL: strncpy(p, "bool", n); break;
00371     case PT_CHAR: strncpy(p, "char", n); break;
00372     case PT_INT8: strncpy(p, "int8", n); break;
00373     case PT_UINT8: strncpy(p, "uint8", n); break;
00374     case PT_INT16: strncpy(p, "int16", n); break;
00375     case PT_UINT16: strncpy(p, "uint16", n); break;
00376     case PT_INT32: strncpy(p, "int32", n); break;
00377     case PT_UINT32: strncpy(p, "uint32", n); break;
00378     case PT_INT64: strncpy(p, "int64", n); break;
00379     case PT_UINT64: strncpy(p, "uint64", n); break;
00380     case PT_FLOAT32: strncpy(p, "float32", n); break;
00381     case PT_FLOAT64: strncpy(p, "float64", n); break;
00382     case PT_FLOATMAX: strncpy(p, "floatmax", n); break;
00383     case PT_COMPLEX128: strncpy(p, "complex128", n); break;
00384     case PT_SGOBJECT: strncpy(p, "SGSerializable*", n); break;
00385     case PT_UNDEFINED: default:
00386         SG_SERROR("Implementation error: undefined primitive type\n");
00387         break;
00388     }
00389 }
00390 
00391 bool
00392 TSGDataType::string_to_ptype(EPrimitiveType* ptype, const char* str)
00393 {
00394     if (strcmp(str, "bool") == 0) {
00395         *ptype = PT_BOOL; return true; }
00396     if (strcmp(str, "char") == 0) {
00397         *ptype = PT_CHAR; return true; }
00398     if (strcmp(str, "int8") == 0) {
00399         *ptype = PT_INT8; return true; }
00400     if (strcmp(str, "uint8") == 0) {
00401         *ptype = PT_UINT8; return true; }
00402     if (strcmp(str, "int16") == 0) {
00403         *ptype = PT_INT16; return true; }
00404     if (strcmp(str, "uint16") == 0) {
00405         *ptype = PT_UINT16; return true; }
00406     if (strcmp(str, "int32") == 0) {
00407         *ptype = PT_INT32; return true; }
00408     if (strcmp(str, "uint32") == 0) {
00409         *ptype = PT_UINT32; return true; }
00410     if (strcmp(str, "int64") == 0) {
00411         *ptype = PT_INT64; return true; }
00412     if (strcmp(str, "uint64") == 0) {
00413         *ptype = PT_UINT64; return true; }
00414     if (strcmp(str, "float32") == 0) {
00415         *ptype = PT_FLOAT32; return true; }
00416     if (strcmp(str, "float64") == 0) {
00417         *ptype = PT_FLOAT64; return true; }
00418     if (strcmp(str, "floatmax") == 0) {
00419         *ptype = PT_FLOATMAX; return true; }
00420     if (strcmp(str, "complex128") == 0) {
00421         *ptype = PT_COMPLEX128; return true; }
00422     if (strcmp(str, "SGSerializable*") == 0) {
00423         *ptype = PT_SGOBJECT; return true; }
00424 
00425     /* Make sure that the compiler will warn at this position.  */
00426     switch (*ptype) {
00427     case PT_BOOL: case PT_CHAR: case PT_INT8: case PT_UINT8:
00428     case PT_INT16: case PT_UINT16: case PT_INT32: case PT_UINT32:
00429     case PT_INT64: case PT_UINT64: case PT_FLOAT32: case PT_FLOAT64:
00430     case PT_FLOATMAX: case PT_COMPLEX128: case PT_SGOBJECT: break;
00431     case PT_UNDEFINED: default:
00432         SG_SERROR("Implementation error: undefined primitive type\n");
00433         break;
00434     }
00435 
00436     return false;
00437 }
00438 
00439 size_t TSGDataType::get_size()
00440 {
00441     switch (m_stype)
00442     {
00443         case ST_NONE:
00444             return get_num_elements()*sizeof_ptype();
00445         case ST_STRING:
00446             if (m_ptype==PT_SGOBJECT)
00447                 return 0;
00448 
00449             return get_num_elements()*sizeof_stype();
00450         case ST_SPARSE:
00451             if (m_ptype==PT_SGOBJECT)
00452                 return 0;
00453 
00454             return get_num_elements()*sizeof_sparseentry(m_ptype);
00455         case ST_UNDEFINED: default:
00456             SG_SERROR("Implementation error: undefined structure type\n");
00457             break;
00458     }
00459 
00460     return 0;
00461 }
00462 
00463 int64_t TSGDataType::get_num_elements()
00464 {
00465     switch (m_ctype)
00466     {
00467         case CT_SCALAR:
00468             return 1;
00469         case CT_VECTOR: case CT_SGVECTOR:
00470             /* length_y contains the length for vectors */
00471             return *m_length_y;
00472         case CT_MATRIX: case CT_SGMATRIX:
00473             return (*m_length_y)*(*m_length_x);
00474         case CT_NDARRAY:
00475             SG_SNOTIMPLEMENTED
00476         case CT_UNDEFINED: default:
00477             SG_SERROR("Implementation error: undefined container type\n");
00478             break;
00479     }
00480     return 0;
00481 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation