SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SerializableHdf5Reader00.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 
00011 #include <shogun/lib/config.h>
00012 #ifdef HAVE_HDF5
00013 
00014 #include <shogun/io/SerializableHdf5Reader00.h>
00015 
00016 using namespace shogun;
00017 
00018 SerializableHdf5Reader00::SerializableHdf5Reader00(
00019     CSerializableHdf5File* file) { m_file = file; }
00020 
00021 SerializableHdf5Reader00::~SerializableHdf5Reader00() {}
00022 
00023 bool
00024 SerializableHdf5Reader00::read_scalar_wrapped(
00025     const TSGDataType* type, void* param)
00026 {
00027     /* note: param may well be NULL. This doesnt hurt if m->y or m->x are -1 */
00028     ASSERT(type);
00029 
00030     CSerializableHdf5File::type_item_t* m
00031         = m_file->m_stack_type.back();
00032 
00033     switch (type->m_stype) {
00034     case ST_NONE:
00035         if (m->y != 0 || m->x != 0) return true;
00036         break;
00037     case ST_STRING:
00038         if (m->y == -1 || m->x == -1) break;
00039 
00040         if (m->sub_y != 0) return true;
00041 
00042         ASSERT(param);
00043         memcpy(param, m->vltype[m->x*m->dims[1] + m->y].p,
00044                m->vltype[m->x*m->dims[1] + m->y].len
00045                *type->sizeof_ptype());
00046 
00047         return true;
00048     case ST_SPARSE:
00049         if (m->sub_y != 0) return true;
00050         break;
00051     case ST_UNDEFINED:
00052         return false;
00053     }
00054 
00055     hid_t mem_type_id;
00056     if ((mem_type_id = CSerializableHdf5File::new_stype2hdf5(
00057              type->m_stype, type->m_ptype)) < 0) return false;
00058 
00059     switch (type->m_stype) {
00060     case ST_NONE:
00061         if (H5Dread(m->dset, mem_type_id, H5S_ALL, H5S_ALL,
00062                     H5P_DEFAULT, param) < 0) return false;
00063         break;
00064     case ST_STRING:
00065         if (H5Dread(m->dset, mem_type_id, H5S_ALL, H5S_ALL,
00066                     H5P_DEFAULT, m->vltype) < 0) return false;
00067         break;
00068     case ST_SPARSE:
00069         if (H5Dread(m->dset, m->dtype, H5S_ALL, H5S_ALL,
00070                     H5P_DEFAULT, m->sparse_ptr) < 0) return false;
00071         break;
00072     case ST_UNDEFINED:
00073         return false;
00074     }
00075 
00076     if (H5Tclose(mem_type_id) < 0) return false;
00077 
00078     return true;
00079 }
00080 
00081 bool
00082 SerializableHdf5Reader00::read_cont_begin_wrapped(
00083     const TSGDataType* type, index_t* len_read_y, index_t* len_read_x)
00084 {
00085     CSerializableHdf5File::type_item_t* m
00086         = m_file->m_stack_type.back();
00087 
00088     if (type->m_ptype != PT_SGOBJECT) {
00089         switch (type->m_ctype) {
00090         case CT_NDARRAY:
00091             SG_NOTIMPLEMENTED
00092         case CT_SCALAR:
00093             SG_ERROR("read_cont_begin_wrapped(): Implementation error"
00094                      " during writing Hdf5File (0)!");
00095             return false;
00096         case CT_VECTOR: case CT_SGVECTOR: *len_read_y = m->dims[0]; break;
00097         case CT_MATRIX: case CT_SGMATRIX:
00098             *len_read_x = m->dims[0]; *len_read_y = m->dims[1];
00099             break;
00100         default: return false;
00101         }
00102 
00103         return true;
00104     }
00105 
00106     if (!m_file->attr_exists(STR_IS_CONT)) return false;
00107 
00108     string_t ctype_buf, buf;
00109     type->to_string(ctype_buf, STRING_LEN);
00110     if (!m_file->attr_read_string(STR_CTYPE_NAME, buf, STRING_LEN))
00111         return false;
00112     if (strcmp(ctype_buf, buf) != 0) return false;
00113 
00114     switch (type->m_ctype) {
00115     case CT_NDARRAY:
00116         SG_NOTIMPLEMENTED
00117     case CT_SCALAR:
00118         SG_ERROR("read_cont_begin_wrapped(): Implementation error"
00119                  " during writing Hdf5File (1)!");
00120         return false;
00121     case CT_MATRIX: case CT_SGMATRIX:
00122         if (!m_file->attr_read_scalar(TYPE_INDEX, STR_LENGTH_X,
00123                                       len_read_x))
00124             return false;
00125         /* break;  */
00126     case CT_VECTOR: case CT_SGVECTOR:
00127         if (!m_file->attr_read_scalar(TYPE_INDEX, STR_LENGTH_Y,
00128                                       len_read_y))
00129             return false;
00130         break;
00131     default: return false;
00132     }
00133 
00134     return true;
00135 }
00136 
00137 bool
00138 SerializableHdf5Reader00::read_cont_end_wrapped(
00139     const TSGDataType* type, index_t len_read_y, index_t len_read_x)
00140 {
00141     return true;
00142 }
00143 
00144 bool
00145 SerializableHdf5Reader00::read_string_begin_wrapped(
00146     const TSGDataType* type, index_t* length)
00147 {
00148     CSerializableHdf5File::type_item_t* m
00149         = m_file->m_stack_type.back();
00150 
00151     if (m->y == 0 && m->x == 0) {
00152         m->y = -1; m->x = -1;
00153         read_scalar_wrapped(type, NULL);
00154         m->y = 0; m->x = 0;
00155     }
00156 
00157     *length = m->vltype[m->x*m->dims[1] + m->y].len;
00158 
00159     return true;
00160 }
00161 
00162 bool
00163 SerializableHdf5Reader00::read_string_end_wrapped(
00164     const TSGDataType* type, index_t length)
00165 {
00166     return true;
00167 }
00168 
00169 bool
00170 SerializableHdf5Reader00::read_stringentry_begin_wrapped(
00171     const TSGDataType* type, index_t y)
00172 {
00173     CSerializableHdf5File::type_item_t* m
00174         = m_file->m_stack_type.back();
00175 
00176     m->sub_y = y;
00177 
00178     return true;
00179 }
00180 
00181 bool
00182 SerializableHdf5Reader00::read_stringentry_end_wrapped(
00183     const TSGDataType* type, index_t y)
00184 {
00185     return true;
00186 }
00187 
00188 bool
00189 SerializableHdf5Reader00::read_sparse_begin_wrapped(
00190     const TSGDataType* type, index_t* length)
00191 {
00192     CSerializableHdf5File::type_item_t* m_prev
00193         = m_file->m_stack_type.back();
00194 
00195     if(!m_file->dspace_select(type->m_ctype, m_prev->y, m_prev->x))
00196         return false;
00197 
00198     CSerializableHdf5File::type_item_t* m = new CSerializableHdf5File
00199         ::type_item_t(m_prev->name);
00200     m_file->m_stack_type.push_back(m);
00201 
00202     /* ************************************************************ */
00203 
00204     if (!m_file->group_open(m->name, STR_GROUP_PREFIX)) return false;
00205     if (!m_file->attr_exists(STR_IS_SPARSE)) return false;
00206 
00207     string_t name;
00208     CSerializableHdf5File::index2string(
00209         name, STRING_LEN, type->m_ctype, m_prev->y, m_prev->x);
00210     if ((m->dset = H5Dopen2(m_file->m_stack_h5stream.back(), name,
00211                             H5P_DEFAULT)) < 0)
00212         return false;
00213 
00214     if ((m->dtype = H5Dget_type(m->dset)) < 0) return false;
00215     if (!CSerializableHdf5File::isequal_stype2hdf5(
00216             type->m_stype, type->m_ptype, m->dtype)) return false;
00217 
00218     if ((m->dspace = H5Dget_space(m->dset)) < 0) return false;
00219     if (H5Sget_simple_extent_ndims(m->dspace) != 1) return false;
00220 
00221 
00222     if ((m->rank = H5Sget_simple_extent_dims(m->dspace, m->dims, NULL)
00223             ) < 0) return false;
00224 
00225     if (H5Sget_simple_extent_type(m->dspace) != H5S_NULL
00226         && m->rank != 1) return false;
00227 
00228     *length = m->dims[0];
00229 
00230     /* ************************************************************ */
00231 
00232     char* buf = SG_MALLOC(char, CSerializableHdf5File::sizeof_sparsetype());
00233 
00234     hid_t mem_type_id;
00235     if ((mem_type_id = CSerializableHdf5File::new_sparsetype()) < 0)
00236         return false;
00237 
00238     hid_t mem_space_id;
00239     if ((mem_space_id = H5Screate_simple(0, NULL, NULL)) < 0)
00240         return false;
00241 
00242     if (H5Dread(m_prev->dset, mem_type_id, mem_space_id,
00243                 m_prev->dspace, H5P_DEFAULT, buf) < 0) return false;
00244 
00245     if (H5Sclose(mem_space_id) < 0) return false;
00246     if (H5Tclose(mem_type_id) < 0) return false;
00247 
00248     delete buf;
00249 
00250     return true;
00251 }
00252 
00253 bool
00254 SerializableHdf5Reader00::read_sparse_end_wrapped(
00255     const TSGDataType* type, index_t length)
00256 {
00257     if (!m_file->group_close()) return false;
00258 
00259     delete m_file->m_stack_type.back();
00260     m_file->m_stack_type.pop_back();
00261 
00262     return true;
00263 }
00264 
00265 bool
00266 SerializableHdf5Reader00::read_sparseentry_begin_wrapped(
00267     const TSGDataType* type, SGSparseVectorEntry<char>* first_entry,
00268     index_t* feat_index, index_t y)
00269 {
00270     CSerializableHdf5File::type_item_t* m
00271         = m_file->m_stack_type.back();
00272 
00273     m->sparse_ptr = first_entry;
00274     m->sub_y = y;
00275 
00276     return true;
00277 }
00278 
00279 bool
00280 SerializableHdf5Reader00::read_sparseentry_end_wrapped(
00281     const TSGDataType* type, SGSparseVectorEntry<char>* first_entry,
00282     index_t* feat_index, index_t y)
00283 {
00284     return true;
00285 }
00286 
00287 bool
00288 SerializableHdf5Reader00::read_item_begin_wrapped(
00289     const TSGDataType* type, index_t y, index_t x)
00290 {
00291     CSerializableHdf5File::type_item_t* m
00292         = m_file->m_stack_type.back();
00293     m->y = y; m->x = x;
00294 
00295     if (type->m_ptype != PT_SGOBJECT) return true;
00296 
00297     string_t name;
00298     if (!CSerializableHdf5File::index2string(
00299             name, STRING_LEN, type->m_ctype, y, x)) return false;
00300     if (!m_file->group_open(name, "")) return false;
00301 
00302     return true;
00303 }
00304 
00305 bool
00306 SerializableHdf5Reader00::read_item_end_wrapped(
00307     const TSGDataType* type, index_t y, index_t x)
00308 {
00309     if (type->m_ptype == PT_SGOBJECT)
00310         if (!m_file->group_close()) return false;
00311 
00312     return true;
00313 }
00314 
00315 bool
00316 SerializableHdf5Reader00::read_sgserializable_begin_wrapped(
00317     const TSGDataType* type, char* sgserializable_name,
00318     EPrimitiveType* generic)
00319 {
00320     if (!m_file->attr_exists(STR_IS_SGSERIALIZABLE)) return false;
00321 
00322     if (m_file->attr_exists(STR_IS_NULL)) {
00323         *sgserializable_name = '\0'; return true;
00324     }
00325 
00326     if (!m_file->attr_read_string(
00327             STR_INSTANCE_NAME, sgserializable_name, STRING_LEN))
00328         return false;
00329 
00330     if (m_file->attr_exists(STR_GENERIC_NAME)) {
00331         string_t buf;
00332         if (!m_file->attr_read_string(
00333                 STR_GENERIC_NAME, buf, STRING_LEN)) return false;
00334         if (!TSGDataType::string_to_ptype(generic, buf))
00335             return false;
00336     }
00337 
00338     return true;
00339 }
00340 
00341 bool
00342 SerializableHdf5Reader00::read_sgserializable_end_wrapped(
00343     const TSGDataType* type, const char* sgserializable_name,
00344     EPrimitiveType generic)
00345 {
00346     return true;
00347 }
00348 
00349 bool
00350 SerializableHdf5Reader00::read_type_begin_wrapped(
00351     const TSGDataType* type, const char* name, const char* prefix)
00352 {
00353     CSerializableHdf5File::type_item_t* m = new CSerializableHdf5File
00354         ::type_item_t(name);
00355     m_file->m_stack_type.push_back(m);
00356 
00357     if (type->m_ptype == PT_SGOBJECT) {
00358         if (!m_file->group_open(name, "")) return false;
00359         return true;
00360     }
00361 
00362     if ((m->dset = H5Dopen2(m_file->m_stack_h5stream.back(), name,
00363                             H5P_DEFAULT)) < 0)
00364         return false;
00365 
00366     if ((m->dtype = H5Dget_type(m->dset)) < 0) return false;
00367     if (!CSerializableHdf5File::isequal_stype2hdf5(
00368             type->m_stype, type->m_ptype, m->dtype)) return false;
00369 
00370     if ((m->dspace = H5Dget_space(m->dset)) < 0) return false;
00371 
00372     if (H5Sget_simple_extent_ndims(m->dspace) > 2) return false;
00373     if ((m->rank = H5Sget_simple_extent_dims(m->dspace, m->dims, NULL)
00374             ) < 0) return false;
00375 
00376     switch (type->m_ctype) {
00377     case CT_NDARRAY:
00378         SG_NOTIMPLEMENTED
00379     case CT_SCALAR:
00380         if (m->rank != 0) return false;
00381         if (type->m_stype == ST_STRING) m->vltype = SG_MALLOC(hvl_t, 1);
00382         break;
00383     case CT_VECTOR: case CT_SGVECTOR:
00384         if (H5Sget_simple_extent_type(m->dspace) != H5S_NULL
00385             && m->rank != 1) return false;
00386         if (type->m_stype == ST_STRING)
00387             m->vltype = SG_MALLOC(hvl_t, m->dims[0]);
00388         break;
00389     case CT_MATRIX: case CT_SGMATRIX:
00390         if (H5Sget_simple_extent_type(m->dspace) != H5S_NULL
00391             && m->rank != 2) return false;
00392         if (type->m_stype == ST_STRING)
00393             m->vltype = SG_MALLOC(hvl_t, m->dims[0] *m->dims[1]);
00394         break;
00395     default: return false;
00396     }
00397 
00398     return true;
00399 }
00400 
00401 bool
00402 SerializableHdf5Reader00::read_type_end_wrapped(
00403     const TSGDataType* type, const char* name, const char* prefix)
00404 {
00405     if (type->m_ptype == PT_SGOBJECT)
00406         if (!m_file->group_close()) return false;
00407 
00408     delete m_file->m_stack_type.back();
00409     m_file->m_stack_type.pop_back();
00410     return true;
00411 }
00412 
00413 #endif /* HAVE_HDF5  */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation