SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
File.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  * Copyright (C) 2010 Berlin Institute of Technology
00010  */
00011 
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 
00015 #include <shogun/io/File.h>
00016 #include <shogun/lib/memory.h>
00017 #include <shogun/features/StringFeatures.h>
00018 #include <shogun/features/SparseFeatures.h>
00019 
00020 using namespace shogun;
00021 
00022 CFile::CFile() : CSGObject()
00023 {
00024     file=NULL;
00025     filename=NULL;
00026     variable_name=NULL;
00027 }
00028 
00029 CFile::CFile(FILE* f, const char* name) : CSGObject()
00030 {
00031     file=f;
00032     filename=NULL;
00033     variable_name=NULL;
00034 
00035     if (name)
00036         set_variable_name(name);
00037 }
00038 
00039 CFile::CFile(int fd, const char* mode, const char* name) : CSGObject()
00040 {
00041     file=fdopen(fd, mode);
00042     filename=NULL;
00043     variable_name=NULL;
00044 
00045     if (name)
00046         set_variable_name(name);
00047 }
00048 
00049 CFile::CFile(const char* fname, char rw, const char* name) : CSGObject()
00050 {
00051     variable_name=NULL;
00052     task=rw;
00053     filename=get_strdup(fname);
00054     char mode[2];
00055     mode[0]=rw;
00056     mode[1]='\0';
00057 
00058     if (rw=='r' || rw == 'w')
00059     {
00060         if (filename)
00061         {
00062             if (!(file=fopen((const char*) filename, (const char*) mode)))
00063                 SG_ERROR("Error opening file '%s'\n", filename)
00064         }
00065     }
00066     else
00067         SG_ERROR("unknown mode '%c'\n", mode[0])
00068 
00069     if (name)
00070         set_variable_name(name);
00071 }
00072 
00073 void CFile::get_vector(bool*& vector, int32_t& len)
00074 {
00075     int32_t* int_vector;
00076     get_vector(int_vector, len);
00077 
00078     ASSERT(len>0)
00079     vector= SG_MALLOC(bool, len);
00080 
00081     for (int32_t i=0; i<len; i++)
00082         vector[i]= (int_vector[i]!=0);
00083 
00084     SG_FREE(int_vector);
00085 }
00086 
00087 void CFile::set_vector(const bool* vector, int32_t len)
00088 {
00089     int32_t* int_vector = SG_MALLOC(int32_t, len);
00090     for (int32_t i=0;i<len;i++)
00091     {
00092         if (vector[i])
00093             int_vector[i]=1;
00094         else
00095             int_vector[i]=0;
00096     }
00097     set_vector(int_vector,len);
00098     SG_FREE(int_vector);
00099 }
00100 
00101 void CFile::get_matrix(bool*& matrix, int32_t& num_feat, int32_t& num_vec)
00102 {
00103     uint8_t * byte_matrix;
00104     get_matrix(byte_matrix,num_feat,num_vec);
00105 
00106     ASSERT(num_feat > 0 && num_vec > 0)
00107     matrix = SG_MALLOC(bool, num_feat*num_vec);
00108 
00109     for(int32_t i = 0;i < num_vec;i++)
00110     {
00111         for(int32_t j = 0;j < num_feat;j++)
00112             matrix[i*num_feat+j] = byte_matrix[i*num_feat+j] != 0 ? 1 : 0;
00113     }
00114 
00115     SG_FREE(byte_matrix);
00116 }
00117 
00118 void CFile::set_matrix(const bool* matrix, int32_t num_feat, int32_t num_vec)
00119 {
00120     uint8_t * byte_matrix = SG_MALLOC(uint8_t, num_feat*num_vec);
00121     for(int32_t i = 0;i < num_vec;i++)
00122     {
00123         for(int32_t j = 0;j < num_feat;j++)
00124             byte_matrix[i*num_feat+j] = matrix[i*num_feat+j] != 0 ? 1 : 0;
00125     }
00126 
00127     set_matrix(byte_matrix,num_feat,num_vec);
00128 
00129     SG_FREE(byte_matrix);
00130 }
00131 
00132 void CFile::get_string_list(
00133         SGString<bool>*& strings, int32_t& num_str,
00134         int32_t& max_string_len)
00135 {
00136     SGString<int8_t>* strs;
00137     get_string_list(strs, num_str, max_string_len);
00138 
00139     ASSERT(num_str>0 && max_string_len>0)
00140     strings=SG_MALLOC(SGString<bool>, num_str);
00141 
00142     for(int32_t i = 0;i < num_str;i++)
00143     {
00144         strings[i].slen = strs[i].slen;
00145                 strings[i].string = SG_MALLOC(bool, strs[i].slen);
00146         for(int32_t j = 0;j < strs[i].slen;j++)
00147         strings[i].string[j] = strs[i].string[j] != 0 ? 1 : 0;
00148     }
00149 
00150     for(int32_t i = 0;i < num_str;i++)
00151         SG_FREE(strs[i].string);
00152     SG_FREE(strs);
00153 }
00154 
00155 void CFile::set_string_list(const SGString<bool>* strings, int32_t num_str)
00156 {
00157     SGString<int8_t> * strs = SG_MALLOC(SGString<int8_t>, num_str);
00158 
00159     for(int32_t i = 0;i < num_str;i++)
00160     {
00161         strs[i].slen = strings[i].slen;
00162         strs[i].string = SG_MALLOC(int8_t, strings[i].slen);
00163         for(int32_t j = 0;j < strings[i].slen;j++)
00164         strs[i].string[j] = strings[i].string[j] != 0 ? 1 : 0;
00165     }
00166 
00167     set_string_list(strs,num_str);
00168 
00169     for(int32_t i = 0;i < num_str;i++)
00170         SG_FREE(strs[i].string);
00171     SG_FREE(strs);
00172 }
00173 
00174 CFile::~CFile()
00175 {
00176     close();
00177 }
00178 
00179 void CFile::set_variable_name(const char* name)
00180 {
00181     SG_FREE(variable_name);
00182     variable_name=strdup(name);
00183 }
00184 
00185 char* CFile::get_variable_name()
00186 {
00187     return strdup(variable_name);
00188 }
00189 
/* SPARSE_VECTOR_GETTER(type) defines, for one element type, the pair
 * CFile::set_sparse_vector() / CFile::get_sparse_vector() in terms of the
 * sparse matrix accessors.
 *
 * set_sparse_vector(entries, num_feat):
 *   wraps entries in a local SGSparseVector<type> (constructed with
 *   entries, num_feat, false) and passes it to set_sparse_matrix() as a
 *   matrix consisting of one vector, with 0 given as the second argument.
 *   NOTE(review): the trailing `false` presumably tells the wrapper not to
 *   take ownership of entries - confirm against SGSparseVector.
 *
 * get_sparse_vector(entries, num_feat):
 *   calls get_sparse_matrix(), asserts that exactly one vector was returned,
 *   and hands out that vector's `features` pointer and `num_feat_entries`
 *   count. The second value returned by get_sparse_matrix() is discarded.
 *   NOTE(review): the array `v` returned by get_sparse_matrix() is not
 *   released here - confirm who owns it.
 */
#define SPARSE_VECTOR_GETTER(type)                                      \
void CFile::set_sparse_vector(                                          \
            const SGSparseVectorEntry<type>* entries, int32_t num_feat) \
{                                                                       \
    SGSparseVector<type> v((SGSparseVectorEntry<type>*) entries, num_feat, false);  \
    set_sparse_matrix(&v, 0, 1);                                        \
}                                                                       \
                                                                        \
void CFile::get_sparse_vector(                                          \
            SGSparseVectorEntry<type>*& entries, int32_t& num_feat)     \
{                                                                       \
    SGSparseVector<type>* v;                                            \
    int32_t dummy;                                                      \
    int32_t nvec;                                                       \
    get_sparse_matrix(v, dummy, nvec);                                  \
    ASSERT(nvec==1)                                                 \
    entries=v->features;                                                \
    num_feat=v->num_feat_entries;                                       \
}
/* Instantiate the accessor pair for every supported element type. */
SPARSE_VECTOR_GETTER(bool)
SPARSE_VECTOR_GETTER(int8_t)
SPARSE_VECTOR_GETTER(uint8_t)
SPARSE_VECTOR_GETTER(char)
SPARSE_VECTOR_GETTER(int32_t)
SPARSE_VECTOR_GETTER(uint32_t)
SPARSE_VECTOR_GETTER(float32_t)
SPARSE_VECTOR_GETTER(float64_t)
SPARSE_VECTOR_GETTER(floatmax_t)
SPARSE_VECTOR_GETTER(int16_t)
SPARSE_VECTOR_GETTER(uint16_t)
SPARSE_VECTOR_GETTER(int64_t)
SPARSE_VECTOR_GETTER(uint64_t)

/* The helper macro is only needed for the definitions above. */
#undef SPARSE_VECTOR_GETTER
00224 
00225 
00226 char* CFile::read_whole_file(char* fname, size_t& len)
00227 {
00228     FILE* tmpf=fopen(fname, "r");
00229     ASSERT(tmpf)
00230     fseek(tmpf,0,SEEK_END);
00231     len=ftell(tmpf);
00232     ASSERT(len>0)
00233     rewind(tmpf);
00234     char* result = SG_MALLOC(char, len);
00235     size_t total=fread(result,1,len,tmpf);
00236     ASSERT(total==len)
00237     fclose(tmpf);
00238     return result;
00239 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation