SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2010 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #include <stdio.h> 00013 #include <stdlib.h> 00014 00015 #include <shogun/io/File.h> 00016 #include <shogun/lib/memory.h> 00017 #include <shogun/features/StringFeatures.h> 00018 #include <shogun/features/SparseFeatures.h> 00019 00020 using namespace shogun; 00021 00022 CFile::CFile() : CSGObject() 00023 { 00024 file=NULL; 00025 filename=NULL; 00026 variable_name=NULL; 00027 } 00028 00029 CFile::CFile(FILE* f, const char* name) : CSGObject() 00030 { 00031 file=f; 00032 filename=NULL; 00033 variable_name=NULL; 00034 00035 if (name) 00036 set_variable_name(name); 00037 } 00038 00039 CFile::CFile(int fd, const char* mode, const char* name) : CSGObject() 00040 { 00041 file=fdopen(fd, mode); 00042 filename=NULL; 00043 variable_name=NULL; 00044 00045 if (name) 00046 set_variable_name(name); 00047 } 00048 00049 CFile::CFile(const char* fname, char rw, const char* name) : CSGObject() 00050 { 00051 variable_name=NULL; 00052 task=rw; 00053 filename=get_strdup(fname); 00054 char mode[2]; 00055 mode[0]=rw; 00056 mode[1]='\0'; 00057 00058 if (rw=='r' || rw == 'w') 00059 { 00060 if (filename) 00061 { 00062 if (!(file=fopen((const char*) filename, (const char*) mode))) 00063 SG_ERROR("Error opening file '%s'\n", filename) 00064 } 00065 } 00066 else 00067 SG_ERROR("unknown mode '%c'\n", mode[0]) 00068 00069 if (name) 00070 set_variable_name(name); 00071 } 00072 00073 void CFile::get_vector(bool*& vector, int32_t& len) 00074 { 00075 int32_t* int_vector; 00076 get_vector(int_vector, len); 00077 00078 ASSERT(len>0) 00079 vector= SG_MALLOC(bool, len); 00080 00081 for (int32_t i=0; i<len; i++) 00082 vector[i]= (int_vector[i]!=0); 00083 00084 SG_FREE(int_vector); 00085 } 00086 00087 void CFile::set_vector(const bool* vector, int32_t len) 00088 { 00089 int32_t* int_vector = SG_MALLOC(int32_t, len); 00090 for (int32_t i=0;i<len;i++) 00091 { 00092 if (vector[i]) 00093 int_vector[i]=1; 00094 else 00095 int_vector[i]=0; 00096 } 00097 set_vector(int_vector,len); 00098 SG_FREE(int_vector); 00099 } 00100 00101 void CFile::get_matrix(bool*& matrix, int32_t& num_feat, int32_t& num_vec) 00102 { 00103 uint8_t * byte_matrix; 00104 get_matrix(byte_matrix,num_feat,num_vec); 00105 00106 ASSERT(num_feat > 0 && num_vec > 0) 00107 matrix = SG_MALLOC(bool, num_feat*num_vec); 00108 00109 for(int32_t i = 0;i < num_vec;i++) 00110 { 00111 for(int32_t j = 0;j < num_feat;j++) 00112 matrix[i*num_feat+j] = byte_matrix[i*num_feat+j] != 0 ? 1 : 0; 00113 } 00114 00115 SG_FREE(byte_matrix); 00116 } 00117 00118 void CFile::set_matrix(const bool* matrix, int32_t num_feat, int32_t num_vec) 00119 { 00120 uint8_t * byte_matrix = SG_MALLOC(uint8_t, num_feat*num_vec); 00121 for(int32_t i = 0;i < num_vec;i++) 00122 { 00123 for(int32_t j = 0;j < num_feat;j++) 00124 byte_matrix[i*num_feat+j] = matrix[i*num_feat+j] != 0 ? 1 : 0; 00125 } 00126 00127 set_matrix(byte_matrix,num_feat,num_vec); 00128 00129 SG_FREE(byte_matrix); 00130 } 00131 00132 void CFile::get_string_list( 00133 SGString<bool>*& strings, int32_t& num_str, 00134 int32_t& max_string_len) 00135 { 00136 SGString<int8_t>* strs; 00137 get_string_list(strs, num_str, max_string_len); 00138 00139 ASSERT(num_str>0 && max_string_len>0) 00140 strings=SG_MALLOC(SGString<bool>, num_str); 00141 00142 for(int32_t i = 0;i < num_str;i++) 00143 { 00144 strings[i].slen = strs[i].slen; 00145 strings[i].string = SG_MALLOC(bool, strs[i].slen); 00146 for(int32_t j = 0;j < strs[i].slen;j++) 00147 strings[i].string[j] = strs[i].string[j] != 0 ? 1 : 0; 00148 } 00149 00150 for(int32_t i = 0;i < num_str;i++) 00151 SG_FREE(strs[i].string); 00152 SG_FREE(strs); 00153 } 00154 00155 void CFile::set_string_list(const SGString<bool>* strings, int32_t num_str) 00156 { 00157 SGString<int8_t> * strs = SG_MALLOC(SGString<int8_t>, num_str); 00158 00159 for(int32_t i = 0;i < num_str;i++) 00160 { 00161 strs[i].slen = strings[i].slen; 00162 strs[i].string = SG_MALLOC(int8_t, strings[i].slen); 00163 for(int32_t j = 0;j < strings[i].slen;j++) 00164 strs[i].string[j] = strings[i].string[j] != 0 ? 1 : 0; 00165 } 00166 00167 set_string_list(strs,num_str); 00168 00169 for(int32_t i = 0;i < num_str;i++) 00170 SG_FREE(strs[i].string); 00171 SG_FREE(strs); 00172 } 00173 00174 CFile::~CFile() 00175 { 00176 close(); 00177 } 00178 00179 void CFile::set_variable_name(const char* name) 00180 { 00181 SG_FREE(variable_name); 00182 variable_name=strdup(name); 00183 } 00184 00185 char* CFile::get_variable_name() 00186 { 00187 return strdup(variable_name); 00188 } 00189 00190 #define SPARSE_VECTOR_GETTER(type) \ 00191 void CFile::set_sparse_vector( \ 00192 const SGSparseVectorEntry<type>* entries, int32_t num_feat) \ 00193 { \ 00194 SGSparseVector<type> v((SGSparseVectorEntry<type>*) entries, num_feat, false); \ 00195 set_sparse_matrix(&v, 0, 1); \ 00196 } \ 00197 \ 00198 void CFile::get_sparse_vector( \ 00199 SGSparseVectorEntry<type>*& entries, int32_t& num_feat) \ 00200 { \ 00201 SGSparseVector<type>* v; \ 00202 int32_t dummy; \ 00203 int32_t nvec; \ 00204 get_sparse_matrix(v, dummy, nvec); \ 00205 ASSERT(nvec==1) \ 00206 entries=v->features; \ 00207 num_feat=v->num_feat_entries; \ 00208 } 00209 SPARSE_VECTOR_GETTER(bool) 00210 SPARSE_VECTOR_GETTER(int8_t) 00211 SPARSE_VECTOR_GETTER(uint8_t) 00212 SPARSE_VECTOR_GETTER(char) 00213 SPARSE_VECTOR_GETTER(int32_t) 00214 SPARSE_VECTOR_GETTER(uint32_t) 00215 SPARSE_VECTOR_GETTER(float32_t) 00216 SPARSE_VECTOR_GETTER(float64_t) 00217 SPARSE_VECTOR_GETTER(floatmax_t) 00218 SPARSE_VECTOR_GETTER(int16_t) 00219 SPARSE_VECTOR_GETTER(uint16_t) 00220 SPARSE_VECTOR_GETTER(int64_t) 00221 SPARSE_VECTOR_GETTER(uint64_t) 00222 00223 #undef SPARSE_VECTOR_GETTER 00224 00225 00226 char* CFile::read_whole_file(char* fname, size_t& len) 00227 { 00228 FILE* tmpf=fopen(fname, "r"); 00229 ASSERT(tmpf) 00230 fseek(tmpf,0,SEEK_END); 00231 len=ftell(tmpf); 00232 ASSERT(len>0) 00233 rewind(tmpf); 00234 char* result = SG_MALLOC(char, len); 00235 size_t total=fread(result,1,len,tmpf); 00236 ASSERT(total==len) 00237 fclose(tmpf); 00238 return result; 00239 }