SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/features/RealFileFeatures.h> 00012 #include <shogun/features/Features.h> 00013 #include <shogun/io/SGIO.h> 00014 #include <shogun/lib/memory.h> 00015 00016 #include <stdio.h> 00017 #include <string.h> 00018 00019 using namespace shogun; 00020 00021 CRealFileFeatures::CRealFileFeatures() 00022 { 00023 SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n") 00024 init(); 00025 } 00026 00027 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname) 00028 : CDenseFeatures<float64_t>(size) 00029 { 00030 init(); 00031 00032 working_file=fopen(fname, "r"); 00033 working_filename=get_strdup(fname); 00034 ASSERT(working_file) 00035 status=load_base_data(); 00036 } 00037 00038 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file) 00039 : CDenseFeatures<float64_t>(size) 00040 { 00041 init(); 00042 00043 ASSERT(working_file) 00044 status=load_base_data(); 00045 } 00046 00047 void CRealFileFeatures::init() 00048 { 00049 working_file=NULL; 00050 working_filename=get_strdup(""); 00051 intlen=0; 00052 doublelen=0; 00053 endian=0; 00054 fourcc=0; 00055 preprocd=0; 00056 labels=NULL; 00057 status=false; 00058 00059 unset_generic(); 00060 } 00061 00062 CRealFileFeatures::~CRealFileFeatures() 00063 { 00064 SG_FREE(working_filename); 00065 SG_FREE(labels); 00066 } 00067 00068 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig) 00069 : CDenseFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status) 00070 { 00071 if (orig.working_filename) 00072 working_filename=get_strdup(orig.working_filename); 00073 if (orig.labels && get_num_vectors()) 00074 { 00075 labels=SG_MALLOC(int32_t, get_num_vectors()); 00076 memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors()); 00077 } 00078 } 00079 00080 float64_t* CRealFileFeatures::compute_feature_vector( 00081 int32_t num, int32_t &len, float64_t* target) 00082 { 00083 ASSERT(num<num_vectors) 00084 len=num_features; 00085 float64_t* featurevector=target; 00086 if (!featurevector) 00087 featurevector=SG_MALLOC(float64_t, num_features); 00088 ASSERT(working_file) 00089 fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET); 00090 ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features) 00091 return featurevector; 00092 } 00093 00094 float64_t* CRealFileFeatures::load_feature_matrix() 00095 { 00096 ASSERT(working_file) 00097 fseek(working_file, filepos, SEEK_SET); 00098 free_feature_matrix(); 00099 00100 SG_INFO("allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0) 00101 free_feature_matrix(); 00102 feature_matrix=SGMatrix<float64_t>(num_features,num_vectors); 00103 00104 SG_INFO("loading... be patient.\n") 00105 00106 for (int32_t i=0; i<(int32_t) num_vectors; i++) 00107 { 00108 if (!(i % (num_vectors/10+1))) 00109 SG_PRINT("%02d%%.", (int) (100.0*i/num_vectors)) 00110 else if (!(i % (num_vectors/200+1))) 00111 SG_PRINT(".") 00112 00113 ASSERT(fread(&feature_matrix.matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features) 00114 } 00115 SG_DONE() 00116 00117 return feature_matrix.matrix; 00118 } 00119 00120 int32_t CRealFileFeatures::get_label(int32_t idx) 00121 { 00122 ASSERT(idx<num_vectors) 00123 if (labels) 00124 return labels[idx]; 00125 return 0; 00126 } 00127 00128 bool CRealFileFeatures::load_base_data() 00129 { 00130 ASSERT(working_file) 00131 uint32_t num_vec=0; 00132 uint32_t num_feat=0; 00133 00134 ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1) 00135 ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1) 00136 ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1) 00137 ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1) 00138 ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1) 00139 ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1) 00140 ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1) 00141 SG_INFO("detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd) 00142 filepos=ftell(working_file); 00143 set_num_vectors(num_vec); 00144 set_num_features(num_feat); 00145 fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET); 00146 SG_FREE(labels); 00147 labels=SG_MALLOC(int, num_vec); 00148 ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec) 00149 return true; 00150 }