SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
RealFileFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/features/RealFileFeatures.h>
00012 #include <shogun/features/Features.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/lib/memory.h>
00015 
00016 #include <stdio.h>
00017 #include <string.h>
00018 
00019 using namespace shogun;
00020 
00021 CRealFileFeatures::CRealFileFeatures()
00022 {
00023     SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n")
00024     init();
00025 }
00026 
00027 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
00028 : CDenseFeatures<float64_t>(size)
00029 {
00030     init();
00031 
00032     working_file=fopen(fname, "r");
00033     working_filename=get_strdup(fname);
00034     ASSERT(working_file)
00035     status=load_base_data();
00036 }
00037 
00038 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
00039 : CDenseFeatures<float64_t>(size)
00040 {
00041     init();
00042 
00043     ASSERT(working_file)
00044     status=load_base_data();
00045 }
00046 
00047 void CRealFileFeatures::init()
00048 {
00049     working_file=NULL;
00050     working_filename=get_strdup("");
00051     intlen=0;
00052     doublelen=0;
00053     endian=0;
00054     fourcc=0;
00055     preprocd=0;
00056     labels=NULL;
00057     status=false;
00058 
00059     unset_generic();
00060 }
00061 
00062 CRealFileFeatures::~CRealFileFeatures()
00063 {
00064     SG_FREE(working_filename);
00065     SG_FREE(labels);
00066 }
00067 
00068 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig)
00069 : CDenseFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
00070 {
00071     if (orig.working_filename)
00072         working_filename=get_strdup(orig.working_filename);
00073     if (orig.labels && get_num_vectors())
00074     {
00075         labels=SG_MALLOC(int32_t, get_num_vectors());
00076         memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
00077     }
00078 }
00079 
00080 float64_t* CRealFileFeatures::compute_feature_vector(
00081     int32_t num, int32_t &len, float64_t* target)
00082 {
00083     ASSERT(num<num_vectors)
00084     len=num_features;
00085     float64_t* featurevector=target;
00086     if (!featurevector)
00087         featurevector=SG_MALLOC(float64_t, num_features);
00088     ASSERT(working_file)
00089     fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
00090     ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features)
00091     return featurevector;
00092 }
00093 
00094 float64_t* CRealFileFeatures::load_feature_matrix()
00095 {
00096     ASSERT(working_file)
00097     fseek(working_file, filepos, SEEK_SET);
00098     free_feature_matrix();
00099 
00100     SG_INFO("allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0)
00101     free_feature_matrix();
00102     feature_matrix=SGMatrix<float64_t>(num_features,num_vectors);
00103 
00104     SG_INFO("loading... be patient.\n")
00105 
00106     for (int32_t i=0; i<(int32_t) num_vectors; i++)
00107     {
00108         if (!(i % (num_vectors/10+1)))
00109             SG_PRINT("%02d%%.", (int) (100.0*i/num_vectors))
00110         else if (!(i % (num_vectors/200+1)))
00111             SG_PRINT(".")
00112 
00113         ASSERT(fread(&feature_matrix.matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features)
00114     }
00115     SG_DONE()
00116 
00117     return feature_matrix.matrix;
00118 }
00119 
00120 int32_t CRealFileFeatures::get_label(int32_t idx)
00121 {
00122     ASSERT(idx<num_vectors)
00123     if (labels)
00124         return labels[idx];
00125     return 0;
00126 }
00127 
00128 bool CRealFileFeatures::load_base_data()
00129 {
00130     ASSERT(working_file)
00131     uint32_t num_vec=0;
00132     uint32_t num_feat=0;
00133 
00134     ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1)
00135     ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1)
00136     ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1)
00137     ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1)
00138     ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1)
00139     ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1)
00140     ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1)
00141     SG_INFO("detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd)
00142     filepos=ftell(working_file);
00143     set_num_vectors(num_vec);
00144     set_num_features(num_feat);
00145     fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET);
00146     SG_FREE(labels);
00147     labels=SG_MALLOC(int, num_vec);
00148     ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec)
00149     return true;
00150 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation