SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Chiyuan Zhang 00008 * Copyright (C) 2012 Chiyuan Zhang 00009 */ 00010 00011 #include <vector> 00012 #include <limits> 00013 #include <algorithm> 00014 00015 #include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h> 00016 #include <shogun/multiclass/ecoc/ECOCUtil.h> 00017 00018 using namespace shogun; 00019 00020 CECOCRandomSparseEncoder::CECOCRandomSparseEncoder(int32_t maxiter, int32_t codelen, 00021 float64_t pzero, float64_t pposone, float64_t pnegone) 00022 :m_maxiter(maxiter), m_codelen(codelen), m_pzero(pzero), m_pposone(pposone), m_pnegone(pnegone) 00023 { 00024 if (!check_probability(pzero, pposone, pnegone)) 00025 SG_ERROR("probability of 0, +1 and -1 must sum to one") 00026 00027 init(); 00028 } 00029 00030 void CECOCRandomSparseEncoder::init() 00031 { 00032 SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE); 00033 SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE); 00034 SG_ADD(&m_pzero, "pzero", "probability of 0", MS_NOT_AVAILABLE); 00035 SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE); 00036 SG_ADD(&m_pnegone, "pnegone", "probability of -1", MS_NOT_AVAILABLE); 00037 } 00038 00039 void CECOCRandomSparseEncoder::set_probability(float64_t pzero, float64_t pposone, float64_t pnegone) 00040 { 00041 if (!check_probability(pzero, pposone, pnegone)) 00042 SG_ERROR("probability of 0, +1 and -1 must sum to one") 00043 00044 m_pzero = pzero; 00045 m_pposone = pposone; 00046 m_pnegone = pnegone; 00047 } 00048 00049 SGMatrix<int32_t> CECOCRandomSparseEncoder::create_codebook(int32_t num_classes) 00050 { 00051 int32_t codelen = m_codelen; 00052 if (codelen <= 0) 00053 codelen = get_default_code_length(num_classes); 00054 00055 00056 SGMatrix<int32_t> best_codebook(codelen, num_classes, true); 00057 int32_t best_dist = 0; 00058 00059 SGMatrix<int32_t> codebook(codelen, num_classes); 00060 std::vector<int32_t> random_sel(num_classes); 00061 int32_t n_iter = 0; 00062 00063 while (true) 00064 { 00065 // fill codebook 00066 codebook.zero(); 00067 for (int32_t i=0; i < codelen; ++i) 00068 { 00069 // randomly select two positions 00070 for (int32_t j=0; j < num_classes; ++j) 00071 random_sel[j] = j; 00072 std::random_shuffle(random_sel.begin(), random_sel.end()); 00073 if (CMath::random(0.0, 1.0) > 0.5) 00074 { 00075 codebook(i, random_sel[0]) = +1; 00076 codebook(i, random_sel[1]) = -1; 00077 } 00078 else 00079 { 00080 codebook(i, random_sel[0]) = -1; 00081 codebook(i, random_sel[1]) = +1; 00082 } 00083 00084 // assign the remaining positions 00085 for (int32_t j=2; j < num_classes; ++j) 00086 { 00087 float64_t randval = CMath::random(0.0, 1.0); 00088 if (randval > m_pzero) 00089 { 00090 if (randval > m_pzero+m_pposone) 00091 codebook(i, random_sel[j]) = -1; 00092 else 00093 codebook(i, random_sel[j]) = +1; 00094 } 00095 } 00096 } 00097 00098 // see if this is a better codebook 00099 // compute the minimum pairwise code distance 00100 int32_t min_dist = std::numeric_limits<int32_t>::max(); 00101 for (int32_t i=0; i < num_classes; ++i) 00102 { 00103 for (int32_t j=i+1; j < num_classes; ++j) 00104 { 00105 int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i), 00106 codebook.get_column_vector(j), codelen); 00107 if (dist < min_dist) 00108 min_dist = dist; 00109 } 00110 } 00111 00112 if (min_dist > best_dist) 00113 { 00114 best_dist = min_dist; 00115 std::copy(codebook.matrix, codebook.matrix + codelen*num_classes, 00116 best_codebook.matrix); 00117 } 00118 00119 if (++n_iter >= m_maxiter) 00120 break; 00121 } 00122 00123 return best_codebook; 00124 }