SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Christian Widmer 00008 * Copyright (C) 2009 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELNORMALIZER_H___ 00012 #define _MULTITASKKERNELNORMALIZER_H___ 00013 00014 #include <shogun/kernel/normalizer/KernelNormalizer.h> 00015 #include <shogun/kernel/Kernel.h> 00016 #include <algorithm> 00017 #include <vector> 00018 00019 00020 00021 namespace shogun 00022 { 00032 class CMultitaskKernelNormalizer: public CKernelNormalizer 00033 { 00034 00035 public: 00036 00039 CMultitaskKernelNormalizer() : CKernelNormalizer(), scale(1.0) 00040 { 00041 } 00042 00047 CMultitaskKernelNormalizer(std::vector<int32_t> task_vector) 00048 : CKernelNormalizer(), scale(1.0) 00049 { 00050 00051 num_tasks = get_num_unique_tasks(task_vector); 00052 00053 // set both sides equally 00054 set_task_vector(task_vector); 00055 00056 // init similarity matrix 00057 similarity_matrix = std::vector<float64_t>(num_tasks * num_tasks); 00058 00059 } 00060 00062 virtual ~CMultitaskKernelNormalizer() 00063 { 00064 } 00065 00068 virtual bool init(CKernel* k) 00069 { 00070 00071 //same as first-element normalizer 00072 CFeatures* old_lhs=k->lhs; 00073 CFeatures* old_rhs=k->rhs; 00074 k->lhs=old_lhs; 00075 k->rhs=old_lhs; 00076 00077 if (strcmp(k->get_name(), "WeightedDegree") == 0) { 00078 SG_INFO("using first-element normalization\n") 00079 scale=k->compute(0, 0); 00080 } else { 00081 SG_INFO("no inner normalization for non-WDK kernel\n") 00082 scale=1.0; 00083 } 00084 00085 k->lhs=old_lhs; 00086 k->rhs=old_rhs; 00087 00088 ASSERT(k) 00089 int32_t num_lhs = k->get_num_vec_lhs(); 00090 int32_t num_rhs = k->get_num_vec_rhs(); 00091 ASSERT(num_lhs>0) 00092 ASSERT(num_rhs>0) 00093 00094 //std::cout << "scale: " << scale << std::endl; 00095 00096 return true; 00097 } 00098 00104 int32_t get_num_unique_tasks(std::vector<int32_t> vec) { 00105 00106 //sort 00107 std::sort(vec.begin(), vec.end()); 00108 00109 //reorder tasks with unique prefix 00110 std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end()); 00111 00112 //count unique tasks 00113 int32_t num_vec = std::distance(vec.begin(), endLocation); 00114 00115 return num_vec; 00116 00117 } 00118 00124 virtual float64_t normalize(float64_t value, int32_t idx_lhs, 00125 int32_t idx_rhs) 00126 { 00127 00128 //lookup tasks 00129 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00130 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00131 00132 //lookup similarity 00133 float64_t task_similarity = get_task_similarity(task_idx_lhs, 00134 task_idx_rhs); 00135 00136 //take task similarity into account 00137 float64_t similarity = (value/scale) * task_similarity; 00138 00139 00140 return similarity; 00141 00142 } 00143 00148 virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs) 00149 { 00150 SG_ERROR("normalize_lhs not implemented") 00151 return 0; 00152 } 00153 00158 virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs) 00159 { 00160 SG_ERROR("normalize_rhs not implemented") 00161 return 0; 00162 } 00163 00164 public: 00165 00167 std::vector<int32_t> get_task_vector_lhs() const 00168 { 00169 return task_vector_lhs; 00170 } 00171 00173 void set_task_vector_lhs(std::vector<int32_t> vec) 00174 { 00175 task_vector_lhs = vec; 00176 } 00177 00179 std::vector<int32_t> get_task_vector_rhs() const 00180 { 00181 return task_vector_rhs; 00182 } 00183 00185 void set_task_vector_rhs(std::vector<int32_t> vec) 00186 { 00187 task_vector_rhs = vec; 00188 } 00189 00191 void set_task_vector(std::vector<int32_t> vec) 00192 { 00193 task_vector_lhs = vec; 00194 task_vector_rhs = vec; 00195 } 00196 00202 float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs) 00203 { 00204 00205 ASSERT(task_lhs < num_tasks && task_lhs >= 0) 00206 ASSERT(task_rhs < num_tasks && task_rhs >= 0) 00207 00208 return similarity_matrix[task_lhs * num_tasks + task_rhs]; 00209 00210 } 00211 00217 void set_task_similarity(int32_t task_lhs, int32_t task_rhs, 00218 float64_t similarity) 00219 { 00220 00221 ASSERT(task_lhs < num_tasks && task_lhs >= 0) 00222 ASSERT(task_rhs < num_tasks && task_rhs >= 0) 00223 00224 similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity; 00225 00226 } 00227 00229 virtual const char* get_name() const 00230 { 00231 return "MultitaskKernelNormalizer"; 00232 } 00233 00238 inline CMultitaskKernelNormalizer* KernelNormalizerToMultitaskKernelNormalizer(CKernelNormalizer* n) 00239 { 00240 return dynamic_cast<CMultitaskKernelNormalizer*>(n); 00241 } 00242 00243 00244 protected: 00245 00247 std::vector<float64_t> similarity_matrix; 00248 00250 int32_t num_tasks; 00251 00253 std::vector<int32_t> task_vector_lhs; 00254 00256 std::vector<int32_t> task_vector_rhs; 00257 00259 float64_t scale; 00260 00261 }; 00262 } 00263 #endif