SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Christian Widmer 00008 * Copyright (C) 2010 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___ 00012 #define _MULTITASKKERNELTREENORMALIZER_H___ 00013 00014 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h> 00015 #include <shogun/kernel/Kernel.h> 00016 #include <algorithm> 00017 #include <map> 00018 #include <set> 00019 #include <deque> 00020 #include <vector> 00021 00022 namespace shogun 00023 { 00024 00029 class CNode: public CSGObject 00030 { 00031 public: 00034 CNode() 00035 { 00036 parent = NULL; 00037 beta = 1.0; 00038 node_id = 0; 00039 } 00040 00041 virtual ~CNode() 00042 { 00043 for (size_t i = 0; i < children.size(); i++) 00044 delete children[i]; 00045 } 00046 00050 std::set<CNode*> get_path_root() 00051 { 00052 std::set<CNode*> nodes_on_path = std::set<CNode*>(); 00053 CNode *node = this; 00054 while (node != NULL) { 00055 nodes_on_path.insert(node); 00056 node = node->parent; 00057 } 00058 return nodes_on_path; 00059 } 00060 00064 std::vector<int32_t> get_task_ids_below() 00065 { 00066 00067 std::vector<int32_t> task_ids; 00068 std::deque<CNode*> grey_nodes; 00069 grey_nodes.push_back(this); 00070 00071 while(grey_nodes.size() > 0) 00072 { 00073 00074 CNode *current_node = grey_nodes.front(); 00075 grey_nodes.pop_front(); 00076 00077 for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){ 00078 grey_nodes.push_back(current_node->children[i]); 00079 } 00080 00081 if(current_node->is_leaf()){ 00082 task_ids.push_back(current_node->getNode_id()); 00083 } 00084 } 00085 00086 return task_ids; 00087 } 00088 00092 void add_child(CNode *node) 00093 { 00094 node->parent = this; 00095 this->children.push_back(node); 00096 } 00097 00099 virtual const char *get_name() const 00100 { 00101 return "Node"; 00102 } 00103 00105 bool is_leaf() 00106 { 00107 return children.empty(); 00108 00109 } 00110 00112 int32_t getNode_id() const 00113 { 00114 return node_id; 00115 } 00116 00118 void setNode_id(int32_t node_idx) 00119 { 00120 this->node_id = node_idx; 00121 } 00122 00124 float64_t beta; 00125 00126 protected: 00127 00129 CNode* parent; 00130 00132 std::vector<CNode*> children; 00133 00135 int32_t node_id; 00136 00137 }; 00138 00139 00144 class CTaxonomy : public CSGObject 00145 { 00146 00147 public: 00148 00151 CTaxonomy() : CSGObject() 00152 { 00153 root = new CNode(); 00154 nodes.push_back(root); 00155 00156 name2id = std::map<std::string, int32_t>(); 00157 name2id["root"] = 0; 00158 } 00159 00160 virtual ~CTaxonomy() 00161 { 00162 for (size_t i = 0; i < nodes.size(); i++) 00163 delete nodes[i]; 00164 nodes.clear(); 00165 name2id.clear(); 00166 task_histogram.clear(); 00167 } 00168 00173 CNode* get_node(int32_t task_id) { 00174 return nodes[task_id]; 00175 } 00176 00180 void set_root_beta(float64_t beta) 00181 { 00182 nodes[0]->beta = beta; 00183 } 00184 00190 CNode* add_node(std::string parent_name, std::string child_name, float64_t beta) 00191 { 00192 if (child_name=="") SG_ERROR("child_name empty") 00193 if (parent_name=="") SG_ERROR("parent_name empty") 00194 00195 00196 CNode* child_node = new CNode(); 00197 00198 child_node->beta = beta; 00199 00200 nodes.push_back(child_node); 00201 int32_t id = nodes.size()-1; 00202 00203 name2id[child_name] = id; 00204 00205 child_node->setNode_id(id); 00206 00207 00208 //create edge 00209 CNode* parent = nodes[name2id[parent_name]]; 00210 00211 parent->add_child(child_node); 00212 00213 return child_node; 00214 } 00215 00220 int32_t get_id(std::string name) { 00221 return name2id[name]; 00222 } 00223 00229 std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs) 00230 { 00231 00232 std::set<CNode*> root_path_lhs = node_lhs->get_path_root(); 00233 std::set<CNode*> root_path_rhs = node_rhs->get_path_root(); 00234 00235 std::set<CNode*> intersection; 00236 00237 std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(), 00238 root_path_rhs.begin(), root_path_rhs.end(), 00239 std::inserter(intersection, intersection.end())); 00240 00241 return intersection; 00242 00243 } 00244 00250 float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs) 00251 { 00252 00253 CNode* node_lhs = get_node(task_lhs); 00254 CNode* node_rhs = get_node(task_rhs); 00255 00256 // compute intersection of paths to root 00257 std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs); 00258 00259 // sum up weights 00260 float64_t gamma = 0; 00261 for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) { 00262 00263 gamma += (*p)->beta; 00264 } 00265 00266 return gamma; 00267 00268 } 00269 00273 void update_task_histogram(std::vector<int32_t> task_vector_lhs) { 00274 00275 //empty map 00276 task_histogram.clear(); 00277 00278 00279 //fill map with zeros 00280 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++) 00281 { 00282 task_histogram[*it] = 0.0; 00283 } 00284 00285 //fill map 00286 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++) 00287 { 00288 task_histogram[*it] += 1.0; 00289 } 00290 00291 //compute fractions 00292 for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++) 00293 { 00294 task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size()); 00295 } 00296 00297 } 00298 00300 int32_t get_num_nodes() 00301 { 00302 return (int32_t)(nodes.size()); 00303 } 00304 00306 int32_t get_num_leaves() 00307 { 00308 int32_t num_leaves = 0; 00309 00310 for (int32_t i=0; i!=get_num_nodes(); i++) 00311 { 00312 if (get_node(i)->is_leaf()==true) 00313 { 00314 num_leaves++; 00315 } 00316 } 00317 00318 return num_leaves; 00319 } 00320 00322 float64_t get_node_weight(int32_t idx) 00323 { 00324 CNode* node = get_node(idx); 00325 return node->beta; 00326 } 00327 00332 void set_node_weight(int32_t idx, float64_t weight) 00333 { 00334 CNode* node = get_node(idx); 00335 node->beta = weight; 00336 } 00337 00339 virtual const char* get_name() const 00340 { 00341 return "Taxonomy"; 00342 } 00343 00345 std::map<std::string, int32_t> get_name2id() { 00346 return name2id; 00347 } 00348 00354 int32_t get_id_by_name(std::string name) 00355 { 00356 return name2id[name]; 00357 } 00358 00359 00360 protected: 00361 00363 CNode* root; 00365 std::map<std::string, int32_t> name2id; 00367 std::vector<CNode*> nodes; 00369 std::map<int32_t, float64_t> task_histogram; 00370 00371 }; 00372 00376 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer 00377 { 00378 00379 public: 00380 00383 CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer() 00384 { 00385 } 00386 00393 CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs, 00394 std::vector<std::string> task_rhs, 00395 CTaxonomy tax) : CMultitaskKernelMklNormalizer() 00396 { 00397 00398 taxonomy = tax; 00399 set_task_vector_lhs(task_lhs); 00400 set_task_vector_rhs(task_rhs); 00401 00402 num_nodes = taxonomy.get_num_nodes(); 00403 00404 dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes); 00405 00406 update_cache(); 00407 } 00408 00409 00411 virtual ~CMultitaskKernelTreeNormalizer() 00412 { 00413 } 00414 00415 00417 void update_cache() 00418 { 00419 00420 00421 for (int32_t i=0; i!=num_nodes; i++) 00422 { 00423 for (int32_t j=0; j!=num_nodes; j++) 00424 { 00425 00426 float64_t similarity = taxonomy.compute_node_similarity(i, j); 00427 set_node_similarity(i,j,similarity); 00428 00429 } 00430 00431 } 00432 } 00433 00434 00435 00441 virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs) 00442 { 00443 //lookup tasks 00444 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00445 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00446 00447 //lookup similarity 00448 float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs); 00449 //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs); 00450 00451 //take task similarity into account 00452 float64_t similarity = (value/scale) * task_similarity; 00453 00454 00455 return similarity; 00456 } 00457 00462 virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs) 00463 { 00464 SG_ERROR("normalize_lhs not implemented") 00465 return 0; 00466 } 00467 00472 virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs) 00473 { 00474 SG_ERROR("normalize_rhs not implemented") 00475 return 0; 00476 } 00477 00478 00480 void set_task_vector_lhs(std::vector<std::string> vec) 00481 { 00482 00483 task_vector_lhs.clear(); 00484 00485 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i) 00486 { 00487 task_vector_lhs.push_back(taxonomy.get_id(vec[i])); 00488 } 00489 00490 //update task histogram 00491 taxonomy.update_task_histogram(task_vector_lhs); 00492 00493 } 00494 00496 void set_task_vector_rhs(std::vector<std::string> vec) 00497 { 00498 00499 task_vector_rhs.clear(); 00500 00501 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i) 00502 { 00503 task_vector_rhs.push_back(taxonomy.get_id(vec[i])); 00504 } 00505 00506 } 00507 00509 void set_task_vector(std::vector<std::string> vec) 00510 { 00511 set_task_vector_lhs(vec); 00512 set_task_vector_rhs(vec); 00513 } 00514 00516 int32_t get_num_betas() 00517 { 00518 00519 return taxonomy.get_num_nodes(); 00520 00521 } 00522 00526 float64_t get_beta(int32_t idx) 00527 { 00528 00529 return taxonomy.get_node_weight(idx); 00530 00531 } 00532 00536 void set_beta(int32_t idx, float64_t weight) 00537 { 00538 00539 taxonomy.set_node_weight(idx, weight); 00540 00541 update_cache(); 00542 00543 } 00544 00545 00551 float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs) 00552 { 00553 00554 ASSERT(node_lhs < num_nodes && node_lhs >= 0) 00555 ASSERT(node_rhs < num_nodes && node_rhs >= 0) 00556 00557 return dependency_matrix[node_lhs * num_nodes + node_rhs]; 00558 00559 } 00560 00566 void set_node_similarity(int32_t node_lhs, int32_t node_rhs, 00567 float64_t similarity) 00568 { 00569 00570 ASSERT(node_lhs < num_nodes && node_lhs >= 0) 00571 ASSERT(node_rhs < num_nodes && node_rhs >= 0) 00572 00573 dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity; 00574 00575 } 00576 00578 virtual const char* get_name() const 00579 { 00580 return "MultitaskKernelTreeNormalizer"; 00581 } 00582 00586 CMultitaskKernelTreeNormalizer* KernelNormalizerToMultitaskKernelTreeNormalizer(CKernelNormalizer* n) 00587 { 00588 return dynamic_cast<CMultitaskKernelTreeNormalizer*>(n); 00589 } 00590 00591 protected: 00593 CTaxonomy taxonomy; 00594 00596 int32_t num_nodes; 00597 00599 std::vector<int32_t> task_vector_lhs; 00600 00602 std::vector<int32_t> task_vector_rhs; 00603 00605 std::vector<float64_t> dependency_matrix; 00606 }; 00607 } 00608 #endif