SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
MultitaskKernelTreeNormalizer.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Christian Widmer
00008  * Copyright (C) 2010 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___
00012 #define _MULTITASKKERNELTREENORMALIZER_H___
00013 
00014 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h>
00015 #include <shogun/kernel/Kernel.h>
00016 #include <algorithm>
00017 #include <map>
00018 #include <set>
00019 #include <deque>
00020 #include <vector>
00021 
00022 namespace shogun
00023 {
00024 
00029 class CNode: public CSGObject
00030 {
00031 public:
00034     CNode()
00035     {
00036         parent = NULL;
00037         beta = 1.0;
00038         node_id = 0;
00039     }
00040 
00041     virtual ~CNode()
00042     {
00043     for (size_t i = 0; i < children.size(); i++)
00044         delete children[i];
00045     }
00046 
00050     std::set<CNode*> get_path_root()
00051     {
00052         std::set<CNode*> nodes_on_path = std::set<CNode*>();
00053         CNode *node = this;
00054         while (node != NULL) {
00055             nodes_on_path.insert(node);
00056             node = node->parent;
00057         }
00058         return nodes_on_path;
00059     }
00060 
00064     std::vector<int32_t> get_task_ids_below()
00065     {
00066 
00067         std::vector<int32_t> task_ids;
00068         std::deque<CNode*> grey_nodes;
00069         grey_nodes.push_back(this);
00070 
00071         while(grey_nodes.size() > 0)
00072         {
00073 
00074             CNode *current_node = grey_nodes.front();
00075             grey_nodes.pop_front();
00076 
00077             for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){
00078                 grey_nodes.push_back(current_node->children[i]);
00079             }
00080 
00081             if(current_node->is_leaf()){
00082     task_ids.push_back(current_node->getNode_id());
00083             }
00084         }
00085 
00086         return task_ids;
00087     }
00088 
00092     void add_child(CNode *node)
00093     {
00094         node->parent = this;
00095         this->children.push_back(node);
00096     }
00097 
00099     virtual const char *get_name() const
00100     {
00101         return "Node";
00102     }
00103 
00105     bool is_leaf()
00106     {
00107         return children.empty();
00108 
00109     }
00110 
00112     int32_t getNode_id() const
00113     {
00114         return node_id;
00115     }
00116 
00118     void setNode_id(int32_t node_idx)
00119     {
00120         this->node_id = node_idx;
00121     }
00122 
00124     float64_t beta;
00125 
00126 protected:
00127 
00129     CNode* parent;
00130 
00132     std::vector<CNode*> children;
00133 
00135     int32_t node_id;
00136 
00137 };
00138 
00139 
00144 class CTaxonomy : public CSGObject
00145 {
00146 
00147 public:
00148 
00151     CTaxonomy() : CSGObject()
00152     {
00153         root = new CNode();
00154         nodes.push_back(root);
00155 
00156         name2id = std::map<std::string, int32_t>();
00157         name2id["root"] = 0;
00158     }
00159 
00160     virtual ~CTaxonomy()
00161     {
00162         for (size_t i = 0; i < nodes.size(); i++)
00163             delete nodes[i];
00164         nodes.clear();
00165         name2id.clear();
00166         task_histogram.clear();
00167     }
00168 
00173     CNode* get_node(int32_t task_id) {
00174         return nodes[task_id];
00175     }
00176 
00180     void set_root_beta(float64_t beta)
00181     {
00182         nodes[0]->beta = beta;
00183     }
00184 
00190     CNode* add_node(std::string parent_name, std::string child_name, float64_t beta)
00191     {
00192         if (child_name=="") SG_ERROR("child_name empty")
00193         if (parent_name=="") SG_ERROR("parent_name empty")
00194 
00195 
00196         CNode* child_node = new CNode();
00197 
00198         child_node->beta = beta;
00199 
00200         nodes.push_back(child_node);
00201         int32_t id = nodes.size()-1;
00202 
00203         name2id[child_name] = id;
00204 
00205         child_node->setNode_id(id);
00206 
00207 
00208         //create edge
00209         CNode* parent = nodes[name2id[parent_name]];
00210 
00211         parent->add_child(child_node);
00212 
00213         return child_node;
00214     }
00215 
00220     int32_t get_id(std::string name) {
00221         return name2id[name];
00222     }
00223 
00229     std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs)
00230     {
00231 
00232         std::set<CNode*> root_path_lhs = node_lhs->get_path_root();
00233         std::set<CNode*> root_path_rhs = node_rhs->get_path_root();
00234 
00235         std::set<CNode*> intersection;
00236 
00237         std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(),
00238                               root_path_rhs.begin(), root_path_rhs.end(),
00239                               std::inserter(intersection, intersection.end()));
00240 
00241         return intersection;
00242 
00243     }
00244 
00250     float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs)
00251     {
00252 
00253         CNode* node_lhs = get_node(task_lhs);
00254         CNode* node_rhs = get_node(task_rhs);
00255 
00256         // compute intersection of paths to root
00257         std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs);
00258 
00259         // sum up weights
00260         float64_t gamma = 0;
00261         for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) {
00262 
00263             gamma += (*p)->beta;
00264         }
00265 
00266         return gamma;
00267 
00268     }
00269 
00273     void update_task_histogram(std::vector<int32_t> task_vector_lhs) {
00274 
00275         //empty map
00276         task_histogram.clear();
00277 
00278 
00279         //fill map with zeros
00280         for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00281         {
00282             task_histogram[*it] = 0.0;
00283         }
00284 
00285         //fill map
00286         for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00287         {
00288             task_histogram[*it] += 1.0;
00289         }
00290 
00291         //compute fractions
00292         for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++)
00293         {
00294             task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size());
00295         }
00296 
00297     }
00298 
00300     int32_t get_num_nodes()
00301     {
00302         return (int32_t)(nodes.size());
00303     }
00304 
00306     int32_t get_num_leaves()
00307     {
00308         int32_t num_leaves = 0;
00309 
00310         for (int32_t i=0; i!=get_num_nodes(); i++)
00311         {
00312             if (get_node(i)->is_leaf()==true)
00313             {
00314                 num_leaves++;
00315             }
00316         }
00317 
00318         return num_leaves;
00319     }
00320 
00322     float64_t get_node_weight(int32_t idx)
00323     {
00324         CNode* node = get_node(idx);
00325         return node->beta;
00326     }
00327 
00332     void set_node_weight(int32_t idx, float64_t weight)
00333     {
00334         CNode* node = get_node(idx);
00335         node->beta = weight;
00336     }
00337 
00339     virtual const char* get_name() const
00340     {
00341         return "Taxonomy";
00342     }
00343 
00345     std::map<std::string, int32_t> get_name2id() {
00346         return name2id;
00347     }
00348 
00354     int32_t get_id_by_name(std::string name)
00355     {
00356         return name2id[name];
00357     }
00358 
00359 
00360 protected:
00361 
00363     CNode* root;
00365     std::map<std::string, int32_t> name2id;
00367     std::vector<CNode*> nodes;
00369     std::map<int32_t, float64_t> task_histogram;
00370 
00371 };
00372 
00376 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer
00377 {
00378 
00379 public:
00380 
00383     CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer()
00384     {
00385     }
00386 
00393     CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs,
00394                                    std::vector<std::string> task_rhs,
00395                                    CTaxonomy tax) : CMultitaskKernelMklNormalizer()
00396     {
00397 
00398         taxonomy = tax;
00399         set_task_vector_lhs(task_lhs);
00400         set_task_vector_rhs(task_rhs);
00401 
00402         num_nodes = taxonomy.get_num_nodes();
00403 
00404         dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes);
00405 
00406         update_cache();
00407     }
00408 
00409 
00411     virtual ~CMultitaskKernelTreeNormalizer()
00412     {
00413     }
00414 
00415 
00417     void update_cache()
00418     {
00419 
00420 
00421         for (int32_t i=0; i!=num_nodes; i++)
00422         {
00423             for (int32_t j=0; j!=num_nodes; j++)
00424             {
00425 
00426                 float64_t similarity = taxonomy.compute_node_similarity(i, j);
00427                 set_node_similarity(i,j,similarity);
00428 
00429             }
00430 
00431         }
00432     }
00433 
00434 
00435 
00441     virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)
00442     {
00443         //lookup tasks
00444         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00445         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00446 
00447         //lookup similarity
00448         float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs);
00449         //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs);
00450 
00451         //take task similarity into account
00452         float64_t similarity = (value/scale) * task_similarity;
00453 
00454 
00455         return similarity;
00456     }
00457 
00462     virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
00463     {
00464         SG_ERROR("normalize_lhs not implemented")
00465         return 0;
00466     }
00467 
00472     virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
00473     {
00474         SG_ERROR("normalize_rhs not implemented")
00475         return 0;
00476     }
00477 
00478 
00480     void set_task_vector_lhs(std::vector<std::string> vec)
00481     {
00482 
00483         task_vector_lhs.clear();
00484 
00485         for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00486         {
00487             task_vector_lhs.push_back(taxonomy.get_id(vec[i]));
00488         }
00489 
00490         //update task histogram
00491         taxonomy.update_task_histogram(task_vector_lhs);
00492 
00493     }
00494 
00496     void set_task_vector_rhs(std::vector<std::string> vec)
00497     {
00498 
00499         task_vector_rhs.clear();
00500 
00501         for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00502         {
00503             task_vector_rhs.push_back(taxonomy.get_id(vec[i]));
00504         }
00505 
00506     }
00507 
00509     void set_task_vector(std::vector<std::string> vec)
00510     {
00511         set_task_vector_lhs(vec);
00512         set_task_vector_rhs(vec);
00513     }
00514 
00516     int32_t get_num_betas()
00517     {
00518 
00519         return taxonomy.get_num_nodes();
00520 
00521     }
00522 
00526     float64_t get_beta(int32_t idx)
00527     {
00528 
00529         return taxonomy.get_node_weight(idx);
00530 
00531     }
00532 
00536     void set_beta(int32_t idx, float64_t weight)
00537     {
00538 
00539         taxonomy.set_node_weight(idx, weight);
00540 
00541         update_cache();
00542 
00543     }
00544 
00545 
00551     float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs)
00552     {
00553 
00554         ASSERT(node_lhs < num_nodes && node_lhs >= 0)
00555         ASSERT(node_rhs < num_nodes && node_rhs >= 0)
00556 
00557         return dependency_matrix[node_lhs * num_nodes + node_rhs];
00558 
00559     }
00560 
00566     void set_node_similarity(int32_t node_lhs, int32_t node_rhs,
00567             float64_t similarity)
00568     {
00569 
00570         ASSERT(node_lhs < num_nodes && node_lhs >= 0)
00571         ASSERT(node_rhs < num_nodes && node_rhs >= 0)
00572 
00573         dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity;
00574 
00575     }
00576 
00578     virtual const char* get_name() const
00579     {
00580         return "MultitaskKernelTreeNormalizer";
00581     }
00582 
00586     CMultitaskKernelTreeNormalizer* KernelNormalizerToMultitaskKernelTreeNormalizer(CKernelNormalizer* n)
00587     {
00588         return dynamic_cast<CMultitaskKernelTreeNormalizer*>(n);
00589     }
00590 
00591 protected:
00593     CTaxonomy taxonomy;
00594 
00596     int32_t num_nodes;
00597 
00599     std::vector<int32_t> task_vector_lhs;
00600 
00602     std::vector<int32_t> task_vector_rhs;
00603 
00605     std::vector<float64_t> dependency_matrix;
00606 };
00607 }
00608 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation