SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) Christian Gehl 00008 * Written (W) 1999-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/common.h> 00013 #include <shogun/distance/CanberraWordDistance.h> 00014 #include <shogun/features/Features.h> 00015 #include <shogun/features/StringFeatures.h> 00016 #include <shogun/io/SGIO.h> 00017 00018 using namespace shogun; 00019 00020 CCanberraWordDistance::CCanberraWordDistance() 00021 : CStringDistance<uint16_t>() 00022 { 00023 SG_DEBUG("CCanberraWordDistance created") 00024 } 00025 00026 CCanberraWordDistance::CCanberraWordDistance( 00027 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r) 00028 : CStringDistance<uint16_t>() 00029 { 00030 SG_DEBUG("CCanberraWordDistance created") 00031 00032 init(l, r); 00033 } 00034 00035 CCanberraWordDistance::~CCanberraWordDistance() 00036 { 00037 cleanup(); 00038 } 00039 00040 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r) 00041 { 00042 return CStringDistance<uint16_t>::init(l,r); 00043 } 00044 00045 void CCanberraWordDistance::cleanup() 00046 { 00047 } 00048 00049 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b) 00050 { 00051 int32_t alen, blen; 00052 bool free_avec, free_bvec; 00053 00054 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)-> 00055 get_feature_vector(idx_a, alen, free_avec); 00056 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)-> 00057 get_feature_vector(idx_b, blen, free_bvec); 00058 00059 float64_t result=0; 00060 00061 int32_t left_idx=0; 00062 int32_t right_idx=0; 00063 00064 while (left_idx < alen && right_idx < blen) 00065 { 00066 uint16_t sym=avec[left_idx]; 00067 if (avec[left_idx]==bvec[right_idx]) 00068 { 00069 int32_t old_left_idx=left_idx; 00070 int32_t old_right_idx=right_idx; 00071 00072 while (left_idx< alen && avec[left_idx]==sym) 00073 left_idx++; 00074 00075 while (right_idx< blen && bvec[right_idx]==sym) 00076 right_idx++; 00077 00078 result += 00079 CMath::abs((float64_t) 00080 ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/ 00081 ((float64_t) 00082 ((left_idx-old_left_idx) + (right_idx-old_right_idx))); 00083 } 00084 else if (avec[left_idx]<bvec[right_idx]) 00085 { 00086 result++; 00087 00088 while (left_idx< alen && avec[left_idx]==sym) 00089 left_idx++; 00090 } 00091 else 00092 { 00093 sym=bvec[right_idx]; 00094 result++; 00095 00096 while (right_idx< blen && bvec[right_idx]==sym) 00097 right_idx++; 00098 } 00099 } 00100 00101 while (left_idx < alen) 00102 { 00103 uint16_t sym=avec[left_idx]; 00104 result++; 00105 00106 while (left_idx< alen && avec[left_idx]==sym) 00107 left_idx++; 00108 } 00109 00110 while (right_idx < blen) 00111 { 00112 uint16_t sym=bvec[right_idx]; 00113 result++; 00114 00115 while (right_idx< blen && bvec[right_idx]==sym) 00116 right_idx++; 00117 } 00118 ((CStringFeatures<uint16_t>*) lhs)-> 00119 free_feature_vector(avec, idx_a, free_avec); 00120 ((CStringFeatures<uint16_t>*) rhs)-> 00121 free_feature_vector(bvec, idx_b, free_bvec); 00122 00123 return result; 00124 }