SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2009 Christian Gehl 00008 * Written (W) 1999-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/common.h> 00013 #include <shogun/io/SGIO.h> 00014 00015 #include <shogun/base/Parameter.h> 00016 00017 #include <shogun/distance/HammingWordDistance.h> 00018 #include <shogun/features/Features.h> 00019 #include <shogun/features/StringFeatures.h> 00020 00021 using namespace shogun; 00022 00023 CHammingWordDistance::CHammingWordDistance() 00024 { 00025 init(); 00026 } 00027 00028 CHammingWordDistance::CHammingWordDistance(bool sign) 00029 : CStringDistance<uint16_t>() 00030 { 00031 init(); 00032 use_sign=sign; 00033 00034 SG_DEBUG("CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0) 00035 } 00036 00037 CHammingWordDistance::CHammingWordDistance( 00038 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r, bool sign) 00039 : CStringDistance<uint16_t>() 00040 { 00041 init(); 00042 use_sign=sign; 00043 00044 SG_DEBUG("CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0) 00045 00046 init(l, r); 00047 } 00048 00049 CHammingWordDistance::~CHammingWordDistance() 00050 { 00051 cleanup(); 00052 } 00053 00054 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r) 00055 { 00056 bool result=CStringDistance<uint16_t>::init(l,r); 00057 return result; 00058 } 00059 00060 void CHammingWordDistance::cleanup() 00061 { 00062 } 00063 00064 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b) 00065 { 00066 int32_t alen, blen; 00067 bool free_avec, free_bvec; 00068 00069 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)-> 00070 get_feature_vector(idx_a, alen, free_avec); 00071 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)-> 00072 get_feature_vector(idx_b, blen, free_bvec); 00073 00074 int32_t result=0; 00075 00076 int32_t left_idx=0; 00077 int32_t right_idx=0; 00078 00079 if (use_sign) 00080 { 00081 // hamming of: if words appear in both vectors 00082 while (left_idx < alen && right_idx < blen) 00083 { 00084 uint16_t sym=avec[left_idx]; 00085 if (avec[left_idx]==bvec[right_idx]) 00086 { 00087 while (left_idx< alen && avec[left_idx]==sym) 00088 left_idx++; 00089 00090 while (right_idx< blen && bvec[right_idx]==sym) 00091 right_idx++; 00092 } 00093 else if (avec[left_idx]<bvec[right_idx]) 00094 { 00095 result++; 00096 00097 while (left_idx< alen && avec[left_idx]==sym) 00098 left_idx++; 00099 } 00100 else 00101 { 00102 sym=bvec[right_idx]; 00103 result++; 00104 00105 while (right_idx< blen && bvec[right_idx]==sym) 00106 right_idx++; 00107 } 00108 } 00109 } 00110 else 00111 { 00112 //hamming of: if words appear in both vectors _the same number_ of times 00113 while (left_idx < alen && right_idx < blen) 00114 { 00115 uint16_t sym=avec[left_idx]; 00116 if (avec[left_idx]==bvec[right_idx]) 00117 { 00118 int32_t old_left_idx=left_idx; 00119 int32_t old_right_idx=right_idx; 00120 00121 while (left_idx< alen && avec[left_idx]==sym) 00122 left_idx++; 00123 00124 while (right_idx< blen && bvec[right_idx]==sym) 00125 right_idx++; 00126 00127 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx)) 00128 result++; 00129 } 00130 else if (avec[left_idx]<bvec[right_idx]) 00131 { 00132 result++; 00133 00134 while (left_idx< alen && avec[left_idx]==sym) 00135 left_idx++; 00136 } 00137 else 00138 { 00139 sym=bvec[right_idx]; 00140 result++; 00141 00142 while (right_idx< blen && bvec[right_idx]==sym) 00143 right_idx++; 00144 } 00145 } 00146 } 00147 00148 while (left_idx < alen) 00149 { 00150 uint16_t sym=avec[left_idx]; 00151 result++; 00152 00153 while (left_idx< alen && avec[left_idx]==sym) 00154 left_idx++; 00155 } 00156 00157 while (right_idx < blen) 00158 { 00159 uint16_t sym=bvec[right_idx]; 00160 result++; 00161 00162 while (right_idx< blen && bvec[right_idx]==sym) 00163 right_idx++; 00164 } 00165 00166 ((CStringFeatures<uint16_t>*) lhs)-> 00167 free_feature_vector(avec, idx_a, free_avec); 00168 ((CStringFeatures<uint16_t>*) rhs)-> 00169 free_feature_vector(bvec, idx_b, free_bvec); 00170 00171 return result; 00172 } 00173 00174 void CHammingWordDistance::init() 00175 { 00176 use_sign = false; 00177 m_parameters->add(&use_sign, "use_sign", 00178 "If signum(counts) is used instead of counts."); 00179 }