libflame
revision_anchor
|
00001 /* 00002 00003 Copyright (C) 2014, The University of Texas at Austin 00004 00005 This file is part of libflame and is available under the 3-Clause 00006 BSD license, which can be found in the LICENSE file at the top-level 00007 directory, or at http://opensource.org/licenses/BSD-3-Clause 00008 00009 */ 00010 00011 #define MAC_Apply_G_mx4s_ops( m_A, \ 00012 gamma23_k1, \ 00013 sigma23_k1, \ 00014 gamma34_k1, \ 00015 sigma34_k1, \ 00016 gamma12_k2, \ 00017 sigma12_k2, \ 00018 gamma23_k2, \ 00019 sigma23_k2, \ 00020 a1, inc_a1, \ 00021 a2, inc_a2, \ 00022 a3, inc_a3, \ 00023 a4, inc_a4 ) \ 00024 { \ 00025 float ga23_k1 = *gamma23_k1; \ 00026 float si23_k1 = *sigma23_k1; \ 00027 float ga34_k1 = *gamma34_k1; \ 00028 float si34_k1 = *sigma34_k1; \ 00029 float ga12_k2 = *gamma12_k2; \ 00030 float si12_k2 = *sigma12_k2; \ 00031 float ga23_k2 = *gamma23_k2; \ 00032 float si23_k2 = *sigma23_k2; \ 00033 float* restrict alpha1 = a1; \ 00034 float* restrict alpha2 = a2; \ 00035 float* restrict alpha3 = a3; \ 00036 float* restrict alpha4 = a4; \ 00037 float temp1; \ 00038 float temp2; \ 00039 float temp3; \ 00040 float temp4; \ 00041 int i; \ 00042 \ 00043 for ( i = 0; i < m_A; ++i ) \ 00044 { \ 00045 temp2 = *alpha2; \ 00046 temp3 = *alpha3; \ 00047 \ 00048 *alpha2 = temp2 * ga23_k1 + temp3 * si23_k1; \ 00049 *alpha3 = temp3 * ga23_k1 - temp2 * si23_k1; \ 00050 \ 00051 temp3 = *alpha3; \ 00052 temp4 = *alpha4; \ 00053 \ 00054 *alpha3 = temp3 * ga34_k1 + temp4 * si34_k1; \ 00055 *alpha4 = temp4 * ga34_k1 - temp3 * si34_k1; \ 00056 \ 00057 temp1 = *alpha1; \ 00058 temp2 = *alpha2; \ 00059 \ 00060 *alpha1 = temp1 * ga12_k2 + temp2 * si12_k2; \ 00061 *alpha2 = temp2 * ga12_k2 - temp1 * si12_k2; \ 00062 \ 00063 temp2 = *alpha2; \ 00064 temp3 = *alpha3; \ 00065 \ 00066 *alpha2 = temp2 * ga23_k2 + temp3 * si23_k2; \ 00067 *alpha3 = temp3 * ga23_k2 - temp2 * si23_k2; \ 00068 \ 00069 alpha1 += inc_a1; \ 00070 alpha2 += inc_a2; \ 00071 alpha3 += inc_a3; \ 00072 alpha4 += inc_a4; \ 00073 } \ 00074 } 00075 00076 #define MAC_Apply_G_mx4s_opc( m_A, \ 00077 gamma23_k1, \ 00078 sigma23_k1, \ 00079 gamma34_k1, \ 00080 sigma34_k1, \ 00081 gamma12_k2, \ 00082 sigma12_k2, \ 00083 gamma23_k2, \ 00084 sigma23_k2, \ 00085 a1, inc_a1, \ 00086 a2, inc_a2, \ 00087 a3, inc_a3, \ 00088 a4, inc_a4 ) \ 00089 { \ 00090 float ga23_k1 = *gamma23_k1; \ 00091 float si23_k1 = *sigma23_k1; \ 00092 float ga34_k1 = *gamma34_k1; \ 00093 float si34_k1 = *sigma34_k1; \ 00094 float ga12_k2 = *gamma12_k2; \ 00095 float si12_k2 = *sigma12_k2; \ 00096 float ga23_k2 = *gamma23_k2; \ 00097 float si23_k2 = *sigma23_k2; \ 00098 scomplex* restrict alpha1 = a1; \ 00099 scomplex* restrict alpha2 = a2; \ 00100 scomplex* restrict alpha3 = a3; \ 00101 scomplex* restrict alpha4 = a4; \ 00102 scomplex temp1; \ 00103 scomplex temp2; \ 00104 scomplex temp3; \ 00105 scomplex temp4; \ 00106 int i; \ 00107 \ 00108 for ( i = 0; i < m_A; ++i ) \ 00109 { \ 00110 \ 00111 temp2 = *alpha2; \ 00112 temp3 = *alpha3; \ 00113 \ 00114 alpha2->real = temp2.real * ga23_k1 + temp3.real * si23_k1; \ 00115 alpha2->imag = temp2.imag * ga23_k1 + temp3.imag * si23_k1; \ 00116 \ 00117 alpha3->real = temp3.real * ga23_k1 - temp2.real * si23_k1; \ 00118 alpha3->imag = temp3.imag * ga23_k1 - temp2.imag * si23_k1; \ 00119 \ 00120 temp3 = *alpha3; \ 00121 temp4 = *alpha4; \ 00122 \ 00123 alpha3->real = temp3.real * ga34_k1 + temp4.real * si34_k1; \ 00124 alpha3->imag = temp3.imag * ga34_k1 + temp4.imag * si34_k1; \ 00125 \ 00126 alpha4->real = temp4.real * ga34_k1 - temp3.real * si34_k1; \ 00127 alpha4->imag = temp4.imag * ga34_k1 - temp3.imag * si34_k1; \ 00128 \ 00129 temp1 = *alpha1; \ 00130 temp2 = *alpha2; \ 00131 \ 00132 alpha1->real = temp1.real * ga12_k2 + temp2.real * si12_k2; \ 00133 alpha1->imag = temp1.imag * ga12_k2 + temp2.imag * si12_k2; \ 00134 \ 00135 alpha2->real = temp2.real * ga12_k2 - temp1.real * si12_k2; \ 00136 alpha2->imag = temp2.imag * ga12_k2 - temp1.imag * si12_k2; \ 00137 \ 00138 temp2 = *alpha2; \ 00139 temp3 = *alpha3; \ 00140 \ 00141 alpha2->real = temp2.real * ga23_k2 + temp3.real * si23_k2; \ 00142 alpha2->imag = temp2.imag * ga23_k2 + temp3.imag * si23_k2; \ 00143 \ 00144 alpha3->real = temp3.real * ga23_k2 - temp2.real * si23_k2; \ 00145 alpha3->imag = temp3.imag * ga23_k2 - temp2.imag * si23_k2; \ 00146 \ 00147 alpha1 += inc_a1; \ 00148 alpha2 += inc_a2; \ 00149 alpha3 += inc_a3; \ 00150 alpha4 += inc_a4; \ 00151 } \ 00152 } 00153 00154 #define MAC_Apply_G_mx4s_opd( m_A, \ 00155 gamma23_k1, \ 00156 sigma23_k1, \ 00157 gamma34_k1, \ 00158 sigma34_k1, \ 00159 gamma12_k2, \ 00160 sigma12_k2, \ 00161 gamma23_k2, \ 00162 sigma23_k2, \ 00163 a1, inc_a1, \ 00164 a2, inc_a2, \ 00165 a3, inc_a3, \ 00166 a4, inc_a4 ) \ 00167 { \ 00168 double ga23_k1 = *gamma23_k1; \ 00169 double si23_k1 = *sigma23_k1; \ 00170 double ga34_k1 = *gamma34_k1; \ 00171 double si34_k1 = *sigma34_k1; \ 00172 double ga12_k2 = *gamma12_k2; \ 00173 double si12_k2 = *sigma12_k2; \ 00174 double ga23_k2 = *gamma23_k2; \ 00175 double si23_k2 = *sigma23_k2; \ 00176 double* restrict alpha1 = a1; \ 00177 double* restrict alpha2 = a2; \ 00178 double* restrict alpha3 = a3; \ 00179 double* restrict alpha4 = a4; \ 00180 double temp1; \ 00181 double temp2; \ 00182 double temp3; \ 00183 double temp4; \ 00184 int i; \ 00185 \ 00186 for ( i = 0; i < m_A; ++i ) \ 00187 { \ 00188 temp2 = *alpha2; \ 00189 temp3 = *alpha3; \ 00190 \ 00191 *alpha2 = temp2 * ga23_k1 + temp3 * si23_k1; \ 00192 *alpha3 = temp3 * ga23_k1 - temp2 * si23_k1; \ 00193 \ 00194 temp3 = *alpha3; \ 00195 temp4 = *alpha4; \ 00196 \ 00197 *alpha3 = temp3 * ga34_k1 + temp4 * si34_k1; \ 00198 *alpha4 = temp4 * ga34_k1 - temp3 * si34_k1; \ 00199 \ 00200 temp1 = *alpha1; \ 00201 temp2 = *alpha2; \ 00202 \ 00203 *alpha1 = temp1 * ga12_k2 + temp2 * si12_k2; \ 00204 *alpha2 = temp2 * ga12_k2 - temp1 * si12_k2; \ 00205 \ 00206 temp2 = *alpha2; \ 00207 temp3 = *alpha3; \ 00208 \ 00209 *alpha2 = temp2 * ga23_k2 + temp3 * si23_k2; \ 00210 *alpha3 = temp3 * ga23_k2 - temp2 * si23_k2; \ 00211 \ 00212 alpha1 += inc_a1; \ 00213 alpha2 += inc_a2; \ 00214 alpha3 += inc_a3; \ 00215 alpha4 += inc_a4; \ 00216 } \ 00217 } 00218 00219 #define MAC_Apply_G_mx4s_opz( m_A, \ 00220 gamma23_k1, \ 00221 sigma23_k1, \ 00222 gamma34_k1, \ 00223 sigma34_k1, \ 00224 gamma12_k2, \ 00225 sigma12_k2, \ 00226 gamma23_k2, \ 00227 sigma23_k2, \ 00228 a1, inc_a1, \ 00229 a2, inc_a2, \ 00230 a3, inc_a3, \ 00231 a4, inc_a4 ) \ 00232 { \ 00233 double ga23_k1 = *gamma23_k1; \ 00234 double si23_k1 = *sigma23_k1; \ 00235 double ga34_k1 = *gamma34_k1; \ 00236 double si34_k1 = *sigma34_k1; \ 00237 double ga12_k2 = *gamma12_k2; \ 00238 double si12_k2 = *sigma12_k2; \ 00239 double ga23_k2 = *gamma23_k2; \ 00240 double si23_k2 = *sigma23_k2; \ 00241 dcomplex* restrict alpha1 = a1; \ 00242 dcomplex* restrict alpha2 = a2; \ 00243 dcomplex* restrict alpha3 = a3; \ 00244 dcomplex* restrict alpha4 = a4; \ 00245 dcomplex temp1; \ 00246 dcomplex temp2; \ 00247 dcomplex temp3; \ 00248 dcomplex temp4; \ 00249 int i; \ 00250 \ 00251 for ( i = 0; i < m_A; ++i ) \ 00252 { \ 00253 \ 00254 temp2 = *alpha2; \ 00255 temp3 = *alpha3; \ 00256 \ 00257 alpha2->real = temp2.real * ga23_k1 + temp3.real * si23_k1; \ 00258 alpha2->imag = temp2.imag * ga23_k1 + temp3.imag * si23_k1; \ 00259 \ 00260 alpha3->real = temp3.real * ga23_k1 - temp2.real * si23_k1; \ 00261 alpha3->imag = temp3.imag * ga23_k1 - temp2.imag * si23_k1; \ 00262 \ 00263 temp3 = *alpha3; \ 00264 temp4 = *alpha4; \ 00265 \ 00266 alpha3->real = temp3.real * ga34_k1 + temp4.real * si34_k1; \ 00267 alpha3->imag = temp3.imag * ga34_k1 + temp4.imag * si34_k1; \ 00268 \ 00269 alpha4->real = temp4.real * ga34_k1 - temp3.real * si34_k1; \ 00270 alpha4->imag = temp4.imag * ga34_k1 - temp3.imag * si34_k1; \ 00271 \ 00272 temp1 = *alpha1; \ 00273 temp2 = *alpha2; \ 00274 \ 00275 alpha1->real = temp1.real * ga12_k2 + temp2.real * si12_k2; \ 00276 alpha1->imag = temp1.imag * ga12_k2 + temp2.imag * si12_k2; \ 00277 \ 00278 alpha2->real = temp2.real * ga12_k2 - temp1.real * si12_k2; \ 00279 alpha2->imag = temp2.imag * ga12_k2 - temp1.imag * si12_k2; \ 00280 \ 00281 temp2 = *alpha2; \ 00282 temp3 = *alpha3; \ 00283 \ 00284 alpha2->real = temp2.real * ga23_k2 + temp3.real * si23_k2; \ 00285 alpha2->imag = temp2.imag * ga23_k2 + temp3.imag * si23_k2; \ 00286 \ 00287 alpha3->real = temp3.real * ga23_k2 - temp2.real * si23_k2; \ 00288 alpha3->imag = temp3.imag * ga23_k2 - temp2.imag * si23_k2; \ 00289 \ 00290 alpha1 += inc_a1; \ 00291 alpha2 += inc_a2; \ 00292 alpha3 += inc_a3; \ 00293 alpha4 += inc_a4; \ 00294 } \ 00295 } 00296