libflame  revision_anchor
FLA_Apply_G_mx4s_opt.h
Go to the documentation of this file.
00001 /*
00002 
00003     Copyright (C) 2014, The University of Texas at Austin
00004 
00005     This file is part of libflame and is available under the 3-Clause
00006     BSD license, which can be found in the LICENSE file at the top-level
00007     directory, or at http://opensource.org/licenses/BSD-3-Clause
00008 
00009 */
00010 
/*
    MAC_Apply_G_mx4s_ops

    Single-precision real kernel. Walks four strided float vectors
    a1..a4 in lockstep for m_A elements and updates them in place by
    applying four 2x2 transforms per element, in this fixed order:

        1. ( a2, a3 )  with  ( *gamma23_k1, *sigma23_k1 )
        2. ( a3, a4 )  with  ( *gamma34_k1, *sigma34_k1 )
        3. ( a1, a2 )  with  ( *gamma12_k2, *sigma12_k2 )
        4. ( a2, a3 )  with  ( *gamma23_k2, *sigma23_k2 )

    Each transform maps a pair ( x, y ) to

        x' = x * gamma + y * sigma
        y' = y * gamma - x * sigma

    Presumably every gamma/sigma pair holds the cosine/sine of a Givens
    rotation (the "G" in the name); nothing in this macro checks that.

    Parameters:
      m_A                  Number of elements to process in each vector.
                           If it is not greater than zero the loop body
                           does not run.
      gammaXY_kN,          Pointers to the scalar coefficients. Each one
      sigmaXY_kN           is dereferenced exactly once, before the loop.
      a1 .. a4             Pointers to the first element of each vector.
                           They are copied into restrict-qualified
                           locals, so the four vectors must not overlap.
      inc_a1 .. inc_a4     Element stride added to the matching pointer
                           after every processed element.

    Notes:
      - Expands to a brace-enclosed block (a statement, not an
        expression).
      - m_A and the inc_aN arguments are re-evaluated on every
        iteration; pass expressions without side effects.
      - Every parameter use is parenthesized and all locals carry an
        mx4s_ prefix, so operator-bearing arguments and caller
        variables named i, temp1, alpha1, ... are handled correctly.
*/
#define MAC_Apply_G_mx4s_ops( m_A, \
                              gamma23_k1, \
                              sigma23_k1, \
                              gamma34_k1, \
                              sigma34_k1, \
                              gamma12_k2, \
                              sigma12_k2, \
                              gamma23_k2, \
                              sigma23_k2, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3, \
                              a4, inc_a4 ) \
{ \
    /* Coefficients are loop-invariant: read them once, up front. */ \
    const float     mx4s_g23_k1 = *( gamma23_k1 ); \
    const float     mx4s_s23_k1 = *( sigma23_k1 ); \
    const float     mx4s_g34_k1 = *( gamma34_k1 ); \
    const float     mx4s_s34_k1 = *( sigma34_k1 ); \
    const float     mx4s_g12_k2 = *( gamma12_k2 ); \
    const float     mx4s_s12_k2 = *( sigma12_k2 ); \
    const float     mx4s_g23_k2 = *( gamma23_k2 ); \
    const float     mx4s_s23_k2 = *( sigma23_k2 ); \
    float* restrict mx4s_p1 = ( a1 ); \
    float* restrict mx4s_p2 = ( a2 ); \
    float* restrict mx4s_p3 = ( a3 ); \
    float* restrict mx4s_p4 = ( a4 ); \
    float           mx4s_t1; \
    float           mx4s_t2; \
    float           mx4s_t3; \
    float           mx4s_t4; \
    int             mx4s_i; \
\
    for ( mx4s_i = 0; mx4s_i < ( m_A ); ++mx4s_i ) \
    { \
        /* 1. ( a2, a3 ) <- G23_k1 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        *mx4s_p2 = mx4s_t2 * mx4s_g23_k1 + mx4s_t3 * mx4s_s23_k1; \
        *mx4s_p3 = mx4s_t3 * mx4s_g23_k1 - mx4s_t2 * mx4s_s23_k1; \
\
        /* 2. ( a3, a4 ) <- G34_k1, using the a3 produced by step 1 */ \
        mx4s_t3 = *mx4s_p3; \
        mx4s_t4 = *mx4s_p4; \
\
        *mx4s_p3 = mx4s_t3 * mx4s_g34_k1 + mx4s_t4 * mx4s_s34_k1; \
        *mx4s_p4 = mx4s_t4 * mx4s_g34_k1 - mx4s_t3 * mx4s_s34_k1; \
\
        /* 3. ( a1, a2 ) <- G12_k2, using the a2 produced by step 1 */ \
        mx4s_t1 = *mx4s_p1; \
        mx4s_t2 = *mx4s_p2; \
\
        *mx4s_p1 = mx4s_t1 * mx4s_g12_k2 + mx4s_t2 * mx4s_s12_k2; \
        *mx4s_p2 = mx4s_t2 * mx4s_g12_k2 - mx4s_t1 * mx4s_s12_k2; \
\
        /* 4. ( a2, a3 ) <- G23_k2, using a2 from step 3, a3 from step 2 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        *mx4s_p2 = mx4s_t2 * mx4s_g23_k2 + mx4s_t3 * mx4s_s23_k2; \
        *mx4s_p3 = mx4s_t3 * mx4s_g23_k2 - mx4s_t2 * mx4s_s23_k2; \
\
        /* Advance every vector by its own stride. */ \
        mx4s_p1 += ( inc_a1 ); \
        mx4s_p2 += ( inc_a2 ); \
        mx4s_p3 += ( inc_a3 ); \
        mx4s_p4 += ( inc_a4 ); \
    } \
}
00075 
/*
    MAC_Apply_G_mx4s_opc

    Single-precision complex kernel. Walks four strided scomplex vectors
    a1..a4 in lockstep for m_A elements and updates them in place by
    applying four 2x2 transforms per element, in this fixed order:

        1. ( a2, a3 )  with  ( *gamma23_k1, *sigma23_k1 )
        2. ( a3, a4 )  with  ( *gamma34_k1, *sigma34_k1 )
        3. ( a1, a2 )  with  ( *gamma12_k2, *sigma12_k2 )
        4. ( a2, a3 )  with  ( *gamma23_k2, *sigma23_k2 )

    The coefficients are real (float). Each transform is applied to the
    .real and .imag members independently, mapping a pair ( x, y ) to

        x' = x * gamma + y * sigma
        y' = y * gamma - x * sigma

    Presumably every gamma/sigma pair holds the cosine/sine of a Givens
    rotation (the "G" in the name); nothing in this macro checks that.

    Parameters:
      m_A                  Number of elements to process in each vector.
                           If it is not greater than zero the loop body
                           does not run.
      gammaXY_kN,          Pointers to the real scalar coefficients.
      sigmaXY_kN           Each one is dereferenced exactly once, before
                           the loop.
      a1 .. a4             Pointers to the first scomplex element of
                           each vector. They are copied into
                           restrict-qualified locals, so the four
                           vectors must not overlap.
      inc_a1 .. inc_a4     Element stride added to the matching pointer
                           after every processed element.

    Notes:
      - Expands to a brace-enclosed block (a statement, not an
        expression).
      - m_A and the inc_aN arguments are re-evaluated on every
        iteration; pass expressions without side effects.
      - Every parameter use is parenthesized and all locals carry an
        mx4s_ prefix, so operator-bearing arguments and caller
        variables named i, temp1, alpha1, ... are handled correctly.
*/
#define MAC_Apply_G_mx4s_opc( m_A, \
                              gamma23_k1, \
                              sigma23_k1, \
                              gamma34_k1, \
                              sigma34_k1, \
                              gamma12_k2, \
                              sigma12_k2, \
                              gamma23_k2, \
                              sigma23_k2, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3, \
                              a4, inc_a4 ) \
{ \
    /* Coefficients are loop-invariant: read them once, up front. */ \
    const float        mx4s_g23_k1 = *( gamma23_k1 ); \
    const float        mx4s_s23_k1 = *( sigma23_k1 ); \
    const float        mx4s_g34_k1 = *( gamma34_k1 ); \
    const float        mx4s_s34_k1 = *( sigma34_k1 ); \
    const float        mx4s_g12_k2 = *( gamma12_k2 ); \
    const float        mx4s_s12_k2 = *( sigma12_k2 ); \
    const float        mx4s_g23_k2 = *( gamma23_k2 ); \
    const float        mx4s_s23_k2 = *( sigma23_k2 ); \
    scomplex* restrict mx4s_p1 = ( a1 ); \
    scomplex* restrict mx4s_p2 = ( a2 ); \
    scomplex* restrict mx4s_p3 = ( a3 ); \
    scomplex* restrict mx4s_p4 = ( a4 ); \
    scomplex           mx4s_t1; \
    scomplex           mx4s_t2; \
    scomplex           mx4s_t3; \
    scomplex           mx4s_t4; \
    int                mx4s_i; \
\
    for ( mx4s_i = 0; mx4s_i < ( m_A ); ++mx4s_i ) \
    { \
        /* 1. ( a2, a3 ) <- G23_k1 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        mx4s_p2->real = mx4s_t2.real * mx4s_g23_k1 + mx4s_t3.real * mx4s_s23_k1; \
        mx4s_p2->imag = mx4s_t2.imag * mx4s_g23_k1 + mx4s_t3.imag * mx4s_s23_k1; \
\
        mx4s_p3->real = mx4s_t3.real * mx4s_g23_k1 - mx4s_t2.real * mx4s_s23_k1; \
        mx4s_p3->imag = mx4s_t3.imag * mx4s_g23_k1 - mx4s_t2.imag * mx4s_s23_k1; \
\
        /* 2. ( a3, a4 ) <- G34_k1, using the a3 produced by step 1 */ \
        mx4s_t3 = *mx4s_p3; \
        mx4s_t4 = *mx4s_p4; \
\
        mx4s_p3->real = mx4s_t3.real * mx4s_g34_k1 + mx4s_t4.real * mx4s_s34_k1; \
        mx4s_p3->imag = mx4s_t3.imag * mx4s_g34_k1 + mx4s_t4.imag * mx4s_s34_k1; \
\
        mx4s_p4->real = mx4s_t4.real * mx4s_g34_k1 - mx4s_t3.real * mx4s_s34_k1; \
        mx4s_p4->imag = mx4s_t4.imag * mx4s_g34_k1 - mx4s_t3.imag * mx4s_s34_k1; \
\
        /* 3. ( a1, a2 ) <- G12_k2, using the a2 produced by step 1 */ \
        mx4s_t1 = *mx4s_p1; \
        mx4s_t2 = *mx4s_p2; \
\
        mx4s_p1->real = mx4s_t1.real * mx4s_g12_k2 + mx4s_t2.real * mx4s_s12_k2; \
        mx4s_p1->imag = mx4s_t1.imag * mx4s_g12_k2 + mx4s_t2.imag * mx4s_s12_k2; \
\
        mx4s_p2->real = mx4s_t2.real * mx4s_g12_k2 - mx4s_t1.real * mx4s_s12_k2; \
        mx4s_p2->imag = mx4s_t2.imag * mx4s_g12_k2 - mx4s_t1.imag * mx4s_s12_k2; \
\
        /* 4. ( a2, a3 ) <- G23_k2, using a2 from step 3, a3 from step 2 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        mx4s_p2->real = mx4s_t2.real * mx4s_g23_k2 + mx4s_t3.real * mx4s_s23_k2; \
        mx4s_p2->imag = mx4s_t2.imag * mx4s_g23_k2 + mx4s_t3.imag * mx4s_s23_k2; \
\
        mx4s_p3->real = mx4s_t3.real * mx4s_g23_k2 - mx4s_t2.real * mx4s_s23_k2; \
        mx4s_p3->imag = mx4s_t3.imag * mx4s_g23_k2 - mx4s_t2.imag * mx4s_s23_k2; \
\
        /* Advance every vector by its own stride. */ \
        mx4s_p1 += ( inc_a1 ); \
        mx4s_p2 += ( inc_a2 ); \
        mx4s_p3 += ( inc_a3 ); \
        mx4s_p4 += ( inc_a4 ); \
    } \
}
00153 
/*
    MAC_Apply_G_mx4s_opd

    Double-precision real kernel. Walks four strided double vectors
    a1..a4 in lockstep for m_A elements and updates them in place by
    applying four 2x2 transforms per element, in this fixed order:

        1. ( a2, a3 )  with  ( *gamma23_k1, *sigma23_k1 )
        2. ( a3, a4 )  with  ( *gamma34_k1, *sigma34_k1 )
        3. ( a1, a2 )  with  ( *gamma12_k2, *sigma12_k2 )
        4. ( a2, a3 )  with  ( *gamma23_k2, *sigma23_k2 )

    Each transform maps a pair ( x, y ) to

        x' = x * gamma + y * sigma
        y' = y * gamma - x * sigma

    Presumably every gamma/sigma pair holds the cosine/sine of a Givens
    rotation (the "G" in the name); nothing in this macro checks that.

    Parameters:
      m_A                  Number of elements to process in each vector.
                           If it is not greater than zero the loop body
                           does not run.
      gammaXY_kN,          Pointers to the scalar coefficients. Each one
      sigmaXY_kN           is dereferenced exactly once, before the loop.
      a1 .. a4             Pointers to the first element of each vector.
                           They are copied into restrict-qualified
                           locals, so the four vectors must not overlap.
      inc_a1 .. inc_a4     Element stride added to the matching pointer
                           after every processed element.

    Notes:
      - Expands to a brace-enclosed block (a statement, not an
        expression).
      - m_A and the inc_aN arguments are re-evaluated on every
        iteration; pass expressions without side effects.
      - Every parameter use is parenthesized and all locals carry an
        mx4s_ prefix, so operator-bearing arguments and caller
        variables named i, temp1, alpha1, ... are handled correctly.
*/
#define MAC_Apply_G_mx4s_opd( m_A, \
                              gamma23_k1, \
                              sigma23_k1, \
                              gamma34_k1, \
                              sigma34_k1, \
                              gamma12_k2, \
                              sigma12_k2, \
                              gamma23_k2, \
                              sigma23_k2, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3, \
                              a4, inc_a4 ) \
{ \
    /* Coefficients are loop-invariant: read them once, up front. */ \
    const double     mx4s_g23_k1 = *( gamma23_k1 ); \
    const double     mx4s_s23_k1 = *( sigma23_k1 ); \
    const double     mx4s_g34_k1 = *( gamma34_k1 ); \
    const double     mx4s_s34_k1 = *( sigma34_k1 ); \
    const double     mx4s_g12_k2 = *( gamma12_k2 ); \
    const double     mx4s_s12_k2 = *( sigma12_k2 ); \
    const double     mx4s_g23_k2 = *( gamma23_k2 ); \
    const double     mx4s_s23_k2 = *( sigma23_k2 ); \
    double* restrict mx4s_p1 = ( a1 ); \
    double* restrict mx4s_p2 = ( a2 ); \
    double* restrict mx4s_p3 = ( a3 ); \
    double* restrict mx4s_p4 = ( a4 ); \
    double           mx4s_t1; \
    double           mx4s_t2; \
    double           mx4s_t3; \
    double           mx4s_t4; \
    int              mx4s_i; \
\
    for ( mx4s_i = 0; mx4s_i < ( m_A ); ++mx4s_i ) \
    { \
        /* 1. ( a2, a3 ) <- G23_k1 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        *mx4s_p2 = mx4s_t2 * mx4s_g23_k1 + mx4s_t3 * mx4s_s23_k1; \
        *mx4s_p3 = mx4s_t3 * mx4s_g23_k1 - mx4s_t2 * mx4s_s23_k1; \
\
        /* 2. ( a3, a4 ) <- G34_k1, using the a3 produced by step 1 */ \
        mx4s_t3 = *mx4s_p3; \
        mx4s_t4 = *mx4s_p4; \
\
        *mx4s_p3 = mx4s_t3 * mx4s_g34_k1 + mx4s_t4 * mx4s_s34_k1; \
        *mx4s_p4 = mx4s_t4 * mx4s_g34_k1 - mx4s_t3 * mx4s_s34_k1; \
\
        /* 3. ( a1, a2 ) <- G12_k2, using the a2 produced by step 1 */ \
        mx4s_t1 = *mx4s_p1; \
        mx4s_t2 = *mx4s_p2; \
\
        *mx4s_p1 = mx4s_t1 * mx4s_g12_k2 + mx4s_t2 * mx4s_s12_k2; \
        *mx4s_p2 = mx4s_t2 * mx4s_g12_k2 - mx4s_t1 * mx4s_s12_k2; \
\
        /* 4. ( a2, a3 ) <- G23_k2, using a2 from step 3, a3 from step 2 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        *mx4s_p2 = mx4s_t2 * mx4s_g23_k2 + mx4s_t3 * mx4s_s23_k2; \
        *mx4s_p3 = mx4s_t3 * mx4s_g23_k2 - mx4s_t2 * mx4s_s23_k2; \
\
        /* Advance every vector by its own stride. */ \
        mx4s_p1 += ( inc_a1 ); \
        mx4s_p2 += ( inc_a2 ); \
        mx4s_p3 += ( inc_a3 ); \
        mx4s_p4 += ( inc_a4 ); \
    } \
}
00218 
/*
    MAC_Apply_G_mx4s_opz

    Double-precision complex kernel. Walks four strided dcomplex vectors
    a1..a4 in lockstep for m_A elements and updates them in place by
    applying four 2x2 transforms per element, in this fixed order:

        1. ( a2, a3 )  with  ( *gamma23_k1, *sigma23_k1 )
        2. ( a3, a4 )  with  ( *gamma34_k1, *sigma34_k1 )
        3. ( a1, a2 )  with  ( *gamma12_k2, *sigma12_k2 )
        4. ( a2, a3 )  with  ( *gamma23_k2, *sigma23_k2 )

    The coefficients are real (double). Each transform is applied to the
    .real and .imag members independently, mapping a pair ( x, y ) to

        x' = x * gamma + y * sigma
        y' = y * gamma - x * sigma

    Presumably every gamma/sigma pair holds the cosine/sine of a Givens
    rotation (the "G" in the name); nothing in this macro checks that.

    Parameters:
      m_A                  Number of elements to process in each vector.
                           If it is not greater than zero the loop body
                           does not run.
      gammaXY_kN,          Pointers to the real scalar coefficients.
      sigmaXY_kN           Each one is dereferenced exactly once, before
                           the loop.
      a1 .. a4             Pointers to the first dcomplex element of
                           each vector. They are copied into
                           restrict-qualified locals, so the four
                           vectors must not overlap.
      inc_a1 .. inc_a4     Element stride added to the matching pointer
                           after every processed element.

    Notes:
      - Expands to a brace-enclosed block (a statement, not an
        expression).
      - m_A and the inc_aN arguments are re-evaluated on every
        iteration; pass expressions without side effects.
      - Every parameter use is parenthesized and all locals carry an
        mx4s_ prefix, so operator-bearing arguments and caller
        variables named i, temp1, alpha1, ... are handled correctly.
*/
#define MAC_Apply_G_mx4s_opz( m_A, \
                              gamma23_k1, \
                              sigma23_k1, \
                              gamma34_k1, \
                              sigma34_k1, \
                              gamma12_k2, \
                              sigma12_k2, \
                              gamma23_k2, \
                              sigma23_k2, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3, \
                              a4, inc_a4 ) \
{ \
    /* Coefficients are loop-invariant: read them once, up front. */ \
    const double       mx4s_g23_k1 = *( gamma23_k1 ); \
    const double       mx4s_s23_k1 = *( sigma23_k1 ); \
    const double       mx4s_g34_k1 = *( gamma34_k1 ); \
    const double       mx4s_s34_k1 = *( sigma34_k1 ); \
    const double       mx4s_g12_k2 = *( gamma12_k2 ); \
    const double       mx4s_s12_k2 = *( sigma12_k2 ); \
    const double       mx4s_g23_k2 = *( gamma23_k2 ); \
    const double       mx4s_s23_k2 = *( sigma23_k2 ); \
    dcomplex* restrict mx4s_p1 = ( a1 ); \
    dcomplex* restrict mx4s_p2 = ( a2 ); \
    dcomplex* restrict mx4s_p3 = ( a3 ); \
    dcomplex* restrict mx4s_p4 = ( a4 ); \
    dcomplex           mx4s_t1; \
    dcomplex           mx4s_t2; \
    dcomplex           mx4s_t3; \
    dcomplex           mx4s_t4; \
    int                mx4s_i; \
\
    for ( mx4s_i = 0; mx4s_i < ( m_A ); ++mx4s_i ) \
    { \
        /* 1. ( a2, a3 ) <- G23_k1 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        mx4s_p2->real = mx4s_t2.real * mx4s_g23_k1 + mx4s_t3.real * mx4s_s23_k1; \
        mx4s_p2->imag = mx4s_t2.imag * mx4s_g23_k1 + mx4s_t3.imag * mx4s_s23_k1; \
\
        mx4s_p3->real = mx4s_t3.real * mx4s_g23_k1 - mx4s_t2.real * mx4s_s23_k1; \
        mx4s_p3->imag = mx4s_t3.imag * mx4s_g23_k1 - mx4s_t2.imag * mx4s_s23_k1; \
\
        /* 2. ( a3, a4 ) <- G34_k1, using the a3 produced by step 1 */ \
        mx4s_t3 = *mx4s_p3; \
        mx4s_t4 = *mx4s_p4; \
\
        mx4s_p3->real = mx4s_t3.real * mx4s_g34_k1 + mx4s_t4.real * mx4s_s34_k1; \
        mx4s_p3->imag = mx4s_t3.imag * mx4s_g34_k1 + mx4s_t4.imag * mx4s_s34_k1; \
\
        mx4s_p4->real = mx4s_t4.real * mx4s_g34_k1 - mx4s_t3.real * mx4s_s34_k1; \
        mx4s_p4->imag = mx4s_t4.imag * mx4s_g34_k1 - mx4s_t3.imag * mx4s_s34_k1; \
\
        /* 3. ( a1, a2 ) <- G12_k2, using the a2 produced by step 1 */ \
        mx4s_t1 = *mx4s_p1; \
        mx4s_t2 = *mx4s_p2; \
\
        mx4s_p1->real = mx4s_t1.real * mx4s_g12_k2 + mx4s_t2.real * mx4s_s12_k2; \
        mx4s_p1->imag = mx4s_t1.imag * mx4s_g12_k2 + mx4s_t2.imag * mx4s_s12_k2; \
\
        mx4s_p2->real = mx4s_t2.real * mx4s_g12_k2 - mx4s_t1.real * mx4s_s12_k2; \
        mx4s_p2->imag = mx4s_t2.imag * mx4s_g12_k2 - mx4s_t1.imag * mx4s_s12_k2; \
\
        /* 4. ( a2, a3 ) <- G23_k2, using a2 from step 3, a3 from step 2 */ \
        mx4s_t2 = *mx4s_p2; \
        mx4s_t3 = *mx4s_p3; \
\
        mx4s_p2->real = mx4s_t2.real * mx4s_g23_k2 + mx4s_t3.real * mx4s_s23_k2; \
        mx4s_p2->imag = mx4s_t2.imag * mx4s_g23_k2 + mx4s_t3.imag * mx4s_s23_k2; \
\
        mx4s_p3->real = mx4s_t3.real * mx4s_g23_k2 - mx4s_t2.real * mx4s_s23_k2; \
        mx4s_p3->imag = mx4s_t3.imag * mx4s_g23_k2 - mx4s_t2.imag * mx4s_s23_k2; \
\
        /* Advance every vector by its own stride. */ \
        mx4s_p1 += ( inc_a1 ); \
        mx4s_p2 += ( inc_a2 ); \
        mx4s_p3 += ( inc_a3 ); \
        mx4s_p4 += ( inc_a4 ); \
    } \
}
00296