libflame  revision_anchor
FLA_Apply_G_mx3b_opt.h
Go to the documentation of this file.
00001 /*
00002 
00003     Copyright (C) 2014, The University of Texas at Austin
00004 
00005     This file is part of libflame and is available under the 3-Clause
00006     BSD license, which can be found in the LICENSE file at the top-level
00007     directory, or at http://opensource.org/licenses/BSD-3-Clause
00008 
00009 */
00010 
/*
   MAC_Apply_G_mx3b_ops

   Single-precision real kernel. For each of m_A positions it updates three
   strided float sequences in place with two successive 2x2 transformations:

     1. (a2, a3) <- ( ga23*a2 + si23*a3,  ga23*a3 - si23*a2 )
     2. (a1, a2) <- ( ga12*a1 + si12*a2,  ga12*a2 - si12*a1 )

   Step 2 consumes the a2 value produced by step 1. The form matches a pair
   of Givens rotations (the "G" in the name); the macro does not check that
   (gamma, sigma) actually describe a rotation.

   Arguments:
     m_A                 number of positions to process; it appears in the
                         loop condition, so it is evaluated on every pass.
     gamma12, sigma12    pointers to the coefficients of step 2; each is
                         dereferenced once, before the loop.
     gamma23, sigma23    pointers to the coefficients of step 1; each is
                         dereferenced once, before the loop.
     a1, a2, a3          pointers to the first element of each sequence. They
                         are copied into restrict-qualified locals, so the
                         three sequences must not overlap.
     inc_a1..inc_a3      element strides added to the pointers after every
                         position.

   Hygiene notes:
     - Every argument is parenthesized at its point of use, so expressions
       such as `g + k` or `flag ? n : 0` expand with the intended grouping.
     - Locals carry a g3b_ prefix so that caller variables with common names
       (i, temp1, alpha1, ...) can be passed as arguments without capture.
     - The expansion is a brace-enclosed compound statement. A trailing
       semicolon at the call site therefore adds an empty statement; brace
       the invocation when it is the body of an if that has an else.
*/
#define MAC_Apply_G_mx3b_ops( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const float        g3b_ga12   = *( gamma12 ); \
    const float        g3b_si12   = *( sigma12 ); \
    const float        g3b_ga23   = *( gamma23 ); \
    const float        g3b_si23   = *( sigma23 ); \
    float*    restrict g3b_alpha1 = ( a1 ); \
    float*    restrict g3b_alpha2 = ( a2 ); \
    float*    restrict g3b_alpha3 = ( a3 ); \
    float              g3b_temp1; \
    float              g3b_temp2; \
    float              g3b_temp3; \
    int                g3b_i; \
\
    for ( g3b_i = 0; g3b_i < ( m_A ); ++g3b_i ) \
    { \
        /* Step 1: combine columns 2 and 3. */ \
        g3b_temp2 = *g3b_alpha2; \
        g3b_temp3 = *g3b_alpha3; \
\
        *g3b_alpha2 = g3b_temp2 * g3b_ga23 + g3b_temp3 * g3b_si23; \
        *g3b_alpha3 = g3b_temp3 * g3b_ga23 - g3b_temp2 * g3b_si23; \
\
        /* Step 2: combine column 1 with the freshly updated column 2. */ \
        g3b_temp1 = *g3b_alpha1; \
        g3b_temp2 = *g3b_alpha2; \
\
        *g3b_alpha1 = g3b_temp1 * g3b_ga12 + g3b_temp2 * g3b_si12; \
        *g3b_alpha2 = g3b_temp2 * g3b_ga12 - g3b_temp1 * g3b_si12; \
\
        g3b_alpha1 += ( inc_a1 ); \
        g3b_alpha2 += ( inc_a2 ); \
        g3b_alpha3 += ( inc_a3 ); \
    } \
}
00051 
/*
   MAC_Apply_G_mx3b_opc

   Single-precision complex kernel. For each of m_A positions it updates
   three strided scomplex sequences in place with two successive 2x2
   transformations whose coefficients are real floats, applied separately to
   the .real and .imag members:

     1. (a2, a3) <- ( ga23*a2 + si23*a3,  -si23*a2 + ga23*a3 )
     2. (a1, a2) <- ( ga12*a1 + si12*a2,  -si12*a1 + ga12*a2 )

   Step 2 consumes the a2 value produced by step 1. The form matches a pair
   of Givens rotations (the "G" in the name); the macro does not check that
   (gamma, sigma) actually describe a rotation.

   Arguments:
     m_A                 number of positions to process; it appears in the
                         loop condition, so it is evaluated on every pass.
     gamma12, sigma12    pointers to the float coefficients of step 2; each
                         is dereferenced once, before the loop.
     gamma23, sigma23    pointers to the float coefficients of step 1; each
                         is dereferenced once, before the loop.
     a1, a2, a3          pointers to the first scomplex of each sequence.
                         They are copied into restrict-qualified locals, so
                         the three sequences must not overlap.
     inc_a1..inc_a3      element strides added to the pointers after every
                         position.

   Hygiene notes:
     - Every argument is parenthesized at its point of use, so expressions
       such as `g + k` or `flag ? n : 0` expand with the intended grouping.
     - Locals carry a g3b_ prefix so that caller variables with common names
       (i, temp1, alpha1, ...) can be passed as arguments without capture.
     - The expansion is a brace-enclosed compound statement. A trailing
       semicolon at the call site therefore adds an empty statement; brace
       the invocation when it is the body of an if that has an else.
*/
#define MAC_Apply_G_mx3b_opc( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const float        g3b_ga12   = *( gamma12 ); \
    const float        g3b_si12   = *( sigma12 ); \
    const float        g3b_ga23   = *( gamma23 ); \
    const float        g3b_si23   = *( sigma23 ); \
    scomplex* restrict g3b_alpha1 = ( a1 ); \
    scomplex* restrict g3b_alpha2 = ( a2 ); \
    scomplex* restrict g3b_alpha3 = ( a3 ); \
    scomplex           g3b_temp1; \
    scomplex           g3b_temp2; \
    scomplex           g3b_temp3; \
    int                g3b_i; \
\
    for ( g3b_i = 0; g3b_i < ( m_A ); ++g3b_i ) \
    { \
        /* Step 1: combine columns 2 and 3. */ \
        g3b_temp2 = *g3b_alpha2; \
        g3b_temp3 = *g3b_alpha3; \
\
        g3b_alpha2->real =  g3b_ga23 * g3b_temp2.real + g3b_si23 * g3b_temp3.real; \
        g3b_alpha2->imag =  g3b_ga23 * g3b_temp2.imag + g3b_si23 * g3b_temp3.imag; \
\
        g3b_alpha3->real = -g3b_si23 * g3b_temp2.real + g3b_ga23 * g3b_temp3.real; \
        g3b_alpha3->imag = -g3b_si23 * g3b_temp2.imag + g3b_ga23 * g3b_temp3.imag; \
\
        /* Step 2: combine column 1 with the freshly updated column 2. */ \
        g3b_temp1 = *g3b_alpha1; \
        g3b_temp2 = *g3b_alpha2; \
\
        g3b_alpha1->real =  g3b_ga12 * g3b_temp1.real + g3b_si12 * g3b_temp2.real; \
        g3b_alpha1->imag =  g3b_ga12 * g3b_temp1.imag + g3b_si12 * g3b_temp2.imag; \
\
        g3b_alpha2->real = -g3b_si12 * g3b_temp1.real + g3b_ga12 * g3b_temp2.real; \
        g3b_alpha2->imag = -g3b_si12 * g3b_temp1.imag + g3b_ga12 * g3b_temp2.imag; \
\
        g3b_alpha1 += ( inc_a1 ); \
        g3b_alpha2 += ( inc_a2 ); \
        g3b_alpha3 += ( inc_a3 ); \
    } \
}
00098 
/*
   MAC_Apply_G_mx3b_opd

   Double-precision real kernel. For each of m_A positions it updates three
   strided double sequences in place with two successive 2x2 transformations:

     1. (a2, a3) <- ( ga23*a2 + si23*a3,  ga23*a3 - si23*a2 )
     2. (a1, a2) <- ( ga12*a1 + si12*a2,  ga12*a2 - si12*a1 )

   Step 2 consumes the a2 value produced by step 1. The form matches a pair
   of Givens rotations (the "G" in the name); the macro does not check that
   (gamma, sigma) actually describe a rotation.

   Arguments:
     m_A                 number of positions to process; it appears in the
                         loop condition, so it is evaluated on every pass.
     gamma12, sigma12    pointers to the coefficients of step 2; each is
                         dereferenced once, before the loop.
     gamma23, sigma23    pointers to the coefficients of step 1; each is
                         dereferenced once, before the loop.
     a1, a2, a3          pointers to the first element of each sequence. They
                         are copied into restrict-qualified locals, so the
                         three sequences must not overlap.
     inc_a1..inc_a3      element strides added to the pointers after every
                         position.

   Hygiene notes:
     - Every argument is parenthesized at its point of use, so expressions
       such as `g + k` or `flag ? n : 0` expand with the intended grouping.
     - Locals carry a g3b_ prefix so that caller variables with common names
       (i, temp1, alpha1, ...) can be passed as arguments without capture.
     - The expansion is a brace-enclosed compound statement. A trailing
       semicolon at the call site therefore adds an empty statement; brace
       the invocation when it is the body of an if that has an else.
*/
#define MAC_Apply_G_mx3b_opd( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const double       g3b_ga12   = *( gamma12 ); \
    const double       g3b_si12   = *( sigma12 ); \
    const double       g3b_ga23   = *( gamma23 ); \
    const double       g3b_si23   = *( sigma23 ); \
    double*   restrict g3b_alpha1 = ( a1 ); \
    double*   restrict g3b_alpha2 = ( a2 ); \
    double*   restrict g3b_alpha3 = ( a3 ); \
    double             g3b_temp1; \
    double             g3b_temp2; \
    double             g3b_temp3; \
    int                g3b_i; \
\
    for ( g3b_i = 0; g3b_i < ( m_A ); ++g3b_i ) \
    { \
        /* Step 1: combine columns 2 and 3. */ \
        g3b_temp2 = *g3b_alpha2; \
        g3b_temp3 = *g3b_alpha3; \
\
        *g3b_alpha2 = g3b_temp2 * g3b_ga23 + g3b_temp3 * g3b_si23; \
        *g3b_alpha3 = g3b_temp3 * g3b_ga23 - g3b_temp2 * g3b_si23; \
\
        /* Step 2: combine column 1 with the freshly updated column 2. */ \
        g3b_temp1 = *g3b_alpha1; \
        g3b_temp2 = *g3b_alpha2; \
\
        *g3b_alpha1 = g3b_temp1 * g3b_ga12 + g3b_temp2 * g3b_si12; \
        *g3b_alpha2 = g3b_temp2 * g3b_ga12 - g3b_temp1 * g3b_si12; \
\
        g3b_alpha1 += ( inc_a1 ); \
        g3b_alpha2 += ( inc_a2 ); \
        g3b_alpha3 += ( inc_a3 ); \
    } \
}
00139 
/*
   MAC_Apply_G_mx3b_opz

   Double-precision complex kernel. For each of m_A positions it updates
   three strided dcomplex sequences in place with two successive 2x2
   transformations whose coefficients are real doubles, applied separately to
   the .real and .imag members:

     1. (a2, a3) <- ( ga23*a2 + si23*a3,  -si23*a2 + ga23*a3 )
     2. (a1, a2) <- ( ga12*a1 + si12*a2,  -si12*a1 + ga12*a2 )

   Step 2 consumes the a2 value produced by step 1. The form matches a pair
   of Givens rotations (the "G" in the name); the macro does not check that
   (gamma, sigma) actually describe a rotation.

   Arguments:
     m_A                 number of positions to process; it appears in the
                         loop condition, so it is evaluated on every pass.
     gamma12, sigma12    pointers to the double coefficients of step 2; each
                         is dereferenced once, before the loop.
     gamma23, sigma23    pointers to the double coefficients of step 1; each
                         is dereferenced once, before the loop.
     a1, a2, a3          pointers to the first dcomplex of each sequence.
                         They are copied into restrict-qualified locals, so
                         the three sequences must not overlap.
     inc_a1..inc_a3      element strides added to the pointers after every
                         position.

   Hygiene notes:
     - Every argument is parenthesized at its point of use, so expressions
       such as `g + k` or `flag ? n : 0` expand with the intended grouping.
     - Locals carry a g3b_ prefix so that caller variables with common names
       (i, temp1, alpha1, ...) can be passed as arguments without capture.
     - The expansion is a brace-enclosed compound statement. A trailing
       semicolon at the call site therefore adds an empty statement; brace
       the invocation when it is the body of an if that has an else.
*/
#define MAC_Apply_G_mx3b_opz( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const double       g3b_ga12   = *( gamma12 ); \
    const double       g3b_si12   = *( sigma12 ); \
    const double       g3b_ga23   = *( gamma23 ); \
    const double       g3b_si23   = *( sigma23 ); \
    dcomplex* restrict g3b_alpha1 = ( a1 ); \
    dcomplex* restrict g3b_alpha2 = ( a2 ); \
    dcomplex* restrict g3b_alpha3 = ( a3 ); \
    dcomplex           g3b_temp1; \
    dcomplex           g3b_temp2; \
    dcomplex           g3b_temp3; \
    int                g3b_i; \
\
    for ( g3b_i = 0; g3b_i < ( m_A ); ++g3b_i ) \
    { \
        /* Step 1: combine columns 2 and 3. */ \
        g3b_temp2 = *g3b_alpha2; \
        g3b_temp3 = *g3b_alpha3; \
\
        g3b_alpha2->real =  g3b_ga23 * g3b_temp2.real + g3b_si23 * g3b_temp3.real; \
        g3b_alpha2->imag =  g3b_ga23 * g3b_temp2.imag + g3b_si23 * g3b_temp3.imag; \
\
        g3b_alpha3->real = -g3b_si23 * g3b_temp2.real + g3b_ga23 * g3b_temp3.real; \
        g3b_alpha3->imag = -g3b_si23 * g3b_temp2.imag + g3b_ga23 * g3b_temp3.imag; \
\
        /* Step 2: combine column 1 with the freshly updated column 2. */ \
        g3b_temp1 = *g3b_alpha1; \
        g3b_temp2 = *g3b_alpha2; \
\
        g3b_alpha1->real =  g3b_ga12 * g3b_temp1.real + g3b_si12 * g3b_temp2.real; \
        g3b_alpha1->imag =  g3b_ga12 * g3b_temp1.imag + g3b_si12 * g3b_temp2.imag; \
\
        g3b_alpha2->real = -g3b_si12 * g3b_temp1.real + g3b_ga12 * g3b_temp2.real; \
        g3b_alpha2->imag = -g3b_si12 * g3b_temp1.imag + g3b_ga12 * g3b_temp2.imag; \
\
        g3b_alpha1 += ( inc_a1 ); \
        g3b_alpha2 += ( inc_a2 ); \
        g3b_alpha3 += ( inc_a3 ); \
    } \
}
00186