libflame  revision_anchor
FLA_Apply_G_mx3_opt.h
Go to the documentation of this file.
00001 /*
00002 
00003     Copyright (C) 2014, The University of Texas at Austin
00004 
00005     This file is part of libflame and is available under the 3-Clause
00006     BSD license, which can be found in the LICENSE file at the top-level
00007     directory, or at http://opensource.org/licenses/BSD-3-Clause
00008 
00009 */
00010 
/*
 * MAC_Apply_G_mx3_ops -- single-precision real kernel.
 *
 * For each of m_A positions, updates one element from each of the three
 * strided float vectors a1, a2, a3 in place:
 *
 *   step 1:  ( a1, a2 ) <- ( ga12*a1 + si12*a2,  ga12*a2 - si12*a1 )
 *   step 2:  ( a2, a3 ) <- ( ga23*a2 + si23*a3,  ga23*a3 - si23*a2 )
 *
 * where ga12 = *gamma12, si12 = *sigma12, ga23 = *gamma23, si23 = *sigma23.
 * Step 2 consumes the a2 value produced by step 1. Each step is the 2x2
 * transform [ ga si; -si ga ], i.e. a plane (Givens) rotation whenever
 * ga^2 + si^2 = 1; the macro itself does not check that.
 *
 * Parameters:
 *   m_A               number of elements processed per vector; nothing is
 *                     touched when it is <= 0.
 *   gamma12, sigma12  pointers to the float coefficients of step 1.
 *   gamma23, sigma23  pointers to the float coefficients of step 2.
 *                     All four are dereferenced once, before the loop.
 *   a1, a2, a3        pointers to the first element of each vector. They are
 *                     copied into restrict-qualified locals, so the elements
 *                     accessed through them must not overlap.
 *   inc_a1..inc_a3    element stride added to each pointer per iteration.
 *
 * Usage notes:
 *   - Expands to a brace-enclosed compound statement, not do { } while (0);
 *     do not use it as the unbraced body of an if that has an else.
 *   - Every parameter is parenthesized in the expansion, so compound
 *     expressions (e.g. "g + 1", "flag ? n : 0") are safe to pass.
 *   - m_A and the inc_* arguments are re-evaluated on every iteration; pass
 *     expressions without side effects.
 *   - The block declares ga12, si12, ga23, si23, alpha1..3, temp1..3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    float*    restrict alpha1 = (a1); \
    float*    restrict alpha2 = (a2); \
    float*    restrict alpha3 = (a3); \
    float              temp1; \
    float              temp2; \
    float              temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the current elements of a1 and a2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Step 2: combine the freshly updated a2 with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00051 
/*
 * MAC_Apply_G_mx3_opd -- double-precision real kernel.
 *
 * For each of m_A positions, updates one element from each of the three
 * strided double vectors a1, a2, a3 in place:
 *
 *   step 1:  ( a1, a2 ) <- ( ga12*a1 + si12*a2,  ga12*a2 - si12*a1 )
 *   step 2:  ( a2, a3 ) <- ( ga23*a2 + si23*a3,  ga23*a3 - si23*a2 )
 *
 * where ga12 = *gamma12, si12 = *sigma12, ga23 = *gamma23, si23 = *sigma23.
 * Step 2 consumes the a2 value produced by step 1. Each step is the 2x2
 * transform [ ga si; -si ga ], i.e. a plane (Givens) rotation whenever
 * ga^2 + si^2 = 1; the macro itself does not check that.
 *
 * Parameters:
 *   m_A               number of elements processed per vector; nothing is
 *                     touched when it is <= 0.
 *   gamma12, sigma12  pointers to the double coefficients of step 1.
 *   gamma23, sigma23  pointers to the double coefficients of step 2.
 *                     All four are dereferenced once, before the loop.
 *   a1, a2, a3        pointers to the first element of each vector. They are
 *                     copied into restrict-qualified locals, so the elements
 *                     accessed through them must not overlap.
 *   inc_a1..inc_a3    element stride added to each pointer per iteration.
 *
 * Usage notes:
 *   - Expands to a brace-enclosed compound statement, not do { } while (0);
 *     do not use it as the unbraced body of an if that has an else.
 *   - Every parameter is parenthesized in the expansion, so compound
 *     expressions (e.g. "g + 1", "flag ? n : 0") are safe to pass.
 *   - m_A and the inc_* arguments are re-evaluated on every iteration; pass
 *     expressions without side effects.
 *   - The block declares ga12, si12, ga23, si23, alpha1..3, temp1..3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    double*   restrict alpha1 = (a1); \
    double*   restrict alpha2 = (a2); \
    double*   restrict alpha3 = (a3); \
    double             temp1; \
    double             temp2; \
    double             temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the current elements of a1 and a2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Step 2: combine the freshly updated a2 with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00092 
/*
 * MAC_Apply_G_mx3_opc -- single-precision complex kernel with real
 * coefficients.
 *
 * For each of m_A positions, updates one scomplex element from each of the
 * three strided vectors a1, a2, a3 in place. The float coefficients are
 * applied to the .real and .imag members independently:
 *
 *   step 1:  ( a1, a2 ) <- ( ga12*a1 + si12*a2,  -si12*a1 + ga12*a2 )
 *   step 2:  ( a2, a3 ) <- ( ga23*a2 + si23*a3,  -si23*a2 + ga23*a3 )
 *
 * where ga12 = *gamma12, si12 = *sigma12, ga23 = *gamma23, si23 = *sigma23.
 * Step 2 consumes the a2 value produced by step 1. Each step is the 2x2
 * transform [ ga si; -si ga ], i.e. a plane (Givens) rotation whenever
 * ga^2 + si^2 = 1; the macro itself does not check that.
 *
 * Parameters:
 *   m_A               number of elements processed per vector; nothing is
 *                     touched when it is <= 0.
 *   gamma12, sigma12  pointers to the float coefficients of step 1.
 *   gamma23, sigma23  pointers to the float coefficients of step 2.
 *                     All four are dereferenced once, before the loop.
 *   a1, a2, a3        pointers to the first scomplex element of each vector.
 *                     They are copied into restrict-qualified locals, so the
 *                     elements accessed through them must not overlap.
 *   inc_a1..inc_a3    element stride added to each pointer per iteration.
 *
 * Usage notes:
 *   - Requires the scomplex type (with members real and imag) to be in scope
 *     where the macro is expanded.
 *   - Expands to a brace-enclosed compound statement, not do { } while (0);
 *     do not use it as the unbraced body of an if that has an else.
 *   - Every parameter is parenthesized in the expansion, so compound
 *     expressions (e.g. "g + 1", "flag ? n : 0") are safe to pass.
 *   - m_A and the inc_* arguments are re-evaluated on every iteration; pass
 *     expressions without side effects.
 *   - The block declares ga12, si12, ga23, si23, alpha1..3, temp1..3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex* restrict alpha3 = (a3); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the current elements of a1 and a2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Step 2: combine the freshly updated a2 with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00139 
/*
 * MAC_Apply_G_mx3_opz -- double-precision complex kernel with real
 * coefficients.
 *
 * For each of m_A positions, updates one dcomplex element from each of the
 * three strided vectors a1, a2, a3 in place. The double coefficients are
 * applied to the .real and .imag members independently:
 *
 *   step 1:  ( a1, a2 ) <- ( ga12*a1 + si12*a2,  -si12*a1 + ga12*a2 )
 *   step 2:  ( a2, a3 ) <- ( ga23*a2 + si23*a3,  -si23*a2 + ga23*a3 )
 *
 * where ga12 = *gamma12, si12 = *sigma12, ga23 = *gamma23, si23 = *sigma23.
 * Step 2 consumes the a2 value produced by step 1. Each step is the 2x2
 * transform [ ga si; -si ga ], i.e. a plane (Givens) rotation whenever
 * ga^2 + si^2 = 1; the macro itself does not check that.
 *
 * Parameters:
 *   m_A               number of elements processed per vector; nothing is
 *                     touched when it is <= 0.
 *   gamma12, sigma12  pointers to the double coefficients of step 1.
 *   gamma23, sigma23  pointers to the double coefficients of step 2.
 *                     All four are dereferenced once, before the loop.
 *   a1, a2, a3        pointers to the first dcomplex element of each vector.
 *                     They are copied into restrict-qualified locals, so the
 *                     elements accessed through them must not overlap.
 *   inc_a1..inc_a3    element stride added to each pointer per iteration.
 *
 * Usage notes:
 *   - Requires the dcomplex type (with members real and imag) to be in scope
 *     where the macro is expanded.
 *   - Expands to a brace-enclosed compound statement, not do { } while (0);
 *     do not use it as the unbraced body of an if that has an else.
 *   - Every parameter is parenthesized in the expansion, so compound
 *     expressions (e.g. "g + 1", "flag ? n : 0") are safe to pass.
 *   - m_A and the inc_* arguments are re-evaluated on every iteration; pass
 *     expressions without side effects.
 *   - The block declares ga12, si12, ga23, si23, alpha1..3, temp1..3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex* restrict alpha3 = (a3); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the current elements of a1 and a2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Step 2: combine the freshly updated a2 with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00186