libflame  revision_anchor
FLA_Apply_G_mx2_opt.h
Go to the documentation of this file.
/*

    Copyright (C) 2014, The University of Texas at Austin

    This file is part of libflame and is available under the 3-Clause
    BSD license, which can be found in the LICENSE file at the top-level
    directory, or at http://opensource.org/licenses/BSD-3-Clause

*/
00010 
/*
 * MAC_Apply_G_mx2_ops
 *
 * Applies the 2x2 transformation built from the real scalars *gamma12 and
 * *sigma12 (presumably a Givens rotation, going by the macro name) to m_A
 * pairs of single-precision real elements, in place:
 *
 *     alpha1_new =  gamma * alpha1_old + sigma * alpha2_old
 *     alpha2_new = -sigma * alpha1_old + gamma * alpha2_old
 *
 * Parameters:
 *   m_A      number of element pairs to update; nothing is touched when
 *            it is <= 0.
 *   gamma12  pointer to the float gamma coefficient (dereferenced once).
 *   sigma12  pointer to the float sigma coefficient (dereferenced once).
 *   a1       pointer to the first element of the first vector (float).
 *   inc_a1   distance, in elements, between consecutive entries of a1.
 *   a2       pointer to the first element of the second vector (float).
 *   inc_a2   distance, in elements, between consecutive entries of a2.
 *
 * Notes:
 *   - Every argument is parenthesized in the expansion, so pointer and
 *     arithmetic expressions such as `g + k` or `m - j` are grouped as
 *     the caller wrote them.
 *   - m_A, inc_a1 and inc_a2 are evaluated on every loop iteration; do not
 *     pass expressions with side effects.
 *   - a1 and a2 are bound to restrict-qualified pointers, so the elements
 *     reached through them must not overlap.
 *   - The expansion is a brace-enclosed block declaring the locals ga, si,
 *     alpha1, alpha2, temp1, temp2 and i; argument expressions must not
 *     depend on caller variables with those names.
 */
#define MAC_Apply_G_mx2_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    float             ga     = *(gamma12); \
    float             si     = *(sigma12); \
    float*  restrict  alpha1 = (a1); \
    float*  restrict  alpha2 = (a2); \
    float             temp1; \
    float             temp2; \
    int               i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Read both inputs first: each output needs both old values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 =  ga * temp1 + si * temp2; \
        *alpha2 = -si * temp1 + ga * temp2; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00037 
/*
 * MAC_Apply_G_mx2_opc
 *
 * Applies the 2x2 transformation built from the real scalars *gamma12 and
 * *sigma12 (presumably a Givens rotation, going by the macro name) to m_A
 * pairs of single-precision complex elements, in place. Because both
 * coefficients are real, the real and imaginary parts are updated
 * independently with the same formula:
 *
 *     alpha1_new =  gamma * alpha1_old + sigma * alpha2_old
 *     alpha2_new = -sigma * alpha1_old + gamma * alpha2_old
 *
 * Parameters:
 *   m_A      number of element pairs to update; nothing is touched when
 *            it is <= 0.
 *   gamma12  pointer to the float gamma coefficient (dereferenced once).
 *   sigma12  pointer to the float sigma coefficient (dereferenced once).
 *   a1       pointer to the first element of the first vector (scomplex,
 *            accessed through its .real and .imag members).
 *   inc_a1   distance, in elements, between consecutive entries of a1.
 *   a2       pointer to the first element of the second vector (scomplex).
 *   inc_a2   distance, in elements, between consecutive entries of a2.
 *
 * Notes:
 *   - Every argument is parenthesized in the expansion, so pointer and
 *     arithmetic expressions such as `g + k` or `m - j` are grouped as
 *     the caller wrote them.
 *   - m_A, inc_a1 and inc_a2 are evaluated on every loop iteration; do not
 *     pass expressions with side effects.
 *   - a1 and a2 are bound to restrict-qualified pointers, so the elements
 *     reached through them must not overlap.
 *   - The expansion is a brace-enclosed block declaring the locals ga12,
 *     si12, alpha1, alpha2, temp1, temp2 and i; argument expressions must
 *     not depend on caller variables with those names.
 */
#define MAC_Apply_G_mx2_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex           temp1; \
    scomplex           temp2; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Copy both inputs first: each output needs both old values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00067 
/*
 * MAC_Apply_G_mx2_opd
 *
 * Applies the 2x2 transformation built from the real scalars *gamma12 and
 * *sigma12 (presumably a Givens rotation, going by the macro name) to m_A
 * pairs of double-precision real elements, in place:
 *
 *     alpha1_new =  gamma * alpha1_old + sigma * alpha2_old
 *     alpha2_new = -sigma * alpha1_old + gamma * alpha2_old
 *
 * Parameters:
 *   m_A      number of element pairs to update; nothing is touched when
 *            it is <= 0.
 *   gamma12  pointer to the double gamma coefficient (dereferenced once).
 *   sigma12  pointer to the double sigma coefficient (dereferenced once).
 *   a1       pointer to the first element of the first vector (double).
 *   inc_a1   distance, in elements, between consecutive entries of a1.
 *   a2       pointer to the first element of the second vector (double).
 *   inc_a2   distance, in elements, between consecutive entries of a2.
 *
 * Notes:
 *   - Every argument is parenthesized in the expansion, so pointer and
 *     arithmetic expressions such as `g + k` or `m - j` are grouped as
 *     the caller wrote them.
 *   - m_A, inc_a1 and inc_a2 are evaluated on every loop iteration; do not
 *     pass expressions with side effects.
 *   - a1 and a2 are bound to restrict-qualified pointers, so the elements
 *     reached through them must not overlap.
 *   - The expansion is a brace-enclosed block declaring the locals ga, si,
 *     alpha1, alpha2, temp1, temp2 and i; argument expressions must not
 *     depend on caller variables with those names.
 */
#define MAC_Apply_G_mx2_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    double            ga     = *(gamma12); \
    double            si     = *(sigma12); \
    double* restrict  alpha1 = (a1); \
    double* restrict  alpha2 = (a2); \
    double            temp1; \
    double            temp2; \
    int               i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Read both inputs first: each output needs both old values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 =  ga * temp1 + si * temp2; \
        *alpha2 = -si * temp1 + ga * temp2; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00094 
/*
 * MAC_Apply_G_mx2_opz
 *
 * Applies the 2x2 transformation built from the real scalars *gamma12 and
 * *sigma12 (presumably a Givens rotation, going by the macro name) to m_A
 * pairs of double-precision complex elements, in place. Because both
 * coefficients are real, the real and imaginary parts are updated
 * independently with the same formula:
 *
 *     alpha1_new =  gamma * alpha1_old + sigma * alpha2_old
 *     alpha2_new = -sigma * alpha1_old + gamma * alpha2_old
 *
 * Parameters:
 *   m_A      number of element pairs to update; nothing is touched when
 *            it is <= 0.
 *   gamma12  pointer to the double gamma coefficient (dereferenced once).
 *   sigma12  pointer to the double sigma coefficient (dereferenced once).
 *   a1       pointer to the first element of the first vector (dcomplex,
 *            accessed through its .real and .imag members).
 *   inc_a1   distance, in elements, between consecutive entries of a1.
 *   a2       pointer to the first element of the second vector (dcomplex).
 *   inc_a2   distance, in elements, between consecutive entries of a2.
 *
 * Notes:
 *   - Every argument is parenthesized in the expansion, so pointer and
 *     arithmetic expressions such as `g + k` or `m - j` are grouped as
 *     the caller wrote them.
 *   - m_A, inc_a1 and inc_a2 are evaluated on every loop iteration; do not
 *     pass expressions with side effects.
 *   - a1 and a2 are bound to restrict-qualified pointers, so the elements
 *     reached through them must not overlap.
 *   - The expansion is a brace-enclosed block declaring the locals ga12,
 *     si12, alpha1, alpha2, temp1, temp2 and i; argument expressions must
 *     not depend on caller variables with those names.
 */
#define MAC_Apply_G_mx2_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Copy both inputs first: each output needs both old values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00124