libflame
revision_anchor
|
/*

    Copyright (C) 2014, The University of Texas at Austin

    This file is part of libflame and is available under the 3-Clause
    BSD license, which can be found in the LICENSE file at the top-level
    directory, or at http://opensource.org/licenses/BSD-3-Clause

*/

/*
    MAC_Apply_G_mx3b_op[scdz]

    Each macro walks three strided vectors a1, a2, a3 for m_A steps and, at
    every step, updates one element of each vector in two stages:

      stage 1 (columns 2 and 3):
        a2 <-  gamma23 * a2 + sigma23 * a3
        a3 <- -sigma23 * a2 + gamma23 * a3      (using the old a2)

      stage 2 (columns 1 and 2, with a2 as produced by stage 1):
        a1 <-  gamma12 * a1 + sigma12 * a2
        a2 <- -sigma12 * a1 + gamma12 * a2      (using the old a1)

    NOTE(review): the gamma/sigma pairs are presumably the cosine/sine of
    Givens rotations (per the "Apply_G" name) -- confirm against callers.

    Parameters (identical for all four variants, element types differ):
      m_A              number of steps (elements updated per vector).
      gamma12, sigma12 pointers to the real scalars used in stage 2.
      gamma23, sigma23 pointers to the real scalars used in stage 1.
      a1, a2, a3       pointers to the first element of each vector.
      inc_a1..inc_a3   element stride added to each pointer per step.

    Usage notes:
      - Every parameter is parenthesized in the expansion so that arguments
        such as "g + 1" or "n & mask" keep their intended meaning.
      - m_A is evaluated on every loop test and each inc_* on every step;
        pass expressions without side effects.
      - Argument expressions must not mention the macro's own locals
        (ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3, i); they
        would bind to the locals declared inside the macro block.
      - The pointers are held in restrict-qualified locals, so the three
        vectors must not overlap.
      - The expansion is a bare compound statement "{ ... }" (not
        do/while(0)); this form is kept so existing call sites, with or
        without a trailing semicolon, continue to compile.
*/

/* Single-precision real variant: a1, a2, a3 point to float. */
#define MAC_Apply_G_mx3b_ops( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    float*    restrict alpha1 = (a1); \
    float*    restrict alpha2 = (a2); \
    float*    restrict alpha3 = (a3); \
    float              temp1; \
    float              temp2; \
    float              temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Stage 1: combine columns 2 and 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        /* Stage 2: combine column 1 with the updated column 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}

/*
    Single-precision complex variant: a1, a2, a3 point to scomplex (a type
    with .real and .imag members, declared elsewhere). The scalars are real
    floats and are applied to the real and imaginary parts independently.
*/
#define MAC_Apply_G_mx3b_opc( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex* restrict alpha3 = (a3); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Stage 1: combine columns 2 and 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        /* Stage 2: combine column 1 with the updated column 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}

/* Double-precision real variant: a1, a2, a3 point to double. */
#define MAC_Apply_G_mx3b_opd( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    double*   restrict alpha1 = (a1); \
    double*   restrict alpha2 = (a2); \
    double*   restrict alpha3 = (a3); \
    double             temp1; \
    double             temp2; \
    double             temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Stage 1: combine columns 2 and 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        /* Stage 2: combine column 1 with the updated column 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}

/*
    Double-precision complex variant: a1, a2, a3 point to dcomplex (a type
    with .real and .imag members, declared elsewhere). The scalars are real
    doubles and are applied to the real and imaginary parts independently.
*/
#define MAC_Apply_G_mx3b_opz( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex* restrict alpha3 = (a3); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Stage 1: combine columns 2 and 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        /* Stage 2: combine column 1 with the updated column 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}