libflame
revision_anchor
|
/*

   Copyright (C) 2014, The University of Texas at Austin

   This file is part of libflame and is available under the 3-Clause
   BSD license, which can be found in the LICENSE file at the top-level
   directory, or at http://opensource.org/licenses/BSD-3-Clause

*/

/*
   MAC_Apply_G_mx3_op[s|d|c|z]

   Each macro walks m_A elements of three strided vectors a1, a2, a3 and,
   for every element, applies two plane rotations back to back:

     step 1 (gamma12, sigma12), acting on the pair (a1, a2):
        a1 <- gamma12 * a1 + sigma12 * a2
        a2 <- gamma12 * a2 - sigma12 * a1
     step 2 (gamma23, sigma23), acting on the pair (a2, a3), where a2 is
     the value just produced by step 1:
        a2 <- gamma23 * a2 + sigma23 * a3
        a3 <- gamma23 * a3 - sigma23 * a2

   (Each step is a Givens rotation when gamma^2 + sigma^2 == 1; the macros
   themselves do not check or require that.)

   Arguments
     m_A              number of elements to process; compared against an
                      int counter on every iteration.
     gamma12, sigma12,
     gamma23, sigma23 POINTERS to the real-valued rotation coefficients;
                      each is dereferenced exactly once, before the loop.
     a1, a2, a3       pointers to the first element of each vector; the
                      vectors are updated in place.
     inc_a1..inc_a3   element strides added to the respective pointer after
                      every iteration.

   Variants
     _ops  float vectors,    float coefficients
     _opd  double vectors,   double coefficients
     _opc  scomplex vectors, float coefficients
     _opz  dcomplex vectors, double coefficients
   The complex variants rotate the .real and .imag members independently
   with the same real coefficients.

   Usage notes
     - Every macro parameter is parenthesized where it is used, so
       expression arguments such as "g + 1" or "flag ? n : 0" expand with
       the intended meaning.
     - m_A and inc_a1..inc_a3 are evaluated once per loop iteration; do not
       pass expressions with side effects.
     - The expansion is a brace-enclosed block, not an expression. It
       declares locals named ga12, si12, ga23, si23, alpha1, alpha2,
       alpha3, temp1, temp2, temp3 and i; arguments must not refer to
       caller variables with those names.
     - a1, a2 and a3 are bound to restrict-qualified pointers, so the three
       vectors must not overlap.
*/

#define MAC_Apply_G_mx3_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float           ga12   = *(gamma12); \
    float           si12   = *(sigma12); \
    float           ga23   = *(gamma23); \
    float           si23   = *(sigma23); \
    float* restrict alpha1 = (a1); \
    float* restrict alpha2 = (a2); \
    float* restrict alpha3 = (a3); \
    float           temp1; \
    float           temp2; \
    float           temp3; \
    int             i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Rotation 1: (alpha1, alpha2) with (ga12, si12). */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Rotation 2: (updated alpha2, alpha3) with (ga23, si23). */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}

#define MAC_Apply_G_mx3_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double           ga12   = *(gamma12); \
    double           si12   = *(sigma12); \
    double           ga23   = *(gamma23); \
    double           si23   = *(sigma23); \
    double* restrict alpha1 = (a1); \
    double* restrict alpha2 = (a2); \
    double* restrict alpha3 = (a3); \
    double           temp1; \
    double           temp2; \
    double           temp3; \
    int              i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Rotation 1: (alpha1, alpha2) with (ga12, si12). */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Rotation 2: (updated alpha2, alpha3) with (ga23, si23). */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}

#define MAC_Apply_G_mx3_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex* restrict alpha3 = (a3); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Rotation 1: (alpha1, alpha2) with (ga12, si12); the real and */ \
        /* imaginary parts are rotated separately.                      */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Rotation 2: (updated alpha2, alpha3) with (ga23, si23). */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}

#define MAC_Apply_G_mx3_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex* restrict alpha3 = (a3); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Rotation 1: (alpha1, alpha2) with (ga12, si12); the real and */ \
        /* imaginary parts are rotated separately.                      */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Rotation 2: (updated alpha2, alpha3) with (ga23, si23). */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}