Botan
1.11.15
|
00001 /* 00002 * Lowest Level MPI Algorithms 00003 * (C) 1999-2010 Jack Lloyd 00004 * 2006 Luca Piccarreta 00005 * 00006 * Botan is released under the Simplified BSD License (see license.txt) 00007 */ 00008 00009 #ifndef BOTAN_MP_ASM_INTERNAL_H__ 00010 #define BOTAN_MP_ASM_INTERNAL_H__ 00011 00012 #include <botan/internal/mp_madd.h> 00013 00014 namespace Botan { 00015 00016 extern "C" { 00017 00018 /* 00019 * Helper Macros for x86-64 Assembly 00020 */ 00021 #ifndef ASM 00022 #define ASM(x) x "\n\t" 00023 #endif 00024 00025 #define ADDSUB2_OP(OPERATION, INDEX) \ 00026 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ 00027 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ 00028 00029 #define ADDSUB3_OP(OPERATION, INDEX) \ 00030 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ 00031 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ 00032 ASM("movq %[carry], 8*" #INDEX "(%[z])") \ 00033 00034 #define LINMUL_OP(WRITE_TO, INDEX) \ 00035 ASM("movq 8*" #INDEX "(%[x]),%%rax") \ 00036 ASM("mulq %[y]") \ 00037 ASM("addq %[carry],%%rax") \ 00038 ASM("adcq $0,%%rdx") \ 00039 ASM("movq %%rdx,%[carry]") \ 00040 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") 00041 00042 #define MULADD_OP(IGNORED, INDEX) \ 00043 ASM("movq 8*" #INDEX "(%[x]),%%rax") \ 00044 ASM("mulq %[y]") \ 00045 ASM("addq %[carry],%%rax") \ 00046 ASM("adcq $0,%%rdx") \ 00047 ASM("addq 8*" #INDEX "(%[z]),%%rax") \ 00048 ASM("adcq $0,%%rdx") \ 00049 ASM("movq %%rdx,%[carry]") \ 00050 ASM("movq %%rax, 8*" #INDEX " (%[z])") 00051 00052 #define DO_8_TIMES(MACRO, ARG) \ 00053 MACRO(ARG, 0) \ 00054 MACRO(ARG, 1) \ 00055 MACRO(ARG, 2) \ 00056 MACRO(ARG, 3) \ 00057 MACRO(ARG, 4) \ 00058 MACRO(ARG, 5) \ 00059 MACRO(ARG, 6) \ 00060 MACRO(ARG, 7) 00061 00062 #define ADD_OR_SUBTRACT(CORE_CODE) \ 00063 ASM("rorq %[carry]") \ 00064 CORE_CODE \ 00065 ASM("sbbq %[carry],%[carry]") \ 00066 ASM("negq %[carry]") 00067 00068 /* 00069 * Word Addition 00070 */ 00071 inline word word_add(word x, word y, word* carry) 00072 { 00073 asm( 00074 ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) 00075 : [x]"=r"(x), [carry]"=r"(*carry) 00076 : "0"(x), [y]"rm"(y), "1"(*carry) 00077 : "cc"); 00078 return x; 00079 } 00080 00081 /* 00082 * Eight Word Block Addition, Two Argument 00083 */ 00084 inline word word8_add2(word x[8], const word y[8], word carry) 00085 { 00086 asm( 00087 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) 00088 : [carry]"=r"(carry) 00089 : [x]"r"(x), [y]"r"(y), "0"(carry) 00090 : "cc", "memory"); 00091 return carry; 00092 } 00093 00094 /* 00095 * Eight Word Block Addition, Three Argument 00096 */ 00097 inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) 00098 { 00099 asm( 00100 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) 00101 : [carry]"=r"(carry) 00102 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) 00103 : "cc", "memory"); 00104 return carry; 00105 } 00106 00107 /* 00108 * Word Subtraction 00109 */ 00110 inline word word_sub(word x, word y, word* carry) 00111 { 00112 asm( 00113 ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) 00114 : [x]"=r"(x), [carry]"=r"(*carry) 00115 : "0"(x), [y]"rm"(y), "1"(*carry) 00116 : "cc"); 00117 return x; 00118 } 00119 00120 /* 00121 * Eight Word Block Subtraction, Two Argument 00122 */ 00123 inline word word8_sub2(word x[8], const word y[8], word carry) 00124 { 00125 asm( 00126 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) 00127 : [carry]"=r"(carry) 00128 : [x]"r"(x), [y]"r"(y), "0"(carry) 00129 : "cc", "memory"); 00130 return carry; 00131 } 00132 00133 /* 00134 * Eight Word Block Subtraction, Two Argument 00135 */ 00136 inline word word8_sub2_rev(word x[8], const word y[8], word carry) 00137 { 00138 asm( 00139 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) 00140 : [carry]"=r"(carry) 00141 : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) 00142 : "cc", "memory"); 00143 return carry; 00144 } 00145 00146 /* 00147 * Eight Word Block Subtraction, Three Argument 00148 */ 00149 inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) 00150 { 00151 asm( 00152 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) 00153 : [carry]"=r"(carry) 00154 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) 00155 : "cc", "memory"); 00156 return carry; 00157 } 00158 00159 /* 00160 * Eight Word Block Linear Multiplication 00161 */ 00162 inline word word8_linmul2(word x[8], word y, word carry) 00163 { 00164 asm( 00165 DO_8_TIMES(LINMUL_OP, "x") 00166 : [carry]"=r"(carry) 00167 : [x]"r"(x), [y]"rm"(y), "0"(carry) 00168 : "cc", "%rax", "%rdx"); 00169 return carry; 00170 } 00171 00172 /* 00173 * Eight Word Block Linear Multiplication 00174 */ 00175 inline word word8_linmul3(word z[8], const word x[8], word y, word carry) 00176 { 00177 asm( 00178 DO_8_TIMES(LINMUL_OP, "z") 00179 : [carry]"=r"(carry) 00180 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) 00181 : "cc", "%rax", "%rdx"); 00182 return carry; 00183 } 00184 00185 /* 00186 * Eight Word Block Multiply/Add 00187 */ 00188 inline word word8_madd3(word z[8], const word x[8], word y, word carry) 00189 { 00190 asm( 00191 DO_8_TIMES(MULADD_OP, "") 00192 : [carry]"=r"(carry) 00193 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) 00194 : "cc", "%rax", "%rdx"); 00195 return carry; 00196 } 00197 00198 /* 00199 * Multiply-Add Accumulator 00200 */ 00201 inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) 00202 { 00203 asm( 00204 ASM("mulq %[y]") 00205 00206 ASM("addq %[x],%[w0]") 00207 ASM("adcq %[y],%[w1]") 00208 ASM("adcq $0,%[w2]") 00209 00210 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) 00211 : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) 00212 : "cc"); 00213 } 00214 00215 /* 00216 * Multiply-Add Accumulator 00217 */ 00218 inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) 00219 { 00220 asm( 00221 ASM("mulq %[y]") 00222 00223 ASM("addq %[x],%[w0]") 00224 ASM("adcq %[y],%[w1]") 00225 ASM("adcq $0,%[w2]") 00226 00227 ASM("addq %[x],%[w0]") 00228 ASM("adcq %[y],%[w1]") 00229 ASM("adcq $0,%[w2]") 00230 00231 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) 00232 : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) 00233 : "cc"); 00234 } 00235 00236 00237 #undef ASM 00238 #undef DO_8_TIMES 00239 #undef ADD_OR_SUBTRACT 00240 #undef ADDSUB2_OP 00241 #undef ADDSUB3_OP 00242 #undef LINMUL_OP 00243 #undef MULADD_OP 00244 00245 } 00246 00247 } 00248 #endif