Botan
1.11.15
|
00001 /* 00002 * curve25519-donna-c64.c from github.com/agl/curve25519-donna 00003 * revision 80ad9b9930c9baef5829dd2a235b6b7646d32a8e 00004 */ 00005 00006 /* Copyright 2008, Google Inc. 00007 * All rights reserved. 00008 * 00009 * Code released into the public domain. 00010 * 00011 * curve25519-donna: Curve25519 elliptic curve, public key function 00012 * 00013 * http://code.google.com/p/curve25519-donna/ 00014 * 00015 * Adam Langley <agl@imperialviolet.org> 00016 * 00017 * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to> 00018 * 00019 * More information about curve25519 can be found here 00020 * http://cr.yp.to/ecdh.html 00021 * 00022 * djb's sample implementation of curve25519 is written in a special assembly 00023 * language called qhasm and uses the floating point registers. 00024 * 00025 * This is, almost, a clean room reimplementation from the curve25519 paper. It 00026 * uses many of the tricks described therein. Only the crecip function is taken 00027 * from the sample implementation. 00028 */ 00029 00030 #include <botan/curve25519.h> 00031 #include <botan/mul128.h> 00032 #include <botan/internal/donna128.h> 00033 #include <botan/loadstor.h> 00034 00035 namespace Botan { 00036 00037 typedef byte u8; 00038 typedef u64bit limb; 00039 typedef limb felem[5]; 00040 00041 #if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128) 00042 typedef donna128 uint128_t; 00043 #endif 00044 00045 /* Sum two numbers: output += in */ 00046 static inline void 00047 fsum(limb *output, const limb *in) { 00048 output[0] += in[0]; 00049 output[1] += in[1]; 00050 output[2] += in[2]; 00051 output[3] += in[3]; 00052 output[4] += in[4]; 00053 } 00054 00055 /* Find the difference of two numbers: output = in - output 00056 * (note the order of the arguments!) 00057 * 00058 * Assumes that out[i] < 2**52 00059 * On return, out[i] < 2**55 00060 */ 00061 static inline void 00062 fdifference_backwards(felem out, const felem in) { 00063 /* 152 is 19 << 3 */ 00064 static const limb two54m152 = (static_cast<limb>(1) << 54) - 152; 00065 static const limb two54m8 = (static_cast<limb>(1) << 54) - 8; 00066 00067 out[0] = in[0] + two54m152 - out[0]; 00068 out[1] = in[1] + two54m8 - out[1]; 00069 out[2] = in[2] + two54m8 - out[2]; 00070 out[3] = in[3] + two54m8 - out[3]; 00071 out[4] = in[4] + two54m8 - out[4]; 00072 } 00073 00074 /* Multiply a number by a scalar: output = in * scalar */ 00075 static inline void 00076 fscalar_product(felem output, const felem in, const limb scalar) { 00077 uint128_t a = uint128_t(in[0]) * scalar; 00078 output[0] = a & 0x7ffffffffffff; 00079 00080 a = uint128_t(in[1]) * scalar + carry_shift(a, 51); 00081 output[1] = a & 0x7ffffffffffff; 00082 00083 a = uint128_t(in[2]) * scalar + carry_shift(a, 51); 00084 output[2] = a & 0x7ffffffffffff; 00085 00086 a = uint128_t(in[3]) * scalar + carry_shift(a, 51); 00087 output[3] = a & 0x7ffffffffffff; 00088 00089 a = uint128_t(in[4]) * scalar + carry_shift(a, 51); 00090 output[4] = a & 0x7ffffffffffff; 00091 00092 output[0] += carry_shift(a, 51) * 19; 00093 } 00094 00095 /* Multiply two numbers: output = in2 * in 00096 * 00097 * output must be distinct to both inputs. The inputs are reduced coefficient 00098 * form, the output is not. 00099 * 00100 * Assumes that in[i] < 2**55 and likewise for in2. 00101 * On return, output[i] < 2**52 00102 */ 00103 static inline void 00104 fmul(felem output, const felem in2, const felem in) { 00105 uint128_t t[5]; 00106 limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c; 00107 00108 r0 = in[0]; 00109 r1 = in[1]; 00110 r2 = in[2]; 00111 r3 = in[3]; 00112 r4 = in[4]; 00113 00114 s0 = in2[0]; 00115 s1 = in2[1]; 00116 s2 = in2[2]; 00117 s3 = in2[3]; 00118 s4 = in2[4]; 00119 00120 t[0] = uint128_t(r0) * s0; 00121 t[1] = uint128_t(r0) * s1 + uint128_t(r1) * s0; 00122 t[2] = uint128_t(r0) * s2 + uint128_t(r2) * s0 + uint128_t(r1) * s1; 00123 t[3] = uint128_t(r0) * s3 + uint128_t(r3) * s0 + uint128_t(r1) * s2 + uint128_t(r2) * s1; 00124 t[4] = uint128_t(r0) * s4 + uint128_t(r4) * s0 + uint128_t(r3) * s1 + uint128_t(r1) * s3 + uint128_t(r2) * s2; 00125 00126 r4 *= 19; 00127 r1 *= 19; 00128 r2 *= 19; 00129 r3 *= 19; 00130 00131 t[0] += uint128_t(r4) * s1 + uint128_t(r1) * s4 + uint128_t(r2) * s3 + uint128_t(r3) * s2; 00132 t[1] += uint128_t(r4) * s2 + uint128_t(r2) * s4 + uint128_t(r3) * s3; 00133 t[2] += uint128_t(r4) * s3 + uint128_t(r3) * s4; 00134 t[3] += uint128_t(r4) * s4; 00135 00136 r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51); 00137 t[1] += c; r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51); 00138 t[2] += c; r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51); 00139 t[3] += c; r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51); 00140 t[4] += c; r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51); 00141 r0 += c * 19; c = carry_shift(r0, 51); r0 = r0 & 0x7ffffffffffff; 00142 r1 += c; c = carry_shift(r1, 51); r1 = r1 & 0x7ffffffffffff; 00143 r2 += c; 00144 00145 output[0] = r0; 00146 output[1] = r1; 00147 output[2] = r2; 00148 output[3] = r3; 00149 output[4] = r4; 00150 } 00151 00152 static inline void fsquare_times(felem output, const felem in, limb count) { 00153 uint128_t t[5]; 00154 limb r0,r1,r2,r3,r4,c; 00155 limb d0,d1,d2,d4,d419; 00156 00157 r0 = in[0]; 00158 r1 = in[1]; 00159 r2 = in[2]; 00160 r3 = in[3]; 00161 r4 = in[4]; 00162 00163 do { 00164 d0 = r0 * 2; 00165 d1 = r1 * 2; 00166 d2 = r2 * 2 * 19; 00167 d419 = r4 * 19; 00168 d4 = d419 * 2; 00169 00170 t[0] = uint128_t(r0) * r0 + uint128_t(d4) * r1 + uint128_t(d2) * (r3 ); 00171 t[1] = uint128_t(d0) * r1 + uint128_t(d4) * r2 + uint128_t(r3) * (r3 * 19); 00172 t[2] = uint128_t(d0) * r2 + uint128_t(r1) * r1 + uint128_t(d4) * (r3 ); 00173 t[3] = uint128_t(d0) * r3 + uint128_t(d1) * r2 + uint128_t(r4) * (d419 ); 00174 t[4] = uint128_t(d0) * r4 + uint128_t(d1) * r3 + uint128_t(r2) * (r2 ); 00175 00176 r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51); 00177 t[1] += c; r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51); 00178 t[2] += c; r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51); 00179 t[3] += c; r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51); 00180 t[4] += c; r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51); 00181 r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff; 00182 r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff; 00183 r2 += c; 00184 } while(--count); 00185 00186 output[0] = r0; 00187 output[1] = r1; 00188 output[2] = r2; 00189 output[3] = r3; 00190 output[4] = r4; 00191 } 00192 00193 /* Load a little-endian 64-bit number */ 00194 static limb 00195 load_limb(const u8 *in) { 00196 return load_le<u64bit>(in, 0); 00197 } 00198 00199 static void 00200 store_limb(u8 *out, limb in) { 00201 store_le(in, out); 00202 } 00203 00204 /* Take a little-endian, 32-byte number and expand it into polynomial form */ 00205 static void 00206 fexpand(limb *output, const u8 *in) { 00207 output[0] = load_limb(in) & 0x7ffffffffffff; 00208 output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff; 00209 output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff; 00210 output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff; 00211 output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff; 00212 } 00213 00214 /* Take a fully reduced polynomial form number and contract it into a 00215 * little-endian, 32-byte array 00216 */ 00217 static void 00218 fcontract(u8 *output, const felem input) { 00219 uint128_t t[5]; 00220 00221 t[0] = input[0]; 00222 t[1] = input[1]; 00223 t[2] = input[2]; 00224 t[3] = input[3]; 00225 t[4] = input[4]; 00226 00227 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; 00228 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; 00229 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; 00230 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; 00231 t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff; 00232 00233 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; 00234 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; 00235 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; 00236 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; 00237 t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff; 00238 00239 /* now t is between 0 and 2^255-1, properly carried. */ 00240 /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ 00241 00242 t[0] += 19; 00243 00244 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; 00245 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; 00246 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; 00247 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; 00248 t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff; 00249 00250 /* now between 19 and 2^255-1 in both cases, and offset by 19. */ 00251 00252 t[0] += 0x8000000000000 - 19; 00253 t[1] += 0x8000000000000 - 1; 00254 t[2] += 0x8000000000000 - 1; 00255 t[3] += 0x8000000000000 - 1; 00256 t[4] += 0x8000000000000 - 1; 00257 00258 /* now between 2^255 and 2^256-20, and offset by 2^255. */ 00259 00260 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; 00261 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; 00262 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; 00263 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; 00264 t[4] &= 0x7ffffffffffff; 00265 00266 store_limb(output, combine_lower(t[0], 0, t[1], 51)); 00267 store_limb(output+8, combine_lower(t[1], 13, t[2], 38)); 00268 store_limb(output+16, combine_lower(t[2], 26, t[3], 25)); 00269 store_limb(output+24, combine_lower(t[3], 39, t[4], 12)); 00270 } 00271 00272 /* Input: Q, Q', Q-Q' 00273 * Output: 2Q, Q+Q' 00274 * 00275 * x2 z3: long form 00276 * x3 z3: long form 00277 * x z: short form, destroyed 00278 * xprime zprime: short form, destroyed 00279 * qmqp: short form, preserved 00280 */ 00281 static void 00282 fmonty(limb *x2, limb *z2, /* output 2Q */ 00283 limb *x3, limb *z3, /* output Q + Q' */ 00284 limb *x, limb *z, /* input Q */ 00285 limb *xprime, limb *zprime, /* input Q' */ 00286 const limb *qmqp /* input Q - Q' */) { 00287 limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], 00288 zzprime[5], zzzprime[5]; 00289 00290 copy_mem(origx, x, 5); 00291 fsum(x, z); 00292 fdifference_backwards(z, origx); // does x - z 00293 00294 copy_mem(origxprime, xprime, 5); 00295 fsum(xprime, zprime); 00296 fdifference_backwards(zprime, origxprime); 00297 fmul(xxprime, xprime, z); 00298 fmul(zzprime, x, zprime); 00299 copy_mem(origxprime, xxprime, 5); 00300 fsum(xxprime, zzprime); 00301 fdifference_backwards(zzprime, origxprime); 00302 fsquare_times(x3, xxprime, 1); 00303 fsquare_times(zzzprime, zzprime, 1); 00304 fmul(z3, zzzprime, qmqp); 00305 00306 fsquare_times(xx, x, 1); 00307 fsquare_times(zz, z, 1); 00308 fmul(x2, xx, zz); 00309 fdifference_backwards(zz, xx); // does zz = xx - zz 00310 fscalar_product(zzz, zz, 121665); 00311 fsum(zzz, xx); 00312 fmul(z2, zz, zzz); 00313 } 00314 00315 // ----------------------------------------------------------------------------- 00316 // Maybe swap the contents of two limb arrays (@a and @b), each @len elements 00317 // long. Perform the swap iff @swap is non-zero. 00318 // 00319 // This function performs the swap without leaking any side-channel 00320 // information. 00321 // ----------------------------------------------------------------------------- 00322 static void 00323 swap_conditional(limb a[5], limb b[5], limb iswap) { 00324 unsigned i; 00325 const limb swap = -iswap; 00326 00327 for (i = 0; i < 5; ++i) { 00328 const limb x = swap & (a[i] ^ b[i]); 00329 a[i] ^= x; 00330 b[i] ^= x; 00331 } 00332 } 00333 00334 /* Calculates nQ where Q is the x-coordinate of a point on the curve 00335 * 00336 * resultx/resultz: the x coordinate of the resulting curve point (short form) 00337 * n: a little endian, 32-byte number 00338 * q: a point of the curve (short form) 00339 */ 00340 static void 00341 cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) { 00342 limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0}; 00343 limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; 00344 limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1}; 00345 limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; 00346 00347 unsigned i, j; 00348 00349 copy_mem(nqpqx, q, 5); 00350 00351 for (i = 0; i < 32; ++i) { 00352 u8 byte = n[31 - i]; 00353 for (j = 0; j < 8; ++j) { 00354 const limb bit = byte >> 7; 00355 00356 swap_conditional(nqx, nqpqx, bit); 00357 swap_conditional(nqz, nqpqz, bit); 00358 fmonty(nqx2, nqz2, 00359 nqpqx2, nqpqz2, 00360 nqx, nqz, 00361 nqpqx, nqpqz, 00362 q); 00363 swap_conditional(nqx2, nqpqx2, bit); 00364 swap_conditional(nqz2, nqpqz2, bit); 00365 00366 t = nqx; 00367 nqx = nqx2; 00368 nqx2 = t; 00369 t = nqz; 00370 nqz = nqz2; 00371 nqz2 = t; 00372 t = nqpqx; 00373 nqpqx = nqpqx2; 00374 nqpqx2 = t; 00375 t = nqpqz; 00376 nqpqz = nqpqz2; 00377 nqpqz2 = t; 00378 00379 byte <<= 1; 00380 } 00381 } 00382 00383 copy_mem(resultx, nqx, 5); 00384 copy_mem(resultz, nqz, 5); 00385 } 00386 00387 00388 // ----------------------------------------------------------------------------- 00389 // Shamelessly copied from djb's code, tightened a little 00390 // ----------------------------------------------------------------------------- 00391 static void 00392 crecip(felem out, const felem z) { 00393 felem a,t0,b,c; 00394 00395 /* 2 */ fsquare_times(a, z, 1); // a = 2 00396 /* 8 */ fsquare_times(t0, a, 2); 00397 /* 9 */ fmul(b, t0, z); // b = 9 00398 /* 11 */ fmul(a, b, a); // a = 11 00399 /* 22 */ fsquare_times(t0, a, 1); 00400 /* 2^5 - 2^0 = 31 */ fmul(b, t0, b); 00401 /* 2^10 - 2^5 */ fsquare_times(t0, b, 5); 00402 /* 2^10 - 2^0 */ fmul(b, t0, b); 00403 /* 2^20 - 2^10 */ fsquare_times(t0, b, 10); 00404 /* 2^20 - 2^0 */ fmul(c, t0, b); 00405 /* 2^40 - 2^20 */ fsquare_times(t0, c, 20); 00406 /* 2^40 - 2^0 */ fmul(t0, t0, c); 00407 /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10); 00408 /* 2^50 - 2^0 */ fmul(b, t0, b); 00409 /* 2^100 - 2^50 */ fsquare_times(t0, b, 50); 00410 /* 2^100 - 2^0 */ fmul(c, t0, b); 00411 /* 2^200 - 2^100 */ fsquare_times(t0, c, 100); 00412 /* 2^200 - 2^0 */ fmul(t0, t0, c); 00413 /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50); 00414 /* 2^250 - 2^0 */ fmul(t0, t0, b); 00415 /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5); 00416 /* 2^255 - 21 */ fmul(out, t0, a); 00417 } 00418 00419 int 00420 curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) { 00421 limb bp[5], x[5], z[5], zmone[5]; 00422 uint8_t e[32]; 00423 int i; 00424 00425 for (i = 0;i < 32;++i) e[i] = secret[i]; 00426 e[0] &= 248; 00427 e[31] &= 127; 00428 e[31] |= 64; 00429 00430 fexpand(bp, basepoint); 00431 cmult(x, z, e, bp); 00432 crecip(zmone, z); 00433 fmul(z, x, zmone); 00434 fcontract(mypublic, z); 00435 return 0; 00436 } 00437 00438 }