Botan  1.11.15
src/lib/pubkey/curve25519/donna.cpp
Go to the documentation of this file.
00001 /*
00002 * curve25519-donna-c64.c from github.com/agl/curve25519-donna
00003 * revision 80ad9b9930c9baef5829dd2a235b6b7646d32a8e
00004 */
00005 
00006 /* Copyright 2008, Google Inc.
00007  * All rights reserved.
00008  *
00009  * Code released into the public domain.
00010  *
00011  * curve25519-donna: Curve25519 elliptic curve, public key function
00012  *
00013  * http://code.google.com/p/curve25519-donna/
00014  *
00015  * Adam Langley <agl@imperialviolet.org>
00016  *
00017  * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
00018  *
00019  * More information about curve25519 can be found here
00020  *   http://cr.yp.to/ecdh.html
00021  *
00022  * djb's sample implementation of curve25519 is written in a special assembly
00023  * language called qhasm and uses the floating point registers.
00024  *
00025  * This is, almost, a clean room reimplementation from the curve25519 paper. It
00026  * uses many of the tricks described therein. Only the crecip function is taken
00027  * from the sample implementation.
00028  */
00029 
00030 #include <botan/curve25519.h>
00031 #include <botan/mul128.h>
00032 #include <botan/internal/donna128.h>
00033 #include <botan/loadstor.h>
00034 
00035 namespace Botan {
00036 
00037 typedef byte u8;
00038 typedef u64bit limb;
00039 typedef limb felem[5];
00040 
00041 #if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
00042 typedef donna128 uint128_t;
00043 #endif
00044 
00045 /* Sum two numbers: output += in */
00046 static inline void
00047 fsum(limb *output, const limb *in) {
00048   output[0] += in[0];
00049   output[1] += in[1];
00050   output[2] += in[2];
00051   output[3] += in[3];
00052   output[4] += in[4];
00053 }
00054 
00055 /* Find the difference of two numbers: output = in - output
00056  * (note the order of the arguments!)
00057  *
00058  * Assumes that out[i] < 2**52
00059  * On return, out[i] < 2**55
00060  */
00061 static inline void
00062 fdifference_backwards(felem out, const felem in) {
00063   /* 152 is 19 << 3 */
00064   static const limb two54m152 = (static_cast<limb>(1) << 54) - 152;
00065   static const limb two54m8 = (static_cast<limb>(1) << 54) - 8;
00066 
00067   out[0] = in[0] + two54m152 - out[0];
00068   out[1] = in[1] + two54m8 - out[1];
00069   out[2] = in[2] + two54m8 - out[2];
00070   out[3] = in[3] + two54m8 - out[3];
00071   out[4] = in[4] + two54m8 - out[4];
00072 }
00073 
00074 /* Multiply a number by a scalar: output = in * scalar */
00075 static inline void
00076 fscalar_product(felem output, const felem in, const limb scalar) {
00077   uint128_t a = uint128_t(in[0]) * scalar;
00078   output[0] = a & 0x7ffffffffffff;
00079 
00080   a = uint128_t(in[1]) * scalar + carry_shift(a, 51);
00081   output[1] = a & 0x7ffffffffffff;
00082 
00083   a = uint128_t(in[2]) * scalar + carry_shift(a, 51);
00084   output[2] = a & 0x7ffffffffffff;
00085 
00086   a = uint128_t(in[3]) * scalar + carry_shift(a, 51);
00087   output[3] = a & 0x7ffffffffffff;
00088 
00089   a = uint128_t(in[4]) * scalar + carry_shift(a, 51);
00090   output[4] = a & 0x7ffffffffffff;
00091 
00092   output[0] += carry_shift(a, 51) * 19;
00093 }
00094 
00095 /* Multiply two numbers: output = in2 * in
00096  *
00097  * output must be distinct to both inputs. The inputs are reduced coefficient
00098  * form, the output is not.
00099  *
00100  * Assumes that in[i] < 2**55 and likewise for in2.
00101  * On return, output[i] < 2**52
00102  */
00103 static inline void
00104 fmul(felem output, const felem in2, const felem in) {
00105   uint128_t t[5];
00106   limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
00107 
00108   r0 = in[0];
00109   r1 = in[1];
00110   r2 = in[2];
00111   r3 = in[3];
00112   r4 = in[4];
00113 
00114   s0 = in2[0];
00115   s1 = in2[1];
00116   s2 = in2[2];
00117   s3 = in2[3];
00118   s4 = in2[4];
00119 
00120   t[0]  =  uint128_t(r0) * s0;
00121   t[1]  =  uint128_t(r0) * s1 + uint128_t(r1) * s0;
00122   t[2]  =  uint128_t(r0) * s2 + uint128_t(r2) * s0 + uint128_t(r1) * s1;
00123   t[3]  =  uint128_t(r0) * s3 + uint128_t(r3) * s0 + uint128_t(r1) * s2 + uint128_t(r2) * s1;
00124   t[4]  =  uint128_t(r0) * s4 + uint128_t(r4) * s0 + uint128_t(r3) * s1 + uint128_t(r1) * s3 + uint128_t(r2) * s2;
00125 
00126   r4 *= 19;
00127   r1 *= 19;
00128   r2 *= 19;
00129   r3 *= 19;
00130 
00131   t[0] += uint128_t(r4) * s1 + uint128_t(r1) * s4 + uint128_t(r2) * s3 + uint128_t(r3) * s2;
00132   t[1] += uint128_t(r4) * s2 + uint128_t(r2) * s4 + uint128_t(r3) * s3;
00133   t[2] += uint128_t(r4) * s3 + uint128_t(r3) * s4;
00134   t[3] += uint128_t(r4) * s4;
00135 
00136                   r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51);
00137   t[1] += c;      r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51);
00138   t[2] += c;      r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51);
00139   t[3] += c;      r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51);
00140   t[4] += c;      r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51);
00141   r0 +=   c * 19; c = carry_shift(r0, 51); r0 = r0 & 0x7ffffffffffff;
00142   r1 +=   c;      c = carry_shift(r1, 51); r1 = r1 & 0x7ffffffffffff;
00143   r2 +=   c;
00144 
00145   output[0] = r0;
00146   output[1] = r1;
00147   output[2] = r2;
00148   output[3] = r3;
00149   output[4] = r4;
00150 }
00151 
00152 static inline void fsquare_times(felem output, const felem in, limb count) {
00153   uint128_t t[5];
00154   limb r0,r1,r2,r3,r4,c;
00155   limb d0,d1,d2,d4,d419;
00156 
00157   r0 = in[0];
00158   r1 = in[1];
00159   r2 = in[2];
00160   r3 = in[3];
00161   r4 = in[4];
00162 
00163   do {
00164     d0 = r0 * 2;
00165     d1 = r1 * 2;
00166     d2 = r2 * 2 * 19;
00167     d419 = r4 * 19;
00168     d4 = d419 * 2;
00169 
00170     t[0] = uint128_t(r0) * r0 + uint128_t(d4) * r1 + uint128_t(d2) * (r3     );
00171     t[1] = uint128_t(d0) * r1 + uint128_t(d4) * r2 + uint128_t(r3) * (r3 * 19);
00172     t[2] = uint128_t(d0) * r2 + uint128_t(r1) * r1 + uint128_t(d4) * (r3     );
00173     t[3] = uint128_t(d0) * r3 + uint128_t(d1) * r2 + uint128_t(r4) * (d419   );
00174     t[4] = uint128_t(d0) * r4 + uint128_t(d1) * r3 + uint128_t(r2) * (r2     );
00175 
00176                     r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51);
00177     t[1] += c;      r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51);
00178     t[2] += c;      r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51);
00179     t[3] += c;      r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51);
00180     t[4] += c;      r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51);
00181     r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
00182     r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
00183     r2 +=   c;
00184   } while(--count);
00185 
00186   output[0] = r0;
00187   output[1] = r1;
00188   output[2] = r2;
00189   output[3] = r3;
00190   output[4] = r4;
00191 }
00192 
00193 /* Load a little-endian 64-bit number  */
00194 static limb
00195 load_limb(const u8 *in) {
00196   return load_le<u64bit>(in, 0);
00197 }
00198 
00199 static void
00200 store_limb(u8 *out, limb in) {
00201   store_le(in, out);
00202 }
00203 
00204 /* Take a little-endian, 32-byte number and expand it into polynomial form */
00205 static void
00206 fexpand(limb *output, const u8 *in) {
00207   output[0] = load_limb(in) & 0x7ffffffffffff;
00208   output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
00209   output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
00210   output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
00211   output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
00212 }
00213 
00214 /* Take a fully reduced polynomial form number and contract it into a
00215  * little-endian, 32-byte array
00216  */
00217 static void
00218 fcontract(u8 *output, const felem input) {
00219   uint128_t t[5];
00220 
00221   t[0] = input[0];
00222   t[1] = input[1];
00223   t[2] = input[2];
00224   t[3] = input[3];
00225   t[4] = input[4];
00226 
00227   t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
00228   t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
00229   t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
00230   t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
00231   t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff;
00232 
00233   t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
00234   t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
00235   t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
00236   t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
00237   t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff;
00238 
00239   /* now t is between 0 and 2^255-1, properly carried. */
00240   /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
00241 
00242   t[0] += 19;
00243 
00244   t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
00245   t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
00246   t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
00247   t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
00248   t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff;
00249 
00250   /* now between 19 and 2^255-1 in both cases, and offset by 19. */
00251 
00252   t[0] += 0x8000000000000 - 19;
00253   t[1] += 0x8000000000000 - 1;
00254   t[2] += 0x8000000000000 - 1;
00255   t[3] += 0x8000000000000 - 1;
00256   t[4] += 0x8000000000000 - 1;
00257 
00258   /* now between 2^255 and 2^256-20, and offset by 2^255. */
00259 
00260   t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
00261   t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
00262   t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
00263   t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
00264   t[4] &= 0x7ffffffffffff;
00265 
00266   store_limb(output,    combine_lower(t[0], 0, t[1], 51));
00267   store_limb(output+8,  combine_lower(t[1], 13, t[2], 38));
00268   store_limb(output+16, combine_lower(t[2], 26, t[3], 25));
00269   store_limb(output+24, combine_lower(t[3], 39, t[4], 12));
00270 }
00271 
00272 /* Input: Q, Q', Q-Q'
00273  * Output: 2Q, Q+Q'
00274  *
00275  *   x2 z3: long form
00276  *   x3 z3: long form
00277  *   x z: short form, destroyed
00278  *   xprime zprime: short form, destroyed
00279  *   qmqp: short form, preserved
00280  */
00281 static void
00282 fmonty(limb *x2, limb *z2, /* output 2Q */
00283        limb *x3, limb *z3, /* output Q + Q' */
00284        limb *x, limb *z,   /* input Q */
00285        limb *xprime, limb *zprime, /* input Q' */
00286        const limb *qmqp /* input Q - Q' */) {
00287   limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
00288         zzprime[5], zzzprime[5];
00289 
00290   copy_mem(origx, x, 5);
00291   fsum(x, z);
00292   fdifference_backwards(z, origx);  // does x - z
00293 
00294   copy_mem(origxprime, xprime, 5);
00295   fsum(xprime, zprime);
00296   fdifference_backwards(zprime, origxprime);
00297   fmul(xxprime, xprime, z);
00298   fmul(zzprime, x, zprime);
00299   copy_mem(origxprime, xxprime, 5);
00300   fsum(xxprime, zzprime);
00301   fdifference_backwards(zzprime, origxprime);
00302   fsquare_times(x3, xxprime, 1);
00303   fsquare_times(zzzprime, zzprime, 1);
00304   fmul(z3, zzzprime, qmqp);
00305 
00306   fsquare_times(xx, x, 1);
00307   fsquare_times(zz, z, 1);
00308   fmul(x2, xx, zz);
00309   fdifference_backwards(zz, xx);  // does zz = xx - zz
00310   fscalar_product(zzz, zz, 121665);
00311   fsum(zzz, xx);
00312   fmul(z2, zz, zzz);
00313 }
00314 
00315 // -----------------------------------------------------------------------------
00316 // Maybe swap the contents of two limb arrays (@a and @b), each @len elements
00317 // long. Perform the swap iff @swap is non-zero.
00318 //
00319 // This function performs the swap without leaking any side-channel
00320 // information.
00321 // -----------------------------------------------------------------------------
00322 static void
00323 swap_conditional(limb a[5], limb b[5], limb iswap) {
00324   unsigned i;
00325   const limb swap = -iswap;
00326 
00327   for (i = 0; i < 5; ++i) {
00328     const limb x = swap & (a[i] ^ b[i]);
00329     a[i] ^= x;
00330     b[i] ^= x;
00331   }
00332 }
00333 
00334 /* Calculates nQ where Q is the x-coordinate of a point on the curve
00335  *
00336  *   resultx/resultz: the x coordinate of the resulting curve point (short form)
00337  *   n: a little endian, 32-byte number
00338  *   q: a point of the curve (short form)
00339  */
00340 static void
00341 cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
00342   limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
00343   limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
00344   limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
00345   limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
00346 
00347   unsigned i, j;
00348 
00349   copy_mem(nqpqx, q, 5);
00350 
00351   for (i = 0; i < 32; ++i) {
00352     u8 byte = n[31 - i];
00353     for (j = 0; j < 8; ++j) {
00354       const limb bit = byte >> 7;
00355 
00356       swap_conditional(nqx, nqpqx, bit);
00357       swap_conditional(nqz, nqpqz, bit);
00358       fmonty(nqx2, nqz2,
00359              nqpqx2, nqpqz2,
00360              nqx, nqz,
00361              nqpqx, nqpqz,
00362              q);
00363       swap_conditional(nqx2, nqpqx2, bit);
00364       swap_conditional(nqz2, nqpqz2, bit);
00365 
00366       t = nqx;
00367       nqx = nqx2;
00368       nqx2 = t;
00369       t = nqz;
00370       nqz = nqz2;
00371       nqz2 = t;
00372       t = nqpqx;
00373       nqpqx = nqpqx2;
00374       nqpqx2 = t;
00375       t = nqpqz;
00376       nqpqz = nqpqz2;
00377       nqpqz2 = t;
00378 
00379       byte <<= 1;
00380     }
00381   }
00382 
00383   copy_mem(resultx, nqx, 5);
00384   copy_mem(resultz, nqz, 5);
00385 }
00386 
00387 
00388 // -----------------------------------------------------------------------------
00389 // Shamelessly copied from djb's code, tightened a little
00390 // -----------------------------------------------------------------------------
00391 static void
00392 crecip(felem out, const felem z) {
00393   felem a,t0,b,c;
00394 
00395   /* 2 */ fsquare_times(a, z, 1); // a = 2
00396   /* 8 */ fsquare_times(t0, a, 2);
00397   /* 9 */ fmul(b, t0, z); // b = 9
00398   /* 11 */ fmul(a, b, a); // a = 11
00399   /* 22 */ fsquare_times(t0, a, 1);
00400   /* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
00401   /* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
00402   /* 2^10 - 2^0 */ fmul(b, t0, b);
00403   /* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
00404   /* 2^20 - 2^0 */ fmul(c, t0, b);
00405   /* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
00406   /* 2^40 - 2^0 */ fmul(t0, t0, c);
00407   /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
00408   /* 2^50 - 2^0 */ fmul(b, t0, b);
00409   /* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
00410   /* 2^100 - 2^0 */ fmul(c, t0, b);
00411   /* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
00412   /* 2^200 - 2^0 */ fmul(t0, t0, c);
00413   /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
00414   /* 2^250 - 2^0 */ fmul(t0, t0, b);
00415   /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
00416   /* 2^255 - 21 */ fmul(out, t0, a);
00417 }
00418 
00419 int
00420 curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
00421   limb bp[5], x[5], z[5], zmone[5];
00422   uint8_t e[32];
00423   int i;
00424 
00425   for (i = 0;i < 32;++i) e[i] = secret[i];
00426   e[0] &= 248;
00427   e[31] &= 127;
00428   e[31] |= 64;
00429 
00430   fexpand(bp, basepoint);
00431   cmult(x, z, e, bp);
00432   crecip(zmone, z);
00433   fmul(z, x, zmone);
00434   fcontract(mypublic, z);
00435   return 0;
00436 }
00437 
00438 }