Botan  1.11.15
src/lib/math/mp/mp_karat.cpp
Go to the documentation of this file.
00001 /*
00002 * Karatsuba Multiplication/Squaring
00003 * (C) 1999-2010 Jack Lloyd
00004 *
00005 * Botan is released under the Simplified BSD License (see license.txt)
00006 */
00007 
00008 #include <botan/internal/mp_core.h>
00009 #include <botan/internal/mp_asmi.h>
00010 #include <botan/mem_ops.h>
00011 
00012 namespace Botan {
00013 
00014 namespace {
00015 
00016 static const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
00017 static const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
00018 
00019 /*
00020 * Karatsuba Multiplication Operation
00021 */
00022 void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
00023                    word workspace[])
00024    {
00025    if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
00026       {
00027       if(N == 6)
00028          return bigint_comba_mul6(z, x, y);
00029       else if(N == 8)
00030          return bigint_comba_mul8(z, x, y);
00031       else if(N == 16)
00032          return bigint_comba_mul16(z, x, y);
00033       else
00034          return bigint_simple_mul(z, x, N, y, N);
00035       }
00036 
00037    const size_t N2 = N / 2;
00038 
00039    const word* x0 = x;
00040    const word* x1 = x + N2;
00041    const word* y0 = y;
00042    const word* y1 = y + N2;
00043    word* z0 = z;
00044    word* z1 = z + N;
00045 
00046    const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
00047    const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
00048 
00049    clear_mem(workspace, 2*N);
00050 
00051    /*
00052    * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
00053    * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
00054    * anything, clear_mem above already set the correct result.
00055    *
00056    * However we ignore the result of the comparisons and always perform the
00057    * subtractions and recursively multiply to avoid the timing channel.
00058    */
00059 
00060    //if(cmp0 && cmp1)
00061       {
00062       if(cmp0 > 0)
00063          bigint_sub3(z0, x0, N2, x1, N2);
00064       else
00065          bigint_sub3(z0, x1, N2, x0, N2);
00066 
00067       if(cmp1 > 0)
00068          bigint_sub3(z1, y1, N2, y0, N2);
00069       else
00070          bigint_sub3(z1, y0, N2, y1, N2);
00071 
00072       karatsuba_mul(workspace, z0, z1, N2, workspace+N);
00073       }
00074 
00075    karatsuba_mul(z0, x0, y0, N2, workspace+N);
00076    karatsuba_mul(z1, x1, y1, N2, workspace+N);
00077 
00078    const word ws_carry = bigint_add3_nc(workspace + N, z0, N, z1, N);
00079    word z_carry = bigint_add2_nc(z + N2, N, workspace + N, N);
00080 
00081    z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
00082    bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
00083 
00084    if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
00085       bigint_add2(z + N2, 2*N-N2, workspace, N);
00086    else
00087       bigint_sub2(z + N2, 2*N-N2, workspace, N);
00088    }
00089 
00090 /*
00091 * Karatsuba Squaring Operation
00092 */
00093 void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
00094    {
00095    if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
00096       {
00097       if(N == 6)
00098          return bigint_comba_sqr6(z, x);
00099       else if(N == 8)
00100          return bigint_comba_sqr8(z, x);
00101       else if(N == 16)
00102          return bigint_comba_sqr16(z, x);
00103       else
00104          return bigint_simple_sqr(z, x, N);
00105       }
00106 
00107    const size_t N2 = N / 2;
00108 
00109    const word* x0 = x;
00110    const word* x1 = x + N2;
00111    word* z0 = z;
00112    word* z1 = z + N;
00113 
00114    const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
00115 
00116    clear_mem(workspace, 2*N);
00117 
00118    // See comment in karatsuba_mul
00119 
00120    //if(cmp)
00121       {
00122       if(cmp > 0)
00123          bigint_sub3(z0, x0, N2, x1, N2);
00124       else
00125          bigint_sub3(z0, x1, N2, x0, N2);
00126 
00127       karatsuba_sqr(workspace, z0, N2, workspace+N);
00128       }
00129 
00130    karatsuba_sqr(z0, x0, N2, workspace+N);
00131    karatsuba_sqr(z1, x1, N2, workspace+N);
00132 
00133    const word ws_carry = bigint_add3_nc(workspace + N, z0, N, z1, N);
00134    word z_carry = bigint_add2_nc(z + N2, N, workspace + N, N);
00135 
00136    z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
00137    bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
00138 
00139    /*
00140    * This is only actually required if cmp is != 0, however
00141    * if cmp==0 then workspace[0:N] == 0 and avoiding the jump
00142    * hides a timing channel.
00143    */
00144    bigint_sub2(z + N2, 2*N-N2, workspace, N);
00145    }
00146 
00147 /*
00148 * Pick a good size for the Karatsuba multiply
00149 */
00150 size_t karatsuba_size(size_t z_size,
00151                       size_t x_size, size_t x_sw,
00152                       size_t y_size, size_t y_sw)
00153    {
00154    if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
00155       return 0;
00156 
00157    if(((x_size == x_sw) && (x_size % 2)) ||
00158       ((y_size == y_sw) && (y_size % 2)))
00159       return 0;
00160 
00161    const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
00162    const size_t end = (x_size < y_size) ? x_size : y_size;
00163 
00164    if(start == end)
00165       {
00166       if(start % 2)
00167          return 0;
00168       return start;
00169       }
00170 
00171    for(size_t j = start; j <= end; ++j)
00172       {
00173       if(j % 2)
00174          continue;
00175 
00176       if(2*j > z_size)
00177          return 0;
00178 
00179       if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
00180          {
00181          if(j % 4 == 2 &&
00182             (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
00183             return j+2;
00184          return j;
00185          }
00186       }
00187 
00188    return 0;
00189    }
00190 
00191 /*
00192 * Pick a good size for the Karatsuba squaring
00193 */
00194 size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
00195    {
00196    if(x_sw == x_size)
00197       {
00198       if(x_sw % 2)
00199          return 0;
00200       return x_sw;
00201       }
00202 
00203    for(size_t j = x_sw; j <= x_size; ++j)
00204       {
00205       if(j % 2)
00206          continue;
00207 
00208       if(2*j > z_size)
00209          return 0;
00210 
00211       if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
00212          return j+2;
00213       return j;
00214       }
00215 
00216    return 0;
00217    }
00218 
00219 }
00220 
00221 /*
00222 * Multiplication Algorithm Dispatcher
00223 */
00224 void bigint_mul(word z[], size_t z_size, word workspace[],
00225                 const word x[], size_t x_size, size_t x_sw,
00226                 const word y[], size_t y_size, size_t y_sw)
00227    {
00228    if(x_sw == 1)
00229       {
00230       bigint_linmul3(z, y, y_sw, x[0]);
00231       }
00232    else if(y_sw == 1)
00233       {
00234       bigint_linmul3(z, x, x_sw, y[0]);
00235       }
00236    else if(x_sw <= 4 && x_size >= 4 &&
00237            y_sw <= 4 && y_size >= 4 && z_size >= 8)
00238       {
00239       bigint_comba_mul4(z, x, y);
00240       }
00241    else if(x_sw <= 6 && x_size >= 6 &&
00242            y_sw <= 6 && y_size >= 6 && z_size >= 12)
00243       {
00244       bigint_comba_mul6(z, x, y);
00245       }
00246    else if(x_sw <= 8 && x_size >= 8 &&
00247            y_sw <= 8 && y_size >= 8 && z_size >= 16)
00248       {
00249       bigint_comba_mul8(z, x, y);
00250       }
00251    else if(x_sw <= 9 && x_size >= 9 &&
00252            y_sw <= 9 && y_size >= 9 && z_size >= 18)
00253       {
00254       bigint_comba_mul9(z, x, y);
00255       }
00256    else if(x_sw <= 16 && x_size >= 16 &&
00257            y_sw <= 16 && y_size >= 16 && z_size >= 32)
00258       {
00259       bigint_comba_mul16(z, x, y);
00260       }
00261    else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
00262            y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
00263            !workspace)
00264       {
00265       bigint_simple_mul(z, x, x_sw, y, y_sw);
00266       }
00267    else
00268       {
00269       const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
00270 
00271       if(N)
00272          karatsuba_mul(z, x, y, N, workspace);
00273       else
00274          bigint_simple_mul(z, x, x_sw, y, y_sw);
00275       }
00276    }
00277 
00278 /*
00279 * Squaring Algorithm Dispatcher
00280 */
00281 void bigint_sqr(word z[], size_t z_size, word workspace[],
00282                 const word x[], size_t x_size, size_t x_sw)
00283    {
00284    if(x_sw == 1)
00285       {
00286       bigint_linmul3(z, x, x_sw, x[0]);
00287       }
00288    else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
00289       {
00290       bigint_comba_sqr4(z, x);
00291       }
00292    else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
00293       {
00294       bigint_comba_sqr6(z, x);
00295       }
00296    else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
00297       {
00298       bigint_comba_sqr8(z, x);
00299       }
00300    else if(x_sw == 9 && x_size >= 9 && z_size >= 18)
00301       {
00302       bigint_comba_sqr9(z, x);
00303       }
00304    else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
00305       {
00306       bigint_comba_sqr16(z, x);
00307       }
00308    else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
00309       {
00310       bigint_simple_sqr(z, x, x_sw);
00311       }
00312    else
00313       {
00314       const size_t N = karatsuba_size(z_size, x_size, x_sw);
00315 
00316       if(N)
00317          karatsuba_sqr(z, x, N, workspace);
00318       else
00319          bigint_simple_sqr(z, x, x_sw);
00320       }
00321    }
00322 
00323 }