Botan  1.11.15
src/lib/utils/mul128.h
Go to the documentation of this file.
00001 /*
00002 * 64x64->128 bit multiply operation
00003 * (C) 2013 Jack Lloyd
00004 *
00005 * Botan is released under the Simplified BSD License (see license.txt)
00006 */
00007 
00008 #ifndef BOTAN_UTIL_MUL128_H__
00009 #define BOTAN_UTIL_MUL128_H__
00010 
00011 #include <botan/types.h>
00012 
00013 namespace Botan {
00014 
00015 #if defined(__SIZEOF_INT128__)
00016    #define BOTAN_TARGET_HAS_NATIVE_UINT128
00017    typedef unsigned __int128 uint128_t;
00018 
00019 #elif (BOTAN_GCC_VERSION > 440) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
00020    #define BOTAN_TARGET_HAS_NATIVE_UINT128
00021    typedef unsigned int uint128_t __attribute__((mode(TI)));
00022 #endif
00023 
00024 }
00025 
00026 #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
00027 
00028 #define BOTAN_FAST_64X64_MUL(a,b,lo,hi)      \
00029    do {                                      \
00030       const uint128_t r = static_cast<uint128_t>(a) * b;   \
00031       *hi = (r >> 64) & 0xFFFFFFFFFFFFFFFF;  \
00032       *lo = (r      ) & 0xFFFFFFFFFFFFFFFF;  \
00033    } while(0)
00034 
00035 #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
00036 
00037 #include <intrin.h>
00038 #pragma intrinsic(_umul128)
00039 
00040 #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
00041    do { *lo = _umul128(a, b, hi); } while(0)
00042 
00043 #elif defined(BOTAN_USE_GCC_INLINE_ASM)
00044 
00045 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
00046 
00047 #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                           \
00048    asm("mulq %3" : "=d" (*hi), "=a" (*lo) : "a" (a), "rm" (b) : "cc"); \
00049    } while(0)
00050 
00051 #elif defined(BOTAN_TARGET_ARCH_IS_ALPHA)
00052 
00053 #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {              \
00054    asm("umulh %1,%2,%0" : "=r" (*hi) : "r" (a), "r" (b)); \
00055    *lo = a * b;                                           \
00056 } while(0)
00057 
00058 #elif defined(BOTAN_TARGET_ARCH_IS_IA64)
00059 
00060 #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                \
00061    asm("xmpy.hu %0=%1,%2" : "=f" (*hi) : "f" (a), "f" (b)); \
00062    *lo = a * b;                                             \
00063 } while(0)
00064 
00065 #elif defined(BOTAN_TARGET_ARCH_IS_PPC64)
00066 
00067 #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                      \
00068    asm("mulhdu %0,%1,%2" : "=r" (*hi) : "r" (a), "r" (b) : "cc"); \
00069    *lo = a * b;                                                   \
00070 } while(0)
00071 
00072 #endif
00073 
00074 #endif
00075 
00076 namespace Botan {
00077 
00078 /**
00079 * Perform a 64x64->128 bit multiplication
00080 */
00081 inline void mul64x64_128(u64bit a, u64bit b, u64bit* lo, u64bit* hi)
00082    {
00083 #if defined(BOTAN_FAST_64X64_MUL)
00084    BOTAN_FAST_64X64_MUL(a, b, lo, hi);
00085 #else
00086 
00087    /*
00088    * Do a 64x64->128 multiply using four 32x32->64 multiplies plus
00089    * some adds and shifts. Last resort for CPUs like UltraSPARC (with
00090    * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs.
00091    */
00092    const size_t HWORD_BITS = 32;
00093    const u32bit HWORD_MASK = 0xFFFFFFFF;
00094 
00095    const u32bit a_hi = (a >> HWORD_BITS);
00096    const u32bit a_lo = (a  & HWORD_MASK);
00097    const u32bit b_hi = (b >> HWORD_BITS);
00098    const u32bit b_lo = (b  & HWORD_MASK);
00099 
00100    u64bit x0 = static_cast<u64bit>(a_hi) * b_hi;
00101    u64bit x1 = static_cast<u64bit>(a_lo) * b_hi;
00102    u64bit x2 = static_cast<u64bit>(a_hi) * b_lo;
00103    u64bit x3 = static_cast<u64bit>(a_lo) * b_lo;
00104 
00105    // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1
00106    x2 += x3 >> HWORD_BITS;
00107 
00108    // this one can overflow
00109    x2 += x1;
00110 
00111    // propagate the carry if any
00112    x0 += static_cast<u64bit>(static_cast<bool>(x2 < x1)) << HWORD_BITS;
00113 
00114    *hi = x0 + (x2 >> HWORD_BITS);
00115    *lo  = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK);
00116 #endif
00117    }
00118 
00119 }
00120 
00121 #endif