// Botan 1.11.15 — src/lib/utils/bswap.h (source listing)
/*
* Byte Swapping Operations
* (C) 1999-2011 Jack Lloyd
* (C) 2007 Yves Jerschow
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#ifndef BOTAN_BYTE_SWAP_H__
#define BOTAN_BYTE_SWAP_H__

#include <botan/types.h>
#include <botan/rotate.h>

#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS)
  #include <emmintrin.h>
#endif

#if defined(_MSC_VER)
  // For the _byteswap_ulong / _byteswap_uint64 intrinsics
  #include <stdlib.h>
#endif

namespace Botan {

00021 /**
00022 * Swap a 16 bit integer
00023 */
00024 inline u16bit reverse_bytes(u16bit val)
00025    {
00026    return rotate_left(val, 8);
00027    }
00029 /**
00030 * Swap a 32 bit integer
00031 */
00032 inline u32bit reverse_bytes(u32bit val)
00033    {
00034 #if BOTAN_GCC_VERSION >= 430 && !defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
00035    /*
00036    GCC intrinsic added in 4.3, works for a number of CPUs
00037 
00038    However avoid under ARM, as it branches to a function in libgcc
00039    instead of generating inline asm, so slower even than the generic
00040    rotate version below.
00041    */
00042    return __builtin_bswap32(val);
00043 
00044 #elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
00045 
00046    // GCC-style inline assembly for x86 or x86-64
00047    asm("bswapl %0" : "=r" (val) : "0" (val));
00048    return val;
00049 
00050 #elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
00051 
00052    asm ("eor r3, %1, %1, ror #16\n\t"
00053         "bic r3, r3, #0x00FF0000\n\t"
00054         "mov %0, %1, ror #8\n\t"
00055         "eor %0, %0, r3, lsr #8"
00056         : "=r" (val)
00057         : "0" (val)
00058         : "r3", "cc");
00059 
00060    return val;
00061 
00062 #elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_X86_32)
00063 
00064    // Visual C++ inline asm for 32-bit x86, by Yves Jerschow
00065    __asm mov eax, val;
00066    __asm bswap eax;
00067 
00068 #else
00069 
00070    // Generic implementation
00071    return (rotate_right(val, 8) & 0xFF00FF00) |
00072           (rotate_left (val, 8) & 0x00FF00FF);
00073 
00074 #endif
00075    }
00077 /**
00078 * Swap a 64 bit integer
00079 */
00080 inline u64bit reverse_bytes(u64bit val)
00081    {
00082 #if BOTAN_GCC_VERSION >= 430
00083 
00084    // GCC intrinsic added in 4.3, works for a number of CPUs
00085    return __builtin_bswap64(val);
00086 
00087 #elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_ARCH_IS_X86_64)
00088    // GCC-style inline assembly for x86-64
00089    asm("bswapq %0" : "=r" (val) : "0" (val));
00090    return val;
00091 
00092 #else
00093    /* Generic implementation. Defined in terms of 32-bit bswap so any
00094     * optimizations in that version can help here (particularly
00095     * useful for 32-bit x86).
00096     */
00097 
00098    u32bit hi = static_cast<u32bit>(val >> 32);
00099    u32bit lo = static_cast<u32bit>(val);
00100 
00101    hi = reverse_bytes(hi);
00102    lo = reverse_bytes(lo);
00103 
00104    return (static_cast<u64bit>(lo) << 32) | hi;
00105 #endif
00106    }
/**
* Byte swap each of the 4 values in an array, in place
* @param x an array of 4 values; each element is replaced by its
*        byte-reversed value
*/
template<typename T>
inline void bswap_4(T x[4])
   {
   for(int i = 0; i != 4; ++i)
      x[i] = reverse_bytes(x[i]);
   }
#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS)

/**
* Swap 4 u32bits in an array using SSE2 shuffle instructions
*
* Byte-reverses each 32-bit word in two stages: first the 16-bit
* halves of every word are exchanged, then the bytes within each
* 16-bit lane are exchanged.
*/
template<>
inline void bswap_4(u32bit x[4])
   {
   // Unaligned load: callers give no alignment guarantee for x
   __m128i T = _mm_loadu_si128(reinterpret_cast<const __m128i*>(x));

   // Stage 1: swap the two 16-bit halves within each 32-bit word;
   // shufflehi handles the upper two words, shufflelo the lower two
   T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
   T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));

   // Stage 2: swap the two bytes within each 16-bit lane via
   // opposing 8-bit shifts OR'd together
   T =  _mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8));

   _mm_storeu_si128(reinterpret_cast<__m128i*>(x), T);
   }

#endif

}

#endif