Botan 1.11.15
/*
* Byte Swapping Operations
* (C) 1999-2011 Jack Lloyd
* (C) 2007 Yves Jerschow
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#ifndef BOTAN_BYTE_SWAP_H__
#define BOTAN_BYTE_SWAP_H__

#include <botan/types.h>
#include <botan/rotate.h>

#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS)
  #include <emmintrin.h>
#endif

namespace Botan {

/**
* Swap a 16 bit integer
*/
inline u16bit reverse_bytes(u16bit val)
   {
   return rotate_left(val, 8);
   }

/**
* Swap a 32 bit integer
*/
inline u32bit reverse_bytes(u32bit val)
   {
#if BOTAN_GCC_VERSION >= 430 && !defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
   /*
   GCC intrinsic added in 4.3, works for a number of CPUs

   However avoid under ARM, as it branches to a function in libgcc
   instead of generating inline asm, so slower even than the generic
   rotate version below.
   */
   return __builtin_bswap32(val);

#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)

   // GCC-style inline assembly for x86 or x86-64
   asm("bswapl %0" : "=r" (val) : "0" (val));
   return val;

#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)

   asm ("eor r3, %1, %1, ror #16\n\t"
        "bic r3, r3, #0x00FF0000\n\t"
        "mov %0, %1, ror #8\n\t"
        "eor %0, %0, r3, lsr #8"
        : "=r" (val)
        : "0" (val)
        : "r3", "cc");

   return val;

#elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_X86_32)

   // Visual C++ inline asm for 32-bit x86, by Yves Jerschow
   __asm mov eax, val;
   __asm bswap eax;

#else

   // Generic implementation
   return (rotate_right(val, 8) & 0xFF00FF00) |
          (rotate_left (val, 8) & 0x00FF00FF);

#endif
   }

/**
* Swap a 64 bit integer
*/
inline u64bit reverse_bytes(u64bit val)
   {
#if BOTAN_GCC_VERSION >= 430

   // GCC intrinsic added in 4.3, works for a number of CPUs
   return __builtin_bswap64(val);

#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_ARCH_IS_X86_64)
   // GCC-style inline assembly for x86-64
   asm("bswapq %0" : "=r" (val) : "0" (val));
   return val;

#else
   /* Generic implementation. Defined in terms of 32-bit bswap so any
    * optimizations in that version can help here (particularly
    * useful for 32-bit x86).
    */

   u32bit hi = static_cast<u32bit>(val >> 32);
   u32bit lo = static_cast<u32bit>(val);

   hi = reverse_bytes(hi);
   lo = reverse_bytes(lo);

   return (static_cast<u64bit>(lo) << 32) | hi;
#endif
   }

/**
* Swap 4 Ts in an array
*/
template<typename T>
inline void bswap_4(T x[4])
   {
   x[0] = reverse_bytes(x[0]);
   x[1] = reverse_bytes(x[1]);
   x[2] = reverse_bytes(x[2]);
   x[3] = reverse_bytes(x[3]);
   }

#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS)

/**
* Swap 4 u32bits in an array using SSE2 shuffle instructions
*/
template<>
inline void bswap_4(u32bit x[4])
   {
   __m128i T = _mm_loadu_si128(reinterpret_cast<const __m128i*>(x));

   T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
   T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));

   T = _mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8));

   _mm_storeu_si128(reinterpret_cast<__m128i*>(x), T);
   }

#endif

}

#endif
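For reference, the generic fallback in the 32-bit reverse_bytes above is a rotate-and-mask trick: rotating by 8 in each direction places two of the four bytes in their mirrored positions, and the masks 0xFF00FF00 / 0x00FF00FF pick the correctly placed bytes from each rotation. The standalone sketch below reproduces only that fallback path with plain uint32_t (assuming, as in Botan's types.h, that u32bit is a 32-bit unsigned type) and checks it against a known value; the names swap32, rotl32 and rotr32 are local to this example and are not part of Botan's API.

#include <cstdint>
#include <cstdio>

// Local stand-ins for Botan's rotate_left/rotate_right templates.
// The shift count is always 8 here, so the zero-shift edge case never occurs.
inline uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }
inline uint32_t rotr32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

// Same rotate-and-mask idea as the generic #else branch of reverse_bytes(u32bit)
inline uint32_t swap32(uint32_t v)
   {
   return (rotr32(v, 8) & 0xFF00FF00) | (rotl32(v, 8) & 0x00FF00FF);
   }

int main()
   {
   const uint32_t v = 0x01234567;
   std::printf("%08X -> %08X\n",
               static_cast<unsigned>(v),
               static_cast<unsigned>(swap32(v))); // expected: 01234567 -> 67452301
   return 0;
   }

The 64-bit fallback in the header composes this same operation: the value is split into two 32-bit halves, each half is byte swapped, and the swapped low half becomes the new high half.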