Botan  1.11.15
src/lib/utils/simd/simd_sse2/simd_sse2.h
Go to the documentation of this file.
00001 /*
00002 * Lightweight wrappers for SSE2 intrinsics for 32-bit operations
00003 * (C) 2009 Jack Lloyd
00004 *
00005 * Botan is released under the Simplified BSD License (see license.txt)
00006 */
00007 
00008 #ifndef BOTAN_SIMD_SSE_H__
00009 #define BOTAN_SIMD_SSE_H__
00010 
00011 #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
00012 
00013 #include <botan/cpuid.h>
00014 #include <emmintrin.h>
00015 
00016 namespace Botan {
00017 
00018 class SIMD_SSE2
00019    {
00020    public:
00021       static bool enabled() { return CPUID::has_sse2(); }
00022 
00023       SIMD_SSE2(const u32bit B[4])
00024          {
00025          reg = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
00026          }
00027 
00028       SIMD_SSE2(u32bit B0, u32bit B1, u32bit B2, u32bit B3)
00029          {
00030          reg = _mm_set_epi32(B0, B1, B2, B3);
00031          }
00032 
00033       SIMD_SSE2(u32bit B)
00034          {
00035          reg = _mm_set1_epi32(B);
00036          }
00037 
00038       static SIMD_SSE2 load_le(const void* in)
00039          {
00040          return _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
00041          }
00042 
00043       static SIMD_SSE2 load_be(const void* in)
00044          {
00045          return load_le(in).bswap();
00046          }
00047 
00048       void store_le(byte out[]) const
00049          {
00050          _mm_storeu_si128(reinterpret_cast<__m128i*>(out), reg);
00051          }
00052 
00053       void store_be(byte out[]) const
00054          {
00055          bswap().store_le(out);
00056          }
00057 
00058       void rotate_left(size_t rot)
00059          {
00060          reg = _mm_or_si128(_mm_slli_epi32(reg, static_cast<int>(rot)),
00061                             _mm_srli_epi32(reg, static_cast<int>(32-rot)));
00062          }
00063 
00064       void rotate_right(size_t rot)
00065          {
00066          rotate_left(32 - rot);
00067          }
00068 
00069       void operator+=(const SIMD_SSE2& other)
00070          {
00071          reg = _mm_add_epi32(reg, other.reg);
00072          }
00073 
00074       SIMD_SSE2 operator+(const SIMD_SSE2& other) const
00075          {
00076          return _mm_add_epi32(reg, other.reg);
00077          }
00078 
00079       void operator-=(const SIMD_SSE2& other)
00080          {
00081          reg = _mm_sub_epi32(reg, other.reg);
00082          }
00083 
00084       SIMD_SSE2 operator-(const SIMD_SSE2& other) const
00085          {
00086          return _mm_sub_epi32(reg, other.reg);
00087          }
00088 
00089       void operator^=(const SIMD_SSE2& other)
00090          {
00091          reg = _mm_xor_si128(reg, other.reg);
00092          }
00093 
00094       SIMD_SSE2 operator^(const SIMD_SSE2& other) const
00095          {
00096          return _mm_xor_si128(reg, other.reg);
00097          }
00098 
00099       void operator|=(const SIMD_SSE2& other)
00100          {
00101          reg = _mm_or_si128(reg, other.reg);
00102          }
00103 
00104       SIMD_SSE2 operator&(const SIMD_SSE2& other)
00105          {
00106          return _mm_and_si128(reg, other.reg);
00107          }
00108 
00109       void operator&=(const SIMD_SSE2& other)
00110          {
00111          reg = _mm_and_si128(reg, other.reg);
00112          }
00113 
00114       SIMD_SSE2 operator<<(size_t shift) const
00115          {
00116          return _mm_slli_epi32(reg, static_cast<int>(shift));
00117          }
00118 
00119       SIMD_SSE2 operator>>(size_t shift) const
00120          {
00121          return _mm_srli_epi32(reg, static_cast<int>(shift));
00122          }
00123 
00124       SIMD_SSE2 operator~() const
00125          {
00126          return _mm_xor_si128(reg, _mm_set1_epi32(0xFFFFFFFF));
00127          }
00128 
00129       // (~reg) & other
00130       SIMD_SSE2 andc(const SIMD_SSE2& other)
00131          {
00132          return _mm_andnot_si128(reg, other.reg);
00133          }
00134 
00135       SIMD_SSE2 bswap() const
00136          {
00137          __m128i T = reg;
00138 
00139          T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
00140          T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
00141 
00142          return _mm_or_si128(_mm_srli_epi16(T, 8),
00143                              _mm_slli_epi16(T, 8));
00144          }
00145 
00146       static void transpose(SIMD_SSE2& B0, SIMD_SSE2& B1,
00147                             SIMD_SSE2& B2, SIMD_SSE2& B3)
00148          {
00149          __m128i T0 = _mm_unpacklo_epi32(B0.reg, B1.reg);
00150          __m128i T1 = _mm_unpacklo_epi32(B2.reg, B3.reg);
00151          __m128i T2 = _mm_unpackhi_epi32(B0.reg, B1.reg);
00152          __m128i T3 = _mm_unpackhi_epi32(B2.reg, B3.reg);
00153          B0.reg = _mm_unpacklo_epi64(T0, T1);
00154          B1.reg = _mm_unpackhi_epi64(T0, T1);
00155          B2.reg = _mm_unpacklo_epi64(T2, T3);
00156          B3.reg = _mm_unpackhi_epi64(T2, T3);
00157          }
00158 
00159    private:
00160       SIMD_SSE2(__m128i in) { reg = in; }
00161 
00162       __m128i reg;
00163    };
00164 
00165 }
00166 
00167 #endif
00168 
00169 #endif