Botan
1.11.15
|
00001 /* 00002 * CLMUL hook 00003 * (C) 2013 Jack Lloyd 00004 * 00005 * Botan is released under the Simplified BSD License (see license.txt) 00006 */ 00007 00008 #include <botan/internal/clmul.h> 00009 #include <immintrin.h> 00010 #include <wmmintrin.h> 00011 00012 namespace Botan { 00013 00014 void gcm_multiply_clmul(byte x[16], const byte H[16]) 00015 { 00016 /* 00017 * Algorithms 1 and 5 from Intel's CLMUL guide 00018 */ 00019 const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 00020 00021 __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&x[0])); 00022 __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&H[0])); 00023 00024 a = _mm_shuffle_epi8(a, BSWAP_MASK); 00025 b = _mm_shuffle_epi8(b, BSWAP_MASK); 00026 00027 __m128i T0, T1, T2, T3, T4, T5; 00028 00029 T0 = _mm_clmulepi64_si128(a, b, 0x00); 00030 T1 = _mm_clmulepi64_si128(a, b, 0x01); 00031 T2 = _mm_clmulepi64_si128(a, b, 0x10); 00032 T3 = _mm_clmulepi64_si128(a, b, 0x11); 00033 00034 T1 = _mm_xor_si128(T1, T2); 00035 T2 = _mm_slli_si128(T1, 8); 00036 T1 = _mm_srli_si128(T1, 8); 00037 T0 = _mm_xor_si128(T0, T2); 00038 T3 = _mm_xor_si128(T3, T1); 00039 00040 T4 = _mm_srli_epi32(T0, 31); 00041 T0 = _mm_slli_epi32(T0, 1); 00042 00043 T5 = _mm_srli_epi32(T3, 31); 00044 T3 = _mm_slli_epi32(T3, 1); 00045 00046 T2 = _mm_srli_si128(T4, 12); 00047 T5 = _mm_slli_si128(T5, 4); 00048 T4 = _mm_slli_si128(T4, 4); 00049 T0 = _mm_or_si128(T0, T4); 00050 T3 = _mm_or_si128(T3, T5); 00051 T3 = _mm_or_si128(T3, T2); 00052 00053 T4 = _mm_slli_epi32(T0, 31); 00054 T5 = _mm_slli_epi32(T0, 30); 00055 T2 = _mm_slli_epi32(T0, 25); 00056 00057 T4 = _mm_xor_si128(T4, T5); 00058 T4 = _mm_xor_si128(T4, T2); 00059 T5 = _mm_srli_si128(T4, 4); 00060 T3 = _mm_xor_si128(T3, T5); 00061 T4 = _mm_slli_si128(T4, 12); 00062 T0 = _mm_xor_si128(T0, T4); 00063 T3 = _mm_xor_si128(T3, T0); 00064 00065 T4 = _mm_srli_epi32(T0, 1); 00066 T1 = _mm_srli_epi32(T0, 2); 00067 T2 = _mm_srli_epi32(T0, 7); 00068 T3 = _mm_xor_si128(T3, T1); 00069 T3 = _mm_xor_si128(T3, T2); 00070 T3 = _mm_xor_si128(T3, T4); 00071 00072 T3 = _mm_shuffle_epi8(T3, BSWAP_MASK); 00073 00074 _mm_storeu_si128(reinterpret_cast<__m128i*>(&x[0]), T3); 00075 } 00076 00077 }