![]() |
Eigen
3.3.3
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // This Source Code Form is subject to the terms of the Mozilla 00005 // Public License v. 2.0. If a copy of the MPL was not distributed 00006 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00007 00008 /* The sin, cos, exp, and log functions of this file come from 00009 * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ 00010 */ 00011 00012 #ifndef EIGEN_MATH_FUNCTIONS_NEON_H 00013 #define EIGEN_MATH_FUNCTIONS_NEON_H 00014 00015 namespace Eigen { 00016 00017 namespace internal { 00018 00019 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 00020 Packet4f pexp<Packet4f>(const Packet4f& _x) 00021 { 00022 Packet4f x = _x; 00023 Packet4f tmp, fx; 00024 00025 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); 00026 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); 00027 _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); 00028 _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); 00029 _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); 00030 _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); 00031 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); 00032 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); 00033 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); 00034 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); 00035 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f); 00036 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); 00037 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); 00038 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); 00039 00040 x = vminq_f32(x, p4f_exp_hi); 00041 x = vmaxq_f32(x, p4f_exp_lo); 00042 00043 /* express exp(x) as exp(g + n*log(2)) */ 00044 fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF); 00045 00046 /* perform a floorf */ 00047 tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx)); 00048 00049 /* if greater, substract 1 */ 00050 Packet4ui mask = vcgtq_f32(tmp, fx); 00051 mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1)); 00052 00053 fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask)); 00054 00055 tmp = vmulq_f32(fx, p4f_cephes_exp_C1); 00056 Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2); 00057 x = vsubq_f32(x, tmp); 00058 x = vsubq_f32(x, z); 00059 00060 Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x); 00061 z = vmulq_f32(x, x); 00062 y = vaddq_f32(y, p4f_cephes_exp_p1); 00063 y = vmulq_f32(y, x); 00064 y = vaddq_f32(y, p4f_cephes_exp_p2); 00065 y = vmulq_f32(y, x); 00066 y = vaddq_f32(y, p4f_cephes_exp_p3); 00067 y = vmulq_f32(y, x); 00068 y = vaddq_f32(y, p4f_cephes_exp_p4); 00069 y = vmulq_f32(y, x); 00070 y = vaddq_f32(y, p4f_cephes_exp_p5); 00071 00072 y = vmulq_f32(y, z); 00073 y = vaddq_f32(y, x); 00074 y = vaddq_f32(y, p4f_1); 00075 00076 /* build 2^n */ 00077 int32x4_t mm; 00078 mm = vcvtq_s32_f32(fx); 00079 mm = vaddq_s32(mm, p4i_0x7f); 00080 mm = vshlq_n_s32(mm, 23); 00081 Packet4f pow2n = vreinterpretq_f32_s32(mm); 00082 00083 y = vmulq_f32(y, pow2n); 00084 return y; 00085 } 00086 00087 } // end namespace internal 00088 00089 } // end namespace Eigen 00090 00091 #endif // EIGEN_MATH_FUNCTIONS_NEON_H