![]() |
Eigen
3.3.3
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com> 00006 // Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com> 00007 // Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com> 00008 // Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org> 00009 // Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com> 00010 // 00011 // This Source Code Form is subject to the terms of the Mozilla 00012 // Public License v. 2.0. If a copy of the MPL was not distributed 00013 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00014 00015 00016 /***************************************************************************** 00017 *** Platform checks for aligned malloc functions *** 00018 *****************************************************************************/ 00019 00020 #ifndef EIGEN_MEMORY_H 00021 #define EIGEN_MEMORY_H 00022 00023 #ifndef EIGEN_MALLOC_ALREADY_ALIGNED 00024 00025 // Try to determine automatically if malloc is already aligned. 00026 00027 // On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see: 00028 // http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html 00029 // This is true at least since glibc 2.8. 00030 // This leaves the question how to detect 64-bit. According to this document, 00031 // http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf 00032 // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed 00033 // quite safe, at least within the context of glibc, to equate 64-bit with LP64. 00034 #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \ 00035 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16) 00036 #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1 00037 #else 00038 #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0 00039 #endif 00040 00041 // FreeBSD 6 seems to have 16-byte aligned malloc 00042 // See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup 00043 // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures 00044 // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup 00045 #if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16) 00046 #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 00047 #else 00048 #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 00049 #endif 00050 00051 #if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \ 00052 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \ 00053 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ 00054 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 00055 #define EIGEN_MALLOC_ALREADY_ALIGNED 1 00056 #else 00057 #define EIGEN_MALLOC_ALREADY_ALIGNED 0 00058 #endif 00059 00060 #endif 00061 00062 namespace Eigen { 00063 00064 namespace internal { 00065 00066 EIGEN_DEVICE_FUNC 00067 inline void throw_std_bad_alloc() 00068 { 00069 #ifdef EIGEN_EXCEPTIONS 00070 throw std::bad_alloc(); 00071 #else 00072 std::size_t huge = static_cast<std::size_t>(-1); 00073 new int[huge]; 00074 #endif 00075 } 00076 00077 /***************************************************************************** 00078 *** Implementation of handmade aligned functions *** 00079 *****************************************************************************/ 00080 00081 /* ----- Hand made implementations of aligned malloc/free and realloc ----- */ 00082 00086 inline void* handmade_aligned_malloc(std::size_t size) 00087 { 00088 void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES); 00089 if (original == 0) return 0; 00090 void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); 00091 *(reinterpret_cast<void**>(aligned) - 1) = original; 00092 return aligned; 00093 } 00094 00096 inline void handmade_aligned_free(void *ptr) 00097 { 00098 if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1)); 00099 } 00100 00106 inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) 00107 { 00108 if (ptr == 0) return handmade_aligned_malloc(size); 00109 void *original = *(reinterpret_cast<void**>(ptr) - 1); 00110 std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original); 00111 original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES); 00112 if (original == 0) return 0; 00113 void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); 00114 void *previous_aligned = static_cast<char *>(original)+previous_offset; 00115 if(aligned!=previous_aligned) 00116 std::memmove(aligned, previous_aligned, size); 00117 00118 *(reinterpret_cast<void**>(aligned) - 1) = original; 00119 return aligned; 00120 } 00121 00122 /***************************************************************************** 00123 *** Implementation of portable aligned versions of malloc/free/realloc *** 00124 *****************************************************************************/ 00125 00126 #ifdef EIGEN_NO_MALLOC 00127 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() 00128 { 00129 eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)"); 00130 } 00131 #elif defined EIGEN_RUNTIME_NO_MALLOC 00132 EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) 00133 { 00134 static bool value = true; 00135 if (update == 1) 00136 value = new_value; 00137 return value; 00138 } 00139 EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); } 00140 EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); } 00141 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() 00142 { 00143 eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); 00144 } 00145 #else 00146 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() 00147 {} 00148 #endif 00149 00153 EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) 00154 { 00155 check_that_malloc_is_allowed(); 00156 00157 void *result; 00158 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED 00159 result = std::malloc(size); 00160 #if EIGEN_DEFAULT_ALIGN_BYTES==16 00161 eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); 00162 #endif 00163 #else 00164 result = handmade_aligned_malloc(size); 00165 #endif 00166 00167 if(!result && size) 00168 throw_std_bad_alloc(); 00169 00170 return result; 00171 } 00172 00174 EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) 00175 { 00176 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED 00177 std::free(ptr); 00178 #else 00179 handmade_aligned_free(ptr); 00180 #endif 00181 } 00182 00188 inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) 00189 { 00190 EIGEN_UNUSED_VARIABLE(old_size); 00191 00192 void *result; 00193 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED 00194 result = std::realloc(ptr,new_size); 00195 #else 00196 result = handmade_aligned_realloc(ptr,new_size,old_size); 00197 #endif 00198 00199 if (!result && new_size) 00200 throw_std_bad_alloc(); 00201 00202 return result; 00203 } 00204 00205 /***************************************************************************** 00206 *** Implementation of conditionally aligned functions *** 00207 *****************************************************************************/ 00208 00212 template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) 00213 { 00214 return aligned_malloc(size); 00215 } 00216 00217 template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size) 00218 { 00219 check_that_malloc_is_allowed(); 00220 00221 void *result = std::malloc(size); 00222 if(!result && size) 00223 throw_std_bad_alloc(); 00224 return result; 00225 } 00226 00228 template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr) 00229 { 00230 aligned_free(ptr); 00231 } 00232 00233 template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) 00234 { 00235 std::free(ptr); 00236 } 00237 00238 template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) 00239 { 00240 return aligned_realloc(ptr, new_size, old_size); 00241 } 00242 00243 template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t) 00244 { 00245 return std::realloc(ptr, new_size); 00246 } 00247 00248 /***************************************************************************** 00249 *** Construction/destruction of array elements *** 00250 *****************************************************************************/ 00251 00255 template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size) 00256 { 00257 // always destruct an array starting from the end. 00258 if(ptr) 00259 while(size) ptr[--size].~T(); 00260 } 00261 00265 template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size) 00266 { 00267 std::size_t i; 00268 EIGEN_TRY 00269 { 00270 for (i = 0; i < size; ++i) ::new (ptr + i) T; 00271 return ptr; 00272 } 00273 EIGEN_CATCH(...) 00274 { 00275 destruct_elements_of_array(ptr, i); 00276 EIGEN_THROW; 00277 } 00278 return NULL; 00279 } 00280 00281 /***************************************************************************** 00282 *** Implementation of aligned new/delete-like functions *** 00283 *****************************************************************************/ 00284 00285 template<typename T> 00286 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) 00287 { 00288 if(size > std::size_t(-1) / sizeof(T)) 00289 throw_std_bad_alloc(); 00290 } 00291 00296 template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) 00297 { 00298 check_size_for_overflow<T>(size); 00299 T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size)); 00300 EIGEN_TRY 00301 { 00302 return construct_elements_of_array(result, size); 00303 } 00304 EIGEN_CATCH(...) 00305 { 00306 aligned_free(result); 00307 EIGEN_THROW; 00308 } 00309 return result; 00310 } 00311 00312 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) 00313 { 00314 check_size_for_overflow<T>(size); 00315 T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); 00316 EIGEN_TRY 00317 { 00318 return construct_elements_of_array(result, size); 00319 } 00320 EIGEN_CATCH(...) 00321 { 00322 conditional_aligned_free<Align>(result); 00323 EIGEN_THROW; 00324 } 00325 return result; 00326 } 00327 00331 template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) 00332 { 00333 destruct_elements_of_array<T>(ptr, size); 00334 aligned_free(ptr); 00335 } 00336 00340 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size) 00341 { 00342 destruct_elements_of_array<T>(ptr, size); 00343 conditional_aligned_free<Align>(ptr); 00344 } 00345 00346 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) 00347 { 00348 check_size_for_overflow<T>(new_size); 00349 check_size_for_overflow<T>(old_size); 00350 if(new_size < old_size) 00351 destruct_elements_of_array(pts+new_size, old_size-new_size); 00352 T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size)); 00353 if(new_size > old_size) 00354 { 00355 EIGEN_TRY 00356 { 00357 construct_elements_of_array(result+old_size, new_size-old_size); 00358 } 00359 EIGEN_CATCH(...) 00360 { 00361 conditional_aligned_free<Align>(result); 00362 EIGEN_THROW; 00363 } 00364 } 00365 return result; 00366 } 00367 00368 00369 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) 00370 { 00371 if(size==0) 00372 return 0; // short-cut. Also fixes Bug 884 00373 check_size_for_overflow<T>(size); 00374 T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); 00375 if(NumTraits<T>::RequireInitialization) 00376 { 00377 EIGEN_TRY 00378 { 00379 construct_elements_of_array(result, size); 00380 } 00381 EIGEN_CATCH(...) 00382 { 00383 conditional_aligned_free<Align>(result); 00384 EIGEN_THROW; 00385 } 00386 } 00387 return result; 00388 } 00389 00390 template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) 00391 { 00392 check_size_for_overflow<T>(new_size); 00393 check_size_for_overflow<T>(old_size); 00394 if(NumTraits<T>::RequireInitialization && (new_size < old_size)) 00395 destruct_elements_of_array(pts+new_size, old_size-new_size); 00396 T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size)); 00397 if(NumTraits<T>::RequireInitialization && (new_size > old_size)) 00398 { 00399 EIGEN_TRY 00400 { 00401 construct_elements_of_array(result+old_size, new_size-old_size); 00402 } 00403 EIGEN_CATCH(...) 00404 { 00405 conditional_aligned_free<Align>(result); 00406 EIGEN_THROW; 00407 } 00408 } 00409 return result; 00410 } 00411 00412 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size) 00413 { 00414 if(NumTraits<T>::RequireInitialization) 00415 destruct_elements_of_array<T>(ptr, size); 00416 conditional_aligned_free<Align>(ptr); 00417 } 00418 00419 /****************************************************************************/ 00420 00438 template<int Alignment, typename Scalar, typename Index> 00439 EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) 00440 { 00441 const Index ScalarSize = sizeof(Scalar); 00442 const Index AlignmentSize = Alignment / ScalarSize; 00443 const Index AlignmentMask = AlignmentSize-1; 00444 00445 if(AlignmentSize<=1) 00446 { 00447 // Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar 00448 // so that all elements of the array have the same alignment. 00449 return 0; 00450 } 00451 else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0) 00452 { 00453 // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size. 00454 // Consequently, no element of the array is well aligned. 00455 return size; 00456 } 00457 else 00458 { 00459 Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask; 00460 return (first < size) ? first : size; 00461 } 00462 } 00463 00466 template<typename Scalar, typename Index> 00467 EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) 00468 { 00469 typedef typename packet_traits<Scalar>::type DefaultPacketType; 00470 return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size); 00471 } 00472 00475 template<typename Index> 00476 inline Index first_multiple(Index size, Index base) 00477 { 00478 return ((size+base-1)/base)*base; 00479 } 00480 00481 // std::copy is much slower than memcpy, so let's introduce a smart_copy which 00482 // use memcpy on trivial types, i.e., on types that does not require an initialization ctor. 00483 template<typename T, bool UseMemcpy> struct smart_copy_helper; 00484 00485 template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) 00486 { 00487 smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target); 00488 } 00489 00490 template<typename T> struct smart_copy_helper<T,true> { 00491 EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) 00492 { 00493 IntPtr size = IntPtr(end)-IntPtr(start); 00494 if(size==0) return; 00495 eigen_internal_assert(start!=0 && end!=0 && target!=0); 00496 memcpy(target, start, size); 00497 } 00498 }; 00499 00500 template<typename T> struct smart_copy_helper<T,false> { 00501 EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) 00502 { std::copy(start, end, target); } 00503 }; 00504 00505 // intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. 00506 template<typename T, bool UseMemmove> struct smart_memmove_helper; 00507 00508 template<typename T> void smart_memmove(const T* start, const T* end, T* target) 00509 { 00510 smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target); 00511 } 00512 00513 template<typename T> struct smart_memmove_helper<T,true> { 00514 static inline void run(const T* start, const T* end, T* target) 00515 { 00516 IntPtr size = IntPtr(end)-IntPtr(start); 00517 if(size==0) return; 00518 eigen_internal_assert(start!=0 && end!=0 && target!=0); 00519 std::memmove(target, start, size); 00520 } 00521 }; 00522 00523 template<typename T> struct smart_memmove_helper<T,false> { 00524 static inline void run(const T* start, const T* end, T* target) 00525 { 00526 if (UIntPtr(target) < UIntPtr(start)) 00527 { 00528 std::copy(start, end, target); 00529 } 00530 else 00531 { 00532 std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); 00533 std::copy_backward(start, end, target + count); 00534 } 00535 } 00536 }; 00537 00538 00539 /***************************************************************************** 00540 *** Implementation of runtime stack allocation (falling back to malloc) *** 00541 *****************************************************************************/ 00542 00543 // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA 00544 // to the appropriate stack allocation function 00545 #ifndef EIGEN_ALLOCA 00546 #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) 00547 #define EIGEN_ALLOCA alloca 00548 #elif EIGEN_COMP_MSVC 00549 #define EIGEN_ALLOCA _alloca 00550 #endif 00551 #endif 00552 00553 // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data 00554 // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. 00555 template<typename T> class aligned_stack_memory_handler : noncopyable 00556 { 00557 public: 00558 /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size. 00559 * Note that \a ptr can be 0 regardless of the other parameters. 00560 * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization). 00561 * In this case, the buffer elements will also be destructed when this handler will be destructed. 00562 * Finally, if \a dealloc is true, then the pointer \a ptr is freed. 00563 **/ 00564 aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) 00565 : m_ptr(ptr), m_size(size), m_deallocate(dealloc) 00566 { 00567 if(NumTraits<T>::RequireInitialization && m_ptr) 00568 Eigen::internal::construct_elements_of_array(m_ptr, size); 00569 } 00570 ~aligned_stack_memory_handler() 00571 { 00572 if(NumTraits<T>::RequireInitialization && m_ptr) 00573 Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size); 00574 if(m_deallocate) 00575 Eigen::internal::aligned_free(m_ptr); 00576 } 00577 protected: 00578 T* m_ptr; 00579 std::size_t m_size; 00580 bool m_deallocate; 00581 }; 00582 00583 template<typename T> class scoped_array : noncopyable 00584 { 00585 T* m_ptr; 00586 public: 00587 explicit scoped_array(std::ptrdiff_t size) 00588 { 00589 m_ptr = new T[size]; 00590 } 00591 ~scoped_array() 00592 { 00593 delete[] m_ptr; 00594 } 00595 T& operator[](std::ptrdiff_t i) { return m_ptr[i]; } 00596 const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; } 00597 T* &ptr() { return m_ptr; } 00598 const T* ptr() const { return m_ptr; } 00599 operator const T*() const { return m_ptr; } 00600 }; 00601 00602 template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) 00603 { 00604 std::swap(a.ptr(),b.ptr()); 00605 } 00606 00607 } // end namespace internal 00608 00624 #ifdef EIGEN_ALLOCA 00625 00626 #if EIGEN_DEFAULT_ALIGN_BYTES>0 00627 // We always manually re-align the result of EIGEN_ALLOCA. 00628 // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. 00629 #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) 00630 #else 00631 #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE) 00632 #endif 00633 00634 #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ 00635 Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ 00636 TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \ 00637 : reinterpret_cast<TYPE*>( \ 00638 (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \ 00639 : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \ 00640 Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) 00641 00642 #else 00643 00644 #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ 00645 Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ 00646 TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ 00647 Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) 00648 00649 #endif 00650 00651 00652 /***************************************************************************** 00653 *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** 00654 *****************************************************************************/ 00655 00656 #if EIGEN_MAX_ALIGN_BYTES!=0 00657 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ 00658 void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ 00659 EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ 00660 EIGEN_CATCH (...) { return 0; } \ 00661 } 00662 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ 00663 void *operator new(std::size_t size) { \ 00664 return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ 00665 } \ 00666 void *operator new[](std::size_t size) { \ 00667 return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ 00668 } \ 00669 void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ 00670 void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ 00671 void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ 00672 void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ 00673 /* in-place new and delete. since (at least afaik) there is no actual */ \ 00674 /* memory allocated we can safely let the default implementation handle */ \ 00675 /* this particular case. */ \ 00676 static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ 00677 static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ 00678 void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ 00679 void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ 00680 /* nothrow-new (returns zero instead of std::bad_alloc) */ \ 00681 EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ 00682 void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ 00683 Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \ 00684 } \ 00685 typedef void eigen_aligned_operator_new_marker_type; 00686 #else 00687 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) 00688 #endif 00689 00690 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) 00691 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ 00692 EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) 00693 00694 /****************************************************************************/ 00695 00712 template<class T> 00713 class aligned_allocator : public std::allocator<T> 00714 { 00715 public: 00716 typedef std::size_t size_type; 00717 typedef std::ptrdiff_t difference_type; 00718 typedef T* pointer; 00719 typedef const T* const_pointer; 00720 typedef T& reference; 00721 typedef const T& const_reference; 00722 typedef T value_type; 00723 00724 template<class U> 00725 struct rebind 00726 { 00727 typedef aligned_allocator<U> other; 00728 }; 00729 00730 aligned_allocator() : std::allocator<T>() {} 00731 00732 aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {} 00733 00734 template<class U> 00735 aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {} 00736 00737 ~aligned_allocator() {} 00738 00739 pointer allocate(size_type num, const void* /*hint*/ = 0) 00740 { 00741 internal::check_size_for_overflow<T>(num); 00742 return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) ); 00743 } 00744 00745 void deallocate(pointer p, size_type /*num*/) 00746 { 00747 internal::aligned_free(p); 00748 } 00749 }; 00750 00751 //---------- Cache sizes ---------- 00752 00753 #if !defined(EIGEN_NO_CPUID) 00754 # if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64 00755 # if defined(__PIC__) && EIGEN_ARCH_i386 00756 // Case for x86 with PIC 00757 # define EIGEN_CPUID(abcd,func,id) \ 00758 __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id)); 00759 # elif defined(__PIC__) && EIGEN_ARCH_x86_64 00760 // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model. 00761 // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway. 00762 # define EIGEN_CPUID(abcd,func,id) \ 00763 __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id)); 00764 # else 00765 // Case for x86_64 or x86 w/o PIC 00766 # define EIGEN_CPUID(abcd,func,id) \ 00767 __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) ); 00768 # endif 00769 # elif EIGEN_COMP_MSVC 00770 # if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64 00771 # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id) 00772 # endif 00773 # endif 00774 #endif 00775 00776 namespace internal { 00777 00778 #ifdef EIGEN_CPUID 00779 00780 inline bool cpuid_is_vendor(int abcd[4], const int vendor[3]) 00781 { 00782 return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2]; 00783 } 00784 00785 inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3) 00786 { 00787 int abcd[4]; 00788 l1 = l2 = l3 = 0; 00789 int cache_id = 0; 00790 int cache_type = 0; 00791 do { 00792 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; 00793 EIGEN_CPUID(abcd,0x4,cache_id); 00794 cache_type = (abcd[0] & 0x0F) >> 0; 00795 if(cache_type==1||cache_type==3) // data or unified cache 00796 { 00797 int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5] 00798 int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22] 00799 int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12] 00800 int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0] 00801 int sets = (abcd[2]); // C[31:0] 00802 00803 int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1); 00804 00805 switch(cache_level) 00806 { 00807 case 1: l1 = cache_size; break; 00808 case 2: l2 = cache_size; break; 00809 case 3: l3 = cache_size; break; 00810 default: break; 00811 } 00812 } 00813 cache_id++; 00814 } while(cache_type>0 && cache_id<16); 00815 } 00816 00817 inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3) 00818 { 00819 int abcd[4]; 00820 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; 00821 l1 = l2 = l3 = 0; 00822 EIGEN_CPUID(abcd,0x00000002,0); 00823 unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2; 00824 bool check_for_p2_core2 = false; 00825 for(int i=0; i<14; ++i) 00826 { 00827 switch(bytes[i]) 00828 { 00829 case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines 00830 case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines 00831 case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines 00832 case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) 00833 case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) 00834 case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines 00835 case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines 00836 case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored 00837 case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored 00838 case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored 00839 case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored 00840 case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64) 00841 case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored 00842 case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored 00843 case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored 00844 case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored 00845 case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored 00846 case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored 00847 case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored 00848 case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored 00849 case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored 00850 case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored 00851 case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core) 00852 case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines 00853 case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines 00854 case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines 00855 case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines 00856 case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines 00857 case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines 00858 case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines 00859 case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines 00860 case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2 00861 case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines 00862 case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines 00863 case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines 00864 case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines 00865 case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines 00866 case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines 00867 case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored 00868 case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored 00869 case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored 00870 case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored 00871 case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines 00872 case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64) 00873 case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines 00874 case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines 00875 case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines 00876 case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines 00877 case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines 00878 case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines 00879 case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines 00880 case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines 00881 case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines 00882 case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64) 00883 case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64) 00884 case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64) 00885 case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64) 00886 00887 default: break; 00888 } 00889 } 00890 if(check_for_p2_core2 && l2 == l3) 00891 l3 = 0; 00892 l1 *= 1024; 00893 l2 *= 1024; 00894 l3 *= 1024; 00895 } 00896 00897 inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) 00898 { 00899 if(max_std_funcs>=4) 00900 queryCacheSizes_intel_direct(l1,l2,l3); 00901 else 00902 queryCacheSizes_intel_codes(l1,l2,l3); 00903 } 00904 00905 inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) 00906 { 00907 int abcd[4]; 00908 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; 00909 EIGEN_CPUID(abcd,0x80000005,0); 00910 l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB 00911 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; 00912 EIGEN_CPUID(abcd,0x80000006,0); 00913 l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB 00914 l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB 00915 } 00916 #endif 00917 00920 inline void queryCacheSizes(int& l1, int& l2, int& l3) 00921 { 00922 #ifdef EIGEN_CPUID 00923 int abcd[4]; 00924 const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e}; 00925 const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163}; 00926 const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!" 00927 00928 // identify the CPU vendor 00929 EIGEN_CPUID(abcd,0x0,0); 00930 int max_std_funcs = abcd[1]; 00931 if(cpuid_is_vendor(abcd,GenuineIntel)) 00932 queryCacheSizes_intel(l1,l2,l3,max_std_funcs); 00933 else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_)) 00934 queryCacheSizes_amd(l1,l2,l3); 00935 else 00936 // by default let's use Intel's API 00937 queryCacheSizes_intel(l1,l2,l3,max_std_funcs); 00938 00939 // here is the list of other vendors: 00940 // ||cpuid_is_vendor(abcd,"VIA VIA VIA ") 00941 // ||cpuid_is_vendor(abcd,"CyrixInstead") 00942 // ||cpuid_is_vendor(abcd,"CentaurHauls") 00943 // ||cpuid_is_vendor(abcd,"GenuineTMx86") 00944 // ||cpuid_is_vendor(abcd,"TransmetaCPU") 00945 // ||cpuid_is_vendor(abcd,"RiseRiseRise") 00946 // ||cpuid_is_vendor(abcd,"Geode by NSC") 00947 // ||cpuid_is_vendor(abcd,"SiS SiS SiS ") 00948 // ||cpuid_is_vendor(abcd,"UMC UMC UMC ") 00949 // ||cpuid_is_vendor(abcd,"NexGenDriven") 00950 #else 00951 l1 = l2 = l3 = -1; 00952 #endif 00953 } 00954 00957 inline int queryL1CacheSize() 00958 { 00959 int l1(-1), l2, l3; 00960 queryCacheSizes(l1,l2,l3); 00961 return l1; 00962 } 00963 00966 inline int queryTopLevelCacheSize() 00967 { 00968 int l1, l2(-1), l3(-1); 00969 queryCacheSizes(l1,l2,l3); 00970 return (std::max)(l2,l3); 00971 } 00972 00973 } // end namespace internal 00974 00975 } // end namespace Eigen 00976 00977 #endif // EIGEN_MEMORY_H