// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <[email protected]>
// Copyright (C) 2008-2009 Benoit Jacob <[email protected]>
// Copyright (C) 2009 Kenneth Riddile <[email protected]>
// Copyright (C) 2010 Hauke Heibel <[email protected]>
// Copyright (C) 2010 Thomas Capricelli <[email protected]>
// Copyright (C) 2013 Pavel Holoborodko <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


/*****************************************************************************
*** Platform checks for aligned malloc functions                          ***
*****************************************************************************/

#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// Try to determine automatically if malloc is already aligned.

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// This leaves the question of how to detect 64-bit. According to this document,
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports", so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc
//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))     \
 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED                      \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif
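/** [Editor's note: illustrative sketch, not part of the original file.]
  * The macros above only predict whether the system malloc already returns
  * 16-byte-aligned pointers. A minimal standalone probe (assuming nothing
  * beyond the C++ standard library) can check what a platform actually does:
  * \code
  * #include <cstdint>
  * #include <cstdio>
  * #include <cstdlib>
  *
  * int main()
  * {
  *   std::uintptr_t bits = 0;
  *   for (int i = 0; i < 32; ++i)   // OR together a few sample addresses
  *   {
  *     void* p = std::malloc(64);
  *     bits |= reinterpret_cast<std::uintptr_t>(p);
  *     std::free(p);
  *   }
  *   // the lowest set bit bounds the alignment observed across all samples
  *   std::printf("observed malloc alignment: %zu bytes\n",
  *               static_cast<std::size_t>(bits & (~bits + 1)));
  *   return 0;
  * }
  * \endcode
  */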
namespace Eigen {

namespace internal {

EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
  #ifdef EIGEN_EXCEPTIONS
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
    new int[huge];
  #endif
}

/*****************************************************************************
*** Implementation of handmade aligned functions                          ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */

inline void* handmade_aligned_malloc(std::size_t size)
{
  void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

inline void handmade_aligned_free(void *ptr)
{
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
}

inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);

  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
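/** [Editor's note: worked example, not part of the original file.]
  * The handmade allocator over-allocates by EIGEN_DEFAULT_ALIGN_BYTES and
  * stashes the pointer returned by malloc in the void* slot just below the
  * aligned address it hands out. For instance, with
  * EIGEN_DEFAULT_ALIGN_BYTES == 16, if std::malloc returns 0x1003:
  *
  *   aligned = (0x1003 & ~0xF) + 16 = 0x1000 + 16 = 0x1010
  *
  * The original pointer 0x1003 is stored at 0x1010 - sizeof(void*), which is
  * why handmade_aligned_free reads *((void**)ptr - 1) to recover the block to
  * pass back to std::free. Rounding up by a full 16 bytes (rather than to the
  * nearest boundary) guarantees the hidden slot always exists.
  */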
/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc   ***
*****************************************************************************/

#ifdef EIGEN_NO_MALLOC
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool value = true;
  if (update == 1)
    value = new_value;
  return value;
}
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif

EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
{
  check_that_malloc_is_allowed();

  void *result;
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
    result = std::malloc(size);
    #if EIGEN_DEFAULT_ALIGN_BYTES==16
    eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fall back to the handmade aligned memory allocator.");
    #endif
  #else
    result = handmade_aligned_malloc(size);
  #endif

  if(!result && size)
    throw_std_bad_alloc();

  return result;
}

EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
    std::free(ptr);
  #else
    handmade_aligned_free(ptr);
  #endif
}

inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
{
  EIGEN_UNUSED_VARIABLE(old_size);

  void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
  result = std::realloc(ptr,new_size);
#else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif

  if (!result && new_size)
    throw_std_bad_alloc();

  return result;
}
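/** [Editor's note: illustrative usage, not part of the original file.]
  * With EIGEN_RUNTIME_NO_MALLOC defined before including any Eigen header,
  * the hooks above can be used to assert that a critical section performs no
  * hidden heap allocation:
  * \code
  * // compile with: -DEIGEN_RUNTIME_NO_MALLOC
  * Eigen::internal::set_is_malloc_allowed(false);
  * y.noalias() = A * x;   // eigen_assert fires if this expression allocates
  * Eigen::internal::set_is_malloc_allowed(true);
  * \endcode
  */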
/*****************************************************************************
*** Implementation of conditionally aligned functions                     ***
*****************************************************************************/

template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(size_t size)
{
  return aligned_malloc(size);
}

template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(size_t size)
{
  check_that_malloc_is_allowed();

  void *result = std::malloc(size);
  if(!result && size)
    throw_std_bad_alloc();
  return result;
}

template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
  std::free(ptr);
}

template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
{
  return std::realloc(ptr, new_size);
}

/*****************************************************************************
*** Construction/destruction of array elements                            ***
*****************************************************************************/

template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}

template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, size_t size)
{
  size_t i;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T;
    return ptr;
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
}
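/** [Editor's note: illustrative sketch, not part of the original file.]
  * The two helpers above implement the standard idiom for managing object
  * lifetimes in raw storage: placement-new each element in order, and on
  * failure or teardown destroy in reverse order. A self-contained equivalent
  * using only the standard library:
  * \code
  * #include <cstddef>
  * #include <new>
  * #include <string>
  *
  * void lifetime_demo()
  * {
  *   typedef std::string T;
  *   const std::size_t n = 4;
  *   // raw, uninitialized storage for n objects of type T
  *   alignas(T) unsigned char raw[4 * sizeof(T)];
  *   T* p = reinterpret_cast<T*>(raw);
  *   for (std::size_t i = 0; i < n; ++i) ::new (p + i) T("x"); // construct
  *   for (std::size_t i = n; i > 0; --i) p[i-1].~T();          // destruct, reversed
  * }
  * \endcode
  */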
/*****************************************************************************
*** Implementation of aligned new/delete-like functions                   ***
*****************************************************************************/

template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
{
  if(size > size_t(-1) / sizeof(T))
    throw_std_bad_alloc();
}
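/** [Editor's note: worked example, not part of the original file.]
  * check_size_for_overflow guards the multiplication sizeof(T)*size before it
  * is performed. With a 32-bit size_t, size_t(-1) == 4294967295, so for
  * T == double (sizeof(T) == 8) the largest safe element count is
  * 4294967295 / 8 == 536870911. Requesting 536870912 elements would make
  * sizeof(T)*size wrap around to a small byte count and silently
  * under-allocate; the check turns that case into throw_std_bad_alloc()
  * instead.
  */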
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    aligned_free(result);
    EIGEN_THROW;
  }
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }
}

template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  aligned_free(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(new_size < old_size)
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(new_size > old_size)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}


template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(size_t size)
{
  if(size==0)
    return 0; // short-cut. Also fixes Bug 884
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result, size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, size_t size)
{
  if(NumTraits<T>::RequireInitialization)
    destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

/****************************************************************************/

template<int Alignment, typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{
  const Index ScalarSize = sizeof(Scalar);
  const Index AlignmentSize = Alignment / ScalarSize;
  const Index AlignmentMask = AlignmentSize-1;

  if(AlignmentSize<=1)
  {
    // Either the requested alignment is smaller than a scalar, or it exactly matches a single scalar,
    // so that all elements of the array have the same alignment.
    return 0;
  }
  else if( (std::size_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
  {
    // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
    return (first < size) ? first : size;
  }
}

template<typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}

template<typename Index>
inline Index first_multiple(Index size, Index base)
{
  return ((size+base-1)/base)*base;
}
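/** [Editor's note: worked example, not part of the original file.]
  * Suppose Alignment == 16 and Scalar == double (8 bytes), and the array
  * starts at address 0x1008. Then AlignmentSize == 2, AlignmentMask == 1,
  * and 0x1008/8 == 0x201 is odd, so first == (2 - 1) & 1 == 1: element 1
  * (at address 0x1010) is the first 16-byte-aligned entry. first_multiple
  * rounds the other way, up to a multiple of \a base, e.g.
  * first_multiple(13, 4) == 16.
  */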
// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_copy_helper<T,true> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  {
    std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    memcpy(target, start, size);
  }
};

template<typename T> struct smart_copy_helper<T,false> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  { std::copy(start, end, target); }
};

// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;

template<typename T> void smart_memmove(const T* start, const T* end, T* target)
{
  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_memmove_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
  {
    std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    std::memmove(target, start, size);
  }
};

template<typename T> struct smart_memmove_helper<T,false> {
  static inline void run(const T* start, const T* end, T* target)
  {
    if (uintptr_t(target) < uintptr_t(start))
    {
      std::copy(start, end, target);
    }
    else
    {
      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
      std::copy_backward(start, end, target + count);
    }
  }
};
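/** [Editor's note: illustrative sketch, not part of the original file.]
  * The same compile-time dispatch can be expressed with the standard type
  * traits instead of NumTraits. A minimal standalone analogue of smart_copy,
  * assuming C++11:
  * \code
  * #include <algorithm>
  * #include <cstring>
  * #include <type_traits>
  *
  * template<typename T>
  * void smart_copy_sketch(const T* start, const T* end, T* target)
  * {
  *   if (std::is_trivially_copyable<T>::value)  // bitwise copy is legal
  *     std::memcpy(target, start, (end - start) * sizeof(T));
  *   else                                        // must run copy ctors/assignments
  *     std::copy(start, end, target);
  * }
  * \endcode
  */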
/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc)   ***
*****************************************************************************/

// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
#ifndef EIGEN_ALLOCA
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
    #define EIGEN_ALLOCA alloca
  #elif EIGEN_COMP_MSVC
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leaks in case of exceptions.
template<typename T> class aligned_stack_memory_handler : noncopyable
{
  public:
    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
     * Note that \a ptr can be 0 regardless of the other parameters.
     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
     * In this case, the buffer elements will also be destructed when this handler is destructed.
     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
     **/
    aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::construct_elements_of_array(m_ptr, size);
    }
    ~aligned_stack_memory_handler()
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
      if(m_deallocate)
        Eigen::internal::aligned_free(m_ptr);
    }
  protected:
    T* m_ptr;
    size_t m_size;
    bool m_deallocate;
};

template<typename T> class scoped_array : noncopyable
{
  T* m_ptr;
public:
  explicit scoped_array(std::ptrdiff_t size)
  {
    m_ptr = new T[size];
  }
  ~scoped_array()
  {
    delete[] m_ptr;
  }
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
  T* &ptr() { return m_ptr; }
  const T* ptr() const { return m_ptr; }
  operator const T*() const { return m_ptr; }
};

template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
  std::swap(a.ptr(),b.ptr());
}

} // end namespace internal

#ifdef EIGEN_ALLOCA

  #if EIGEN_DEFAULT_ALIGN_BYTES>0
    // We always manually re-align the result of EIGEN_ALLOCA.
    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<std::size_t>(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
  #else
    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) );  \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE));    \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)

#endif
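/** [Editor's note: illustrative usage, not part of the original file.]
  * Typical use of the macro above: obtain a temporary, aligned, automatically
  * destroyed workspace that lives on the stack when small and on the heap
  * otherwise:
  * \code
  * void foo(int n)
  * {
  *   // n floats: alloca'd if n*sizeof(float) <= EIGEN_STACK_ALLOCATION_LIMIT,
  *   // heap-allocated (and automatically freed) otherwise. The last argument
  *   // is an optional user-provided buffer (0 here).
  *   ei_declare_aligned_stack_constructed_variable(float, tmp, n, 0);
  *   for (int i = 0; i < n; ++i) tmp[i] = 0.f;
  * } // tmp's elements are destructed and its memory released here
  * \endcode
  */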
/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]               ***
*****************************************************************************/

#if EIGEN_MAX_ALIGN_BYTES!=0
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        EIGEN_CATCH (...) { return 0; } \
      }
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      void *operator new(size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
      static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
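/** [Editor's note: illustrative usage, following the pattern documented in
  * Eigen's "Structures Having Eigen Members" page; not part of this file.]
  * \code
  * struct MyClass
  * {
  *   EIGEN_MAKE_ALIGNED_OPERATOR_NEW  // injects the operator new/delete overloads above
  *   Eigen::Vector4d v;               // fixed-size vectorizable member
  * };
  *
  * MyClass* obj = new MyClass;        // storage for v is now properly aligned
  * delete obj;
  * \endcode
  */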
/****************************************************************************/

template<class T>
class aligned_allocator : public std::allocator<T>
{
public:
  typedef size_t          size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef T*              pointer;
  typedef const T*        const_pointer;
  typedef T&              reference;
  typedef const T&        const_reference;
  typedef T               value_type;

  template<class U>
  struct rebind
  {
    typedef aligned_allocator<U> other;
  };

  aligned_allocator() : std::allocator<T>() {}

  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}

  template<class U>
  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}

  ~aligned_allocator() {}

  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
    internal::check_size_for_overflow<T>(num);
    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
  }

  void deallocate(pointer p, size_type /*num*/)
  {
    internal::aligned_free(p);
  }
};
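/** [Editor's note: illustrative usage, not part of the original file.]
  * aligned_allocator is STL-compatible, so it can back standard containers
  * holding fixed-size vectorizable Eigen types:
  * \code
  * #include <vector>
  *
  * std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > vec;
  * vec.push_back(Eigen::Vector4f::Zero());  // elements get 16-byte-aligned storage
  * \endcode
  */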
//---------- Cache sizes ----------

#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
       // Case for x86 with PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
       // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
       // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
#    else
       // Case for x86_64 or x86 w/o PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
#    endif
#  elif EIGEN_COMP_MSVC
#    if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#    endif
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}

inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type  = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
      int sets        = (abcd[2]);                    // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}

inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;    // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
      case 0x0C: l1 = 16; break;   // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
      case 0x0E: l1 = 24; break;   // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
      case 0x10: l1 = 16; break;   // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x15: l1 = 16; break;   // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x2C: l1 = 32; break;   // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x30: l1 = 32; break;   // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x60: l1 = 16; break;   // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
      case 0x66: l1 = 8; break;    // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
      case 0x67: l1 = 16; break;   // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
      case 0x68: l1 = 32; break;   // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
      case 0x1A: l2 = 96; break;   // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
      case 0x22: l3 = 512; break;  // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
      case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x39: l2 = 128; break;  // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
      case 0x3A: l2 = 192; break;  // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
      case 0x3B: l2 = 128; break;  // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
      case 0x3C: l2 = 256; break;  // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
      case 0x3D: l2 = 384; break;  // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
      case 0x3E: l2 = 512; break;  // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
      case 0x40: l2 = 0; break;    // no integrated L2 cache (P6 core) or L3 cache (P4 core)
      case 0x41: l2 = 128; break;  // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
      case 0x42: l2 = 256; break;  // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
      case 0x43: l2 = 512; break;  // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
      case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
      case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
      case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
      case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
      case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
      case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
      case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
      case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
      case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
      case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
      case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
      case 0x79: l2 = 128; break;  // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7A: l2 = 256; break;  // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7B: l2 = 512; break;  // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
      case 0x7E: l2 = 256; break;  // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
      case 0x7F: l2 = 512; break;  // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
      case 0x80: l2 = 512; break;  // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
      case 0x81: l2 = 128; break;  // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
      case 0x82: l2 = 256; break;  // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
      case 0x83: l2 = 512; break;  // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
      case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
      case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
      case 0x86: l2 = 512; break;  // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
      case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
      case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
      case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else
    queryCacheSizes_intel_codes(l1,l2,l3);
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000005,0);
  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000006,0);
  l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB
  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in 512 KB units
}
#endif

inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[1];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}
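/** [Editor's note: illustrative usage, not part of the original file.]
  * \code
  * int l1, l2, l3;
  * Eigen::internal::queryCacheSizes(l1, l2, l3);
  * // each value is a cache size in bytes, or -1 if EIGEN_CPUID is unavailable
  * \endcode
  */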
inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return (std::max)(l2,l3);
}

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MEMORY_H