MOAB
4.9.3pre
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008 Gael Guennebaud <[email protected]> 00005 // Copyright (C) 2006-2008 Benoit Jacob <[email protected]> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_GENERIC_PACKET_MATH_H 00012 #define EIGEN_GENERIC_PACKET_MATH_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00026 #ifndef EIGEN_DEBUG_ALIGNED_LOAD 00027 #define EIGEN_DEBUG_ALIGNED_LOAD 00028 #endif 00029 00030 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD 00031 #define EIGEN_DEBUG_UNALIGNED_LOAD 00032 #endif 00033 00034 #ifndef EIGEN_DEBUG_ALIGNED_STORE 00035 #define EIGEN_DEBUG_ALIGNED_STORE 00036 #endif 00037 00038 #ifndef EIGEN_DEBUG_UNALIGNED_STORE 00039 #define EIGEN_DEBUG_UNALIGNED_STORE 00040 #endif 00041 00042 struct default_packet_traits 00043 { 00044 enum { 00045 HasHalfPacket = 0, 00046 00047 HasAdd = 1, 00048 HasSub = 1, 00049 HasMul = 1, 00050 HasNegate = 1, 00051 HasAbs = 1, 00052 HasArg = 0, 00053 HasAbs2 = 1, 00054 HasMin = 1, 00055 HasMax = 1, 00056 HasConj = 1, 00057 HasSetLinear = 1, 00058 HasBlend = 0, 00059 00060 HasDiv = 0, 00061 HasSqrt = 0, 00062 HasRsqrt = 0, 00063 HasExp = 0, 00064 HasLog = 0, 00065 HasLog10 = 0, 00066 HasPow = 0, 00067 00068 HasSin = 0, 00069 HasCos = 0, 00070 HasTan = 0, 00071 HasASin = 0, 00072 HasACos = 0, 00073 HasATan = 0, 00074 HasSinh = 0, 00075 HasCosh = 0, 00076 HasTanh = 0, 00077 HasLGamma = 0, 00078 HasDiGamma = 0, 00079 HasErf = 0, 00080 HasErfc = 0, 00081 00082 HasRound = 0, 00083 HasFloor = 0, 00084 HasCeil = 0, 00085 00086 HasSign = 0 00087 }; 00088 }; 00089 00090 template<typename T> struct packet_traits : default_packet_traits 00091 { 00092 typedef T type; 00093 typedef T half; 00094 enum { 00095 Vectorizable = 0, 00096 size = 1, 00097 AlignedOnScalar = 0, 00098 HasHalfPacket = 0 00099 }; 00100 enum { 00101 HasAdd = 0, 00102 HasSub = 0, 00103 HasMul = 0, 00104 HasNegate = 0, 00105 HasAbs = 0, 00106 HasAbs2 = 0, 00107 HasMin = 0, 00108 HasMax = 0, 00109 HasConj = 0, 00110 HasSetLinear = 0 00111 }; 00112 }; 00113 00114 template<typename T> struct packet_traits<const T> : packet_traits<T> { }; 00115 00116 template <typename Src, typename Tgt> struct type_casting_traits { 00117 enum { 00118 VectorizedCast = 0, 00119 SrcCoeffRatio = 1, 00120 TgtCoeffRatio = 1 00121 }; 00122 }; 00123 00124 00126 template <typename SrcPacket, typename TgtPacket> 00127 EIGEN_DEVICE_FUNC inline TgtPacket 00128 pcast(const SrcPacket& a) { 00129 return static_cast<TgtPacket>(a); 00130 } 00131 template <typename SrcPacket, typename TgtPacket> 00132 EIGEN_DEVICE_FUNC inline TgtPacket 00133 pcast(const SrcPacket& a, const SrcPacket& /*b*/) { 00134 return static_cast<TgtPacket>(a); 00135 } 00136 00137 template <typename SrcPacket, typename TgtPacket> 00138 EIGEN_DEVICE_FUNC inline TgtPacket 00139 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { 00140 return static_cast<TgtPacket>(a); 00141 } 00142 00144 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00145 padd(const Packet& a, 00146 const Packet& b) { return a+b; } 00147 00149 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00150 psub(const Packet& a, 00151 const Packet& b) { return a-b; } 00152 00154 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00155 pnegate(const Packet& a) { return -a; } 00156 00159 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00160 pconj(const Packet& a) { return numext::conj(a); } 00161 00163 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00164 pmul(const Packet& a, 00165 const Packet& b) { return a*b; } 00166 00168 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00169 pdiv(const Packet& a, 00170 const Packet& b) { return a/b; } 00171 00173 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00174 pmin(const Packet& a, 00175 const Packet& b) { return numext::mini(a, b); } 00176 00178 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00179 pmax(const Packet& a, 00180 const Packet& b) { return numext::maxi(a, b); } 00181 00183 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00184 pabs(const Packet& a) { using std::abs; return abs(a); } 00185 00187 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00188 parg(const Packet& a) { using numext::arg; return arg(a); } 00189 00191 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00192 pand(const Packet& a, const Packet& b) { return a & b; } 00193 00195 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00196 por(const Packet& a, const Packet& b) { return a | b; } 00197 00199 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00200 pxor(const Packet& a, const Packet& b) { return a ^ b; } 00201 00203 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00204 pandnot(const Packet& a, const Packet& b) { return a & (!b); } 00205 00207 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00208 pload(const typename unpacket_traits<Packet>::type* from) { return *from; } 00209 00211 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00212 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; } 00213 00215 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00216 pset1(const typename unpacket_traits<Packet>::type& a) { return a; } 00217 00219 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00220 pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); } 00221 00227 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00228 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } 00229 00236 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00237 ploadquad(const typename unpacket_traits<Packet>::type* from) 00238 { return pload1<Packet>(from); } 00239 00249 template<typename Packet> EIGEN_DEVICE_FUNC 00250 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a, 00251 Packet& a0, Packet& a1, Packet& a2, Packet& a3) 00252 { 00253 a0 = pload1<Packet>(a+0); 00254 a1 = pload1<Packet>(a+1); 00255 a2 = pload1<Packet>(a+2); 00256 a3 = pload1<Packet>(a+3); 00257 } 00258 00266 template<typename Packet> EIGEN_DEVICE_FUNC 00267 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a, 00268 Packet& a0, Packet& a1) 00269 { 00270 a0 = pload1<Packet>(a+0); 00271 a1 = pload1<Packet>(a+1); 00272 } 00273 00275 template<typename Packet> inline Packet 00276 plset(const typename unpacket_traits<Packet>::type& a) { return a; } 00277 00279 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) 00280 { (*to) = from; } 00281 00283 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) 00284 { (*to) = from; } 00285 00286 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) 00287 { return ploadu<Packet>(from); } 00288 00289 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) 00290 { pstore(to, from); } 00291 00293 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) 00294 { 00295 #ifdef __CUDA_ARCH__ 00296 #if defined(__LP64__) 00297 // 64-bit pointer operand constraint for inlined asm 00298 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); 00299 #else 00300 // 32-bit pointer operand constraint for inlined asm 00301 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); 00302 #endif 00303 #elif !EIGEN_COMP_MSVC 00304 __builtin_prefetch(addr); 00305 #endif 00306 } 00307 00309 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) 00310 { return a; } 00311 00313 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00314 preduxp(const Packet* vecs) { return vecs[0]; } 00315 00317 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) 00318 { return a; } 00319 00324 template<typename Packet> EIGEN_DEVICE_FUNC inline 00325 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type 00326 predux4(const Packet& a) 00327 { return a; } 00328 00330 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) 00331 { return a; } 00332 00334 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) 00335 { return a; } 00336 00338 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) 00339 { return a; } 00340 00342 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) 00343 { return a; } 00344 00345 template<size_t offset, typename Packet> 00346 struct protate_impl 00347 { 00348 // Empty so attempts to use this unimplemented path will fail to compile. 00349 // Only specializations of this template should be used. 00350 }; 00351 00356 template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a) 00357 { 00358 return offset ? protate_impl<offset, Packet>::run(a) : a; 00359 } 00360 00362 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) 00363 { 00364 // FIXME: uncomment the following in case we drop the internal imag and real functions. 00365 // using std::imag; 00366 // using std::real; 00367 return Packet(imag(a),real(a)); 00368 } 00369 00370 /************************** 00371 * Special math functions 00372 ***************************/ 00373 00375 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00376 Packet psin(const Packet& a) { using std::sin; return sin(a); } 00377 00379 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00380 Packet pcos(const Packet& a) { using std::cos; return cos(a); } 00381 00383 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00384 Packet ptan(const Packet& a) { using std::tan; return tan(a); } 00385 00387 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00388 Packet pasin(const Packet& a) { using std::asin; return asin(a); } 00389 00391 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00392 Packet pacos(const Packet& a) { using std::acos; return acos(a); } 00393 00395 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00396 Packet patan(const Packet& a) { using std::atan; return atan(a); } 00397 00399 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00400 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } 00401 00403 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00404 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } 00405 00407 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00408 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } 00409 00411 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00412 Packet pexp(const Packet& a) { using std::exp; return exp(a); } 00413 00415 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00416 Packet plog(const Packet& a) { using std::log; return log(a); } 00417 00419 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00420 Packet plog10(const Packet& a) { using std::log10; return log10(a); } 00421 00423 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00424 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } 00425 00427 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00428 Packet prsqrt(const Packet& a) { 00429 return pdiv(pset1<Packet>(1), psqrt(a)); 00430 } 00431 00433 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00434 Packet pround(const Packet& a) { using numext::round; return round(a); } 00435 00437 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00438 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } 00439 00441 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00442 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } 00443 00445 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00446 Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } 00447 00449 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00450 Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } 00451 00453 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00454 Packet perf(const Packet& a) { using numext::erf; return erf(a); } 00455 00457 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00458 Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } 00459 00460 /*************************************************************************** 00461 * The following functions might not have to be overwritten for vectorized types 00462 ***************************************************************************/ 00463 00465 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) 00466 template<typename Packet> 00467 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) 00468 { 00469 pstore(to, pset1<Packet>(a)); 00470 } 00471 00473 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00474 pmadd(const Packet& a, 00475 const Packet& b, 00476 const Packet& c) 00477 { return padd(pmul(a, b),c); } 00478 00481 template<typename Packet, int Alignment> 00482 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) 00483 { 00484 if(Alignment >= unpacket_traits<Packet>::alignment) 00485 return pload<Packet>(from); 00486 else 00487 return ploadu<Packet>(from); 00488 } 00489 00492 template<typename Scalar, typename Packet, int Alignment> 00493 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) 00494 { 00495 if(Alignment >= unpacket_traits<Packet>::alignment) 00496 pstore(to, from); 00497 else 00498 pstoreu(to, from); 00499 } 00500 00506 template<typename Packet, int LoadMode> 00507 inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) 00508 { 00509 return ploadt<Packet, LoadMode>(from); 00510 } 00511 00513 template<int Offset,typename PacketType> 00514 struct palign_impl 00515 { 00516 // by default data are aligned, so there is nothing to be done :) 00517 static inline void run(PacketType&, const PacketType&) {} 00518 }; 00519 00535 template<int Offset,typename PacketType> 00536 inline void palign(PacketType& first, const PacketType& second) 00537 { 00538 palign_impl<Offset,PacketType>::run(first,second); 00539 } 00540 00541 /*************************************************************************** 00542 * Fast complex products (GCC generates a function call which is very slow) 00543 ***************************************************************************/ 00544 00545 // Eigen+CUDA does not support complexes. 00546 #ifndef __CUDACC__ 00547 00548 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) 00549 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 00550 00551 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) 00552 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 00553 00554 #endif 00555 00556 00557 /*************************************************************************** 00558 * PacketBlock, that is a collection of N packets where the number of words 00559 * in the packet is a multiple of N. 00560 ***************************************************************************/ 00561 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock { 00562 Packet packet[N]; 00563 }; 00564 00565 template<typename Packet> EIGEN_DEVICE_FUNC inline void 00566 ptranspose(PacketBlock<Packet,1>& /*kernel*/) { 00567 // Nothing to do in the scalar case, i.e. a 1x1 matrix. 00568 } 00569 00570 /*************************************************************************** 00571 * Selector, i.e. vector of N boolean values used to select (i.e. blend) 00572 * words from 2 packets. 00573 ***************************************************************************/ 00574 template <size_t N> struct Selector { 00575 bool select[N]; 00576 }; 00577 00578 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00579 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { 00580 return ifPacket.select[0] ? thenPacket : elsePacket; 00581 } 00582 00583 } // end namespace internal 00584 00585 } // end namespace Eigen 00586 00587 #endif // EIGEN_GENERIC_PACKET_MATH_H