fathom/moab-docs/GenericPacketMath_8h_source.html

00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008 Gael Guennebaud <[email protected]>
00005 // Copyright (C) 2006-2008 Benoit Jacob <[email protected]>
00006 //
00007 // This Source Code Form is subject to the terms of the Mozilla
00008 // Public License v. 2.0. If a copy of the MPL was not distributed
00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00010
00011 #ifndef EIGEN_GENERIC_PACKET_MATH_H
00012 #define EIGEN_GENERIC_PACKET_MATH_H
00013
00014 namespace Eigen {
00015
00016 namespace internal {
00017
00026 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
00027 #define EIGEN_DEBUG_ALIGNED_LOAD
00028 #endif
00029
00030 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
00031 #define EIGEN_DEBUG_UNALIGNED_LOAD
00032 #endif
00033
00034 #ifndef EIGEN_DEBUG_ALIGNED_STORE
00035 #define EIGEN_DEBUG_ALIGNED_STORE
00036 #endif
00037
00038 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
00039 #define EIGEN_DEBUG_UNALIGNED_STORE
00040 #endif
00041
00042 struct default_packet_traits
00043 {
00044   enum {
00045     HasHalfPacket = 0,
00046
00047     HasAdd    = 1,
00048     HasSub    = 1,
00049     HasMul    = 1,
00050     HasNegate = 1,
00051     HasAbs    = 1,
00052     HasArg    = 0,
00053     HasAbs2   = 1,
00054     HasMin    = 1,
00055     HasMax    = 1,
00056     HasConj   = 1,
00057     HasSetLinear = 1,
00058     HasBlend  = 0,
00059
00060     HasDiv    = 0,
00061     HasSqrt   = 0,
00062     HasRsqrt  = 0,
00063     HasExp    = 0,
00064     HasLog    = 0,
00065     HasLog10    = 0,
00066     HasPow    = 0,
00067
00068     HasSin    = 0,
00069     HasCos    = 0,
00070     HasTan    = 0,
00071     HasASin   = 0,
00072     HasACos   = 0,
00073     HasATan   = 0,
00074     HasSinh    = 0,
00075     HasCosh    = 0,
00076     HasTanh    = 0,
00077     HasLGamma = 0,
00078     HasDiGamma = 0,
00079     HasErf = 0,
00080     HasErfc = 0,
00081
00082     HasRound  = 0,
00083     HasFloor  = 0,
00084     HasCeil   = 0,
00085
00086     HasSign   = 0
00087   };
00088 };
00089
00090 template<typename T> struct packet_traits : default_packet_traits
00091 {
00092   typedef T type;
00093   typedef T half;
00094   enum {
00095     Vectorizable = 0,
00096     size = 1,
00097     AlignedOnScalar = 0,
00098     HasHalfPacket = 0
00099   };
00100   enum {
00101     HasAdd    = 0,
00102     HasSub    = 0,
00103     HasMul    = 0,
00104     HasNegate = 0,
00105     HasAbs    = 0,
00106     HasAbs2   = 0,
00107     HasMin    = 0,
00108     HasMax    = 0,
00109     HasConj   = 0,
00110     HasSetLinear = 0
00111   };
00112 };
00113
00114 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
00115
00116 template <typename Src, typename Tgt> struct type_casting_traits {
00117   enum {
00118     VectorizedCast = 0,
00119     SrcCoeffRatio = 1,
00120     TgtCoeffRatio = 1
00121   };
00122 };
00123
00124
00126 template <typename SrcPacket, typename TgtPacket>
00127 EIGEN_DEVICE_FUNC inline TgtPacket
00128 pcast(const SrcPacket& a) {
00129   return static_cast<TgtPacket>(a);
00130 }
00131 template <typename SrcPacket, typename TgtPacket>
00132 EIGEN_DEVICE_FUNC inline TgtPacket
00133 pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
00134   return static_cast<TgtPacket>(a);
00135 }
00136
00137 template <typename SrcPacket, typename TgtPacket>
00138 EIGEN_DEVICE_FUNC inline TgtPacket
00139 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
00140   return static_cast<TgtPacket>(a);
00141 }
00142
00144 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00145 padd(const Packet& a,
00146         const Packet& b) { return a+b; }
00147
00149 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00150 psub(const Packet& a,
00151         const Packet& b) { return a-b; }
00152
00154 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00155 pnegate(const Packet& a) { return -a; }
00156
00159 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00160 pconj(const Packet& a) { return numext::conj(a); }
00161
00163 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00164 pmul(const Packet& a,
00165         const Packet& b) { return a*b; }
00166
00168 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00169 pdiv(const Packet& a,
00170         const Packet& b) { return a/b; }
00171
00173 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00174 pmin(const Packet& a,
00175         const Packet& b) { return numext::mini(a, b); }
00176
00178 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00179 pmax(const Packet& a,
00180         const Packet& b) { return numext::maxi(a, b); }
00181
00183 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00184 pabs(const Packet& a) { using std::abs; return abs(a); }
00185
00187 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00188 parg(const Packet& a) { using numext::arg; return arg(a); }
00189
00191 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00192 pand(const Packet& a, const Packet& b) { return a & b; }
00193
00195 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00196 por(const Packet& a, const Packet& b) { return a | b; }
00197
00199 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00200 pxor(const Packet& a, const Packet& b) { return a ^ b; }
00201
00203 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00204 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
00205
00207 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00208 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
00209
00211 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00212 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
00213
00215 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00216 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
00217
00219 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00220 pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
00221
00227 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00228 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
00229
00236 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00237 ploadquad(const typename unpacket_traits<Packet>::type* from)
00238 { return pload1<Packet>(from); }
00239
00249 template<typename Packet> EIGEN_DEVICE_FUNC
00250 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
00251                         Packet& a0, Packet& a1, Packet& a2, Packet& a3)
00252 {
00253   a0 = pload1<Packet>(a+0);
00254   a1 = pload1<Packet>(a+1);
00255   a2 = pload1<Packet>(a+2);
00256   a3 = pload1<Packet>(a+3);
00257 }
00258
00266 template<typename Packet> EIGEN_DEVICE_FUNC
00267 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
00268                         Packet& a0, Packet& a1)
00269 {
00270   a0 = pload1<Packet>(a+0);
00271   a1 = pload1<Packet>(a+1);
00272 }
00273
00275 template<typename Packet> inline Packet
00276 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
00277
00279 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
00280 { (*to) = from; }
00281
00283 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
00284 {  (*to) = from; }
00285
00286  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
00287  { return ploadu<Packet>(from); }
00288
00289  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
00290  { pstore(to, from); }
00291
00293 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
00294 {
00295 #ifdef __CUDA_ARCH__
00296 #if defined(__LP64__)
00297   // 64-bit pointer operand constraint for inlined asm
00298   asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
00299 #else
00300   // 32-bit pointer operand constraint for inlined asm
00301   asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
00302 #endif
00303 #elif !EIGEN_COMP_MSVC
00304   __builtin_prefetch(addr);
00305 #endif
00306 }
00307
00309 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
00310 { return a; }
00311
00313 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00314 preduxp(const Packet* vecs) { return vecs[0]; }
00315
00317 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
00318 { return a; }
00319
00324 template<typename Packet> EIGEN_DEVICE_FUNC inline
00325 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
00326 predux4(const Packet& a)
00327 { return a; }
00328
00330 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
00331 { return a; }
00332
00334 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
00335 { return a; }
00336
00338 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
00339 { return a; }
00340
00342 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
00343 { return a; }
00344
00345 template<size_t offset, typename Packet>
00346 struct protate_impl
00347 {
00348   // Empty so attempts to use this unimplemented path will fail to compile.
00349   // Only specializations of this template should be used.
00350 };
00351
00356 template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
00357 {
00358   return offset ? protate_impl<offset, Packet>::run(a) : a;
00359 }
00360
00362 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
00363 {
00364   // FIXME: uncomment the following in case we drop the internal imag and real functions.
00365 //   using std::imag;
00366 //   using std::real;
00367   return Packet(imag(a),real(a));
00368 }
00369
00370 /**************************
00371 * Special math functions
00372 ***************************/
00373
00375 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00376 Packet psin(const Packet& a) { using std::sin; return sin(a); }
00377
00379 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00380 Packet pcos(const Packet& a) { using std::cos; return cos(a); }
00381
00383 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00384 Packet ptan(const Packet& a) { using std::tan; return tan(a); }
00385
00387 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00388 Packet pasin(const Packet& a) { using std::asin; return asin(a); }
00389
00391 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00392 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
00393
00395 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00396 Packet patan(const Packet& a) { using std::atan; return atan(a); }
00397
00399 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00400 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
00401
00403 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00404 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
00405
00407 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00408 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
00409
00411 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00412 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
00413
00415 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00416 Packet plog(const Packet& a) { using std::log; return log(a); }
00417
00419 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00420 Packet plog10(const Packet& a) { using std::log10; return log10(a); }
00421
00423 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00424 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
00425
00427 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00428 Packet prsqrt(const Packet& a) {
00429   return pdiv(pset1<Packet>(1), psqrt(a));
00430 }
00431
00433 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00434 Packet pround(const Packet& a) { using numext::round; return round(a); }
00435
00437 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00438 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
00439
00441 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00442 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
00443
00445 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00446 Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
00447
00449 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00450 Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
00451
00453 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00454 Packet perf(const Packet& a) { using numext::erf; return erf(a); }
00455
00457 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00458 Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
00459
00460 /***************************************************************************
00461 * The following functions might not have to be overwritten for vectorized types
00462 ***************************************************************************/
00463
00465 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
00466 template<typename Packet>
00467 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
00468 {
00469   pstore(to, pset1<Packet>(a));
00470 }
00471
00473 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00474 pmadd(const Packet&  a,
00475          const Packet&  b,
00476          const Packet&  c)
00477 { return padd(pmul(a, b),c); }
00478
00481 template<typename Packet, int Alignment>
00482 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
00483 {
00484   if(Alignment >= unpacket_traits<Packet>::alignment)
00485     return pload<Packet>(from);
00486   else
00487     return ploadu<Packet>(from);
00488 }
00489
00492 template<typename Scalar, typename Packet, int Alignment>
00493 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
00494 {
00495   if(Alignment >= unpacket_traits<Packet>::alignment)
00496     pstore(to, from);
00497   else
00498     pstoreu(to, from);
00499 }
00500
00506 template<typename Packet, int LoadMode>
00507 inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
00508 {
00509   return ploadt<Packet, LoadMode>(from);
00510 }
00511
00513 template<int Offset,typename PacketType>
00514 struct palign_impl
00515 {
00516   // by default data are aligned, so there is nothing to be done :)
00517   static inline void run(PacketType&, const PacketType&) {}
00518 };
00519
00535 template<int Offset,typename PacketType>
00536 inline void palign(PacketType& first, const PacketType& second)
00537 {
00538   palign_impl<Offset,PacketType>::run(first,second);
00539 }
00540
00541 /***************************************************************************
00542 * Fast complex products (GCC generates a function call which is very slow)
00543 ***************************************************************************/
00544
00545 // Eigen+CUDA does not support complexes.
00546 #ifndef __CUDACC__
00547
00548 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
00549 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
00550
00551 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
00552 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
00553
00554 #endif
00555
00556
00557 /***************************************************************************
00558  * PacketBlock, that is a collection of N packets where the number of words
00559  * in the packet is a multiple of N.
00560 ***************************************************************************/
00561 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
00562   Packet packet[N];
00563 };
00564
00565 template<typename Packet> EIGEN_DEVICE_FUNC inline void
00566 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
00567   // Nothing to do in the scalar case, i.e. a 1x1 matrix.
00568 }
00569
00570 /***************************************************************************
00571  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
00572  * words from 2 packets.
00573 ***************************************************************************/
00574 template <size_t N> struct Selector {
00575   bool select[N];
00576 };
00577
00578 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00579 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
00580   return ifPacket.select[0] ? thenPacket : elsePacket;
00581 }
00582
00583 } // end namespace internal
00584
00585 } // end namespace Eigen
00586
00587 #endif // EIGEN_GENERIC_PACKET_MATH_H