MOAB
4.9.3pre
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2010 Gael Guennebaud <[email protected]> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_COMPLEX32_ALTIVEC_H 00011 #define EIGEN_COMPLEX32_ALTIVEC_H 00012 00013 namespace Eigen { 00014 00015 namespace internal { 00016 00017 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; 00018 #ifdef _BIG_ENDIAN 00019 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 00020 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 00021 #else 00022 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 00023 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; 00024 #endif 00025 00026 //---------- float ---------- 00027 struct Packet2cf 00028 { 00029 EIGEN_STRONG_INLINE Packet2cf() {} 00030 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} 00031 Packet4f v; 00032 }; 00033 00034 template<> struct packet_traits<std::complex<float> > : default_packet_traits 00035 { 00036 typedef Packet2cf type; 00037 typedef Packet2cf half; 00038 enum { 00039 Vectorizable = 1, 00040 AlignedOnScalar = 1, 00041 size = 2, 00042 00043 HasAdd = 1, 00044 HasSub = 1, 00045 HasMul = 1, 00046 HasDiv = 1, 00047 HasNegate = 1, 00048 HasAbs = 0, 00049 HasAbs2 = 0, 00050 HasMin = 0, 00051 HasMax = 0, 00052 HasSetLinear = 0 00053 }; 00054 }; 00055 00056 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; }; 00057 00058 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) 00059 { 00060 Packet2cf res; 00061 /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ 00062 if((ptrdiff_t(&from) % 16) == 0) 00063 res.v = pload<Packet4f>((const float *)&from); 00064 else 00065 res.v = ploadu<Packet4f>((const float *)&from); 00066 res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); 00067 return res; 00068 } 00069 00070 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) 00071 { 00072 std::complex<float> EIGEN_ALIGN16 af[2]; 00073 af[0] = from[0*stride]; 00074 af[1] = from[1*stride]; 00075 return Packet2cf(vec_ld(0, (const float*)af)); 00076 } 00077 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) 00078 { 00079 std::complex<float> EIGEN_ALIGN16 af[2]; 00080 vec_st(from.v, 0, (float*)af); 00081 to[0*stride] = af[0]; 00082 to[1*stride] = af[1]; 00083 } 00084 00085 00086 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } 00087 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); } 00088 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } 00089 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); } 00090 00091 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 00092 { 00093 Packet4f v1, v2; 00094 00095 // Permute and multiply the real parts of a and b 00096 v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD); 00097 // Get the imaginary parts of a 00098 v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN); 00099 // multiply a_re * b 00100 v1 = vec_madd(v1, b.v, p4f_ZERO); 00101 // multiply a_im * b and get the conjugate result 00102 v2 = vec_madd(v2, b.v, p4f_ZERO); 00103 v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); 00104 // permute back to a proper order 00105 v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); 00106 00107 return Packet2cf(vec_add(v1, v2)); 00108 } 00109 00110 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } 00111 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } 00112 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } 00113 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } 00114 00115 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); } 00116 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); } 00117 00118 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) 00119 { 00120 return pset1<Packet2cf>(*from); 00121 } 00122 00123 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } 00124 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } 00125 00126 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); } 00127 00128 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) 00129 { 00130 std::complex<float> EIGEN_ALIGN16 res[2]; 00131 pstore((float *)&res, a.v); 00132 00133 return res[0]; 00134 } 00135 00136 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) 00137 { 00138 Packet4f rev_a; 00139 rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); 00140 return Packet2cf(rev_a); 00141 } 00142 00143 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) 00144 { 00145 Packet4f b; 00146 b = (Packet4f) vec_sld(a.v, a.v, 8); 00147 b = padd(a.v, b); 00148 return pfirst(Packet2cf(b)); 00149 } 00150 00151 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) 00152 { 00153 Packet4f b1, b2; 00154 #ifdef _BIG_ENDIAN 00155 b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); 00156 b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); 00157 #else 00158 b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); 00159 b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); 00160 #endif 00161 b2 = (Packet4f) vec_sld(b2, b2, 8); 00162 b2 = padd(b1, b2); 00163 00164 return Packet2cf(b2); 00165 } 00166 00167 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) 00168 { 00169 Packet4f b; 00170 Packet2cf prod; 00171 b = (Packet4f) vec_sld(a.v, a.v, 8); 00172 prod = pmul(a, Packet2cf(b)); 00173 00174 return pfirst(prod); 00175 } 00176 00177 template<int Offset> 00178 struct palign_impl<Offset,Packet2cf> 00179 { 00180 static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second) 00181 { 00182 if (Offset==1) 00183 { 00184 #ifdef _BIG_ENDIAN 00185 first.v = vec_sld(first.v, second.v, 8); 00186 #else 00187 first.v = vec_sld(second.v, first.v, 8); 00188 #endif 00189 } 00190 } 00191 }; 00192 00193 template<> struct conj_helper<Packet2cf, Packet2cf, false,true> 00194 { 00195 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 00196 { return padd(pmul(x,y),c); } 00197 00198 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 00199 { 00200 return internal::pmul(a, pconj(b)); 00201 } 00202 }; 00203 00204 template<> struct conj_helper<Packet2cf, Packet2cf, true,false> 00205 { 00206 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 00207 { return padd(pmul(x,y),c); } 00208 00209 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 00210 { 00211 return internal::pmul(pconj(a), b); 00212 } 00213 }; 00214 00215 template<> struct conj_helper<Packet2cf, Packet2cf, true,true> 00216 { 00217 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 00218 { return padd(pmul(x,y),c); } 00219 00220 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 00221 { 00222 return pconj(internal::pmul(a, b)); 00223 } 00224 }; 00225 00226 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 00227 { 00228 // TODO optimize it for AltiVec 00229 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); 00230 Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); 00231 return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV)))); 00232 } 00233 00234 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x) 00235 { 00236 return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV)); 00237 } 00238 00239 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel) 00240 { 00241 Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); 00242 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); 00243 kernel.packet[0].v = tmp; 00244 } 00245 00246 //---------- double ---------- 00247 #ifdef __VSX__ 00248 struct Packet1cd 00249 { 00250 EIGEN_STRONG_INLINE Packet1cd() {} 00251 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} 00252 Packet2d v; 00253 }; 00254 00255 template<> struct packet_traits<std::complex<double> > : default_packet_traits 00256 { 00257 typedef Packet1cd type; 00258 typedef Packet1cd half; 00259 enum { 00260 Vectorizable = 1, 00261 AlignedOnScalar = 0, 00262 size = 1, 00263 HasHalfPacket = 0, 00264 00265 HasAdd = 1, 00266 HasSub = 1, 00267 HasMul = 1, 00268 HasDiv = 1, 00269 HasNegate = 1, 00270 HasAbs = 0, 00271 HasAbs2 = 0, 00272 HasMin = 0, 00273 HasMax = 0, 00274 HasSetLinear = 0 00275 }; 00276 }; 00277 00278 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; 00279 00280 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } 00281 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } 00282 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } 00283 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } 00284 00285 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) 00286 { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } 00287 00288 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) 00289 { 00290 std::complex<double> EIGEN_ALIGN16 af[2]; 00291 af[0] = from[0*stride]; 00292 af[1] = from[1*stride]; 00293 return pload<Packet1cd>(af); 00294 } 00295 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride) 00296 { 00297 std::complex<double> EIGEN_ALIGN16 af[2]; 00298 pstore<std::complex<double> >(af, from); 00299 to[0*stride] = af[0]; 00300 to[1*stride] = af[1]; 00301 } 00302 00303 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); } 00304 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); } 00305 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } 00306 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } 00307 00308 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 00309 { 00310 Packet2d a_re, a_im, v1, v2; 00311 00312 // Permute and multiply the real parts of a and b 00313 a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI); 00314 // Get the imaginary parts of a 00315 a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO); 00316 // multiply a_re * b 00317 v1 = vec_madd(a_re, b.v, p2d_ZERO); 00318 // multiply a_im * b and get the conjugate result 00319 v2 = vec_madd(a_im, b.v, p2d_ZERO); 00320 v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); 00321 v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); 00322 00323 return Packet1cd(vec_add(v1, v2)); 00324 } 00325 00326 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } 00327 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } 00328 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } 00329 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } 00330 00331 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) 00332 { 00333 return pset1<Packet1cd>(*from); 00334 } 00335 00336 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); } 00337 00338 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) 00339 { 00340 std::complex<double> EIGEN_ALIGN16 res[2]; 00341 pstore<std::complex<double> >(res, a); 00342 00343 return res[0]; 00344 } 00345 00346 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } 00347 00348 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) 00349 { 00350 return pfirst(a); 00351 } 00352 00353 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) 00354 { 00355 return vecs[0]; 00356 } 00357 00358 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) 00359 { 00360 return pfirst(a); 00361 } 00362 00363 template<int Offset> 00364 struct palign_impl<Offset,Packet1cd> 00365 { 00366 static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) 00367 { 00368 // FIXME is it sure we never have to align a Packet1cd? 00369 // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... 00370 } 00371 }; 00372 00373 template<> struct conj_helper<Packet1cd, Packet1cd, false,true> 00374 { 00375 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 00376 { return padd(pmul(x,y),c); } 00377 00378 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 00379 { 00380 return internal::pmul(a, pconj(b)); 00381 } 00382 }; 00383 00384 template<> struct conj_helper<Packet1cd, Packet1cd, true,false> 00385 { 00386 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 00387 { return padd(pmul(x,y),c); } 00388 00389 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 00390 { 00391 return internal::pmul(pconj(a), b); 00392 } 00393 }; 00394 00395 template<> struct conj_helper<Packet1cd, Packet1cd, true,true> 00396 { 00397 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 00398 { return padd(pmul(x,y),c); } 00399 00400 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 00401 { 00402 return pconj(internal::pmul(a, b)); 00403 } 00404 }; 00405 00406 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 00407 { 00408 // TODO optimize it for AltiVec 00409 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); 00410 Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); 00411 return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64)))); 00412 } 00413 00414 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) 00415 { 00416 return Packet1cd(preverse(Packet2d(x.v))); 00417 } 00418 00419 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) 00420 { 00421 Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); 00422 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); 00423 kernel.packet[0].v = tmp; 00424 } 00425 #endif // __VSX__ 00426 } // end namespace internal 00427 00428 } // end namespace Eigen 00429 00430 #endif // EIGEN_COMPLEX32_ALTIVEC_H