MOAB
4.9.3pre
|
typedef __vector unsigned char Eigen::internal::Packet16uc |
Definition at line 39 of file AltiVec/PacketMath.h.
typedef __m128d Eigen::internal::Packet2d |
Definition at line 57 of file SSE/PacketMath.h.
typedef float32x2_t Eigen::internal::Packet2f |
Definition at line 37 of file NEON/PacketMath.h.
typedef int32x2_t Eigen::internal::Packet2i |
Definition at line 40 of file NEON/PacketMath.h.
typedef __vector __bool int Eigen::internal::Packet4bi |
Definition at line 37 of file AltiVec/PacketMath.h.
typedef __m256d Eigen::internal::Packet4d |
Definition at line 33 of file AVX/PacketMath.h.
typedef __m128 Eigen::internal::Packet4f |
Definition at line 34 of file AltiVec/PacketMath.h.
typedef __m128i Eigen::internal::Packet4i |
Definition at line 35 of file AltiVec/PacketMath.h.
typedef uint32x4_t Eigen::internal::Packet4ui |
Definition at line 36 of file AltiVec/PacketMath.h.
typedef __m256 Eigen::internal::Packet8f |
Definition at line 31 of file AVX/PacketMath.h.
typedef __m256i Eigen::internal::Packet8i |
Definition at line 38 of file AltiVec/PacketMath.h.
anonymous enum |
Definition at line 29 of file SparseDiagonalProduct.h.
anonymous enum |
Definition at line 49 of file SparseVector.h.
{ SVA_RuntimeSwitch, SVA_Inner, SVA_Outer };
anonymous enum |
anonymous enum |
anonymous enum |
Definition at line 30 of file JacobiSVD.h.
static Eigen::internal::_EIGEN_DECLARE_CONST_FAST_Packet4f | ( | ZERO | , |
0 | |||
) | [static] |
static Eigen::internal::_EIGEN_DECLARE_CONST_FAST_Packet4i | ( | ZERO | , |
0 | |||
) | [static] |
static Eigen::internal::_EIGEN_DECLARE_CONST_FAST_Packet4i | ( | ONE | , |
1 | |||
) | [static] |
static Eigen::internal::_EIGEN_DECLARE_CONST_FAST_Packet4i | ( | MINUS16 | , |
- | 16 | ||
) | [static] |
static Eigen::internal::_EIGEN_DECLARE_CONST_FAST_Packet4i | ( | MINUS1 | , |
- | 1 | ||
) | [static] |
EIGEN_DEVICE_FUNC void Eigen::internal::aligned_delete | ( | T * | ptr, |
size_t | size | ||
) | [inline] |
Deletes objects constructed with aligned_new. The size parameter specifies the number of objects on which the destructor of T is called.
Definition at line 328 of file Memory.h.
{ destruct_elements_of_array<T>(ptr, size); aligned_free(ptr); }
EIGEN_DEVICE_FUNC void Eigen::internal::aligned_free | ( | void * | ptr | ) | [inline] |
Frees memory allocated with aligned_malloc.
Definition at line 174 of file Memory.h.
{ #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED std::free(ptr); #else handmade_aligned_free(ptr); #endif }
EIGEN_DEVICE_FUNC void* Eigen::internal::aligned_malloc | ( | size_t | size | ) | [inline] |
Allocates size bytes. The returned pointer is guaranteed to have 16- or 32-byte alignment depending on the requirements. On allocation error (the underlying allocator returns a null pointer for a non-zero size), std::bad_alloc is thrown.
Definition at line 153 of file Memory.h.
{ check_that_malloc_is_allowed(); void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED result = std::malloc(size); #if EIGEN_DEFAULT_ALIGN_BYTES==16 eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); #endif #else result = handmade_aligned_malloc(size); #endif if(!result && size) throw_std_bad_alloc(); return result; }
EIGEN_DEVICE_FUNC T* Eigen::internal::aligned_new | ( | size_t | size | ) | [inline] |
Allocates size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment. On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown. The default constructor of T is called.
Definition at line 295 of file Memory.h.
{ check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size)); EIGEN_TRY { return construct_elements_of_array(result, size); } EIGEN_CATCH(...) { aligned_free(result); EIGEN_THROW; } }
void* Eigen::internal::aligned_realloc | ( | void * | ptr, |
size_t | new_size, | ||
size_t | old_size | ||
) | [inline] |
Reallocates an aligned block of memory.
Exceptions: std::bad_alloc | on allocation failure |
Definition at line 188 of file Memory.h.
{ EIGEN_UNUSED_VARIABLE(old_size); void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED result = std::realloc(ptr,new_size); #else result = handmade_aligned_realloc(ptr,new_size,old_size); #endif if (!result && new_size) throw_std_bad_alloc(); return result; }
T Eigen::internal::amd_flip | ( | const T & | i | ) | [inline] |
void Eigen::internal::amd_mark | ( | const T0 * | w, |
const T1 & | j | ||
) | [inline] |
bool Eigen::internal::amd_marked | ( | const T0 * | w, |
const T1 & | j | ||
) | [inline] |
T Eigen::internal::amd_unflip | ( | const T & | i | ) | [inline] |
void Eigen::internal::apply_block_householder_on_the_left | ( | MatrixType & | mat, |
const VectorsType & | vectors, | ||
const CoeffsType & | hCoeffs, | ||
bool | forward | ||
) |
If forward is true, performs mat = H0 * H1 * H2 * mat; otherwise performs mat = H2 * H1 * H0 * mat.
Definition at line 79 of file BlockHouseholder.h.
{ enum { TFactorSize = MatrixType::ColsAtCompileTime }; Index nbVecs = vectors.cols(); Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs); if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs); else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate()); const TriangularView<const VectorsType, UnitLower> V(vectors); // A -= V T V^* A Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0, VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat; // FIXME add .noalias() once the triangular product can work inplace if(forward) tmp = T.template triangularView<Upper>() * tmp; else tmp = T.template triangularView<Upper>().adjoint() * tmp; mat.noalias() -= V * tmp; }
void Eigen::internal::apply_rotation_in_the_plane | ( | DenseBase< VectorX > & | xpr_x, |
DenseBase< VectorY > & | xpr_y, | ||
const JacobiRotation< OtherScalar > & | j | ||
) |
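Applies the clockwise 2D rotation j to the set of 2D vectors of coordinates x and y: $\begin{pmatrix} x_i \\ y_i \end{pmatrix} \leftarrow \begin{pmatrix} c & \bar{s} \\ -s & \bar{c} \end{pmatrix} \begin{pmatrix} x_i \\ y_i \end{pmatrix}$ for every index i, where c = j.c() and s = j.s().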
Definition at line 301 of file Jacobi.h.
{ typedef typename VectorX::Scalar Scalar; enum { PacketSize = packet_traits<Scalar>::size }; typedef typename packet_traits<Scalar>::type Packet; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); Index incry = xpr_y.derived().innerStride(); Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); OtherScalar c = j.c(); OtherScalar s = j.s(); if (c==OtherScalar(1) && s==OtherScalar(0)) return; /*** dynamic-size vectorized paths ***/ if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization enum { Peeling = 2 }; Index alignedStart = internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; const Packet pc = pset1<Packet>(c); const Packet ps = pset1<Packet>(s); conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; for(Index i=0; i<alignedStart; ++i) { Scalar xi = x[i]; Scalar yi = y[i]; x[i] = c * xi + numext::conj(s) * yi; y[i] = -s * xi + numext::conj(c) * yi; } Scalar* EIGEN_RESTRICT px = x + alignedStart; Scalar* EIGEN_RESTRICT py = y + alignedStart; if(internal::first_default_aligned(x, size)==alignedStart) { for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); px += PacketSize; py += PacketSize; } } else { Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) { Packet xi = ploadu<Packet>(px); Packet xi1 = ploadu<Packet>(px+PacketSize); Packet yi = pload <Packet>(py); Packet yi1 = pload <Packet>(py+PacketSize); pstoreu(px, 
padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } if(alignedEnd!=peelingEnd) { Packet xi = ploadu<Packet>(x+peelingEnd); Packet yi = pload <Packet>(y+peelingEnd); pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); } } for(Index i=alignedEnd; i<size; ++i) { Scalar xi = x[i]; Scalar yi = y[i]; x[i] = c * xi + numext::conj(s) * yi; y[i] = -s * xi + numext::conj(c) * yi; } } /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment { const Packet pc = pset1<Packet>(c); const Packet ps = pset1<Packet>(s); conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); px += PacketSize; py += PacketSize; } } /*** non-vectorized path ***/ else { for(Index i=0; i<size; ++i) { Scalar xi = *x; Scalar yi = *y; *x = c * xi + numext::conj(s) * yi; *y = -s * xi + numext::conj(c) * yi; x += incrx; y += incry; } } }
void Eigen::internal::assign_sparse_to_sparse | ( | DstXprType & | dst, |
const SrcXprType & | src | ||
) |
Definition at line 71 of file SparseAssign.h.
{ typedef typename DstXprType::Scalar Scalar; typedef internal::evaluator<DstXprType> DstEvaluatorType; typedef internal::evaluator<SrcXprType> SrcEvaluatorType; SrcEvaluatorType srcEvaluator(src); const bool transpose = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit); const Index outerEvaluationSize = (SrcEvaluatorType::Flags&RowMajorBit) ? src.rows() : src.cols(); if ((!transpose) && src.isRValue()) { // eval without temporary dst.resize(src.rows(), src.cols()); dst.setZero(); dst.reserve((std::max)(src.rows(),src.cols())*2); for (Index j=0; j<outerEvaluationSize; ++j) { dst.startVec(j); for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it) { Scalar v = it.value(); dst.insertBackByOuterInner(j,it.index()) = v; } } dst.finalize(); } else { // eval through a temporary eigen_assert(( ((internal::traits<DstXprType>::SupportedAccessPatterns & OuterRandomAccessPattern)==OuterRandomAccessPattern) || (!((DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit)))) && "the transpose operation is supposed to be handled in SparseMatrix::operator="); enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) }; DstXprType temp(src.rows(), src.cols()); temp.reserve((std::max)(src.rows(),src.cols())*2); for (Index j=0; j<outerEvaluationSize; ++j) { temp.startVec(j); for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it) { Scalar v = it.value(); temp.insertBackByOuterInner(Flip?it.index():j,Flip?j:it.index()) = v; } } temp.finalize(); dst = temp.markAsRValue(); } }
SluMatrix Eigen::internal::asSluMatrix | ( | MatrixType & | mat | ) |
Definition at line 265 of file SuperLUSupport.h.
{
return SluMatrix::Map(mat);
}
bool Eigen::internal::bicgstab | ( | const MatrixType & | mat, |
const Rhs & | rhs, | ||
Dest & | x, | ||
const Preconditioner & | precond, | ||
Index & | iters, | ||
typename Dest::RealScalar & | tol_error | ||
) |
Low-level biconjugate gradient stabilized (BiCGSTAB) algorithm.
mat | The matrix A |
rhs | The right hand side vector b |
x | On input an initial solution, on output the computed solution. |
precond | A preconditioner being able to efficiently solve for an approximation of Ax=b (regardless of b) |
iters | On input the maximum number of iterations, on output the number of performed iterations. |
tol_error | On input the tolerance error, on output an estimation of the relative error. |
Definition at line 29 of file BiCGSTAB.h.
{ using std::sqrt; using std::abs; typedef typename Dest::RealScalar RealScalar; typedef typename Dest::Scalar Scalar; typedef Matrix<Scalar,Dynamic,1> VectorType; RealScalar tol = tol_error; Index maxIters = iters; Index n = mat.cols(); VectorType r = rhs - mat * x; VectorType r0 = r; RealScalar r0_sqnorm = r0.squaredNorm(); RealScalar rhs_sqnorm = rhs.squaredNorm(); if(rhs_sqnorm == 0) { x.setZero(); return true; } Scalar rho = 1; Scalar alpha = 1; Scalar w = 1; VectorType v = VectorType::Zero(n), p = VectorType::Zero(n); VectorType y(n), z(n); VectorType kt(n), ks(n); VectorType s(n), t(n); RealScalar tol2 = tol*tol*rhs_sqnorm; RealScalar eps2 = NumTraits<Scalar>::epsilon()*NumTraits<Scalar>::epsilon(); Index i = 0; Index restarts = 0; while ( r.squaredNorm() > tol2 && i<maxIters ) { Scalar rho_old = rho; rho = r0.dot(r); if (abs(rho) < eps2*r0_sqnorm) { // The new residual vector became too orthogonal to the arbitrarily chosen direction r0 // Let's restart with a new r0: r = rhs - mat * x; r0 = r; rho = r0_sqnorm = r.squaredNorm(); if(restarts++ == 0) i = 0; } Scalar beta = (rho/rho_old) * (alpha / w); p = r + beta * (p - w * v); y = precond.solve(p); v.noalias() = mat * y; alpha = rho / r0.dot(v); s = r - alpha * v; z = precond.solve(s); t.noalias() = mat * z; RealScalar tmp = t.squaredNorm(); if(tmp>RealScalar(0)) w = t.dot(s) / tmp; else w = Scalar(0); x += alpha * y + w * z; r = s - w * t; ++i; } tol_error = sqrt(r.squaredNorm()/rhs_sqnorm); iters = i; return true; }
NumTraits<typename traits<Derived>::Scalar>::Real Eigen::internal::blueNorm_impl | ( | const EigenBase< Derived > & | _vec | ) | [inline] |
Definition at line 55 of file StableNorm.h.
{ typedef typename Derived::RealScalar RealScalar; using std::pow; using std::sqrt; using std::abs; const Derived& vec(_vec.derived()); static bool initialized = false; static RealScalar b1, b2, s1m, s2m, rbig, relerr; if(!initialized) { int ibeta, it, iemin, iemax, iexp; RealScalar eps; // This program calculates the machine-dependent constants // bl, b2, slm, s2m, relerr overfl // from the "basic" machine-dependent numbers // nbig, ibeta, it, iemin, iemax, rbig. // The following define the basic machine-dependent constants. // For portability, the PORT subprograms "ilmaeh" and "rlmach" // are used. For any specific computer, each of the assignment // statements can be replaced ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number iexp = -((1-iemin)/2); b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange iexp = (iemax + 1 - it)/2; b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange iexp = (2-iemin)/2; s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range iexp = - ((iemax+it)/2); s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range eps = RealScalar(pow(double(ibeta), 1-it)); relerr = sqrt(eps); // tolerance for neglecting asml initialized = true; } Index n = vec.size(); RealScalar ab2 = b2 / RealScalar(n); RealScalar asml = RealScalar(0); RealScalar amed = RealScalar(0); RealScalar abig = RealScalar(0); for(typename Derived::InnerIterator it(vec, 0); it; ++it) { RealScalar ax = abs(it.value()); if(ax > ab2) abig += numext::abs2(ax*s2m); else if(ax < b1) asml += 
numext::abs2(ax*s1m); else amed += numext::abs2(ax); } if(amed!=amed) return amed; // we got a NaN if(abig > RealScalar(0)) { abig = sqrt(abig); if(abig > rbig) // overflow, or *this contains INF values return abig; // return INF if(amed > RealScalar(0)) { abig = abig/s2m; amed = sqrt(amed); } else return abig/s2m; } else if(asml > RealScalar(0)) { if (amed > RealScalar(0)) { abig = sqrt(amed); amed = sqrt(asml) / s1m; } else return sqrt(asml)/s1m; } else return sqrt(amed); asml = numext::mini(abig, amed); abig = numext::maxi(abig, amed); if(asml <= abig*relerr) return abig; else return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig)); }
const Derived::Scalar Eigen::internal::bruteforce_det3_helper | ( | const MatrixBase< Derived > & | matrix, |
int | a, | ||
int | b, | ||
int | c | ||
) | [inline] |
Definition at line 19 of file Determinant.h.
{
return matrix.coeff(0,a)
* (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));
}
const Derived::Scalar Eigen::internal::bruteforce_det4_helper | ( | const MatrixBase< Derived > & | matrix, |
int | j, | ||
int | k, | ||
int | m, | ||
int | n | ||
) |
Definition at line 27 of file Determinant.h.
{
return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1))
* (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3));
}
void Eigen::internal::c_to_fortran_numbering | ( | MatrixType & | mat | ) |
Definition at line 97 of file PaStiXSupport.h.
{ if ( !(mat.outerIndexPtr()[0]) ) { int i; for(i = 0; i <= mat.rows(); ++i) ++mat.outerIndexPtr()[i]; for(i = 0; i < mat.nonZeros(); ++i) ++mat.innerIndexPtr()[i]; } }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment | ( | Dst & | dst, |
const Src & | src | ||
) |
Definition at line 692 of file AssignEvaluator.h.
{ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment | ( | const Dst & | dst, |
const Src & | src | ||
) |
Definition at line 698 of file AssignEvaluator.h.
{ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment | ( | Dst & | dst, |
const Src & | src, | ||
const Func & | func, | ||
typename enable_if< evaluator_assume_aliasing< Src >::value, void * >::type | = 0 |
||
) |
Definition at line 706 of file AssignEvaluator.h.
{ typename plain_matrix_type<Src>::type tmp(src); call_assignment_no_alias(dst, tmp, func); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment | ( | Dst & | dst, |
const Src & | src, | ||
const Func & | func, | ||
typename enable_if<!evaluator_assume_aliasing< Src >::value, void * >::type | = 0 |
||
) |
Definition at line 714 of file AssignEvaluator.h.
{ call_assignment_no_alias(dst, src, func); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment | ( | NoAlias< Dst, StorageBase > & | dst, |
const Src & | src, | ||
const Func & | func | ||
) |
Definition at line 723 of file AssignEvaluator.h.
{ call_assignment_no_alias(dst.expression(), src, func); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment_no_alias | ( | Dst & | dst, |
const Src & | src, | ||
const Func & | func | ||
) |
Definition at line 731 of file AssignEvaluator.h.
{ enum { NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) ) && int(Dst::SizeAtCompileTime) != 1 }; Index dstRows = NeedToTranspose ? src.cols() : src.rows(); Index dstCols = NeedToTranspose ? src.rows() : src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned; typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType; ActualDstType actualDst(dst); // TODO check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment_no_alias | ( | Dst & | dst, |
const Src & | src | ||
) |
Definition at line 757 of file AssignEvaluator.h.
{ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>()); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment_no_alias_no_transpose | ( | Dst & | dst, |
const Src & | src, | ||
const Func & | func | ||
) |
Definition at line 764 of file AssignEvaluator.h.
{ Index dstRows = src.rows(); Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); // TODO check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) Assignment<Dst,Src,Func>::run(dst, src, func); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_assignment_no_alias_no_transpose | ( | Dst & | dst, |
const Src & | src | ||
) |
Definition at line 779 of file AssignEvaluator.h.
{ call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>()); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_dense_assignment_loop | ( | const DstXprType & | dst, |
const SrcXprType & | src, | ||
const Functor & | func | ||
) |
Definition at line 640 of file AssignEvaluator.h.
{ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); typedef evaluator<DstXprType> DstEvaluatorType; typedef evaluator<SrcXprType> SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); dense_assignment_loop<Kernel>::run(kernel); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_dense_assignment_loop | ( | const DstXprType & | dst, |
const SrcXprType & | src | ||
) |
Definition at line 657 of file AssignEvaluator.h.
{ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_triangular_assignment_loop | ( | const DstXprType & | dst, |
const SrcXprType & | src, | ||
const Functor & | func | ||
) |
Definition at line 780 of file TriangularMatrix.h.
{ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); typedef evaluator<DstXprType> DstEvaluatorType; typedef evaluator<SrcXprType> SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite, DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); enum { unroll = DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT }; triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::call_triangular_assignment_loop | ( | const DstXprType & | dst, |
const SrcXprType & | src | ||
) |
Definition at line 805 of file TriangularMatrix.h.
{ call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>()); }
static void Eigen::internal::check_DenseIndex_is_signed | ( | ) | [inline, static] |
Definition at line 20 of file DenseBase.h.
{ EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); }
void Eigen::internal::check_for_aliasing | ( | const Dst & | dst, |
const Src & | src | ||
) |
Definition at line 387 of file Transpose.h.
{ internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src); }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Eigen::internal::check_size_for_overflow | ( | size_t | size | ) |
Definition at line 285 of file Memory.h.
{ if(size > size_t(-1) / sizeof(T)) throw_std_bad_alloc(); }
EIGEN_DEVICE_FUNC void Eigen::internal::check_static_allocation_size | ( | ) |
Definition at line 29 of file DenseStorage.h.
{ // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit #if EIGEN_STACK_ALLOCATION_LIMIT EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); #endif }
EIGEN_DEVICE_FUNC void Eigen::internal::check_that_malloc_is_allowed | ( | ) | [inline] |
void Eigen::internal::cholmod_configure_matrix | ( | CholmodType & | mat | ) |
Definition at line 18 of file CholmodSupport.h.
{ if (internal::is_same<Scalar,float>::value) { mat.xtype = CHOLMOD_REAL; mat.dtype = CHOLMOD_SINGLE; } else if (internal::is_same<Scalar,double>::value) { mat.xtype = CHOLMOD_REAL; mat.dtype = CHOLMOD_DOUBLE; } else if (internal::is_same<Scalar,std::complex<float> >::value) { mat.xtype = CHOLMOD_COMPLEX; mat.dtype = CHOLMOD_SINGLE; } else if (internal::is_same<Scalar,std::complex<double> >::value) { mat.xtype = CHOLMOD_COMPLEX; mat.dtype = CHOLMOD_DOUBLE; } else { eigen_assert(false && "Scalar type not supported by CHOLMOD"); } }
static IndexType Eigen::internal::clear_mark | ( | IndexType | n_row, |
Colamd_Row< IndexType > | Row[] | ||
) | [inline, static] |
Definition at line 1821 of file Ordering.h.
EIGEN_DEVICE_FUNC MatrixType::Scalar Eigen::internal::cofactor_3x3 | ( | const MatrixType & | m | ) | [inline] |
Definition at line 126 of file InverseImpl.h.
{ enum { i1 = (i+1) % 3, i2 = (i+2) % 3, j1 = (j+1) % 3, j2 = (j+2) % 3 }; return m.coeff(i1, j1) * m.coeff(i2, j2) - m.coeff(i1, j2) * m.coeff(i2, j1); }
EIGEN_DEVICE_FUNC MatrixType::Scalar Eigen::internal::cofactor_4x4 | ( | const MatrixType & | matrix | ) | [inline] |
Definition at line 213 of file InverseImpl.h.
{ enum { i1 = (i+1) % 4, i2 = (i+2) % 4, i3 = (i+3) % 4, j1 = (j+1) % 4, j2 = (j+2) % 4, j3 = (j+3) % 4 }; return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3) + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3) + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3); }
static bool Eigen::internal::colamd | ( | IndexType | n_row, |
IndexType | n_col, | ||
IndexType | Alen, | ||
IndexType * | A, | ||
IndexType * | p, | ||
double | knobs[COLAMD_KNOBS], | ||
IndexType | stats[COLAMD_STATS] | ||
) | [static] |
Computes a column ordering using the column approximate minimum degree ordering.
Computes a column ordering (Q) of A such that P(AQ)=LU or (AQ)'AQ=LL' have less fill-in and require fewer floating point operations than factorizing the unpermuted matrix A or A'A, respectively.
n_row | number of rows in A |
n_col | number of columns in A |
Alen | size of the array A |
A | row indices of the matrix, of size Alen |
p | column pointers of A, of size n_col+1 |
knobs | parameter settings for colamd |
stats | colamd output statistics and error codes |
Definition at line 323 of file Ordering.h.
IndexType Eigen::internal::colamd_c | ( | IndexType | n_col | ) | [inline] |
Definition at line 203 of file Ordering.h.
IndexType Eigen::internal::colamd_r | ( | IndexType | n_row | ) | [inline] |
Definition at line 207 of file Ordering.h.
IndexType Eigen::internal::colamd_recommended | ( | IndexType | nnz, |
IndexType | n_row, | ||
IndexType | n_col | ||
) | [inline] |
Returns the recommended value of Alen.
Returns the recommended value of Alen for use by colamd. Returns -1 if any input argument is negative. The use of this routine or macro is optional. Note that the macro uses its arguments more than once, so beware of side effects if you pass expressions as arguments to COLAMD_RECOMMENDED.
nnz | nonzeros in A |
n_row | number of rows in A |
n_col | number of columns in A |
Definition at line 258 of file Ordering.h.
static void Eigen::internal::colamd_set_defaults | ( | double | knobs[COLAMD_KNOBS] | ) | [inline, static] |
Sets default parameters. The use of this routine is optional.
Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries are removed prior to ordering. Columns with more than (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to ordering, and placed last in the output column ordering.
COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1, respectively, in colamd.h. Default values of these two knobs are both 0.5. Currently, only knobs [0] and knobs [1] are used, but future versions may use more knobs. If so, they will be properly set to their defaults by the future version of colamd_set_defaults, so that the code that calls colamd will not need to change, assuming that you either use colamd_set_defaults, or pass a (double *) NULL pointer as the knobs array to colamd or symamd.
knobs | parameter settings for colamd |
Definition at line 287 of file Ordering.h.
int Eigen::internal::coletree | ( | const MatrixType & | mat, |
IndexVector & | parent, | ||
IndexVector & | firstRowElt, | ||
typename MatrixType::StorageIndex * | perm = 0 |
||
) |
Compute the column elimination tree of a sparse matrix
mat | The matrix in column-major format. |
parent | The elimination tree |
firstRowElt | The column index of the first element in each row |
perm | The permutation to apply to the columns of mat |
Definition at line 61 of file SparseColEtree.h.
{ typedef typename MatrixType::StorageIndex StorageIndex; StorageIndex nc = convert_index<StorageIndex>(mat.cols()); // Number of columns StorageIndex m = convert_index<StorageIndex>(mat.rows()); StorageIndex diagSize = (std::min)(nc,m); IndexVector root(nc); // root of subtree of etree root.setZero(); IndexVector pp(nc); // disjoint sets pp.setZero(); // Initialize disjoint sets parent.resize(mat.cols()); //Compute first nonzero column in each row firstRowElt.resize(m); firstRowElt.setConstant(nc); firstRowElt.segment(0, diagSize).setLinSpaced(diagSize, 0, diagSize-1); bool found_diag; for (StorageIndex col = 0; col < nc; col++) { StorageIndex pcol = col; if(perm) pcol = perm[col]; for (typename MatrixType::InnerIterator it(mat, pcol); it; ++it) { Index row = it.row(); firstRowElt(row) = (std::min)(firstRowElt(row), col); } } /* Compute etree by Liu's algorithm for symmetric matrices, except use (firstRowElt[r],c) in place of an edge (r,c) of A. Thus each row clique in A'*A is replaced by a star centered at its first vertex, which has the same fill. */ StorageIndex rset, cset, rroot; for (StorageIndex col = 0; col < nc; col++) { found_diag = col>=m; pp(col) = col; cset = col; root(cset) = col; parent(col) = nc; /* The diagonal element is treated here even if it does not exist in the matrix * hence the loop is executed once more */ StorageIndex pcol = col; if(perm) pcol = perm[col]; for (typename MatrixType::InnerIterator it(mat, pcol); it||!found_diag; ++it) { // A sequence of interleaved find and union is performed Index i = col; if(it) i = it.index(); if (i == col) found_diag = true; StorageIndex row = firstRowElt(i); if (row >= col) continue; rset = internal::etree_find(row, pp); // Find the name of the set containing row rroot = root(rset); if (rroot != col) { parent(rroot) = col; pp(cset) = rset; cset = rset; root(cset) = col; } } } return 0; }
EIGEN_DEVICE_FUNC void Eigen::internal::compute_inverse_size2_helper | ( | const MatrixType & | matrix, |
const typename ResultType::Scalar & | invdet, | ||
ResultType & | result | ||
) | [inline] |
Definition at line 76 of file InverseImpl.h.
{ result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; }
EIGEN_DEVICE_FUNC void Eigen::internal::compute_inverse_size3_helper | ( | const MatrixType & | matrix, |
const typename ResultType::Scalar & | invdet, | ||
const Matrix< typename ResultType::Scalar, 3, 1 > & | cofactors_col0, | ||
ResultType & | result | ||
) | [inline] |
Definition at line 140 of file InverseImpl.h.
{
  // Row 0 is taken from the caller-supplied column-0 cofactors.
  result.row(0) = cofactors_col0 * invdet;
  // Rows 1 and 2: cofactor_3x3<MatrixType,i,j> times invdet is stored at
  // position (j,i), i.e. at the transposed location.
  result.coeffRef(1,0) = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
  result.coeffRef(1,1) = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
  result.coeffRef(1,2) = cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
  result.coeffRef(2,0) = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
  result.coeffRef(2,1) = cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
  result.coeffRef(2,2) = cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
}
ComputationInfo Eigen::internal::computeFromTridiagonal_impl | ( | DiagType & | diag, |
SubDiagType & | subdiag, | ||
const Index | maxIterations, | ||
bool | computeEigenvectors, | ||
MatrixType & | eivec | ||
) |
Compute the eigendecomposition from a tridiagonal matrix.
[in,out] | diag | : On input, the diagonal of the matrix, on output the eigenvalues |
[in,out] | subdiag | : The subdiagonal part of the matrix (entries are modified during the decomposition) |
[in] | maxIterations | : the maximum number of iterations |
[in] | computeEigenvectors | : whether the eigenvectors have to be computed or not |
[out] | eivec | : The matrix to store the eigenvectors if computeEigenvectors==true. Must be allocated on input. |
Returns Success or NoConvergence.
Definition at line 481 of file SelfAdjointEigenSolver.h.
{
  // Iterates tridiagonal_qr_step on the (diag, subdiag) pair until every
  // sub-diagonal entry has been zeroed or the iteration budget
  // (maxIterations * n) is exhausted, then sorts the eigenvalues in
  // increasing order, permuting the columns of eivec accordingly.
  using std::abs;

  ComputationInfo info;
  typedef typename MatrixType::Scalar Scalar;

  Index n = diag.size();
  Index end = n-1;
  Index start = 0;
  Index iter = 0; // total number of iterations

  typedef typename DiagType::RealScalar RealScalar;
  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();

  while (end>0)
  {
    // zero out the sub-diagonal entries that are negligible
    for (Index i = start; i<end; ++i)
      if (internal::isMuchSmallerThan(abs(subdiag[i]),(abs(diag[i])+abs(diag[i+1]))) || abs(subdiag[i]) <= considerAsZero)
        subdiag[i] = 0;

    // find the largest unreduced block
    while (end>0 && subdiag[end-1]==0)
    {
      end--;
    }
    if (end<=0)
      break;

    // if we spent too many iterations, we give up
    iter++;
    if(iter > maxIterations * n) break;

    start = end - 1;
    while (start>0 && subdiag[start-1]!=0)
      start--;

    internal::tridiagonal_qr_step<MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor>(diag.data(), subdiag.data(), start, end, computeEigenvectors ? eivec.data() : (Scalar*)0, n);
  }
  if (iter <= maxIterations * n)
    info = Success;
  else
    info = NoConvergence;

  // Sort eigenvalues and corresponding vectors.
  // TODO make the sort optional ?
  // TODO use a better sort algorithm !!
  if (info == Success)
  {
    for (Index i = 0; i < n-1; ++i)
    {
      Index k;
      diag.segment(i,n-i).minCoeff(&k);
      if (k > 0)
      {
        std::swap(diag[i], diag[k+i]);
        if(computeEigenvectors)
          eivec.col(i).swap(eivec.col(k+i));
      }
    }
  }
  return info;
}
void Eigen::internal::computeProductBlockingSizes | ( | Index & | k, |
Index & | m, | ||
Index & | n, | ||
Index | num_threads = 1 |
||
) |
Computes the blocking parameters for a m x k times k x n matrix product.
[in,out] | k | Input: the third dimension of the product. Output: the blocking size along the same dimension. |
[in,out] | m | Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. |
[in,out] | n | Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. |
Given an m x k times k x n matrix product of scalar types LhsScalar and RhsScalar, this function computes the blocking size parameters along the respective dimensions for matrix products and related algorithms.
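The blocking size parameters may be evaluated either by the cache-size heuristic (evaluateProductBlockingSizesHeuristic), or, when useSpecificBlockingSizes() reports that specific sizes are in use, from those specific values.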
Definition at line 300 of file GeneralBlockPanelKernel.h.
{
  // The heuristic only runs when useSpecificBlockingSizes() returns false.
  if (!useSpecificBlockingSizes(k, m, n)) {
    evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
  }

  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
  enum {
    kr = 8,
    mr = Traits::mr,
    nr = Traits::nr
  };
  // Round each size down to a multiple of its granularity (kr, mr, nr);
  // sizes that do not exceed the granularity are left as they are.
  if (k > kr) k -= k % kr;
  if (m > mr) m -= m % mr;
  if (n > nr) n -= n % nr;
}
void Eigen::internal::computeProductBlockingSizes | ( | Index & | k, |
Index & | m, | ||
Index & | n, | ||
Index | num_threads = 1 |
||
) | [inline] |
Definition at line 318 of file GeneralBlockPanelKernel.h.
{
  // Convenience overload: forwards to the three-template-argument version
  // with the third argument fixed to 1.
  computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
}
EIGEN_DEVICE_FUNC void Eigen::internal::conditional_aligned_delete | ( | T * | ptr, |
size_t | size | ||
) | [inline] |
EIGEN_DEVICE_FUNC void Eigen::internal::conditional_aligned_delete_auto | ( | T * | ptr, |
size_t | size | ||
) | [inline] |
EIGEN_DEVICE_FUNC void Eigen::internal::conditional_aligned_free | ( | void * | ptr | ) | [inline] |
Frees memory allocated with conditional_aligned_malloc
Definition at line 228 of file Memory.h.
{
  // Generic version: simply delegates to aligned_free.
  aligned_free(ptr);
}
EIGEN_DEVICE_FUNC void Eigen::internal::conditional_aligned_free< false > | ( | void * | ptr | ) | [inline] |
EIGEN_DEVICE_FUNC void* Eigen::internal::conditional_aligned_malloc | ( | size_t | size | ) | [inline] |
Allocates size bytes. If Align is true, then the returned ptr is 16-byte-aligned. On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
Definition at line 212 of file Memory.h.
{
  // Generic version: simply delegates to aligned_malloc.
  return aligned_malloc(size);
}
EIGEN_DEVICE_FUNC void* Eigen::internal::conditional_aligned_malloc< false > | ( | size_t | size | ) | [inline] |
Definition at line 217 of file Memory.h.
{
  check_that_malloc_is_allowed();

  void* const block = std::malloc(size);
  // A null pointer is only treated as a failure for a non-empty request.
  const bool failed = (block == 0) && (size != 0);
  if (failed)
    throw_std_bad_alloc();
  return block;
}
EIGEN_DEVICE_FUNC T* Eigen::internal::conditional_aligned_new | ( | size_t | size | ) | [inline] |
Definition at line 310 of file Memory.h.
{
  // Allocates raw storage for `size` objects of type T, then constructs them.
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    // construction failed: give the raw storage back before re-throwing
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }
}
EIGEN_DEVICE_FUNC T* Eigen::internal::conditional_aligned_new_auto | ( | size_t | size | ) | [inline] |
Definition at line 366 of file Memory.h.
{
  // Allocates storage for `size` objects of type T; the elements are only
  // constructed when NumTraits<T>::RequireInitialization is set.
  if(size==0)
    return 0; // short-cut. Also fixes Bug 884
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result, size);
    }
    EIGEN_CATCH(...)
    {
      // construction failed: give the raw storage back before re-throwing
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}
void* Eigen::internal::conditional_aligned_realloc | ( | void * | ptr, |
size_t | new_size, | ||
size_t | old_size | ||
) | [inline] |
Definition at line 238 of file Memory.h.
{
  // Generic version: simply delegates to aligned_realloc.
  return aligned_realloc(ptr, new_size, old_size);
}
void* Eigen::internal::conditional_aligned_realloc< false > | ( | void * | ptr, |
size_t | new_size, | ||
size_t | |||
) | [inline] |
EIGEN_DEVICE_FUNC T* Eigen::internal::conditional_aligned_realloc_new | ( | T * | pts, |
size_t | new_size, | ||
size_t | old_size | ||
) | [inline] |
Definition at line 343 of file Memory.h.
{
  // Resizes an array of T from old_size to new_size elements.
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  // shrinking: destroy the trailing elements before the storage is resized
  if(new_size < old_size)
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  // growing: construct the elements appended after the old ones
  if(new_size > old_size)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}
T* Eigen::internal::conditional_aligned_realloc_new_auto | ( | T * | pts, |
size_t | new_size, | ||
size_t | old_size | ||
) | [inline] |
Definition at line 387 of file Memory.h.
{
  // Same resize logic as the non-"auto" variant, except that element
  // destruction/construction only happens when
  // NumTraits<T>::RequireInitialization is set.
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  // shrinking: destroy the trailing elements before the storage is resized
  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  // growing: construct the elements appended after the old ones
  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}
EIGEN_DONT_INLINE void Eigen::internal::conjugate_gradient | ( | const MatrixType & | mat, |
const Rhs & | rhs, | ||
Dest & | x, | ||
const Preconditioner & | precond, | ||
Index & | iters, | ||
typename Dest::RealScalar & | tol_error | ||
) |
Low-level conjugate gradient algorithm
mat | The matrix A |
rhs | The right hand side vector b |
x | On input an initial solution, on output the computed solution. |
precond | A preconditioner being able to efficiently solve for an approximation of Ax=b (regardless of b) |
iters | On input the maximum number of iterations, on output the number of performed iterations. |
tol_error | On input the tolerance error, on output an estimation of the relative error. |
Definition at line 28 of file ConjugateGradient.h.
{
  // Preconditioned conjugate gradient iteration. On entry `iters` and
  // `tol_error` carry the iteration cap and the tolerance; on exit they
  // hold the number of iterations performed and sqrt(|r|^2 / |rhs|^2).
  using std::sqrt;
  using std::abs;
  typedef typename Dest::RealScalar RealScalar;
  typedef typename Dest::Scalar Scalar;
  typedef Matrix<Scalar,Dynamic,1> VectorType;

  RealScalar tol = tol_error;
  Index maxIters = iters;

  Index n = mat.cols();

  VectorType residual = rhs - mat * x; //initial residual

  RealScalar rhsNorm2 = rhs.squaredNorm();
  if(rhsNorm2 == 0)
  {
    // zero right-hand side: the solution is the zero vector
    x.setZero();
    iters = 0;
    tol_error = 0;
    return;
  }
  RealScalar threshold = tol*tol*rhsNorm2;
  RealScalar residualNorm2 = residual.squaredNorm();
  if (residualNorm2 < threshold)
  {
    // the initial guess is already within tolerance
    iters = 0;
    tol_error = sqrt(residualNorm2 / rhsNorm2);
    return;
  }

  VectorType p(n);
  p = precond.solve(residual); // initial search direction

  VectorType z(n), tmp(n);
  RealScalar absNew = numext::real(residual.dot(p)); // the square of the absolute value of r scaled by invM
  Index i = 0;
  while(i < maxIters)
  {
    tmp.noalias() = mat * p; // the bottleneck of the algorithm

    Scalar alpha = absNew / p.dot(tmp); // the amount we travel on dir
    x += alpha * p; // update solution
    residual -= alpha * tmp; // update residual

    residualNorm2 = residual.squaredNorm();
    if(residualNorm2 < threshold)
      break;

    z = precond.solve(residual); // approximately solve for "A z = residual"

    RealScalar absOld = absNew;
    absNew = numext::real(residual.dot(z)); // update the absolute value of r
    RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction
    p = z + beta * p; // update search direction
    i++;
  }
  tol_error = sqrt(residualNorm2 / rhsNorm2);
  iters = i;
}
static void Eigen::internal::conservative_sparse_sparse_product_impl | ( | const Lhs & | lhs, |
const Rhs & | rhs, | ||
ResultType & | res, | ||
bool | sortedInsertion = false |
||
) | [static] |
Definition at line 18 of file ConservativeSparseSparseProduct.h.
{
  // Computes res = lhs * rhs one outer vector of the result at a time.
  // Three scratch arrays of length `rows` are used: `mask` flags the inner
  // indices already touched for the current vector, `values` accumulates
  // their coefficients and `indices` lists them in order of first touch.
  typedef typename remove_all<Lhs>::type::Scalar Scalar;

  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
  ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0);
  ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);

  std::memset(mask,0,sizeof(bool)*rows);

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // estimate the number of non zero entries
  // given a rhs column containing Y non zeros, we assume that the respective Y columns
  // of the lhs differs in average of one non zeros, thus the number of non zeros for
  // the product of a rhs column with the lhs is X+Y where X is the average number of non zero
  // per column of the lhs.
  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();

  res.setZero();
  res.reserve(Index(estimated_nnz_prod));
  // we compute each column of the result, one after the other
  for (Index j=0; j<cols; ++j)
  {
    res.startVec(j);
    Index nnz = 0;
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      Scalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        Scalar x = lhsIt.value();
        if(!mask[i])
        {
          mask[i] = true;
          values[i] = x * y;
          indices[nnz] = i;
          ++nnz;
        }
        else
          values[i] += x * y;
      }
    }
    if(!sortedInsertion)
    {
      // unordered insertion
      for(Index k=0; k<nnz; ++k)
      {
        Index i = indices[k];
        res.insertBackByOuterInnerUnordered(j,i) = values[i];
        mask[i] = false;
      }
    }
    else
    {
      // alternative ordered insertion code:
      const Index t200 = rows/11; // 11 == (log2(200)*1.39)
      const Index t = (rows*100)/139;

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // if the result is sparse enough => use a quick sort
      // otherwise => loop through the entire vector
      // In order to avoid to perform an expensive log2 when the
      // result is clearly very sparse we use a linear bound up to 200.
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {
        if(nnz>1) std::sort(indices,indices+nnz);
        for(Index k=0; k<nnz; ++k)
        {
          Index i = indices[k];
          res.insertBackByOuterInner(j,i) = values[i];
          mask[i] = false;
        }
      }
      else
      {
        // dense path
        for(Index i=0; i<rows; ++i)
        {
          if(mask[i])
          {
            mask[i] = false;
            res.insertBackByOuterInner(j,i) = values[i];
          }
        }
      }
    }
  }
  res.finalize();
}
EIGEN_DEVICE_FUNC T* Eigen::internal::const_cast_ptr | ( | const T * | ptr | ) | [inline] |
Definition at line 421 of file XprHelper.h.
{
  // Strips the const qualifier from the pointee type.
  return const_cast<T*>(ptr);
}
EIGEN_DEVICE_FUNC T* Eigen::internal::construct_elements_of_array | ( | T * | ptr, |
size_t | size | ||
) | [inline] |
Constructs the elements of an array. The size parameter specifies the number of objects on which the constructor of T is called.
Definition at line 265 of file Memory.h.
{
  // i is declared outside the try block so that the handler can see how far
  // the construction loop got.
  size_t i;
  EIGEN_TRY
  {
    // default-construct each element in place with placement new
    for (i = 0; i < size; ++i) ::new (ptr + i) T;
    return ptr;
  }
  EIGEN_CATCH(...)
  {
    // elements [0, i) were constructed before the failure; pass exactly that
    // count to destruct_elements_of_array, then propagate the exception
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
}
EIGEN_DEVICE_FUNC IndexDest Eigen::internal::convert_index | ( | const IndexSrc & | idx | ) | [inline] |
Definition at line 41 of file XprHelper.h.
{
  // Converts idx to IndexDest, asserting that the value does not exceed the
  // largest value representable by the destination type.
  // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away:
  eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value to big for target type");
  return IndexDest(idx);
}
bool Eigen::internal::copy_bool | ( | bool | b | ) | [inline] |
StorageIndex Eigen::internal::cs_tdfs | ( | StorageIndex | j, |
StorageIndex | k, | ||
StorageIndex * | head, | ||
const StorageIndex * | next, | ||
StorageIndex * | post, | ||
StorageIndex * | stack | ||
) |
Definition at line 60 of file Amd.h.
{
  /* Iterative depth-first traversal starting at node j, driven by an explicit
   * stack. head[p] yields the next unvisited child of p and next[] links the
   * siblings; -1 marks the end of a child list. Nodes are written to post[]
   * from position k onwards as they are popped; the updated k is returned. */
  if(head == 0 || next == 0 || post == 0 || stack == 0)
    return (-1);                          /* check inputs */

  StorageIndex depth = 0;                 /* index of the top of the stack */
  stack[depth] = j;                       /* place j on the stack */
  while (depth >= 0)                      /* while (stack is not empty) */
  {
    const StorageIndex node = stack[depth];
    const StorageIndex child = head[node];
    if(child != -1)
    {
      head[node] = next[child];           /* remove child from the children of node */
      ++depth;
      stack[depth] = child;               /* start dfs on the child */
      continue;
    }
    --depth;                              /* node has no unordered children left */
    post[k] = node;                       /* node is the kth postordered node */
    ++k;
  }
  return k;
}
static StorageIndex Eigen::internal::cs_wclear | ( | StorageIndex | mark, |
StorageIndex | lemax, | ||
StorageIndex * | w, | ||
StorageIndex | n | ||
) | [static] |
EIGEN_DEVICE_FUNC void Eigen::internal::destruct_elements_of_array | ( | T * | ptr, |
size_t | size | ||
) | [inline] |
static void Eigen::internal::detect_super_cols | ( | colamd_col< IndexType > | Col[], |
IndexType | A[], | ||
IndexType | head[], | ||
IndexType | row_start, | ||
IndexType | row_length | ||
) | [static] |
Definition at line 1549 of file Ordering.h.
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | squaredNorm | , |
Size *NumTraits< Scalar >::MulCost+(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | norm | , |
(Size+5)*NumTraits< Scalar >::MulCost+(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | stableNorm | , |
(Size+5)*NumTraits< Scalar >::MulCost+(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | blueNorm | , |
(Size+5)*NumTraits< Scalar >::MulCost+(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | hypotNorm | , |
(Size-1)*functor_traits< scalar_hypot_op< Scalar > >::Cost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | sum | , |
(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | mean | , |
(Size-1)*NumTraits< Scalar >::AddCost+NumTraits< Scalar >::MulCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | minCoeff | , |
(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | maxCoeff | , |
(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | all | , |
(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | any | , |
(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | count | , |
(Size-1)*NumTraits< Scalar >::AddCost | |||
) |
Eigen::internal::EIGEN_MEMBER_FUNCTOR | ( | prod | , |
(Size-1)*NumTraits< Scalar >::MulCost | |||
) |
void Eigen::internal::eigen_pastix | ( | pastix_data_t ** | pastix_data, |
int | pastix_comm, | ||
int | n, | ||
int * | ptr, | ||
int * | idx, | ||
float * | vals, | ||
int * | perm, | ||
int * | invp, | ||
float * | x, | ||
int | nbrhs, | ||
int * | iparm, | ||
double * | dparm | ||
) |
Definition at line 67 of file PaStiXSupport.h.
{
  // Single-precision real overload: forwards to s_pastix.
  // With no unknowns, the three matrix arrays are passed as null pointers.
  const bool noUnknowns = (n == 0);
  if (noUnknowns)
  {
    vals = NULL;
    idx = NULL;
    ptr = NULL;
  }
  // With no right-hand side, x is passed as null and the count is set to 1.
  const bool noRhs = (nbrhs == 0);
  if (noRhs)
  {
    nbrhs = 1;
    x = NULL;
  }
  s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
}
void Eigen::internal::eigen_pastix | ( | pastix_data_t ** | pastix_data, |
int | pastix_comm, | ||
int | n, | ||
int * | ptr, | ||
int * | idx, | ||
double * | vals, | ||
int * | perm, | ||
int * | invp, | ||
double * | x, | ||
int | nbrhs, | ||
int * | iparm, | ||
double * | dparm | ||
) |
Definition at line 74 of file PaStiXSupport.h.
{
  // Double-precision real overload: forwards to d_pastix.
  // With no unknowns, the three matrix arrays are passed as null pointers.
  const bool noUnknowns = (n == 0);
  if (noUnknowns)
  {
    vals = NULL;
    idx = NULL;
    ptr = NULL;
  }
  // With no right-hand side, x is passed as null and the count is set to 1.
  const bool noRhs = (nbrhs == 0);
  if (noRhs)
  {
    nbrhs = 1;
    x = NULL;
  }
  d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
}
void Eigen::internal::eigen_pastix | ( | pastix_data_t ** | pastix_data, |
int | pastix_comm, | ||
int | n, | ||
int * | ptr, | ||
int * | idx, | ||
std::complex< float > * | vals, | ||
int * | perm, | ||
int * | invp, | ||
std::complex< float > * | x, | ||
int | nbrhs, | ||
int * | iparm, | ||
double * | dparm | ||
) |
Definition at line 81 of file PaStiXSupport.h.
{
  // Single-precision complex overload: forwards to c_pastix, reinterpreting
  // the std::complex<float> buffers as PASTIX_COMPLEX.
  // With no unknowns, the three matrix arrays are passed as null pointers.
  const bool noUnknowns = (n == 0);
  if (noUnknowns)
  {
    vals = NULL;
    idx = NULL;
    ptr = NULL;
  }
  // With no right-hand side, x is passed as null and the count is set to 1.
  const bool noRhs = (nbrhs == 0);
  if (noRhs)
  {
    nbrhs = 1;
    x = NULL;
  }
  PASTIX_COMPLEX* const rawVals = reinterpret_cast<PASTIX_COMPLEX*>(vals);
  PASTIX_COMPLEX* const rawX = reinterpret_cast<PASTIX_COMPLEX*>(x);
  c_pastix(pastix_data, pastix_comm, n, ptr, idx, rawVals, perm, invp, rawX, nbrhs, iparm, dparm);
}
void Eigen::internal::eigen_pastix | ( | pastix_data_t ** | pastix_data, |
int | pastix_comm, | ||
int | n, | ||
int * | ptr, | ||
int * | idx, | ||
std::complex< double > * | vals, | ||
int * | perm, | ||
int * | invp, | ||
std::complex< double > * | x, | ||
int | nbrhs, | ||
int * | iparm, | ||
double * | dparm | ||
) |
Definition at line 88 of file PaStiXSupport.h.
{
  // Double-precision complex overload: forwards to z_pastix, reinterpreting
  // the std::complex<double> buffers as PASTIX_DCOMPLEX.
  // With no unknowns, the three matrix arrays are passed as null pointers.
  const bool noUnknowns = (n == 0);
  if (noUnknowns)
  {
    vals = NULL;
    idx = NULL;
    ptr = NULL;
  }
  // With no right-hand side, x is passed as null and the count is set to 1.
  const bool noRhs = (nbrhs == 0);
  if (noRhs)
  {
    nbrhs = 1;
    x = NULL;
  }
  PASTIX_DCOMPLEX* const rawVals = reinterpret_cast<PASTIX_DCOMPLEX*>(vals);
  PASTIX_DCOMPLEX* const rawX = reinterpret_cast<PASTIX_DCOMPLEX*>(x);
  z_pastix(pastix_data, pastix_comm, n, ptr, idx, rawVals, perm, invp, rawX, nbrhs, iparm, dparm);
}
Index Eigen::internal::etree_find | ( | Index | i, |
IndexVector & | pp | ||
) |
Find the root of the tree/set containing the vertex i : Use Path halving
Definition at line 40 of file SparseColEtree.h.
void Eigen::internal::evaluateProductBlockingSizesHeuristic | ( | Index & | k, |
Index & | m, | ||
Index & | n, | ||
Index | num_threads = 1 |
||
) |
Definition at line 93 of file GeneralBlockPanelKernel.h.
{
  // Updates k, m and n in place with blocking sizes derived from the cache
  // sizes reported by manage_caching_sizes(). Two separate strategies are
  // used, depending on whether num_threads is greater than 1.
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;

  // Explanations:
  // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
  // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
  // per mr x kc horizontal small panels where mr is the blocking size along the m dimension
  // at the register level. This small horizontal panel has to stay within L1 cache.
  std::ptrdiff_t l1, l2, l3;
  manage_caching_sizes(GetAction, &l1, &l2, &l3);

  if (num_threads > 1) {
    typedef typename Traits::ResScalar ResScalar;
    enum {
      kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
      ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
      k_mask = -8,
      mr = Traits::mr,
      mr_mask = -mr,
      nr = Traits::nr,
      nr_mask = -nr
    };
    // Increasing k gives us more time to prefetch the content of the "C"
    // registers. However once the latency is hidden there is no point in
    // increasing the value of k, so we'll cap it at 320 (value determined
    // experimentally).
    const Index k_cache = (std::min<Index>)((l1-ksub)/kdiv, 320);
    if (k_cache < k) {
      k = k_cache & k_mask;
      eigen_internal_assert(k > 0);
    }

    const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
    const Index n_per_thread = numext::div_ceil(n, num_threads);
    if (n_cache <= n_per_thread) {
      // Don't exceed the capacity of the l2 cache.
      eigen_internal_assert(n_cache >= static_cast<Index>(nr));
      n = n_cache & nr_mask;
      eigen_internal_assert(n > 0);
    } else {
      n = (std::min<Index>)(n, (n_per_thread + nr - 1) & nr_mask);
    }

    if (l3 > l2) {
      // l3 is shared between all cores, so we'll give each thread its own chunk of l3.
      const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
      const Index m_per_thread = numext::div_ceil(m, num_threads);
      if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
        m = m_cache & mr_mask;
        eigen_internal_assert(m > 0);
      } else {
        m = (std::min<Index>)(m, (m_per_thread + mr - 1) & mr_mask);
      }
    }
  }
  else {
    // In unit tests we do not want to use extra large matrices,
    // so we reduce the cache size to check the blocking strategy is not flawed
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
    l1 = 9*1024;
    l2 = 32*1024;
    l3 = 512*1024;
#endif

    // Early return for small problems because the computation below are time consuming for small problems.
    // Perhaps it would make more sense to consider k*n*m??
    // Note that for very tiny problem, this function should be bypassed anyway
    // because we use the coefficient-based implementation for them.
    if((std::max)(k,(std::max)(m,n))<48)
      return;

    typedef typename Traits::ResScalar ResScalar;
    enum {
      k_peeling = 8,
      k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
      k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
    };

    // ---- 1st level of blocking on L1, yields kc ----

    // Blocking on the third dimension (i.e., k) is chosen so that an horizontal panel
    // of size mr x kc of the lhs plus a vertical panel of kc x nr of the rhs both fits within L1 cache.
    // We also include a register-level block of the result (mx x nr).
    // (In an ideal world only the lhs panel would stay in L1)
    // Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
    const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
    const Index old_k = k;
    if(k>max_kc)
    {
      // We are really blocking on the third dimension:
      // -> reduce blocking size to make sure the last block is as large as possible
      //    while keeping the same number of sweeps over the result.
      k = (k%max_kc)==0 ? max_kc
                        : max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));

      eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
    }

    // ---- 2nd level of blocking on max(L2,L3), yields nc ----

    // TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
    //      actual_l2 = max(l2, l3/nb_core_sharing_l3)
    // The number below is quite conservative: it is better to underestimate the cache size rather than overestimating it)
    // For instance, it corresponds to 6MB of L3 shared among 4 cores.
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
    const Index actual_l2 = l3;
#else
    const Index actual_l2 = 1572864; // == 1.5 MB
#endif

    // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
    // The second half is implicitly reserved to access the result and lhs coefficients.
    // When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
    // to limit this growth: we bound nc to growth by a factor x1.5.
    // However, if the entire lhs block fit within L1, then we are not going to block on the rows at all,
    // and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
    Index max_nc;
    const Index lhs_bytes = m * k * sizeof(LhsScalar);
    const Index remaining_l1 = l1- k_sub - lhs_bytes;
    if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
    {
      // L1 blocking
      max_nc = remaining_l1 / (k*sizeof(RhsScalar));
    }
    else
    {
      // L2 blocking
      max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
    }
    // WARNING Below, we assume that Traits::nr is a power of two.
    Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
    if(n>nc)
    {
      // We are really blocking over the columns:
      // -> reduce blocking size to make sure the last block is as large as possible
      //    while keeping the same number of sweeps over the packed lhs.
      //    Here we allow one more sweep if this gives us a perfect match, thus the commented "-1"
      n = (n%nc)==0 ? nc
                    : (nc - Traits::nr * ((nc/*-1*/-(n%nc))/(Traits::nr*(n/nc+1))));
    }
    else if(old_k==k)
    {
      // So far, no blocking at all, i.e., kc==k, and nc==n.
      // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
      // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
      Index problem_size = k*n*sizeof(LhsScalar);
      Index actual_lm = actual_l2;
      Index max_mc = m;
      if(problem_size<=1024)
      {
        // problem is small enough to keep in L1
        // Let's choose m such that lhs's block fit in 1/3 of L1
        actual_lm = l1;
      }
      else if(l3!=0 && problem_size<=32768)
      {
        // we have both L2 and L3, and problem is small enough to be kept in L2
        // Let's choose m such that lhs's block fit in 1/3 of L2
        actual_lm = l2;
        max_mc = (std::min<Index>)(576,max_mc);
      }
      Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
      if (mc > Traits::mr) mc -= mc % Traits::mr;
      else if (mc==0) return;
      m = (m%mc)==0 ? mc
                    : (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
    }
  }
}
const T::Scalar* Eigen::internal::extract_data | ( | const T & | m | ) |
Definition at line 360 of file BlasUtil.h.
{
// Dispatches on the type T: the actual work is done by extract_data_selector<T>::run.
return extract_data_selector<T>::run(m);
}
static IndexType Eigen::internal::find_ordering | ( | IndexType | n_row, |
IndexType | n_col, | ||
IndexType | Alen, | ||
Colamd_Row< IndexType > | Row[], | ||
colamd_col< IndexType > | Col[], | ||
IndexType | A[], | ||
IndexType | head[], | ||
IndexType | n_col2, | ||
IndexType | max_deg, | ||
IndexType | pfree | ||
) | [static] |
Definition at line 937 of file Ordering.h.
EIGEN_DEVICE_FUNC Index Eigen::internal::first_aligned | ( | const Scalar * | array, |
Index | size | ||
) | [inline] |
Returns the index of the first element of the array that is well aligned with respect to the requested Alignment.
Alignment | requested alignment in Bytes. |
array | the address of the start of the array |
size | the size of the array |
There is also the variant first_aligned(const DenseBase&) defined in DenseCoeffsBase.h.
Definition at line 436 of file Memory.h.
{
  // Returns the index of the first element of `array` whose address is a
  // multiple of Alignment bytes, or `size` when there is no such element
  // within the array.
  const Index ScalarSize = sizeof(Scalar);
  const Index AlignmentSize = Alignment / ScalarSize;
  const Index AlignmentMask = AlignmentSize-1;

  if(AlignmentSize<=1)
  {
    // Either the requested alignment is smaller than a scalar, or it exactly matches one scalar,
    // so that all elements of the array have the same alignment.
    return 0;
  }
  else if( (std::size_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
  {
    // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    // number of scalars to skip to reach the next multiple of AlignmentSize
    Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
    return (first < size) ? first : size;
  }
}
static Index Eigen::internal::first_aligned | ( | const DenseBase< Derived > & | m | ) | [inline, static] |
Alignment | requested alignment in Bytes. |
There is also the variant first_aligned(const Scalar*, Index) defined in Memory.h. See it for more documentation.
Definition at line 639 of file DenseCoeffsBase.h.
{
  // ReturnZero is set when the evaluator's compile-time alignment already
  // meets the requested Alignment, or when the expression does not have
  // DirectAccessBit; it selects the first_aligned_impl specialization to run.
  enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
  return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
}
EIGEN_DEVICE_FUNC Index Eigen::internal::first_default_aligned | ( | const Scalar * | array, |
Index | size | ||
) | [inline] |
Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement.
Definition at line 464 of file Memory.h.
{
  // Same as first_aligned, with the alignment taken from the default packet
  // type associated with Scalar.
  return first_aligned<unpacket_traits<typename packet_traits<Scalar>::type>::alignment>(array, size);
}
static Index Eigen::internal::first_default_aligned | ( | const DenseBase< Derived > & | m | ) | [inline, static] |
Definition at line 646 of file DenseCoeffsBase.h.
{
  // Same as first_aligned(m), with the alignment taken from the default
  // packet type associated with the expression's scalar type.
  typedef typename packet_traits<typename Derived::Scalar>::type PacketOfScalar;
  enum { RequestedAlignment = int(unpacket_traits<PacketOfScalar>::alignment) };
  return internal::first_aligned<RequestedAlignment,Derived>(m);
}
Index Eigen::internal::first_multiple | ( | Index | size, |
Index | base | ||
) | [inline] |
void Eigen::internal::fortran_to_c_numbering | ( | MatrixType & | mat | ) |
Definition at line 111 of file PaStiXSupport.h.
{
  // Shifts the outer and inner index arrays of `mat` from 1-based to 0-based
  // numbering. Nothing is done unless the first outer index equals 1.
  // Check the Numbering
  if ( mat.outerIndexPtr()[0] == 1 )
  { // Convert to C-style numbering
    int i;
    for(i = 0; i <= mat.rows(); ++i)
      --mat.outerIndexPtr()[i];
    for(i = 0; i < mat.nonZeros(); ++i)
      --mat.innerIndexPtr()[i];
  }
}
static IndexType Eigen::internal::garbage_collection | ( | IndexType | n_row, |
IndexType | n_col, | ||
Colamd_Row< IndexType > | Row[], | ||
colamd_col< IndexType > | Col[], | ||
IndexType | A[], | ||
IndexType * | pfree | ||
) | [static] |
Definition at line 1700 of file Ordering.h.
EIGEN_STRONG_INLINE void Eigen::internal::gebp_madd | ( | const CJ & | cj, |
A & | a, | ||
B & | b, | ||
C & | c, | ||
T & | t | ||
) |
Definition at line 344 of file GeneralBlockPanelKernel.h.
{
  // Dispatches on the argument types: the work is done by gebp_madd_selector<...>::run.
  gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
}
EIGEN_DEVICE_FUNC const Derived::Scalar Eigen::internal::general_det3_helper | ( | const MatrixBase< Derived > & | matrix, |
int | i1, | ||
int | i2, | ||
int | i3, | ||
int | j1, | ||
int | j2, | ||
int | j3 | ||
) | [inline] |
Definition at line 205 of file InverseImpl.h.
{
// 2x2 determinant built from rows (i2,i3) and columns (j2,j3) ...
const typename Derived::Scalar minor2x2 =
    matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2);
// ... weighted by the coefficient at (i1,j1).
return matrix.coeff(i1,j1) * minor2x2;
}
void Eigen::internal::handmade_aligned_free | ( | void * | ptr | ) | [inline] |
void* Eigen::internal::handmade_aligned_malloc | ( | std::size_t | size | ) | [inline] |
Like malloc, but the returned pointer is guaranteed to be aligned on EIGEN_DEFAULT_ALIGN_BYTES bytes. Fast, but wastes EIGEN_DEFAULT_ALIGN_BYTES additional bytes of memory. Does not throw any exception.
Definition at line 86 of file Memory.h.
{
  // Over-allocate by EIGEN_DEFAULT_ALIGN_BYTES so that an aligned address
  // strictly above the start of the raw block always exists.
  void* const raw = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (!raw)
    return 0;

  // Round the raw address down to the alignment boundary, then step up by one
  // full alignment unit.
  const std::size_t boundaryMask = ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
  const std::size_t rawAddress = reinterpret_cast<std::size_t>(raw);
  void* const aligned = reinterpret_cast<void*>((rawAddress & boundaryMask) + EIGEN_DEFAULT_ALIGN_BYTES);

  // Store the pointer returned by malloc in the slot just below the aligned
  // address (handmade_aligned_realloc reads it back from there).
  reinterpret_cast<void**>(aligned)[-1] = raw;
  return aligned;
}
void* Eigen::internal::handmade_aligned_realloc | ( | void * | ptr, |
std::size_t | size, | ||
std::size_t | = 0 |
||
) | [inline] |
Reallocates aligned memory. Since we know that our handmade version is based on std::malloc we can use std::realloc to implement efficient reallocation.
Definition at line 106 of file Memory.h.
{
  // a null ptr degenerates to a plain allocation
  if (ptr == 0) return handmade_aligned_malloc(size);
  // the slot just below ptr holds the pointer to the start of the raw block
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  // offset of the user data inside the raw block, needed to locate the data
  // again once realloc has possibly moved the block
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  // same address computation as in handmade_aligned_malloc
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  // the data sits at its old offset; if the aligned address of the new block
  // is at a different offset, shift the data (the ranges may overlap, hence memmove)
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);
  // record the new raw pointer just below the aligned address
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
void Eigen::internal::householder_qr_inplace_unblocked | ( | MatrixQR & | mat, |
HCoeffs & | hCoeffs, | ||
typename MatrixQR::Scalar * | tempData = 0 |
||
) |
Definition at line 234 of file HouseholderQR.h.
{
  typedef typename MatrixQR::Scalar Scalar;
  typedef typename MatrixQR::RealScalar RealScalar;
  typedef Matrix<Scalar,MatrixQR::ColsAtCompileTime,1> TempType;

  const Index rows = mat.rows();
  const Index cols = mat.cols();
  const Index size = (std::min)(rows,cols);

  eigen_assert(hCoeffs.size() == size);

  // Use a locally owned workspace when the caller did not provide one.
  TempType tempVector;
  if(tempData==0)
  {
    tempVector.resize(cols);
    tempData = tempVector.data();
  }

  for(Index k = 0; k < size; ++k)
  {
    const Index tailRows     = rows - k;
    const Index trailingCols = cols - k - 1;

    // Turn the lower part of column k into a Householder vector; beta replaces the diagonal entry.
    RealScalar beta;
    mat.col(k).tail(tailRows).makeHouseholderInPlace(hCoeffs.coeffRef(k), beta);
    mat.coeffRef(k,k) = beta;

    // apply H to remaining part of m_qr from the left
    mat.bottomRightCorner(tailRows, trailingCols)
       .applyHouseholderOnTheLeft(mat.col(k).tail(tailRows-1), hCoeffs.coeffRef(k), tempData+k+1);
  }
}
EIGEN_DEVICE_FUNC void Eigen::internal::ignore_unused_variable | ( | const T & | ) |
static IndexType Eigen::internal::init_rows_cols | ( | IndexType | n_row, |
IndexType | n_col, | ||
Colamd_Row< IndexType > | Row[], | ||
colamd_col< IndexType > | col[], | ||
IndexType | A[], | ||
IndexType | p[], | ||
IndexType | stats[COLAMD_STATS] | ||
) | [static] |
Definition at line 484 of file Ordering.h.
static void Eigen::internal::init_scoring | ( | IndexType | n_row, |
IndexType | n_col, | ||
Colamd_Row< IndexType > | Row[], | ||
colamd_col< IndexType > | Col[], | ||
IndexType | A[], | ||
IndexType | head[], | ||
double | knobs[COLAMD_KNOBS], | ||
IndexType * | p_n_row2, | ||
IndexType * | p_n_col2, | ||
IndexType * | p_max_deg | ||
) | [static] |
Definition at line 700 of file Ordering.h.
bool Eigen::internal::is_same_dense | ( | const T1 & | mat1, |
const T2 & | mat2, | ||
typename enable_if< has_direct_access< T1 >::ret &&has_direct_access< T2 >::ret, T1 >::type * | = 0 |
||
) |
Definition at line 648 of file XprHelper.h.
{
  // All three of data pointer, inner stride and outer stride must agree.
  if (!(mat1.data()==mat2.data())) return false;
  if (!(mat1.innerStride()==mat2.innerStride())) return false;
  return (mat1.outerStride()==mat2.outerStride());
}
bool Eigen::internal::is_same_dense | ( | const T1 & | , |
const T2 & | , | ||
typename enable_if<!(has_direct_access< T1 >::ret &&has_direct_access< T2 >::ret), T1 >::type * | = 0 |
||
) |
Definition at line 654 of file XprHelper.h.
{
  // Overload selected when at least one operand lacks direct access: always reports "not the same".
  return false;
}
EIGEN_DONT_INLINE void Eigen::internal::least_square_conjugate_gradient | ( | const MatrixType & | mat, |
const Rhs & | rhs, | ||
Dest & | x, | ||
const Preconditioner & | precond, | ||
Index & | iters, | ||
typename Dest::RealScalar & | tol_error | ||
) |
Low-level conjugate gradient algorithm for least-square problems
mat | The matrix A |
rhs | The right hand side vector b |
x | On input an initial solution, on output the computed solution. |
precond | A preconditioner being able to efficiently solve for an approximation of A'Ax=b (regardless of b) |
iters | On input the max number of iterations, on output the number of performed iterations. |
tol_error | On input the tolerance error, on output an estimation of the relative error. |
Definition at line 28 of file LeastSquareConjugateGradient.h.
{
  using std::sqrt;
  using std::abs;
  typedef typename Dest::RealScalar RealScalar;
  typedef typename Dest::Scalar Scalar;
  typedef Matrix<Scalar,Dynamic,1> VectorType;

  // Capture the in/out arguments before they are overwritten with results.
  RealScalar tol = tol_error;
  Index maxIters = iters;

  Index m = mat.rows(), n = mat.cols();

  VectorType residual        = rhs - mat * x;
  VectorType normal_residual = mat.adjoint() * residual;

  RealScalar rhsNorm2 = (mat.adjoint()*rhs).squaredNorm();
  if(rhsNorm2 == 0)
  {
    // A'b is zero: return the zero vector without iterating.
    x.setZero();
    iters = 0;
    tol_error = 0;
    return;
  }

  RealScalar threshold = tol*tol*rhsNorm2;
  RealScalar residualNorm2 = normal_residual.squaredNorm();
  if (residualNorm2 < threshold)
  {
    // The initial guess already satisfies the tolerance.
    iters = 0;
    tol_error = sqrt(residualNorm2 / rhsNorm2);
    return;
  }

  VectorType p(n);
  p = precond.solve(normal_residual);                         // initial search direction

  VectorType z(n), tmp(m);
  RealScalar absNew = numext::real(normal_residual.dot(p));   // the square of the absolute value of r scaled by invM

  Index i = 0;
  for(; i < maxIters; ++i)
  {
    tmp.noalias() = mat * p;

    Scalar alpha = absNew / tmp.squaredNorm();                // the amount we travel on dir
    x += alpha * p;                                           // update solution
    residual -= alpha * tmp;                                  // update residual
    normal_residual = mat.adjoint() * residual;               // update residual of the normal equation

    residualNorm2 = normal_residual.squaredNorm();
    if(residualNorm2 < threshold)
      break;                                                  // leaves i un-incremented, as the count of completed iterations

    z = precond.solve(normal_residual);                       // approximately solve for "A'A z = normal_residual"

    RealScalar absOld = absNew;
    absNew = numext::real(normal_residual.dot(z));            // update the absolute value of r
    RealScalar beta = absNew / absOld;                        // calculate the Gram-Schmidt value used to create the new search direction
    p = z + beta * p;                                         // update search direction
  }

  tol_error = sqrt(residualNorm2 / rhsNorm2);
  iters = i;
}
static Index Eigen::internal::llt_rank_update_lower | ( | MatrixType & | mat, |
const VectorType & | vec, | ||
const typename MatrixType::RealScalar & | sigma | ||
) | [static] |
Definition at line 195 of file LLT.h.
{
  using std::sqrt;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
  typedef typename MatrixType::ColXpr ColXpr;
  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
  typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
  typedef Matrix<Scalar,Dynamic,1> TempVectorType;
  typedef typename TempVectorType::SegmentReturnType TempVecSegment;

  Index n = mat.cols();
  eigen_assert(mat.rows()==n && vec.size()==n);

  TempVectorType temp;

  if(sigma>0)
  {
    // This version is based on Givens rotations.
    // It is faster than the other one below, but only works for updates,
    // i.e., for sigma > 0
    temp = sqrt(sigma) * vec;

    for(Index i=0; i<n; ++i)
    {
      JacobiRotation<Scalar> g;
      g.makeGivens(mat(i,i), -temp(i), &mat(i,i));

      Index rs = n-i-1;
      if(rs>0)
      {
        ColXprSegment x(mat.col(i).tail(rs));
        TempVecSegment y(temp.tail(rs));
        apply_rotation_in_the_plane(x, y, g);
      }
    }
    return -1;
  }

  // sigma <= 0: column-by-column variant that can fail part-way through.
  temp = vec;
  RealScalar beta = 1;
  for(Index j=0; j<n; ++j)
  {
    RealScalar Ljj = numext::real(mat.coeff(j,j));
    RealScalar dj = numext::abs2(Ljj);
    Scalar wj = temp.coeff(j);
    RealScalar swj2 = sigma*numext::abs2(wj);
    RealScalar gamma = dj*beta + swj2;

    RealScalar x = dj + swj2/beta;
    if (x<=RealScalar(0))
      return j;   // new squared diagonal entry is not positive: report the failing column
    RealScalar nLjj = sqrt(x);
    mat.coeffRef(j,j) = nLjj;
    beta += swj2/dj;

    // Update the terms of L
    Index rs = n-j-1;
    if(rs)
    {
      temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
      if(gamma != 0)
        mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs);
    }
  }
  return -1;
}
Index Eigen::internal::LUnumTempV | ( | Index & | m, |
Index & | w, | ||
Index & | t, | ||
Index & | b | ||
) | [inline] |
Definition at line 39 of file SparseLU_Memory.h.
{
  // The larger of m and (t+b)*w.
  const Index scaled = (t+b)*w;
  return (std::max)(m, scaled);
}
Index Eigen::internal::LUTempSpace | ( | Index & | m, |
Index & | w | ||
) | [inline] |
Definition at line 45 of file SparseLU_Memory.h.
{
  // Byte count made of an Index-sized part and a Scalar-sized part, both proportional to m.
  return (2*w + 4 + LUNoMarker) * m * sizeof(Index)
       + (w + 1) * m * sizeof(Scalar);
}
void Eigen::internal::make_block_householder_triangular_factor | ( | TriangularFactorType & | triFactor, |
const VectorsType & | vectors, | ||
const CoeffsType & | hCoeffs | ||
) |
Definition at line 51 of file BlockHouseholder.h.
{
  const Index nbVecs = vectors.cols();
  eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);

  // Fill the factor row by row, from the last row up to the first.
  for(Index i = nbVecs-1; i >=0 ; --i)
  {
    const Index tailRows = vectors.rows() - i - 1;
    const Index tailCols = nbVecs-i-1;

    if(tailCols>0)
    {
      triFactor.row(i).tail(tailCols).noalias() = -hCoeffs(i) * vectors.col(i).tail(tailRows).adjoint()
                                                              * vectors.bottomRightCorner(tailRows, tailCols).template triangularView<UnitLower>();

      // FIXME add .noalias() once the triangular product can work inplace
      triFactor.row(i).tail(tailCols) = triFactor.row(i).tail(tailCols) * triFactor.bottomRightCorner(tailCols,tailCols).template triangularView<Upper>();
    }

    // The diagonal entry is the Householder coefficient itself.
    triFactor(i,i) = hCoeffs(i);
  }
}
void Eigen::internal::manage_caching_sizes | ( | Action | action, |
std::ptrdiff_t * | l1, | ||
std::ptrdiff_t * | l2, | ||
std::ptrdiff_t * | l3 | ||
) | [inline] |
Definition at line 55 of file GeneralBlockPanelKernel.h.
{
  // Single function-local store shared by the set and get actions.
  static CacheSizes m_cacheSizes;

  if(action==SetAction)
  {
    // set the cpu cache size and cache all block sizes from a global cache size in byte
    // All three pointers are dereferenced below, so all three are checked (l3 included).
    eigen_internal_assert(l1!=0 && l2!=0 && l3!=0);
    m_cacheSizes.m_l1 = *l1;
    m_cacheSizes.m_l2 = *l2;
    m_cacheSizes.m_l3 = *l3;
  }
  else if(action==GetAction)
  {
    // Same contract as above: every output pointer is written to.
    eigen_internal_assert(l1!=0 && l2!=0 && l3!=0);
    *l1 = m_cacheSizes.m_l1;
    *l2 = m_cacheSizes.m_l2;
    *l3 = m_cacheSizes.m_l3;
  }
  else
  {
    // Any other action value is a programming error.
    eigen_internal_assert(false);
  }
}
std::ptrdiff_t Eigen::internal::manage_caching_sizes_helper | ( | std::ptrdiff_t | a, |
std::ptrdiff_t | b | ||
) | [inline] |
Definition at line 23 of file GeneralBlockPanelKernel.h.
{
  // A non-positive a means "use the fallback b".
  if (a<=0)
    return b;
  return a;
}
void Eigen::internal::manage_multi_threading | ( | Action | action, |
int * | v | ||
) | [inline] |
Definition at line 18 of file Parallelizer.h.
{
  // -1 (any non-positive value) means no explicit thread count has been stored.
  static EIGEN_UNUSED int m_maxThreads = -1;

  if(action==SetAction)
  {
    eigen_internal_assert(v!=0);
    m_maxThreads = *v;
  }
  else if(action==GetAction)
  {
    eigen_internal_assert(v!=0);
    #ifdef EIGEN_HAS_OPENMP
    // Prefer the stored value; otherwise ask OpenMP.
    *v = (m_maxThreads>0) ? m_maxThreads : omp_get_max_threads();
    #else
    *v = 1;
    #endif
  }
  else
  {
    eigen_internal_assert(false);
  }
}
MappedSparseMatrix<Scalar,Flags,Index> Eigen::internal::map_superlu | ( | SluMatrix & | sluMat | ) |
View a Super LU matrix as an Eigen expression
Definition at line 272 of file SuperLUSupport.h.
{
  // The storage type recorded in the SuperLU matrix must match the requested layout flags.
  eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
            || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));

  Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;

  // outerInd[outerSize] is forwarded as the third constructor argument
  // (presumably the non-zero count - confirm against MappedSparseMatrix).
  return MappedSparseMatrix<Scalar,Flags,Index>(
    sluMat.nrow,
    sluMat.ncol,
    sluMat.storage.outerInd[outerSize],
    sluMat.storage.outerInd,
    sluMat.storage.innerInd,
    reinterpret_cast<Scalar*>(sluMat.storage.values) );
}
void Eigen::internal::minimum_degree_ordering | ( | SparseMatrix< Scalar, ColMajor, StorageIndex > & | C, |
PermutationMatrix< Dynamic, Dynamic, StorageIndex > & | perm | ||
) |
Approximate minimum degree ordering algorithm.
[in] | C | the input selfadjoint matrix stored in compressed column major format. |
[out] | perm | the permutation P reducing the fill-in of the input matrix C |
Note that the input matrix C must be complete, that is both the upper and lower parts have to be stored, as well as the diagonal entries. On exit the values of C are destroyed.
Definition at line 94 of file Amd.h.
/* Overview of the listing below (code unchanged): the dense threshold is max(16, 10*sqrt(n)) capped at n-2; perm is resized to n+1 and C's non-zero storage is grown to cnz + cnz/5 + 2*n; a single workspace W of 8*(n+1) entries is split into len, nv, next, head, elen, degree, w and hhead, while perm's index buffer doubles as `last`. The main loop runs until nel reaches n, and each pass goes through: pivot selection, optional garbage collection, element construction, set differences, degree update, supernode detection and element finalization. A final postordering pass fills perm through cs_tdfs, after which perm is shrunk back to n entries. */
{ using std::sqrt; StorageIndex d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1, k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi, ok, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, t, h; StorageIndex n = StorageIndex(C.cols()); dense = std::max<StorageIndex> (16, StorageIndex(10 * sqrt(double(n)))); /* find dense threshold */ dense = (std::min)(n-2, dense); StorageIndex cnz = StorageIndex(C.nonZeros()); perm.resize(n+1); t = cnz + cnz/5 + 2*n; /* add elbow room to C */ C.resizeNonZeros(t); // get workspace ei_declare_aligned_stack_constructed_variable(StorageIndex,W,8*(n+1),0); StorageIndex* len = W; StorageIndex* nv = W + (n+1); StorageIndex* next = W + 2*(n+1); StorageIndex* head = W + 3*(n+1); StorageIndex* elen = W + 4*(n+1); StorageIndex* degree = W + 5*(n+1); StorageIndex* w = W + 6*(n+1); StorageIndex* hhead = W + 7*(n+1); StorageIndex* last = perm.indices().data(); /* use P as workspace for last */ /* --- Initialize quotient graph ---------------------------------------- */ StorageIndex* Cp = C.outerIndexPtr(); StorageIndex* Ci = C.innerIndexPtr(); for(k = 0; k < n; k++) len[k] = Cp[k+1] - Cp[k]; len[n] = 0; nzmax = t; for(i = 0; i <= n; i++) { head[i] = -1; // degree list i is empty last[i] = -1; next[i] = -1; hhead[i] = -1; // hash list i is empty nv[i] = 1; // node i is just one node w[i] = 1; // node i is alive elen[i] = 0; // Ek of node i is empty degree[i] = len[i]; // degree of node i } mark = internal::cs_wclear<StorageIndex>(0, 0, w, n); /* clear w */ /* --- Initialize degree lists ------------------------------------------ */ for(i = 0; i < n; i++) { bool has_diag = false; for(p = Cp[i]; p<Cp[i+1]; ++p) if(Ci[p]==i) { has_diag = true; break; } d = degree[i]; if(d == 1 && has_diag) /* node i is empty */ { elen[i] = -2; /* element i is dead */ nel++; Cp[i] = -1; /* i is a root of assembly tree */ w[i] = 0; } else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */ { nv[i] = 0; /* 
absorb i into element n */ elen[i] = -1; /* node i is dead */ nel++; Cp[i] = amd_flip (n); nv[n]++; } else { if(head[d] != -1) last[head[d]] = i; next[i] = head[d]; /* put node i in degree list d */ head[d] = i; } } elen[n] = -2; /* n is a dead element */ Cp[n] = -1; /* n is a root of assembly tree */ w[n] = 0; /* n is a dead element */ while (nel < n) /* while (selecting pivots) do */ { /* --- Select node of minimum approximate degree -------------------- */ for(k = -1; mindeg < n && (k = head[mindeg]) == -1; mindeg++) {} if(next[k] != -1) last[next[k]] = -1; head[mindeg] = next[k]; /* remove k from degree list */ elenk = elen[k]; /* elenk = |Ek| */ nvk = nv[k]; /* # of nodes k represents */ nel += nvk; /* nv[k] nodes of A eliminated */ /* --- Garbage collection ------------------------------------------- */ if(elenk > 0 && cnz + mindeg >= nzmax) { for(j = 0; j < n; j++) { if((p = Cp[j]) >= 0) /* j is a live node or element */ { Cp[j] = Ci[p]; /* save first entry of object */ Ci[p] = amd_flip (j); /* first entry is now amd_flip(j) */ } } for(q = 0, p = 0; p < cnz; ) /* scan all of memory */ { if((j = amd_flip (Ci[p++])) >= 0) /* found object j */ { Ci[q] = Cp[j]; /* restore first entry of object */ Cp[j] = q++; /* new pointer to object j */ for(k3 = 0; k3 < len[j]-1; k3++) Ci[q++] = Ci[p++]; } } cnz = q; /* Ci[cnz...nzmax-1] now free */ } /* --- Construct new element ---------------------------------------- */ dk = 0; nv[k] = -nvk; /* flag k as in Lk */ p = Cp[k]; pk1 = (elenk == 0) ? 
p : cnz; /* do in place if elen[k] == 0 */ pk2 = pk1; for(k1 = 1; k1 <= elenk + 1; k1++) { if(k1 > elenk) { e = k; /* search the nodes in k */ pj = p; /* list of nodes starts at Ci[pj]*/ ln = len[k] - elenk; /* length of list of nodes in k */ } else { e = Ci[p++]; /* search the nodes in e */ pj = Cp[e]; ln = len[e]; /* length of list of nodes in e */ } for(k2 = 1; k2 <= ln; k2++) { i = Ci[pj++]; if((nvi = nv[i]) <= 0) continue; /* node i dead, or seen */ dk += nvi; /* degree[Lk] += size of node i */ nv[i] = -nvi; /* negate nv[i] to denote i in Lk*/ Ci[pk2++] = i; /* place i in Lk */ if(next[i] != -1) last[next[i]] = last[i]; if(last[i] != -1) /* remove i from degree list */ { next[last[i]] = next[i]; } else { head[degree[i]] = next[i]; } } if(e != k) { Cp[e] = amd_flip (k); /* absorb e into k */ w[e] = 0; /* e is now a dead element */ } } if(elenk != 0) cnz = pk2; /* Ci[cnz...nzmax] is free */ degree[k] = dk; /* external degree of k - |Lk\i| */ Cp[k] = pk1; /* element k is in Ci[pk1..pk2-1] */ len[k] = pk2 - pk1; elen[k] = -2; /* k is now an element */ /* --- Find set differences ----------------------------------------- */ mark = internal::cs_wclear<StorageIndex>(mark, lemax, w, n); /* clear w if necessary */ for(pk = pk1; pk < pk2; pk++) /* scan 1: find |Le\Lk| */ { i = Ci[pk]; if((eln = elen[i]) <= 0) continue;/* skip if elen[i] empty */ nvi = -nv[i]; /* nv[i] was negated */ wnvi = mark - nvi; for(p = Cp[i]; p <= Cp[i] + eln - 1; p++) /* scan Ei */ { e = Ci[p]; if(w[e] >= mark) { w[e] -= nvi; /* decrement |Le\Lk| */ } else if(w[e] != 0) /* ensure e is a live element */ { w[e] = degree[e] + wnvi; /* 1st time e seen in scan 1 */ } } } /* --- Degree update ------------------------------------------------ */ for(pk = pk1; pk < pk2; pk++) /* scan2: degree update */ { i = Ci[pk]; /* consider node i in Lk */ p1 = Cp[i]; p2 = p1 + elen[i] - 1; pn = p1; for(h = 0, d = 0, p = p1; p <= p2; p++) /* scan Ei */ { e = Ci[p]; if(w[e] != 0) /* e is an unabsorbed element */ { 
dext = w[e] - mark; /* dext = |Le\Lk| */ if(dext > 0) { d += dext; /* sum up the set differences */ Ci[pn++] = e; /* keep e in Ei */ h += e; /* compute the hash of node i */ } else { Cp[e] = amd_flip (k); /* aggressive absorb. e->k */ w[e] = 0; /* e is a dead element */ } } } elen[i] = pn - p1 + 1; /* elen[i] = |Ei| */ p3 = pn; p4 = p1 + len[i]; for(p = p2 + 1; p < p4; p++) /* prune edges in Ai */ { j = Ci[p]; if((nvj = nv[j]) <= 0) continue; /* node j dead or in Lk */ d += nvj; /* degree(i) += |j| */ Ci[pn++] = j; /* place j in node list of i */ h += j; /* compute hash for node i */ } if(d == 0) /* check for mass elimination */ { Cp[i] = amd_flip (k); /* absorb i into k */ nvi = -nv[i]; dk -= nvi; /* |Lk| -= |i| */ nvk += nvi; /* |k| += nv[i] */ nel += nvi; nv[i] = 0; elen[i] = -1; /* node i is dead */ } else { degree[i] = std::min<StorageIndex> (degree[i], d); /* update degree(i) */ Ci[pn] = Ci[p3]; /* move first node to end */ Ci[p3] = Ci[p1]; /* move 1st el. to end of Ei */ Ci[p1] = k; /* add k as 1st element in of Ei */ len[i] = pn - p1 + 1; /* new len of adj. 
list of node i */ h %= n; /* finalize hash of i */ next[i] = hhead[h]; /* place i in hash bucket */ hhead[h] = i; last[i] = h; /* save hash of i in last[i] */ } } /* scan2 is done */ degree[k] = dk; /* finalize |Lk| */ lemax = std::max<StorageIndex>(lemax, dk); mark = internal::cs_wclear<StorageIndex>(mark+lemax, lemax, w, n); /* clear w */ /* --- Supernode detection ------------------------------------------ */ for(pk = pk1; pk < pk2; pk++) { i = Ci[pk]; if(nv[i] >= 0) continue; /* skip if i is dead */ h = last[i]; /* scan hash bucket of node i */ i = hhead[h]; hhead[h] = -1; /* hash bucket will be empty */ for(; i != -1 && next[i] != -1; i = next[i], mark++) { ln = len[i]; eln = elen[i]; for(p = Cp[i]+1; p <= Cp[i] + ln-1; p++) w[Ci[p]] = mark; jlast = i; for(j = next[i]; j != -1; ) /* compare i with all j */ { ok = (len[j] == ln) && (elen[j] == eln); for(p = Cp[j] + 1; ok && p <= Cp[j] + ln - 1; p++) { if(w[Ci[p]] != mark) ok = 0; /* compare i and j*/ } if(ok) /* i and j are identical */ { Cp[j] = amd_flip (i); /* absorb j into i */ nv[i] += nv[j]; nv[j] = 0; elen[j] = -1; /* node j is dead */ j = next[j]; /* delete j from hash bucket */ next[jlast] = j; } else { jlast = j; /* j and i are different */ j = next[j]; } } } } /* --- Finalize new element------------------------------------------ */ for(p = pk1, pk = pk1; pk < pk2; pk++) /* finalize Lk */ { i = Ci[pk]; if((nvi = -nv[i]) <= 0) continue;/* skip if i is dead */ nv[i] = nvi; /* restore nv[i] */ d = degree[i] + dk - nvi; /* compute external degree(i) */ d = std::min<StorageIndex> (d, n - nel - nvi); if(head[d] != -1) last[head[d]] = i; next[i] = head[d]; /* put i back in degree list */ last[i] = -1; head[d] = i; mindeg = std::min<StorageIndex> (mindeg, d); /* find new minimum degree */ degree[i] = d; Ci[p++] = i; /* place i in Lk */ } nv[k] = nvk; /* # nodes absorbed into k */ if((len[k] = p-pk1) == 0) /* length of adj list of element k*/ { Cp[k] = -1; /* k is a root of the tree */ w[k] = 0; /* k is now a 
dead element */ } if(elenk != 0) cnz = p; /* free unused space in Lk */ } /* --- Postordering ----------------------------------------------------- */ for(i = 0; i < n; i++) Cp[i] = amd_flip (Cp[i]);/* fix assembly tree */ for(j = 0; j <= n; j++) head[j] = -1; for(j = n; j >= 0; j--) /* place unordered nodes in lists */ { if(nv[j] > 0) continue; /* skip if j is an element */ next[j] = head[Cp[j]]; /* place j in list of its parent */ head[Cp[j]] = j; } for(e = n; e >= 0; e--) /* place elements in lists */ { if(nv[e] <= 0) continue; /* skip unless e is an element */ if(Cp[e] != -1) { next[e] = head[Cp[e]]; /* place e in list of its parent */ head[Cp[e]] = e; } } for(k = 0, i = 0; i <= n; i++) /* postorder the assembly tree */ { if(Cp[i] == -1) k = internal::cs_tdfs<StorageIndex>(i, k, head, next, perm.indices().data(), w); } perm.indices().conservativeResize(n); }
void Eigen::internal::nr_etdfs | ( | typename IndexVector::Scalar | n, |
IndexVector & | parent, | ||
IndexVector & | first_kid, | ||
IndexVector & | next_kid, | ||
IndexVector & | post, | ||
typename IndexVector::Scalar | postnum | ||
) |
Depth-first search from vertex n. No recursion. This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France.
Definition at line 130 of file SparseColEtree.h.
{
  typedef typename IndexVector::Scalar StorageIndex;
  StorageIndex current = n, first, next;

  while (postnum != n)
  {
    first = first_kid(current);

    // Descend while the current node still has a first kid.
    if (first != -1)
    {
      current = first;
      continue;
    }

    // No kid for the current node: number it.
    post(current) = postnum++;

    // Look for the next kid, climbing to (and numbering) parents while there is none.
    next = next_kid(current);
    while (next == -1)
    {
      current = parent(current);
      post(current) = postnum++;
      next = next_kid(current);
    }

    // stopping criterion
    if (postnum == n+1) return;

    current = next;
  }
}
std::ostream& Eigen::internal::operator<< | ( | std::ostream & | s, |
const Packet16uc & | v | ||
) | [inline] |
Definition at line 159 of file AltiVec/PacketMath.h.
{
  // View the packet as sixteen bytes through a union.
  union {
    Packet16uc    v;
    unsigned char n[16];
  } vt;
  vt.v = v;

  // Each byte is converted to int before printing; every entry, the last included, is followed by ", ".
  for (int i=0; i< 16; i++)
    s << static_cast<int>(vt.n[i]) << ", ";
  return s;
}
std::ostream& Eigen::internal::operator<< | ( | std::ostream & | s, |
const Packet4f & | v | ||
) | [inline] |
Definition at line 171 of file AltiVec/PacketMath.h.
{
  // View the packet as four floats through a union.
  union {
    Packet4f v;
    float    n[4];
  } vt;
  vt.v = v;

  // Comma-separated, no trailing separator.
  for (int i = 0; i < 4; ++i)
  {
    if (i != 0) s << ", ";
    s << vt.n[i];
  }
  return s;
}
std::ostream& Eigen::internal::operator<< | ( | std::ostream & | s, |
const Packet4i & | v | ||
) | [inline] |
Definition at line 182 of file AltiVec/PacketMath.h.
{
  // View the packet as four ints through a union.
  union {
    Packet4i v;
    int      n[4];
  } vt;
  vt.v = v;

  // Comma-separated, no trailing separator.
  for (int i = 0; i < 4; ++i)
  {
    if (i != 0) s << ", ";
    s << vt.n[i];
  }
  return s;
}
std::ostream& Eigen::internal::operator<< | ( | std::ostream & | s, |
const Packet4ui & | v | ||
) | [inline] |
Definition at line 193 of file AltiVec/PacketMath.h.
{
  // View the packet as four unsigned ints through a union.
  union {
    Packet4ui    v;
    unsigned int n[4];
  } vt;
  vt.v = v;

  // Comma-separated, no trailing separator.
  for (int i = 0; i < 4; ++i)
  {
    if (i != 0) s << ", ";
    s << vt.n[i];
  }
  return s;
}
static void Eigen::internal::order_children | ( | IndexType | n_col, |
colamd_col< IndexType > | Col[], | ||
IndexType | p[] | ||
) | [inline, static] |
Definition at line 1448 of file Ordering.h.
void Eigen::internal::ordering_helper_at_plus_a | ( | const MatrixType & | A, |
MatrixType & | symmat | ||
) |
[in] | A | the input non-symmetric matrix |
[out] | symmat | the symmetric pattern A^T+A from the input matrix A. FIXME: The values should not be considered here |
Definition at line 27 of file Ordering.h.
{
  MatrixType C;
  C = A.transpose(); // NOTE: Could be costly

  // Overwrite every stored value of C with zero.
  for (int i = 0; i < C.rows(); i++)
  {
    typename MatrixType::InnerIterator it(C, i);
    while (it)
    {
      it.valueRef() = 0.0;
      ++it;
    }
  }

  symmat = C + A;
}
EIGEN_DONT_INLINE void Eigen::internal::outer_product_selector_run | ( | Dst & | dst, |
const Lhs & | lhs, | ||
const Rhs & | rhs, | ||
const Func & | func, | ||
const false_type & | |||
) |
Definition at line 246 of file ProductEvaluators.h.
{
  evaluator<Rhs> rhsEval(rhs);
  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too

  // Column j of dst receives (rhs coefficient (0,j)) * lhs through func.
  const Index cols = dst.cols();
  for (Index col = 0; col < cols; ++col)
    func(dst.col(col), rhsEval.coeff(0,col) * actual_lhs);
}
EIGEN_DONT_INLINE void Eigen::internal::outer_product_selector_run | ( | Dst & | dst, |
const Lhs & | lhs, | ||
const Rhs & | rhs, | ||
const Func & | func, | ||
const true_type & | |||
) |
Definition at line 259 of file ProductEvaluators.h.
{
  evaluator<Lhs> lhsEval(lhs);
  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too

  // Row i of dst receives (lhs coefficient (i,0)) * rhs through func.
  const Index rows = dst.rows();
  for (Index row = 0; row < rows; ++row)
    func(dst.row(row), lhsEval.coeff(row,0) * actual_rhs);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pabs | ( | const Packet & | a | ) | [inline] |
Definition at line 184 of file GenericPacketMath.h.
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pabs | ( | const Packet8f & | a | ) |
Definition at line 334 of file AVX/PacketMath.h.
{
  // 0x7FFFFFFF in every 32-bit lane: ANDing with it clears the top (sign) bit of each float.
  const Packet8f mask = _mm256_castsi256_ps(
      _mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,
                        0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
  return _mm256_and_ps(a,mask);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pabs | ( | const Packet4d & | a | ) |
Definition at line 339 of file AVX/PacketMath.h.
{
  // Per 64-bit lane: low word all ones, high word 0x7FFFFFFF, so only the top bit is cleared.
  const Packet4d mask = _mm256_castsi256_pd(
      _mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,
                        0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
  return _mm256_and_pd(a,mask);
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pabs | ( | const Packet2d & | a | ) |
Definition at line 466 of file SSE/PacketMath.h.
{
  // Per 64-bit lane: low word all ones, high word 0x7FFFFFFF, so only the top bit is cleared.
  const Packet2d mask = _mm_castsi128_pd(
      _mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
  return _mm_and_pd(a,mask);
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pabs | ( | const Packet4f & | a | ) |
Definition at line 508 of file AltiVec/PacketMath.h.
{
  // Delegates to vec_abs on the float packet.
  const Packet4f magnitude = vec_abs(a);
  return magnitude;
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pabs | ( | const Packet4i & | a | ) |
Definition at line 509 of file AltiVec/PacketMath.h.
{
  // Delegates to vec_abs on the integer packet.
  const Packet4i magnitude = vec_abs(a);
  return magnitude;
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pacos | ( | const Packet & | a | ) |
Definition at line 392 of file GenericPacketMath.h.
EIGEN_DEVICE_FUNC Packet Eigen::internal::padd | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
DoublePacket<Packet> Eigen::internal::padd | ( | const DoublePacket< Packet > & | a, |
const DoublePacket< Packet > & | b | ||
) |
Definition at line 589 of file GeneralBlockPanelKernel.h.
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::padd< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 301 of file SSE/Complex.h.
{
  // Add the wrapped vectors lane by lane and re-wrap the result.
  Packet1cd sum(_mm_add_pd(a.v, b.v));
  return sum;
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::padd< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 272 of file AVX/Complex.h.
{
  // Add the wrapped vectors lane by lane and re-wrap the result.
  Packet2cd sum(_mm256_add_pd(a.v, b.v));
  return sum;
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::padd< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 86 of file AltiVec/Complex.h.
{
  // Add the wrapped vectors with vec_add and re-wrap the result.
  Packet2cf sum(vec_add(a.v, b.v));
  return sum;
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::padd< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 194 of file SSE/PacketMath.h.
{
  // Lane-wise sum via _mm_add_pd.
  const Packet2d sum = _mm_add_pd(a, b);
  return sum;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::padd< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 50 of file AVX/Complex.h.
{
  // Add the wrapped vectors lane by lane and re-wrap the result.
  Packet4cf sum(_mm256_add_ps(a.v, b.v));
  return sum;
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::padd< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 125 of file AVX/PacketMath.h.
{
  // Lane-wise sum via _mm256_add_pd.
  const Packet4d sum = _mm256_add_pd(a, b);
  return sum;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::padd< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 300 of file AltiVec/PacketMath.h.
{
  // Lane-wise sum via vec_add.
  const Packet4f sum = vec_add(a, b);
  return sum;
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::padd< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 301 of file AltiVec/PacketMath.h.
{
  // Lane-wise sum via vec_add.
  const Packet4i sum = vec_add(a, b);
  return sum;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::padd< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 124 of file AVX/PacketMath.h.
{
  // Lane-wise sum via _mm256_add_ps.
  const Packet8f sum = _mm256_add_ps(a, b);
  return sum;
}
void Eigen::internal::palign | ( | PacketType & | first, |
const PacketType & | second | ||
) | [inline] |
update first using the concatenation of the packet_size minus Offset last elements of first and Offset first elements of second.
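This function is currently only used to optimize matrix-vector products on unaligned matrices. It takes 2 packets that represent a contiguous memory array, and returns a packet starting at the position Offset. For instance, for packets of 4 elements, we have:
Input: first = {f0,f1,f2,f3}, second = {s0,s1,s2,s3}
Output: if Offset==0 then {f0,f1,f2,f3}; if Offset==1 then {f1,f2,f3,s0}; if Offset==2 then {f2,f3,s0,s1}; if Offset==3 then {f3,s0,s1,s2}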
Definition at line 536 of file GenericPacketMath.h.
{
  // All the work is delegated to the palign_impl helper selected by Offset and PacketType.
  palign_impl<Offset,PacketType>::run(first, second);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pand | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
Definition at line 192 of file GenericPacketMath.h.
{
  // Generic version: relies on operator& of the packet type.
  return a & b;
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pand< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 324 of file SSE/Complex.h.
{
  // AND the wrapped vectors and re-wrap the result.
  Packet1cd masked(_mm_and_pd(a.v, b.v));
  return masked;
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pand< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 291 of file AVX/Complex.h.
{
  // AND the wrapped vectors and re-wrap the result.
  Packet2cd masked(_mm256_and_pd(a.v, b.v));
  return masked;
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pand< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 110 of file AltiVec/Complex.h.
{
  // AND the wrapped vectors with vec_and and re-wrap the result.
  Packet2cf masked(vec_and(a.v, b.v));
  return masked;
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pand< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 286 of file SSE/PacketMath.h.
{
  // Bitwise AND via _mm_and_pd.
  const Packet2d masked = _mm_and_pd(a, b);
  return masked;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pand< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 70 of file AVX/Complex.h.
{
  // AND the wrapped vectors and re-wrap the result.
  Packet4cf masked(_mm256_and_ps(a.v, b.v));
  return masked;
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pand< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 196 of file AVX/PacketMath.h.
{
  // Bitwise AND via _mm256_and_pd.
  const Packet4d masked = _mm256_and_pd(a, b);
  return masked;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pand< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 382 of file AltiVec/PacketMath.h.
{
  // Bitwise AND via vec_and.
  const Packet4f masked = vec_and(a, b);
  return masked;
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pand< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 383 of file AltiVec/PacketMath.h.
{
  // Bitwise AND via vec_and.
  const Packet4i masked = vec_and(a, b);
  return masked;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pand< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 195 of file AVX/PacketMath.h.
{
  // Bitwise AND via _mm256_and_ps.
  const Packet8f masked = _mm256_and_ps(a, b);
  return masked;
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pandnot | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
Definition at line 204 of file GenericPacketMath.h.
{
  // Generic and-not: a AND (bitwise complement of b).
  // The complement must be bitwise (~). Logical negation (!) would collapse b to 0 or 1
  // and keep at most the lowest bit of a.
  return a & (~b);
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pandnot< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 327 of file SSE/Complex.h.
{
  // NOTE(review): per the Intel intrinsics reference, _mm_andnot_pd(x,y) computes (~x) & y,
  // i.e. the FIRST operand is complemented here, whereas the AltiVec specializations of
  // pandnot complement b - confirm which operand is meant to be negated.
  Packet1cd result(_mm_andnot_pd(a.v, b.v));
  return result;
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pandnot< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 294 of file AVX/Complex.h.
{
  // NOTE(review): per the Intel intrinsics reference, _mm256_andnot_pd(x,y) computes (~x) & y,
  // i.e. the FIRST operand is complemented here, whereas the AltiVec specializations of
  // pandnot complement b - confirm which operand is meant to be negated.
  Packet2cd result(_mm256_andnot_pd(a.v, b.v));
  return result;
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pandnot< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 113 of file AltiVec/Complex.h.
{
  // vec_nor(x,x) yields the bitwise complement of x, so this computes a.v & ~b.v.
  Packet2cf result(vec_and(a.v, vec_nor(b.v, b.v)));
  return result;
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pandnot< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 298 of file SSE/PacketMath.h.
{
  // NOTE(review): per the Intel intrinsics reference, _mm_andnot_pd(x,y) computes (~x) & y,
  // i.e. the FIRST operand is complemented here, whereas the AltiVec specializations of
  // pandnot complement b - confirm which operand is meant to be negated.
  const Packet2d result = _mm_andnot_pd(a, b);
  return result;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pandnot< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 73 of file AVX/Complex.h.
{
  // NOTE(review): per the Intel intrinsics reference, _mm256_andnot_ps(x,y) computes (~x) & y,
  // i.e. the FIRST operand is complemented here, whereas the AltiVec specializations of
  // pandnot complement b - confirm which operand is meant to be negated.
  Packet4cf result(_mm256_andnot_ps(a.v, b.v));
  return result;
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pandnot< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 205 of file AVX/PacketMath.h.
{
  // _mm256_andnot_pd(x, y) computes (~x) & y, so b goes first to obtain a & ~b,
  // the same result the AltiVec specializations produce with vec_and(a, vec_nor(b, b)).
  return _mm256_andnot_pd(b,a);
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pandnot< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 391 of file AltiVec/PacketMath.h.
{
  // vec_nor(b, b) yields the bitwise complement of b, so the result is a & ~b.
  const Packet4f not_b = vec_nor(b, b);
  return vec_and(a, not_b);
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pandnot< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 392 of file AltiVec/PacketMath.h.
{
  // vec_nor(b, b) yields the bitwise complement of b, so the result is a & ~b.
  const Packet4i not_b = vec_nor(b, b);
  return vec_and(a, not_b);
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pandnot< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 204 of file AVX/PacketMath.h.
{
  // _mm256_andnot_ps(x, y) computes (~x) & y, so b goes first to obtain a & ~b,
  // the same result the AltiVec specializations produce with vec_and(a, vec_nor(b, b)).
  return _mm256_andnot_ps(b,a);
}
void Eigen::internal::parallelize_gemm | ( | const Functor & | func, |
Index | rows, | ||
Index | cols, | ||
bool | transpose | ||
) |
Definition at line 86 of file Parallelizer.h.
{
  // Runs func over the whole (rows x cols) product, either in one call or,
  // when OpenMP is available and worthwhile, split into one slice per thread.
  // Line structure restored: the listing had been collapsed onto one line,
  // which left the preprocessor directives and line comments ill-formed.

  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(transpose);
  func(0,rows, 0,cols);
#else

  // Dynamically check whether we should enable or disable OpenMP.
  // The conditions are:
  // - the max number of threads we can create is greater than 1
  // - we are not already in a parallel code
  // - the sizes are large enough

  // compute the maximal number of threads from the size of the product:
  // FIXME this has to be fine tuned
  Index size = transpose ? rows : cols;
  Index pb_max_threads = std::max<Index>(1,size / 32);

  // compute the number of threads we are going to use
  Index threads = std::min<Index>(nbThreads(), pb_max_threads);

  // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
  // then abort multi-threading
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  Eigen::initParallel();
  func.initParallelSession(threads);

  if(transpose)
    std::swap(rows,cols);

  ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);

  #pragma omp parallel num_threads(threads)
  {
    Index i = omp_get_thread_num();
    // Note that the actual number of threads might be lower than the number of requested ones.
    Index actual_threads = omp_get_num_threads();

    // Column blocks are rounded down to a multiple of 4, row blocks to a multiple of Traits::mr.
    Index blockCols = (cols / actual_threads) & ~Index(0x3);
    Index blockRows = (rows / actual_threads);
    blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;

    // The last thread takes whatever remains after the rounded blocks.
    Index r0 = i*blockRows;
    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;

    Index c0 = i*blockCols;
    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;

    info[i].lhs_start = r0;
    info[i].lhs_length = actualBlockRows;

    if(transpose) func(c0, actualBlockCols, 0, rows, info);
    else          func(0, rows, c0, actualBlockCols, info);
  }
#endif
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::parg | ( | const Packet & | a | ) | [inline] |
Definition at line 188 of file GenericPacketMath.h.
{
  // The using-declaration keeps the call unqualified, so numext::arg is a
  // candidate alongside any overload found by argument-dependent lookup.
  using numext::arg;
  const Packet result = arg(a);
  return result;
}
void Eigen::internal::partial_lu_inplace | ( | MatrixType & | lu, |
TranspositionType & | row_transpositions, | ||
typename TranspositionType::StorageIndex & | nb_transpositions | ||
) |
Performs the LU decomposition with partial pivoting, in place.
Definition at line 448 of file PartialPivLU.h.
{
  // One transposition entry is expected per column of lu.
  eigen_assert(lu.cols() == row_transpositions.size());
  // The transposition coefficients must be stored contiguously, because a raw
  // pointer to the first one is handed to blocked_lu below.
  eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);

  // Select the implementation matching the scalar type, storage order and index type.
  typedef partial_lu_impl
    <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex>
    LuImpl;

  LuImpl::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(),
                     &row_transpositions.coeffRef(0), nb_transpositions);
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pasin | ( | const Packet & | a | ) |
Definition at line 388 of file GenericPacketMath.h.
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::patan | ( | const Packet & | a | ) |
Definition at line 396 of file GenericPacketMath.h.
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pblend | ( | const Selector< 2 > & | ifPacket, |
const Packet2cf & | thenPacket, | ||
const Packet2cf & | elsePacket | ||
) |
Definition at line 474 of file SSE/Complex.h.
{
  // Reinterpret the float lanes as doubles so the two-entry selector picks
  // whole 64-bit lanes, then reuse the Packet2d blend.
  const __m128d then_pd = _mm_castps_pd(thenPacket.v);
  const __m128d else_pd = _mm_castps_pd(elsePacket.v);
  const __m128d blended = pblend<Packet2d>(ifPacket, then_pd, else_pd);
  return Packet2cf(_mm_castpd_ps(blended));
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pblend | ( | const Selector< unpacket_traits< Packet >::size > & | ifPacket, |
const Packet & | thenPacket, | ||
const Packet & | elsePacket | ||
) | [inline] |
Definition at line 579 of file GenericPacketMath.h.
{
  // Generic fallback: only the first selector entry is consulted.
  if (ifPacket.select[0])
    return thenPacket;
  return elsePacket;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pblend | ( | const Selector< 8 > & | ifPacket, |
const Packet8f & | thenPacket, | ||
const Packet8f & | elsePacket | ||
) |
Definition at line 591 of file AVX/PacketMath.h.
{
  const __m256 all_zero = _mm256_setzero_ps();
  // _mm256_set_ps takes the highest lane first, hence the descending indices.
  const __m256 selector = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
                                        ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  // Lanes whose selector compares equal to zero are taken from elsePacket.
  const __m256 pick_else = _mm256_cmp_ps(selector, all_zero, _CMP_EQ_UQ);
  return _mm256_blendv_ps(thenPacket, elsePacket, pick_else);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pblend | ( | const Selector< 4 > & | ifPacket, |
const Packet4d & | thenPacket, | ||
const Packet4d & | elsePacket | ||
) |
Definition at line 597 of file AVX/PacketMath.h.
{
  const __m256d all_zero = _mm256_setzero_pd();
  // _mm256_set_pd takes the highest lane first, hence the descending indices.
  const __m256d selector = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  // Lanes whose selector compares equal to zero are taken from elsePacket.
  const __m256d pick_else = _mm256_cmp_pd(selector, all_zero, _CMP_EQ_UQ);
  return _mm256_blendv_pd(thenPacket, elsePacket, pick_else);
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pblend | ( | const Selector< 4 > & | ifPacket, |
const Packet4i & | thenPacket, | ||
const Packet4i & | elsePacket | ||
) |
Definition at line 809 of file SSE/PacketMath.h.
{
  // Line structure restored: preprocessor directives must start their own line.
  const __m128i zero = _mm_setzero_si128();
  const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  // All-ones in the lanes whose selector is zero, i.e. the lanes that take elsePacket.
  __m128i false_mask = _mm_cmpeq_epi32(select, zero);
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
#else
  // Without SSE4.1: (~mask & then) | (mask & else).
  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
#endif
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pblend | ( | const Selector< 4 > & | ifPacket, |
const Packet4f & | thenPacket, | ||
const Packet4f & | elsePacket | ||
) |
Definition at line 819 of file SSE/PacketMath.h.
{
  // Line structure restored: preprocessor directives must start their own line.
  const __m128 zero = _mm_setzero_ps();
  const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  // All-ones in the lanes whose selector is zero, i.e. the lanes that take elsePacket.
  __m128 false_mask = _mm_cmpeq_ps(select, zero);
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
#else
  // Without SSE4.1: (~mask & then) | (mask & else).
  return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
#endif
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pblend | ( | const Selector< 2 > & | ifPacket, |
const Packet2d & | thenPacket, | ||
const Packet2d & | elsePacket | ||
) |
Definition at line 829 of file SSE/PacketMath.h.
{
  // Line structure restored: preprocessor directives must start their own line.
  const __m128d zero = _mm_setzero_pd();
  const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
  // All-ones in the lanes whose selector is zero, i.e. the lanes that take elsePacket.
  __m128d false_mask = _mm_cmpeq_pd(select, zero);
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
#else
  // Without SSE4.1: (~mask & then) | (mask & else).
  return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
#endif
}
EIGEN_DEVICE_FUNC void Eigen::internal::pbroadcast2 | ( | const typename unpacket_traits< Packet >::type * | a, |
Packet & | a0, | ||
Packet & | a1 | ||
) | [inline] |
Equivalent to: a0 = pload1<Packet>(a+0); a1 = pload1<Packet>(a+1);
Definition at line 267 of file GenericPacketMath.h.
{
  // Each output packet is filled from one consecutive scalar of a via pload1.
  a0 = pload1<Packet>(&a[0]);
  a1 = pload1<Packet>(&a[1]);
}
EIGEN_DEVICE_FUNC void Eigen::internal::pbroadcast4 | ( | const typename unpacket_traits< Packet >::type * | a, |
Packet & | a0, | ||
Packet & | a1, | ||
Packet & | a2, | ||
Packet & | a3 | ||
) | [inline] |
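Equivalent to: a0 = pload1<Packet>(a+0); a1 = pload1<Packet>(a+1); a2 = pload1<Packet>(a+2); a3 = pload1<Packet>(a+3);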
Definition at line 250 of file GenericPacketMath.h.
{
  // Each output packet is filled from one consecutive scalar of a via pload1.
  a0 = pload1<Packet>(&a[0]);
  a1 = pload1<Packet>(&a[1]);
  a2 = pload1<Packet>(&a[2]);
  a3 = pload1<Packet>(&a[3]);
}
EIGEN_STRONG_INLINE void Eigen::internal::pbroadcast4< Packet2d > | ( | const double * | a, |
Packet2d & | a0, | ||
Packet2d & | a1, | ||
Packet2d & | a2, | ||
Packet2d & | a3 | ||
) |
Definition at line 494 of file SSE/PacketMath.h.
{
  // Line structure restored: preprocessor directives must start their own line.
#ifdef EIGEN_VECTORIZE_SSE3
  // SSE3 can load one double and duplicate it into both lanes directly.
  a0 = _mm_loaddup_pd(a+0);
  a1 = _mm_loaddup_pd(a+1);
  a2 = _mm_loaddup_pd(a+2);
  a3 = _mm_loaddup_pd(a+3);
#else
  // Otherwise load two doubles at a time and duplicate each lane with a swizzle.
  // a1 and a3 double as temporaries holding the loaded pair, so order matters.
  a1 = pload<Packet2d>(a);
  a0 = vec2d_swizzle1(a1, 0,0);
  a1 = vec2d_swizzle1(a1, 1,1);
  a3 = pload<Packet2d>(a+2);
  a2 = vec2d_swizzle1(a3, 0,0);
  a3 = vec2d_swizzle1(a3, 1,1);
#endif
}
EIGEN_STRONG_INLINE void Eigen::internal::pbroadcast4< Packet4f > | ( | const float * | a, |
Packet4f & | a0, | ||
Packet4f & | a1, | ||
Packet4f & | a2, | ||
Packet4f & | a3 | ||
) |
Definition at line 240 of file AltiVec/PacketMath.h.
{
  // Load the four floats once, then splat each lane into its own packet.
  const Packet4f quad = pload<Packet4f>(a);
  a0 = vec_splat(quad, 0);
  a1 = vec_splat(quad, 1);
  a2 = vec_splat(quad, 2);
  a3 = vec_splat(quad, 3);
}
EIGEN_STRONG_INLINE void Eigen::internal::pbroadcast4< Packet4i > | ( | const int * | a, |
Packet4i & | a0, | ||
Packet4i & | a1, | ||
Packet4i & | a2, | ||
Packet4i & | a3 | ||
) |
Definition at line 250 of file AltiVec/PacketMath.h.
{
  // Load the four ints once, then splat each lane into its own packet.
  const Packet4i quad = pload<Packet4i>(a);
  a0 = vec_splat(quad, 0);
  a1 = vec_splat(quad, 1);
  a2 = vec_splat(quad, 2);
  a3 = vec_splat(quad, 3);
}
EIGEN_DEVICE_FUNC TgtPacket Eigen::internal::pcast | ( | const SrcPacket & | a | ) | [inline] |
Definition at line 128 of file GenericPacketMath.h.
{
  // Generic fallback: plain static_cast of the source packet.
  const TgtPacket converted = static_cast<TgtPacket>(a);
  return converted;
}
EIGEN_DEVICE_FUNC TgtPacket Eigen::internal::pcast | ( | const SrcPacket & | a, |
const SrcPacket & | |||
) | [inline] |
Definition at line 133 of file GenericPacketMath.h.
{
  // Generic fallback: only the first source packet is converted; the second
  // (unnamed) argument is ignored.
  const TgtPacket converted = static_cast<TgtPacket>(a);
  return converted;
}
EIGEN_DEVICE_FUNC TgtPacket Eigen::internal::pcast | ( | const SrcPacket & | a, |
const SrcPacket & | , | ||
const SrcPacket & | , | ||
const SrcPacket & | |||
) | [inline] |
Definition at line 139 of file GenericPacketMath.h.
{
  // Generic fallback: only the first source packet is converted; the three
  // remaining (unnamed) arguments are ignored.
  const TgtPacket converted = static_cast<TgtPacket>(a);
  return converted;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pcast< Packet2d, Packet4f > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 54 of file SSE/TypeCasting.h.
{
  // Convert each pair of doubles to floats, then gather lanes {0,1} of the
  // first conversion and lanes {0,1} of the second into one packet.
  const __m128 from_a = _mm_cvtpd_ps(a);
  const __m128 from_b = _mm_cvtpd_ps(b);
  return _mm_shuffle_ps(from_a, from_b, (1 << 2) | (1 << 6));
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pcast< Packet4f, Packet2d > | ( | const Packet4f & | a | ) |
Definition at line 67 of file SSE/TypeCasting.h.
{
  // Simply discard the second half of the input
  // (line break restored: on a single line the comment swallowed the return statement).
  return _mm_cvtps_pd(a);
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pcast< Packet4f, Packet4i > | ( | const Packet4f & | a | ) |
Definition at line 26 of file SSE/TypeCasting.h.
{
  // Truncating float -> int32 conversion.
  const Packet4i truncated = _mm_cvttps_epi32(a);
  return truncated;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pcast< Packet4i, Packet4f > | ( | const Packet4i & | a | ) |
Definition at line 40 of file SSE/TypeCasting.h.
{
  // int32 -> float conversion of all four lanes.
  const Packet4f as_float = _mm_cvtepi32_ps(a);
  return as_float;
}
EIGEN_STRONG_INLINE Packet8i Eigen::internal::pcast< Packet8f, Packet8i > | ( | const Packet8f & | a | ) |
Definition at line 39 of file AVX/TypeCasting.h.
{
  // Use the truncating conversion (cvtt) so the result agrees with the SSE
  // Packet4f -> Packet4i cast and with the static_cast used by the generic
  // version; _mm256_cvtps_epi32 would round according to the current rounding mode.
  return _mm256_cvttps_epi32(a);
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pcast< Packet8i, Packet8f > | ( | const Packet8i & | a | ) |
Definition at line 43 of file AVX/TypeCasting.h.
{
  // int32 -> float conversion of all eight lanes.
  const Packet8f as_float = _mm256_cvtepi32_ps(a);
  return as_float;
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pceil | ( | const Packet & | a | ) |
Definition at line 442 of file GenericPacketMath.h.
{
  // The using-declaration keeps the call unqualified, so numext::ceil is a
  // candidate alongside any overload found by argument-dependent lookup.
  using numext::ceil;
  const Packet result = ceil(a);
  return result;
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pceil< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 190 of file AVX/PacketMath.h.
{
  // Lane-wise ceiling of the four doubles.
  const Packet4d rounded_up = _mm256_ceil_pd(a);
  return rounded_up;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pceil< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 189 of file AVX/PacketMath.h.
{
  // Lane-wise ceiling of the eight floats.
  const Packet8f rounded_up = _mm256_ceil_ps(a);
  return rounded_up;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pconj | ( | const Packet4cf & | a | ) |
Definition at line 56 of file AVX/Complex.h.
{
  // Sign bit set in every odd 32-bit lane (setr lists lanes in ascending order);
  // XOR-ing with it flips the sign of those lanes only.
  const __m256i odd_lane_sign_bits =
      _mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000);
  const __m256 mask = _mm256_castsi256_ps(odd_lane_sign_bits);
  return Packet4cf(_mm256_xor_ps(a.v,mask));
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pconj | ( | const Packet2cf & | a | ) |
Definition at line 89 of file AltiVec/Complex.h.
{
  // XOR the raw bits with the p4ui_CONJ_XOR constant, then view the result as floats again.
  const Packet4ui flipped = vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR);
  return Packet2cf((Packet4f)flipped);
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pconj | ( | const Packet8f & | a | ) |
Definition at line 139 of file AVX/PacketMath.h.
// pconj for Packet8f returns its argument unchanged.
{ return a; }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pconj | ( | const Packet4d & | a | ) |
Definition at line 140 of file AVX/PacketMath.h.
// pconj for Packet4d returns its argument unchanged.
{ return a; }
EIGEN_STRONG_INLINE Packet8i Eigen::internal::pconj | ( | const Packet8i & | a | ) |
Definition at line 141 of file AVX/PacketMath.h.
// pconj for Packet8i returns its argument unchanged.
{ return a; }
EIGEN_DEVICE_FUNC Packet Eigen::internal::pconj | ( | const Packet & | a | ) | [inline] |
Definition at line 160 of file GenericPacketMath.h.
{
  // Generic fallback: defer to numext::conj.
  const Packet conjugated = numext::conj(a);
  return conjugated;
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pconj | ( | const Packet2d & | a | ) |
Definition at line 217 of file SSE/PacketMath.h.
// pconj for Packet2d returns its argument unchanged.
{ return a; }
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pconj | ( | const Packet2cd & | a | ) |
Definition at line 275 of file AVX/Complex.h.
{
  // _mm256_set_epi32 lists lanes from highest to lowest, so this sets the top
  // bit of 32-bit lanes 7 and 3, i.e. the sign bit of the 2nd and 4th doubles.
  const __m256i sign_bits = _mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0);
  const __m256d mask = _mm256_castsi256_pd(sign_bits);
  return Packet2cd(_mm256_xor_pd(a.v,mask));
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pconj | ( | const Packet1cd & | a | ) |
Definition at line 304 of file SSE/Complex.h.
{
  // _mm_set_epi32 lists lanes from highest to lowest, so this sets the top bit
  // of 32-bit lane 3, i.e. the sign bit of the upper double.
  const __m128i sign_bit = _mm_set_epi32(0x80000000,0x0,0x0,0x0);
  const __m128d mask = _mm_castsi128_pd(sign_bit);
  return Packet1cd(_mm_xor_pd(a.v,mask));
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pconj | ( | const Packet4f & | a | ) |
Definition at line 309 of file AltiVec/PacketMath.h.
// pconj for Packet4f returns its argument unchanged.
{ return a; }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pconj | ( | const Packet4i & | a | ) |
Definition at line 310 of file AltiVec/PacketMath.h.
// pconj for Packet4i returns its argument unchanged.
{ return a; }
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pcos | ( | const Packet & | a | ) |
Definition at line 380 of file GenericPacketMath.h.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::pcos< Packet4f > | ( | const Packet4f & | _x | ) |
Definition at line 359 of file arch/SSE/MathFunctions.h.
{
  // Packet cosine for four floats. The argument is reduced by multiples of
  // Pi/4, two polynomials are evaluated, and the matching one is selected per
  // lane before the sign is applied.
  // Line structure restored: on a single line the trailing "// 4 / M_PI"
  // comment swallowed the remainder of the body.
  Packet4f x = _x;
  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);

  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
  _EIGEN_DECLARE_CONST_Packet4i(4, 4);

  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI

  Packet4f xmm1, xmm2, xmm3, y;
  Packet4i emm0, emm2;

  x = pabs(x);

  /* scale by 4/Pi */
  y = pmul(x, p4f_cephes_FOPI);

  /* get the integer part of y */
  emm2 = _mm_cvttps_epi32(y);
  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, p4i_1);
  emm2 = _mm_and_si128(emm2, p4i_not1);
  y = _mm_cvtepi32_ps(emm2);

  emm2 = _mm_sub_epi32(emm2, p4i_2);

  /* get the swap sign flag */
  emm0 = _mm_andnot_si128(emm2, p4i_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  /* get the polynom selection mask */
  emm2 = _mm_and_si128(emm2, p4i_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());

  Packet4f sign_bit = _mm_castsi128_ps(emm0);
  Packet4f poly_mask = _mm_castsi128_ps(emm2);

  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = pmul(y, p4f_minus_cephes_DP1);
  xmm2 = pmul(y, p4f_minus_cephes_DP2);
  xmm3 = pmul(y, p4f_minus_cephes_DP3);
  x = padd(x, xmm1);
  x = padd(x, xmm2);
  x = padd(x, xmm3);

  /* Evaluate the first polynom (0 <= x <= Pi/4) */
  y = p4f_coscof_p0;
  Packet4f z = pmul(x,x);

  y = pmadd(y,z,p4f_coscof_p1);
  y = pmadd(y,z,p4f_coscof_p2);
  y = pmul(y, z);
  y = pmul(y, z);
  Packet4f tmp = _mm_mul_ps(z, p4f_half);
  y = psub(y, tmp);
  y = padd(y, p4f_1);

  /* Evaluate the second polynom (Pi/4 <= x <= 0) */
  Packet4f y2 = p4f_sincof_p0;
  y2 = pmadd(y2, z, p4f_sincof_p1);
  y2 = pmadd(y2, z, p4f_sincof_p2);
  y2 = pmul(y2, z);
  y2 = pmadd(y2, x, x);

  /* select the correct result from the two polynoms */
  y2 = _mm_and_ps(poly_mask, y2);
  y = _mm_andnot_ps(poly_mask, y);
  y = _mm_or_ps(y,y2);

  /* update the sign */
  return _mm_xor_ps(y, sign_bit);
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pcosh | ( | const Packet & | a | ) |
Definition at line 404 of file GenericPacketMath.h.
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pcplxflip | ( | const Packet2cf & | x | ) |
Definition at line 258 of file SSE/Complex.h.
{
  // Swap the two floats of each pair: lanes (0,1,2,3) -> (1,0,3,2).
  const __m128 swapped = vec4f_swizzle1(x.v, 1, 0, 3, 2);
  return Packet2cf(swapped);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pcplxflip | ( | const Packet & | a | ) | [inline] |
Definition at line 362 of file GenericPacketMath.h.
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pcplxflip | ( | const Packet1cd & | x | ) |
Definition at line 459 of file SSE/Complex.h.
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pcplxflip< Packet2cd > | ( | const Packet2cd & | x | ) |
Definition at line 429 of file AVX/Complex.h.
{
  // Shuffle mask 0x5 swaps the two doubles inside each 128-bit half.
  const __m256d swapped = _mm256_shuffle_pd(x.v, x.v, 0x5);
  return Packet2cd(swapped);
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pcplxflip< Packet2cf > | ( | const Packet2cf & | x | ) |
Definition at line 234 of file AltiVec/Complex.h.
{
  // Permute the bytes of x.v with the p16uc_COMPLEX32_REV pattern.
  const Packet4f permuted = vec_perm(x.v, x.v, p16uc_COMPLEX32_REV);
  return Packet2cf(permuted);
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pcplxflip< Packet4cf > | ( | const Packet4cf & | x | ) |
Definition at line 234 of file AVX/Complex.h.
{
  // Swap the two floats of each pair within every 128-bit half.
  const __m256 swapped = _mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1));
  return Packet4cf(swapped);
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pdigamma | ( | const Packet & | a | ) |
Definition at line 450 of file GenericPacketMath.h.
{
  // The using-declaration keeps the call unqualified, so numext::digamma is a
  // candidate alongside any overload found by argument-dependent lookup.
  using numext::digamma;
  const Packet result = digamma(a);
  return result;
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pdiv | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pdiv< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 451 of file SSE/Complex.h.
{
  // Complex division as a * conj(b) / |b|^2.
  // Line break restored: on a single line the TODO comment swallowed the body.
  // TODO optimize it for SSE3 and 4
  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  // s = (re*re, im*im); adding s to its lane-swapped copy puts re*re + im*im in both lanes.
  __m128d s = _mm_mul_pd(b.v,b.v);
  return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pdiv< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 421 of file AVX/Complex.h.
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pdiv< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 226 of file AltiVec/Complex.h.
{
  // Complex division: the conj_helper product is divided by the sum of the
  // squared components of b.
  // Line break restored: on a single line the TODO comment swallowed the body.
  // TODO optimize it for AltiVec
  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  // s holds the squared components of b; adding its permuted copy sums each pair.
  Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
  return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pdiv< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 239 of file SSE/PacketMath.h.
{
  // Lane-wise division of the two doubles.
  const Packet2d quotient = _mm_div_pd(a,b);
  return quotient;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pdiv< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 225 of file AVX/Complex.h.
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pdiv< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 148 of file AVX/PacketMath.h.
{
  // Lane-wise division of the four doubles.
  const Packet4d quotient = _mm256_div_pd(a,b);
  return quotient;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pdiv< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 349 of file AltiVec/PacketMath.h.
{
  // Line structure restored: the preprocessor directives and line comments
  // need their own lines to be well-formed.
#ifndef __VSX__  // VSX actually provides a div instruction
  Packet4f t, y_0, y_1;

  // Altivec does not offer a divide instruction, we have to do a reciprocal approximation
  y_0 = vec_re(b);

  // Do one Newton-Raphson iteration to get the needed accuracy
  t   = vec_nmsub(y_0, b, p4f_ONE);
  y_1 = vec_madd(y_0, t, y_0);

  // a * (refined 1/b)
  return vec_madd(a, y_1, p4f_ZERO);
#else
  return vec_div(a, b);
#endif
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pdiv< Packet4i > | ( | const Packet4i & | , |
const Packet4i & | |||
) |
Definition at line 367 of file AltiVec/PacketMath.h.
{
  // Unsupported operation: the assertion condition is always false. If
  // execution continues past it, a packet of zeros is returned.
  eigen_assert(false && "packet integer division are not supported by AltiVec");
  const Packet4i zeros = pset1<Packet4i>(0);
  return zeros;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pdiv< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 147 of file AVX/PacketMath.h.
{
  // Lane-wise division of the eight floats.
  const Packet8f quotient = _mm256_div_ps(a,b);
  return quotient;
}
EIGEN_STRONG_INLINE Packet8i Eigen::internal::pdiv< Packet8i > | ( | const Packet8i & | , |
const Packet8i & | |||
) |
Definition at line 149 of file AVX/PacketMath.h.
{
  // Unsupported operation: the assertion condition is always false. If
  // execution continues past it, a packet of zeros is returned.
  eigen_assert(false && "packet integer division are not supported by AVX");
  const Packet8i zeros = pset1<Packet8i>(0);
  return zeros;
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::perf | ( | const Packet & | a | ) |
Definition at line 454 of file GenericPacketMath.h.
{
  // The using-declaration keeps the call unqualified, so numext::erf is a
  // candidate alongside any overload found by argument-dependent lookup.
  using numext::erf;
  const Packet result = erf(a);
  return result;
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::perfc | ( | const Packet & | a | ) |
Definition at line 458 of file GenericPacketMath.h.
{
  // The using-declaration keeps the call unqualified, so numext::erfc is a
  // candidate alongside any overload found by argument-dependent lookup.
  using numext::erfc;
  const Packet result = erfc(a);
  return result;
}
void Eigen::internal::permute_symm_to_fullsymm | ( | const MatrixType & | mat, |
SparseMatrix< typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex > & | _dest, | ||
const typename MatrixType::StorageIndex * | perm = 0 |
||
) |
Definition at line 384 of file SparseSelfAdjointView.h.
{
  // Expands the stored triangle of mat into a full size x size sparse matrix
  // in _dest, optionally relabelling indices through perm (used when non-null).
  // Two passes: count entries per outer index, then fill indices and values.
  // Line structure restored: on a single line the "// reserve space" and
  // "// copy data" comments swallowed the code that follows them.
  typedef typename MatrixType::StorageIndex StorageIndex;
  typedef typename MatrixType::Scalar Scalar;
  typedef SparseMatrix<Scalar,DestOrder,StorageIndex> Dest;
  typedef Matrix<StorageIndex,Dynamic,1> VectorI;
  typedef evaluator<MatrixType> MatEval;
  typedef typename evaluator<MatrixType>::InnerIterator MatIterator;

  MatEval matEval(mat);
  Dest& dest(_dest.derived());
  enum {
    StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor)
  };

  Index size = mat.rows();
  VectorI count;
  count.resize(size);
  count.setZero();
  dest.resize(size,size);

  // Pass 1: count the entries each outer vector of dest will receive.
  for(Index j = 0; j<size; ++j)
  {
    Index jp = perm ? perm[j] : j;
    for(MatIterator it(matEval,j); it; ++it)
    {
      Index i = it.index();
      Index r = it.row();
      Index c = it.col();
      Index ip = perm ? perm[i] : i;
      if(Mode==(Upper|Lower))
        count[StorageOrderMatch ? jp : ip]++;
      else if(r==c)
        count[ip]++;
      // Same triangle test as the copy pass below, so that every entry
      // written there has been counted here.
      else if(( (Mode&Lower)==Lower && r>c) || ( (Mode&Upper)==Upper && r<c))
      {
        // Off-diagonal entries are mirrored, so they count twice.
        count[ip]++;
        count[jp]++;
      }
    }
  }
  Index nnz = count.sum();

  // reserve space
  dest.resizeNonZeros(nnz);
  dest.outerIndexPtr()[0] = 0;
  for(Index j=0; j<size; ++j)
    dest.outerIndexPtr()[j+1] = dest.outerIndexPtr()[j] + count[j];
  // count[] is reused as the running write position of each outer vector.
  for(Index j=0; j<size; ++j)
    count[j] = dest.outerIndexPtr()[j];

  // copy data
  for(StorageIndex j = 0; j<size; ++j)
  {
    for(MatIterator it(matEval,j); it; ++it)
    {
      StorageIndex i = internal::convert_index<StorageIndex>(it.index());
      Index r = it.row();
      Index c = it.col();

      StorageIndex jp = perm ? perm[j] : j;
      StorageIndex ip = perm ? perm[i] : i;

      if(Mode==(Upper|Lower))
      {
        Index k = count[StorageOrderMatch ? jp : ip]++;
        dest.innerIndexPtr()[k] = StorageOrderMatch ? ip : jp;
        dest.valuePtr()[k] = it.value();
      }
      else if(r==c)
      {
        Index k = count[ip]++;
        dest.innerIndexPtr()[k] = ip;
        dest.valuePtr()[k] = it.value();
      }
      else if(( (Mode&Lower)==Lower && r>c) || ( (Mode&Upper)==Upper && r<c))
      {
        if(!StorageOrderMatch)
          std::swap(ip,jp);
        // Store the entry itself ...
        Index k = count[jp]++;
        dest.innerIndexPtr()[k] = ip;
        dest.valuePtr()[k] = it.value();
        // ... and its conjugated mirror image.
        k = count[ip]++;
        dest.innerIndexPtr()[k] = jp;
        dest.valuePtr()[k] = numext::conj(it.value());
      }
    }
  }
}
void Eigen::internal::permute_symm_to_symm | ( | const MatrixType & | mat, |
SparseMatrix< typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex > & | _dest, | ||
const typename MatrixType::StorageIndex * | perm = 0 |
||
) |
void Eigen::internal::permute_symm_to_symm | ( | const MatrixType & | mat, |
SparseMatrix< typename MatrixType::Scalar, DstOrder, typename MatrixType::StorageIndex > & | _dest, | ||
const typename MatrixType::StorageIndex * | perm | ||
) |
Definition at line 474 of file SparseSelfAdjointView.h.
{
  // Two-pass copy of the stored triangle of `mat` into `_dest`, applying the optional
  // index permutation `perm` to both the inner and the outer index. A null `perm`
  // means "use the indices unchanged". `dest` is resized to size x size, size = mat.rows().
  typedef typename MatrixType::StorageIndex StorageIndex;
  typedef typename MatrixType::Scalar Scalar;
  SparseMatrix<Scalar,DstOrder,StorageIndex>& dest(_dest.derived());
  typedef Matrix<StorageIndex,Dynamic,1> VectorI;
  typedef evaluator<MatrixType> MatEval;
  typedef typename evaluator<MatrixType>::InnerIterator MatIterator;

  enum {
    SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor,
    StorageOrderMatch = int(SrcOrder) == int(DstOrder),
    // For row-major storage the Upper/Lower selector is flipped, so the tests below can
    // compare inner index against outer index without looking at the storage order again.
    DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode,
    SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode
  };

  MatEval matEval(mat);

  Index size = mat.rows();
  VectorI count(size);
  count.setZero();
  dest.resize(size,size);

  // Pass 1: count how many entries land in each outer vector of the destination.
  for(StorageIndex j = 0; j<size; ++j)
  {
    StorageIndex jp = perm ? perm[j] : j;
    for(MatIterator it(matEval,j); it; ++it)
    {
      StorageIndex i = it.index();
      // Skip entries that are outside the triangle selected by SrcMode.
      if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
        continue;

      StorageIndex ip = perm ? perm[i] : i;
      // The destination outer index is the smaller (Lower) or larger (Upper) permuted index.
      count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
    }
  }

  // Turn the per-vector counts into outer start offsets (prefix sum), allocate the
  // nonzeros, then reuse `count` as the per-vector write cursor for pass 2.
  dest.outerIndexPtr()[0] = 0;
  for(Index j=0; j<size; ++j)
    dest.outerIndexPtr()[j+1] = dest.outerIndexPtr()[j] + count[j];
  dest.resizeNonZeros(dest.outerIndexPtr()[size]);
  for(Index j=0; j<size; ++j)
    count[j] = dest.outerIndexPtr()[j];

  // Pass 2: write inner indices and values at the positions reserved in pass 1.
  for(StorageIndex j = 0; j<size; ++j)
  {
    for(MatIterator it(matEval,j); it; ++it)
    {
      StorageIndex i = it.index();
      if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
        continue;

      StorageIndex jp = perm ? perm[j] : j;
      StorageIndex ip = perm? perm[i] : i;

      Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
      dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);

      // When the permuted pair ends up on the other side of the diagonal relative to
      // DstMode, the conjugated value is stored; otherwise the value is copied as-is.
      if(!StorageOrderMatch) std::swap(ip,jp);
      if( ((int(DstMode)==int(Lower) && ip<jp) || (int(DstMode)==int(Upper) && ip>jp)))
        dest.valuePtr()[k] = numext::conj(it.value());
      else
        dest.valuePtr()[k] = it.value();
    }
  }
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pexp | ( | const Packet & | a | ) |
Definition at line 412 of file GenericPacketMath.h.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d Eigen::internal::pexp< Packet2d > | ( | const Packet2d & | _x | ) |
Definition at line 172 of file arch/SSE/MathFunctions.h.
{
  // exp() on a packet of two doubles: clamp the input, split it as g + n*log(2),
  // approximate exp(g) with a rational function, then scale by 2^n.
  Packet2d x = _x;

  // The constants declared here are referenced below with a p2d_ prefix.
  _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
  _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
  _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);

  _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
  _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);

  _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);

  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);

  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);

  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
  // Exponent bias 1023 in the two low 32-bit lanes, zero in the two high lanes.
  static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);

  Packet2d tmp, fx;
  Packet4i emm0;

  // clamp x
  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
  /* express exp(x) as exp(g + n*log(2)) */
  fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);

#ifdef EIGEN_VECTORIZE_SSE4_1
  fx = _mm_floor_pd(fx);
#else
  // Without SSE4.1: truncate to integer and convert back, then correct the lanes
  // where truncation produced a value greater than fx.
  emm0 = _mm_cvttpd_epi32(fx);
  tmp = _mm_cvtepi32_pd(emm0);
  /* if greater, subtract 1 */
  Packet2d mask = _mm_cmpgt_pd(tmp, fx);
  mask = _mm_and_pd(mask, p2d_1);
  fx = psub(tmp, mask);
#endif

  // Remove n*log(2) in two steps (n*C1, then n*C2) to obtain the reduced argument.
  tmp = pmul(fx, p2d_cephes_exp_C1);
  Packet2d z = pmul(fx, p2d_cephes_exp_C2);
  x = psub(x, tmp);
  x = psub(x, z);

  Packet2d x2 = pmul(x,x);

  // Numerator: px = x * (p0*x2^2 + p1*x2 + p2).
  Packet2d px = p2d_cephes_exp_p0;
  px = pmadd(px, x2, p2d_cephes_exp_p1);
  px = pmadd(px, x2, p2d_cephes_exp_p2);
  px = pmul (px, x);

  // Denominator polynomial in x2.
  Packet2d qx = p2d_cephes_exp_q0;
  qx = pmadd(qx, x2, p2d_cephes_exp_q1);
  qx = pmadd(qx, x2, p2d_cephes_exp_q2);
  qx = pmadd(qx, x2, p2d_cephes_exp_q3);

  // exp(g) is evaluated as 1 + 2 * px / (qx - px).
  x = pdiv(px,psub(qx,px));
  x = pmadd(p2d_2,x,p2d_1);

  // build 2^n
  // Add the bias, shift left by 20 within each 32-bit lane, then shuffle so that the
  // two biased exponents sit in the upper 32 bits of each 64-bit double.
  emm0 = _mm_cvttpd_epi32(fx);
  emm0 = _mm_add_epi32(emm0, p4i_1023_0);
  emm0 = _mm_slli_epi32(emm0, 20);
  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
  // NOTE(review): the outer pmax with the original input presumably lets non-finite
  // inputs pass through, as the Packet4d variant's comment says - confirm.
  return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d Eigen::internal::pexp< Packet4d > | ( | const Packet4d & | _x | ) |
Definition at line 319 of file arch/AVX/MathFunctions.h.
{
  // exp() on a packet of four doubles: clamp the input, split it as g + n*log(2),
  // approximate exp(g) with a rational function, then scale by 2^n.
  Packet4d x = _x;

  // The constants declared here are referenced below with a p4d_ / p4i_ prefix.
  _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
  _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
  _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);

  _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
  _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);

  _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);

  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);

  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);

  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
  _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);

  Packet4d tmp, fx;

  // clamp x
  x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
  // Express exp(x) as exp(g + n*log(2)).
  fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);

  // Get the integer modulus of log(2), i.e. the "n" described above.
  fx = _mm256_floor_pd(fx);

  // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
  // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
  // digits right.
  tmp = pmul(fx, p4d_cephes_exp_C1);
  Packet4d z = pmul(fx, p4d_cephes_exp_C2);
  x = psub(x, tmp);
  x = psub(x, z);

  Packet4d x2 = pmul(x, x);

  // Evaluate the numerator polynomial of the rational interpolant.
  Packet4d px = p4d_cephes_exp_p0;
  px = pmadd(px, x2, p4d_cephes_exp_p1);
  px = pmadd(px, x2, p4d_cephes_exp_p2);
  px = pmul(px, x);

  // Evaluate the denominator polynomial of the rational interpolant.
  Packet4d qx = p4d_cephes_exp_q0;
  qx = pmadd(qx, x2, p4d_cephes_exp_q1);
  qx = pmadd(qx, x2, p4d_cephes_exp_q2);
  qx = pmadd(qx, x2, p4d_cephes_exp_q3);

  // I don't really get this bit, copied from the SSE2 routines, so...
  // TODO(gonnet): Figure out what is going on here, perhaps find a better
  // rational interpolant?
  // (As written, the two statements below compute 1 + 2 * px / (qx - px).)
  x = _mm256_div_pd(px, psub(qx, px));
  x = pmadd(p4d_2, x, p4d_1);

  // Build e=2^n by constructing the exponents in a 128-bit vector and
  // shifting them to where they belong in double-precision values.
  __m128i emm0 = _mm256_cvtpd_epi32(fx);
  emm0 = _mm_add_epi32(emm0, p4i_1023);
  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
  __m128i lo = _mm_slli_epi64(emm0, 52);
  __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
  __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
  e = _mm256_insertf128_si256(e, hi, 1);

  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
  // non-finite values in the input.
  return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::pexp< Packet4f > | ( | const Packet4f & | _x | ) |
Definition at line 112 of file arch/AltiVec/MathFunctions.h.
{
  // exp() on a packet of four floats (AltiVec): clamp the input, split it as
  // g + n*log(2), approximate exp(g) with a polynomial, then scale by 2^n.
  Packet4f x = _x;

  // The constants declared here are referenced below with a p4f_ / p4i_ prefix.
  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
  _EIGEN_DECLARE_CONST_Packet4i(23, 23);

  _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);

  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);

  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);

  Packet4f tmp, fx;
  Packet4i emm0;

  // clamp x
  x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo);

  /* express exp(x) as exp(g + n*log(2)) */
  fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);

  fx = vec_floor(fx);

  // Remove n*log(2) in two steps (n*C1, then n*C2) to obtain the reduced argument.
  tmp = pmul(fx, p4f_cephes_exp_C1);
  Packet4f z = pmul(fx, p4f_cephes_exp_C2);
  x = psub(x, tmp);
  x = psub(x, z);

  z = pmul(x,x);

  // Degree-5 polynomial in x (Horner form), then y = y*x^2 + x + 1.
  Packet4f y = p4f_cephes_exp_p0;
  y = pmadd(y, x, p4f_cephes_exp_p1);
  y = pmadd(y, x, p4f_cephes_exp_p2);
  y = pmadd(y, x, p4f_cephes_exp_p3);
  y = pmadd(y, x, p4f_cephes_exp_p4);
  y = pmadd(y, x, p4f_cephes_exp_p5);
  y = pmadd(y, z, x);
  y = padd(y, p4f_1);

  // build 2^n
  // Convert n to integer, add the bias 0x7f and shift left by 23 bits.
  emm0 = vec_cts(fx, 0);
  emm0 = vec_add(emm0, p4i_0x7f);
  emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));

  // Altivec's max & min operators just drop silent NaNs. Check NaNs in
  // inputs and return them unmodified.
  Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
  return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
                 isnumber_mask);
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f Eigen::internal::pexp< Packet8f > | ( | const Packet8f & | _x | ) |
Definition at line 209 of file arch/AVX/MathFunctions.h.
{
  // exp() on a packet of eight floats (AVX): clamp the input, split it as
  // m*ln(2) + r, approximate exp(r) with a polynomial, then scale by 2^m.
  // The constants declared here are referenced below with a p8f_ prefix.
  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);

  _EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
  _EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);

  _EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);

  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);

  // Clamp x.
  Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);

  // Express exp(x) as exp(m*ln(2) + r), start by extracting
  // m = floor(x/ln(2) + 0.5).
  Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));

  // Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
  // subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
  // truncation errors. Note that we don't use the "pmadd" function here to
  // ensure that a precision-preserving FMA instruction is used.
#ifdef EIGEN_VECTORIZE_FMA
  _EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
  Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
#else
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
  Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
  r = psub(r, pmul(m, p8f_cephes_exp_C2));
#endif

  Packet8f r2 = pmul(r, r);

  // TODO(gonnet): Split into odd/even polynomials and try to exploit
  // instruction-level parallelism.
  // Degree-5 polynomial in r (Horner form), then y = y*r^2 + r + 1.
  Packet8f y = p8f_cephes_exp_p0;
  y = pmadd(y, r, p8f_cephes_exp_p1);
  y = pmadd(y, r, p8f_cephes_exp_p2);
  y = pmadd(y, r, p8f_cephes_exp_p3);
  y = pmadd(y, r, p8f_cephes_exp_p4);
  y = pmadd(y, r, p8f_cephes_exp_p5);
  y = pmadd(y, r2, r);
  y = padd(y, p8f_1);

  // Build emm0 = 2^m.
  // m + 127 is converted to integer and shifted left by 23 bits.
  Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
  emm0 = pshiftleft(emm0, 23);

  // Return 2^m * exp(r).
  return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
}
EIGEN_DEVICE_FUNC unpacket_traits<Packet>::type Eigen::internal::pfirst | ( | const Packet & | a | ) | [inline] |
Definition at line 309 of file GenericPacketMath.h.
{
  // Generic version: hands back the argument itself; any conversion to the declared
  // return type (unpacket_traits<Packet>::type) is implicit.
  return a;
}
EIGEN_STRONG_INLINE std::complex<double> Eigen::internal::pfirst< Packet1cd > | ( | const Packet1cd & | a | ) |
Definition at line 345 of file SSE/Complex.h.
{
  // Spill both double lanes to an aligned scratch buffer, then rebuild the complex
  // value from lane 0 (real part) and lane 1 (imaginary part).
  EIGEN_ALIGN16 double lanes[2];
  _mm_store_pd(lanes, a.v);
  const double re = lanes[0];
  const double im = lanes[1];
  return std::complex<double>(re, im);
}
EIGEN_STRONG_INLINE std::complex<double> Eigen::internal::pfirst< Packet2cd > | ( | const Packet2cd & | a | ) |
Definition at line 327 of file AVX/Complex.h.
{
  // Take 128-bit half number 0 of the packet, spill it to an aligned scratch buffer
  // and rebuild the complex value from its two doubles.
  const __m128d lowHalf = _mm256_extractf128_pd(a.v, 0);
  EIGEN_ALIGN16 double lanes[2];
  _mm_store_pd(lanes, lowHalf);
  const double re = lanes[0];
  const double im = lanes[1];
  return std::complex<double>(re, im);
}
EIGEN_STRONG_INLINE std::complex< float > Eigen::internal::pfirst< Packet2cf > | ( | const Packet2cf & | a | ) |
Definition at line 128 of file AltiVec/Complex.h.
{
  // Store the whole packet into an aligned two-element scratch array and return element 0.
  std::complex<float> EIGEN_ALIGN16 scratch[2];
  pstore((float *)&scratch, a.v);
  const std::complex<float> first = scratch[0];
  return first;
}
EIGEN_STRONG_INLINE double Eigen::internal::pfirst< Packet2d > | ( | const Packet2d & | a | ) |
Definition at line 426 of file SSE/PacketMath.h.
{
  // Extract the lowest double lane of the packet.
  const double lane0 = _mm_cvtsd_f64(a);
  return lane0;
}
EIGEN_STRONG_INLINE std::complex<float> Eigen::internal::pfirst< Packet4cf > | ( | const Packet4cf & | a | ) |
Definition at line 119 of file AVX/Complex.h.
{
  // View the lower 128 bits as a two-complex packet and defer to its pfirst.
  const Packet2cf lowHalf(_mm256_castps256_ps128(a.v));
  return pfirst(lowHalf);
}
EIGEN_STRONG_INLINE double Eigen::internal::pfirst< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 311 of file AVX/PacketMath.h.
{
  // Narrow to the lower 128 bits, then extract the lowest double lane.
  const __m128d lowHalf = _mm256_castpd256_pd128(a);
  return _mm_cvtsd_f64(lowHalf);
}
EIGEN_STRONG_INLINE float Eigen::internal::pfirst< Packet4f > | ( | const Packet4f & | a | ) |
Definition at line 502 of file AltiVec/PacketMath.h.
{
  // Store all four lanes to an aligned scratch array and return lane 0.
  float EIGEN_ALIGN16 lanes[4];
  vec_st(a, 0, lanes);
  const float lane0 = lanes[0];
  return lane0;
}
EIGEN_STRONG_INLINE int Eigen::internal::pfirst< Packet4i > | ( | const Packet4i & | a | ) |
Definition at line 503 of file AltiVec/PacketMath.h.
{
  // Store all four lanes to an aligned scratch array and return lane 0.
  int EIGEN_ALIGN16 lanes[4];
  vec_st(a, 0, lanes);
  const int lane0 = lanes[0];
  return lane0;
}
EIGEN_STRONG_INLINE float Eigen::internal::pfirst< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 308 of file AVX/PacketMath.h.
{
  // Narrow to the lower 128 bits, then extract the lowest float lane.
  const __m128 lowHalf = _mm256_castps256_ps128(a);
  return _mm_cvtss_f32(lowHalf);
}
EIGEN_STRONG_INLINE int Eigen::internal::pfirst< Packet8i > | ( | const Packet8i & | a | ) |
Definition at line 314 of file AVX/PacketMath.h.
{
  // Narrow to the lower 128 bits, then extract the lowest 32-bit integer lane.
  const __m128i lowHalf = _mm256_castsi256_si128(a);
  return _mm_cvtsi128_si32(lowHalf);
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pfloor | ( | const Packet & | a | ) |
Definition at line 438 of file GenericPacketMath.h.
{
  // Generic version: make numext::floor visible, then call floor unqualified so that
  // overload resolution picks the match for the argument type.
  using numext::floor;
  return floor(a);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pfloor< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 193 of file AVX/PacketMath.h.
{
  // Lane-wise floor of the four doubles.
  const Packet4d floored = _mm256_floor_pd(a);
  return floored;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pfloor< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 192 of file AVX/PacketMath.h.
{
  // Lane-wise floor of the eight floats.
  const Packet8f floored = _mm256_floor_ps(a);
  return floored;
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pgather | ( | const Scalar * | from, |
Index | |||
) | [inline] |
Definition at line 286 of file GenericPacketMath.h.
{
  // Generic version: the stride argument is unnamed and unused; this simply forwards
  // to the unaligned load at `from`.
  return ploadu<Packet>(from);
}
EIGEN_DEVICE_FUNC Packet2d Eigen::internal::pgather< double, Packet2d > | ( | const double * | from, |
Index | stride | ||
) | [inline] |
Definition at line 365 of file SSE/PacketMath.h.
{
  // Read the two strided elements; _mm_set_pd takes its arguments highest lane first.
  const double lane0 = from[0*stride];
  const double lane1 = from[1*stride];
  return _mm_set_pd(lane1, lane0);
}
EIGEN_DEVICE_FUNC Packet4d Eigen::internal::pgather< double, Packet4d > | ( | const double * | from, |
Index | stride | ||
) | [inline] |
Definition at line 259 of file AVX/PacketMath.h.
{
  // Read the four strided elements; _mm256_set_pd takes its arguments highest lane first.
  const double lane0 = from[0*stride];
  const double lane1 = from[1*stride];
  const double lane2 = from[2*stride];
  const double lane3 = from[3*stride];
  return _mm256_set_pd(lane3, lane2, lane1, lane0);
}
EIGEN_DEVICE_FUNC Packet4f Eigen::internal::pgather< float, Packet4f > | ( | const float * | from, |
Index | stride | ||
) | [inline] |
Definition at line 260 of file AltiVec/PacketMath.h.
{
  // Collect the four strided elements in an aligned scratch array, then load it as a packet.
  float EIGEN_ALIGN16 gathered[4];
  for(int k = 0; k < 4; ++k)
    gathered[k] = from[k*stride];
  return pload<Packet4f>(gathered);
}
EIGEN_DEVICE_FUNC Packet8f Eigen::internal::pgather< float, Packet8f > | ( | const float * | from, |
Index | stride | ||
) | [inline] |
Definition at line 254 of file AVX/PacketMath.h.
{
  // Read the eight strided elements; _mm256_set_ps takes its arguments highest lane first.
  const float lane0 = from[0*stride];
  const float lane1 = from[1*stride];
  const float lane2 = from[2*stride];
  const float lane3 = from[3*stride];
  const float lane4 = from[4*stride];
  const float lane5 = from[5*stride];
  const float lane6 = from[6*stride];
  const float lane7 = from[7*stride];
  return _mm256_set_ps(lane7, lane6, lane5, lane4,
                       lane3, lane2, lane1, lane0);
}
EIGEN_DEVICE_FUNC Packet4i Eigen::internal::pgather< int, Packet4i > | ( | const int * | from, |
Index | stride | ||
) | [inline] |
Definition at line 269 of file AltiVec/PacketMath.h.
{
  // Collect the four strided elements in an aligned scratch array, then load it as a packet.
  int EIGEN_ALIGN16 gathered[4];
  for(int k = 0; k < 4; ++k)
    gathered[k] = from[k*stride];
  return pload<Packet4i>(gathered);
}
EIGEN_DEVICE_FUNC Packet2cd Eigen::internal::pgather< std::complex< double >, Packet2cd > | ( | const std::complex< double > * | from, |
Index | stride | ||
) | [inline] |
EIGEN_DEVICE_FUNC Packet2cf Eigen::internal::pgather< std::complex< float >, Packet2cf > | ( | const std::complex< float > * | from, |
Index | stride | ||
) | [inline] |
Definition at line 70 of file AltiVec/Complex.h.
{
  // Collect the two strided complex values in an aligned scratch array, then load the
  // array as four floats and wrap the result.
  std::complex<float> EIGEN_ALIGN16 gathered[2];
  gathered[0] = from[0*stride];
  gathered[1] = from[1*stride];
  const float* const raw = (const float*)gathered;
  return Packet2cf(vec_ld(0, raw));
}
EIGEN_DEVICE_FUNC Packet4cf Eigen::internal::pgather< std::complex< float >, Packet4cf > | ( | const std::complex< float > * | from, |
Index | stride | ||
) | [inline] |
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::plgamma | ( | const Packet & | a | ) |
Definition at line 446 of file GenericPacketMath.h.
{
  // Generic version: make numext::lgamma visible, then call lgamma unqualified so that
  // overload resolution picks the match for the argument type.
  using numext::lgamma;
  return lgamma(a);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pload | ( | const typename unpacket_traits< Packet >::type * | from | ) | [inline] |
Definition at line 208 of file GenericPacketMath.h.
{
  // Generic version: dereferences `from`; conversion to Packet is implicit.
  return *from;
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pload1 | ( | const typename unpacket_traits< Packet >::type * | a | ) | [inline] |
Definition at line 220 of file GenericPacketMath.h.
{
  // Generic version: reads the single value at `a` and passes it to pset1.
  return pset1<Packet>(*a);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pload1< Packet4d > | ( | const double * | from | ) |
Definition at line 119 of file AVX/PacketMath.h.
{
  // Broadcast the double at `from` to all four lanes.
  const Packet4d splat = _mm256_broadcast_sd(from);
  return splat;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pload1< Packet8f > | ( | const float * | from | ) |
Definition at line 118 of file AVX/PacketMath.h.
{
  // Broadcast the float at `from` to all eight lanes.
  const Packet8f splat = _mm256_broadcast_ss(from);
  return splat;
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pload< Packet1cd > | ( | const std::complex< double > * | from | ) |
Definition at line 330 of file SSE/Complex.h.
{
  // View the complex value as two consecutive doubles and load them through pload.
  EIGEN_DEBUG_ALIGNED_LOAD
  const double* const raw = (const double*)from;
  return Packet1cd(pload<Packet2d>(raw));
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pload< Packet2cd > | ( | const std::complex< double > * | from | ) |
Definition at line 296 of file AVX/Complex.h.
{
  // View the complex values as consecutive doubles and load four of them through pload.
  EIGEN_DEBUG_ALIGNED_LOAD
  const double* const raw = (const double*)from;
  return Packet2cd(pload<Packet4d>(raw));
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pload< Packet2cf > | ( | const std::complex< float > * | from | ) |
Definition at line 115 of file AltiVec/Complex.h.
{
  // View the complex values as consecutive floats and load four of them through pload.
  EIGEN_DEBUG_ALIGNED_LOAD
  const float* const raw = (const float*)from;
  return Packet2cf(pload<Packet4f>(raw));
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pload< Packet2d > | ( | const double * | from | ) |
Definition at line 302 of file SSE/PacketMath.h.
{
  // Aligned load of two doubles.
  EIGEN_DEBUG_ALIGNED_LOAD
  const Packet2d loaded = _mm_load_pd(from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pload< Packet4cf > | ( | const std::complex< float > * | from | ) |
Definition at line 75 of file AVX/Complex.h.
{
  // Take the address of the first real part and load eight floats from there.
  EIGEN_DEBUG_ALIGNED_LOAD
  const float* const raw = &numext::real_ref(*from);
  return Packet4cf(pload<Packet8f>(raw));
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pload< Packet4d > | ( | const double * | from | ) |
Definition at line 208 of file AVX/PacketMath.h.
{
  // Aligned load of four doubles.
  EIGEN_DEBUG_ALIGNED_LOAD
  const Packet4d loaded = _mm256_load_pd(from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pload< Packet4f > | ( | const float * | from | ) |
Definition at line 217 of file AltiVec/PacketMath.h.
{
  // Aligned AltiVec load of four floats at byte offset 0 from `from`.
  EIGEN_DEBUG_ALIGNED_LOAD
  const Packet4f loaded = vec_ld(0, from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pload< Packet4i > | ( | const int * | from | ) |
Definition at line 218 of file AltiVec/PacketMath.h.
{
  // Aligned AltiVec load of four ints at byte offset 0 from `from`.
  EIGEN_DEBUG_ALIGNED_LOAD
  const Packet4i loaded = vec_ld(0, from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pload< Packet8f > | ( | const float * | from | ) |
Definition at line 207 of file AVX/PacketMath.h.
{
  // Aligned load of eight floats.
  EIGEN_DEBUG_ALIGNED_LOAD
  const Packet8f loaded = _mm256_load_ps(from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet8i Eigen::internal::pload< Packet8i > | ( | const int * | from | ) |
Definition at line 209 of file AVX/PacketMath.h.
{
  // Aligned load of 256 bits of integer data; the intrinsic expects a __m256i pointer.
  EIGEN_DEBUG_ALIGNED_LOAD
  const __m256i* const src = reinterpret_cast<const __m256i*>(from);
  return _mm256_load_si256(src);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::ploaddup | ( | const typename unpacket_traits< Packet >::type * | from | ) | [inline] |
Definition at line 228 of file GenericPacketMath.h.
{
  // Generic version: dereferences `from`; conversion to Packet is implicit.
  return *from;
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::ploaddup< Packet1cd > | ( | const std::complex< double > * | from | ) |
Definition at line 337 of file SSE/Complex.h.
{
  // Delegates to pset1 with the single complex value at `from`.
  const std::complex<double>& value = *from;
  return pset1<Packet1cd>(value);
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::ploaddup< Packet2cd > | ( | const std::complex< double > * | from | ) |
Definition at line 308 of file AVX/Complex.h.
{
  // Delegates to pset1 with the first complex value at `from`.
  const std::complex<double>& value = *from;
  return pset1<Packet2cd>(value);
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::ploaddup< Packet2cf > | ( | const std::complex< float > * | from | ) |
Definition at line 118 of file AltiVec/Complex.h.
{
  // Delegates to pset1 with the first complex value at `from`.
  const std::complex<float>& value = *from;
  return pset1<Packet2cf>(value);
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::ploaddup< Packet2d > | ( | const double * | from | ) |
Definition at line 344 of file SSE/PacketMath.h.
{
  // Delegates to pset1 with the first double at `from`.
  const double& first = from[0];
  return pset1<Packet2d>(first);
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::ploaddup< Packet4cf > | ( | const std::complex< float > * | from | ) |
Definition at line 84 of file AVX/Complex.h.
{
  // FIXME The following might be optimized using _mm256_movedup_pd
  // Duplicate each of the first two complex values separately, then place the first
  // result in the lower 128 bits and the second in the upper 128 bits.
  const Packet2cf lowHalf  = ploaddup<Packet2cf>(from);
  const Packet2cf highHalf = ploaddup<Packet2cf>(from+1);
  return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(lowHalf.v), highHalf.v, 1));
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::ploaddup< Packet4d > | ( | const double * | from | ) |
Definition at line 231 of file AVX/PacketMath.h.
{
  // Broadcast the two doubles at `from` into both 128-bit halves, then select the low
  // element in the lower half and the high element in the upper half, which yields
  // (from[0], from[0], from[1], from[1]).
  const __m128d* const src = (const __m128d*)(const void*)from;
  const Packet4d both = _mm256_broadcast_pd(src);
  return _mm256_permute_pd(both, 3<<2);
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::ploaddup< Packet4f > | ( | const float * | from | ) |
Definition at line 431 of file AltiVec/PacketMath.h.
{
  // Use the aligned load when the address is a multiple of 16 bytes, the unaligned
  // load otherwise, then rearrange the lanes with the DUPLICATE32_HI permute mask.
  const bool isAligned = (ptrdiff_t(from) % 16) == 0;
  Packet4f loaded;
  if(isAligned)
    loaded = pload<Packet4f>(from);
  else
    loaded = ploadu<Packet4f>(from);
  return vec_perm(loaded, loaded, p16uc_DUPLICATE32_HI);
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::ploaddup< Packet4i > | ( | const int * | from | ) |
Definition at line 438 of file AltiVec/PacketMath.h.
{
  // Use the aligned load when the address is a multiple of 16 bytes, the unaligned
  // load otherwise, then rearrange the lanes with the DUPLICATE32_HI permute mask.
  const bool isAligned = (ptrdiff_t(from) % 16) == 0;
  Packet4i loaded;
  if(isAligned)
    loaded = pload<Packet4i>(from);
  else
    loaded = ploadu<Packet4i>(from);
  return vec_perm(loaded, loaded, p16uc_DUPLICATE32_HI);
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::ploaddup< Packet8f > | ( | const float * | from | ) |
Definition at line 216 of file AVX/PacketMath.h.
{
  // TODO try to find a way to avoid the need of a temporary register
  // Alternative kept for reference:
  //   Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
  //   tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
  //   return _mm256_unpacklo_ps(tmp,tmp);
  // _mm256_insertf128_ps is very slow on Haswell, thus the four floats are broadcast
  // into both 128-bit halves instead:
  Packet8f dup = _mm256_broadcast_ps((const __m128*)(const void*)from);
  // mimic an "inplace" permutation of the lower 128bits using a blend
  const __m128 lowSwizzled = _mm_permute_ps(_mm256_castps256_ps128(dup), _MM_SHUFFLE(1,0,1,0));
  dup = _mm256_blend_ps(dup, _mm256_castps128_ps256(lowSwizzled), 15);
  // then one permutation applied to the whole register puts everything in shape
  return _mm256_permute_ps(dup, _MM_SHUFFLE(3,3,2,2));
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::ploadquad | ( | const typename unpacket_traits< Packet >::type * | from | ) | [inline] |
Definition at line 237 of file GenericPacketMath.h.
{
  // Generic version: forwards to pload1 with the same pointer.
  return pload1<Packet>(from);
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::ploadquad< Packet8f > | ( | const float * | from | ) |
Definition at line 238 of file AVX/PacketMath.h.
{
  // Broadcast from[0] across the lower 128 bits and from[1] across the upper 128 bits.
  const __m128 firstSplat = _mm_broadcast_ss(from);
  const Packet8f widened = _mm256_castps128_ps256(firstSplat);
  return _mm256_insertf128_ps(widened, _mm_broadcast_ss(from+1), 1);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet Eigen::internal::ploadt | ( | const typename unpacket_traits< Packet >::type * | from | ) |
Definition at line 482 of file GenericPacketMath.h.
{
  // Fall back to the unaligned load when the compile-time Alignment is below what the
  // packet type requires; otherwise the aligned load is used.
  if(Alignment < unpacket_traits<Packet>::alignment)
    return ploadu<Packet>(from);
  return pload<Packet>(from);
}
Packet Eigen::internal::ploadt_ro | ( | const typename unpacket_traits< Packet >::type * | from | ) | [inline] |
Definition at line 507 of file GenericPacketMath.h.
{
// Generic version: forwards to ploadt with the same packet type and load mode.
return ploadt<Packet, LoadMode>(from);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::ploadu | ( | const typename unpacket_traits< Packet >::type * | from | ) | [inline] |
Definition at line 212 of file GenericPacketMath.h.
{
  // Generic version: dereferences `from`; conversion to Packet is implicit.
  return *from;
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::ploadu< Packet1cd > | ( | const std::complex< double > * | from | ) |
Definition at line 332 of file SSE/Complex.h.
{
  // View the complex value as two consecutive doubles and load them through ploadu.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const double* const raw = (const double*)from;
  return Packet1cd(ploadu<Packet2d>(raw));
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::ploadu< Packet2cd > | ( | const std::complex< double > * | from | ) |
Definition at line 298 of file AVX/Complex.h.
{
  // View the complex values as consecutive doubles and load four of them through ploadu.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const double* const raw = (const double*)from;
  return Packet2cd(ploadu<Packet4d>(raw));
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::ploadu< Packet2cf > | ( | const std::complex< float > * | from | ) |
Definition at line 116 of file AltiVec/Complex.h.
{
  // View the complex values as consecutive floats and load four of them through ploadu.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const float* const raw = (const float*)from;
  return Packet2cf(ploadu<Packet4f>(raw));
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::ploadu< Packet2d > | ( | const double * | from | ) |
Definition at line 328 of file SSE/PacketMath.h.
{
  // Unaligned load of two doubles.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const Packet2d loaded = _mm_loadu_pd(from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::ploadu< Packet4cf > | ( | const std::complex< float > * | from | ) |
Definition at line 76 of file AVX/Complex.h.
{
  // Take the address of the first real part and load eight floats from there.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const float* const raw = &numext::real_ref(*from);
  return Packet4cf(ploadu<Packet8f>(raw));
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::ploadu< Packet4d > | ( | const double * | from | ) |
Definition at line 212 of file AVX/PacketMath.h.
{
  // Unaligned load of four doubles.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const Packet4d loaded = _mm256_loadu_pd(from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::ploadu< Packet4f > | ( | const float * | from | ) |
Definition at line 424 of file AltiVec/PacketMath.h.
{
  // Unaligned load of four floats (VSX path).
  // This is an unaligned load, so it is reported through EIGEN_DEBUG_UNALIGNED_LOAD
  // like every other ploadu specialization (the aligned macro was used here before).
  EIGEN_DEBUG_UNALIGNED_LOAD
  // vec_vsx_ld receives the low four address bits as byte offset and the pointer
  // produced by _EIGEN_ALIGNED_PTR as base.
  // NOTE(review): _EIGEN_ALIGNED_PTR presumably rounds `from` down to a 16-byte
  // boundary so that offset + base == from - confirm against its definition.
  return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::ploadu< Packet4i > | ( | const int * | from | ) |
Definition at line 419 of file AltiVec/PacketMath.h.
{
  // Unaligned load of four ints (VSX path).
  // This is an unaligned load, so it is reported through EIGEN_DEBUG_UNALIGNED_LOAD
  // like every other ploadu specialization (the aligned macro was used here before).
  EIGEN_DEBUG_UNALIGNED_LOAD
  // vec_vsx_ld receives the low four address bits as byte offset and the pointer
  // produced by _EIGEN_ALIGNED_PTR as base.
  // NOTE(review): _EIGEN_ALIGNED_PTR presumably rounds `from` down to a 16-byte
  // boundary so that offset + base == from - confirm against its definition.
  return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::ploadu< Packet8f > | ( | const float * | from | ) |
Definition at line 211 of file AVX/PacketMath.h.
{
  // Unaligned load of eight floats.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const Packet8f loaded = _mm256_loadu_ps(from);
  return loaded;
}
EIGEN_STRONG_INLINE Packet8i Eigen::internal::ploadu< Packet8i > | ( | const int * | from | ) |
Definition at line 213 of file AVX/PacketMath.h.
{
  // Unaligned load of 256 bits of integer data; the intrinsic expects a __m256i pointer.
  EIGEN_DEBUG_UNALIGNED_LOAD
  const __m256i* const src = reinterpret_cast<const __m256i*>(from);
  return _mm256_loadu_si256(src);
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::plog | ( | const Packet & | a | ) |
Definition at line 416 of file GenericPacketMath.h.
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::plog10 | ( | const Packet & | a | ) |
Definition at line 420 of file GenericPacketMath.h.
{
  // Generic version: make std::log10 visible, then call log10 unqualified so that
  // overload resolution picks the match for the argument type.
  using std::log10;
  return log10(a);
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::plog< Packet4f > | ( | const Packet4f & | _x | ) |
Definition at line 23 of file arch/AltiVec/MathFunctions.h.
{
  // Natural logarithm on a packet of four floats (AltiVec): split the input into
  // exponent and mantissa, evaluate a polynomial on the shifted mantissa, add the
  // exponent contribution back, then patch the results for zero and invalid inputs.
  Packet4f x = _x;

  // The constants declared here are referenced below with a p4f_ / p4i_ prefix.
  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
  _EIGEN_DECLARE_CONST_Packet4i(23, 23);

  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);

  /* the smallest non denormalized float number */
  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);

  /* natural logarithm computed for 4 simultaneous float
     return NaN for x <= 0
  */
  _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);

  Packet4i emm0;

  /* isvalid_mask is 0 if x < 0 or x is NaN. */
  Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
  Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));

  x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
  // Shift right by 23 bits to bring the exponent field down to the low bits.
  emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
                reinterpret_cast<Packet4ui>(p4i_23));

  /* keep only the fractional part */
  x = pand(x, p4f_inv_mant_mask);
  x = por(x, p4f_half);

  // Remove the bias 0x7f, convert to float and add 1.
  emm0 = psub(emm0, p4i_0x7f);
  Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);

  /* part2:
     if( x < SQRTHF ) {
       e -= 1;
       x = x + x - 1.0;
     } else { x = x - 1.0; }
  */
  // Branch-free form of the above: `mask` selects the lanes with x < SQRTHF.
  Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
  Packet4f tmp = pand(x, mask);
  x = psub(x, p4f_1);
  e = psub(e, pand(p4f_1, mask));
  x = padd(x, tmp);

  Packet4f x2 = pmul(x,x);
  Packet4f x3 = pmul(x2,x);

  // The nine coefficients p0..p8 are evaluated as three interleaved chains
  // (y, y1, y2) that are then combined with powers of x3.
  Packet4f y, y1, y2;
  y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
  y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
  y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
  y = pmadd(y , x, p4f_cephes_log_p2);
  y1 = pmadd(y1, x, p4f_cephes_log_p5);
  y2 = pmadd(y2, x, p4f_cephes_log_p8);
  y = pmadd(y, x3, y1);
  y = pmadd(y, x3, y2);
  y = pmul(y, x3);

  // Add the exponent contribution (e*q1 + e*q2) and the -x^2/2 term.
  y1 = pmul(e, p4f_cephes_log_q1);
  tmp = pmul(x2, p4f_half);
  y = padd(y, y1);
  x = psub(x, tmp);
  y2 = pmul(e, p4f_cephes_log_q2);
  x = padd(x, y);
  x = padd(x, y2);
  // negative arg will be NAN, 0 will be -INF
  x = vec_sel(x, p4f_minus_inf, iszero_mask);
  x = vec_sel(p4f_minus_nan, x, isvalid_mask);
  return x;
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f Eigen::internal::plog< Packet8f > | ( | const Packet8f & | _x | ) |
Definition at line 121 of file arch/AVX/MathFunctions.h.
{
  // Natural logarithm on a packet of eight floats (AVX): split the input into
  // exponent and mantissa, evaluate a polynomial on the shifted mantissa, add the
  // exponent contribution back, then patch the results for zero and invalid inputs.
  Packet8f x = _x;
  // The constants declared here are referenced below with a p8f_ prefix.
  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);

  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);

  // The smallest non denormalized float number.
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);

  // Polynomial coefficients.
  _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);

  Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
  Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);

  // Truncate input values to the minimum positive normal.
  x = pmax(x, p8f_min_norm_pos);

  // NOTE(review): pshiftright is defined elsewhere; it presumably returns the shifted
  // exponent bits already converted to float, since 126.0f is subtracted next - confirm.
  Packet8f emm0 = pshiftright(x,23);
  Packet8f e = _mm256_sub_ps(emm0, p8f_126f);

  // Set the exponents to -1, i.e. x are in the range [0.5,1).
  x = _mm256_and_ps(x, p8f_inv_mant_mask);
  x = _mm256_or_ps(x, p8f_half);

  // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
  // and shift by -1. The values are then centered around 0, which improves
  // the stability of the polynomial evaluation.
  //   if( x < SQRTHF ) {
  //     e -= 1;
  //     x = x + x - 1.0;
  //   } else { x = x - 1.0; }
  Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
  Packet8f tmp = _mm256_and_ps(x, mask);
  x = psub(x, p8f_1);
  e = psub(e, _mm256_and_ps(p8f_1, mask));
  x = padd(x, tmp);

  Packet8f x2 = pmul(x, x);
  Packet8f x3 = pmul(x2, x);

  // Evaluate the polynomial approximant of degree 8 in three parts, probably
  // to improve instruction-level parallelism.
  Packet8f y, y1, y2;
  y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
  y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
  y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
  y = pmadd(y, x, p8f_cephes_log_p2);
  y1 = pmadd(y1, x, p8f_cephes_log_p5);
  y2 = pmadd(y2, x, p8f_cephes_log_p8);
  y = pmadd(y, x3, y1);
  y = pmadd(y, x3, y2);
  y = pmul(y, x3);

  // Add the logarithm of the exponent back to the result of the interpolation.
  y1 = pmul(e, p8f_cephes_log_q1);
  tmp = pmul(x2, p8f_half);
  y = padd(y, y1);
  x = psub(x, tmp);
  y2 = pmul(e, p8f_cephes_log_q2);
  x = padd(x, y);
  x = padd(x, y2);

  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
  // OR-ing with the all-ones invalid mask turns those lanes into NaN bit patterns;
  // lanes flagged by iszero_mask are replaced by minus infinity.
  return _mm256_or_ps(
      _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
      _mm256_and_ps(iszero_mask, p8f_minus_inf));
}
Packet Eigen::internal::plset | ( | const typename unpacket_traits< Packet >::type & | a | ) | [inline] |
Returns a packet with coefficients (a,a+1,...,a+packet_size-1).
Definition at line 276 of file GenericPacketMath.h.
{ return a; }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::plset< Packet2d > | ( | const double & | a | ) |
Definition at line 190 of file SSE/PacketMath.h.
{ return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::plset< Packet4d > | ( | const double & | a | ) |
Definition at line 122 of file AVX/PacketMath.h.
{ return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::plset< Packet4f > | ( | const float & | a | ) |
Definition at line 297 of file AltiVec/PacketMath.h.
{ return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::plset< Packet4i > | ( | const int & | a | ) |
Definition at line 298 of file AltiVec/PacketMath.h.
{ return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::plset< Packet8f > | ( | const float & | a | ) |
Definition at line 121 of file AVX/PacketMath.h.
{ return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pmadd | ( | const Packet4f & | a, |
const Packet4f & | b, | ||
const Packet4f & | c | ||
) |
Definition at line 373 of file AltiVec/PacketMath.h.
{ return vec_madd(a, b, c); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pmadd | ( | const Packet4i & | a, |
const Packet4i & | b, | ||
const Packet4i & | c | ||
) |
Definition at line 374 of file AltiVec/PacketMath.h.
EIGEN_DEVICE_FUNC Packet Eigen::internal::pmadd | ( | const Packet & | a, |
const Packet & | b, | ||
const Packet & | c | ||
) | [inline] |
Definition at line 474 of file GenericPacketMath.h.
EIGEN_DEVICE_FUNC Packet Eigen::internal::pmax | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
Definition at line 179 of file GenericPacketMath.h.
{ return numext::maxi(a, b); }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pmax< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 262 of file SSE/PacketMath.h.
{ return _mm_max_pd(a,b); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pmax< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 184 of file AVX/PacketMath.h.
{ return _mm256_max_pd(a,b); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pmax< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 379 of file AltiVec/PacketMath.h.
{ return vec_max(a, b); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pmax< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 380 of file AltiVec/PacketMath.h.
{ return vec_max(a, b); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pmax< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 183 of file AVX/PacketMath.h.
{ return _mm256_max_ps(a,b); }
EIGEN_DEVICE_FUNC Packet Eigen::internal::pmin | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
Definition at line 174 of file GenericPacketMath.h.
{ return numext::mini(a, b); }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pmin< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 249 of file SSE/PacketMath.h.
{ return _mm_min_pd(a,b); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pmin< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 181 of file AVX/PacketMath.h.
{ return _mm256_min_pd(a,b); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pmin< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 376 of file AltiVec/PacketMath.h.
{ return vec_min(a, b); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pmin< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 377 of file AltiVec/PacketMath.h.
{ return vec_min(a, b); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pmin< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 180 of file AVX/PacketMath.h.
{ return _mm256_min_ps(a,b); }
EIGEN_DEVICE_FUNC Packet Eigen::internal::pmul | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
std::complex<float> Eigen::internal::pmul | ( | const std::complex< float > & | a, |
const std::complex< float > & | b | ||
) | [inline] |
std::complex<double> Eigen::internal::pmul | ( | const std::complex< double > & | a, |
const std::complex< double > & | b | ||
) | [inline] |
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pmul< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 310 of file SSE/Complex.h.
{
  // Complex product of the single complex<double> held in a and b.
#ifdef EIGEN_VECTORIZE_SSE3
  // SSE3 path: _mm_addsub_pd subtracts in the low element and adds in the
  // high element, combining the two partial products in one instruction.
  return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
                                 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
                                            vec2d_swizzle1(b.v, 1, 0))));
#else
  // Fallback: flip the sign bit of the low double of the second product
  // (the mask sets only bit 63 of the low element), then add.
  const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
  return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
                              _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
                                                    vec2d_swizzle1(b.v, 1, 0)), mask)));
#endif
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pmul< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 281 of file AVX/Complex.h.
{
// Element-wise complex product of the packed values in a and b.
// NOTE(review): assumes each complex number occupies two adjacent doubles as
// (real, imag) inside one 128-bit lane — confirm against Packet2cd.
// Mask 0x0: both slots of each 128-bit lane take that lane's lower double of a.
__m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
__m256d even = _mm256_mul_pd(tmp1, b.v);
// Mask 0xF: both slots of each lane take that lane's upper double of a.
__m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
// Mask 0x5: swap the two doubles of b inside each lane.
__m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
// addsub: even positions get even - odd, odd positions get even + odd.
return Packet2cd(_mm256_addsub_pd(even, odd));
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pmul< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 91 of file AltiVec/Complex.h.
{
  // Complex product of the packed values in a and b (AltiVec).
  Packet4f v1, v2;

  // Permute and multiply the real parts of a and b
  v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
  // Get the imaginary parts of a
  v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
  // multiply a_re * b
  v1 = vec_madd(v1, b.v, p4f_ZERO);
  // multiply a_im * b and get the conjugate result
  v2 = vec_madd(v2, b.v, p4f_ZERO);
  v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
  // permute back to a proper order
  v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);

  return Packet2cf(vec_add(v1, v2));
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pmul< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 221 of file SSE/PacketMath.h.
{ return _mm_mul_pd(a,b); }
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pmul< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 62 of file AVX/Complex.h.
{
// Element-wise complex product of the packed values in a and b.
// NOTE(review): assumes each complex number is stored as adjacent
// (real, imag) floats — confirm against Packet4cf.
// moveldup duplicates the even-indexed floats of a; multiply them by b.
__m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
// movehdup duplicates the odd-indexed floats of a; the permute swaps each
// adjacent pair of floats in b before the multiply.
__m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
// addsub: even positions get tmp1 - tmp2, odd positions get tmp1 + tmp2.
__m256 result = _mm256_addsub_ps(tmp1, tmp2);
return Packet4cf(result);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pmul< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 144 of file AVX/PacketMath.h.
{ return _mm256_mul_pd(a,b); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pmul< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 312 of file AltiVec/PacketMath.h.
{ return vec_madd(a,b,p4f_ZERO); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pmul< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 147 of file NEON/PacketMath.h.
{ return vmulq_s32(a,b); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pmul< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 143 of file AVX/PacketMath.h.
{ return _mm256_mul_ps(a,b); }
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pnegate | ( | const Packet4cf & | a | ) |
Definition at line 52 of file AVX/Complex.h.
{ return Packet4cf(pnegate(a.v)); }
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pnegate | ( | const Packet2cf & | a | ) |
Definition at line 88 of file AltiVec/Complex.h.
{ return Packet2cf(pnegate(a.v)); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pnegate | ( | const Packet8f & | a | ) |
Definition at line 130 of file AVX/PacketMath.h.
{
  // Negate every float by flipping only its sign bit.
  // Computing 0 - a instead would turn +0.0f into +0.0f (not -0.0f), and the
  // Packet2d overload already negates with a sign-mask XOR.
  const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
  return _mm256_xor_ps(a,mask);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pnegate | ( | const Packet4d & | a | ) |
Definition at line 134 of file AVX/PacketMath.h.
{
  // Negate every double by flipping only its sign bit (bit 63, i.e. the top
  // bit of each odd 32-bit word). Computing 0 - a instead would turn +0.0
  // into +0.0 rather than -0.0; this mirrors the Packet2d overload's mask.
  const Packet4d mask = _mm256_castsi256_pd(
      _mm256_setr_epi32(0x0,0x80000000,0x0,0x80000000,0x0,0x80000000,0x0,0x80000000));
  return _mm256_xor_pd(a,mask);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pnegate | ( | const Packet & | a | ) | [inline] |
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pnegate | ( | const Packet2d & | a | ) |
Definition at line 206 of file SSE/PacketMath.h.
{ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000)); return _mm_xor_pd(a,mask); }
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pnegate | ( | const Packet2cd & | a | ) |
Definition at line 274 of file AVX/Complex.h.
{ return Packet2cd(pnegate(a.v)); }
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pnegate | ( | const Packet1cd & | a | ) |
Definition at line 303 of file SSE/Complex.h.
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pnegate | ( | const Packet4f & | a | ) |
Definition at line 306 of file AltiVec/PacketMath.h.
{ return psub<Packet4f>(p4f_ZERO, a); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pnegate | ( | const Packet4i & | a | ) |
Definition at line 307 of file AltiVec/PacketMath.h.
{ return psub<Packet4i>(p4i_ZERO, a); }
EIGEN_DEVICE_FUNC Packet Eigen::internal::por | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
Definition at line 196 of file GenericPacketMath.h.
{ return a | b; }
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::por< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 325 of file SSE/Complex.h.
{ return Packet1cd(_mm_or_pd(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::por< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 292 of file AVX/Complex.h.
{ return Packet2cd(_mm256_or_pd(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::por< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 111 of file AltiVec/Complex.h.
{ return Packet2cf(vec_or(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::por< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 290 of file SSE/PacketMath.h.
{ return _mm_or_pd(a,b); }
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::por< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 71 of file AVX/Complex.h.
{ return Packet4cf(_mm256_or_ps(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::por< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 199 of file AVX/PacketMath.h.
{ return _mm256_or_pd(a,b); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::por< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 385 of file AltiVec/PacketMath.h.
{ return vec_or(a, b); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::por< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 386 of file AltiVec/PacketMath.h.
{ return vec_or(a, b); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::por< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 198 of file AVX/PacketMath.h.
{ return _mm256_or_ps(a,b); }
EIGEN_DEVICE_FUNC unpacket_traits<Packet>::type Eigen::internal::predux | ( | const Packet & | a | ) | [inline] |
Definition at line 317 of file GenericPacketMath.h.
{ return a; }
EIGEN_DEVICE_FUNC conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type Eigen::internal::predux4 | ( | const Packet & | a | ) | [inline] |
Definition at line 326 of file GenericPacketMath.h.
{ return a; }
const DoublePacket<Packet>& Eigen::internal::predux4 | ( | const DoublePacket< Packet > & | a | ) |
Definition at line 598 of file GeneralBlockPanelKernel.h.
{
return a;
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::predux4< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 400 of file AVX/PacketMath.h.
{
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
}
EIGEN_STRONG_INLINE std::complex<double> Eigen::internal::predux< Packet1cd > | ( | const Packet1cd & | a | ) |
Definition at line 354 of file SSE/Complex.h.
{ return pfirst(a); }
EIGEN_STRONG_INLINE std::complex<double> Eigen::internal::predux< Packet2cd > | ( | const Packet2cd & | a | ) |
Definition at line 340 of file AVX/Complex.h.
EIGEN_STRONG_INLINE std::complex< float > Eigen::internal::predux< Packet2cf > | ( | const Packet2cf & | a | ) |
EIGEN_STRONG_INLINE double Eigen::internal::predux< Packet2d > | ( | const Packet2d & | a | ) |
Definition at line 545 of file SSE/PacketMath.h.
{ return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); }
EIGEN_STRONG_INLINE std::complex<float> Eigen::internal::predux< Packet4cf > | ( | const Packet4cf & | a | ) |
Definition at line 137 of file AVX/Complex.h.
EIGEN_STRONG_INLINE double Eigen::internal::predux< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 394 of file AVX/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux< Packet4f > | ( | const Packet4f & | a | ) |
EIGEN_STRONG_INLINE int Eigen::internal::predux< Packet4i > | ( | const Packet4i & | a | ) |
Definition at line 549 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 388 of file AVX/PacketMath.h.
EIGEN_DEVICE_FUNC unpacket_traits<Packet>::type Eigen::internal::predux_max | ( | const Packet & | a | ) | [inline] |
Definition at line 338 of file GenericPacketMath.h.
{ return a; }
EIGEN_STRONG_INLINE double Eigen::internal::predux_max< Packet2d > | ( | const Packet2d & | a | ) |
Definition at line 656 of file SSE/PacketMath.h.
{ return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a))); }
EIGEN_STRONG_INLINE double Eigen::internal::predux_max< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 438 of file AVX/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux_max< Packet4f > | ( | const Packet4f & | a | ) |
Definition at line 623 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE int Eigen::internal::predux_max< Packet4i > | ( | const Packet4i & | a | ) |
Definition at line 631 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux_max< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 431 of file AVX/PacketMath.h.
EIGEN_DEVICE_FUNC unpacket_traits<Packet>::type Eigen::internal::predux_min | ( | const Packet & | a | ) | [inline] |
Definition at line 334 of file GenericPacketMath.h.
{ return a; }
EIGEN_STRONG_INLINE double Eigen::internal::predux_min< Packet2d > | ( | const Packet2d & | a | ) |
Definition at line 630 of file SSE/PacketMath.h.
{ return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a))); }
EIGEN_STRONG_INLINE double Eigen::internal::predux_min< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 425 of file AVX/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux_min< Packet4f > | ( | const Packet4f & | a | ) |
Definition at line 606 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE int Eigen::internal::predux_min< Packet4i > | ( | const Packet4i & | a | ) |
Definition at line 614 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux_min< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 419 of file AVX/PacketMath.h.
EIGEN_DEVICE_FUNC unpacket_traits<Packet>::type Eigen::internal::predux_mul | ( | const Packet & | a | ) | [inline] |
Definition at line 330 of file GenericPacketMath.h.
{ return a; }
EIGEN_STRONG_INLINE std::complex<double> Eigen::internal::predux_mul< Packet1cd > | ( | const Packet1cd & | a | ) |
Definition at line 364 of file SSE/Complex.h.
{ return pfirst(a); }
EIGEN_STRONG_INLINE std::complex<double> Eigen::internal::predux_mul< Packet2cd > | ( | const Packet2cd & | a | ) |
Definition at line 354 of file AVX/Complex.h.
EIGEN_STRONG_INLINE std::complex< float > Eigen::internal::predux_mul< Packet2cf > | ( | const Packet2cf & | a | ) |
EIGEN_STRONG_INLINE double Eigen::internal::predux_mul< Packet2d > | ( | const Packet2d & | a | ) |
Definition at line 610 of file SSE/PacketMath.h.
{ return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a))); }
EIGEN_STRONG_INLINE std::complex<float> Eigen::internal::predux_mul< Packet4cf > | ( | const Packet4cf & | a | ) |
Definition at line 158 of file AVX/Complex.h.
{ return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1)))); }
EIGEN_STRONG_INLINE double Eigen::internal::predux_mul< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 412 of file AVX/PacketMath.h.
EIGEN_STRONG_INLINE float Eigen::internal::predux_mul< Packet4f > | ( | const Packet4f & | a | ) |
EIGEN_STRONG_INLINE int Eigen::internal::predux_mul< Packet4i > | ( | const Packet4i & | a | ) |
Definition at line 598 of file AltiVec/PacketMath.h.
{ EIGEN_ALIGN16 int aux[4]; pstore(aux, a); return aux[0] * aux[1] * aux[2] * aux[3]; }
EIGEN_STRONG_INLINE float Eigen::internal::predux_mul< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 405 of file AVX/PacketMath.h.
EIGEN_DEVICE_FUNC Packet Eigen::internal::preduxp | ( | const Packet * | vecs | ) | [inline] |
Definition at line 314 of file GenericPacketMath.h.
{ return vecs[0]; }
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::preduxp< Packet1cd > | ( | const Packet1cd * | vecs | ) |
Definition at line 359 of file SSE/Complex.h.
{
return vecs[0];
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::preduxp< Packet2cd > | ( | const Packet2cd * | vecs | ) |
Definition at line 346 of file AVX/Complex.h.
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::preduxp< Packet2cf > | ( | const Packet2cf * | vecs | ) |
Definition at line 151 of file AltiVec/Complex.h.
{
  // Combines vecs[0] and vecs[1] into one packet by shifting the two inputs
  // against each other and adding; operand order depends on endianness.
  Packet4f b1, b2;
#ifdef _BIG_ENDIAN
  b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
  b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
#else
  b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
  b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
#endif
  // Rotate b2 by 8 bytes so its halves line up with b1 before the add.
  b2 = (Packet4f) vec_sld(b2, b2, 8);
  b2 = padd(b1, b2);

  return Packet2cf(b2);
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::preduxp< Packet2d > | ( | const Packet2d * | vecs | ) |
Definition at line 564 of file SSE/PacketMath.h.
{
return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::preduxp< Packet4cf > | ( | const Packet4cf * | vecs | ) |
Definition at line 143 of file AVX/Complex.h.
{ Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0)); Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0)); t0 = _mm256_hadd_ps(t0,t1); Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0)); Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0)); t2 = _mm256_hadd_ps(t2,t3); t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4)); t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4)); return Packet4cf(_mm256_add_ps(t1,t3)); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::preduxp< Packet4d > | ( | const Packet4d * | vecs | ) |
Definition at line 375 of file AVX/PacketMath.h.
{ Packet4d tmp0, tmp1; tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]); tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1)); tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); return _mm256_blend_pd(tmp0, tmp1, 0xC); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::preduxp< Packet4f > | ( | const Packet4f * | vecs | ) |
Definition at line 521 of file AltiVec/PacketMath.h.
{
  // Transposes the four input packets, then adds the transposed rows.
  Packet4f v[4], sum[4];

  // It's easier and faster to transpose then add as columns
  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
  // Do the transpose, first set of moves
  v[0] = vec_mergeh(vecs[0], vecs[2]);
  v[1] = vec_mergel(vecs[0], vecs[2]);
  v[2] = vec_mergeh(vecs[1], vecs[3]);
  v[3] = vec_mergel(vecs[1], vecs[3]);
  // Get the resulting vectors
  sum[0] = vec_mergeh(v[0], v[2]);
  sum[1] = vec_mergel(v[0], v[2]);
  sum[2] = vec_mergeh(v[1], v[3]);
  sum[3] = vec_mergel(v[1], v[3]);

  // Now do the summation:
  // Lines 0+1
  sum[0] = vec_add(sum[0], sum[1]);
  // Lines 2+3
  sum[1] = vec_add(sum[2], sum[3]);
  // Add the results
  sum[0] = vec_add(sum[0], sum[1]);

  return sum[0];
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::preduxp< Packet4i > | ( | const Packet4i * | vecs | ) |
Definition at line 561 of file AltiVec/PacketMath.h.
{
  // Transposes the four input packets, then adds the transposed rows.
  Packet4i v[4], sum[4];

  // It's easier and faster to transpose then add as columns
  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
  // Do the transpose, first set of moves
  v[0] = vec_mergeh(vecs[0], vecs[2]);
  v[1] = vec_mergel(vecs[0], vecs[2]);
  v[2] = vec_mergeh(vecs[1], vecs[3]);
  v[3] = vec_mergel(vecs[1], vecs[3]);
  // Get the resulting vectors
  sum[0] = vec_mergeh(v[0], v[2]);
  sum[1] = vec_mergel(v[0], v[2]);
  sum[2] = vec_mergeh(v[1], v[3]);
  sum[3] = vec_mergel(v[1], v[3]);

  // Now do the summation:
  // Lines 0+1
  sum[0] = vec_add(sum[0], sum[1]);
  // Lines 2+3
  sum[1] = vec_add(sum[2], sum[3]);
  // Add the results
  sum[0] = vec_add(sum[0], sum[1]);

  return sum[0];
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::preduxp< Packet8f > | ( | const Packet8f * | vecs | ) |
Definition at line 347 of file AVX/PacketMath.h.
{ __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]); __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]); __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]); __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]); __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1); __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2); __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3); __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4); __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23); __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23); __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23); __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23); __m256 sum1 = _mm256_add_ps(perm1, hsum5); __m256 sum2 = _mm256_add_ps(perm2, hsum6); __m256 sum3 = _mm256_add_ps(perm3, hsum7); __m256 sum4 = _mm256_add_ps(perm4, hsum8); __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0); return final; }
EIGEN_DEVICE_FUNC void Eigen::internal::prefetch | ( | const Scalar * | addr | ) | [inline] |
Tries to prefetch the memory at addr into the cache.
Definition at line 293 of file GenericPacketMath.h.
{
  // Issues a prefetch for addr: inline PTX when compiling CUDA device code,
  // __builtin_prefetch on non-MSVC host compilers, and nothing under MSVC.
#ifdef __CUDA_ARCH__
#if defined(__LP64__)
  // 64-bit pointer operand constraint for inlined asm
  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
#else
  // 32-bit pointer operand constraint for inlined asm
  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
#endif
#elif !EIGEN_COMP_MSVC
  __builtin_prefetch(addr);
#endif
}
EIGEN_STRONG_INLINE void Eigen::internal::prefetch< double > | ( | const double * | addr | ) |
Definition at line 305 of file AVX/PacketMath.h.
{ _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
EIGEN_STRONG_INLINE void Eigen::internal::prefetch< float > | ( | const float * | addr | ) |
Definition at line 498 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE void Eigen::internal::prefetch< int > | ( | const int * | addr | ) |
Definition at line 499 of file AltiVec/PacketMath.h.
EIGEN_STRONG_INLINE void Eigen::internal::prefetch< std::complex< double > > | ( | const std::complex< double > * | addr | ) |
Definition at line 343 of file SSE/Complex.h.
{ _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
EIGEN_STRONG_INLINE void Eigen::internal::prefetch< std::complex< float > > | ( | const std::complex< float > * | addr | ) |
Definition at line 126 of file AltiVec/Complex.h.
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::preverse | ( | const Packet4cf & | a | ) |
Definition at line 124 of file AVX/Complex.h.
{
// Reverses the order of the four complex<float> values in a.
// Split the 256-bit register into its two 128-bit halves.
__m128 low = _mm256_extractf128_ps(a.v, 0);
__m128 high = _mm256_extractf128_ps(a.v, 1);
// Reinterpret as doubles so each 64-bit element is one whole complex<float>.
__m128d lowd = _mm_castps_pd(low);
__m128d highd = _mm_castps_pd(high);
// Mask 0x1 swaps the two 64-bit elements inside each half.
low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
// Reassemble with the halves exchanged: old low half goes to the upper
// 128 bits, old high half to the lower 128 bits.
__m256 result = _mm256_setzero_ps();
result = _mm256_insertf128_ps(result, low, 1);
result = _mm256_insertf128_ps(result, high, 0);
return Packet4cf(result);
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::preverse | ( | const Packet2cf & | a | ) |
Definition at line 136 of file AltiVec/Complex.h.
{ Packet4f rev_a; rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); return Packet2cf(rev_a); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::preverse | ( | const Packet8f & | a | ) |
Definition at line 319 of file AVX/PacketMath.h.
{
// Reverses the order of the eight floats in a.
// Mask 0x1b (selectors 3,2,1,0) reverses the four floats inside each 128-bit lane.
__m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
// Control value 1 exchanges the two 128-bit lanes.
return _mm256_permute2f128_ps(tmp, tmp, 1);
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::preverse | ( | const Packet4d & | a | ) |
Definition at line 324 of file AVX/PacketMath.h.
{
  // Reverses the order of the four doubles in a.
  // Mask 5 swaps the two doubles inside each 128-bit lane; control value 1
  // then exchanges the two lanes.
  // The listing previously continued past this return with an equivalent
  // variant (_mm256_permute2f128_pd followed by _mm256_permute_pd) that could
  // never execute; that dead code has been dropped.
  __m256d tmp = _mm256_shuffle_pd(a,a,5);
  return _mm256_permute2f128_pd(tmp, tmp, 1);
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::preverse | ( | const Packet2cd & | a | ) |
Definition at line 335 of file AVX/Complex.h.
{
__m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
return Packet2cd(result);
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::preverse | ( | const Packet & | a | ) | [inline] |
Definition at line 342 of file GenericPacketMath.h.
{ return a; }
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::preverse | ( | const Packet1cd & | a | ) |
Definition at line 352 of file SSE/Complex.h.
{ return a; }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::preverse | ( | const Packet2d & | a | ) |
Definition at line 432 of file SSE/PacketMath.h.
{ return _mm_shuffle_pd(a,a,0x1); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::preverse | ( | const Packet4f & | a | ) |
Definition at line 505 of file AltiVec/PacketMath.h.
{ return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::preverse | ( | const Packet4i & | a | ) |
Definition at line 506 of file AltiVec/PacketMath.h.
{ return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
std::ostream & Eigen::internal::print_matrix | ( | std::ostream & | s, |
const Derived & | _m, | ||
const IOFormat & | fmt | ||
) |
Prints the matrix _m to the output stream s using the output format fmt.
Definition at line 157 of file IO.h.
{
  // Writes _m to s using the prefixes, separators and suffixes from fmt,
  // and returns s.
  // An empty matrix prints only the matrix prefix and suffix.
  if(_m.size() == 0)
  {
    s << fmt.matPrefix << fmt.matSuffix;
    return s;
  }

  typename Derived::Nested m = _m;
  typedef typename Derived::Scalar Scalar;

  Index width = 0;

  // Pick the precision to force on the stream; 0 means "leave the stream's
  // current precision alone".
  std::streamsize explicit_precision;
  if(fmt.precision == StreamPrecision)
  {
    explicit_precision = 0;
  }
  else if(fmt.precision == FullPrecision)
  {
    if (NumTraits<Scalar>::IsInteger)
    {
      explicit_precision = 0;
    }
    else
    {
      explicit_precision = significant_decimals_impl<Scalar>::run();
    }
  }
  else
  {
    explicit_precision = fmt.precision;
  }

  // Remember the caller's precision so it can be restored before returning.
  std::streamsize old_precision = 0;
  if(explicit_precision) old_precision = s.precision(explicit_precision);

  bool align_cols = !(fmt.flags & DontAlignCols);
  if(align_cols)
  {
    // compute the largest width
    for(Index j = 0; j < m.cols(); ++j)
      for(Index i = 0; i < m.rows(); ++i)
      {
        // Format into a scratch stream that copies s's formatting state so
        // the measured length matches what will actually be printed.
        std::stringstream sstr;
        sstr.copyfmt(s);
        sstr << m.coeff(i,j);
        width = std::max<Index>(width, Index(sstr.str().length()));
      }
  }

  s << fmt.matPrefix;
  for(Index i = 0; i < m.rows(); ++i)
  {
    if (i)
      s << fmt.rowSpacer;
    s << fmt.rowPrefix;
    // width is re-applied before every coefficient written.
    if(width) s.width(width);
    s << m.coeff(i, 0);
    for(Index j = 1; j < m.cols(); ++j)
    {
      s << fmt.coeffSeparator;
      if (width) s.width(width);
      s << m.coeff(i, j);
    }
    s << fmt.rowSuffix;
    if( i < m.rows() - 1)
      s << fmt.rowSeparator;
  }
  s << fmt.matSuffix;
  if(explicit_precision) s.precision(old_precision);
  return s;
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::protate | ( | const Packet & | a | ) | [inline] |
Definition at line 356 of file GenericPacketMath.h.
{
return offset ? protate_impl<offset, Packet>::run(a) : a;
}
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::pround | ( | const Packet & | a | ) |
Definition at line 434 of file GenericPacketMath.h.
{ using numext::round; return round(a); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pround< Packet4d > | ( | const Packet4d & | a | ) |
Definition at line 187 of file AVX/PacketMath.h.
{ return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pround< Packet8f > | ( | const Packet8f & | a | ) |
Definition at line 186 of file AVX/PacketMath.h.
{ return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::prsqrt | ( | const Packet & | a | ) |
Definition at line 428 of file GenericPacketMath.h.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d Eigen::internal::prsqrt< Packet2d > | ( | const Packet2d & | x | ) |
Definition at line 514 of file arch/SSE/MathFunctions.h.
{
  // Computes 1/sqrt(x) with a full-precision square root and division;
  // a faster reciprocal-square-root approximation is deliberately not used
  // because it would only provide an approximation.
  return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d Eigen::internal::prsqrt< Packet4d > | ( | const Packet4d & | x | ) |
Definition at line 474 of file arch/AVX/MathFunctions.h.
{ _EIGEN_DECLARE_CONST_Packet4d(one, 1.0); return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x)); }
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::prsqrt< Packet4f > | ( | const Packet4f & | x | ) |
Definition at line 506 of file arch/SSE/MathFunctions.h.
{
  // Computes 1/sqrt(x) with a full-precision square root and division.
  // Unfortunately we can't use the much faster _mm_rsqrt_ps since it only
  // provides an approximation.
  return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f Eigen::internal::prsqrt< Packet8f > | ( | const Packet8f & | x | ) |
Definition at line 467 of file arch/AVX/MathFunctions.h.
{ _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f); return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x)); }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter | ( | Scalar * | to, |
const Packet & | from, | ||
Index | |||
) | [inline] |
Definition at line 289 of file GenericPacketMath.h.
{ pstore(to, from); }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< double, Packet2d > | ( | double * | to, |
const Packet2d & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 381 of file SSE/PacketMath.h.
{ to[stride*0] = _mm_cvtsd_f64(from); to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1)); }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< double, Packet4d > | ( | double * | to, |
const Packet4d & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 278 of file AVX/PacketMath.h.
{ __m128d low = _mm256_extractf128_pd(from, 0); to[stride*0] = _mm_cvtsd_f64(low); to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)); __m128d high = _mm256_extractf128_pd(from, 1); to[stride*2] = _mm_cvtsd_f64(high); to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)); }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< float, Packet4f > | ( | float * | to, |
const Packet4f & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 278 of file AltiVec/PacketMath.h.
{ float EIGEN_ALIGN16 af[4]; pstore<float>(af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; to[2*stride] = af[2]; to[3*stride] = af[3]; }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< float, Packet8f > | ( | float * | to, |
const Packet8f & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 264 of file AVX/PacketMath.h.
{ __m128 low = _mm256_extractf128_ps(from, 0); to[stride*0] = _mm_cvtss_f32(low); to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)); to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)); to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)); __m128 high = _mm256_extractf128_ps(from, 1); to[stride*4] = _mm_cvtss_f32(high); to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)); to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)); to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)); }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< int, Packet4i > | ( | int * | to, |
const Packet4i & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 287 of file AltiVec/PacketMath.h.
{ int EIGEN_ALIGN16 ai[4]; pstore<int>((int *)ai, from); to[0*stride] = ai[0]; to[1*stride] = ai[1]; to[2*stride] = ai[2]; to[3*stride] = ai[3]; }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< std::complex< double >, Packet2cd > | ( | std::complex< double > * | to, |
const Packet2cd & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 319 of file AVX/Complex.h.
{ __m128d low = _mm256_extractf128_pd(from.v, 0); to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1))); __m128d high = _mm256_extractf128_pd(from.v, 1); to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1))); }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< std::complex< float >, Packet2cf > | ( | std::complex< float > * | to, |
const Packet2cf & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 77 of file AltiVec/Complex.h.
{ std::complex<float> EIGEN_ALIGN16 af[2]; vec_st(from.v, 0, (float*)af); to[0*stride] = af[0]; to[1*stride] = af[1]; }
EIGEN_DEVICE_FUNC void Eigen::internal::pscatter< std::complex< float >, Packet4cf > | ( | std::complex< float > * | to, |
const Packet4cf & | from, | ||
Index | stride | ||
) | [inline] |
Definition at line 103 of file AVX/Complex.h.
{ __m128 low = _mm256_extractf128_ps(from.v, 0); to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1))); to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3))); __m128 high = _mm256_extractf128_ps(from.v, 1); to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1))); to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3))); }
EIGEN_DEVICE_FUNC Packet Eigen::internal::pset1 | ( | const typename unpacket_traits< Packet >::type & | a | ) | [inline] |
Definition at line 216 of file GenericPacketMath.h.
{ return a; }
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pset1< Packet1cd > | ( | const std::complex< double > & | from | ) |
Definition at line 334 of file SSE/Complex.h.
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pset1< Packet2cd > | ( | const std::complex< double > & | from | ) |
Definition at line 301 of file AVX/Complex.h.
{ // in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though) // return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from)); return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from)); }
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pset1< Packet2cf > | ( | const std::complex< float > & | from | ) |
Definition at line 58 of file AltiVec/Complex.h.
{ Packet2cf res; /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ if((ptrdiff_t(&from) % 16) == 0) res.v = pload<Packet4f>((const float *)&from); else res.v = ploadu<Packet4f>((const float *)&from); res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); return res; }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pset1< Packet2d > | ( | const double & | from | ) |
Definition at line 174 of file SSE/PacketMath.h.
{ return _mm_set1_pd(from); }
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pset1< Packet4cf > | ( | const std::complex< float > & | from | ) |
Definition at line 79 of file AVX/Complex.h.
{ return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from))); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pset1< Packet4d > | ( | const double & | from | ) |
Definition at line 115 of file AVX/PacketMath.h.
{ return _mm256_set1_pd(from); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pset1< Packet4f > | ( | const float & | from | ) |
Definition at line 223 of file AltiVec/PacketMath.h.
{ // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html float EIGEN_ALIGN16 af[4]; af[0] = from; Packet4f vc = pload<Packet4f>(af); vc = vec_splat(vc, 0); return vc; }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pset1< Packet4i > | ( | const int & | from | ) |
Definition at line 232 of file AltiVec/PacketMath.h.
{ int EIGEN_ALIGN16 ai[4]; ai[0] = from; Packet4i vc = pload<Packet4i>(ai); vc = vec_splat(vc, 0); return vc; }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pset1< Packet8f > | ( | const float & | from | ) |
Definition at line 114 of file AVX/PacketMath.h.
{ return _mm256_set1_ps(from); }
EIGEN_STRONG_INLINE Packet8i Eigen::internal::pset1< Packet8i > | ( | const int & | from | ) |
Definition at line 116 of file AVX/PacketMath.h.
{ return _mm256_set1_epi32(from); }
Packet8i Eigen::internal::pshiftleft | ( | Packet8i | v, |
int | n | ||
) | [inline] |
Definition at line 21 of file arch/AVX/MathFunctions.h.
{ #ifdef EIGEN_VECTORIZE_AVX2 return _mm256_slli_epi32(v, n); #else __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n); __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n); return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); #endif }
Packet8f Eigen::internal::pshiftright | ( | Packet8f | v, |
int | n | ||
) | [inline] |
Definition at line 32 of file arch/AVX/MathFunctions.h.
{ #ifdef EIGEN_VECTORIZE_AVX2 return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n)); #else __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n); __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n); return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)); #endif }
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::psin | ( | const Packet & | a | ) |
Definition at line 376 of file GenericPacketMath.h.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::psin< Packet4f > | ( | const Packet4f & | _x | ) |
Definition at line 258 of file arch/SSE/MathFunctions.h.
{ Packet4f x = _x; _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); _EIGEN_DECLARE_CONST_Packet4i(1, 1); _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); _EIGEN_DECLARE_CONST_Packet4i(2, 2); _EIGEN_DECLARE_CONST_Packet4i(4, 4); _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f); _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f); _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f); _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f); _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f); _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f); _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f); _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f); _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f); _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI Packet4f xmm1, xmm2, xmm3, sign_bit, y; Packet4i emm0, emm2; sign_bit = x; /* take the absolute value */ x = pabs(x); /* take the modulo */ /* extract the sign bit (upper one) */ sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask); /* scale by 4/Pi */ y = pmul(x, p4f_cephes_FOPI); /* store the integer part of y in mm0 */ emm2 = _mm_cvttps_epi32(y); /* j=(j+1) & (~1) (see the cephes sources) */ emm2 = _mm_add_epi32(emm2, p4i_1); emm2 = _mm_and_si128(emm2, p4i_not1); y = _mm_cvtepi32_ps(emm2); /* get the swap sign flag */ emm0 = _mm_and_si128(emm2, p4i_4); emm0 = _mm_slli_epi32(emm0, 29); /* get the polynom selection mask there is one polynom for 0 <= x <= Pi/4 and another one for Pi/4<x<=Pi/2 Both branches will be computed. 
*/ emm2 = _mm_and_si128(emm2, p4i_2); emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); Packet4f swap_sign_bit = _mm_castsi128_ps(emm0); Packet4f poly_mask = _mm_castsi128_ps(emm2); sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit); /* The magic pass: "Extended precision modular arithmetic" x = ((x - y * DP1) - y * DP2) - y * DP3; */ xmm1 = pmul(y, p4f_minus_cephes_DP1); xmm2 = pmul(y, p4f_minus_cephes_DP2); xmm3 = pmul(y, p4f_minus_cephes_DP3); x = padd(x, xmm1); x = padd(x, xmm2); x = padd(x, xmm3); /* Evaluate the first polynom (0 <= x <= Pi/4) */ y = p4f_coscof_p0; Packet4f z = _mm_mul_ps(x,x); y = pmadd(y, z, p4f_coscof_p1); y = pmadd(y, z, p4f_coscof_p2); y = pmul(y, z); y = pmul(y, z); Packet4f tmp = pmul(z, p4f_half); y = psub(y, tmp); y = padd(y, p4f_1); /* Evaluate the second polynom (Pi/4 <= x <= 0) */ Packet4f y2 = p4f_sincof_p0; y2 = pmadd(y2, z, p4f_sincof_p1); y2 = pmadd(y2, z, p4f_sincof_p2); y2 = pmul(y2, z); y2 = pmul(y2, x); y2 = padd(y2, x); /* select the correct result from the two polynoms */ y2 = _mm_and_ps(poly_mask, y2); y = _mm_andnot_ps(poly_mask, y); y = _mm_or_ps(y,y2); /* update the sign */ return _mm_xor_ps(y, sign_bit); }
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f Eigen::internal::psin< Packet8f > | ( | const Packet8f & | _x | ) |
Definition at line 49 of file arch/AVX/MathFunctions.h.
{ Packet8f x = _x; // Some useful values. _EIGEN_DECLARE_CONST_Packet8i(one, 1); _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f); _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f); _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f); _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f); _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f); _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f); _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f); _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f); // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period. Packet8f z = pmul(x, p8f_one_over_pi); Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four)); x = pmadd(shift, p8f_neg_pi_first, x); x = pmadd(shift, p8f_neg_pi_second, x); x = pmadd(shift, p8f_neg_pi_third, x); z = pmul(x, p8f_four_over_pi); // Make a mask for the entries that need flipping, i.e. wherever the shift // is odd. Packet8i shift_ints = _mm256_cvtps_epi32(shift); Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one))); Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31); // Create a mask for which interpolant to use, i.e. if z > 1, then the mask // is set to ones for that entry. Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ); // Evaluate the polynomial for the interval [1,3] in z. 
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f); _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f); _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f); _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f); Packet8f z_minus_two = psub(z, p8f_two); Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two); Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4); right = pmadd(right, z_minus_two2, p8f_coeff_right_2); right = pmadd(right, z_minus_two2, p8f_coeff_right_0); // Evaluate the polynomial for the interval [-1,1] in z. _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f); _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f); _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f); _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f); Packet8f z2 = pmul(z, z); Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5); left = pmadd(left, z2, p8f_coeff_left_3); left = pmadd(left, z2, p8f_coeff_left_1); left = pmul(left, z); // Assemble the results, i.e. select the left and right polynomials. left = _mm256_andnot_ps(ival_mask, left); right = _mm256_and_ps(ival_mask, right); Packet8f res = _mm256_or_ps(left, right); // Flip the sign on the odd intervals and return the result. res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask)); return res; }
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::psinh | ( | const Packet & | a | ) |
Definition at line 400 of file GenericPacketMath.h.
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::psqrt | ( | const Packet & | a | ) |
Definition at line 424 of file GenericPacketMath.h.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d Eigen::internal::psqrt< Packet2d > | ( | const Packet2d & | x | ) |
Definition at line 471 of file arch/SSE/MathFunctions.h.
{ return _mm_sqrt_pd(x); }
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d Eigen::internal::psqrt< Packet4d > | ( | const Packet4d & | x | ) |
Definition at line 432 of file arch/AVX/MathFunctions.h.
{
return _mm256_sqrt_pd(x);
}
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::psqrt< Packet4f > | ( | const Packet4f & | x | ) |
Definition at line 466 of file arch/SSE/MathFunctions.h.
{ return _mm_sqrt_ps(x); }
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f Eigen::internal::psqrt< Packet8f > | ( | const Packet8f & | x | ) |
Definition at line 427 of file arch/AVX/MathFunctions.h.
{
return _mm256_sqrt_ps(x);
}
EIGEN_DEVICE_FUNC void Eigen::internal::pstore | ( | Scalar * | to, |
const Packet & | from | ||
) | [inline] |
Copy the packet `from` to `*to`; the pointer `to` must be 16-byte aligned.
Definition at line 279 of file GenericPacketMath.h.
{ (*to) = from; }
void Eigen::internal::pstore1 | ( | typename unpacket_traits< Packet >::type * | to, |
const typename unpacket_traits< Packet >::type & | a | ||
) | [inline] |
Copy a packet with constant coefficient `a` (e.g., [a,a,a,a]) to `*to`; the pointer `to` must be 16-byte aligned.
Definition at line 467 of file GenericPacketMath.h.
{ pstore(to, pset1<Packet>(a)); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore1< Packet2d > | ( | double * | to, |
const double & | a | ||
) |
Definition at line 401 of file SSE/PacketMath.h.
{ Packet2d pa = _mm_set_sd(a); pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore1< Packet4d > | ( | double * | to, |
const double & | a | ||
) |
Definition at line 293 of file AVX/PacketMath.h.
{ Packet4d pa = pset1<Packet4d>(a); pstore(to, pa); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore1< Packet4f > | ( | float * | to, |
const float & | a | ||
) |
Definition at line 395 of file SSE/PacketMath.h.
{ Packet4f pa = _mm_set_ss(a); pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0))); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore1< Packet8f > | ( | float * | to, |
const float & | a | ||
) |
Definition at line 288 of file AVX/PacketMath.h.
{ Packet8f pa = pset1<Packet8f>(a); pstore(to, pa); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore1< Packet8i > | ( | int * | to, |
const int & | a | ||
) |
Definition at line 298 of file AVX/PacketMath.h.
{ Packet8i pa = pset1<Packet8i>(a); pstore(to, pa); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< double > | ( | double * | to, |
const Packet4d & | from | ||
) |
Definition at line 245 of file AVX/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< double > | ( | double * | to, |
const Packet2d & | from | ||
) |
Definition at line 354 of file SSE/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< float > | ( | float * | to, |
const Packet4f & | from | ||
) |
Definition at line 220 of file AltiVec/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< float > | ( | float * | to, |
const Packet8f & | from | ||
) |
Definition at line 244 of file AVX/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< int > | ( | int * | to, |
const Packet4i & | from | ||
) |
Definition at line 221 of file AltiVec/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< int > | ( | int * | to, |
const Packet8i & | from | ||
) |
Definition at line 246 of file AVX/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< std::complex< double > > | ( | std::complex< double > * | to, |
const Packet2cd & | from | ||
) |
Definition at line 310 of file AVX/Complex.h.
{ EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< std::complex< double > > | ( | std::complex< double > * | to, |
const Packet1cd & | from | ||
) |
Definition at line 340 of file SSE/Complex.h.
{ EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< std::complex< float > > | ( | std::complex< float > * | to, |
const Packet4cf & | from | ||
) |
Definition at line 92 of file AVX/Complex.h.
{ EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
EIGEN_STRONG_INLINE void Eigen::internal::pstore< std::complex< float > > | ( | std::complex< float > * | to, |
const Packet2cf & | from | ||
) |
Definition at line 123 of file AltiVec/Complex.h.
{ EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Eigen::internal::pstoret | ( | Scalar * | to, |
const Packet & | from | ||
) |
Copy the packet `from` to `*to`. The pointer `to` must be aligned on an `Alignment`-byte boundary.
Definition at line 493 of file GenericPacketMath.h.
EIGEN_DEVICE_FUNC void Eigen::internal::pstoreu | ( | Scalar * | to, |
const Packet & | from | ||
) | [inline] |
Copy the packet `from` to `*to` (unaligned store).
Definition at line 283 of file GenericPacketMath.h.
{ (*to) = from; }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< double > | ( | double * | to, |
const Packet4d & | from | ||
) |
Definition at line 249 of file AVX/PacketMath.h.
{ EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< double > | ( | double * | to, |
const Packet2d & | from | ||
) |
Definition at line 357 of file SSE/PacketMath.h.
{ EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< float > | ( | float * | to, |
const Packet8f & | from | ||
) |
Definition at line 248 of file AVX/PacketMath.h.
{ EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< float > | ( | float * | to, |
const Packet4f & | from | ||
) |
Definition at line 490 of file AltiVec/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< int > | ( | int * | to, |
const Packet8i & | from | ||
) |
Definition at line 250 of file AVX/PacketMath.h.
{ EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< int > | ( | int * | to, |
const Packet4i & | from | ||
) |
Definition at line 485 of file AltiVec/PacketMath.h.
{ EIGEN_DEBUG_ALIGNED_STORE vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to)); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< std::complex< double > > | ( | std::complex< double > * | to, |
const Packet2cd & | from | ||
) |
Definition at line 311 of file AVX/Complex.h.
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< std::complex< double > > | ( | std::complex< double > * | to, |
const Packet1cd & | from | ||
) |
Definition at line 341 of file SSE/Complex.h.
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< std::complex< float > > | ( | std::complex< float > * | to, |
const Packet4cf & | from | ||
) |
Definition at line 93 of file AVX/Complex.h.
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
EIGEN_STRONG_INLINE void Eigen::internal::pstoreu< std::complex< float > > | ( | std::complex< float > * | to, |
const Packet2cf & | from | ||
) |
Definition at line 124 of file AltiVec/Complex.h.
{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
EIGEN_DEVICE_FUNC Packet Eigen::internal::psub | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::psub< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 302 of file SSE/Complex.h.
{ return Packet1cd(_mm_sub_pd(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::psub< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 273 of file AVX/Complex.h.
{ return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::psub< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 87 of file AltiVec/Complex.h.
{ return Packet2cf(vec_sub(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet2d Eigen::internal::psub< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 198 of file SSE/PacketMath.h.
{ return _mm_sub_pd(a,b); }
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::psub< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 51 of file AVX/Complex.h.
{ return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
EIGEN_STRONG_INLINE Packet4d Eigen::internal::psub< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 128 of file AVX/PacketMath.h.
{ return _mm256_sub_pd(a,b); }
EIGEN_STRONG_INLINE Packet4f Eigen::internal::psub< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 303 of file AltiVec/PacketMath.h.
{ return vec_sub(a,b); }
EIGEN_STRONG_INLINE Packet4i Eigen::internal::psub< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 304 of file AltiVec/PacketMath.h.
{ return vec_sub(a,b); }
EIGEN_STRONG_INLINE Packet8f Eigen::internal::psub< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 127 of file AVX/PacketMath.h.
{ return _mm256_sub_ps(a,b); }
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::ptan | ( | const Packet & | a | ) |
Definition at line 384 of file GenericPacketMath.h.
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet Eigen::internal::ptanh | ( | const Packet & | a | ) |
Definition at line 408 of file GenericPacketMath.h.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f Eigen::internal::ptanh< Packet4f > | ( | const Packet4f & | _x | ) |
Definition at line 525 of file arch/SSE/MathFunctions.h.
{ // Clamp the inputs to the range [-9, 9] since anything outside // this range is +/-1.0f in single-precision. _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f); _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f); const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x)); // The monomial coefficients of the numerator polynomial (odd). _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f); _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f); _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f); _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f); _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); // The monomial coefficients of the denominator polynomial (even). _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f); _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f); _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f); _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f); // Since the polynomials are odd/even, we need x^2. const Packet4f x2 = pmul(x, x); // Evaluate the numerator polynomial p. Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11); p = pmadd(x2, p, p4f_alpha_9); p = pmadd(x2, p, p4f_alpha_7); p = pmadd(x2, p, p4f_alpha_5); p = pmadd(x2, p, p4f_alpha_3); p = pmadd(x2, p, p4f_alpha_1); p = pmul(x, p); // Evaluate the denominator polynomial p. Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4); q = pmadd(x2, q, p4f_beta_2); q = pmadd(x2, q, p4f_beta_0); // Divide the numerator by the denominator. return pdiv(p, q); }
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f Eigen::internal::ptanh< Packet8f > | ( | const Packet8f & | _x | ) |
Definition at line 274 of file arch/AVX/MathFunctions.h.
{ // Clamp the inputs to the range [-9, 9] since anything outside // this range is +/-1.0f in single-precision. _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f); _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f); const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x)); // The monomial coefficients of the numerator polynomial (odd). _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f); _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f); _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f); _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f); _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f); _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f); _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f); // The monomial coefficients of the denominator polynomial (even). _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f); _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f); _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f); _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f); // Since the polynomials are odd/even, we need x^2. const Packet8f x2 = pmul(x, x); // Evaluate the numerator polynomial p. Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11); p = pmadd(x2, p, p8f_alpha_9); p = pmadd(x2, p, p8f_alpha_7); p = pmadd(x2, p, p8f_alpha_5); p = pmadd(x2, p, p8f_alpha_3); p = pmadd(x2, p, p8f_alpha_1); p = pmul(x, p); // Evaluate the denominator polynomial p. Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4); q = pmadd(x2, q, p8f_beta_2); q = pmadd(x2, q, p8f_beta_0); // Divide the numerator by the denominator. return pdiv(p, q); }
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet2cf, 2 > & | kernel | ) | [inline] |
Definition at line 239 of file AltiVec/Complex.h.
{ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); kernel.packet[0].v = tmp; }
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet4cf, 4 > & | kernel | ) | [inline] |
Definition at line 435 of file AVX/Complex.h.
{ __m256d P0 = _mm256_castps_pd(kernel.packet[0].v); __m256d P1 = _mm256_castps_pd(kernel.packet[1].v); __m256d P2 = _mm256_castps_pd(kernel.packet[2].v); __m256d P3 = _mm256_castps_pd(kernel.packet[3].v); __m256d T0 = _mm256_shuffle_pd(P0, P1, 15); __m256d T1 = _mm256_shuffle_pd(P0, P1, 0); __m256d T2 = _mm256_shuffle_pd(P2, P3, 15); __m256d T3 = _mm256_shuffle_pd(P2, P3, 0); kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32)); kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49)); kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32)); kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49)); }
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet2cd, 2 > & | kernel | ) | [inline] |
Definition at line 453 of file AVX/Complex.h.
{ __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4)); kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4)); kernel.packet[0].v = tmp; }
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet8f, 8 > & | kernel | ) | [inline] |
Definition at line 533 of file AVX/PacketMath.h.
{ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]); __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]); __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]); __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]); __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]); __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]); __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]); __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]); __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0)); __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2)); __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0)); __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2)); __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0)); __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2)); __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0)); __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2)); kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20); kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20); kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20); kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20); kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31); kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31); kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31); kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31); }
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet8f, 4 > & | kernel | ) | [inline] |
Definition at line 561 of file AVX/PacketMath.h.
{ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]); __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]); __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]); __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]); __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0)); __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2)); __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0)); __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2)); kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20); kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20); kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31); kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31); }
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet, 1 > & | ) | [inline] |
Definition at line 566 of file GenericPacketMath.h.
{
// Nothing to do in the scalar case, i.e. a 1x1 matrix:
// a block holding a single packet is already its own transpose.
}
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet4d, 4 > & | kernel | ) | [inline] |
Definition at line 579 of file AVX/PacketMath.h.
{
  // In-register transpose of a 4x4 block of doubles held in four AVX packets.

  // Pair up rows (0,1) and (2,3): mask 0xF picks the upper double of every
  // 128-bit lane, mask 0 picks the lower one.
  const __m256d odd01  = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xF);
  const __m256d even01 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0x0);
  const __m256d odd23  = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xF);
  const __m256d even23 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0x0);

  // Recombine 128-bit lanes: 0x20 (32) takes both low lanes, 0x31 (49) both high lanes.
  kernel.packet[0] = _mm256_permute2f128_pd(even01, even23, 0x20);
  kernel.packet[1] = _mm256_permute2f128_pd(odd01,  odd23,  0x20);
  kernel.packet[2] = _mm256_permute2f128_pd(even01, even23, 0x31);
  kernel.packet[3] = _mm256_permute2f128_pd(odd01,  odd23,  0x31);
}
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet4f, 4 > & | kernel | ) | [inline] |
Definition at line 694 of file AltiVec/PacketMath.h.
{
  // 4x4 float transpose built from two rounds of AltiVec high/low merges.

  // Round 1: merge rows 0 with 2 and rows 1 with 3.
  const Packet4f hi02 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
  const Packet4f lo02 = vec_mergel(kernel.packet[0], kernel.packet[2]);
  const Packet4f hi13 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
  const Packet4f lo13 = vec_mergel(kernel.packet[1], kernel.packet[3]);

  // Round 2: merge the intermediate packets to obtain the transposed rows.
  kernel.packet[0] = vec_mergeh(hi02, hi13);
  kernel.packet[1] = vec_mergel(hi02, hi13);
  kernel.packet[2] = vec_mergeh(lo02, lo13);
  kernel.packet[3] = vec_mergel(lo02, lo13);
}
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet4i, 4 > & | kernel | ) | [inline] |
Definition at line 707 of file AltiVec/PacketMath.h.
{
  // 4x4 integer transpose built from two rounds of AltiVec high/low merges.

  // Round 1: merge rows 0 with 2 and rows 1 with 3.
  const Packet4i hi02 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
  const Packet4i lo02 = vec_mergel(kernel.packet[0], kernel.packet[2]);
  const Packet4i hi13 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
  const Packet4i lo13 = vec_mergel(kernel.packet[1], kernel.packet[3]);

  // Round 2: merge the intermediate packets to obtain the transposed rows.
  kernel.packet[0] = vec_mergeh(hi02, hi13);
  kernel.packet[1] = vec_mergel(hi02, hi13);
  kernel.packet[2] = vec_mergeh(lo02, lo13);
  kernel.packet[3] = vec_mergel(lo02, lo13);
}
EIGEN_DEVICE_FUNC void Eigen::internal::ptranspose | ( | PacketBlock< Packet2d, 2 > & | kernel | ) | [inline] |
Definition at line 790 of file SSE/PacketMath.h.
{
  // 2x2 double transpose: the low halves of both packets form the first row,
  // the high halves form the second row.
  const __m128d lowHalves  = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
  const __m128d highHalves = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
  kernel.packet[0] = lowHalves;
  kernel.packet[1] = highHalves;
}
EIGEN_STRONG_INLINE void Eigen::internal::punpackp | ( | Packet4f * | vecs | ) |
Definition at line 513 of file SSE/PacketMath.h.
{
  // Broadcasts each of the four 32-bit elements of vecs[0] into its own packet:
  // vecs[k] ends up holding element k of the original vecs[0] in all four slots.
  // The source is captured first so that overwriting vecs[0] cannot disturb it.
  const __m128i src = _mm_castps_si128(vecs[0]);
  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(src, 0x00));
  vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(src, 0x55));
  vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(src, 0xAA));
  vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(src, 0xFF));
}
EIGEN_DEVICE_FUNC Packet Eigen::internal::pxor | ( | const Packet & | a, |
const Packet & | b | ||
) | [inline] |
Definition at line 200 of file GenericPacketMath.h.
{
  // Generic fallback: relies on operator^ being defined for the Packet type.
  return a ^ b;
}
EIGEN_STRONG_INLINE Packet1cd Eigen::internal::pxor< Packet1cd > | ( | const Packet1cd & | a, |
const Packet1cd & | b | ||
) |
Definition at line 326 of file SSE/Complex.h.
{
  // XOR the wrapped SSE double vectors and re-wrap the result as a Packet1cd.
  return Packet1cd(_mm_xor_pd(a.v,b.v));
}
EIGEN_STRONG_INLINE Packet2cd Eigen::internal::pxor< Packet2cd > | ( | const Packet2cd & | a, |
const Packet2cd & | b | ||
) |
Definition at line 293 of file AVX/Complex.h.
{
  // XOR the wrapped AVX double vectors and re-wrap the result as a Packet2cd.
  return Packet2cd(_mm256_xor_pd(a.v,b.v));
}
EIGEN_STRONG_INLINE Packet2cf Eigen::internal::pxor< Packet2cf > | ( | const Packet2cf & | a, |
const Packet2cf & | b | ||
) |
Definition at line 112 of file AltiVec/Complex.h.
{
  // XOR the wrapped vectors with vec_xor and re-wrap the result as a Packet2cf.
  return Packet2cf(vec_xor(a.v,b.v));
}
EIGEN_STRONG_INLINE Packet2d Eigen::internal::pxor< Packet2d > | ( | const Packet2d & | a, |
const Packet2d & | b | ||
) |
Definition at line 294 of file SSE/PacketMath.h.
{
  // Bitwise XOR of the two packets through the SSE2 _mm_xor_pd intrinsic.
  return _mm_xor_pd(a,b);
}
EIGEN_STRONG_INLINE Packet4cf Eigen::internal::pxor< Packet4cf > | ( | const Packet4cf & | a, |
const Packet4cf & | b | ||
) |
Definition at line 72 of file AVX/Complex.h.
{
  // XOR the wrapped AVX float vectors and re-wrap the result as a Packet4cf.
  return Packet4cf(_mm256_xor_ps(a.v,b.v));
}
EIGEN_STRONG_INLINE Packet4d Eigen::internal::pxor< Packet4d > | ( | const Packet4d & | a, |
const Packet4d & | b | ||
) |
Definition at line 202 of file AVX/PacketMath.h.
{
  // Bitwise XOR of the two packets through the AVX _mm256_xor_pd intrinsic.
  return _mm256_xor_pd(a,b);
}
EIGEN_STRONG_INLINE Packet4f Eigen::internal::pxor< Packet4f > | ( | const Packet4f & | a, |
const Packet4f & | b | ||
) |
Definition at line 388 of file AltiVec/PacketMath.h.
{
  // Bitwise XOR of the two float packets through vec_xor.
  return vec_xor(a, b);
}
EIGEN_STRONG_INLINE Packet4i Eigen::internal::pxor< Packet4i > | ( | const Packet4i & | a, |
const Packet4i & | b | ||
) |
Definition at line 389 of file AltiVec/PacketMath.h.
{
  // Bitwise XOR of the two integer packets through vec_xor.
  return vec_xor(a, b);
}
EIGEN_STRONG_INLINE Packet8f Eigen::internal::pxor< Packet8f > | ( | const Packet8f & | a, |
const Packet8f & | b | ||
) |
Definition at line 201 of file AVX/PacketMath.h.
{
  // Bitwise XOR of the two packets through the AVX _mm256_xor_ps intrinsic.
  return _mm256_xor_ps(a,b);
}
void Eigen::internal::queryCacheSizes | ( | int & | l1, |
int & | l2, | ||
int & | l3 | ||
) | [inline] |
Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively
Definition at line 917 of file Memory.h.
{
  // Fills l1, l2 and l3 with the data-cache sizes reported by CPUID, dispatching
  // to the vendor-specific query routine. When EIGEN_CPUID is not available all
  // three outputs are set to -1.
  #ifdef EIGEN_CPUID
  int abcd[4];
  // Vendor identification strings, stored as three 32-bit little-endian words.
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e}; // "GenuineIntel"
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163}; // "AuthenticAMD"
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  // CPUID leaf 0 returns the highest supported standard function in EAX, while
  // EBX/EDX/ECX carry the vendor string. Reading abcd[1] (EBX) here would yield
  // a fragment of the vendor string rather than a function count.
  // NOTE(review): assumes EIGEN_CPUID stores EAX,EBX,ECX,EDX in abcd[0..3] - confirm against the macro.
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
  //   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
  //   ||cpuid_is_vendor(abcd,"CyrixInstead")
  //   ||cpuid_is_vendor(abcd,"CentaurHauls")
  //   ||cpuid_is_vendor(abcd,"GenuineTMx86")
  //   ||cpuid_is_vendor(abcd,"TransmetaCPU")
  //   ||cpuid_is_vendor(abcd,"RiseRiseRise")
  //   ||cpuid_is_vendor(abcd,"Geode by NSC")
  //   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
  //   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
  //   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}
int Eigen::internal::queryL1CacheSize | ( | ) | [inline] |
Definition at line 954 of file Memory.h.
{
  // Returns the L1 size reported by queryCacheSizes; l1 starts at -1 so that
  // value is returned if the query leaves it untouched.
  int l1 = -1;
  int l2;
  int l3;
  queryCacheSizes(l1, l2, l3);
  return l1;
}
int Eigen::internal::queryTopLevelCacheSize | ( | ) | [inline] |
Definition at line 963 of file Memory.h.
{
  // Returns the larger of the L2 and L3 sizes reported by queryCacheSizes;
  // both start at -1 so that value is returned if the query leaves them untouched.
  int l1;
  int l2 = -1;
  int l3 = -1;
  queryCacheSizes(l1, l2, l3);
  return (std::max)(l2, l3);
}
Index Eigen::internal::QuickSplit | ( | VectorV & | row, |
VectorI & | ind, | ||
Index | ncut | ||
) |
Compute a quick-sort split of a vector. On output, the vector row is permuted such that its elements satisfy abs(row(i)) >= abs(row(ncut)) if i<ncut, and abs(row(i)) <= abs(row(ncut)) if i>ncut.
row | The vector of values |
ind | The array of indices of the elements in row |
ncut | The number of largest elements to keep |
Definition at line 29 of file IncompleteLUT.h.
{
  typedef typename VectorV::RealScalar RealScalar;
  using std::swap;
  using std::abs;

  const Index n = row.size(); /* length of the vector */
  ncut--;                     /* to fit the zero-based indices */
  Index first = 0;
  Index last = n - 1;

  // Nothing to split when the cut position lies outside the vector.
  if (ncut < first || ncut > last) return 0;

  for (;;)
  {
    // Partition [first,last] around the magnitude of row(first): entries with a
    // strictly larger magnitude are moved to the front, ind follows row.
    Index mid = first;
    const RealScalar abskey = abs(row(mid));
    for (Index j = first + 1; j <= last; j++)
    {
      if (abs(row(j)) > abskey)
      {
        ++mid;
        swap(row(mid), row(j));
        swap(ind(mid), ind(j));
      }
    }

    /* Interchange for the pivot element */
    swap(row(mid), row(first));
    swap(ind(mid), ind(first));

    if (mid == ncut) break;   /* the pivot landed on the cut position */

    // Continue in the side of the partition that contains the cut position.
    if (mid > ncut) last = mid - 1;
    else            first = mid + 1;
  }

  return 0; /* mid is equal to ncut */
}
void Eigen::internal::real_2x2_jacobi_svd | ( | const MatrixType & | matrix, |
Index | p, | ||
Index | q, | ||
JacobiRotation< RealScalar > * | j_left, | ||
JacobiRotation< RealScalar > * | j_right | ||
) |
Definition at line 405 of file JacobiSVD.h.
{
  using std::sqrt;
  using std::abs;

  // Real parts of the 2x2 sub-block of `matrix` selected by rows/columns p and q.
  Matrix<RealScalar,2,2> m;
  m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
       numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));

  // First rotation, built from the diagonal sum and the off-diagonal difference.
  JacobiRotation<RealScalar> rot1;
  const RealScalar diagSum = m.coeff(0,0) + m.coeff(1,1);
  const RealScalar offDiff = m.coeff(1,0) - m.coeff(0,1);

  if(!(offDiff == RealScalar(0)))
  {
    // If d!=0, then t/d cannot overflow because the magnitude of the
    // entries forming d are not too small compared to the ones forming t.
    const RealScalar ratio = diagSum / offDiff;
    const RealScalar norm = sqrt(RealScalar(1) + numext::abs2(ratio));
    rot1.s() = RealScalar(1) / norm;
    rot1.c() = ratio / norm;
  }
  else
  {
    // No off-diagonal difference: the first rotation is the identity.
    rot1.s() = RealScalar(0);
    rot1.c() = RealScalar(1);
  }

  // Apply the first rotation, let the right rotation be the Jacobi rotation of
  // the rotated block, and compose the left rotation from both.
  m.applyOnTheLeft(0,1,rot1);
  j_right->makeJacobi(m,0,1);
  *j_left = rot1 * j_right->transpose();
}
void Eigen::internal::set_from_triplets | ( | const InputIterator & | begin, |
const InputIterator & | end, | ||
SparseMatrixType & | mat, | ||
DupFunctor | dup_func | ||
) |
Definition at line 907 of file SparseMatrix.h.
{
  enum { IsRowMajor = SparseMatrixType::IsRowMajor };
  typedef typename SparseMatrixType::Scalar Scalar;
  typedef typename SparseMatrixType::StorageIndex StorageIndex;

  // Temporary with the opposite storage order of `mat`; the final assignment
  // back to `mat` performs the storage-order conversion.
  SparseMatrix<Scalar,IsRowMajor?ColMajor:RowMajor,StorageIndex> oppositeOrder(mat.rows(),mat.cols());

  if(!(begin!=end))
  {
    // No triplets: assigning the empty temporary is all that is left to do.
    mat = oppositeOrder;
  }
  else
  {
    // pass 1: count the nnz per inner-vector
    typename SparseMatrixType::IndexVector counts(oppositeOrder.outerSize());
    counts.setZero();
    for(InputIterator it(begin); it!=end; ++it)
    {
      eigen_assert(it->row()>=0 && it->row()<mat.rows() && it->col()>=0 && it->col()<mat.cols());
      counts(IsRowMajor ? it->col() : it->row())++;
    }

    // pass 2: insert all the elements into the temporary
    oppositeOrder.reserve(counts);
    for(InputIterator it(begin); it!=end; ++it)
    {
      oppositeOrder.insertBackUncompressed(it->row(),it->col()) = it->value();
    }

    // pass 3: merge duplicate entries using dup_func
    oppositeOrder.collapseDuplicates(dup_func);

    // pass 4: transposed copy -> implicit sorting
    mat = oppositeOrder;
  }
}
EIGEN_DEVICE_FUNC void Eigen::internal::smart_copy | ( | const T * | start, |
const T * | end, | ||
T * | target | ||
) |
void Eigen::internal::smart_memmove | ( | const T * | start, |
const T * | end, | ||
T * | target | ||
) |
void Eigen::internal::solve_sparse_through_dense_panels | ( | const Decomposition & | dec, |
const Rhs & | rhs, | ||
Dest & | dest | ||
) |
Helper functions to solve with a sparse right-hand-side and result. The rhs is decomposed into small vertical panels which are solved through dense temporaries.
Definition at line 22 of file SparseSolverBase.h.
{
  EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
  typedef typename Dest::Scalar DestScalar;
  typedef Eigen::Matrix<DestScalar,Dynamic,Dynamic> DensePanel;

  // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
  static const Index NbColsAtOnce = 4;
  const Index rhsCols = rhs.cols();
  const Index size = rhs.rows();

  // the temporary matrices do not need more columns than NbColsAtOnce:
  const Index tmpCols = (std::min)(rhsCols, NbColsAtOnce);
  DensePanel rhsPanel(size,tmpCols);
  DensePanel solPanel(size,tmpCols);

  for(Index firstCol=0; firstCol<rhsCols; firstCol+=NbColsAtOnce)
  {
    // The last panel may hold fewer than NbColsAtOnce columns.
    const Index panelCols = std::min<Index>(rhsCols-firstCol, NbColsAtOnce);

    // densify the panel, solve it, and write the solution back as sparse columns
    rhsPanel.leftCols(panelCols) = rhs.middleCols(firstCol,panelCols);
    solPanel.leftCols(panelCols) = dec.solve(rhsPanel.leftCols(panelCols));
    dest.middleCols(firstCol,panelCols) = solPanel.leftCols(panelCols).sparseView();
  }
}
void Eigen::internal::sparse_selfadjoint_time_dense_product | ( | const SparseLhsType & | lhs, |
const DenseRhsType & | rhs, | ||
DenseResType & | res, | ||
const AlphaType & | alpha | ||
) | [inline] |
Definition at line 250 of file SparseSelfAdjointView.h.
{
  EIGEN_ONLY_USED_FOR_DEBUG(alpha);
  // TODO use alpha
  eigen_assert(alpha==AlphaType(1) && "alpha != 1 is not implemented yet, sorry");

  typedef evaluator<SparseLhsType> LhsEval;
  typedef typename evaluator<SparseLhsType>::InnerIterator LhsIterator;
  typedef typename SparseLhsType::Scalar LhsScalar;

  enum {
    LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit,
    // Whether the stored triangle corresponds to the entries that come before
    // the diagonal inside each inner vector.
    ProcessFirstHalf =
             ((Mode&(Upper|Lower))==(Upper|Lower))
          || ( (Mode&Upper) && !LhsIsRowMajor)
          || ( (Mode&Lower) && LhsIsRowMajor),
    ProcessSecondHalf = !ProcessFirstHalf
  };

  LhsEval lhsEval(lhs);

  for (Index outer=0; outer<lhs.outerSize(); ++outer)
  {
    LhsIterator it(lhsEval,outer);

    if (ProcessSecondHalf)
    {
      // Skip the entries before the diagonal, then handle the diagonal itself.
      while (it && it.index()<outer) ++it;
      if(it && it.index()==outer)
      {
        res.row(outer) += it.value() * rhs.row(outer);
        ++it;
      }
    }

    // Off-diagonal entries: each stored coefficient contributes to two result
    // rows, once as is and once conjugated at the mirrored position.
    for(; (ProcessFirstHalf ? it && it.index() < outer : it) ; ++it)
    {
      const Index inner = it.index();
      const Index r = LhsIsRowMajor ? outer : inner;
      const Index c = LhsIsRowMajor ? inner : outer;
      const LhsScalar coeff = it.value();
      res.row(r) += (coeff) * rhs.row(c);
      res.row(c) += numext::conj(coeff) * rhs.row(r);
    }

    // When the first half was processed, the iterator now sits on the diagonal (if stored).
    if (ProcessFirstHalf && it && (it.index()==outer))
      res.row(outer) += it.value() * rhs.row(outer);
  }
}
static void Eigen::internal::sparse_sparse_product_with_pruning_impl | ( | const Lhs & | lhs, |
const Rhs & | rhs, | ||
ResultType & | res, | ||
const typename ResultType::RealScalar & | tolerance | ||
) | [static] |
Definition at line 20 of file SparseSparseProductWithPruning.h.
{
  // Computes res = lhs * rhs one outer vector at a time, accumulating each
  // result vector in an AmbiVector and keeping only the entries that pass the
  // AmbiVector iterator's `tolerance` filter.

  // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res);

  typedef typename remove_all<Lhs>::type::Scalar Scalar;
  typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex;

  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
  //Index size = lhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  // allocate a temporary buffer
  AmbiVector<Scalar,StorageIndex> tempVector(rows);

  // mimics a resizeByInnerOuter:
  if(ResultType::IsRowMajor)
    res.resize(cols, rows);
  else
    res.resize(rows, cols);

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // estimate the number of non zero entries
  // given a rhs column containing Y non zeros, we assume that the respective Y columns
  // of the lhs differs in average of one non zeros, thus the number of non zeros for
  // the product of a rhs column with the lhs is X+Y where X is the average number of non zero
  // per column of the lhs.
  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();

  res.reserve(estimated_nnz_prod);

  // Convert each dimension to double BEFORE multiplying: the product
  // rows*cols evaluated in Index arithmetic can overflow for large matrices.
  double ratioColRes = double(estimated_nnz_prod)/(double(lhs.rows())*double(rhs.cols()));

  for (Index j=0; j<cols; ++j)
  {
    // FIXME:
    //double ratioColRes = (double(rhs.innerVector(j).nonZeros()) + double(lhs.nonZeros())/double(lhs.cols()))/double(lhs.rows());
    // let's do a more accurate determination of the nnz ratio for the current column j of res
    tempVector.init(ratioColRes);
    tempVector.setZero();
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
      tempVector.restart();
      Scalar x = rhsIt.value();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
      {
        tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
      }
    }

    // Copy the accumulated entries of outer vector j into the result.
    res.startVec(j);
    for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
      res.insertBackByOuterInner(j,it.index()) = it.value();
  }
  res.finalize();
}
static void Eigen::internal::sparse_sparse_to_dense_product_impl | ( | const Lhs & | lhs, |
const Rhs & | rhs, | ||
ResultType & | res | ||
) | [static] |
Definition at line 264 of file ConservativeSparseSparseProduct.h.
{
  typedef typename remove_all<Lhs>::type::Scalar Scalar;
  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
  typedef typename evaluator<Rhs>::InnerIterator RhsInnerIterator;

  const Index outerCount = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // For every stored rhs(k,j), add rhs(k,j) * lhs(:,k) into column j of res.
  // res is only accumulated into here, never cleared.
  for (Index j=0; j<outerCount; ++j)
  {
    for (RhsInnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      const Scalar rhsCoeff = rhsIt.value();
      const Index k = rhsIt.index();
      for (LhsInnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        const Index i = lhsIt.index();
        const Scalar lhsCoeff = lhsIt.value();
        res.coeffRef(i,j) += lhsCoeff * rhsCoeff;
      }
    }
  }
}
void Eigen::internal::sparse_time_dense_product | ( | const SparseLhsType & | lhs, |
const DenseRhsType & | rhs, | ||
DenseResType & | res, | ||
const AlphaType & | alpha | ||
) | [inline] |
Definition at line 145 of file SparseDenseProduct.h.
{
  // Thin entry point: forwards all arguments unchanged to the static run()
  // of sparse_time_dense_product_impl instantiated for these four types.
  sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType>::run(lhs, rhs, res, alpha);
}
EIGEN_DONT_INLINE void Eigen::internal::sparselu_gemm | ( | Index | m, |
Index | n, | ||
Index | d, | ||
const Scalar * | A, | ||
Index | lda, | ||
const Scalar * | B, | ||
Index | ldb, | ||
Scalar * | C, | ||
Index | ldc | ||
) |
A general matrix-matrix product kernel optimized for the SparseLU factorization.
Definition at line 26 of file SparseLU_gemm_kernel.h.
/* Accumulates C += A * B for matrices addressed column by column through raw
 * pointers: A is m x d with leading dimension lda, B is d x n with leading
 * dimension ldb, and C is m x n with leading dimension ldc. Rows before the
 * first aligned row of A are handled by a plain scalar triple loop. The
 * remaining rows are processed in chunks of BM rows, RN = 2 columns of B/C at a
 * time and RK (2 or 4) columns of A at a time, with a vectorized and peeled
 * inner loop followed by scalar tails. The internal assertion requires lda and
 * ldc to be multiples of the packet size and A and C to share the same first
 * aligned row. */
{ using namespace Eigen::internal; typedef typename packet_traits<Scalar>::type Packet; enum { NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, PacketSize = packet_traits<Scalar>::size, PM = 8, // peeling in M RN = 2, // register blocking RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk SM = PM*PacketSize // step along M }; Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once Index i0 = internal::first_default_aligned(A,m); eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m))); // handle the non aligned rows of A and C without any optimization: for(Index i=0; i<i0; ++i) { for(Index j=0; j<n; ++j) { Scalar c = C[i+j*ldc]; for(Index k=0; k<d; ++k) c += B[k+j*ldb] * A[i+k*lda]; C[i+j*ldc] = c; } } // process the remaining rows per chunk of BM rows for(Index ib=i0; ib<m; ib+=BM) { Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization // Let's process two columns of B-C at once for(Index j=0; j<n_end; j+=RN) { const Scalar* Bc0 = B+(j+0)*ldb; const Scalar* Bc1 = B+(j+1)*ldb; for(Index k=0; k<d_end; k+=RK) { // load and expand a RN x RK block of B Packet b00, b10, b20, b30, b01, b11, b21, b31; b00 = pset1<Packet>(Bc0[0]); b10 = pset1<Packet>(Bc0[1]); if(RK==4) b20 = pset1<Packet>(Bc0[2]); if(RK==4) b30 = pset1<Packet>(Bc0[3]); b01 = pset1<Packet>(Bc1[0]); b11 = pset1<Packet>(Bc1[1]); if(RK==4) b21 = pset1<Packet>(Bc1[2]); if(RK==4) b31 = pset1<Packet>(Bc1[3]); Packet a0, a1, a2, a3, c0, c1, t0, t1; const Scalar* A0 = A+ib+(k+0)*lda; const Scalar* A1 = A+ib+(k+1)*lda; const Scalar* A2 = 
A+ib+(k+2)*lda; const Scalar* A3 = A+ib+(k+3)*lda; Scalar* C0 = C+ib+(j+0)*ldc; Scalar* C1 = C+ib+(j+1)*ldc; a0 = pload<Packet>(A0); a1 = pload<Packet>(A1); if(RK==4) { a2 = pload<Packet>(A2); a3 = pload<Packet>(A3); } else { // workaround "may be used uninitialized in this function" warning a2 = a3 = a0; } #define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);} #define WORK(I) \ c0 = pload<Packet>(C0+i+(I)*PacketSize); \ c1 = pload<Packet>(C1+i+(I)*PacketSize); \ KMADD(c0, a0, b00, t0) \ KMADD(c1, a0, b01, t1) \ a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \ KMADD(c0, a1, b10, t0) \ KMADD(c1, a1, b11, t1) \ a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \ if(RK==4) KMADD(c0, a2, b20, t0) \ if(RK==4) KMADD(c1, a2, b21, t1) \ if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \ if(RK==4) KMADD(c0, a3, b30, t0) \ if(RK==4) KMADD(c1, a3, b31, t1) \ if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \ pstore(C0+i+(I)*PacketSize, c0); \ pstore(C1+i+(I)*PacketSize, c1) // process rows of A' - C' with aggressive vectorization and peeling for(Index i=0; i<actual_b_end1; i+=PacketSize*8) { EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1"); prefetch((A0+i+(5)*PacketSize)); prefetch((A1+i+(5)*PacketSize)); if(RK==4) prefetch((A2+i+(5)*PacketSize)); if(RK==4) prefetch((A3+i+(5)*PacketSize)); WORK(0); WORK(1); WORK(2); WORK(3); WORK(4); WORK(5); WORK(6); WORK(7); } // process the remaining rows with vectorization only for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize) { WORK(0); } #undef WORK // process the remaining rows without vectorization for(Index i=actual_b_end2; i<actual_b; ++i) { if(RK==4) { C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3]; C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3]; } else { C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]; C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]; } } Bc0 += RK; Bc1 += RK; } // peeled loop on k } // peeled loop on the columns j // process the last column (we now perform a matrix-vector product) 
if((n-n_end)>0) { const Scalar* Bc0 = B+(n-1)*ldb; for(Index k=0; k<d_end; k+=RK) { // load and expand a 1 x RK block of B Packet b00, b10, b20, b30; b00 = pset1<Packet>(Bc0[0]); b10 = pset1<Packet>(Bc0[1]); if(RK==4) b20 = pset1<Packet>(Bc0[2]); if(RK==4) b30 = pset1<Packet>(Bc0[3]); Packet a0, a1, a2, a3, c0, t0/*, t1*/; const Scalar* A0 = A+ib+(k+0)*lda; const Scalar* A1 = A+ib+(k+1)*lda; const Scalar* A2 = A+ib+(k+2)*lda; const Scalar* A3 = A+ib+(k+3)*lda; Scalar* C0 = C+ib+(n_end)*ldc; a0 = pload<Packet>(A0); a1 = pload<Packet>(A1); if(RK==4) { a2 = pload<Packet>(A2); a3 = pload<Packet>(A3); } else { // workaround "may be used uninitialized in this function" warning a2 = a3 = a0; } #define WORK(I) \ c0 = pload<Packet>(C0+i+(I)*PacketSize); \ KMADD(c0, a0, b00, t0) \ a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \ KMADD(c0, a1, b10, t0) \ a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \ if(RK==4) KMADD(c0, a2, b20, t0) \ if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \ if(RK==4) KMADD(c0, a3, b30, t0) \ if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \ pstore(C0+i+(I)*PacketSize, c0); // aggressive vectorization and peeling for(Index i=0; i<actual_b_end1; i+=PacketSize*8) { EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2"); WORK(0); WORK(1); WORK(2); WORK(3); WORK(4); WORK(5); WORK(6); WORK(7); } // vectorization only for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize) { WORK(0); } // remaining scalars for(Index i=actual_b_end2; i<actual_b; ++i) { if(RK==4) C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3]; else C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]; } Bc0 += RK; #undef WORK } } // process the last columns of A, corresponding to the last rows of B Index rd = d-d_end; if(rd>0) { for(Index j=0; j<n; ++j) { enum { Alignment = PacketSize>1 ? 
Aligned : 0 }; typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector; typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector; if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b); else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b) + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b); else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b) + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b) + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b); } } } // blocking on the rows of A and C }
void Eigen::internal::stable_norm_kernel | ( | const ExpressionType & | bl, |
Scalar & | ssq, | ||
Scalar & | scale, | ||
Scalar & | invScale | ||
) | [inline] |
Definition at line 18 of file StableNorm.h.
{
  // Folds the block `bl` into a running scaled sum of squares: `ssq` holds the
  // sum of squares of the data seen so far multiplied by `invScale`, and
  // `scale` tracks the largest magnitude seen so far.
  const Scalar blockMax = bl.cwiseAbs().maxCoeff();

  if(blockMax>scale)
  {
    // A new largest magnitude: rescale the accumulated sum to the new scale.
    ssq = ssq * numext::abs2(scale/blockMax);

    const Scalar invBlockMax = Scalar(1)/blockMax;
    const Scalar largest = NumTraits<Scalar>::highest();
    if(invBlockMax > largest)
    {
      // the reciprocal is not representable: clamp it
      invScale = largest;
      scale = Scalar(1)/invScale;
    }
    else if(blockMax>largest) // we got a INF
    {
      invScale = Scalar(1);
      scale = blockMax;
    }
    else
    {
      scale = blockMax;
      invScale = invBlockMax;
    }
  }
  else if(blockMax!=blockMax) // we got a NaN
  {
    scale = blockMax;
  }

  // TODO if the maxCoeff is much much smaller than the current scale,
  // then we can neglect this sub vector
  if(scale>Scalar(0)) // if scale==0, then bl is 0
  {
    ssq += (bl*invScale).squaredNorm();
  }
}
void Eigen::internal::swap | ( | scoped_array< T > & | a, |
scoped_array< T > & | b | ||
) |
EIGEN_DEVICE_FUNC void Eigen::internal::throw_std_bad_alloc | ( | ) | [inline] |
static Matrix<Scalar,2,2> Eigen::internal::toRotationMatrix | ( | const Scalar & | s | ) | [inline, static] |
Helper function to convert an arbitrary rotation object to a rotation matrix.
Scalar | the numeric type of the matrix coefficients |
Dim | the dimension of the current space |
It returns a Dim x Dim fixed size matrix.
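Default specializations are provided for: a scalar (2D only, converted through Rotation2D), any type based on RotationBase, and any Dim x Dim matrix expression (returned unchanged).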
Currently toRotationMatrix is only used by Transform.
Definition at line 182 of file RotationBase.h.
{
  // A bare scalar is only accepted in 2D; it is turned into a matrix
  // by constructing a Rotation2D from it.
  EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
  return Rotation2D<Scalar>(s).toRotationMatrix();
}
static Matrix<Scalar,Dim,Dim> Eigen::internal::toRotationMatrix | ( | const RotationBase< OtherDerived, Dim > & | r | ) | [inline, static] |
Definition at line 189 of file RotationBase.h.
{
// Rotation objects know how to convert themselves: delegate to the member function.
return r.toRotationMatrix();
}
static const MatrixBase<OtherDerived>& Eigen::internal::toRotationMatrix | ( | const MatrixBase< OtherDerived > & | mat | ) | [inline, static] |
Definition at line 195 of file RotationBase.h.
{
  // A matrix expression is returned as is, after a compile-time check
  // that it is exactly Dim x Dim.
  EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim, YOU_MADE_A_PROGRAMMING_MISTAKE)
  return mat;
}
void Eigen::internal::treePostorder | ( | typename IndexVector::Scalar | n, |
IndexVector & | parent, | ||
IndexVector & | post | ||
) |
Post order a tree.
n | the number of nodes |
parent | Input tree |
post | postordered tree |
Definition at line 178 of file SparseColEtree.h.
{
  typedef typename IndexVector::Scalar StorageIndex;

  // Linked list of children: first_kid(p) is the first child of node p and
  // next_kid(v) is the sibling following node v; -1 marks the end of a list.
  IndexVector first_kid;
  IndexVector next_kid;

  // Allocate storage for working arrays and results
  // (n+1 entries: parent() values are used as indices below, and node #n is the dummy root)
  first_kid.resize(n+1);
  next_kid.setZero(n+1);
  post.setZero(n+1);

  // Set up structure describing children
  first_kid.setConstant(-1);
  for (StorageIndex child = n-1; child >= 0; child--)
  {
    // push `child` at the front of its parent's list of children
    const StorageIndex dad = parent(child);
    next_kid(child) = first_kid(dad);
    first_kid(dad) = child;
  }

  // Depth-first search from dummy root vertex #n
  StorageIndex postnum = 0;
  internal::nr_etdfs(n, parent, first_kid, next_kid, post, postnum);
}
static EIGEN_DEVICE_FUNC void Eigen::internal::tridiagonal_qr_step | ( | RealScalar * | diag, |
RealScalar * | subdiag, | ||
Index | start, | ||
Index | end, | ||
Scalar * | matrixQ, | ||
Index | n | ||
) | [static] |
Performs a QR step on a tridiagonal symmetric matrix represented as a pair of two vectors diag and subdiag.
diag | the diagonal part of the input selfadjoint tridiagonal matrix |
subdiag | the sub-diagonal part of the input selfadjoint tridiagonal matrix |
start | starting index of the submatrix to work on |
end | last+1 index of the submatrix to work on |
matrixQ | pointer to the column-major matrix holding the eigenvectors, can be 0 |
n | size of the input matrix |
For compilation efficiency reasons, this procedure does not use eigen expression for its arguments.
Implemented from Golub's "Matrix Computations", algorithm 8.3.2: "implicit symmetric QR step with Wilkinson shift"
Definition at line 801 of file SelfAdjointEigenSolver.h.
{
  // One implicit symmetric QR step with Wilkinson shift on the tridiagonal
  // sub-matrix [start,end], stored as `diag` and `subdiag`. When matrixQ is
  // non-null, every Givens rotation is also applied on the right of the n x n
  // matrix it points to.
  using std::abs;

  RealScalar td = (diag[end-1] - diag[end])*RealScalar(0.5);
  RealScalar e = subdiag[end-1];
  // Note that thanks to scaling, e^2 or td^2 cannot overflow, however they can still
  // underflow thus leading to inf/NaN values when using the following commented code:
  //   RealScalar e2 = numext::abs2(subdiag[end-1]);
  //   RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2));
  // This explain the following, somewhat more complicated, version:
  RealScalar mu = diag[end];
  if(td==RealScalar(0))
  {
    mu -= abs(e);
  }
  else if(e!=RealScalar(0))
  {
    // With e==0 the shift correction is exactly zero, so that case is skipped.
    const RealScalar e2 = numext::abs2(e);
    const RealScalar h = numext::hypot(td,e);
    // td and the signed h have the same sign, hence denom cannot cancel to zero.
    const RealScalar denom = td + (td>RealScalar(0) ? h : -h);
    if(e2==RealScalar(0))
    {
      // e*e underflowed: evaluate e^2/denom as e/(denom/e), which never forms e^2.
      mu -= e / (denom / e);
    }
    else
    {
      mu -= e2 / denom;
    }
  }

  RealScalar x = diag[start] - mu;
  RealScalar z = subdiag[start];
  for (Index k = start; k < end; ++k)
  {
    JacobiRotation<RealScalar> rot;
    rot.makeGivens(x, z);

    // do T = G' T G
    RealScalar sdk = rot.s() * diag[k] + rot.c() * subdiag[k];
    RealScalar dkp1 = rot.s() * subdiag[k] + rot.c() * diag[k+1];

    diag[k] = rot.c() * (rot.c() * diag[k] - rot.s() * subdiag[k]) - rot.s() * (rot.c() * subdiag[k] - rot.s() * diag[k+1]);
    diag[k+1] = rot.s() * sdk + rot.c() * dkp1;
    subdiag[k] = rot.c() * sdk - rot.s() * dkp1;

    if (k > start)
      subdiag[k - 1] = rot.c() * subdiag[k-1] - rot.s() * z;

    // x and z are the pair of entries the next rotation is built from
    x = subdiag[k];
    if (k < end - 1)
    {
      z = -rot.s() * subdiag[k+1];
      subdiag[k + 1] = rot.c() * subdiag[k+1];
    }

    // apply the givens rotation to the unit matrix Q = Q * G
    if (matrixQ)
    {
      // FIXME if StorageOrder == RowMajor this operation is not very efficient
      Map<Matrix<Scalar,Dynamic,Dynamic,StorageOrder> > q(matrixQ,n,n);
      q.applyOnTheRight(k,k+1,rot);
    }
  }
}
void Eigen::internal::tridiagonalization_inplace | ( | MatrixType & | matA, |
CoeffVectorType & | hCoeffs | ||
) |
Performs a tridiagonal decomposition of the selfadjoint matrix matA in-place.
[in,out] | matA | On input the selfadjoint matrix. Only the lower triangular part is referenced. On output, the strict upper part is left unchanged, and the lower triangular part represents the T and Q matrices in packed format as detailed below. |
[out] | hCoeffs | returned Householder coefficients (see below) |
On output, the tridiagonal selfadjoint matrix T is stored in the diagonal and lower sub-diagonal of the matrix matA. The unitary matrix Q is represented in a compact way as a product of Householder reflectors $H_i$ such that: $Q = H_{N-1} \ldots H_1 H_0$. The Householder reflectors are defined as $H_i = I - h_i v_i v_i^*$ where $h_i = hCoeffs[i]$ is the $i$th Householder coefficient and $v_i$ is the Householder vector defined by $v_i = [0, \ldots, 0, 1, matA(i+2,i), \ldots, matA(N-1,i)]^T$.
Implemented from Golub's "Matrix Computations", algorithm 8.3.1.
Definition at line 347 of file Tridiagonalization.h.
{
  using numext::conj;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;

  const Index n = matA.rows();
  eigen_assert(n==matA.cols());
  eigen_assert(n==hCoeffs.size()+1 || n==1);

  for (Index col = 0; col<n-1; ++col)
  {
    // Number of entries of column `col` that lie strictly below the diagonal.
    const Index tailSize = n-col-1;
    RealScalar beta;
    Scalar h;
    matA.col(col).tail(tailSize).makeHouseholderInPlace(h, beta);

    // Apply similarity transformation to remaining columns,
    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)
    // The leading entry of v is temporarily set to 1 so that the column tail can be used as v.
    matA.col(col).coeffRef(col+1) = 1;

    // The tail of hCoeffs serves as workspace for the update vector.
    hCoeffs.tail(tailSize).noalias()
        = (matA.bottomRightCorner(tailSize,tailSize).template selfadjointView<Lower>()
           * (conj(h) * matA.col(col).tail(tailSize)));

    hCoeffs.tail(tailSize)
        += (conj(h)*Scalar(-0.5)*(hCoeffs.tail(tailSize).dot(matA.col(col).tail(tailSize))))
           * matA.col(col).tail(tailSize);

    matA.bottomRightCorner(tailSize, tailSize).template selfadjointView<Lower>()
        .rankUpdate(matA.col(col).tail(tailSize), hCoeffs.tail(tailSize), -1);

    // Store the sub-diagonal entry of T and the Householder coefficient.
    matA.col(col).coeffRef(col+1) = beta;
    hCoeffs.coeffRef(col) = h;
  }
}
void Eigen::internal::tridiagonalization_inplace | ( | MatrixType & | mat, |
DiagonalType & | diag, | ||
SubDiagonalType & | subdiag, | ||
bool | extractQ | ||
) |
Performs a full tridiagonalization in place.
[in,out] | mat | On input, the selfadjoint matrix whose tridiagonal decomposition is to be computed. Only the lower triangular part is referenced. The rest is left unchanged. On output, the orthogonal matrix Q in the decomposition if extractQ is true. |
[out] | diag | The diagonal of the tridiagonal matrix T in the decomposition. |
[out] | subdiag | The subdiagonal of the tridiagonal matrix T in the decomposition. |
[in] | extractQ | If true, the orthogonal matrix Q in the decomposition is computed and stored in mat . |
Computes the tridiagonal decomposition of the selfadjoint matrix mat in place such that $mat = Q T Q^*$ where $Q$ is unitary and $T$ a real symmetric tridiagonal matrix.
The tridiagonal matrix T is passed to the output parameters diag
and subdiag
. If extractQ
is true, then the orthogonal matrix Q is passed to mat
. Otherwise the lower part of the matrix mat
is destroyed.
The vectors diag
and subdiag
are not resized. The function assumes that they are already of the correct size. The length of the vector diag
should equal the number of rows in mat
, and the length of the vector subdiag
should be one less.
This implementation contains an optimized path for 3-by-3 matrices which is especially useful for plane fitting.
Example (this uses the same matrix as the example in Tridiagonalization::Tridiagonalization(const MatrixType&)):
Output:
Definition at line 427 of file Tridiagonalization.h.
{
  // mat must be square, diag must have one entry per row and subdiag one less.
  eigen_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
  // Forward to the implementation selected by tridiagonalization_inplace_selector for MatrixType.
  tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
}
void Eigen::internal::upperbidiagonalization_blocked_helper | ( | MatrixType & | A, |
typename MatrixType::RealScalar * | diagonal, | ||
typename MatrixType::RealScalar * | upper_diagonal, | ||
Index | bs, | ||
Ref< Matrix< typename MatrixType::Scalar, Dynamic, Dynamic, traits< MatrixType >::Flags &RowMajorBit > > | X, | ||
Ref< Matrix< typename MatrixType::Scalar, Dynamic, Dynamic, traits< MatrixType >::Flags &RowMajorBit > > | Y | ||
) |
Helper routine for the block reduction to upper bidiagonal form.
Let's partition the matrix A:
    | A00 A01 |
A = |         |
    | A10 A11 |
This function reduces to bidiagonal form the left rows
x blockSize vertical panel [A00/A10] and the blockSize x cols
horizontal panel [A00 A01] of the matrix A. The bottom-right block A11 is updated using matrix-matrix products: A11 -= V * Y^T + X * U^T, where V and U contain the left and right Householder vectors. V and U are stored in A10 and A01 respectively, and the update matrices X and Y are computed during the reduction.
Definition at line 152 of file UpperBidiagonalization.h.
{
  // Reduces the first bs columns/rows of A to bidiagonal form one index k at a
  // time, filling diagonal[k] and upper_diagonal[k] and accumulating the update
  // matrices X and Y, then applies the accumulated update to the trailing
  // bottom-right block of A with two matrix-matrix products.
  typedef typename MatrixType::Scalar Scalar;
  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
  // Inner strides for a column / a row of A depend on A's storage order.
  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
  typedef Ref<Matrix<Scalar, Dynamic, 1>, 0, ColInnerStride> SubColumnType;
  typedef Ref<Matrix<Scalar, 1, Dynamic>, 0, RowInnerStride> SubRowType;
  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder > > SubMatType;

  Index brows = A.rows();
  Index bcols = A.cols();

  // tau_u_prev holds the right Householder coefficient of the previous
  // iteration until its slot A(k-1,k) is free to receive it.
  Scalar tau_u, tau_u_prev(0), tau_v;

  for(Index k = 0; k < bs; ++k)
  {
    Index remainingRows = brows - k;
    Index remainingCols = bcols - k - 1;

    SubMatType X_k1( X.block(k,0, remainingRows,k) );
    SubMatType V_k1( A.block(k,0, remainingRows,k) );

    // 1 - update the k-th column of A
    SubColumnType v_k = A.col(k).tail(remainingRows);
    v_k -= V_k1 * Y.row(k).head(k).adjoint();
    if(k) v_k -= X_k1 * A.col(k).head(k);

    // 2 - construct left Householder transform in-place
    v_k.makeHouseholderInPlace(tau_v, diagonal[k]);

    if(k+1<bcols)
    {
      SubMatType Y_k ( Y.block(k+1,0, remainingCols, k+1) );
      SubMatType U_k1 ( A.block(0,k+1, k,remainingCols) );

      // this eases the application of Householder transformations
      // A(k,k) will store tau_v later
      A(k,k) = Scalar(1);

      // 3 - Compute y_k^T = tau_v * ( A^T*v_k - Y_k-1*V_k-1^T*v_k - U_k-1*X_k-1^T*v_k )
      {
        SubColumnType y_k( Y.col(k).tail(remainingCols) );
        // let's use the beginning of column k of Y as a temporary vector
        SubColumnType tmp( Y.col(k).head(k) );
        y_k.noalias() = A.block(k,k+1, remainingRows,remainingCols).adjoint() * v_k; // bottleneck
        tmp.noalias() = V_k1.adjoint() * v_k;
        y_k.noalias() -= Y_k.leftCols(k) * tmp;
        tmp.noalias() = X_k1.adjoint() * v_k;
        y_k.noalias() -= U_k1.adjoint() * tmp;
        y_k *= numext::conj(tau_v);
      }

      // 4 - update k-th row of A (it will become u_k)
      SubRowType u_k( A.row(k).tail(remainingCols) );
      u_k = u_k.conjugate();
      {
        u_k -= Y_k * A.row(k).head(k+1).adjoint();
        if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
      }

      // 5 - construct right Householder transform in-place
      u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);

      // this eases the application of Householder transformations
      // A(k,k+1) will store tau_u later
      A(k,k+1) = Scalar(1);

      // 6 - Compute x_k = tau_u * ( A*u_k - X_k-1*U_k-1^T*u_k - V_k*Y_k^T*u_k )
      {
        SubColumnType x_k ( X.col(k).tail(remainingRows-1) );
        // let's use the beginning of column k of X as temporary vectors
        // note that tmp0 and tmp1 overlap
        SubColumnType tmp0 ( X.col(k).head(k) ), tmp1 ( X.col(k).head(k+1) );
        x_k.noalias() = A.block(k+1,k+1, remainingRows-1,remainingCols) * u_k.transpose(); // bottleneck
        tmp0.noalias() = U_k1 * u_k.transpose();
        x_k.noalias() -= X_k1.bottomRows(remainingRows-1) * tmp0;
        tmp1.noalias() = Y_k.adjoint() * u_k.transpose();
        x_k.noalias() -= A.block(k+1,0, remainingRows-1,k+1) * tmp1;
        x_k *= numext::conj(tau_u);
        tau_u = numext::conj(tau_u);
        u_k = u_k.conjugate();
      }

      // The previous iteration's tau_u is written only now, into A(k-1,k).
      if(k>0) A.coeffRef(k-1,k) = tau_u_prev;
      tau_u_prev = tau_u;
    }
    else
      // NOTE(review): reached only when k+1 == bcols; with k == 0 this would
      // index row -1 — presumably callers guarantee bs < bcols, confirm.
      A.coeffRef(k-1,k) = tau_u_prev;

    // A(k,k) was temporarily set to 1 above; store the left coefficient now.
    A.coeffRef(k,k) = tau_v;
  }

  // Flush the last pending right Householder coefficient.
  if(bs<bcols) A.coeffRef(bs-1,bs) = tau_u_prev;

  // update the trailing bottom-right block (named A11 here)
  if(bcols>bs && brows>bs)
  {
    SubMatType A11( A.bottomRightCorner(brows-bs,bcols-bs) );
    SubMatType A10( A.block(bs,0, brows-bs,bs) );
    SubMatType A01( A.block(0,bs, bs,bcols-bs) );
    // Temporarily overwrite A01(bs-1,0) with 1 for the product below,
    // then restore the saved value.
    Scalar tmp = A01(bs-1,0);
    A01(bs-1,0) = 1;
    A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();
    A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;
    A01(bs-1,0) = tmp;
  }
}
void Eigen::internal::upperbidiagonalization_inplace_blocked | ( | MatrixType & | A, |
BidiagType & | bidiagonal, | ||
Index | maxBlockSize = 32 , |
||
typename MatrixType::Scalar * | = 0 |
||
) |
Implementation of a block-bidiagonal reduction. It is based on the following paper: The Design of a Parallel Dense Linear Algebra Software Library: Reduction to Hessenberg, Tridiagonal, and Bidiagonal Form. by Jaeyoung Choi, Jack J. Dongarra, David W. Walker. (1995) section 3.3
Definition at line 282 of file UpperBidiagonalization.h.
{
  typedef typename MatrixType::Scalar Scalar;
  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;

  const Index nRows = A.rows();
  const Index nCols = A.cols();
  const Index diagLength = (std::min)(nRows, nCols);

  // Work space shared by every panel: X has one row per row of A,
  // Y has one row per column of A, and both are maxBlockSize wide.
  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
  Matrix<Scalar, MatrixType::RowsAtCompileTime, Dynamic, StorageOrder, MatrixType::MaxRowsAtCompileTime> X(nRows,maxBlockSize);
  Matrix<Scalar, MatrixType::ColsAtCompileTime, Dynamic, StorageOrder, MatrixType::MaxColsAtCompileTime> Y(nCols,maxBlockSize);

  const Index panelWidth = (std::min)(maxBlockSize,diagLength);

  for(Index offset = 0; offset < diagLength; offset += panelWidth)
  {
    const Index bs    = (std::min)(diagLength-offset,panelWidth); // actual width of this panel
    const Index brows = nRows - offset;                           // rows still to be processed
    const Index bcols = nCols - offset;                           // columns still to be processed

    // With A partitioned as
    //
    //      | A00 A01 A02 |
    //  A = | A10 A11 A12 |     (A11 is the bs x bs diagonal block)
    //      | A20 A21 A22 |
    //
    // B is the trailing part that has not been reduced yet:
    //
    //      | A11 A12 |
    //  B = |         |
    //      | A21 A22 |
    BlockType B = A.block(offset,offset,brows,bcols);

    // A blocked step bidiagonalizes A11, A21 and A12 and updates A22; the next
    // iteration then continues on the updated A22. It is only used while A22
    // is non-empty and B is at least 48 columns wide (a somewhat arbitrary
    // threshold).
    const bool blockedStep = (offset+bs!=nCols) && (bcols>=48);

    if(!blockedStep)
    {
      // Finish the whole remainder with the unblocked kernel, reusing X's
      // storage as its temporary buffer.
      upperbidiagonalization_inplace_unblocked(B,
                                               &(bidiagonal.template diagonal<0>().coeffRef(offset)),
                                               &(bidiagonal.template diagonal<1>().coeffRef(offset)),
                                               X.data()
                                              );
      break; // nothing left to reduce
    }

    upperbidiagonalization_blocked_helper<BlockType>( B,
                                                      &(bidiagonal.template diagonal<0>().coeffRef(offset)),
                                                      &(bidiagonal.template diagonal<1>().coeffRef(offset)),
                                                      bs,
                                                      X.topLeftCorner(brows,bs),
                                                      Y.topLeftCorner(bcols,bs)
                                                    );
  }
}
void Eigen::internal::upperbidiagonalization_inplace_unblocked | ( | MatrixType & | mat, |
typename MatrixType::RealScalar * | diagonal, | ||
typename MatrixType::RealScalar * | upper_diagonal, | ||
typename MatrixType::Scalar * | tempData = 0 |
||
) |
Definition at line 93 of file UpperBidiagonalization.h.
{
  // Unblocked in-place reduction of mat: for each k, a left Householder
  // transform is built from column k and applied to the remaining columns,
  // then a right Householder transform is built from row k and applied to the
  // remaining rows. diagonal[k] and upper_diagonal[k] receive the values
  // produced by makeHouseholderInPlace; the Householder coefficients are kept
  // in mat(k,k) and mat(k,k+1).
  typedef typename MatrixType::Scalar Scalar;

  Index rows = mat.rows();
  Index cols = mat.cols();

  // If the caller supplied no scratch buffer, allocate one of `rows` entries.
  typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixType::MaxRowsAtCompileTime,1> TempType;
  TempType tempVector;
  if(tempData==0)
  {
    tempVector.resize(rows);
    tempData = tempVector.data();
  }

  // NOTE(review): the only exit is the k==cols-1 test below, so this loop
  // appears to assume cols >= 1 — confirm that callers guarantee it.
  for (Index k = 0; /* breaks at k==cols-1 below */ ; ++k)
  {
    Index remainingRows = rows - k;
    Index remainingCols = cols - k - 1;

    // construct left householder transform in-place in mat
    mat.col(k).tail(remainingRows)
       .makeHouseholderInPlace(mat.coeffRef(k,k), diagonal[k]);
    // apply householder transform to remaining part of mat on the left
    mat.bottomRightCorner(remainingRows, remainingCols)
       .applyHouseholderOnTheLeft(mat.col(k).tail(remainingRows-1), mat.coeff(k,k), tempData);

    // The last column has no entries to its right, so no right transform is needed.
    if(k == cols-1) break;

    // construct right householder transform in-place in mat
    mat.row(k).tail(remainingCols)
       .makeHouseholderInPlace(mat.coeffRef(k,k+1), upper_diagonal[k]);
    // apply householder transform to remaining part of mat on the right
    mat.bottomRightCorner(remainingRows-1, remainingCols)
       .applyHouseholderOnTheRight(mat.row(k).tail(remainingCols-1).transpose(), mat.coeff(k,k+1), tempData);
  }
}
bool Eigen::internal::useSpecificBlockingSizes | ( | Index & | k, |
Index & | m, | ||
Index & | n | ||
) | [inline] |
Definition at line 266 of file GeneralBlockPanelKernel.h.
{
  // Test hook: when EIGEN_TEST_SPECIFIC_BLOCKING_SIZES is defined and evaluates
  // to true, clamp k, m and n to the EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_{K,M,N}
  // values and return true; in every other case return false.
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
  if (!(EIGEN_TEST_SPECIFIC_BLOCKING_SIZES))
    return false;

  k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
  m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
  n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
  return true;
#else
  // Hook compiled out: mark the parameters as used so no warning is emitted.
  EIGEN_UNUSED_VARIABLE(k)
  EIGEN_UNUSED_VARIABLE(m)
  EIGEN_UNUSED_VARIABLE(n)
  return false;
#endif
}
const std::ptrdiff_t Eigen::internal::defaultL1CacheSize = 16*1024 |
Definition at line 33 of file GeneralBlockPanelKernel.h.
const std::ptrdiff_t Eigen::internal::defaultL2CacheSize = 512*1024 |
Definition at line 34 of file GeneralBlockPanelKernel.h.
const std::ptrdiff_t Eigen::internal::defaultL3CacheSize = 512*1024 |
Definition at line 35 of file GeneralBlockPanelKernel.h.
Packet16uc Eigen::internal::p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8) [static] |
Definition at line 116 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8) [static] |
Definition at line 121 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 } [static] |
Definition at line 84 of file AltiVec/PacketMath.h.
Definition at line 104 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8) [static] |
Definition at line 108 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8) [static] |
Definition at line 107 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8) [static] |
Definition at line 106 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN) [static] |
Definition at line 111 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN) [static] |
Definition at line 112 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 } [static] |
Definition at line 83 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 } [static] |
Definition at line 105 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16) [static] |
Definition at line 113 of file AltiVec/PacketMath.h.
Packet16uc Eigen::internal::p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16) [static] |
Definition at line 114 of file AltiVec/PacketMath.h.
uint32x2_t Eigen::internal::p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000) [static] |
Definition at line 18 of file NEON/Complex.h.
Packet2ul Eigen::internal::p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8) [static] |
Definition at line 22 of file AltiVec/Complex.h.
Packet2ul Eigen::internal::p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8) [static] |
Definition at line 23 of file AltiVec/Complex.h.
Packet4f Eigen::internal::p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 } [static] |
Definition at line 80 of file AltiVec/PacketMath.h.
Packet4f Eigen::internal::p4f_ONE = vec_ctf(p4i_ONE, 0) [static] |
Definition at line 74 of file AltiVec/PacketMath.h.
Packet4f Eigen::internal::p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1) [static] |
Definition at line 78 of file AltiVec/PacketMath.h.
Packet4i Eigen::internal::p4i_COUNTDOWN = { 0, 1, 2, 3 } [static] |
Definition at line 81 of file AltiVec/PacketMath.h.
static uint32x4_t Eigen::internal::p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_) [static] |
Definition at line 17 of file AltiVec/Complex.h.