MOAB 4.9.3pre — generated source listing of the bundled Eigen header `Redux.h`
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008 Gael Guennebaud <[email protected]> 00005 // Copyright (C) 2006-2008 Benoit Jacob <[email protected]> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_REDUX_H 00012 #define EIGEN_REDUX_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00018 // TODO 00019 // * implement other kind of vectorization 00020 // * factorize code 00021 00022 /*************************************************************************** 00023 * Part 1 : the logic deciding a strategy for vectorization and unrolling 00024 ***************************************************************************/ 00025 00026 template<typename Func, typename Derived> 00027 struct redux_traits 00028 { 00029 public: 00030 enum { 00031 PacketSize = packet_traits<typename Derived::Scalar>::size, 00032 InnerMaxSize = int(Derived::IsRowMajor) 00033 ? Derived::MaxColsAtCompileTime 00034 : Derived::MaxRowsAtCompileTime 00035 }; 00036 00037 enum { 00038 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) 00039 && (functor_traits<Func>::PacketAccess), 00040 MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), 00041 MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize 00042 }; 00043 00044 public: 00045 enum { 00046 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) 00047 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) 00048 : int(DefaultTraversal) 00049 }; 00050 00051 public: 00052 enum { 00053 Cost = Derived::SizeAtCompileTime == Dynamic ? 
HugeCost 00054 : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, 00055 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) 00056 }; 00057 00058 public: 00059 enum { 00060 Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling 00061 }; 00062 00063 #ifdef EIGEN_DEBUG_ASSIGN 00064 static void debug() 00065 { 00066 std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; 00067 std::cerr.setf(std::ios::hex, std::ios::basefield); 00068 EIGEN_DEBUG_VAR(Derived::Flags) 00069 std::cerr.unsetf(std::ios::hex); 00070 EIGEN_DEBUG_VAR(InnerMaxSize) 00071 EIGEN_DEBUG_VAR(PacketSize) 00072 EIGEN_DEBUG_VAR(MightVectorize) 00073 EIGEN_DEBUG_VAR(MayLinearVectorize) 00074 EIGEN_DEBUG_VAR(MaySliceVectorize) 00075 EIGEN_DEBUG_VAR(Traversal) 00076 EIGEN_DEBUG_VAR(UnrollingLimit) 00077 EIGEN_DEBUG_VAR(Unrolling) 00078 std::cerr << std::endl; 00079 } 00080 #endif 00081 }; 00082 00083 /*************************************************************************** 00084 * Part 2 : unrollers 00085 ***************************************************************************/ 00086 00087 /*** no vectorization ***/ 00088 00089 template<typename Func, typename Derived, int Start, int Length> 00090 struct redux_novec_unroller 00091 { 00092 enum { 00093 HalfLength = Length/2 00094 }; 00095 00096 typedef typename Derived::Scalar Scalar; 00097 00098 EIGEN_DEVICE_FUNC 00099 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 00100 { 00101 return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 00102 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func)); 00103 } 00104 }; 00105 00106 template<typename Func, typename Derived, int Start> 00107 struct redux_novec_unroller<Func, Derived, Start, 1> 00108 { 00109 enum { 00110 outer = Start / Derived::InnerSizeAtCompileTime, 
00111 inner = Start % Derived::InnerSizeAtCompileTime 00112 }; 00113 00114 typedef typename Derived::Scalar Scalar; 00115 00116 EIGEN_DEVICE_FUNC 00117 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) 00118 { 00119 return mat.coeffByOuterInner(outer, inner); 00120 } 00121 }; 00122 00123 // This is actually dead code and will never be called. It is required 00124 // to prevent false warnings regarding failed inlining though 00125 // for 0 length run() will never be called at all. 00126 template<typename Func, typename Derived, int Start> 00127 struct redux_novec_unroller<Func, Derived, Start, 0> 00128 { 00129 typedef typename Derived::Scalar Scalar; 00130 EIGEN_DEVICE_FUNC 00131 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } 00132 }; 00133 00134 /*** vectorization ***/ 00135 00136 template<typename Func, typename Derived, int Start, int Length> 00137 struct redux_vec_unroller 00138 { 00139 enum { 00140 PacketSize = packet_traits<typename Derived::Scalar>::size, 00141 HalfLength = Length/2 00142 }; 00143 00144 typedef typename Derived::Scalar Scalar; 00145 typedef typename packet_traits<Scalar>::type PacketScalar; 00146 00147 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) 00148 { 00149 return func.packetOp( 00150 redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 00151 redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) ); 00152 } 00153 }; 00154 00155 template<typename Func, typename Derived, int Start> 00156 struct redux_vec_unroller<Func, Derived, Start, 1> 00157 { 00158 enum { 00159 index = Start * packet_traits<typename Derived::Scalar>::size, 00160 outer = index / int(Derived::InnerSizeAtCompileTime), 00161 inner = index % int(Derived::InnerSizeAtCompileTime), 00162 alignment = Derived::Alignment 00163 }; 00164 00165 typedef typename Derived::Scalar Scalar; 00166 typedef typename packet_traits<Scalar>::type 
PacketScalar; 00167 00168 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) 00169 { 00170 return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner); 00171 } 00172 }; 00173 00174 /*************************************************************************** 00175 * Part 3 : implementation of all cases 00176 ***************************************************************************/ 00177 00178 template<typename Func, typename Derived, 00179 int Traversal = redux_traits<Func, Derived>::Traversal, 00180 int Unrolling = redux_traits<Func, Derived>::Unrolling 00181 > 00182 struct redux_impl; 00183 00184 template<typename Func, typename Derived> 00185 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> 00186 { 00187 typedef typename Derived::Scalar Scalar; 00188 EIGEN_DEVICE_FUNC 00189 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 00190 { 00191 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00192 Scalar res; 00193 res = mat.coeffByOuterInner(0, 0); 00194 for(Index i = 1; i < mat.innerSize(); ++i) 00195 res = func(res, mat.coeffByOuterInner(0, i)); 00196 for(Index i = 1; i < mat.outerSize(); ++i) 00197 for(Index j = 0; j < mat.innerSize(); ++j) 00198 res = func(res, mat.coeffByOuterInner(i, j)); 00199 return res; 00200 } 00201 }; 00202 00203 template<typename Func, typename Derived> 00204 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> 00205 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> 00206 {}; 00207 00208 template<typename Func, typename Derived> 00209 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> 00210 { 00211 typedef typename Derived::Scalar Scalar; 00212 typedef typename packet_traits<Scalar>::type PacketScalar; 00213 00214 static Scalar run(const Derived &mat, const Func& func) 00215 { 00216 const Index size = mat.size(); 00217 00218 const Index packetSize = 
packet_traits<Scalar>::size; 00219 const int packetAlignment = unpacket_traits<PacketScalar>::alignment; 00220 enum { 00221 alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), 00222 alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) 00223 }; 00224 const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); 00225 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); 00226 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); 00227 const Index alignedEnd2 = alignedStart + alignedSize2; 00228 const Index alignedEnd = alignedStart + alignedSize; 00229 Scalar res; 00230 if(alignedSize) 00231 { 00232 PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart); 00233 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop 00234 { 00235 PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize); 00236 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) 00237 { 00238 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index)); 00239 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize)); 00240 } 00241 00242 packet_res0 = func.packetOp(packet_res0,packet_res1); 00243 if(alignedEnd>alignedEnd2) 00244 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2)); 00245 } 00246 res = func.predux(packet_res0); 00247 00248 for(Index index = 0; index < alignedStart; ++index) 00249 res = func(res,mat.coeff(index)); 00250 00251 for(Index index = alignedEnd; index < size; ++index) 00252 res = func(res,mat.coeff(index)); 00253 } 00254 else // too small to vectorize anything. 
00255 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 00256 { 00257 res = mat.coeff(0); 00258 for(Index index = 1; index < size; ++index) 00259 res = func(res,mat.coeff(index)); 00260 } 00261 00262 return res; 00263 } 00264 }; 00265 00266 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling 00267 template<typename Func, typename Derived, int Unrolling> 00268 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling> 00269 { 00270 typedef typename Derived::Scalar Scalar; 00271 typedef typename packet_traits<Scalar>::type PacketType; 00272 00273 EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) 00274 { 00275 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00276 const Index innerSize = mat.innerSize(); 00277 const Index outerSize = mat.outerSize(); 00278 enum { 00279 packetSize = packet_traits<Scalar>::size 00280 }; 00281 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; 00282 Scalar res; 00283 if(packetedInnerSize) 00284 { 00285 PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0); 00286 for(Index j=0; j<outerSize; ++j) 00287 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize)) 00288 packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i)); 00289 00290 res = func.predux(packet_res); 00291 for(Index j=0; j<outerSize; ++j) 00292 for(Index i=packetedInnerSize; i<innerSize; ++i) 00293 res = func(res, mat.coeffByOuterInner(j,i)); 00294 } 00295 else // too small to vectorize anything. 00296 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 
00297 { 00298 res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); 00299 } 00300 00301 return res; 00302 } 00303 }; 00304 00305 template<typename Func, typename Derived> 00306 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> 00307 { 00308 typedef typename Derived::Scalar Scalar; 00309 typedef typename packet_traits<Scalar>::type PacketScalar; 00310 enum { 00311 PacketSize = packet_traits<Scalar>::size, 00312 Size = Derived::SizeAtCompileTime, 00313 VectorizedSize = (Size / PacketSize) * PacketSize 00314 }; 00315 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 00316 { 00317 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00318 if (VectorizedSize > 0) { 00319 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); 00320 if (VectorizedSize != Size) 00321 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); 00322 return res; 00323 } 00324 else { 00325 return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func); 00326 } 00327 } 00328 }; 00329 00330 // evaluator adaptor 00331 template<typename _XprType> 00332 class redux_evaluator 00333 { 00334 public: 00335 typedef _XprType XprType; 00336 EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} 00337 00338 typedef typename XprType::Scalar Scalar; 00339 typedef typename XprType::CoeffReturnType CoeffReturnType; 00340 typedef typename XprType::PacketScalar PacketScalar; 00341 typedef typename XprType::PacketReturnType PacketReturnType; 00342 00343 enum { 00344 MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, 00345 MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, 00346 // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator 00347 Flags = evaluator<XprType>::Flags & 
~DirectAccessBit, 00348 IsRowMajor = XprType::IsRowMajor, 00349 SizeAtCompileTime = XprType::SizeAtCompileTime, 00350 InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, 00351 CoeffReadCost = evaluator<XprType>::CoeffReadCost, 00352 Alignment = evaluator<XprType>::Alignment 00353 }; 00354 00355 EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } 00356 EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } 00357 EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } 00358 EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } 00359 EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } 00360 00361 EIGEN_DEVICE_FUNC 00362 CoeffReturnType coeff(Index row, Index col) const 00363 { return m_evaluator.coeff(row, col); } 00364 00365 EIGEN_DEVICE_FUNC 00366 CoeffReturnType coeff(Index index) const 00367 { return m_evaluator.coeff(index); } 00368 00369 template<int LoadMode, typename PacketType> 00370 PacketReturnType packet(Index row, Index col) const 00371 { return m_evaluator.template packet<LoadMode,PacketType>(row, col); } 00372 00373 template<int LoadMode, typename PacketType> 00374 PacketReturnType packet(Index index) const 00375 { return m_evaluator.template packet<LoadMode,PacketType>(index); } 00376 00377 EIGEN_DEVICE_FUNC 00378 CoeffReturnType coeffByOuterInner(Index outer, Index inner) const 00379 { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } 00380 00381 template<int LoadMode, typename PacketType> 00382 PacketReturnType packetByOuterInner(Index outer, Index inner) const 00383 { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? 
inner : outer); } 00384 00385 const XprType & nestedExpression() const { return m_xpr; } 00386 00387 protected: 00388 internal::evaluator<XprType> m_evaluator; 00389 const XprType &m_xpr; 00390 }; 00391 00392 } // end namespace internal 00393 00394 /*************************************************************************** 00395 * Part 4 : public API 00396 ***************************************************************************/ 00397 00398 00406 template<typename Derived> 00407 template<typename Func> 00408 typename internal::traits<Derived>::Scalar 00409 DenseBase<Derived>::redux(const Func& func) const 00410 { 00411 eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); 00412 00413 typedef typename internal::redux_evaluator<Derived> ThisEvaluator; 00414 ThisEvaluator thisEval(derived()); 00415 00416 return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func); 00417 } 00418 00422 template<typename Derived> 00423 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00424 DenseBase<Derived>::minCoeff() const 00425 { 00426 return derived().redux(Eigen::internal::scalar_min_op<Scalar>()); 00427 } 00428 00432 template<typename Derived> 00433 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00434 DenseBase<Derived>::maxCoeff() const 00435 { 00436 return derived().redux(Eigen::internal::scalar_max_op<Scalar>()); 00437 } 00438 00443 template<typename Derived> 00444 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00445 DenseBase<Derived>::sum() const 00446 { 00447 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 00448 return Scalar(0); 00449 return derived().redux(Eigen::internal::scalar_sum_op<Scalar>()); 00450 } 00451 00456 template<typename Derived> 00457 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00458 DenseBase<Derived>::mean() const 00459 { 00460 return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); 
00461 } 00462 00470 template<typename Derived> 00471 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00472 DenseBase<Derived>::prod() const 00473 { 00474 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 00475 return Scalar(1); 00476 return derived().redux(Eigen::internal::scalar_product_op<Scalar>()); 00477 } 00478 00485 template<typename Derived> 00486 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00487 MatrixBase<Derived>::trace() const 00488 { 00489 return derived().diagonal().sum(); 00490 } 00491 00492 } // end namespace Eigen 00493 00494 #endif // EIGEN_REDUX_H