// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <[email protected]>
// Copyright (C) 2008-2010 Gael Guennebaud <[email protected]>
// Copyright (C) 2011 Jitse Niesen <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


#ifndef EIGEN_PRODUCTEVALUATORS_H
#define EIGEN_PRODUCTEVALUATORS_H

namespace Eigen {

namespace internal {

template<typename Lhs, typename Rhs, int Options>
struct evaluator<Product<Lhs, Rhs, Options> >
 : public product_evaluator<Product<Lhs, Rhs, Options> >
{
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef product_evaluator<XprType> Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
};

// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
{
  static const bool value = true;
};
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
 : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
{
  typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
  typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
  {}
};


template<typename Lhs, typename Rhs, int DiagIndex>
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
 : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
{
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
        xpr.index() ))
  {}
};

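// Illustrative sketch (not part of the original file; variable names below are
// made up for illustration): the two evaluators above catch user-level
// expressions such as the following. The scalar factor is moved onto the lhs
// so it is carried into the product kernel, and taking the diagonal of a
// product switches to a lazy product so only the requested coefficients are
// computed:
//
//   Eigen::MatrixXd A(4,4), B(4,4);
//   Eigen::MatrixXd C = 3.0 * (A * B);       // rewritten as (3.0*A) * B
//   Eigen::VectorXd d = (A * B).diagonal();  // only diagonal coefficients of A*B evaluated
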
// Helper class to perform a matrix product with the destination at hand.
// Depending on the sizes of the factors, there are different evaluation strategies
// as controlled by internal::product_type.
template< typename Lhs, typename Rhs,
          typename LhsShape = typename evaluator_traits<Lhs>::Shape,
          typename RhsShape = typename evaluator_traits<Rhs>::Shape,
          int ProductType = internal::product_type<Lhs,Rhs>::value>
struct generic_product_impl;

template<typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
  static const bool value = true;
};

// This is the default evaluator implementation for products:
// It creates a temporary and calls generic_product_impl
template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
 : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
{
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef typename XprType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
  enum {
    Flags = Base::Flags | EvalBeforeNestingBit
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    : m_result(xpr.rows(), xpr.cols())
  {
    ::new (static_cast<Base*>(this)) Base(m_result);

    // FIXME shall we handle nested_eval here?
    // If so, then we must take care at removing the call to nested_eval in the specializations
    // (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
    // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
    // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
    // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
    // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
    //
    // const LhsNested lhs(xpr.lhs());
    // const RhsNested rhs(xpr.rhs());
    //
    // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);

    generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
  }

protected:
  PlainObject m_result;
};

// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense,
                  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
  {
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
  }
};

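// Illustrative sketch (not part of the original file): the Assignment
// specialization above is what a plain product assignment dispatches to,
// so the product is computed directly into the destination when the user
// rules out aliasing:
//
//   Eigen::MatrixXd A(8,8), B(8,8), C(8,8);
//   C.noalias() = A * B;  // Assignment::run -> generic_product_impl::evalTo(C, A, B)
//   C = A * B;            // aliasing is assumed: a temporary is evaluated first
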
// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense,
                  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
  {
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
  }
};

// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense,
                  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
  {
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
  }
};


// Dense ?= scalar * Product
// TODO we should apply that rule only if that's really helpful
// for instance, this is not good for inner products
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis>
struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
                                           const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar>
{
  typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
                       const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
  {
    call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func);
  }
};

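// Illustrative sketch (not part of the original file): thanks to the three
// Assignment specializations above, accumulating a (possibly scaled) product
// updates the destination in place, without an extra temporary:
//
//   Eigen::MatrixXd A(8,8), B(8,8), C(8,8);
//   C.noalias() += A * B;          // generic_product_impl::addTo
//   C.noalias() -= 2.5 * (A * B);  // scalar folded into the lhs: (2.5*A)*B
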
//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
// TODO enable it for "Dense ?= xpr - Product<>" as well.

template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr,
                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
  static const bool value = true;
};

template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product
{
  typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const Func1& func)
  {
    call_assignment_no_alias(dst, src.lhs(), func);
    call_assignment_no_alias(dst, src.rhs(), Func2());
  }
};

template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
                                            const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense>
  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> >
{};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
                                            const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense>
  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> >
{};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
                                            const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense>
  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> >
{};
//----------------------------------------

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};

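// Illustrative sketch (not part of the original file): the InnerProduct
// specialization above handles row-vector * column-vector products, whose
// result is 1x1. The product reduces to a coefficient-wise multiply-and-sum:
//
//   Eigen::VectorXd u(100), v(100);
//   Eigen::Matrix<double,1,1> s = u.transpose() * v;  // evalTo: (u.cwiseProduct(v)).sum()
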
/***********************************************************************
* Implementation of outer dense * dense vector product
***********************************************************************/

// Column major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{
  evaluator<Rhs> rhsEval(rhs);
  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
  const Index cols = dst.cols();
  for (Index j=0; j<cols; ++j)
    func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
}

// Row major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{
  evaluator<Lhs> lhsEval(lhs);
  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
  const Index rows = dst.rows();
  for (Index i=0; i<rows; ++i)
    func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
}

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
{
  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
  struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
  struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
  struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
  struct adds {
    Scalar m_scale;
    explicit adds(const Scalar& s) : m_scale(s) {}
    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
      dst.const_cast_derived() += m_scale * src;
    }
  };

  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }

  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }

  template<typename Dst>
  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }

  template<typename Dst>
  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }

};

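// Illustrative sketch (not part of the original file): an outer product fills
// the destination one column (column-major) or one row (row-major) at a time,
// each being a scaled copy of the other factor:
//
//   Eigen::VectorXd u(3), v(4);
//   Eigen::MatrixXd M(3,4);
//   M.noalias() = u * v.transpose();  // column-major path: col j of M is v(j) * u
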
// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
template<typename Lhs, typename Rhs, typename Derived>
struct generic_product_impl_base
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }

  template<typename Dst>
  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }

  template<typename Dst>
  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }

  template<typename Dst>
  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }

};

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
  typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::gemv_dense_selector<Side,
                                  (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
                                  bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
                                 >::run(lhs, rhs, dst, alpha);
  }
};

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // Same as: dst.noalias() = lhs.lazyProduct(rhs);
    // but easier on the compiler side
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>());
  }

  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() += lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>());
  }

  template<typename Dst>
  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() -= lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>());
  }

  // template<typename Dst>
  // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  // { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
};

// This specialization enforces the use of a coefficient-based evaluation strategy
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};

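// Illustrative sketch (not part of the original file): internal::product_type
// selects among the strategies above from the compile-time sizes. Roughly:
//
//   Eigen::MatrixXd M(1000,1000);
//   Eigen::VectorXd v(1000), w(1000);
//   w.noalias() = M * v;   // GemvProduct: dispatches to the gemv-like kernel
//
//   Eigen::Matrix3d A, B, C;
//   C.noalias() = A * B;   // small fixed sizes: CoeffBasedProductMode (lazyProduct)
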
// Case 2: Evaluate coeff by coeff
//
// This is mostly taken from CoeffBasedProduct.h
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
// for the inner dimension of the product, because evaluator objects do not know their size.

template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl;

template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl;

template<typename Lhs, typename Rhs, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
{
  typedef Product<Lhs, Rhs, LazyProduct> XprType;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    : m_lhs(xpr.lhs()),
      m_rhs(xpr.rhs()),
      m_lhsImpl(m_lhs),  // FIXME the creation of the evaluator objects should result in a no-op, but check that!
      m_rhsImpl(m_rhs),  // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
                         // or perhaps declare them on the fly on the packet method... We have to experiment to check what's best.
      m_innerDim(xpr.lhs().cols())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  // Everything below here is taken from CoeffBasedProduct.h

  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;

  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;

  typedef evaluator<LhsNestedCleaned> LhsEtorType;
  typedef evaluator<RhsNestedCleaned> RhsEtorType;

  enum {
    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
    ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
    MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,

    PacketSize = packet_traits<Scalar>::size,

    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
    CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
                  : InnerSize == Dynamic ? HugeCost
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,

    LhsFlags = LhsEtorType::Flags,
    RhsFlags = RhsEtorType::Flags,

    LhsAlignment = LhsEtorType::Alignment,
    RhsAlignment = RhsEtorType::Alignment,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,

    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
                    && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),

    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
                    && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),

    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
                   : (RhsRowMajor && !CanVectorizeLhs),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),

    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),

    Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
              : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
              : 0,

    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
     */
    CanVectorizeInner = SameType
                      && LhsRowMajor
                      && (!RhsRowMajor)
                      && (LhsFlags & RhsFlags & ActualPacketAccessBit)
                      && (InnerSize % packet_traits<Scalar>::size == 0)
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
  {
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
   * which is why we don't set the LinearAccessBit.
   * TODO: this seems possible when the result is a vector
   */
  EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
  {
    const Index row = RowsAtCompileTime == 1 ? 0 : index;
    const Index col = RowsAtCompileTime == 1 ? index : 0;
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

  template<int LoadMode, typename PacketType>
  const PacketType packet(Index row, Index col) const
  {
    PacketType res;
    typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
                                     Unroll ? int(InnerSize) : Dynamic,
                                     LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
    return res;
  }

  template<int LoadMode, typename PacketType>
  const PacketType packet(Index index) const
  {
    const Index row = RowsAtCompileTime == 1 ? 0 : index;
    const Index col = RowsAtCompileTime == 1 ? index : 0;
    return packet<LoadMode,PacketType>(row,col);
  }

protected:
  const LhsNested m_lhs;
  const RhsNested m_rhs;

  LhsEtorType m_lhsImpl;
  RhsEtorType m_rhsImpl;

  // TODO: Get rid of m_innerDim if known at compile time
  Index m_innerDim;
};

template<typename Lhs, typename Rhs>
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
{
  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
  enum {
    Flags = Base::Flags | EvalBeforeNestingBit
  };
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
  {}
};

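// Illustrative sketch (not part of the original file): the lazy-product
// evaluator above computes single coefficients on demand, as the dot product
// of one row of the lhs with one column of the rhs:
//
//   Eigen::Matrix3d A, B;
//   double x = A.lazyProduct(B)(1,2);  // coeff(1,2) = A.row(1) . B.col(2), no temporary
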
/****************************************
*** Coeff based product, Packet path ***
****************************************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
  {
    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
    res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
  }
};

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
  {
    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
    res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
  {
    res = pset1<Packet>(0);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
  {
    res = pset1<Packet>(0);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
  {
    res = pset1<Packet>(0);
    for(Index i = 0; i < innerDim; ++i)
      res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
  {
    res = pset1<Packet>(0);
    for(Index i = 0; i < innerDim; ++i)
      res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
};


/***************************************************************************
* Triangular products
***************************************************************************/
template<int Mode, bool LhsIsTriangular,
         typename Lhs, bool LhsIsVector,
         typename Rhs, bool RhsIsVector>
struct triangular_product_impl;

template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
      ::run(dst, lhs.nestedExpression(), rhs, alpha);
  }
};

template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};


/***************************************************************************
* SelfAdjoint products
***************************************************************************/
template <typename Lhs, int LhsMode, bool LhsIsVector,
          typename Rhs, int RhsMode, bool RhsIsVector>
struct selfadjoint_product_impl;

template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
  }
};

template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};

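// Illustrative sketch (not part of the original file): the shape-based
// dispatch above is what triangular- and selfadjoint-view products go
// through; both only implement scaleAndAddTo and inherit evalTo/addTo/subTo
// from generic_product_impl_base:
//
//   Eigen::MatrixXd A(8,8), B(8,8), C(8,8);
//   C.noalias() = A.triangularView<Eigen::Lower>() * B;   // TriangularShape  * DenseShape
//   C.noalias() = A.selfadjointView<Eigen::Upper>() * B;  // SelfAdjointShape * DenseShape
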
/***************************************************************************
* Diagonal products
***************************************************************************/

template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base
  : evaluator_base<Derived>
{
  typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
public:
  enum {
    CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,

    MatrixFlags = evaluator<MatrixType>::Flags,
    DiagFlags = evaluator<DiagonalType>::Flags,
    _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
    _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
                          ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
    // FIXME currently we need same types, but in the future the next rule should be the one
    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
    Alignment = evaluator<MatrixType>::Alignment
  };

  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
    : m_diagImpl(diag), m_matImpl(mat)
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
  {
    return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
  }

protected:
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
  {
    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
                          internal::pset1<PacketType>(m_diagImpl.coeff(id)));
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
  {
    enum {
      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
      DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
    };
    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
                          m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
  }

  evaluator<DiagonalType> m_diagImpl;
  evaluator<MatrixType> m_matImpl;
};

// diagonal * dense
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
{
  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
  using Base::m_diagImpl;
  using Base::m_matImpl;
  using Base::coeff;
  typedef typename Base::Scalar Scalar;

  typedef Product<Lhs, Rhs, ProductKind> XprType;
  typedef typename XprType::PlainObject PlainObject;

  enum {
    StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
  };

  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(xpr.rhs(), xpr.lhs().diagonal())
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
  {
    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
  }

#ifndef __CUDACC__
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
    // See also similar calls below.
    return this->template packet_impl<LoadMode,PacketType>(row,col, row,
        typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
  {
    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
  }
#endif
};

// dense * diagonal
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
{
  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
  using Base::m_diagImpl;
  using Base::m_matImpl;
  using Base::coeff;
  typedef typename Base::Scalar Scalar;

  typedef Product<Lhs, Rhs, ProductKind> XprType;
  typedef typename XprType::PlainObject PlainObject;

  enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };

  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(xpr.lhs(), xpr.rhs().diagonal())
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
  {
    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
  }

#ifndef __CUDACC__
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    return this->template packet_impl<LoadMode,PacketType>(row,col, col,
        typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
  {
    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
  }
#endif
};

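// Illustrative sketch (not part of the original file): both evaluators above
// keep diagonal products lazy, i.e., each coefficient is computed on demand
// by scaling one matrix coefficient, as in the coeff() overloads above:
//
//   Eigen::VectorXd d(4);
//   Eigen::MatrixXd M(4,4);
//   Eigen::MatrixXd R = d.asDiagonal() * M;  // R(i,j) = d(i) * M(i,j)
//   Eigen::MatrixXd S = M * d.asDiagonal();  // S(i,j) = M(i,j) * d(j)
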
/***************************************************************************
* Products with permutation matrices
***************************************************************************/

template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct permutation_matrix_product;

template<typename ExpressionType, int Side, bool Transposed>
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
{
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;

  template<typename Dest, typename PermutationType>
  static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
  {
    MatrixType mat(xpr);
    const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
    // FIXME we need an is_same for expressions that is not sensitive to constness. For instance
    // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
    //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
    if(is_same_dense(dst, mat))
    {
      // apply the permutation inplace
      Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
      mask.fill(false);
      Index r = 0;
      while(r < perm.size())
      {
        // search for the next seed
        while(r<perm.size() && mask[r]) r++;
        if(r>=perm.size())
          break;
        // we got one, let's follow it until we are back to the seed
        Index k0 = r++;
        Index kPrev = k0;
        mask.coeffRef(k0) = true;
        for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
        {
          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
                    (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));

          mask.coeffRef(k) = true;
          kPrev = k;
        }
      }
    }
    else
    {
      for(Index i = 0; i < n; ++i)
      {
        Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
             (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)

        =

        Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
             (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
      }
    }
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
};

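// Illustrative sketch (not part of the original file): a permutation product
// moves whole rows or columns rather than computing coefficients; when
// destination and source actually share storage, the cycle-following branch
// above permutes in place:
//
//   Eigen::PermutationMatrix<Eigen::Dynamic> P(4);
//   P.setIdentity();
//   Eigen::MatrixXd A(4,4), B(4,4);
//   B = P * A;            // rows of A reordered into B
//   B = A * P.inverse();  // columns reordered by the inverse permutation
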
/***************************************************************************
* Products with transposition matrices
***************************************************************************/

// FIXME could we unify Transpositions and Permutation into a single "shape"??

template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct transposition_matrix_product
{
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;

  template<typename Dest, typename TranspositionType>
  static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
  {
    MatrixType mat(xpr);
    typedef typename TranspositionType::StorageIndex StorageIndex;
    const Index size = tr.size();
    StorageIndex j = 0;

    if(!(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)))
      dst = mat;

    for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
      if(Index(j=tr.coeff(k))!=k)
      {
        if(Side==OnTheLeft)        dst.row(k).swap(dst.row(j));
        else if(Side==OnTheRight)  dst.col(k).swap(dst.col(j));
      }
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
  }
};


template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
};

template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
  {
    transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_PRODUCTEVALUATORS_H